diff --git a/Thesis_Docs/Nikkhah_Nasab-Aida-Mastersthesis.blg b/Thesis_Docs/Nikkhah_Nasab-Aida-Mastersthesis.blg index 0ec1b58b17db603813279dd01b32517b1045e2f8..05c9ea0ae5bbe96596c46d22d19865cec123123c 100644 --- a/Thesis_Docs/Nikkhah_Nasab-Aida-Mastersthesis.blg +++ b/Thesis_Docs/Nikkhah_Nasab-Aida-Mastersthesis.blg @@ -13,44 +13,44 @@ Database file #1: ../Thesis_Docs/sources/references.bib -- See the "IEEEtran_bst_HOWTO.pdf" manual for usage information. Done. -You've used 25 entries, +You've used 31 entries, 4087 wiz_defined-function locations, - 962 strings with 13639 characters, -and the built_in function-call counts, 22307 in all, are: -= -- 1697 -> -- 639 -< -- 184 -+ -- 347 -- -- 123 -* -- 1077 -:= -- 3093 -add.period$ -- 62 -call.type$ -- 25 -change.case$ -- 27 -chr.to.int$ -- 447 -cite$ -- 25 -duplicate$ -- 1561 -empty$ -- 1857 -format.name$ -- 140 -if$ -- 5278 + 997 strings with 14573 characters, +and the built_in function-call counts, 27027 in all, are: += -- 2080 +> -- 729 +< -- 231 ++ -- 399 +- -- 135 +* -- 1291 +:= -- 3775 +add.period$ -- 76 +call.type$ -- 31 +change.case$ -- 32 +chr.to.int$ -- 561 +cite$ -- 31 +duplicate$ -- 1891 +empty$ -- 2264 +format.name$ -- 156 +if$ -- 6400 int.to.chr$ -- 0 -int.to.str$ -- 25 -missing$ -- 289 -newline$ -- 100 -num.names$ -- 25 -pop$ -- 727 +int.to.str$ -- 31 +missing$ -- 339 +newline$ -- 118 +num.names$ -- 31 +pop$ -- 844 preamble$ -- 1 purify$ -- 0 quote$ -- 2 -skip$ -- 1693 +skip$ -- 2069 stack$ -- 0 -substring$ -- 1113 -swap$ -- 1292 -text.length$ -- 35 +substring$ -- 1384 +swap$ -- 1565 +text.length$ -- 44 text.prefix$ -- 0 top$ -- 5 -type$ -- 25 +type$ -- 31 warning$ -- 0 -while$ -- 107 -width$ -- 27 -write$ -- 259 +while$ -- 133 +width$ -- 33 +write$ -- 315 diff --git a/Thesis_Docs/Nikkhah_Nasab-Aida-Mastersthesis.pdf b/Thesis_Docs/Nikkhah_Nasab-Aida-Mastersthesis.pdf index 6ae176cae52f91cd797f3fa379e6023401e82a60..94a51000a5488b770965bd5a97fd156fd0ee6de9 100644 Binary files 
a/Thesis_Docs/Nikkhah_Nasab-Aida-Mastersthesis.pdf and b/Thesis_Docs/Nikkhah_Nasab-Aida-Mastersthesis.pdf differ diff --git a/Thesis_Docs/main.tex b/Thesis_Docs/main.tex index 150a269b3e45236c6b7e4520321dc1d761c53c1e..425629fec6f0060257668fcce303a6f61286231d 100644 --- a/Thesis_Docs/main.tex +++ b/Thesis_Docs/main.tex @@ -10,7 +10,7 @@ The primary objective of this research is to advance network security by develop The research is guided by several key questions, including: How can beaconing behavior be effectively detected within large-scale network data to provide early warning of potential threats? What is the impact of periodicity in network communications on distinguishing between benign and malicious activities? Furthermore, is the beaconing behavior detectable in generated synthetic data, and how does its detectability compare to that in real-world data? \section{Structure of the Thesis} -The thesis is organized into a cohesive narrative that begins by establishing the foundational background and core concepts essential to understanding network security and periodicity detection. Following this, a review of related work contextualizes the current research within the broader field. The methodology chapter then details the advanced techniques introduced in the framework. Chapter 5, Data Analysis is an exploration of real-world network log data to uncover patterns and insights related to beaconing behavior, setting the stage for subsequent evaluations. Chapter 6 is a detailed description of the procedures and techniques employed to generate synthetic beaconing data, which is used to validate the performance of the detection framework under controlled conditions. Chapter 7 is Evaluation and Results. An investigation and comparison of the framework’s performance on both real and synthetic data, summarizing key findings and contributions, and discussing potential improvements. Finally Chapter 8 is Conclusions and Future Work. 
The final chapter presents the overall conclusions of the research, outlines the contributions made, and proposes directions for future research in the field of network security. +The thesis is organized into a cohesive narrative that begins by establishing the foundational background and core concepts essential to understanding network security and periodicity detection. Following this, a review of related work contextualizes the current research within the broader field. The methodology chapter then details the advanced techniques introduced in the framework. Chapter 5, Data Analysis, is an exploration of real-world network log data to uncover patterns and insights related to beaconing behavior, setting the stage for subsequent evaluations. Chapter 6 presents a detailed description of the procedures and techniques employed to generate synthetic beaconing data, which is used to validate the performance of the detection framework under controlled conditions. Chapter 7, Evaluation and Results, is an investigation and comparison of the framework’s performance on both real and synthetic data, summarizing key findings and contributions, and discussing potential improvements. Finally, Chapter 8, Conclusions and Future Work, presents the overall conclusions of the research, outlines the contributions made, and proposes directions for future research in the field of network security. \chapter{Background} This chapter provides the foundational knowledge necessary for understanding the context and significance of this research. It begins with an overview of the cybersecurity landscape, emphasizing the current state, emerging trends, and persistent challenges faced by organizations. It then explores Advanced Persistent Threats (APTs) and their sophisticated, covert tactics that pose significant risks to enterprise networks. 
The discussion also covers the concept of periodicity in network communication, which is for detecting anomalies in cybersecurity contexts. On top of that, the chapter represents the role of time series databases, with a specific focus on InfluxDB, in managing and analyzing the vast amounts of data generated in cybersecurity operations. Finally, the chapter introduces the BAYWATCH framework, which serves as the foundation for the research by providing a structured approach to detecting beaconing behavior in network traffic. @@ -35,7 +35,7 @@ Advanced Persistent Threats (APTs) represent one of the most sophisticated and d \label{fig:apt_attack_lifecycle} \end{figure} -Figure \ref{fig:apt_attack_lifecycle} illustrates the lifecycle of an APT attack, highlighting the various stages involved, from initial reconnaissance to exfiltration of data. Understanding these stages is crucial for developing effective detection and mitigation strategies. +Figure \ref{fig:apt_attack_lifecycle} illustrates the lifecycle of an APT attack, highlighting the various stages involved, from initial reconnaissance to exfiltration of data. Understanding these stages is important for developing effective detection and mitigation strategies. APT actors employ various covert tactics to remain undetected and achieve their objectives. Some of these tactics include: @@ -47,7 +47,7 @@ APT actors employ various covert tactics to remain undetected and achieve their \end{itemize} \section{Enterprise Networks} -Enterprise networks are the backbone of modern organizations, providing the necessary infrastructure for communication, data sharing, and operational efficiency. However, their complexity and scale make them attractive targets for cyber attackers. Understanding the architecture, components, and vulnerabilities of enterprise networks is crucial for developing effective cybersecurity strategies. 
+Enterprise networks are the backbone of modern organizations, providing the necessary infrastructure for communication, data sharing, and operational efficiency. However, their complexity and scale make them attractive targets for cyber attackers. Understanding the architecture, components, and vulnerabilities of enterprise networks is important for developing effective cybersecurity strategies. \begin{figure}[htbp] \centering @@ -62,9 +62,9 @@ Figure \ref{fig:enterprise_network_diagram} provides a visual representation of Enterprise networks typically consist of multiple interconnected subsystems, including: \begin{itemize} - \item \textbf{Network Architecture:} The physical and logical design of the network, including the layout and interconnection of routers, switches, firewalls, and other network devices. A well-designed architecture enhances security by segmenting the network and controlling traffic flow. - \item \textbf{Security Protocols:} Protocols such as TLS (Transport Layer Security) and IPSec (Internet Protocol Security) protect data in transit. Additionally, firewalls, intrusion detection/prevention systems (IDS/IPS), and encryption mechanisms are employed to safeguard data and systems. - \item \textbf{Access Controls:} Policies and technologies that regulate who can access specific data and resources within the network. This includes user authentication, role-based access control (RBAC), and multi-factor authentication (MFA) to ensure that only authorized personnel can access sensitive information. + \item \textbf{Network Architecture:} The physical and logical design of the network, including the layout and interconnection of routers, switches, firewalls, and other network devices. A well-designed architecture enhances security by segmenting the network and controlling traffic flow \cite{o1992dynamic}. + \item \textbf{Security Protocols:} Protocols such as TLS (Transport Layer Security) and IPSec (Internet Protocol Security) protect data in transit. 
Additionally, firewalls, intrusion detection/prevention systems (IDS/IPS), and encryption mechanisms are employed to safeguard data and systems \cite{krawczyk2013security, davis2001ipsec, abbas2023subject}. + \item \textbf{Access Controls:} Policies and technologies that regulate who can access specific data and resources within the network. This includes user authentication, role-based access control (RBAC), and multi-factor authentication (MFA) to ensure that only authorized personnel can access sensitive information \cite{thomas1997team}. \item \textbf{Network Monitoring and Management:} Tools and practices for monitoring network traffic, identifying anomalies, and managing network resources to maintain performance and security. \end{itemize} @@ -81,7 +81,9 @@ Despite the implementation of robust security measures, enterprise networks rema \section{Time Series Databases and InfluxDB} Time-series databases (TSDBs) are optimized for storing and querying temporal data. In cybersecurity, they enable efficient analysis of network traffic patterns over time. -InfluxDB is a popular TSDB known for its high throughput and SQL-like query language (Flux). Key features include: +InfluxDB is a popular TSDB known for its high throughput and its query languages, the SQL-like InfluxQL and the functional Flux \cite{ahmad2017hands}. + +Key features include: \begin{itemize} \item Time-optimized storage for efficient data retrieval. \item Retention policies for automated data lifecycle management. @@ -233,7 +235,8 @@ To minimize the manual investigation workload, the BAYWATCH framework employs a reducing the number of cases that require manual investigation. \section{Summary} -This chapter has provided a comprehensive overview of the cybersecurity landscape, APTs and their covert tactics, enterprise networks, periodicity in network communication, and time series databases, with a detailed focus on InfluxDB. 
These foundational topics are crucial for understanding the subsequent chapters, which will represent related work, methodology, implementation, experiments, and results. The knowledge gained from this background will inform the development and evaluation of advanced techniques for detecting and mitigating cyber threats in enterprise networks. +This chapter has provided a comprehensive overview of the cybersecurity landscape, APTs and their covert tactics, enterprise networks, periodicity in network communication, and time series databases, with a detailed focus on InfluxDB. These foundational topics are important for understanding the subsequent chapters, which will present related work, methodology, implementation, experiments, and results. The knowledge gained from this background will inform the development and evaluation of advanced techniques for detecting and mitigating cyber threats in enterprise networks. +In this work, machine learning techniques such as bootstrapping and classifiers are not implemented; the focus is instead on time series analysis and periodicity detection in both real and synthetic data. \chapter{Related Work} @@ -281,7 +284,7 @@ The BAYWATCH framework is a comprehensive methodology designed to identify steal \section{Real Data Source} -The real-world data used in this study was collected from a large-scale enterprise network, capturing user activities as they navigate various URLs throughout the workday. This dataset provides a detailed perspective on user interactions, enabling an in-depth analysis of browsing patterns and behaviors. The data is stored in JSON format, which offers flexibility and readability, making it easier to manage and manipulate large volumes of information. Each entry in the dataset records a specific user interaction, including precise timestamps and the URLs visited, allowing for a chronological reconstruction of user activities. 
This level of detail is crucial for identifying patterns and trends over time, such as peak usage periods or frequent transitions between specific URLs. +The real-world data used in this study was collected from a large-scale enterprise network, capturing user activities as they navigate various URLs throughout the workday. This dataset provides a detailed perspective on user interactions, enabling an in-depth analysis of browsing patterns and behaviors. The data is stored in JSON format, which offers flexibility and readability, making it easier to manage and manipulate large volumes of information. Each entry in the dataset records a specific user interaction, including precise timestamps and the URLs visited, allowing for a chronological reconstruction of user activities. This level of detail is important for identifying patterns and trends over time, such as peak usage periods or frequent transitions between specific URLs. \subsection{Data Structure and Schema} The dataset is structured as a collection of JSON files, with each file containing detailed logs of user interactions. Each entry in the JSON files includes the following fields: @@ -698,7 +701,7 @@ The x-axis represents the frequency range, corresponding to different time inter \bigskip -Table \ref{tab:candidates} presents candidate data obtained from both real network traces and beaconing analysis. The table is organized into three columns: the first lists the measured attributes, including host IP addresses, URLs exhibiting beaconing behavior, observed frequencies (in Hertz), and amplitude values of the periodic signals. The subsequent columns represent two distinct candidates. Candidate 1 is characterized by the host IP address "127.0.0.1", a URL "beacon7.example.com", frequencies "0.05 \& 0.15" Hz (indicating multiple frequency components), and maximum amplitude of 0.014. 
Candidate 2, on the other hand, features the host IP address "10.16.102.224", the URL "m4v4r4c5.stackpathcdn.com", a single frequency component at 0.1 Hz, and maximum amplitude of 0.024. Such candidates are crucial because they alert analysts directly by flagging URLs with beaconing behavior, thereby providing actionable intelligence for further investigation. +Table \ref{tab:candidates} presents candidate data obtained from both real network traces and beaconing analysis. The table is organized into three columns: the first lists the measured attributes, including host IP addresses, URLs exhibiting beaconing behavior, observed frequencies (in Hertz), and amplitude values of the periodic signals. The subsequent columns represent two distinct candidates. Candidate 1 is characterized by the host IP address ``127.0.0.1'', a URL ``beacon7.example.com'', frequencies ``0.05 \& 0.15'' Hz (indicating multiple frequency components), and a maximum amplitude of 0.014. Candidate 2, on the other hand, features the host IP address ``10.16.102.224'', the URL ``m4v4r4c5.stackpathcdn.com'', a single frequency component at 0.1 Hz, and a maximum amplitude of 0.024. Such candidates are important because they alert analysts directly by flagging URLs with beaconing behavior, thereby providing actionable intelligence for further investigation. By applying the detection algorithm to this dataset and analyzing the output, it becomes evident that the algorithm effectively identifies periodic signals in both real and synthetic beaconing behaviors. The results highlight the robustness of the method, demonstrating its ability to distinguish between beaconing and non-beaconing activity while accurately capturing different periodic transmission intervals. 
diff --git a/Thesis_Docs/sources/references.bib b/Thesis_Docs/sources/references.bib index 897cef28ae1e8bcad983ca85877a32631cc344bf..98449c852d0de59c64b2b7a90ed75a1678dcb6c8 100644 --- a/Thesis_Docs/sources/references.bib +++ b/Thesis_Docs/sources/references.bib @@ -7,6 +7,60 @@ organization={IEEE} } +@article{o1992dynamic, + title={A dynamic network architecture}, + author={O'Malley, Sean W and Peterson, Larry L}, + journal={ACM Transactions on Computer Systems (TOCS)}, + volume={10}, + number={2}, + pages={110--143}, + year={1992}, + publisher={ACM New York, NY, USA} +} + +@inproceedings{krawczyk2013security, + title={On the security of the {TLS} protocol: A systematic analysis}, + author={Krawczyk, Hugo and Paterson, Kenneth G and Wee, Hoeteck}, + booktitle={Annual Cryptology Conference}, + pages={429--448}, + year={2013}, + organization={Springer} +} + +@book{davis2001ipsec, + title={{IPSec}: Securing {VPNs}}, + author={Davis, Carlton R}, + year={2001}, + publisher={McGraw-Hill Professional} +} + +@article{abbas2023subject, + title={Subject review: Intrusion detection system ({IDS}) and intrusion prevention system ({IPS})}, + author={Abbas, Safana and Naser, Wedad and Kadhim, Amal}, + journal={Global Journal of Engineering and Technology Advances}, + volume={14}, + number={2}, + pages={155--158}, + year={2023} +} + +@inproceedings{thomas1997team, + title={Team-based access control ({TMAC}): A primitive for applying role-based access controls in collaborative environments}, + author={Thomas, Roshan K}, + booktitle={Proceedings of the second ACM workshop on Role-based access control}, + pages={13--19}, + year={1997} +} + +@incollection{ahmad2017hands, + title={Hands-on {InfluxDB}}, + author={Ahmad, Khaleel and Ansari, Masroor}, + booktitle={NoSQL}, + pages={341--354}, + year={2017}, + publisher={Chapman and Hall/CRC} +} + @inproceedings{zhang2023global, author = {Zhang, Yizhe and Dong, Hongying and Nottingham, Alastair and Buchanan, Molly and Brown, Donald E. 
and Sun, Yixin}, title = {Global Analysis with Aggregation-based Beaconing Detection across Large Campus Networks},