usiem/components/
parsing.rs

1use dyn_clone::{clone_trait_object, DynClone};
2use serde::{Deserialize, Serialize};
3
4use crate::{
5    events::{schema::FieldSchema, SiemLog},
6    prelude::SiemIp,
7};
8
9use super::dataset::holder::DatasetHolder;
10
11/// A simple object with the logic to parse Logs.
12pub trait LogParser: DynClone + Send {
13    /// Parse the log. If it fails it must give a reason why. This allow optimization of the parsing process.
14    fn parse_log(&self, log: SiemLog, datasets: &DatasetHolder)
15        -> Result<SiemLog, LogParsingError>;
16    /// Name of the parser
17    fn name(&self) -> &'static str;
18    /// Description of the parser
19    fn description(&self) -> &'static str;
20    /// Get parser schema
21    fn schema(&self) -> &FieldSchema;
22    /// Get a log generator to test this parser
23    fn generator(&self) -> Box<dyn LogGenerator>;
24}
25clone_trait_object!(LogParser);
26
27/// This is the most complex type of parser. It's statefull to store past logs.
28/// Think of the USB event in linux, we need the rest of the logs to extract all information.
29/// The Parser component which uses this parsers must be able to store and load past Logs
30/// if the user connects to a different SIEM node (LoadBalancing).
31pub trait MultilineLogParser: DynClone + Send {
32    /// Parse the log. If it fails it must give a reason why. This allow optimization of the parsing process.
33    fn parse_log(
34        &mut self,
35        log: SiemLog,
36        datasets: &DatasetHolder,
37    ) -> Result<Option<SiemLog>, LogParsingError>;
38    /// Name of the parser
39    fn name(&self) -> &'static str;
40    /// Description of the parser
41    fn description(&self) -> &'static str;
42    /// The connection with the origin has been closed. We must preserve the logs stored inside this parser
43    /// so another node can use them to parse the logs of the same machine.
44    fn cleaning(&mut self) -> Vec<SiemLog>;
45    /// Return those logs that would not be used by the parser, or are older as to reduce the memmory usage.
46    fn unused(&mut self) -> Vec<SiemLog>;
47    /// Get parser schema
48    fn schema(&self) -> &FieldSchema;
49}
50
51clone_trait_object!(MultilineLogParser);
52
53/// Error at parsing a log
54#[derive(Clone, Debug, Serialize, Deserialize)]
55pub enum LogParsingError {
56    /// The parser can't be used with this log
57    NoValidParser(SiemLog),
58    /// The log is for this parser but there is a bug in the code
59    ParserError(SiemLog, String),
60    /// The log is for this parser but the submodule has not been implemented.
61    NotImplemented(SiemLog),
62    /// The log has change format the parser cant process it.
63    FormatError(SiemLog, String),
64    /// Log was discarded. It does not have utility or there are storage limitations.
65    Discard,
66}
67
68pub trait LogGenerator {
69    fn configure(&mut self, config: GeneratorConfig);
70    /// Generate a random log
71    fn log(&self) -> String;
72    /// Of the total overall logs that are generated in an organization,
73    /// whats the procentage of logs generated by this source?
74    /// The bigger, the most probability of being generated
75    fn weight(&self) -> u8;
76}
77
78/// Helps to generate more realistic logs and use them to match rules and trigger alerts
79pub struct GeneratorConfig {
80    pub malicious_users: Vec<String>,
81    pub user_generator: Box<dyn Fn() -> String>,
82    pub public_networks: Vec<(SiemIp, u32)>,
83    pub local_networks: Vec<(SiemIp, u32)>,
84    pub domain: String,
85    pub hostname_generator: Box<dyn Fn() -> String>,
86    pub malicious_ips: Vec<SiemIp>,
87}
88
89impl Default for GeneratorConfig {
90    fn default() -> Self {
91        use std::time::{SystemTime, UNIX_EPOCH};
92        // Poor mans random generator...
93        Self {
94            malicious_users: Vec::new(),
95            user_generator: Box::new(|| {
96                let nanos = SystemTime::now()
97                    .duration_since(UNIX_EPOCH)
98                    .unwrap()
99                    .subsec_nanos();
100                let nanos = nanos & 0xffff;
101                format!("User{}", nanos)
102            }),
103            public_networks: Default::default(),
104            local_networks: vec![([192, 168, 1, 1].into(), 32)],
105            domain: "Contoso".into(),
106            hostname_generator: Box::new(|| {
107                let nanos = SystemTime::now()
108                    .duration_since(UNIX_EPOCH)
109                    .unwrap()
110                    .subsec_nanos();
111                let nanos = nanos & 0xffff;
112                format!("Host{}", nanos)
113            }),
114            malicious_ips: Default::default(),
115        }
116    }
117}