Skip to main content

rsigma_parser/
ast.rs

1//! AST types for all Sigma constructs: rules, detections, conditions,
2//! correlations, and filters.
3//!
4//! Reference: Sigma specification V2.0.0 (2024-08-08)
5//! Reference: pySigma types, conditions, correlations, rule modules
6
7use std::collections::HashMap;
8use std::fmt;
9use std::str::FromStr;
10
11use serde::Serialize;
12
13use crate::value::{SigmaValue, Timespan};
14
15// =============================================================================
16// Enumerations
17// =============================================================================
18
19/// Rule maturity status.
20///
21/// Reference: pySigma rule.py SigmaStatus
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
23#[serde(rename_all = "lowercase")]
24pub enum Status {
25    Stable,
26    Test,
27    Experimental,
28    Deprecated,
29    Unsupported,
30}
31
32impl FromStr for Status {
33    type Err = ();
34    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
35        match s {
36            "stable" => Ok(Status::Stable),
37            "test" => Ok(Status::Test),
38            "experimental" => Ok(Status::Experimental),
39            "deprecated" => Ok(Status::Deprecated),
40            "unsupported" => Ok(Status::Unsupported),
41            _ => Err(()),
42        }
43    }
44}
45
46/// Severity level of a triggered rule.
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
48#[serde(rename_all = "lowercase")]
49pub enum Level {
50    Informational,
51    Low,
52    Medium,
53    High,
54    Critical,
55}
56
57impl FromStr for Level {
58    type Err = ();
59    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
60        match s {
61            "informational" => Ok(Level::Informational),
62            "low" => Ok(Level::Low),
63            "medium" => Ok(Level::Medium),
64            "high" => Ok(Level::High),
65            "critical" => Ok(Level::Critical),
66            _ => Err(()),
67        }
68    }
69}
70
71/// Relationship type for the `related` field.
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
73#[serde(rename_all = "lowercase")]
74pub enum RelationType {
75    Derived,
76    Obsolete,
77    Merged,
78    Renamed,
79    Similar,
80}
81
82impl FromStr for RelationType {
83    type Err = ();
84    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
85        match s {
86            "derived" => Ok(RelationType::Derived),
87            "obsolete" => Ok(RelationType::Obsolete),
88            "merged" => Ok(RelationType::Merged),
89            "renamed" => Ok(RelationType::Renamed),
90            "similar" => Ok(RelationType::Similar),
91            _ => Err(()),
92        }
93    }
94}
95
96// =============================================================================
97// Field Modifiers
98// =============================================================================
99
100/// All supported Sigma field modifiers.
101///
102/// Reference: pySigma modifiers.py modifier_mapping
103#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
104#[serde(rename_all = "lowercase")]
105pub enum Modifier {
106    // String matching modifiers
107    Contains,
108    StartsWith,
109    EndsWith,
110
111    // Value linking
112    All,
113
114    // Encoding modifiers
115    Base64,
116    Base64Offset,
117    Wide,
118    Utf16be,
119    Utf16,
120    WindAsh,
121
122    // Pattern matching
123    Re,
124    Cidr,
125
126    // Case sensitivity
127    Cased,
128
129    // Field existence
130    Exists,
131
132    // Placeholder expansion
133    Expand,
134
135    // Field reference
136    FieldRef,
137
138    // Numeric/value comparison
139    Gt,
140    Gte,
141    Lt,
142    Lte,
143    /// Not equal: field value must differ from the specified value.
144    Neq,
145
146    // Regex flags
147    #[serde(rename = "i")]
148    IgnoreCase,
149    #[serde(rename = "m")]
150    Multiline,
151    #[serde(rename = "s")]
152    DotAll,
153
154    // Timestamp parts
155    Minute,
156    Hour,
157    Day,
158    Week,
159    Month,
160    Year,
161}
162
163/// Parse a modifier identifier string.
164///
165/// Reference: pySigma modifiers.py modifier_mapping
166impl FromStr for Modifier {
167    type Err = ();
168    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
169        match s {
170            "contains" => Ok(Modifier::Contains),
171            "startswith" => Ok(Modifier::StartsWith),
172            "endswith" => Ok(Modifier::EndsWith),
173            "all" => Ok(Modifier::All),
174            "base64" => Ok(Modifier::Base64),
175            "base64offset" => Ok(Modifier::Base64Offset),
176            "wide" | "utf16le" => Ok(Modifier::Wide),
177            "utf16be" => Ok(Modifier::Utf16be),
178            "utf16" => Ok(Modifier::Utf16),
179            "windash" => Ok(Modifier::WindAsh),
180            "re" => Ok(Modifier::Re),
181            "cidr" => Ok(Modifier::Cidr),
182            "cased" => Ok(Modifier::Cased),
183            "exists" => Ok(Modifier::Exists),
184            "expand" => Ok(Modifier::Expand),
185            "fieldref" => Ok(Modifier::FieldRef),
186            "gt" => Ok(Modifier::Gt),
187            "gte" => Ok(Modifier::Gte),
188            "lt" => Ok(Modifier::Lt),
189            "lte" => Ok(Modifier::Lte),
190            "neq" => Ok(Modifier::Neq),
191            "i" | "ignorecase" => Ok(Modifier::IgnoreCase),
192            "m" | "multiline" => Ok(Modifier::Multiline),
193            "s" | "dotall" => Ok(Modifier::DotAll),
194            "minute" => Ok(Modifier::Minute),
195            "hour" => Ok(Modifier::Hour),
196            "day" => Ok(Modifier::Day),
197            "week" => Ok(Modifier::Week),
198            "month" => Ok(Modifier::Month),
199            "year" => Ok(Modifier::Year),
200            _ => Err(()),
201        }
202    }
203}
204
205// =============================================================================
206// Field Specification
207// =============================================================================
208
209/// A field name with optional modifiers, parsed from detection keys like
210/// `TargetObject|endswith` or `Destination|contains|all`.
211///
212/// Reference: pySigma rule/detection.py SigmaDetectionItem.from_mapping
213#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
214pub struct FieldSpec {
215    /// Field name (`None` for keyword detections without a field).
216    pub name: Option<String>,
217    /// Ordered list of modifiers applied to this field.
218    pub modifiers: Vec<Modifier>,
219}
220
221impl FieldSpec {
222    pub fn new(name: Option<String>, modifiers: Vec<Modifier>) -> Self {
223        FieldSpec { name, modifiers }
224    }
225
226    pub fn has_modifier(&self, m: Modifier) -> bool {
227        self.modifiers.contains(&m)
228    }
229
230    pub fn is_keyword(&self) -> bool {
231        self.name.is_none()
232    }
233}
234
235// =============================================================================
236// Condition Expression AST
237// =============================================================================
238
239/// Parsed condition expression AST.
240///
241/// Produced by the PEG parser + Pratt parser from condition strings like
242/// `selection and not filter` or `1 of selection_* and not 1 of filter_*`.
243///
244/// Reference: pySigma conditions.py ConditionItem hierarchy
245#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
246pub enum ConditionExpr {
247    /// Logical AND of sub-expressions.
248    And(Vec<ConditionExpr>),
249    /// Logical OR of sub-expressions.
250    Or(Vec<ConditionExpr>),
251    /// Logical NOT of a sub-expression.
252    Not(Box<ConditionExpr>),
253    /// Reference to a named detection identifier.
254    Identifier(String),
255    /// Quantified selector: `1 of selection_*`, `all of them`, etc.
256    Selector {
257        quantifier: Quantifier,
258        pattern: SelectorPattern,
259    },
260}
261
262impl fmt::Display for ConditionExpr {
263    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
264        match self {
265            ConditionExpr::And(args) => {
266                let parts: Vec<String> = args.iter().map(|a| format!("{a}")).collect();
267                write!(f, "({})", parts.join(" and "))
268            }
269            ConditionExpr::Or(args) => {
270                let parts: Vec<String> = args.iter().map(|a| format!("{a}")).collect();
271                write!(f, "({})", parts.join(" or "))
272            }
273            ConditionExpr::Not(arg) => write!(f, "not {arg}"),
274            ConditionExpr::Identifier(id) => write!(f, "{id}"),
275            ConditionExpr::Selector {
276                quantifier,
277                pattern,
278            } => write!(f, "{quantifier} of {pattern}"),
279        }
280    }
281}
282
283/// Quantifier in a selector expression.
284#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
285pub enum Quantifier {
286    /// Match any (at least one): `1 of ...` or `any of ...`
287    Any,
288    /// Match all: `all of ...`
289    All,
290    /// Match a specific count: `N of ...`
291    Count(u64),
292}
293
294impl fmt::Display for Quantifier {
295    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
296        match self {
297            Quantifier::Any => write!(f, "1"),
298            Quantifier::All => write!(f, "all"),
299            Quantifier::Count(n) => write!(f, "{n}"),
300        }
301    }
302}
303
304/// Target pattern in a selector expression.
305#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
306pub enum SelectorPattern {
307    /// All detection identifiers: `... of them`
308    Them,
309    /// A wildcard pattern matching detection names: `... of selection_*`
310    Pattern(String),
311}
312
313impl fmt::Display for SelectorPattern {
314    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
315        match self {
316            SelectorPattern::Them => write!(f, "them"),
317            SelectorPattern::Pattern(p) => write!(f, "{p}"),
318        }
319    }
320}
321
322// =============================================================================
323// Detection Section
324// =============================================================================
325
326/// A single detection item: a field (with modifiers) mapped to one or more values.
327///
328/// Examples:
329/// - `EventType: "user.mfa.factor.deactivate"` → field="EventType", values=["user.mfa..."]
330/// - `Destination|contains|all: ['new-object', 'net.webclient']` → field="Destination",
331///   modifiers=[Contains, All], values=[...]
332///
333/// Reference: pySigma rule/detection.py SigmaDetectionItem
334#[derive(Debug, Clone, PartialEq, Serialize)]
335pub struct DetectionItem {
336    /// The field specification (name + modifiers).
337    pub field: FieldSpec,
338    /// One or more values to match against.
339    pub values: Vec<SigmaValue>,
340}
341
342/// A detection definition: a group of detection items or nested detections.
343///
344/// When constructed from a YAML mapping, items are AND-linked.
345/// When constructed from a YAML list of mappings, sub-detections are OR-linked.
346///
347/// Reference: pySigma rule/detection.py SigmaDetection
348#[derive(Debug, Clone, PartialEq, Serialize)]
349pub enum Detection {
350    /// AND-linked detection items (from a YAML mapping).
351    AllOf(Vec<DetectionItem>),
352    /// OR-linked sub-detections (from a YAML list of mappings).
353    AnyOf(Vec<Detection>),
354    /// Keyword detection: plain value(s) without a field.
355    Keywords(Vec<SigmaValue>),
356}
357
358/// The complete detection section of a Sigma rule.
359///
360/// Contains named detection identifiers, condition expressions, and optional timeframe.
361///
362/// Reference: pySigma rule/detection.py SigmaDetections
363#[derive(Debug, Clone, PartialEq, Serialize)]
364pub struct Detections {
365    /// Named detections (e.g. `selection`, `filter_main`, etc.)
366    pub named: HashMap<String, Detection>,
367    /// One or more condition expressions (parsed from condition string or list).
368    pub conditions: Vec<ConditionExpr>,
369    /// Raw condition strings (before parsing).
370    pub condition_strings: Vec<String>,
371    /// Optional timeframe for aggregation rules (deprecated in favor of correlations).
372    pub timeframe: Option<String>,
373}
374
375// =============================================================================
376// Log Source
377// =============================================================================
378
379/// Log source specification.
380///
381/// Reference: Sigma schema `logsource` object
382#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
383pub struct LogSource {
384    pub category: Option<String>,
385    pub product: Option<String>,
386    pub service: Option<String>,
387    pub definition: Option<String>,
388    /// Any additional custom logsource fields.
389    #[serde(flatten)]
390    pub custom: HashMap<String, String>,
391}
392
393// =============================================================================
394// Related Rule Reference
395// =============================================================================
396
397/// A reference to a related Sigma rule.
398#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
399pub struct Related {
400    pub id: String,
401    pub relation_type: RelationType,
402}
403
404// =============================================================================
405// Sigma Detection Rule
406// =============================================================================
407
408/// A complete Sigma detection rule.
409///
410/// Reference: Sigma schema V2.0.0, pySigma rule.py SigmaRule
411#[derive(Debug, Clone, PartialEq, Serialize)]
412pub struct SigmaRule {
413    // Required fields
414    pub title: String,
415    pub logsource: LogSource,
416    pub detection: Detections,
417
418    // Optional metadata
419    pub id: Option<String>,
420    pub name: Option<String>,
421    pub related: Vec<Related>,
422    pub taxonomy: Option<String>,
423    pub status: Option<Status>,
424    pub description: Option<String>,
425    pub license: Option<String>,
426    pub author: Option<String>,
427    pub references: Vec<String>,
428    pub date: Option<String>,
429    pub modified: Option<String>,
430    pub fields: Vec<String>,
431    pub falsepositives: Vec<String>,
432    pub level: Option<Level>,
433    pub tags: Vec<String>,
434    pub scope: Vec<String>,
435
436    /// Custom attributes attached to the rule.
437    ///
438    /// Populated from (a) any top-level YAML key that is not part of the
439    /// standard Sigma rule schema, (b) the entries of the dedicated top-level
440    /// `custom_attributes:` mapping (explicit entries win over arbitrary keys
441    /// of the same name), and (c) pipeline transformations such as
442    /// `SetCustomAttribute`, which are applied last and override both.
443    ///
444    /// Mirrors pySigma's `SigmaRule.custom_attributes` dict. Engines and
445    /// backends can read these to modify per-rule behavior.
446    #[serde(skip_serializing_if = "HashMap::is_empty")]
447    pub custom_attributes: HashMap<String, serde_yaml::Value>,
448}
449
450// =============================================================================
451// Correlation Rule
452// =============================================================================
453
454/// Correlation rule type.
455///
456/// Reference: pySigma correlations.py SigmaCorrelationType
457#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
458#[serde(rename_all = "snake_case")]
459pub enum CorrelationType {
460    EventCount,
461    ValueCount,
462    Temporal,
463    TemporalOrdered,
464    ValueSum,
465    ValueAvg,
466    ValuePercentile,
467    ValueMedian,
468}
469
470impl FromStr for CorrelationType {
471    type Err = ();
472    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
473        match s {
474            "event_count" => Ok(CorrelationType::EventCount),
475            "value_count" => Ok(CorrelationType::ValueCount),
476            "temporal" => Ok(CorrelationType::Temporal),
477            "temporal_ordered" => Ok(CorrelationType::TemporalOrdered),
478            "value_sum" => Ok(CorrelationType::ValueSum),
479            "value_avg" => Ok(CorrelationType::ValueAvg),
480            "value_percentile" => Ok(CorrelationType::ValuePercentile),
481            "value_median" => Ok(CorrelationType::ValueMedian),
482            _ => Err(()),
483        }
484    }
485}
486
487/// Comparison operator in a correlation condition.
488///
489/// Reference: pySigma correlations.py SigmaCorrelationConditionOperator
490#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
491pub enum ConditionOperator {
492    Lt,
493    Lte,
494    Gt,
495    Gte,
496    Eq,
497    Neq,
498}
499
500impl FromStr for ConditionOperator {
501    type Err = ();
502    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
503        match s {
504            "lt" => Ok(ConditionOperator::Lt),
505            "lte" => Ok(ConditionOperator::Lte),
506            "gt" => Ok(ConditionOperator::Gt),
507            "gte" => Ok(ConditionOperator::Gte),
508            "eq" => Ok(ConditionOperator::Eq),
509            "neq" => Ok(ConditionOperator::Neq),
510            _ => Err(()),
511        }
512    }
513}
514
515/// Condition for a correlation rule.
516///
517/// Reference: pySigma correlations.py SigmaCorrelationCondition
518#[derive(Debug, Clone, PartialEq, Serialize)]
519pub enum CorrelationCondition {
520    /// Threshold condition with one or more predicates (supports ranges).
521    ///
522    /// Single: `gte: 100`
523    /// Range: `gt: 100` + `lte: 200`
524    Threshold {
525        /// One or more (operator, value) predicates. All must be satisfied.
526        predicates: Vec<(ConditionOperator, u64)>,
527        /// Optional field reference (required for `value_count` type).
528        field: Option<String>,
529    },
530    /// Extended boolean condition for temporal types: `"rule_a and rule_b"`
531    Extended(ConditionExpr),
532}
533
534/// Field alias mapping in a correlation rule.
535///
536/// Maps a canonical alias name to per-rule field name mappings.
537///
538/// Reference: pySigma correlations.py SigmaCorrelationFieldAlias
539#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
540pub struct FieldAlias {
541    pub alias: String,
542    /// Maps rule reference (ID or name) → field name in that rule's events.
543    pub mapping: HashMap<String, String>,
544}
545
546/// A Sigma correlation rule.
547///
548/// Reference: pySigma correlations.py SigmaCorrelationRule
549#[derive(Debug, Clone, PartialEq, Serialize)]
550pub struct CorrelationRule {
551    // Metadata (shared with detection rules)
552    pub title: String,
553    pub id: Option<String>,
554    pub name: Option<String>,
555    pub status: Option<Status>,
556    pub description: Option<String>,
557    pub author: Option<String>,
558    pub date: Option<String>,
559    pub modified: Option<String>,
560    pub references: Vec<String>,
561    pub taxonomy: Option<String>,
562    pub tags: Vec<String>,
563    pub falsepositives: Vec<String>,
564    pub level: Option<Level>,
565
566    // Correlation-specific fields
567    pub correlation_type: CorrelationType,
568    pub rules: Vec<String>,
569    pub group_by: Vec<String>,
570    pub timespan: Timespan,
571    pub condition: CorrelationCondition,
572    pub aliases: Vec<FieldAlias>,
573    pub generate: bool,
574
575    /// Custom attributes attached to the correlation rule.
576    ///
577    /// Populated the same way as `SigmaRule.custom_attributes`: arbitrary
578    /// top-level YAML keys, the dedicated `custom_attributes:` block, and
579    /// pipeline `SetCustomAttribute` transformations (last-write-wins).
580    /// Engine-level `rsigma.*` extensions (e.g. `rsigma.correlation_event_mode`,
581    /// `rsigma.suppress`, `rsigma.action`) are read from here.
582    #[serde(skip_serializing_if = "HashMap::is_empty")]
583    pub custom_attributes: HashMap<String, serde_yaml::Value>,
584}
585
586// =============================================================================
587// Filter Rule
588// =============================================================================
589
590/// A Sigma filter rule that modifies the detection logic of referenced rules.
591///
592/// Filters add additional conditions (typically exclusions) to existing rules
593/// without modifying the original rule files.
594#[derive(Debug, Clone, PartialEq, Serialize)]
595pub struct FilterRule {
596    pub title: String,
597    pub id: Option<String>,
598    pub name: Option<String>,
599    pub status: Option<Status>,
600    pub description: Option<String>,
601    pub author: Option<String>,
602    pub date: Option<String>,
603    pub modified: Option<String>,
604    pub logsource: Option<LogSource>,
605
606    /// Rules this filter applies to (by ID or name).
607    pub rules: Vec<String>,
608    /// The filter detection section.
609    pub detection: Detections,
610}
611
612// =============================================================================
613// Collection / Document
614// =============================================================================
615
616/// A single parsed document from a Sigma YAML file.
617///
618/// A YAML file may contain multiple documents separated by `---`.
619/// Each document is either a detection rule, correlation rule, filter, or action.
620#[derive(Debug, Clone, PartialEq, Serialize)]
621pub enum SigmaDocument {
622    Rule(Box<SigmaRule>),
623    Correlation(CorrelationRule),
624    Filter(FilterRule),
625}
626
627/// A collection of parsed Sigma documents from one or more YAML files.
628#[derive(Debug, Clone, Serialize)]
629pub struct SigmaCollection {
630    pub rules: Vec<SigmaRule>,
631    pub correlations: Vec<CorrelationRule>,
632    pub filters: Vec<FilterRule>,
633    /// Parsing errors that were collected (when `collect_errors` is true).
634    #[serde(skip)]
635    pub errors: Vec<String>,
636}
637
638impl SigmaCollection {
639    pub fn new() -> Self {
640        SigmaCollection {
641            rules: Vec::new(),
642            correlations: Vec::new(),
643            filters: Vec::new(),
644            errors: Vec::new(),
645        }
646    }
647
648    /// Total number of parsed documents.
649    pub fn len(&self) -> usize {
650        self.rules.len() + self.correlations.len() + self.filters.len()
651    }
652
653    pub fn is_empty(&self) -> bool {
654        self.len() == 0
655    }
656}
657
658impl Default for SigmaCollection {
659    fn default() -> Self {
660        Self::new()
661    }
662}