// rsigma_parser/ast.rs

//! AST types for all Sigma constructs: rules, detections, conditions,
//! correlations, and filters.
//!
//! Reference: Sigma specification V2.0.0 (2024-08-08)
//! Reference: pySigma types, conditions, correlations, rule modules
6
7use std::collections::HashMap;
8use std::fmt;
9use std::str::FromStr;
10
11use serde::Serialize;
12
13use crate::value::{SigmaValue, Timespan};
14
15// =============================================================================
16// Enumerations
17// =============================================================================
18
19/// Rule maturity status.
20///
21/// Reference: pySigma rule.py SigmaStatus
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
23#[serde(rename_all = "lowercase")]
24pub enum Status {
25    Stable,
26    Test,
27    Experimental,
28    Deprecated,
29    Unsupported,
30}
31
32impl FromStr for Status {
33    type Err = ();
34    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
35        match s {
36            "stable" => Ok(Status::Stable),
37            "test" => Ok(Status::Test),
38            "experimental" => Ok(Status::Experimental),
39            "deprecated" => Ok(Status::Deprecated),
40            "unsupported" => Ok(Status::Unsupported),
41            _ => Err(()),
42        }
43    }
44}
45
46/// Severity level of a triggered rule.
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
48#[serde(rename_all = "lowercase")]
49pub enum Level {
50    Informational,
51    Low,
52    Medium,
53    High,
54    Critical,
55}
56
57impl Level {
58    pub fn as_str(&self) -> &'static str {
59        match self {
60            Level::Informational => "informational",
61            Level::Low => "low",
62            Level::Medium => "medium",
63            Level::High => "high",
64            Level::Critical => "critical",
65        }
66    }
67}
68
69impl FromStr for Level {
70    type Err = ();
71    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
72        match s {
73            "informational" => Ok(Level::Informational),
74            "low" => Ok(Level::Low),
75            "medium" => Ok(Level::Medium),
76            "high" => Ok(Level::High),
77            "critical" => Ok(Level::Critical),
78            _ => Err(()),
79        }
80    }
81}
82
83/// Relationship type for the `related` field.
84#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
85#[serde(rename_all = "lowercase")]
86pub enum RelationType {
87    Correlation,
88    Derived,
89    Obsolete,
90    Merged,
91    Renamed,
92    Similar,
93}
94
95impl FromStr for RelationType {
96    type Err = ();
97    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
98        match s {
99            "correlation" => Ok(RelationType::Correlation),
100            "derived" => Ok(RelationType::Derived),
101            "obsolete" => Ok(RelationType::Obsolete),
102            "merged" => Ok(RelationType::Merged),
103            "renamed" => Ok(RelationType::Renamed),
104            "similar" => Ok(RelationType::Similar),
105            _ => Err(()),
106        }
107    }
108}
109
110// =============================================================================
111// Field Modifiers
112// =============================================================================
113
114/// All supported Sigma field modifiers.
115///
116/// Reference: pySigma modifiers.py modifier_mapping
117#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
118#[serde(rename_all = "lowercase")]
119pub enum Modifier {
120    // String matching modifiers
121    Contains,
122    StartsWith,
123    EndsWith,
124
125    // Value linking
126    All,
127
128    // Encoding modifiers
129    Base64,
130    Base64Offset,
131    Wide,
132    Utf16be,
133    Utf16,
134    WindAsh,
135
136    // Pattern matching
137    Re,
138    Cidr,
139
140    // Case sensitivity
141    Cased,
142
143    // Field existence
144    Exists,
145
146    // Placeholder expansion
147    Expand,
148
149    // Field reference
150    FieldRef,
151
152    // Numeric/value comparison
153    Gt,
154    Gte,
155    Lt,
156    Lte,
157    /// Not equal: field value must differ from the specified value.
158    Neq,
159
160    // Regex flags
161    #[serde(rename = "i")]
162    IgnoreCase,
163    #[serde(rename = "m")]
164    Multiline,
165    #[serde(rename = "s")]
166    DotAll,
167
168    // Timestamp parts
169    Minute,
170    Hour,
171    Day,
172    Week,
173    Month,
174    Year,
175}
176
177/// Parse a modifier identifier string.
178///
179/// Reference: pySigma modifiers.py modifier_mapping
180impl FromStr for Modifier {
181    type Err = ();
182    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
183        match s {
184            "contains" => Ok(Modifier::Contains),
185            "startswith" => Ok(Modifier::StartsWith),
186            "endswith" => Ok(Modifier::EndsWith),
187            "all" => Ok(Modifier::All),
188            "base64" => Ok(Modifier::Base64),
189            "base64offset" => Ok(Modifier::Base64Offset),
190            "wide" | "utf16le" => Ok(Modifier::Wide),
191            "utf16be" => Ok(Modifier::Utf16be),
192            "utf16" => Ok(Modifier::Utf16),
193            "windash" => Ok(Modifier::WindAsh),
194            "re" => Ok(Modifier::Re),
195            "cidr" => Ok(Modifier::Cidr),
196            "cased" => Ok(Modifier::Cased),
197            "exists" => Ok(Modifier::Exists),
198            "expand" => Ok(Modifier::Expand),
199            "fieldref" => Ok(Modifier::FieldRef),
200            "gt" => Ok(Modifier::Gt),
201            "gte" => Ok(Modifier::Gte),
202            "lt" => Ok(Modifier::Lt),
203            "lte" => Ok(Modifier::Lte),
204            "neq" => Ok(Modifier::Neq),
205            "i" | "ignorecase" => Ok(Modifier::IgnoreCase),
206            "m" | "multiline" => Ok(Modifier::Multiline),
207            "s" | "dotall" => Ok(Modifier::DotAll),
208            "minute" => Ok(Modifier::Minute),
209            "hour" => Ok(Modifier::Hour),
210            "day" => Ok(Modifier::Day),
211            "week" => Ok(Modifier::Week),
212            "month" => Ok(Modifier::Month),
213            "year" => Ok(Modifier::Year),
214            _ => Err(()),
215        }
216    }
217}
218
219// =============================================================================
220// Field Specification
221// =============================================================================
222
223/// A field name with optional modifiers, parsed from detection keys like
224/// `TargetObject|endswith` or `Destination|contains|all`.
225///
226/// Reference: pySigma rule/detection.py SigmaDetectionItem.from_mapping
227#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
228pub struct FieldSpec {
229    /// Field name (`None` for keyword detections without a field).
230    pub name: Option<String>,
231    /// Ordered list of modifiers applied to this field.
232    pub modifiers: Vec<Modifier>,
233}
234
235impl FieldSpec {
236    pub fn new(name: Option<String>, modifiers: Vec<Modifier>) -> Self {
237        FieldSpec { name, modifiers }
238    }
239
240    pub fn has_modifier(&self, m: Modifier) -> bool {
241        self.modifiers.contains(&m)
242    }
243
244    pub fn is_keyword(&self) -> bool {
245        self.name.is_none()
246    }
247}
248
249// =============================================================================
250// Condition Expression AST
251// =============================================================================
252
253/// Parsed condition expression AST.
254///
255/// Produced by the PEG parser + Pratt parser from condition strings like
256/// `selection and not filter` or `1 of selection_* and not 1 of filter_*`.
257///
258/// Reference: pySigma conditions.py ConditionItem hierarchy
259#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
260pub enum ConditionExpr {
261    /// Logical AND of sub-expressions.
262    And(Vec<ConditionExpr>),
263    /// Logical OR of sub-expressions.
264    Or(Vec<ConditionExpr>),
265    /// Logical NOT of a sub-expression.
266    Not(Box<ConditionExpr>),
267    /// Reference to a named detection identifier.
268    Identifier(String),
269    /// Quantified selector: `1 of selection_*`, `all of them`, etc.
270    Selector {
271        quantifier: Quantifier,
272        pattern: SelectorPattern,
273    },
274}
275
276impl fmt::Display for ConditionExpr {
277    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
278        match self {
279            ConditionExpr::And(args) => {
280                let parts: Vec<String> = args.iter().map(|a| format!("{a}")).collect();
281                write!(f, "({})", parts.join(" and "))
282            }
283            ConditionExpr::Or(args) => {
284                let parts: Vec<String> = args.iter().map(|a| format!("{a}")).collect();
285                write!(f, "({})", parts.join(" or "))
286            }
287            ConditionExpr::Not(arg) => write!(f, "not {arg}"),
288            ConditionExpr::Identifier(id) => write!(f, "{id}"),
289            ConditionExpr::Selector {
290                quantifier,
291                pattern,
292            } => write!(f, "{quantifier} of {pattern}"),
293        }
294    }
295}
296
297/// Quantifier in a selector expression.
298#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
299pub enum Quantifier {
300    /// Match any (at least one): `1 of ...` or `any of ...`
301    Any,
302    /// Match all: `all of ...`
303    All,
304    /// Match a specific count: `N of ...`
305    Count(u64),
306}
307
308impl fmt::Display for Quantifier {
309    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
310        match self {
311            Quantifier::Any => write!(f, "1"),
312            Quantifier::All => write!(f, "all"),
313            Quantifier::Count(n) => write!(f, "{n}"),
314        }
315    }
316}
317
318/// Target pattern in a selector expression.
319#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
320pub enum SelectorPattern {
321    /// All detection identifiers: `... of them`
322    Them,
323    /// A wildcard pattern matching detection names: `... of selection_*`
324    Pattern(String),
325}
326
327impl fmt::Display for SelectorPattern {
328    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
329        match self {
330            SelectorPattern::Them => write!(f, "them"),
331            SelectorPattern::Pattern(p) => write!(f, "{p}"),
332        }
333    }
334}
335
336// =============================================================================
337// Detection Section
338// =============================================================================
339
340/// A single detection item: a field (with modifiers) mapped to one or more values.
341///
342/// Examples:
343/// - `EventType: "user.mfa.factor.deactivate"` → field="EventType", values=["user.mfa..."]
344/// - `Destination|contains|all: ['new-object', 'net.webclient']` → field="Destination",
345///   modifiers=[Contains, All], values=[...]
346///
347/// Reference: pySigma rule/detection.py SigmaDetectionItem
348#[derive(Debug, Clone, PartialEq, Serialize)]
349pub struct DetectionItem {
350    /// The field specification (name + modifiers).
351    pub field: FieldSpec,
352    /// One or more values to match against.
353    pub values: Vec<SigmaValue>,
354}
355
356/// A detection definition: a group of detection items or nested detections.
357///
358/// When constructed from a YAML mapping, items are AND-linked.
359/// When constructed from a YAML list of mappings, sub-detections are OR-linked.
360///
361/// Reference: pySigma rule/detection.py SigmaDetection
362#[derive(Debug, Clone, PartialEq, Serialize)]
363pub enum Detection {
364    /// AND-linked detection items (from a YAML mapping).
365    AllOf(Vec<DetectionItem>),
366    /// OR-linked sub-detections (from a YAML list of mappings).
367    AnyOf(Vec<Detection>),
368    /// Keyword detection: plain value(s) without a field.
369    Keywords(Vec<SigmaValue>),
370}
371
372/// The complete detection section of a Sigma rule.
373///
374/// Contains named detection identifiers, condition expressions, and optional timeframe.
375///
376/// Reference: pySigma rule/detection.py SigmaDetections
377#[derive(Debug, Clone, PartialEq, Serialize)]
378pub struct Detections {
379    /// Named detections (e.g. `selection`, `filter_main`, etc.)
380    pub named: HashMap<String, Detection>,
381    /// One or more condition expressions (parsed from condition string or list).
382    pub conditions: Vec<ConditionExpr>,
383    /// Raw condition strings (before parsing).
384    pub condition_strings: Vec<String>,
385    /// Optional timeframe for aggregation rules (deprecated in favor of correlations).
386    pub timeframe: Option<String>,
387}
388
389// =============================================================================
390// Log Source
391// =============================================================================
392
393/// Log source specification.
394///
395/// Reference: Sigma schema `logsource` object
396#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
397pub struct LogSource {
398    pub category: Option<String>,
399    pub product: Option<String>,
400    pub service: Option<String>,
401    pub definition: Option<String>,
402    /// Any additional custom logsource fields.
403    #[serde(flatten)]
404    pub custom: HashMap<String, String>,
405}
406
407// =============================================================================
408// Related Rule Reference
409// =============================================================================
410
411/// A reference to a related Sigma rule.
412#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
413pub struct Related {
414    pub id: String,
415    pub relation_type: RelationType,
416}
417
418// =============================================================================
419// Sigma Detection Rule
420// =============================================================================
421
422/// A complete Sigma detection rule.
423///
424/// Reference: Sigma schema V2.0.0, pySigma rule.py SigmaRule
425#[derive(Debug, Clone, PartialEq, Serialize)]
426pub struct SigmaRule {
427    // Required fields
428    pub title: String,
429    pub logsource: LogSource,
430    pub detection: Detections,
431
432    // Optional metadata
433    pub id: Option<String>,
434    pub name: Option<String>,
435    pub related: Vec<Related>,
436    pub taxonomy: Option<String>,
437    pub status: Option<Status>,
438    pub description: Option<String>,
439    pub license: Option<String>,
440    pub author: Option<String>,
441    pub references: Vec<String>,
442    pub date: Option<String>,
443    pub modified: Option<String>,
444    pub fields: Vec<String>,
445    pub falsepositives: Vec<String>,
446    pub level: Option<Level>,
447    pub tags: Vec<String>,
448    pub scope: Vec<String>,
449
450    /// Custom attributes attached to the rule.
451    ///
452    /// Populated from (a) any top-level YAML key that is not part of the
453    /// standard Sigma rule schema, (b) the entries of the dedicated top-level
454    /// `custom_attributes:` mapping (explicit entries win over arbitrary keys
455    /// of the same name), and (c) pipeline transformations such as
456    /// `SetCustomAttribute`, which are applied last and override both.
457    ///
458    /// Mirrors pySigma's `SigmaRule.custom_attributes` dict. Engines and
459    /// backends can read these to modify per-rule behavior.
460    #[serde(skip_serializing_if = "HashMap::is_empty")]
461    pub custom_attributes: HashMap<String, yaml_serde::Value>,
462}
463
464// =============================================================================
465// Correlation Rule
466// =============================================================================
467
468/// Correlation rule type.
469///
470/// Reference: pySigma correlations.py SigmaCorrelationType
471#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
472#[serde(rename_all = "snake_case")]
473pub enum CorrelationType {
474    EventCount,
475    ValueCount,
476    Temporal,
477    TemporalOrdered,
478    ValueSum,
479    ValueAvg,
480    ValuePercentile,
481    ValueMedian,
482}
483
484impl CorrelationType {
485    pub fn as_str(&self) -> &'static str {
486        match self {
487            CorrelationType::EventCount => "event_count",
488            CorrelationType::ValueCount => "value_count",
489            CorrelationType::Temporal => "temporal",
490            CorrelationType::TemporalOrdered => "temporal_ordered",
491            CorrelationType::ValueSum => "value_sum",
492            CorrelationType::ValueAvg => "value_avg",
493            CorrelationType::ValuePercentile => "value_percentile",
494            CorrelationType::ValueMedian => "value_median",
495        }
496    }
497}
498
499impl FromStr for CorrelationType {
500    type Err = ();
501    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
502        match s {
503            "event_count" => Ok(CorrelationType::EventCount),
504            "value_count" => Ok(CorrelationType::ValueCount),
505            "temporal" => Ok(CorrelationType::Temporal),
506            "temporal_ordered" => Ok(CorrelationType::TemporalOrdered),
507            "value_sum" => Ok(CorrelationType::ValueSum),
508            "value_avg" => Ok(CorrelationType::ValueAvg),
509            "value_percentile" => Ok(CorrelationType::ValuePercentile),
510            "value_median" => Ok(CorrelationType::ValueMedian),
511            _ => Err(()),
512        }
513    }
514}
515
516/// Comparison operator in a correlation condition.
517///
518/// Reference: pySigma correlations.py SigmaCorrelationConditionOperator
519#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
520pub enum ConditionOperator {
521    Lt,
522    Lte,
523    Gt,
524    Gte,
525    Eq,
526    Neq,
527}
528
529impl FromStr for ConditionOperator {
530    type Err = ();
531    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
532        match s {
533            "lt" => Ok(ConditionOperator::Lt),
534            "lte" => Ok(ConditionOperator::Lte),
535            "gt" => Ok(ConditionOperator::Gt),
536            "gte" => Ok(ConditionOperator::Gte),
537            "eq" => Ok(ConditionOperator::Eq),
538            "neq" => Ok(ConditionOperator::Neq),
539            _ => Err(()),
540        }
541    }
542}
543
544/// Condition for a correlation rule.
545///
546/// Reference: pySigma correlations.py SigmaCorrelationCondition
547#[derive(Debug, Clone, PartialEq, Serialize)]
548pub enum CorrelationCondition {
549    /// Threshold condition with one or more predicates (supports ranges).
550    ///
551    /// Single: `gte: 100`
552    /// Range: `gt: 100` + `lte: 200`
553    Threshold {
554        /// One or more (operator, value) predicates. All must be satisfied.
555        predicates: Vec<(ConditionOperator, u64)>,
556        /// Optional field reference(s) (required for `value_count` type).
557        /// A single string is normalized to a one-element vec.
558        field: Option<Vec<String>>,
559        /// Percentile rank (0-100) for `value_percentile` type.
560        /// Defaults to 50 if not specified.
561        percentile: Option<u64>,
562    },
563    /// Extended boolean condition for temporal types: `"rule_a and rule_b"`
564    Extended(ConditionExpr),
565}
566
567/// Field alias mapping in a correlation rule.
568///
569/// Maps a canonical alias name to per-rule field name mappings.
570///
571/// Reference: pySigma correlations.py SigmaCorrelationFieldAlias
572#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
573pub struct FieldAlias {
574    pub alias: String,
575    /// Maps rule reference (ID or name) → field name in that rule's events.
576    pub mapping: HashMap<String, String>,
577}
578
579/// A Sigma correlation rule.
580///
581/// Reference: pySigma correlations.py SigmaCorrelationRule
582#[derive(Debug, Clone, PartialEq, Serialize)]
583pub struct CorrelationRule {
584    // Metadata (shared with detection rules)
585    pub title: String,
586    pub id: Option<String>,
587    pub name: Option<String>,
588    pub status: Option<Status>,
589    pub description: Option<String>,
590    pub author: Option<String>,
591    pub date: Option<String>,
592    pub modified: Option<String>,
593    pub related: Vec<Related>,
594    pub references: Vec<String>,
595    pub taxonomy: Option<String>,
596    pub license: Option<String>,
597    pub tags: Vec<String>,
598    pub fields: Vec<String>,
599    pub falsepositives: Vec<String>,
600    pub level: Option<Level>,
601    pub scope: Vec<String>,
602
603    // Correlation-specific fields
604    pub correlation_type: CorrelationType,
605    pub rules: Vec<String>,
606    pub group_by: Vec<String>,
607    pub timespan: Timespan,
608    pub condition: CorrelationCondition,
609    pub aliases: Vec<FieldAlias>,
610    pub generate: bool,
611
612    /// Custom attributes attached to the correlation rule.
613    ///
614    /// Populated the same way as `SigmaRule.custom_attributes`: arbitrary
615    /// top-level YAML keys, the dedicated `custom_attributes:` block, and
616    /// pipeline `SetCustomAttribute` transformations (last-write-wins).
617    /// Engine-level `rsigma.*` extensions (e.g. `rsigma.correlation_event_mode`,
618    /// `rsigma.suppress`, `rsigma.action`) are read from here.
619    #[serde(skip_serializing_if = "HashMap::is_empty")]
620    pub custom_attributes: HashMap<String, yaml_serde::Value>,
621}
622
623// =============================================================================
624// Filter Rule
625// =============================================================================
626
627/// Which rules a filter applies to.
628#[derive(Debug, Clone, PartialEq, Serialize)]
629pub enum FilterRuleTarget {
630    /// The filter applies to every loaded rule.
631    Any,
632    /// The filter applies only to rules matching these IDs or titles.
633    Specific(Vec<String>),
634}
635
636/// A Sigma filter rule that modifies the detection logic of referenced rules.
637///
638/// Filters add additional conditions (typically exclusions) to existing rules
639/// without modifying the original rule files.
640#[derive(Debug, Clone, PartialEq, Serialize)]
641pub struct FilterRule {
642    pub title: String,
643    pub id: Option<String>,
644    pub name: Option<String>,
645    pub taxonomy: Option<String>,
646    pub status: Option<Status>,
647    pub description: Option<String>,
648    pub author: Option<String>,
649    pub date: Option<String>,
650    pub modified: Option<String>,
651    pub related: Vec<Related>,
652    pub license: Option<String>,
653    pub references: Vec<String>,
654    pub tags: Vec<String>,
655    pub fields: Vec<String>,
656    pub falsepositives: Vec<String>,
657    pub level: Option<Level>,
658    pub scope: Vec<String>,
659    pub logsource: Option<LogSource>,
660
661    /// Rules this filter applies to (by ID or name), or all rules.
662    pub rules: FilterRuleTarget,
663    /// The filter detection section.
664    pub detection: Detections,
665
666    /// Custom attributes attached to the filter rule.
667    #[serde(skip_serializing_if = "HashMap::is_empty")]
668    pub custom_attributes: HashMap<String, yaml_serde::Value>,
669}
670
671// =============================================================================
672// Collection / Document
673// =============================================================================
674
675/// A single parsed document from a Sigma YAML file.
676///
677/// A YAML file may contain multiple documents separated by `---`.
678/// Each document is either a detection rule, correlation rule, filter, or action.
679#[derive(Debug, Clone, PartialEq, Serialize)]
680pub enum SigmaDocument {
681    Rule(Box<SigmaRule>),
682    Correlation(CorrelationRule),
683    Filter(FilterRule),
684}
685
686/// A collection of parsed Sigma documents from one or more YAML files.
687#[derive(Debug, Clone, Serialize)]
688pub struct SigmaCollection {
689    pub rules: Vec<SigmaRule>,
690    pub correlations: Vec<CorrelationRule>,
691    pub filters: Vec<FilterRule>,
692    /// Parsing errors that were collected (when `collect_errors` is true).
693    #[serde(skip)]
694    pub errors: Vec<String>,
695}
696
697impl SigmaCollection {
698    pub fn new() -> Self {
699        SigmaCollection {
700            rules: Vec::new(),
701            correlations: Vec::new(),
702            filters: Vec::new(),
703            errors: Vec::new(),
704        }
705    }
706
707    /// Total number of parsed documents.
708    pub fn len(&self) -> usize {
709        self.rules.len() + self.correlations.len() + self.filters.len()
710    }
711
712    pub fn is_empty(&self) -> bool {
713        self.len() == 0
714    }
715}
716
717impl Default for SigmaCollection {
718    fn default() -> Self {
719        Self::new()
720    }
721}