Skip to main content

rsigma_parser/
ast.rs

//! AST types for all Sigma constructs: rules, detections, conditions,
//! correlations, and filters.
//!
//! Reference: Sigma specification V2.0.0 (2024-08-08)
//! Reference: pySigma types, conditions, correlations, rule modules
6
7use std::collections::HashMap;
8use std::fmt;
9use std::str::FromStr;
10
11use serde::Serialize;
12
13use crate::value::{SigmaValue, Timespan};
14
// =============================================================================
// Enumerations
// =============================================================================
18
19/// Rule maturity status.
20///
21/// Reference: pySigma rule.py SigmaStatus
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
23#[serde(rename_all = "lowercase")]
24pub enum Status {
25    Stable,
26    Test,
27    Experimental,
28    Deprecated,
29    Unsupported,
30}
31
32impl FromStr for Status {
33    type Err = ();
34    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
35        match s {
36            "stable" => Ok(Status::Stable),
37            "test" => Ok(Status::Test),
38            "experimental" => Ok(Status::Experimental),
39            "deprecated" => Ok(Status::Deprecated),
40            "unsupported" => Ok(Status::Unsupported),
41            _ => Err(()),
42        }
43    }
44}
45
46/// Severity level of a triggered rule.
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
48#[serde(rename_all = "lowercase")]
49pub enum Level {
50    Informational,
51    Low,
52    Medium,
53    High,
54    Critical,
55}
56
57impl FromStr for Level {
58    type Err = ();
59    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
60        match s {
61            "informational" => Ok(Level::Informational),
62            "low" => Ok(Level::Low),
63            "medium" => Ok(Level::Medium),
64            "high" => Ok(Level::High),
65            "critical" => Ok(Level::Critical),
66            _ => Err(()),
67        }
68    }
69}
70
71/// Relationship type for the `related` field.
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
73#[serde(rename_all = "lowercase")]
74pub enum RelationType {
75    Correlation,
76    Derived,
77    Obsolete,
78    Merged,
79    Renamed,
80    Similar,
81}
82
83impl FromStr for RelationType {
84    type Err = ();
85    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
86        match s {
87            "correlation" => Ok(RelationType::Correlation),
88            "derived" => Ok(RelationType::Derived),
89            "obsolete" => Ok(RelationType::Obsolete),
90            "merged" => Ok(RelationType::Merged),
91            "renamed" => Ok(RelationType::Renamed),
92            "similar" => Ok(RelationType::Similar),
93            _ => Err(()),
94        }
95    }
96}
97
// =============================================================================
// Field Modifiers
// =============================================================================
101
102/// All supported Sigma field modifiers.
103///
104/// Reference: pySigma modifiers.py modifier_mapping
105#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
106#[serde(rename_all = "lowercase")]
107pub enum Modifier {
108    // String matching modifiers
109    Contains,
110    StartsWith,
111    EndsWith,
112
113    // Value linking
114    All,
115
116    // Encoding modifiers
117    Base64,
118    Base64Offset,
119    Wide,
120    Utf16be,
121    Utf16,
122    WindAsh,
123
124    // Pattern matching
125    Re,
126    Cidr,
127
128    // Case sensitivity
129    Cased,
130
131    // Field existence
132    Exists,
133
134    // Placeholder expansion
135    Expand,
136
137    // Field reference
138    FieldRef,
139
140    // Numeric/value comparison
141    Gt,
142    Gte,
143    Lt,
144    Lte,
145    /// Not equal: field value must differ from the specified value.
146    Neq,
147
148    // Regex flags
149    #[serde(rename = "i")]
150    IgnoreCase,
151    #[serde(rename = "m")]
152    Multiline,
153    #[serde(rename = "s")]
154    DotAll,
155
156    // Timestamp parts
157    Minute,
158    Hour,
159    Day,
160    Week,
161    Month,
162    Year,
163}
164
165/// Parse a modifier identifier string.
166///
167/// Reference: pySigma modifiers.py modifier_mapping
168impl FromStr for Modifier {
169    type Err = ();
170    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
171        match s {
172            "contains" => Ok(Modifier::Contains),
173            "startswith" => Ok(Modifier::StartsWith),
174            "endswith" => Ok(Modifier::EndsWith),
175            "all" => Ok(Modifier::All),
176            "base64" => Ok(Modifier::Base64),
177            "base64offset" => Ok(Modifier::Base64Offset),
178            "wide" | "utf16le" => Ok(Modifier::Wide),
179            "utf16be" => Ok(Modifier::Utf16be),
180            "utf16" => Ok(Modifier::Utf16),
181            "windash" => Ok(Modifier::WindAsh),
182            "re" => Ok(Modifier::Re),
183            "cidr" => Ok(Modifier::Cidr),
184            "cased" => Ok(Modifier::Cased),
185            "exists" => Ok(Modifier::Exists),
186            "expand" => Ok(Modifier::Expand),
187            "fieldref" => Ok(Modifier::FieldRef),
188            "gt" => Ok(Modifier::Gt),
189            "gte" => Ok(Modifier::Gte),
190            "lt" => Ok(Modifier::Lt),
191            "lte" => Ok(Modifier::Lte),
192            "neq" => Ok(Modifier::Neq),
193            "i" | "ignorecase" => Ok(Modifier::IgnoreCase),
194            "m" | "multiline" => Ok(Modifier::Multiline),
195            "s" | "dotall" => Ok(Modifier::DotAll),
196            "minute" => Ok(Modifier::Minute),
197            "hour" => Ok(Modifier::Hour),
198            "day" => Ok(Modifier::Day),
199            "week" => Ok(Modifier::Week),
200            "month" => Ok(Modifier::Month),
201            "year" => Ok(Modifier::Year),
202            _ => Err(()),
203        }
204    }
205}
206
// =============================================================================
// Field Specification
// =============================================================================
210
211/// A field name with optional modifiers, parsed from detection keys like
212/// `TargetObject|endswith` or `Destination|contains|all`.
213///
214/// Reference: pySigma rule/detection.py SigmaDetectionItem.from_mapping
215#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
216pub struct FieldSpec {
217    /// Field name (`None` for keyword detections without a field).
218    pub name: Option<String>,
219    /// Ordered list of modifiers applied to this field.
220    pub modifiers: Vec<Modifier>,
221}
222
223impl FieldSpec {
224    pub fn new(name: Option<String>, modifiers: Vec<Modifier>) -> Self {
225        FieldSpec { name, modifiers }
226    }
227
228    pub fn has_modifier(&self, m: Modifier) -> bool {
229        self.modifiers.contains(&m)
230    }
231
232    pub fn is_keyword(&self) -> bool {
233        self.name.is_none()
234    }
235}
236
// =============================================================================
// Condition Expression AST
// =============================================================================
240
241/// Parsed condition expression AST.
242///
243/// Produced by the PEG parser + Pratt parser from condition strings like
244/// `selection and not filter` or `1 of selection_* and not 1 of filter_*`.
245///
246/// Reference: pySigma conditions.py ConditionItem hierarchy
247#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
248pub enum ConditionExpr {
249    /// Logical AND of sub-expressions.
250    And(Vec<ConditionExpr>),
251    /// Logical OR of sub-expressions.
252    Or(Vec<ConditionExpr>),
253    /// Logical NOT of a sub-expression.
254    Not(Box<ConditionExpr>),
255    /// Reference to a named detection identifier.
256    Identifier(String),
257    /// Quantified selector: `1 of selection_*`, `all of them`, etc.
258    Selector {
259        quantifier: Quantifier,
260        pattern: SelectorPattern,
261    },
262}
263
264impl fmt::Display for ConditionExpr {
265    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
266        match self {
267            ConditionExpr::And(args) => {
268                let parts: Vec<String> = args.iter().map(|a| format!("{a}")).collect();
269                write!(f, "({})", parts.join(" and "))
270            }
271            ConditionExpr::Or(args) => {
272                let parts: Vec<String> = args.iter().map(|a| format!("{a}")).collect();
273                write!(f, "({})", parts.join(" or "))
274            }
275            ConditionExpr::Not(arg) => write!(f, "not {arg}"),
276            ConditionExpr::Identifier(id) => write!(f, "{id}"),
277            ConditionExpr::Selector {
278                quantifier,
279                pattern,
280            } => write!(f, "{quantifier} of {pattern}"),
281        }
282    }
283}
284
285/// Quantifier in a selector expression.
286#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
287pub enum Quantifier {
288    /// Match any (at least one): `1 of ...` or `any of ...`
289    Any,
290    /// Match all: `all of ...`
291    All,
292    /// Match a specific count: `N of ...`
293    Count(u64),
294}
295
296impl fmt::Display for Quantifier {
297    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
298        match self {
299            Quantifier::Any => write!(f, "1"),
300            Quantifier::All => write!(f, "all"),
301            Quantifier::Count(n) => write!(f, "{n}"),
302        }
303    }
304}
305
306/// Target pattern in a selector expression.
307#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
308pub enum SelectorPattern {
309    /// All detection identifiers: `... of them`
310    Them,
311    /// A wildcard pattern matching detection names: `... of selection_*`
312    Pattern(String),
313}
314
315impl fmt::Display for SelectorPattern {
316    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
317        match self {
318            SelectorPattern::Them => write!(f, "them"),
319            SelectorPattern::Pattern(p) => write!(f, "{p}"),
320        }
321    }
322}
323
// =============================================================================
// Detection Section
// =============================================================================
327
328/// A single detection item: a field (with modifiers) mapped to one or more values.
329///
330/// Examples:
331/// - `EventType: "user.mfa.factor.deactivate"` → field="EventType", values=["user.mfa..."]
332/// - `Destination|contains|all: ['new-object', 'net.webclient']` → field="Destination",
333///   modifiers=[Contains, All], values=[...]
334///
335/// Reference: pySigma rule/detection.py SigmaDetectionItem
336#[derive(Debug, Clone, PartialEq, Serialize)]
337pub struct DetectionItem {
338    /// The field specification (name + modifiers).
339    pub field: FieldSpec,
340    /// One or more values to match against.
341    pub values: Vec<SigmaValue>,
342}
343
344/// A detection definition: a group of detection items or nested detections.
345///
346/// When constructed from a YAML mapping, items are AND-linked.
347/// When constructed from a YAML list of mappings, sub-detections are OR-linked.
348///
349/// Reference: pySigma rule/detection.py SigmaDetection
350#[derive(Debug, Clone, PartialEq, Serialize)]
351pub enum Detection {
352    /// AND-linked detection items (from a YAML mapping).
353    AllOf(Vec<DetectionItem>),
354    /// OR-linked sub-detections (from a YAML list of mappings).
355    AnyOf(Vec<Detection>),
356    /// Keyword detection: plain value(s) without a field.
357    Keywords(Vec<SigmaValue>),
358}
359
360/// The complete detection section of a Sigma rule.
361///
362/// Contains named detection identifiers, condition expressions, and optional timeframe.
363///
364/// Reference: pySigma rule/detection.py SigmaDetections
365#[derive(Debug, Clone, PartialEq, Serialize)]
366pub struct Detections {
367    /// Named detections (e.g. `selection`, `filter_main`, etc.)
368    pub named: HashMap<String, Detection>,
369    /// One or more condition expressions (parsed from condition string or list).
370    pub conditions: Vec<ConditionExpr>,
371    /// Raw condition strings (before parsing).
372    pub condition_strings: Vec<String>,
373    /// Optional timeframe for aggregation rules (deprecated in favor of correlations).
374    pub timeframe: Option<String>,
375}
376
// =============================================================================
// Log Source
// =============================================================================
380
381/// Log source specification.
382///
383/// Reference: Sigma schema `logsource` object
384#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
385pub struct LogSource {
386    pub category: Option<String>,
387    pub product: Option<String>,
388    pub service: Option<String>,
389    pub definition: Option<String>,
390    /// Any additional custom logsource fields.
391    #[serde(flatten)]
392    pub custom: HashMap<String, String>,
393}
394
// =============================================================================
// Related Rule Reference
// =============================================================================
398
399/// A reference to a related Sigma rule.
400#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
401pub struct Related {
402    pub id: String,
403    pub relation_type: RelationType,
404}
405
// =============================================================================
// Sigma Detection Rule
// =============================================================================
409
410/// A complete Sigma detection rule.
411///
412/// Reference: Sigma schema V2.0.0, pySigma rule.py SigmaRule
413#[derive(Debug, Clone, PartialEq, Serialize)]
414pub struct SigmaRule {
415    // Required fields
416    pub title: String,
417    pub logsource: LogSource,
418    pub detection: Detections,
419
420    // Optional metadata
421    pub id: Option<String>,
422    pub name: Option<String>,
423    pub related: Vec<Related>,
424    pub taxonomy: Option<String>,
425    pub status: Option<Status>,
426    pub description: Option<String>,
427    pub license: Option<String>,
428    pub author: Option<String>,
429    pub references: Vec<String>,
430    pub date: Option<String>,
431    pub modified: Option<String>,
432    pub fields: Vec<String>,
433    pub falsepositives: Vec<String>,
434    pub level: Option<Level>,
435    pub tags: Vec<String>,
436    pub scope: Vec<String>,
437
438    /// Custom attributes attached to the rule.
439    ///
440    /// Populated from (a) any top-level YAML key that is not part of the
441    /// standard Sigma rule schema, (b) the entries of the dedicated top-level
442    /// `custom_attributes:` mapping (explicit entries win over arbitrary keys
443    /// of the same name), and (c) pipeline transformations such as
444    /// `SetCustomAttribute`, which are applied last and override both.
445    ///
446    /// Mirrors pySigma's `SigmaRule.custom_attributes` dict. Engines and
447    /// backends can read these to modify per-rule behavior.
448    #[serde(skip_serializing_if = "HashMap::is_empty")]
449    pub custom_attributes: HashMap<String, serde_yaml::Value>,
450}
451
// =============================================================================
// Correlation Rule
// =============================================================================
455
456/// Correlation rule type.
457///
458/// Reference: pySigma correlations.py SigmaCorrelationType
459#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
460#[serde(rename_all = "snake_case")]
461pub enum CorrelationType {
462    EventCount,
463    ValueCount,
464    Temporal,
465    TemporalOrdered,
466    ValueSum,
467    ValueAvg,
468    ValuePercentile,
469    ValueMedian,
470}
471
472impl FromStr for CorrelationType {
473    type Err = ();
474    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
475        match s {
476            "event_count" => Ok(CorrelationType::EventCount),
477            "value_count" => Ok(CorrelationType::ValueCount),
478            "temporal" => Ok(CorrelationType::Temporal),
479            "temporal_ordered" => Ok(CorrelationType::TemporalOrdered),
480            "value_sum" => Ok(CorrelationType::ValueSum),
481            "value_avg" => Ok(CorrelationType::ValueAvg),
482            "value_percentile" => Ok(CorrelationType::ValuePercentile),
483            "value_median" => Ok(CorrelationType::ValueMedian),
484            _ => Err(()),
485        }
486    }
487}
488
489/// Comparison operator in a correlation condition.
490///
491/// Reference: pySigma correlations.py SigmaCorrelationConditionOperator
492#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
493pub enum ConditionOperator {
494    Lt,
495    Lte,
496    Gt,
497    Gte,
498    Eq,
499    Neq,
500}
501
502impl FromStr for ConditionOperator {
503    type Err = ();
504    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
505        match s {
506            "lt" => Ok(ConditionOperator::Lt),
507            "lte" => Ok(ConditionOperator::Lte),
508            "gt" => Ok(ConditionOperator::Gt),
509            "gte" => Ok(ConditionOperator::Gte),
510            "eq" => Ok(ConditionOperator::Eq),
511            "neq" => Ok(ConditionOperator::Neq),
512            _ => Err(()),
513        }
514    }
515}
516
517/// Condition for a correlation rule.
518///
519/// Reference: pySigma correlations.py SigmaCorrelationCondition
520#[derive(Debug, Clone, PartialEq, Serialize)]
521pub enum CorrelationCondition {
522    /// Threshold condition with one or more predicates (supports ranges).
523    ///
524    /// Single: `gte: 100`
525    /// Range: `gt: 100` + `lte: 200`
526    Threshold {
527        /// One or more (operator, value) predicates. All must be satisfied.
528        predicates: Vec<(ConditionOperator, u64)>,
529        /// Optional field reference(s) (required for `value_count` type).
530        /// A single string is normalized to a one-element vec.
531        field: Option<Vec<String>>,
532        /// Percentile rank (0-100) for `value_percentile` type.
533        /// Defaults to 50 if not specified.
534        percentile: Option<u64>,
535    },
536    /// Extended boolean condition for temporal types: `"rule_a and rule_b"`
537    Extended(ConditionExpr),
538}
539
540/// Field alias mapping in a correlation rule.
541///
542/// Maps a canonical alias name to per-rule field name mappings.
543///
544/// Reference: pySigma correlations.py SigmaCorrelationFieldAlias
545#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
546pub struct FieldAlias {
547    pub alias: String,
548    /// Maps rule reference (ID or name) → field name in that rule's events.
549    pub mapping: HashMap<String, String>,
550}
551
552/// A Sigma correlation rule.
553///
554/// Reference: pySigma correlations.py SigmaCorrelationRule
555#[derive(Debug, Clone, PartialEq, Serialize)]
556pub struct CorrelationRule {
557    // Metadata (shared with detection rules)
558    pub title: String,
559    pub id: Option<String>,
560    pub name: Option<String>,
561    pub status: Option<Status>,
562    pub description: Option<String>,
563    pub author: Option<String>,
564    pub date: Option<String>,
565    pub modified: Option<String>,
566    pub related: Vec<Related>,
567    pub references: Vec<String>,
568    pub taxonomy: Option<String>,
569    pub license: Option<String>,
570    pub tags: Vec<String>,
571    pub fields: Vec<String>,
572    pub falsepositives: Vec<String>,
573    pub level: Option<Level>,
574    pub scope: Vec<String>,
575
576    // Correlation-specific fields
577    pub correlation_type: CorrelationType,
578    pub rules: Vec<String>,
579    pub group_by: Vec<String>,
580    pub timespan: Timespan,
581    pub condition: CorrelationCondition,
582    pub aliases: Vec<FieldAlias>,
583    pub generate: bool,
584
585    /// Custom attributes attached to the correlation rule.
586    ///
587    /// Populated the same way as `SigmaRule.custom_attributes`: arbitrary
588    /// top-level YAML keys, the dedicated `custom_attributes:` block, and
589    /// pipeline `SetCustomAttribute` transformations (last-write-wins).
590    /// Engine-level `rsigma.*` extensions (e.g. `rsigma.correlation_event_mode`,
591    /// `rsigma.suppress`, `rsigma.action`) are read from here.
592    #[serde(skip_serializing_if = "HashMap::is_empty")]
593    pub custom_attributes: HashMap<String, serde_yaml::Value>,
594}
595
// =============================================================================
// Filter Rule
// =============================================================================
599
600/// Which rules a filter applies to.
601#[derive(Debug, Clone, PartialEq, Serialize)]
602pub enum FilterRuleTarget {
603    /// The filter applies to every loaded rule.
604    Any,
605    /// The filter applies only to rules matching these IDs or titles.
606    Specific(Vec<String>),
607}
608
609/// A Sigma filter rule that modifies the detection logic of referenced rules.
610///
611/// Filters add additional conditions (typically exclusions) to existing rules
612/// without modifying the original rule files.
613#[derive(Debug, Clone, PartialEq, Serialize)]
614pub struct FilterRule {
615    pub title: String,
616    pub id: Option<String>,
617    pub name: Option<String>,
618    pub taxonomy: Option<String>,
619    pub status: Option<Status>,
620    pub description: Option<String>,
621    pub author: Option<String>,
622    pub date: Option<String>,
623    pub modified: Option<String>,
624    pub related: Vec<Related>,
625    pub license: Option<String>,
626    pub references: Vec<String>,
627    pub tags: Vec<String>,
628    pub fields: Vec<String>,
629    pub falsepositives: Vec<String>,
630    pub level: Option<Level>,
631    pub scope: Vec<String>,
632    pub logsource: Option<LogSource>,
633
634    /// Rules this filter applies to (by ID or name), or all rules.
635    pub rules: FilterRuleTarget,
636    /// The filter detection section.
637    pub detection: Detections,
638
639    /// Custom attributes attached to the filter rule.
640    #[serde(skip_serializing_if = "HashMap::is_empty")]
641    pub custom_attributes: HashMap<String, serde_yaml::Value>,
642}
643
// =============================================================================
// Collection / Document
// =============================================================================
647
648/// A single parsed document from a Sigma YAML file.
649///
650/// A YAML file may contain multiple documents separated by `---`.
651/// Each document is either a detection rule, correlation rule, filter, or action.
652#[derive(Debug, Clone, PartialEq, Serialize)]
653pub enum SigmaDocument {
654    Rule(Box<SigmaRule>),
655    Correlation(CorrelationRule),
656    Filter(FilterRule),
657}
658
659/// A collection of parsed Sigma documents from one or more YAML files.
660#[derive(Debug, Clone, Serialize)]
661pub struct SigmaCollection {
662    pub rules: Vec<SigmaRule>,
663    pub correlations: Vec<CorrelationRule>,
664    pub filters: Vec<FilterRule>,
665    /// Parsing errors that were collected (when `collect_errors` is true).
666    #[serde(skip)]
667    pub errors: Vec<String>,
668}
669
670impl SigmaCollection {
671    pub fn new() -> Self {
672        SigmaCollection {
673            rules: Vec::new(),
674            correlations: Vec::new(),
675            filters: Vec::new(),
676            errors: Vec::new(),
677        }
678    }
679
680    /// Total number of parsed documents.
681    pub fn len(&self) -> usize {
682        self.rules.len() + self.correlations.len() + self.filters.len()
683    }
684
685    pub fn is_empty(&self) -> bool {
686        self.len() == 0
687    }
688}
689
690impl Default for SigmaCollection {
691    fn default() -> Self {
692        Self::new()
693    }
694}