// rsigma_parser/ast.rs

//! AST types for all Sigma constructs: rules, detections, conditions,
//! correlations, and filters.
//!
//! Reference: Sigma specification V2.0.0 (2024-08-08)
//! Reference: pySigma types, conditions, correlations, rule modules
6
7use std::collections::HashMap;
8use std::fmt;
9use std::str::FromStr;
10
11use serde::Serialize;
12
13use crate::value::{SigmaValue, Timespan};
14
// =============================================================================
// Enumerations
// =============================================================================
18
19/// Rule maturity status.
20///
21/// Reference: pySigma rule.py SigmaStatus
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
23#[serde(rename_all = "lowercase")]
24pub enum Status {
25    Stable,
26    Test,
27    Experimental,
28    Deprecated,
29    Unsupported,
30}
31
32impl FromStr for Status {
33    type Err = ();
34    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
35        match s {
36            "stable" => Ok(Status::Stable),
37            "test" => Ok(Status::Test),
38            "experimental" => Ok(Status::Experimental),
39            "deprecated" => Ok(Status::Deprecated),
40            "unsupported" => Ok(Status::Unsupported),
41            _ => Err(()),
42        }
43    }
44}
45
46/// Severity level of a triggered rule.
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
48#[serde(rename_all = "lowercase")]
49pub enum Level {
50    Informational,
51    Low,
52    Medium,
53    High,
54    Critical,
55}
56
57impl FromStr for Level {
58    type Err = ();
59    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
60        match s {
61            "informational" => Ok(Level::Informational),
62            "low" => Ok(Level::Low),
63            "medium" => Ok(Level::Medium),
64            "high" => Ok(Level::High),
65            "critical" => Ok(Level::Critical),
66            _ => Err(()),
67        }
68    }
69}
70
71/// Relationship type for the `related` field.
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
73#[serde(rename_all = "lowercase")]
74pub enum RelationType {
75    Derived,
76    Obsolete,
77    Merged,
78    Renamed,
79    Similar,
80}
81
82impl FromStr for RelationType {
83    type Err = ();
84    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
85        match s {
86            "derived" => Ok(RelationType::Derived),
87            "obsolete" => Ok(RelationType::Obsolete),
88            "merged" => Ok(RelationType::Merged),
89            "renamed" => Ok(RelationType::Renamed),
90            "similar" => Ok(RelationType::Similar),
91            _ => Err(()),
92        }
93    }
94}
95
// =============================================================================
// Field Modifiers
// =============================================================================
99
100/// All supported Sigma field modifiers.
101///
102/// Reference: pySigma modifiers.py modifier_mapping
103#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
104#[serde(rename_all = "lowercase")]
105pub enum Modifier {
106    // String matching modifiers
107    Contains,
108    StartsWith,
109    EndsWith,
110
111    // Value linking
112    All,
113
114    // Encoding modifiers
115    Base64,
116    Base64Offset,
117    Wide,
118    Utf16be,
119    Utf16,
120    WindAsh,
121
122    // Pattern matching
123    Re,
124    Cidr,
125
126    // Case sensitivity
127    Cased,
128
129    // Field existence
130    Exists,
131
132    // Placeholder expansion
133    Expand,
134
135    // Field reference
136    FieldRef,
137
138    // Numeric/value comparison
139    Gt,
140    Gte,
141    Lt,
142    Lte,
143    /// Not equal: field value must differ from the specified value.
144    Neq,
145
146    // Regex flags
147    #[serde(rename = "i")]
148    IgnoreCase,
149    #[serde(rename = "m")]
150    Multiline,
151    #[serde(rename = "s")]
152    DotAll,
153
154    // Timestamp parts
155    Minute,
156    Hour,
157    Day,
158    Week,
159    Month,
160    Year,
161}
162
163/// Parse a modifier identifier string.
164///
165/// Reference: pySigma modifiers.py modifier_mapping
166impl FromStr for Modifier {
167    type Err = ();
168    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
169        match s {
170            "contains" => Ok(Modifier::Contains),
171            "startswith" => Ok(Modifier::StartsWith),
172            "endswith" => Ok(Modifier::EndsWith),
173            "all" => Ok(Modifier::All),
174            "base64" => Ok(Modifier::Base64),
175            "base64offset" => Ok(Modifier::Base64Offset),
176            "wide" | "utf16le" => Ok(Modifier::Wide),
177            "utf16be" => Ok(Modifier::Utf16be),
178            "utf16" => Ok(Modifier::Utf16),
179            "windash" => Ok(Modifier::WindAsh),
180            "re" => Ok(Modifier::Re),
181            "cidr" => Ok(Modifier::Cidr),
182            "cased" => Ok(Modifier::Cased),
183            "exists" => Ok(Modifier::Exists),
184            "expand" => Ok(Modifier::Expand),
185            "fieldref" => Ok(Modifier::FieldRef),
186            "gt" => Ok(Modifier::Gt),
187            "gte" => Ok(Modifier::Gte),
188            "lt" => Ok(Modifier::Lt),
189            "lte" => Ok(Modifier::Lte),
190            "neq" => Ok(Modifier::Neq),
191            "i" | "ignorecase" => Ok(Modifier::IgnoreCase),
192            "m" | "multiline" => Ok(Modifier::Multiline),
193            "s" | "dotall" => Ok(Modifier::DotAll),
194            "minute" => Ok(Modifier::Minute),
195            "hour" => Ok(Modifier::Hour),
196            "day" => Ok(Modifier::Day),
197            "week" => Ok(Modifier::Week),
198            "month" => Ok(Modifier::Month),
199            "year" => Ok(Modifier::Year),
200            _ => Err(()),
201        }
202    }
203}
204
// =============================================================================
// Field Specification
// =============================================================================
208
209/// A field name with optional modifiers, parsed from detection keys like
210/// `TargetObject|endswith` or `Destination|contains|all`.
211///
212/// Reference: pySigma rule/detection.py SigmaDetectionItem.from_mapping
213#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
214pub struct FieldSpec {
215    /// Field name (`None` for keyword detections without a field).
216    pub name: Option<String>,
217    /// Ordered list of modifiers applied to this field.
218    pub modifiers: Vec<Modifier>,
219}
220
221impl FieldSpec {
222    pub fn new(name: Option<String>, modifiers: Vec<Modifier>) -> Self {
223        FieldSpec { name, modifiers }
224    }
225
226    pub fn has_modifier(&self, m: Modifier) -> bool {
227        self.modifiers.contains(&m)
228    }
229
230    pub fn is_keyword(&self) -> bool {
231        self.name.is_none()
232    }
233}
234
// =============================================================================
// Condition Expression AST
// =============================================================================
238
239/// Parsed condition expression AST.
240///
241/// Produced by the PEG parser + Pratt parser from condition strings like
242/// `selection and not filter` or `1 of selection_* and not 1 of filter_*`.
243///
244/// Reference: pySigma conditions.py ConditionItem hierarchy
245#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
246pub enum ConditionExpr {
247    /// Logical AND of sub-expressions.
248    And(Vec<ConditionExpr>),
249    /// Logical OR of sub-expressions.
250    Or(Vec<ConditionExpr>),
251    /// Logical NOT of a sub-expression.
252    Not(Box<ConditionExpr>),
253    /// Reference to a named detection identifier.
254    Identifier(String),
255    /// Quantified selector: `1 of selection_*`, `all of them`, etc.
256    Selector {
257        quantifier: Quantifier,
258        pattern: SelectorPattern,
259    },
260}
261
262impl fmt::Display for ConditionExpr {
263    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
264        match self {
265            ConditionExpr::And(args) => {
266                let parts: Vec<String> = args.iter().map(|a| format!("{a}")).collect();
267                write!(f, "({})", parts.join(" and "))
268            }
269            ConditionExpr::Or(args) => {
270                let parts: Vec<String> = args.iter().map(|a| format!("{a}")).collect();
271                write!(f, "({})", parts.join(" or "))
272            }
273            ConditionExpr::Not(arg) => write!(f, "not {arg}"),
274            ConditionExpr::Identifier(id) => write!(f, "{id}"),
275            ConditionExpr::Selector {
276                quantifier,
277                pattern,
278            } => write!(f, "{quantifier} of {pattern}"),
279        }
280    }
281}
282
283/// Quantifier in a selector expression.
284#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
285pub enum Quantifier {
286    /// Match any (at least one): `1 of ...` or `any of ...`
287    Any,
288    /// Match all: `all of ...`
289    All,
290    /// Match a specific count: `N of ...`
291    Count(u64),
292}
293
294impl fmt::Display for Quantifier {
295    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
296        match self {
297            Quantifier::Any => write!(f, "1"),
298            Quantifier::All => write!(f, "all"),
299            Quantifier::Count(n) => write!(f, "{n}"),
300        }
301    }
302}
303
304/// Target pattern in a selector expression.
305#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
306pub enum SelectorPattern {
307    /// All detection identifiers: `... of them`
308    Them,
309    /// A wildcard pattern matching detection names: `... of selection_*`
310    Pattern(String),
311}
312
313impl fmt::Display for SelectorPattern {
314    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
315        match self {
316            SelectorPattern::Them => write!(f, "them"),
317            SelectorPattern::Pattern(p) => write!(f, "{p}"),
318        }
319    }
320}
321
// =============================================================================
// Detection Section
// =============================================================================
325
326/// A single detection item: a field (with modifiers) mapped to one or more values.
327///
328/// Examples:
329/// - `EventType: "user.mfa.factor.deactivate"` → field="EventType", values=["user.mfa..."]
330/// - `Destination|contains|all: ['new-object', 'net.webclient']` → field="Destination",
331///   modifiers=[Contains, All], values=[...]
332///
333/// Reference: pySigma rule/detection.py SigmaDetectionItem
334#[derive(Debug, Clone, PartialEq, Serialize)]
335pub struct DetectionItem {
336    /// The field specification (name + modifiers).
337    pub field: FieldSpec,
338    /// One or more values to match against.
339    pub values: Vec<SigmaValue>,
340}
341
342/// A detection definition: a group of detection items or nested detections.
343///
344/// When constructed from a YAML mapping, items are AND-linked.
345/// When constructed from a YAML list of mappings, sub-detections are OR-linked.
346///
347/// Reference: pySigma rule/detection.py SigmaDetection
348#[derive(Debug, Clone, PartialEq, Serialize)]
349pub enum Detection {
350    /// AND-linked detection items (from a YAML mapping).
351    AllOf(Vec<DetectionItem>),
352    /// OR-linked sub-detections (from a YAML list of mappings).
353    AnyOf(Vec<Detection>),
354    /// Keyword detection: plain value(s) without a field.
355    Keywords(Vec<SigmaValue>),
356}
357
358/// The complete detection section of a Sigma rule.
359///
360/// Contains named detection identifiers, condition expressions, and optional timeframe.
361///
362/// Reference: pySigma rule/detection.py SigmaDetections
363#[derive(Debug, Clone, PartialEq, Serialize)]
364pub struct Detections {
365    /// Named detections (e.g. `selection`, `filter_main`, etc.)
366    pub named: HashMap<String, Detection>,
367    /// One or more condition expressions (parsed from condition string or list).
368    pub conditions: Vec<ConditionExpr>,
369    /// Raw condition strings (before parsing).
370    pub condition_strings: Vec<String>,
371    /// Optional timeframe for aggregation rules (deprecated in favor of correlations).
372    pub timeframe: Option<String>,
373}
374
// =============================================================================
// Log Source
// =============================================================================
378
379/// Log source specification.
380///
381/// Reference: Sigma schema `logsource` object
382#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
383pub struct LogSource {
384    pub category: Option<String>,
385    pub product: Option<String>,
386    pub service: Option<String>,
387    pub definition: Option<String>,
388    /// Any additional custom logsource fields.
389    #[serde(flatten)]
390    pub custom: HashMap<String, String>,
391}
392
// =============================================================================
// Related Rule Reference
// =============================================================================
396
397/// A reference to a related Sigma rule.
398#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
399pub struct Related {
400    pub id: String,
401    pub relation_type: RelationType,
402}
403
// =============================================================================
// Sigma Detection Rule
// =============================================================================
407
408/// A complete Sigma detection rule.
409///
410/// Reference: Sigma schema V2.0.0, pySigma rule.py SigmaRule
411#[derive(Debug, Clone, PartialEq, Serialize)]
412pub struct SigmaRule {
413    // Required fields
414    pub title: String,
415    pub logsource: LogSource,
416    pub detection: Detections,
417
418    // Optional metadata
419    pub id: Option<String>,
420    pub name: Option<String>,
421    pub related: Vec<Related>,
422    pub taxonomy: Option<String>,
423    pub status: Option<Status>,
424    pub description: Option<String>,
425    pub license: Option<String>,
426    pub author: Option<String>,
427    pub references: Vec<String>,
428    pub date: Option<String>,
429    pub modified: Option<String>,
430    pub fields: Vec<String>,
431    pub falsepositives: Vec<String>,
432    pub level: Option<Level>,
433    pub tags: Vec<String>,
434    pub scope: Vec<String>,
435
436    /// Custom attributes set by pipeline transformations (e.g. `SetCustomAttribute`).
437    ///
438    /// Backends / engines can read these to modify behavior per-rule.
439    /// This mirrors pySigma's `SigmaRule.custom_attributes` dict.
440    #[serde(skip_serializing_if = "HashMap::is_empty")]
441    pub custom_attributes: HashMap<String, String>,
442}
443
// =============================================================================
// Correlation Rule
// =============================================================================
447
448/// Correlation rule type.
449///
450/// Reference: pySigma correlations.py SigmaCorrelationType
451#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
452#[serde(rename_all = "snake_case")]
453pub enum CorrelationType {
454    EventCount,
455    ValueCount,
456    Temporal,
457    TemporalOrdered,
458    ValueSum,
459    ValueAvg,
460    ValuePercentile,
461    ValueMedian,
462}
463
464impl FromStr for CorrelationType {
465    type Err = ();
466    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
467        match s {
468            "event_count" => Ok(CorrelationType::EventCount),
469            "value_count" => Ok(CorrelationType::ValueCount),
470            "temporal" => Ok(CorrelationType::Temporal),
471            "temporal_ordered" => Ok(CorrelationType::TemporalOrdered),
472            "value_sum" => Ok(CorrelationType::ValueSum),
473            "value_avg" => Ok(CorrelationType::ValueAvg),
474            "value_percentile" => Ok(CorrelationType::ValuePercentile),
475            "value_median" => Ok(CorrelationType::ValueMedian),
476            _ => Err(()),
477        }
478    }
479}
480
481/// Comparison operator in a correlation condition.
482///
483/// Reference: pySigma correlations.py SigmaCorrelationConditionOperator
484#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
485pub enum ConditionOperator {
486    Lt,
487    Lte,
488    Gt,
489    Gte,
490    Eq,
491    Neq,
492}
493
494impl FromStr for ConditionOperator {
495    type Err = ();
496    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
497        match s {
498            "lt" => Ok(ConditionOperator::Lt),
499            "lte" => Ok(ConditionOperator::Lte),
500            "gt" => Ok(ConditionOperator::Gt),
501            "gte" => Ok(ConditionOperator::Gte),
502            "eq" => Ok(ConditionOperator::Eq),
503            "neq" => Ok(ConditionOperator::Neq),
504            _ => Err(()),
505        }
506    }
507}
508
509/// Condition for a correlation rule.
510///
511/// Reference: pySigma correlations.py SigmaCorrelationCondition
512#[derive(Debug, Clone, PartialEq, Serialize)]
513pub enum CorrelationCondition {
514    /// Threshold condition with one or more predicates (supports ranges).
515    ///
516    /// Single: `gte: 100`
517    /// Range: `gt: 100` + `lte: 200`
518    Threshold {
519        /// One or more (operator, value) predicates. All must be satisfied.
520        predicates: Vec<(ConditionOperator, u64)>,
521        /// Optional field reference (required for `value_count` type).
522        field: Option<String>,
523    },
524    /// Extended boolean condition for temporal types: `"rule_a and rule_b"`
525    Extended(ConditionExpr),
526}
527
528/// Field alias mapping in a correlation rule.
529///
530/// Maps a canonical alias name to per-rule field name mappings.
531///
532/// Reference: pySigma correlations.py SigmaCorrelationFieldAlias
533#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
534pub struct FieldAlias {
535    pub alias: String,
536    /// Maps rule reference (ID or name) → field name in that rule's events.
537    pub mapping: HashMap<String, String>,
538}
539
540/// A Sigma correlation rule.
541///
542/// Reference: pySigma correlations.py SigmaCorrelationRule
543#[derive(Debug, Clone, PartialEq, Serialize)]
544pub struct CorrelationRule {
545    // Metadata (shared with detection rules)
546    pub title: String,
547    pub id: Option<String>,
548    pub name: Option<String>,
549    pub status: Option<Status>,
550    pub description: Option<String>,
551    pub author: Option<String>,
552    pub date: Option<String>,
553    pub modified: Option<String>,
554    pub references: Vec<String>,
555    pub tags: Vec<String>,
556    pub level: Option<Level>,
557
558    // Correlation-specific fields
559    pub correlation_type: CorrelationType,
560    pub rules: Vec<String>,
561    pub group_by: Vec<String>,
562    pub timespan: Timespan,
563    pub condition: CorrelationCondition,
564    pub aliases: Vec<FieldAlias>,
565    pub generate: bool,
566    /// Custom key-value attributes (e.g. `rsigma.correlation_event_mode`).
567    /// Parsed from the top-level `custom_attributes` mapping in the YAML or set
568    /// programmatically. Mirrors `SigmaRule.custom_attributes`.
569    pub custom_attributes: HashMap<String, String>,
570}
571
// =============================================================================
// Filter Rule
// =============================================================================
575
576/// A Sigma filter rule that modifies the detection logic of referenced rules.
577///
578/// Filters add additional conditions (typically exclusions) to existing rules
579/// without modifying the original rule files.
580#[derive(Debug, Clone, PartialEq, Serialize)]
581pub struct FilterRule {
582    pub title: String,
583    pub id: Option<String>,
584    pub name: Option<String>,
585    pub status: Option<Status>,
586    pub description: Option<String>,
587    pub author: Option<String>,
588    pub date: Option<String>,
589    pub modified: Option<String>,
590    pub logsource: Option<LogSource>,
591
592    /// Rules this filter applies to (by ID or name).
593    pub rules: Vec<String>,
594    /// The filter detection section.
595    pub detection: Detections,
596}
597
// =============================================================================
// Collection / Document
// =============================================================================
601
602/// A single parsed document from a Sigma YAML file.
603///
604/// A YAML file may contain multiple documents separated by `---`.
605/// Each document is either a detection rule, correlation rule, filter, or action.
606#[derive(Debug, Clone, PartialEq, Serialize)]
607pub enum SigmaDocument {
608    Rule(Box<SigmaRule>),
609    Correlation(CorrelationRule),
610    Filter(FilterRule),
611}
612
613/// A collection of parsed Sigma documents from one or more YAML files.
614#[derive(Debug, Clone, Serialize)]
615pub struct SigmaCollection {
616    pub rules: Vec<SigmaRule>,
617    pub correlations: Vec<CorrelationRule>,
618    pub filters: Vec<FilterRule>,
619    /// Parsing errors that were collected (when `collect_errors` is true).
620    #[serde(skip)]
621    pub errors: Vec<String>,
622}
623
624impl SigmaCollection {
625    pub fn new() -> Self {
626        SigmaCollection {
627            rules: Vec::new(),
628            correlations: Vec::new(),
629            filters: Vec::new(),
630            errors: Vec::new(),
631        }
632    }
633
634    /// Total number of parsed documents.
635    pub fn len(&self) -> usize {
636        self.rules.len() + self.correlations.len() + self.filters.len()
637    }
638
639    pub fn is_empty(&self) -> bool {
640        self.len() == 0
641    }
642}
643
644impl Default for SigmaCollection {
645    fn default() -> Self {
646        Self::new()
647    }
648}