Skip to main content

rsigma_parser/
ast.rs

1//! AST types for all Sigma constructs: rules, detections, conditions,
2//! correlations, and filters.
3//!
4//! Reference: Sigma specification V2.0.0 (2024-08-08)
5//! Reference: pySigma types, conditions, correlations, rule modules
6
7use std::collections::HashMap;
8use std::fmt;
9use std::str::FromStr;
10
11use serde::Serialize;
12
13use crate::value::{SigmaValue, Timespan};
14
15// =============================================================================
16// Enumerations
17// =============================================================================
18
/// Rule maturity status (the `status` attribute of a Sigma document).
///
/// Serialized by serde as the lowercase variant name (`stable`, `test`,
/// `experimental`, `deprecated`, `unsupported`); the [`FromStr`]
/// implementation accepts exactly the same spellings.
///
/// Reference: pySigma rule.py SigmaStatus
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Status {
    /// Keyword `stable`.
    Stable,
    /// Keyword `test`.
    Test,
    /// Keyword `experimental`.
    Experimental,
    /// Keyword `deprecated`.
    Deprecated,
    /// Keyword `unsupported`.
    Unsupported,
}
31
32impl FromStr for Status {
33    type Err = ();
34    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
35        match s {
36            "stable" => Ok(Status::Stable),
37            "test" => Ok(Status::Test),
38            "experimental" => Ok(Status::Experimental),
39            "deprecated" => Ok(Status::Deprecated),
40            "unsupported" => Ok(Status::Unsupported),
41            _ => Err(()),
42        }
43    }
44}
45
/// Severity level of a triggered rule (the `level` attribute).
///
/// Serialized by serde as the lowercase variant name. The same keywords are
/// returned by `Level::as_str` and accepted by the [`FromStr`] implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Level {
    /// Keyword `informational`.
    Informational,
    /// Keyword `low`.
    Low,
    /// Keyword `medium`.
    Medium,
    /// Keyword `high`.
    High,
    /// Keyword `critical`.
    Critical,
}
56
57impl Level {
58    pub fn as_str(&self) -> &'static str {
59        match self {
60            Level::Informational => "informational",
61            Level::Low => "low",
62            Level::Medium => "medium",
63            Level::High => "high",
64            Level::Critical => "critical",
65        }
66    }
67}
68
69impl FromStr for Level {
70    type Err = ();
71    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
72        match s {
73            "informational" => Ok(Level::Informational),
74            "low" => Ok(Level::Low),
75            "medium" => Ok(Level::Medium),
76            "high" => Ok(Level::High),
77            "critical" => Ok(Level::Critical),
78            _ => Err(()),
79        }
80    }
81}
82
/// Relationship type for the `related` field.
///
/// Serialized by serde as the lowercase variant name; the [`FromStr`]
/// implementation accepts the same spellings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum RelationType {
    /// Keyword `correlation`.
    Correlation,
    /// Keyword `derived`.
    Derived,
    /// Keyword `obsolete`.
    Obsolete,
    /// Keyword `merged`.
    Merged,
    /// Keyword `renamed`.
    Renamed,
    /// Keyword `similar`.
    Similar,
}
94
95impl FromStr for RelationType {
96    type Err = ();
97    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
98        match s {
99            "correlation" => Ok(RelationType::Correlation),
100            "derived" => Ok(RelationType::Derived),
101            "obsolete" => Ok(RelationType::Obsolete),
102            "merged" => Ok(RelationType::Merged),
103            "renamed" => Ok(RelationType::Renamed),
104            "similar" => Ok(RelationType::Similar),
105            _ => Err(()),
106        }
107    }
108}
109
110// =============================================================================
111// Field Modifiers
112// =============================================================================
113
/// All supported Sigma field modifiers.
///
/// Serialized by serde as the lowercased variant name (`StartsWith` →
/// `startswith`, `Base64Offset` → `base64offset`, ...), except for the three
/// regex-flag variants, which carry explicit single-letter renames. The
/// [`FromStr`] implementation accepts these spellings plus a few aliases,
/// noted on the variants below.
///
/// Reference: pySigma modifiers.py modifier_mapping
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Modifier {
    // String matching modifiers
    Contains,
    StartsWith,
    EndsWith,

    // Value linking
    All,

    // Encoding modifiers
    Base64,
    Base64Offset,
    /// Parsed from either `wide` or `utf16le`; serialized as `wide`.
    Wide,
    Utf16be,
    Utf16,
    /// Keyword `windash` (the variant is cased so that the serde `lowercase`
    /// rename produces exactly that spelling).
    WindAsh,

    // Pattern matching
    Re,
    Cidr,

    // Case sensitivity
    Cased,

    // Field existence
    Exists,

    // Placeholder expansion
    Expand,

    // Field reference
    FieldRef,

    // Numeric/value comparison
    Gt,
    Gte,
    Lt,
    Lte,
    /// Not equal: field value must differ from the specified value.
    Neq,

    // Regex flags
    /// Parsed from `i` or `ignorecase`; serialized as `i`.
    #[serde(rename = "i")]
    IgnoreCase,
    /// Parsed from `m` or `multiline`; serialized as `m`.
    #[serde(rename = "m")]
    Multiline,
    /// Parsed from `s` or `dotall`; serialized as `s`.
    #[serde(rename = "s")]
    DotAll,

    // Timestamp parts
    Minute,
    Hour,
    Day,
    Week,
    Month,
    Year,
}
176
177/// Parse a modifier identifier string.
178///
179/// Reference: pySigma modifiers.py modifier_mapping
180impl FromStr for Modifier {
181    type Err = ();
182    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
183        match s {
184            "contains" => Ok(Modifier::Contains),
185            "startswith" => Ok(Modifier::StartsWith),
186            "endswith" => Ok(Modifier::EndsWith),
187            "all" => Ok(Modifier::All),
188            "base64" => Ok(Modifier::Base64),
189            "base64offset" => Ok(Modifier::Base64Offset),
190            "wide" | "utf16le" => Ok(Modifier::Wide),
191            "utf16be" => Ok(Modifier::Utf16be),
192            "utf16" => Ok(Modifier::Utf16),
193            "windash" => Ok(Modifier::WindAsh),
194            "re" => Ok(Modifier::Re),
195            "cidr" => Ok(Modifier::Cidr),
196            "cased" => Ok(Modifier::Cased),
197            "exists" => Ok(Modifier::Exists),
198            "expand" => Ok(Modifier::Expand),
199            "fieldref" => Ok(Modifier::FieldRef),
200            "gt" => Ok(Modifier::Gt),
201            "gte" => Ok(Modifier::Gte),
202            "lt" => Ok(Modifier::Lt),
203            "lte" => Ok(Modifier::Lte),
204            "neq" => Ok(Modifier::Neq),
205            "i" | "ignorecase" => Ok(Modifier::IgnoreCase),
206            "m" | "multiline" => Ok(Modifier::Multiline),
207            "s" | "dotall" => Ok(Modifier::DotAll),
208            "minute" => Ok(Modifier::Minute),
209            "hour" => Ok(Modifier::Hour),
210            "day" => Ok(Modifier::Day),
211            "week" => Ok(Modifier::Week),
212            "month" => Ok(Modifier::Month),
213            "year" => Ok(Modifier::Year),
214            _ => Err(()),
215        }
216    }
217}
218
219// =============================================================================
220// Field Specification
221// =============================================================================
222
/// A field name with optional modifiers, parsed from detection keys like
/// `TargetObject|endswith` or `Destination|contains|all`.
///
/// In those examples the part before the first `|` is the field name and each
/// following `|`-separated segment is one [`Modifier`].
///
/// Reference: pySigma rule/detection.py SigmaDetectionItem.from_mapping
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct FieldSpec {
    /// Field name (`None` for keyword detections without a field).
    pub name: Option<String>,
    /// Ordered list of modifiers applied to this field, in the order they
    /// were written in the detection key.
    pub modifiers: Vec<Modifier>,
}
234
235impl FieldSpec {
236    pub fn new(name: Option<String>, modifiers: Vec<Modifier>) -> Self {
237        FieldSpec { name, modifiers }
238    }
239
240    pub fn has_modifier(&self, m: Modifier) -> bool {
241        self.modifiers.contains(&m)
242    }
243
244    pub fn is_keyword(&self) -> bool {
245        self.name.is_none()
246    }
247}
248
249// =============================================================================
250// Condition Expression AST
251// =============================================================================
252
/// Parsed condition expression AST.
///
/// Produced by the PEG parser + Pratt parser from condition strings like
/// `selection and not filter` or `1 of selection_* and not 1 of filter_*`.
///
/// The `Display` implementation renders an expression back to condition
/// syntax, wrapping every `And`/`Or` group in parentheses.
///
/// Reference: pySigma conditions.py ConditionItem hierarchy
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum ConditionExpr {
    /// Logical AND of sub-expressions.
    And(Vec<ConditionExpr>),
    /// Logical OR of sub-expressions.
    Or(Vec<ConditionExpr>),
    /// Logical NOT of a sub-expression.
    Not(Box<ConditionExpr>),
    /// Reference to a named detection identifier.
    Identifier(String),
    /// Quantified selector: `1 of selection_*`, `all of them`, etc.
    Selector {
        /// How many of the matched detections must be satisfied.
        quantifier: Quantifier,
        /// Which detections the selector ranges over.
        pattern: SelectorPattern,
    },
}
275
276impl fmt::Display for ConditionExpr {
277    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
278        match self {
279            ConditionExpr::And(args) => {
280                let parts: Vec<String> = args.iter().map(|a| format!("{a}")).collect();
281                write!(f, "({})", parts.join(" and "))
282            }
283            ConditionExpr::Or(args) => {
284                let parts: Vec<String> = args.iter().map(|a| format!("{a}")).collect();
285                write!(f, "({})", parts.join(" or "))
286            }
287            ConditionExpr::Not(arg) => write!(f, "not {arg}"),
288            ConditionExpr::Identifier(id) => write!(f, "{id}"),
289            ConditionExpr::Selector {
290                quantifier,
291                pattern,
292            } => write!(f, "{quantifier} of {pattern}"),
293        }
294    }
295}
296
/// Quantifier in a selector expression (the part before `of`).
///
/// The `Display` implementation renders `Any` as `1`, `All` as `all`, and
/// `Count(n)` as the decimal number `n`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum Quantifier {
    /// Match any (at least one): `1 of ...` or `any of ...`
    Any,
    /// Match all: `all of ...`
    All,
    /// Match a specific count: `N of ...`
    Count(u64),
}
307
308impl fmt::Display for Quantifier {
309    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
310        match self {
311            Quantifier::Any => write!(f, "1"),
312            Quantifier::All => write!(f, "all"),
313            Quantifier::Count(n) => write!(f, "{n}"),
314        }
315    }
316}
317
/// Target pattern in a selector expression (the part after `of`).
///
/// The `Display` implementation renders `Them` as `them` and `Pattern` as
/// the stored pattern text, unchanged.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum SelectorPattern {
    /// All detection identifiers: `... of them`
    Them,
    /// A wildcard pattern matching detection names: `... of selection_*`
    Pattern(String),
}
326
327impl fmt::Display for SelectorPattern {
328    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
329        match self {
330            SelectorPattern::Them => write!(f, "them"),
331            SelectorPattern::Pattern(p) => write!(f, "{p}"),
332        }
333    }
334}
335
336// =============================================================================
337// Detection Section
338// =============================================================================
339
340/// Quantifier for matching the members of an array-valued field.
341///
342/// Used by array object-scope blocks (`field[any]:` / `field[all]:`). This is
343/// deliberately distinct from the `all` value-list modifier ([`Modifier::All`]),
344/// which links several *values* of one field with AND. An array quantifier
345/// instead ranges over the *members* of an array-valued field.
346///
347/// Reference: proposed Sigma array-matching extension (sigma-specification
348/// Discussion #106).
349#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
350#[serde(rename_all = "lowercase")]
351pub enum ArrayQuantifier {
352    /// At least one array member must satisfy the nested detection.
353    Any,
354    /// Every array member must satisfy the nested detection, and the array
355    /// must be non-empty.
356    All,
357    /// Every array member must satisfy the nested detection, but an empty or
358    /// missing array also matches (the vacuously-true reading of [`All`]).
359    ///
360    /// [`All`]: ArrayQuantifier::All
361    AllOrEmpty,
362    /// No array member satisfies the nested detection (the dual of [`Any`]).
363    /// Matches an empty or missing array (vacuously, no member matches).
364    ///
365    /// [`Any`]: ArrayQuantifier::Any
366    None,
367}
368
369impl fmt::Display for ArrayQuantifier {
370    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
371        match self {
372            ArrayQuantifier::Any => write!(f, "any"),
373            ArrayQuantifier::All => write!(f, "all"),
374            ArrayQuantifier::AllOrEmpty => write!(f, "all_or_empty"),
375            ArrayQuantifier::None => write!(f, "none"),
376        }
377    }
378}
379
/// A single detection item: a field (with modifiers) mapped to one or more values.
///
/// Examples:
/// - `EventType: "user.mfa.factor.deactivate"` → field="EventType", values=["user.mfa..."]
/// - `Destination|contains|all: ['new-object', 'net.webclient']` → field="Destination",
///   modifiers=[Contains, All], values=[...]
///
/// Unlike [`FieldSpec`], this type derives `PartialEq` but not `Eq`.
///
/// Reference: pySigma rule/detection.py SigmaDetectionItem
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct DetectionItem {
    /// The field specification (name + modifiers).
    pub field: FieldSpec,
    /// One or more values to match against.
    pub values: Vec<SigmaValue>,
}
395
/// A detection definition: a group of detection items or nested detections.
///
/// When constructed from a YAML mapping, items are AND-linked.
/// When constructed from a YAML list of mappings, sub-detections are OR-linked.
///
/// The type is recursive: `AnyOf`, `And`, `ArrayMatch`, and `Conditional`
/// all contain further `Detection` values.
///
/// Reference: pySigma rule/detection.py SigmaDetection
#[derive(Debug, Clone, PartialEq, Serialize)]
pub enum Detection {
    /// AND-linked detection items (from a YAML mapping).
    AllOf(Vec<DetectionItem>),
    /// OR-linked sub-detections (from a YAML list of mappings).
    AnyOf(Vec<Detection>),
    /// Keyword detection: plain value(s) without a field.
    Keywords(Vec<SigmaValue>),
    /// Array object-scope quantifier block: `field[any]:` / `field[all]:`
    /// opening a nested detection that is evaluated against a single array
    /// member.
    ///
    /// - `field` is the dot-path to the array (quantifier markers stripped).
    /// - `quantifier` decides how many members must satisfy `body`; see
    ///   [`ArrayQuantifier`] for the full set (`any`, `all`, `all_or_empty`,
    ///   `none`).
    /// - `body` is the nested detection applied per member. A `body` item with
    ///   no field name (`FieldSpec::name == None`) matches the array member
    ///   value itself (the scalar-array case `field[all]: value`).
    ///
    /// This is the only construct that expresses same-member correlation across
    /// multiple predicates, and the only one that lowers cleanly to backend
    /// array primitives (Elasticsearch `nested`, KQL `mv-apply`, SQL
    /// `jsonb_array_elements`, Splunk `mvexpand`).
    ArrayMatch {
        /// Dot-path to the array field (quantifier markers stripped).
        field: String,
        /// How many members must satisfy `body` (see [`ArrayQuantifier`]).
        quantifier: ArrayQuantifier,
        /// Nested detection evaluated against a single array member.
        body: Box<Detection>,
    },
    /// AND of heterogeneous sub-detections. Produced when a YAML mapping mixes
    /// plain detection items with one or more array object-scope blocks, which
    /// [`Detection::AllOf`] (a list of simple items) cannot represent.
    And(Vec<Detection>),
    /// Extended object-scope block body: named element-scoped sub-selections
    /// combined by a `condition` expression (the recursive "mini-event" form),
    /// enabling per-element `and`/`or`/`not`. Produced only as an
    /// [`ArrayMatch`](Detection::ArrayMatch) body when the block map carries a
    /// `condition:` key. The basic conjunction-map body is the degenerate case
    /// (an implicit AND of items); this is the explicit-condition form.
    Conditional {
        /// Element-scoped named sub-selections (each a nested detection).
        named: HashMap<String, Detection>,
        /// Boolean combination of the named sub-selections, evaluated per
        /// array member.
        condition: ConditionExpr,
    },
}
451
/// The complete detection section of a Sigma rule.
///
/// Contains named detection identifiers, condition expressions, and optional timeframe.
/// Conditions are kept both in parsed form (`conditions`) and as the raw
/// source text (`condition_strings`).
///
/// Reference: pySigma rule/detection.py SigmaDetections
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct Detections {
    /// Named detections (e.g. `selection`, `filter_main`, etc.)
    pub named: HashMap<String, Detection>,
    /// One or more condition expressions (parsed from condition string or list).
    pub conditions: Vec<ConditionExpr>,
    /// Raw condition strings (before parsing).
    pub condition_strings: Vec<String>,
    /// Optional timeframe for aggregation rules (deprecated in favor of correlations).
    /// Stored as the raw string, not as a parsed timespan.
    pub timeframe: Option<String>,
}
468
469// =============================================================================
470// Log Source
471// =============================================================================
472
/// Log source specification.
///
/// Every named field is optional, and the type derives `Default`, so an empty
/// log source can be built with `LogSource::default()`.
///
/// Reference: Sigma schema `logsource` object
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
pub struct LogSource {
    /// The `category` attribute, if present.
    pub category: Option<String>,
    /// The `product` attribute, if present.
    pub product: Option<String>,
    /// The `service` attribute, if present.
    pub service: Option<String>,
    /// The `definition` attribute, if present.
    pub definition: Option<String>,
    /// Any additional custom logsource fields. Because of `serde(flatten)`,
    /// these entries are serialized at the same level as the named fields
    /// rather than under a nested `custom` key.
    #[serde(flatten)]
    pub custom: HashMap<String, String>,
}
486
487// =============================================================================
488// Related Rule Reference
489// =============================================================================
490
/// A reference to a related Sigma rule (one entry of a `related` list).
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Related {
    /// Identifier of the referenced rule.
    pub id: String,
    /// How the referencing rule relates to the referenced one.
    pub relation_type: RelationType,
}
497
498// =============================================================================
499// Sigma Detection Rule
500// =============================================================================
501
/// A complete Sigma detection rule.
///
/// Scalar optional metadata is modelled as `Option<_>`, list-valued metadata
/// as a plain `Vec<_>`. When serialized, `sigma_version` is omitted if `None`
/// and `custom_attributes` is omitted if empty.
///
/// Reference: Sigma schema V2.0.0, pySigma rule.py SigmaRule
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct SigmaRule {
    // Required fields
    pub title: String,
    pub logsource: LogSource,
    pub detection: Detections,

    /// The Sigma specification MAJOR version this rule targets (the
    /// `sigma-version` attribute, e.g. `3`). `None` means absent, which resolves
    /// to the fixed floor [`crate::version::SPEC_VERSION_FLOOR`]. Only the major
    /// is stored, since breaking spec changes occur only at major bumps; it
    /// gates version-sensitive interpretation such as array-matching brackets.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sigma_version: Option<u32>,

    // Optional metadata
    pub id: Option<String>,
    pub name: Option<String>,
    pub related: Vec<Related>,
    pub taxonomy: Option<String>,
    pub status: Option<Status>,
    pub description: Option<String>,
    pub license: Option<String>,
    pub author: Option<String>,
    pub references: Vec<String>,
    // `date` and `modified` are kept as raw strings, not parsed dates.
    pub date: Option<String>,
    pub modified: Option<String>,
    pub fields: Vec<String>,
    pub falsepositives: Vec<String>,
    pub level: Option<Level>,
    pub tags: Vec<String>,
    pub scope: Vec<String>,

    /// Custom attributes attached to the rule.
    ///
    /// Populated from (a) any top-level YAML key that is not part of the
    /// standard Sigma rule schema, (b) the entries of the dedicated top-level
    /// `custom_attributes:` mapping (explicit entries win over arbitrary keys
    /// of the same name), and (c) pipeline transformations such as
    /// `SetCustomAttribute`, which are applied last and override both.
    ///
    /// Mirrors pySigma's `SigmaRule.custom_attributes` dict. Engines and
    /// backends can read these to modify per-rule behavior.
    #[serde(skip_serializing_if = "HashMap::is_empty")]
    pub custom_attributes: HashMap<String, yaml_serde::Value>,
}
551
552// =============================================================================
553// Correlation Rule
554// =============================================================================
555
/// Correlation rule type.
///
/// Serialized by serde in `snake_case` (`event_count`, `temporal_ordered`,
/// ...). The same keywords are returned by `CorrelationType::as_str` and
/// accepted by the [`FromStr`] implementation.
///
/// Reference: pySigma correlations.py SigmaCorrelationType
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum CorrelationType {
    /// Keyword `event_count`.
    EventCount,
    /// Keyword `value_count`.
    ValueCount,
    /// Keyword `temporal`.
    Temporal,
    /// Keyword `temporal_ordered`.
    TemporalOrdered,
    /// Keyword `value_sum`.
    ValueSum,
    /// Keyword `value_avg`.
    ValueAvg,
    /// Keyword `value_percentile`.
    ValuePercentile,
    /// Keyword `value_median`.
    ValueMedian,
}
571
572impl CorrelationType {
573    pub fn as_str(&self) -> &'static str {
574        match self {
575            CorrelationType::EventCount => "event_count",
576            CorrelationType::ValueCount => "value_count",
577            CorrelationType::Temporal => "temporal",
578            CorrelationType::TemporalOrdered => "temporal_ordered",
579            CorrelationType::ValueSum => "value_sum",
580            CorrelationType::ValueAvg => "value_avg",
581            CorrelationType::ValuePercentile => "value_percentile",
582            CorrelationType::ValueMedian => "value_median",
583        }
584    }
585}
586
587impl FromStr for CorrelationType {
588    type Err = ();
589    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
590        match s {
591            "event_count" => Ok(CorrelationType::EventCount),
592            "value_count" => Ok(CorrelationType::ValueCount),
593            "temporal" => Ok(CorrelationType::Temporal),
594            "temporal_ordered" => Ok(CorrelationType::TemporalOrdered),
595            "value_sum" => Ok(CorrelationType::ValueSum),
596            "value_avg" => Ok(CorrelationType::ValueAvg),
597            "value_percentile" => Ok(CorrelationType::ValuePercentile),
598            "value_median" => Ok(CorrelationType::ValueMedian),
599            _ => Err(()),
600        }
601    }
602}
603
/// Window semantics for a correlation rule's `timespan`.
///
/// Controls how `timespan` is anchored to the event stream. `Sliding` is the
/// default and matches the behavior the Sigma correlation specification already
/// prefers (a trailing per-event window), so omitting `window` never changes the
/// meaning of an existing rule.
///
/// - `Sliding`: trailing window `(t - timespan, t]` evaluated per event.
/// - `Tumbling`: fixed, boundary-aligned, non-overlapping buckets of size
///   `timespan`.
/// - `Session`: dynamic window that extends while consecutive in-group events
///   stay within `gap`, capped by `timespan` as the maximum total span.
///
/// Serialized by serde in `snake_case`; the same keywords are returned by
/// `WindowMode::as_str` and accepted by the [`FromStr`] implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum WindowMode {
    /// Keyword `sliding`; the value produced by `WindowMode::default()`.
    #[default]
    Sliding,
    /// Keyword `tumbling`.
    Tumbling,
    /// Keyword `session`.
    Session,
}
624
625impl WindowMode {
626    pub fn as_str(&self) -> &'static str {
627        match self {
628            WindowMode::Sliding => "sliding",
629            WindowMode::Tumbling => "tumbling",
630            WindowMode::Session => "session",
631        }
632    }
633}
634
635impl FromStr for WindowMode {
636    type Err = ();
637    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
638        match s {
639            "sliding" => Ok(WindowMode::Sliding),
640            "tumbling" => Ok(WindowMode::Tumbling),
641            "session" => Ok(WindowMode::Session),
642            _ => Err(()),
643        }
644    }
645}
646
/// Comparison operator in a correlation condition.
///
/// The [`FromStr`] implementation accepts the lowercase keywords `lt`, `lte`,
/// `gt`, `gte`, `eq`, and `neq`.
///
/// NOTE(review): unlike the other keyword enums in this file, this one has no
/// `serde(rename_all = ...)` attribute, so it serializes with the variant
/// names as written (`Lt`, `Gte`, ...) rather than the lowercase keywords.
/// Confirm whether that is intended.
///
/// Reference: pySigma correlations.py SigmaCorrelationConditionOperator
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum ConditionOperator {
    /// Keyword `lt`.
    Lt,
    /// Keyword `lte`.
    Lte,
    /// Keyword `gt`.
    Gt,
    /// Keyword `gte`.
    Gte,
    /// Keyword `eq`.
    Eq,
    /// Keyword `neq`.
    Neq,
}
659
660impl FromStr for ConditionOperator {
661    type Err = ();
662    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
663        match s {
664            "lt" => Ok(ConditionOperator::Lt),
665            "lte" => Ok(ConditionOperator::Lte),
666            "gt" => Ok(ConditionOperator::Gt),
667            "gte" => Ok(ConditionOperator::Gte),
668            "eq" => Ok(ConditionOperator::Eq),
669            "neq" => Ok(ConditionOperator::Neq),
670            _ => Err(()),
671        }
672    }
673}
674
/// Condition for a correlation rule.
///
/// Either a numeric threshold (one or more comparisons against `u64` values)
/// or a boolean expression over rule references.
///
/// Reference: pySigma correlations.py SigmaCorrelationCondition
#[derive(Debug, Clone, PartialEq, Serialize)]
pub enum CorrelationCondition {
    /// Threshold condition with one or more predicates (supports ranges).
    ///
    /// Single: `gte: 100`
    /// Range: `gt: 100` + `lte: 200`
    Threshold {
        /// One or more (operator, value) predicates. All must be satisfied.
        predicates: Vec<(ConditionOperator, u64)>,
        /// Optional field reference(s) (required for `value_count` type).
        /// A single string is normalized to a one-element vec.
        field: Option<Vec<String>>,
        /// Percentile rank (0-100) for `value_percentile` type.
        /// Defaults to 50 if not specified.
        percentile: Option<u64>,
    },
    /// Extended boolean condition for temporal types: `"rule_a and rule_b"`
    Extended(ConditionExpr),
}
697
/// Field alias mapping in a correlation rule.
///
/// Maps a canonical alias name to per-rule field name mappings.
///
/// Reference: pySigma correlations.py SigmaCorrelationFieldAlias
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct FieldAlias {
    /// The canonical alias name.
    pub alias: String,
    /// Maps rule reference (ID or name) → field name in that rule's events.
    pub mapping: HashMap<String, String>,
}
709
/// A Sigma correlation rule.
///
/// Carries the same metadata fields as a detection rule plus the
/// correlation-specific settings. When serialized, `sigma_version` and `gap`
/// are omitted if `None`, and `custom_attributes` is omitted if empty.
///
/// Reference: pySigma correlations.py SigmaCorrelationRule
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct CorrelationRule {
    // Metadata (shared with detection rules)
    pub title: String,
    /// The Sigma specification MAJOR version this document targets (the
    /// `sigma-version` attribute). See [`SigmaRule::sigma_version`]. A
    /// correlation rule and the rules it aggregates should share a major.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sigma_version: Option<u32>,
    pub id: Option<String>,
    pub name: Option<String>,
    pub status: Option<Status>,
    pub description: Option<String>,
    pub author: Option<String>,
    // `date` and `modified` are kept as raw strings, not parsed dates.
    pub date: Option<String>,
    pub modified: Option<String>,
    pub related: Vec<Related>,
    pub references: Vec<String>,
    pub taxonomy: Option<String>,
    pub license: Option<String>,
    pub tags: Vec<String>,
    pub fields: Vec<String>,
    pub falsepositives: Vec<String>,
    pub level: Option<Level>,
    pub scope: Vec<String>,

    // Correlation-specific fields
    pub correlation_type: CorrelationType,
    // References to the correlated rules, stored as plain strings.
    pub rules: Vec<String>,
    pub group_by: Vec<String>,
    pub timespan: Timespan,
    /// Window semantics for `timespan`: `sliding` (default), `tumbling`, or
    /// `session`. Absent in the source defaults to [`WindowMode::Sliding`].
    pub window: WindowMode,
    /// Maximum inactivity between consecutive in-group events for a `session`
    /// window. Required when `window` is `session`, and unset otherwise.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gap: Option<Timespan>,
    pub condition: CorrelationCondition,
    pub aliases: Vec<FieldAlias>,
    pub generate: bool,

    /// Custom attributes attached to the correlation rule.
    ///
    /// Populated the same way as `SigmaRule.custom_attributes`: arbitrary
    /// top-level YAML keys, the dedicated `custom_attributes:` block, and
    /// pipeline `SetCustomAttribute` transformations (last-write-wins).
    /// Engine-level `rsigma.*` extensions (e.g. `rsigma.correlation_event_mode`,
    /// `rsigma.suppress`, `rsigma.action`) are read from here.
    #[serde(skip_serializing_if = "HashMap::is_empty")]
    pub custom_attributes: HashMap<String, yaml_serde::Value>,
}
765
766// =============================================================================
767// Filter Rule
768// =============================================================================
769
/// Which rules a filter applies to.
///
/// Used as the type of [`FilterRule::rules`].
#[derive(Debug, Clone, PartialEq, Serialize)]
pub enum FilterRuleTarget {
    /// The filter applies to every loaded rule.
    Any,
    /// The filter applies only to rules matching these IDs or titles.
    ///
    /// NOTE(review): the doc on `FilterRule::rules` says "by ID or name"
    /// while this one says "IDs or titles" — confirm which attribute the
    /// matcher actually compares against.
    Specific(Vec<String>),
}
778
/// A Sigma filter rule that modifies the detection logic of referenced rules.
///
/// Filters add additional conditions (typically exclusions) to existing rules
/// without modifying the original rule files.
///
/// When serialized, `sigma_version` is omitted if `None` and
/// `custom_attributes` is omitted if empty.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct FilterRule {
    pub title: String,
    /// The Sigma specification MAJOR version this document targets (the
    /// `sigma-version` attribute). See [`SigmaRule::sigma_version`]. A filter and
    /// the rules it targets should share a major.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sigma_version: Option<u32>,
    pub id: Option<String>,
    pub name: Option<String>,
    pub taxonomy: Option<String>,
    pub status: Option<Status>,
    pub description: Option<String>,
    pub author: Option<String>,
    // `date` and `modified` are kept as raw strings, not parsed dates.
    pub date: Option<String>,
    pub modified: Option<String>,
    pub related: Vec<Related>,
    pub license: Option<String>,
    pub references: Vec<String>,
    pub tags: Vec<String>,
    pub fields: Vec<String>,
    pub falsepositives: Vec<String>,
    pub level: Option<Level>,
    pub scope: Vec<String>,
    // Optional here, whereas `SigmaRule::logsource` is required.
    pub logsource: Option<LogSource>,

    /// Rules this filter applies to (by ID or name), or all rules.
    pub rules: FilterRuleTarget,
    /// The filter detection section.
    pub detection: Detections,

    /// Custom attributes attached to the filter rule.
    #[serde(skip_serializing_if = "HashMap::is_empty")]
    pub custom_attributes: HashMap<String, yaml_serde::Value>,
}
818
819// =============================================================================
820// Collection / Document
821// =============================================================================
822
/// A single parsed document from a Sigma YAML file.
///
/// A YAML file may contain multiple documents separated by `---`.
/// Each parsed document is represented as a detection rule, a correlation
/// rule, or a filter; this enum has no variant for any other document kind.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub enum SigmaDocument {
    /// A detection rule. Stored boxed, unlike the other two variants.
    Rule(Box<SigmaRule>),
    /// A correlation rule.
    Correlation(CorrelationRule),
    /// A filter rule.
    Filter(FilterRule),
}
833
/// A collection of parsed Sigma documents from one or more YAML files.
///
/// Documents are grouped by kind into three vectors. Unlike most types in
/// this module, the collection does not derive `PartialEq`.
#[derive(Debug, Clone, Serialize)]
pub struct SigmaCollection {
    /// Parsed detection rules.
    pub rules: Vec<SigmaRule>,
    /// Parsed correlation rules.
    pub correlations: Vec<CorrelationRule>,
    /// Parsed filter rules.
    pub filters: Vec<FilterRule>,
    /// Per-document parse errors accumulated while building the
    /// collection. Populated by [`parse_sigma_yaml`](crate::parse_sigma_yaml)
    /// and friends; one entry per document the parser could not
    /// produce a [`SigmaRule`], [`CorrelationRule`], or [`FilterRule`]
    /// from. The collection is still returned on `Ok(_)` so callers
    /// can decide whether a partial parse is acceptable; the
    /// [`SigmaCollection::has_errors`] / [`SigmaCollection::error_count`]
    /// / [`SigmaCollection::into_result`] helpers cover the common
    /// "treat any error as a failure" path.
    ///
    /// Excluded from serialized output (`serde(skip)`).
    #[serde(skip)]
    pub errors: Vec<String>,
}
852
853impl SigmaCollection {
854    pub fn new() -> Self {
855        SigmaCollection {
856            rules: Vec::new(),
857            correlations: Vec::new(),
858            filters: Vec::new(),
859            errors: Vec::new(),
860        }
861    }
862
863    /// Total number of parsed documents.
864    pub fn len(&self) -> usize {
865        self.rules.len() + self.correlations.len() + self.filters.len()
866    }
867
868    pub fn is_empty(&self) -> bool {
869        self.len() == 0
870    }
871
872    /// True when the parser recorded one or more per-document parse
873    /// errors while building this collection.
874    pub fn has_errors(&self) -> bool {
875        !self.errors.is_empty()
876    }
877
878    /// Number of per-document parse errors recorded while building
879    /// this collection. Equivalent to `self.errors.len()`.
880    pub fn error_count(&self) -> usize {
881        self.errors.len()
882    }
883
884    /// Promote the accumulated errors to a hard failure. Returns the
885    /// collection when [`SigmaCollection::has_errors`] is false;
886    /// otherwise returns the collection's [`errors`](Self::errors) so
887    /// callers can format them. The original collection is consumed
888    /// either way so the success path can move out of `self` without
889    /// re-cloning the documents.
890    pub fn into_result(self) -> Result<Self, Vec<String>> {
891        if self.has_errors() {
892            Err(self.errors)
893        } else {
894            Ok(self)
895        }
896    }
897}
898
899impl Default for SigmaCollection {
900    fn default() -> Self {
901        Self::new()
902    }
903}