Skip to main content

rsigma_eval/
fields.rs

//! Rule-field extraction shared between `rsigma rule fields` and the daemon's
//! field-observability endpoints.
//!
//! [`RuleFieldSet::collect`] walks a [`SigmaCollection`] (after optional
//! pipeline transformations are applied) and records every field name
//! referenced by detection items, correlation `group-by` / threshold / alias
//! fields, filter detections, and rule-level `fields:` metadata. The result
//! tracks per-field provenance (rule titles + source kinds) so callers can
//! decide whether to surface a finding as a gap signal, a broken-coverage
//! signal, or a coverage summary.
//!
//! The CLI command `rsigma rule fields` and the daemon's
//! `GET /api/v1/fields/*` endpoints share this implementation so the
//! field set the operator inspects offline matches exactly what the engine
//! references at runtime.
16
17use std::collections::{BTreeMap, BTreeSet};
18
19use rsigma_parser::{
20    CorrelationCondition, CorrelationRule, Detection, DetectionItem, Detections, FilterRule,
21    SigmaCollection, SigmaRule,
22};
23use serde::Serialize;
24
25use crate::pipeline::{Pipeline, apply_pipelines};
26
27/// Where in a rule a field reference came from.
28#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
29#[serde(rename_all = "lowercase")]
30pub enum FieldSource {
31    /// Field used in a detection condition (`selection`, etc.).
32    Detection,
33    /// Field used by a correlation rule (group-by, threshold field, alias mapping).
34    Correlation,
35    /// Field used in a filter rule's detection block.
36    Filter,
37    /// Field listed in rule-level `fields:` metadata.
38    Metadata,
39}
40
41impl FieldSource {
42    /// Stable string identifier used in JSON serialization and human output.
43    pub fn as_str(self) -> &'static str {
44        match self {
45            FieldSource::Detection => "detection",
46            FieldSource::Correlation => "correlation",
47            FieldSource::Filter => "filter",
48            FieldSource::Metadata => "metadata",
49        }
50    }
51}
52
53/// Provenance for a single field name across the loaded rule set.
54#[derive(Debug, Clone, Default, PartialEq, Eq)]
55pub struct FieldOrigin {
56    /// Rule titles that reference this field.
57    pub rule_titles: BTreeSet<String>,
58    /// Source kinds (detection, correlation, filter, metadata) where this
59    /// field was seen.
60    pub sources: BTreeSet<FieldSource>,
61}
62
63/// Set of field names referenced by a loaded `SigmaCollection`, optionally
64/// after applying processing pipelines.
65///
66/// Built via [`RuleFieldSet::collect`] and queried via [`contains`](Self::contains),
67/// [`iter`](Self::iter), and [`len`](Self::len). Cheap to clone for sharing
68/// across threads behind an `Arc`.
69#[derive(Debug, Clone, Default, PartialEq, Eq)]
70pub struct RuleFieldSet {
71    fields: BTreeMap<String, FieldOrigin>,
72}
73
74impl RuleFieldSet {
75    /// Walk a rule collection (and any pipelines) and return the resulting
76    /// field set. When `pipelines` is non-empty, each rule is cloned and
77    /// transformed before its fields are collected so the recorded names
78    /// match what the engine evaluates against. Rules whose pipeline
79    /// application fails fall back to the untransformed names so the set
80    /// stays observable even when a pipeline misfires on one rule.
81    ///
82    /// `include_filters` controls whether filter-rule detection blocks
83    /// contribute to the set; mirrors the existing `--no-filters` flag on
84    /// `rsigma rule fields`.
85    pub fn collect(
86        collection: &SigmaCollection,
87        pipelines: &[Pipeline],
88        include_filters: bool,
89    ) -> Self {
90        let mut collector = Collector::default();
91
92        if pipelines.is_empty() {
93            for rule in &collection.rules {
94                collector.collect_rule(rule);
95            }
96        } else {
97            for rule in &collection.rules {
98                let mut transformed = rule.clone();
99                if apply_pipelines(pipelines, &mut transformed).is_err() {
100                    collector.collect_rule(rule);
101                    continue;
102                }
103                collector.collect_rule(&transformed);
104            }
105        }
106
107        for corr in &collection.correlations {
108            collector.collect_correlation(corr);
109        }
110
111        if include_filters {
112            for filter in &collection.filters {
113                collector.collect_filter(filter);
114            }
115        }
116
117        Self {
118            fields: collector.fields,
119        }
120    }
121
122    /// True if any rule references this field name.
123    pub fn contains(&self, field: &str) -> bool {
124        self.fields.contains_key(field)
125    }
126
127    /// Look up provenance for a single field name.
128    pub fn origin(&self, field: &str) -> Option<&FieldOrigin> {
129        self.fields.get(field)
130    }
131
132    /// Iterate field names and their provenance in sorted order.
133    pub fn iter(&self) -> impl Iterator<Item = (&str, &FieldOrigin)> {
134        self.fields.iter().map(|(k, v)| (k.as_str(), v))
135    }
136
137    /// Iterate just the field names in sorted order.
138    pub fn names(&self) -> impl Iterator<Item = &str> {
139        self.fields.keys().map(String::as_str)
140    }
141
142    /// Number of distinct fields in the set.
143    pub fn len(&self) -> usize {
144        self.fields.len()
145    }
146
147    /// True when no fields were collected.
148    pub fn is_empty(&self) -> bool {
149        self.fields.is_empty()
150    }
151}
152
153#[derive(Default)]
154struct Collector {
155    fields: BTreeMap<String, FieldOrigin>,
156}
157
158impl Collector {
159    fn add(&mut self, field: &str, rule_title: &str, source: FieldSource) {
160        let entry = self.fields.entry(field.to_string()).or_default();
161        entry.rule_titles.insert(rule_title.to_string());
162        entry.sources.insert(source);
163    }
164
165    fn collect_detection_items(
166        &mut self,
167        detection: &Detection,
168        rule_title: &str,
169        source: FieldSource,
170    ) {
171        match detection {
172            Detection::AllOf(items) => {
173                for item in items {
174                    self.collect_item(item, rule_title, source);
175                }
176            }
177            Detection::AnyOf(subs) => {
178                for sub in subs {
179                    self.collect_detection_items(sub, rule_title, source);
180                }
181            }
182            Detection::Keywords(_) => {}
183        }
184    }
185
186    fn collect_item(&mut self, item: &DetectionItem, rule_title: &str, source: FieldSource) {
187        if let Some(ref name) = item.field.name {
188            self.add(name, rule_title, source);
189        }
190    }
191
192    fn collect_detections(
193        &mut self,
194        detections: &Detections,
195        rule_title: &str,
196        source: FieldSource,
197    ) {
198        for det in detections.named.values() {
199            self.collect_detection_items(det, rule_title, source);
200        }
201    }
202
203    fn collect_rule(&mut self, rule: &SigmaRule) {
204        self.collect_detections(&rule.detection, &rule.title, FieldSource::Detection);
205        for f in &rule.fields {
206            self.add(f, &rule.title, FieldSource::Metadata);
207        }
208    }
209
210    fn collect_correlation(&mut self, corr: &CorrelationRule) {
211        for f in &corr.group_by {
212            self.add(f, &corr.title, FieldSource::Correlation);
213        }
214        if let CorrelationCondition::Threshold {
215            field: Some(ref fields),
216            ..
217        } = corr.condition
218        {
219            for f in fields {
220                self.add(f, &corr.title, FieldSource::Correlation);
221            }
222        }
223        for alias in &corr.aliases {
224            for mapped_field in alias.mapping.values() {
225                self.add(mapped_field, &corr.title, FieldSource::Correlation);
226            }
227        }
228        for f in &corr.fields {
229            self.add(f, &corr.title, FieldSource::Metadata);
230        }
231    }
232
233    fn collect_filter(&mut self, filter: &FilterRule) {
234        self.collect_detections(&filter.detection, &filter.title, FieldSource::Filter);
235        for f in &filter.fields {
236            self.add(f, &filter.title, FieldSource::Metadata);
237        }
238    }
239}
240
#[cfg(test)]
mod tests {
    use super::*;
    use rsigma_parser::parse_sigma_yaml;

    /// One detection rule whose `selection` names two fields.
    const DETECTION_RULE: &str = r#"
title: Test
status: test
logsource:
    category: test
detection:
    selection:
        CommandLine|contains: whoami
        EventID: 1
    condition: selection
"#;

    /// A detection rule followed by an event-count correlation grouped by
    /// `User`.
    const RULE_WITH_CORRELATION: &str = r#"
title: Login
id: login-rule
logsource:
    category: auth
detection:
    selection:
        EventType: login
    condition: selection
---
title: Many Logins
correlation:
    type: event_count
    rules:
        - login-rule
    group-by:
        - User
    timespan: 60s
    condition:
        gte: 3
"#;

    /// A detection rule followed by a filter rule with its own field.
    const RULE_WITH_FILTER: &str = r#"
title: Detection
status: test
logsource:
    category: test
detection:
    selection:
        DetField: x
    condition: selection
---
title: Filter
filter:
    rules:
        - non-existent
    selection:
        FilterField: y
    condition: selection
"#;

    /// Parse a YAML fixture, panicking if it is not valid Sigma.
    fn build(source: &str) -> SigmaCollection {
        parse_sigma_yaml(source).expect("parse")
    }

    #[test]
    fn collects_detection_fields() {
        let fields = RuleFieldSet::collect(&build(DETECTION_RULE), &[], true);

        for expected in ["CommandLine", "EventID"] {
            assert!(fields.contains(expected), "missing field {expected}");
        }

        let command_line = fields.origin("CommandLine").unwrap();
        assert!(command_line.sources.contains(&FieldSource::Detection));
    }

    #[test]
    fn collects_correlation_group_by() {
        let fields = RuleFieldSet::collect(&build(RULE_WITH_CORRELATION), &[], true);

        for expected in ["EventType", "User"] {
            assert!(fields.contains(expected), "missing field {expected}");
        }

        let user = fields.origin("User").unwrap();
        assert!(user.sources.contains(&FieldSource::Correlation));
    }

    #[test]
    fn include_filters_toggle() {
        let collection = build(RULE_WITH_FILTER);

        for include_filters in [true, false] {
            let fields = RuleFieldSet::collect(&collection, &[], include_filters);
            // The filter's field appears exactly when filters are included.
            assert_eq!(fields.contains("FilterField"), include_filters);
            // The detection rule's field is present either way.
            assert!(fields.contains("DetField"));
        }
    }

    #[test]
    fn empty_collection_is_empty_set() {
        let fields = RuleFieldSet::collect(&SigmaCollection::default(), &[], true);
        assert_eq!(fields.len(), 0);
        assert!(fields.is_empty());
    }
}