rsigma 0.17.0

CLI for parsing, validating, linting and evaluating Sigma detection rules
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
//! `rsigma rule coverage`: map a rule set onto MITRE ATT&CK.
//!
//! The command extracts ATT&CK technique/tactic tags from a rule set, exports
//! an ATT&CK Navigator layer (format 4.5, scored by rule count), and reports
//! coverage gaps against three optional cross-references: the Atomic Red Team
//! index, the SigmaHQ baseline heatmap, and a user-supplied target technique
//! list. `--fail-on-gaps` turns any uncovered cross-reference into a non-zero
//! exit for CI.
//!
//! It works entirely from technique IDs already present on the rules; it does
//! not need the full ATT&CK matrix (the Navigator renders that, and each
//! cross-reference supplies its own technique set).

mod navigator;
mod report;
mod sources;

use std::collections::{BTreeMap, BTreeSet};
use std::path::PathBuf;
use std::process;

use clap::parser::ValueSource;
use clap::{ArgMatches, Args};
use rsigma_parser::SigmaCollection;

use crate::commands::reports::CoverageReport;
use crate::config;
use crate::exit_code;
use crate::output::OutputCtx;
use sources::{DEFAULT_ATOMICS_URL, DEFAULT_BASELINE_URL};

// NOTE: the `///` comments on this struct's fields are not just documentation:
// clap's derive API turns them into the user-visible `--help` text, so treat
// them like any other runtime string when editing.
/// Arguments for `rsigma rule coverage`.
#[derive(Args, Debug)]
pub(crate) struct CoverageArgs {
    /// Path to a YAML config file. Overrides config-file discovery.
    /// CLI flags still take precedence over config-file values.
    #[arg(long = "config", value_name = "PATH")]
    pub config: Option<PathBuf>,

    /// Print the effective config (defaults < file < env) and exit.
    #[arg(long = "dry-run")]
    pub dry_run: bool,

    // No clap default here: an empty vec means the flag was never passed, which
    // is what lets the config overlay fill it in later.
    /// Path to a Sigma rule file or directory of rules (repeatable).
    #[arg(short = 'r', long = "rules", value_name = "PATH")]
    pub rules: Vec<PathBuf>,

    /// Write an ATT&CK Navigator layer (format 4.5) JSON to this file.
    #[arg(long = "navigator", value_name = "FILE")]
    pub navigator: Option<PathBuf>,

    // Three states: flag absent -> `None`; bare `--atomics` -> the
    // `default_missing_value` (DEFAULT_ATOMICS_URL); `--atomics=<v>` -> `<v>`.
    /// Cross-reference against the Atomic Red Team index. Accepts a local path
    /// (index.yaml or an atomic-red-team `atomics/` directory) or a URL; bare
    /// `--atomics` uses the upstream `atomics/Indexes/index.yaml`.
    #[arg(
        long = "atomics",
        value_name = "PATH_OR_URL",
        num_args = 0..=1,
        default_missing_value = DEFAULT_ATOMICS_URL,
    )]
    pub atomics: Option<String>,

    // Same three-state shape as `atomics`, with DEFAULT_BASELINE_URL as the
    // value used for a bare `--baseline`.
    /// Cross-reference against a baseline ATT&CK Navigator layer. Accepts a
    /// local path or URL; bare `--baseline` uses the SigmaHQ coverage heatmap.
    #[arg(
        long = "baseline",
        value_name = "PATH_OR_URL",
        num_args = 0..=1,
        default_missing_value = DEFAULT_BASELINE_URL,
    )]
    pub baseline: Option<String>,

    /// Cross-reference against a target technique list (one technique ID per
    /// line; `#` comments allowed).
    #[arg(long = "targets", value_name = "FILE")]
    pub targets: Option<PathBuf>,

    /// Exit with code 1 when any requested cross-reference reports uncovered
    /// techniques (for CI gating).
    #[arg(long = "fail-on-gaps")]
    pub fail_on_gaps: bool,
}

/// Resolve the layered config (defaults < file < env) and apply its `coverage`
/// section to `args`.
///
/// With `--dry-run` the resolved config is printed and the process exits with
/// the success code *before* any overlay happens. Otherwise every flag the
/// operator left unset is filled from the config via
/// [`overlay_coverage_config`].
pub(crate) fn apply_coverage_config(args: &mut CoverageArgs, matches: &ArgMatches) {
    let resolved = config::load_and_merge(args.config.as_deref());

    // Normal path first: overlay and hand control back to the caller.
    if !args.dry_run {
        overlay_coverage_config(args, matches, resolved);
        return;
    }

    // Dry run: show what would be used, then stop without running the command.
    config::print_dry_run("coverage", &resolved);
    process::exit(exit_code::SUCCESS);
}

/// Apply an already-resolved `coverage` config section to `args`.
///
/// This touches no files and never exits, which keeps it unit-testable. A
/// config value is only used where the operator did not supply the flag; when
/// `base` has no `coverage` section, `args` is left untouched.
fn overlay_coverage_config(
    args: &mut CoverageArgs,
    matches: &ArgMatches,
    base: config::RsigmaConfigPartial,
) {
    let Some(section) = base.coverage else {
        return;
    };

    // "Set by the operator" means clap took the value from argv or from an
    // environment variable, as opposed to a built-in default.
    let set_by_operator = |id: &str| {
        matches
            .value_source(id)
            .is_some_and(|src| matches!(src, ValueSource::CommandLine | ValueSource::EnvVariable))
    };

    // `--rules` is repeatable and has no clap default, so an empty list means
    // it was left off and the config may supply it.
    match section.rules {
        Some(paths) if args.rules.is_empty() && !set_by_operator("rules") => args.rules = paths,
        _ => {}
    }

    // The optional cross-reference inputs have no clap default either: `None`
    // means "not given", so the config value (possibly also `None`) is used.
    if args.atomics.is_none() {
        args.atomics = section.atomics;
    }
    if args.baseline.is_none() {
        args.baseline = section.baseline;
    }
    if args.targets.is_none() {
        args.targets = section.targets;
    }

    // A boolean flag always carries a value, so consult clap's value source to
    // tell "defaulted to false" apart from "explicitly set".
    match section.fail_on_gaps {
        Some(flag) if !set_by_operator("fail_on_gaps") => args.fail_on_gaps = flag,
        _ => {}
    }
}

/// Entry point for `rule coverage`; the return value is the process exit code.
///
/// Exit codes: 0 on success, 1 when gaps are found under `--fail-on-gaps`,
/// 2 for a rule error, 3 for a config error. Rule errors are not returned from
/// here: they exit directly via [`crate::load_collection_multi`].
///
/// Steps, in order: load the rules and compute coverage, load each requested
/// cross-reference (atomics, baseline, targets), optionally write the
/// Navigator layer, then build and render the report.
pub(crate) fn cmd_coverage(args: CoverageArgs, ctx: OutputCtx) -> i32 {
    /// Print a loader failure and yield the config-error exit code.
    fn report_load_error<E: std::fmt::Display>(e: E) -> i32 {
        eprintln!("error: {e}");
        exit_code::CONFIG_ERROR
    }

    if args.rules.is_empty() {
        eprintln!("error: no rules path; pass --rules <PATH> (repeatable)");
        return exit_code::CONFIG_ERROR;
    }

    let collection = crate::load_collection_multi(&args.rules);
    let coverage = Coverage::from_collection(&collection);

    // Each cross-reference is optional; a requested one that fails to load
    // aborts the command before any output is produced.
    let atomics = match args
        .atomics
        .as_ref()
        .map(|spec| sources::load_atomics(spec))
        .transpose()
    {
        Ok(loaded) => loaded,
        Err(e) => return report_load_error(e),
    };
    let baseline = match args
        .baseline
        .as_ref()
        .map(|spec| sources::load_baseline(spec))
        .transpose()
    {
        Ok(loaded) => loaded,
        Err(e) => return report_load_error(e),
    };
    let targets = match args
        .targets
        .as_ref()
        .map(|path| sources::load_targets(path))
        .transpose()
    {
        Ok(loaded) => loaded,
        Err(e) => return report_load_error(e),
    };

    if let Some(path) = &args.navigator {
        let layer = navigator::build_layer(&coverage, "rsigma coverage");
        let json = navigator::to_pretty_json(&layer);
        // The file gets a trailing newline after the JSON document.
        let written = std::fs::write(path, format!("{json}\n"));
        if let Err(e) = written {
            eprintln!(
                "error: could not write Navigator layer to {}: {e}",
                path.display()
            );
            return exit_code::CONFIG_ERROR;
        }
        if ctx.show_progress() {
            eprintln!("Wrote ATT&CK Navigator layer to {}", path.display());
        }
    }

    let report = CoverageReport::build(&coverage, atomics, baseline, targets);
    report.render(&ctx);
    report.exit_code(args.fail_on_gaps)
}

// ---------------------------------------------------------------------------
// Technique ID helpers
// ---------------------------------------------------------------------------

/// Validate `raw` as an ATT&CK technique ID and return its canonical form.
///
/// Accepted shape (case-insensitive, surrounding whitespace ignored): the
/// letter `T`, at least four ASCII digits, and optionally a single `.`
/// followed by one or more ASCII digits (the sub-technique). The result is the
/// trimmed, uppercased ID; anything else yields `None`.
pub(crate) fn normalize_technique(raw: &str) -> Option<String> {
    fn all_ascii_digits(s: &str) -> bool {
        s.bytes().all(|b| b.is_ascii_digit())
    }

    let id = raw.trim().to_ascii_uppercase();
    let digits = id.strip_prefix('T')?;

    // Split at the first dot only: a second dot ends up inside `sub` and is
    // rejected by the digit check below.
    let mut pieces = digits.splitn(2, '.');
    let base = pieces.next().unwrap_or("");
    let sub = pieces.next();

    let base_ok = base.len() >= 4 && all_ascii_digits(base);
    let sub_ok = match sub {
        None => true,
        Some(s) => !s.is_empty() && all_ascii_digits(s),
    };

    if base_ok && sub_ok { Some(id) } else { None }
}

/// Return the base technique a sub-technique belongs to.
///
/// `T1059.001` yields `Some("T1059")`: everything before the first `.`. An ID
/// without a `.` is already a base technique and yields `None`.
pub(crate) fn parent_technique(id: &str) -> Option<&str> {
    let dot = id.find('.')?;
    Some(&id[..dot])
}

/// The MITRE ATT&CK enterprise tactics, spelled as hyphenated slugs the way
/// the ATT&CK Navigator writes them. This is the canonical spelling emitted by
/// the report and the layer.
const TACTICS: &[&str] = &[
    "reconnaissance",
    "resource-development",
    "initial-access",
    "execution",
    "persistence",
    "privilege-escalation",
    "defense-evasion",
    "credential-access",
    "discovery",
    "lateral-movement",
    "collection",
    "command-and-control",
    "exfiltration",
    "impact",
];

/// Map the part after `attack.` in a tag to its canonical Navigator slug.
///
/// Two spellings occur in practice and both are accepted: hyphenated, as in
/// the SigmaHQ corpus (`attack.privilege-escalation`), and underscored, as in
/// the Sigma spec and pySigma (`attack.privilege_escalation`). Matching is
/// case-sensitive. Names that are not a known tactic (custom taxonomies,
/// ATT&CK groups/software) yield `None`.
fn tactic_slug(short: &str) -> Option<&'static str> {
    // Compare byte by byte, reading every `_` in the input as `-`. Both are
    // single-byte ASCII, so this matches replace-then-compare without
    // allocating a normalized copy.
    let same_tactic = |slug: &str| {
        slug.len() == short.len()
            && slug.bytes().zip(short.bytes()).all(|(want, got)| {
                let got = if got == b'_' { b'-' } else { got };
                want == got
            })
    };
    TACTICS.iter().copied().find(|&slug| same_tactic(slug))
}

// ---------------------------------------------------------------------------
// Coverage extraction
// ---------------------------------------------------------------------------

/// Everything known about one technique: the rules that reference it and the
/// tactics those rules were tagged with.
#[derive(Debug, Default)]
pub(crate) struct TechniqueAgg {
    /// Referencing rules as `identity -> display title`. The identity is the
    /// rule `id` when it has one and its title otherwise, so distinct rules
    /// that merely share a title are still counted separately.
    rules: BTreeMap<String, String>,
    /// Tactic slugs carried by the rules that reference this technique.
    pub(crate) tactics: BTreeSet<String>,
}

impl TechniqueAgg {
    /// How many distinct rules (by identity) reference this technique.
    pub(crate) fn rule_count(&self) -> usize {
        self.rules.len()
    }

    /// Display titles of the referencing rules, sorted ascending with
    /// duplicates removed.
    pub(crate) fn titles(&self) -> Vec<String> {
        // An ordered set gives both the sort and the de-duplication.
        let unique: BTreeSet<&String> = self.rules.values().collect();
        unique.into_iter().cloned().collect()
    }
}

/// The ATT&CK coverage computed from a rule set.
///
/// Built by `Coverage::from_collection`, which ingests every detection and
/// correlation rule in the collection.
#[derive(Debug, Default)]
pub(crate) struct Coverage {
    /// Technique ID -> aggregate of the rules and tactics that reference it.
    /// Keys are the normalized (uppercased) IDs extracted from `attack.t*` tags.
    pub(crate) techniques: BTreeMap<String, TechniqueAgg>,
    /// Every recognized tactic slug seen on any ingested rule.
    pub(crate) tactics: BTreeSet<String>,
    /// Titles of rules that carry no `attack.*` tag at all; sorted and
    /// de-duplicated once ingestion finishes.
    pub(crate) untagged_rules: Vec<String>,
    /// Number of rules ingested (detection and correlation rules).
    pub(crate) rules_total: usize,
    /// Number of ingested rules with at least one `attack.*` tag.
    pub(crate) rules_tagged: usize,
}

/// Result of testing whether a target technique is covered by the rule set.
///
/// This is a plain pair of flags, so it derives the usual value-type traits:
/// `Debug` (so it can be logged and used in assertion messages, like the other
/// types in this module), plus `Clone`/`Copy` and `PartialEq`/`Eq` so results
/// can be compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct Covers {
    /// `true` when at least one rule covers the target, directly or through
    /// one of its sub-techniques.
    pub(crate) covered: bool,
    /// `true` when a base-technique target is covered only because a rule tags
    /// one of its sub-techniques, not the base technique itself.
    pub(crate) via_subtechnique: bool,
}

impl Coverage {
    /// Compute coverage for a parsed collection.
    ///
    /// The `attack.*` tags of detection rules and correlation rules are both
    /// counted. Filter rules are left out on purpose: they suppress matches
    /// rather than detect anything.
    pub(crate) fn from_collection(collection: &SigmaCollection) -> Self {
        let mut cov = Self::default();
        collection
            .rules
            .iter()
            .for_each(|rule| cov.ingest(rule.id.as_deref(), &rule.title, &rule.tags));
        collection
            .correlations
            .iter()
            .for_each(|corr| cov.ingest(corr.id.as_deref(), &corr.title, &corr.tags));

        // Present untagged titles in a stable order, each listed once.
        cov.untagged_rules.sort_unstable();
        cov.untagged_rules.dedup();
        cov
    }

    /// Fold one rule (given as id, title and tags) into the running totals.
    fn ingest(&mut self, id: Option<&str>, title: &str, tags: &[String]) {
        let (techniques, tactics, has_attack) = classify_tags(tags);

        self.rules_total += 1;
        if has_attack {
            self.rules_tagged += 1;
        } else {
            self.untagged_rules.push(title.to_owned());
        }

        self.tactics.extend(tactics.iter().cloned());

        // The rule id is the identity when there is one; the title is only the
        // fallback. This keeps same-titled rules with different ids distinct.
        let identity = id.unwrap_or(title);
        for technique in techniques {
            let agg = self.techniques.entry(technique).or_default();
            agg.rules.insert(identity.to_owned(), title.to_owned());
            // Every tactic on the rule is attributed to each of its techniques.
            agg.tactics.extend(tactics.iter().cloned());
        }
    }

    /// Decide whether the rule set covers `target`.
    ///
    /// * A sub-technique target (contains a `.`) counts only when some rule
    ///   tags that exact sub-technique; a rule on the coarser parent does not
    ///   vouch for it.
    /// * A base-technique target counts when a rule tags it directly, or,
    ///   failing that, when a rule tags any of its sub-techniques. Only the
    ///   latter case sets `via_subtechnique`.
    ///
    /// `target` is looked up verbatim, so it must already be in the same
    /// normalized form as the stored technique IDs.
    pub(crate) fn covers(&self, target: &str) -> Covers {
        let direct = self.techniques.contains_key(target);
        let is_base = !target.contains('.');
        let via = is_base
            && !direct
            && self
                .techniques
                .keys()
                .any(|known| parent_technique(known) == Some(target));
        Covers {
            covered: direct || via,
            via_subtechnique: via,
        }
    }
}

/// Extract `(techniques, tactic_slugs, has_attack_tag)` from a rule's tags.
fn classify_tags(tags: &[String]) -> (Vec<String>, Vec<String>, bool) {
    let mut techniques = Vec::new();
    let mut tactics = Vec::new();
    let mut has_attack = false;
    for tag in tags {
        let lower = tag.to_ascii_lowercase();
        let Some(rest) = lower.strip_prefix("attack.") else {
            continue;
        };
        has_attack = true;
        // Technique tags are `attack.t<digits>[.<digits>]`.
        if let Some(after_t) = rest.strip_prefix('t')
            && after_t.bytes().next().is_some_and(|b| b.is_ascii_digit())
        {
            if let Some(id) = normalize_technique(&format!("t{after_t}")) {
                techniques.push(id);
            }
            continue;
        }
        if let Some(slug) = tactic_slug(rest) {
            tactics.push(slug.to_string());
        }
        // Other `attack.*` namespaces (groups `g*`, software `s*`, …) count as
        // tagged but contribute no technique/tactic.
    }
    (techniques, tactics, has_attack)
}

// Unit tests for this module. They fall into three groups: the technique/tag
// helpers, coverage aggregation over small inline Sigma documents, and the
// CLI-vs-config precedence rules of `overlay_coverage_config`.
#[cfg(test)]
mod tests {
    use super::*;
    use clap::{Command, FromArgMatches};

    /// Parse a Sigma YAML string and compute its coverage. Panics if the YAML
    /// does not parse.
    fn coverage_from(yaml: &str) -> Coverage {
        Coverage::from_collection(&rsigma_parser::parse_sigma_yaml(yaml).expect("parse"))
    }

    // Accepts case/whitespace variants; rejects tactic ids, missing prefix,
    // short numbers and non-numeric sub-techniques.
    #[test]
    fn normalize_technique_accepts_and_rejects() {
        assert_eq!(normalize_technique("t1059").as_deref(), Some("T1059"));
        assert_eq!(
            normalize_technique("T1059.001").as_deref(),
            Some("T1059.001")
        );
        assert_eq!(normalize_technique("  t1003 ").as_deref(), Some("T1003"));
        assert_eq!(normalize_technique("TA0001"), None); // tactic id, not technique
        assert_eq!(normalize_technique("1059"), None); // no T prefix
        assert_eq!(normalize_technique("T10"), None); // too few digits
        assert_eq!(normalize_technique("T1059.xy"), None); // non-digit sub
    }

    // Techniques and tactics land in separate lists; other `attack.*` tags only
    // flip `has_attack`; non-`attack.` tags are ignored entirely.
    #[test]
    fn classify_tags_splits_techniques_tactics_and_other() {
        let tags = vec![
            "attack.t1059".to_string(),
            "attack.t1059.001".to_string(),
            "attack.execution".to_string(),
            "attack.g0016".to_string(), // group: tagged but no technique/tactic
            "cve.2023.1234".to_string(),
        ];
        let (techs, tactics, has_attack) = classify_tags(&tags);
        assert_eq!(techs, vec!["T1059".to_string(), "T1059.001".to_string()]);
        assert_eq!(tactics, vec!["execution".to_string()]);
        assert!(has_attack);
    }

    #[test]
    fn classify_tags_accepts_hyphen_and_underscore_tactics() {
        // SigmaHQ uses the hyphenated form; the Sigma spec/pySigma use
        // underscores. Both normalize to the canonical Navigator slug.
        let (_, hyphen, _) = classify_tags(&["attack.privilege-escalation".to_string()]);
        let (_, underscore, _) = classify_tags(&["attack.privilege_escalation".to_string()]);
        assert_eq!(hyphen, vec!["privilege-escalation".to_string()]);
        assert_eq!(underscore, vec!["privilege-escalation".to_string()]);
        // A custom (non-ATT&CK) tactic tag is not mapped.
        let (_, custom, has_attack) = classify_tags(&["attack.stealth".to_string()]);
        assert!(custom.is_empty());
        assert!(has_attack);
    }

    // A rule whose tags are all outside the `attack.` namespace is untagged.
    #[test]
    fn no_attack_tag_is_untagged() {
        let (techs, tactics, has_attack) = classify_tags(&["cve.2023.1".to_string()]);
        assert!(techs.is_empty());
        assert!(tactics.is_empty());
        assert!(!has_attack);
    }

    // Two rules tagging the same technique and tactic: the rule count is 2, but
    // the tactic appears once per technique and once overall.
    #[test]
    fn coverage_dedupes_rule_titles_and_tactics() {
        let cov = coverage_from(
            r#"
title: A
id: 00000000-0000-0000-0000-0000000000a1
logsource: {category: test, product: test}
detection: {sel: {Image: a}, condition: sel}
tags: [attack.execution, attack.t1059]
---
title: B
id: 00000000-0000-0000-0000-0000000000a2
logsource: {category: test, product: test}
detection: {sel: {Image: b}, condition: sel}
tags: [attack.execution, attack.t1059]
"#,
        );
        let agg = cov.techniques.get("T1059").unwrap();
        assert_eq!(agg.rule_count(), 2);
        assert_eq!(
            agg.tactics.iter().cloned().collect::<Vec<_>>(),
            vec!["execution".to_string()]
        );
        assert_eq!(cov.tactics.len(), 1);
    }

    #[test]
    fn distinct_rules_sharing_a_title_count_separately() {
        // Two rules with the same title but different ids both tag T1059.
        // They are distinct rules and must be counted as 2, not collapsed.
        let cov = coverage_from(
            r#"
title: Same Title
id: 00000000-0000-0000-0000-0000000000d1
logsource: {category: test, product: test}
detection: {sel: {Image: a}, condition: sel}
tags: [attack.t1059]
---
title: Same Title
id: 00000000-0000-0000-0000-0000000000d2
logsource: {category: test, product: test}
detection: {sel: {Image: b}, condition: sel}
tags: [attack.t1059]
"#,
        );
        let agg = cov.techniques.get("T1059").unwrap();
        assert_eq!(agg.rule_count(), 2);
        // Display de-duplicates identical titles.
        assert_eq!(agg.titles(), vec!["Same Title".to_string()]);
    }

    // Coverage flows upward (sub-technique rule covers the parent target) but
    // not sideways to sibling sub-techniques.
    #[test]
    fn covers_parent_via_subtechnique_but_not_reverse() {
        let cov = coverage_from(
            r#"
title: Sub
id: 00000000-0000-0000-0000-0000000000a1
logsource: {category: test, product: test}
detection: {sel: {Image: a}, condition: sel}
tags: [attack.t1059.001]
"#,
        );
        // Parent target covered via the sub-technique rule.
        let parent = cov.covers("T1059");
        assert!(parent.covered && parent.via_subtechnique);
        // The exact sub is covered directly.
        assert!(cov.covers("T1059.001").covered);
        // A different sub is not covered by the T1059.001 rule.
        assert!(!cov.covers("T1059.002").covered);
    }

    /// Parse `argv` (program name first) with a bare clap command carrying the
    /// `CoverageArgs` flags. Returns the typed args together with the raw
    /// matches, which the overlay needs for its value-source checks.
    fn parse(argv: &[&str]) -> (CoverageArgs, ArgMatches) {
        let cmd = CoverageArgs::augment_args(Command::new("coverage"));
        let matches = cmd.get_matches_from(argv);
        let args = CoverageArgs::from_arg_matches(&matches).expect("valid args");
        (args, matches)
    }

    /// Deserialize a YAML snippet into a partial config. Panics if the snippet
    /// is not a valid partial config.
    fn partial(yaml: &str) -> config::RsigmaConfigPartial {
        yaml_serde::from_str(yaml).expect("valid partial")
    }

    // `--atomics` with no value falls back to `default_missing_value`.
    #[test]
    fn bare_atomics_flag_uses_default_url() {
        let (args, _) = parse(&["coverage", "-r", "/r", "--atomics"]);
        assert_eq!(args.atomics.as_deref(), Some(DEFAULT_ATOMICS_URL));
    }

    // `--atomics=<value>` uses the given value instead of the default URL.
    #[test]
    fn atomics_flag_with_value_overrides_default() {
        let (args, _) = parse(&["coverage", "-r", "/r", "--atomics=/local/index.yaml"]);
        assert_eq!(args.atomics.as_deref(), Some("/local/index.yaml"));
    }

    // Flags absent from the command line are filled from the config section.
    #[test]
    fn config_fills_unset_atomics_and_fail_on_gaps() {
        let (mut args, matches) = parse(&["coverage", "-r", "/r"]);
        let base = partial("coverage:\n  atomics: /file/index.yaml\n  fail_on_gaps: true\n");
        overlay_coverage_config(&mut args, &matches, base);
        assert_eq!(args.atomics.as_deref(), Some("/file/index.yaml"));
        assert!(args.fail_on_gaps);
    }

    #[test]
    fn config_fills_unset_rules() {
        // No -r on the command line; the rules come from the config file.
        let (mut args, matches) = parse(&["coverage"]);
        let base = partial("coverage:\n  rules:\n    - /file/rules\n");
        overlay_coverage_config(&mut args, &matches, base);
        assert_eq!(args.rules, vec![PathBuf::from("/file/rules")]);
    }

    // An explicit `-r` wins over `coverage.rules` from the config.
    #[test]
    fn cli_rules_beat_config() {
        let (mut args, matches) = parse(&["coverage", "-r", "/cli/rules"]);
        let base = partial("coverage:\n  rules:\n    - /file/rules\n");
        overlay_coverage_config(&mut args, &matches, base);
        assert_eq!(args.rules, vec![PathBuf::from("/cli/rules")]);
    }

    // An explicit `--atomics=<value>` wins over `coverage.atomics` from the config.
    #[test]
    fn cli_atomics_beats_config() {
        let (mut args, matches) = parse(&["coverage", "-r", "/r", "--atomics=/cli/index.yaml"]);
        let base = partial("coverage:\n  atomics: /file/index.yaml\n");
        overlay_coverage_config(&mut args, &matches, base);
        assert_eq!(args.atomics.as_deref(), Some("/cli/index.yaml"));
    }
}