rsigma 0.17.0

CLI for parsing, validating, linting and evaluating Sigma detection rules
//! ATT&CK Navigator layer (format 4.5) export.
//!
//! The layer scores each technique by the number of rules that reference it,
//! the same "score function count" semantics SigmaHQ uses for its published
//! heatmap, so a rsigma-generated layer and the SigmaHQ baseline overlay on the
//! same gradient. Output is serialized from typed structs (never hand-built
//! JSON) and is deterministic for golden testing: techniques are emitted in
//! sorted ID order and the gradient runs from 0 to the highest observed score
//! (never below 1).

use super::{Coverage, parent_technique};
pub(crate) use crate::commands::navigator::to_pretty_json;
use crate::commands::navigator::{DOMAIN, Gradient, Layer, NavTechnique, Versions};

/// Max rule titles listed in a technique's `comment` before truncation.
/// Titles beyond this limit are not listed individually; `comment_for`
/// summarizes them with a trailing `(+N more)` count instead.
const MAX_COMMENT_TITLES: usize = 8;

/// Build a Navigator layer from the computed coverage.
///
/// Every technique in `coverage` becomes one enabled layer entry whose score
/// is its rule count and whose comment lists its rule titles (capped by
/// `comment_for`). `name` is the layer's display name shown in the Navigator
/// tab.
///
/// The gradient runs from 0 up to the highest technique score, and never
/// below 1 even when no technique is present.
pub(crate) fn build_layer(coverage: &Coverage, name: &str) -> Layer {
    // Parent IDs that have at least one annotated sub-technique, gathered in a
    // single pass. The loop below then does a set lookup per technique instead
    // of rescanning every technique key for each entry.
    let parents_with_subs: std::collections::HashSet<&str> = coverage
        .techniques
        .keys()
        .filter_map(|k| parent_technique(k))
        .collect();

    let mut max_score = 1u64;
    let mut techniques = Vec::with_capacity(coverage.techniques.len());

    for (id, agg) in &coverage.techniques {
        let score = agg.rule_count() as u64;
        max_score = max_score.max(score);

        // A parent technique with at least one annotated sub-technique is
        // expanded so the sub-technique scores are visible in the Navigator.
        // Only IDs without a '.' qualify as parents.
        let show_subtechniques = !id.contains('.') && parents_with_subs.contains(id.as_str());

        techniques.push(NavTechnique {
            technique_id: id.clone(),
            score,
            comment: comment_for(&agg.titles()),
            enabled: true,
            show_subtechniques,
        });
    }

    Layer {
        name: name.to_string(),
        versions: Versions::current(),
        domain: DOMAIN,
        description: format!(
            "Rule coverage generated by rsigma; score = number of rules per technique ({} techniques).",
            coverage.techniques.len()
        ),
        sorting: 3, // descending by score
        hide_disabled: false,
        gradient: Gradient {
            colors: vec!["#ffffcc", "#fd8d3c", "#bd0026"],
            min_value: 0,
            max_value: max_score,
        },
        techniques,
    }
}

/// Build a technique comment from its rule titles, capped so a technique
/// referenced by many rules does not produce an unwieldy annotation.
fn comment_for(titles: &[String]) -> String {
    let total = titles.len();
    let shown: Vec<&str> = titles
        .iter()
        .take(MAX_COMMENT_TITLES)
        .map(|s| s.as_str())
        .collect();
    let mut comment = shown.join(", ");
    if total > MAX_COMMENT_TITLES {
        comment.push_str(&format!(", (+{} more)", total - MAX_COMMENT_TITLES));
    }
    comment
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::commands::coverage::Coverage;

    /// Two rules: A tags T1059 plus a tactic, B tags T1059 and its
    /// sub-technique T1059.001.
    const PARENT_AND_SUB_RULES: &str = r#"
title: A
id: 00000000-0000-0000-0000-0000000000a1
logsource: {category: process_creation, product: windows}
detection:
    sel: {Image|endswith: '\a.exe'}
    condition: sel
tags:
    - attack.t1059
    - attack.execution
---
title: B
id: 00000000-0000-0000-0000-0000000000a2
logsource: {category: process_creation, product: windows}
detection:
    sel: {Image|endswith: '\b.exe'}
    condition: sel
tags:
    - attack.t1059
    - attack.t1059.001
"#;

    /// A single rule tagged with T1003.
    const SINGLE_RULE: &str = r#"
title: A
id: 00000000-0000-0000-0000-0000000000a1
logsource: {category: test, product: test}
detection: {sel: {Image: x}, condition: sel}
tags: [attack.t1003]
"#;

    /// Parse Sigma YAML and compute its coverage, panicking on invalid rules.
    fn load_coverage(source: &str) -> Coverage {
        let parsed = rsigma_parser::parse_sigma_yaml(source).expect("rules parse");
        Coverage::from_collection(&parsed)
    }

    #[test]
    fn layer_scores_by_rule_count_and_expands_parents() {
        let coverage = load_coverage(PARENT_AND_SUB_RULES);
        let layer = build_layer(&coverage, "test");

        let parent = layer
            .techniques
            .iter()
            .find(|entry| entry.technique_id == "T1059")
            .expect("T1059 present");

        // Both rules reference T1059.
        assert_eq!(parent.score, 2);
        // T1059.001 is annotated, so the parent is expanded.
        assert!(parent.show_subtechniques);
        // Gradient max tracks the highest score.
        assert_eq!(layer.gradient.max_value, 2);
    }

    #[test]
    fn layer_serializes_format_4_5_header() {
        let coverage = load_coverage(SINGLE_RULE);
        let layer = build_layer(&coverage, "n");
        let rendered = to_pretty_json(&layer);

        assert!(rendered.contains("\"layer\": \"4.5\""));
        assert!(rendered.contains("\"techniqueID\": \"T1003\""));
    }
}