use std::collections::{BTreeMap, BTreeSet};
use rsigma_eval::RuleFieldSet;
use rsigma_parser::{LogSource, SigmaCollection};
use super::mapping::{MappingTable, entry_products};
/// Summary of what was seen in observed events, consumed by `analyze`.
///
/// The `Default` value has `present == false`, so `is_observed` reports every
/// field as unobserved.
#[derive(Debug, Default)]
pub(crate) struct Observed {
/// Whether observation data is available at all; when `false`, `is_observed`
/// returns `false` for every field.
pub present: bool,
/// Field names for which `is_observed` returns `false` even when `present` is set.
pub missing: BTreeSet<String>,
/// Observed fields (with counts) that `analyze` scans to find untapped data sources.
/// NOTE(review): presumably fields that no rule references; confirm with the producer.
pub unknown: Vec<ObservedField>,
/// Copied verbatim into `VisibilityAnalysis::events_observed`.
/// NOTE(review): presumably the number of events sampled; confirm with the producer.
pub events_observed: u64,
/// Copied verbatim into `VisibilityAnalysis::observed_unique_keys`.
/// NOTE(review): presumably the number of distinct field keys seen; confirm with the producer.
pub unique_keys: usize,
}
impl Observed {
    /// Returns `true` when observation data is present and `field` is not
    /// listed in `missing`.
    fn is_observed(&self, field: &str) -> bool {
        // Without any observation data nothing counts as observed.
        if !self.present {
            return false;
        }
        !self.missing.contains(field)
    }
}
/// A field name paired with an occurrence count.
#[derive(Debug, Clone)]
pub(crate) struct ObservedField {
/// Field name; `analyze` looks it up via `MappingTable::field_component`.
pub field: String,
/// Occurrence count; untapped-source fields are ordered by this value, highest first.
/// NOTE(review): presumably the number of events carrying the field; confirm with the producer.
pub count: u64,
}
/// Visibility row for a single data source, as produced by `analyze`.
#[derive(Debug, Clone)]
pub(crate) struct DataSourceVisibility {
/// Data source name (key of the aggregation map in `analyze`).
pub data_source: String,
/// `score_fraction(observed_fields.len(), mapped_fields.len())`, in the range 0..=4.
pub score: u8,
/// Data components gathered from matching mapping entries and from rule fields.
pub data_components: Vec<String>,
/// Products returned by `entry_products` for the rules that matched this source.
pub products: Vec<String>,
/// Display strings of the rule logsources that matched this source.
pub logsources: Vec<String>,
/// Rule fields whose component maps to this data source.
pub mapped_fields: Vec<String>,
/// Subset of `mapped_fields` for which `Observed::is_observed` returned `true`.
pub observed_fields: Vec<String>,
/// `true` when there is at least one mapped field and none of them was observed.
pub blind_spot: bool,
}
impl DataSourceVisibility {
    /// Reports whether at least one mapped field of this data source was observed.
    pub(crate) fn available(&self) -> bool {
        self.observed_fields.first().is_some()
    }
}
/// Visibility roll-up for a single technique, as produced by `analyze`.
#[derive(Debug, Clone)]
pub(crate) struct TechniqueVisibility {
/// Technique identifier as returned by `MappingTable::component_techniques`.
pub technique_id: String,
/// Highest score among the data sources that contribute to this technique.
pub score: u8,
/// Names of the contributing data sources.
pub data_sources: Vec<String>,
}
/// A data source that shows up in `Observed::unknown` fields but has no row in
/// `VisibilityAnalysis::data_sources`.
#[derive(Debug, Clone)]
pub(crate) struct UntappedSource {
/// Data source name resolved through the mapping table.
pub data_source: String,
/// Fields that resolved to this source, ordered by count descending, then by field name.
pub observed_fields: Vec<ObservedField>,
}
/// Complete result of `analyze`.
#[derive(Debug, Clone)]
pub(crate) struct VisibilityAnalysis {
/// One row per data source, ordered by data source name.
pub data_sources: Vec<DataSourceVisibility>,
/// One row per technique reachable from the data components in `data_sources`.
pub techniques: Vec<TechniqueVisibility>,
/// Data sources seen only through `Observed::unknown` fields.
pub untapped: Vec<UntappedSource>,
/// Display strings of rule logsources for which the mapping table returned no match.
pub unmapped_logsources: Vec<String>,
/// Number of rules in the analysed collection.
pub rules_total: usize,
/// Number of distinct logsource display strings seen (rules whose logsource has no
/// category, product or service are not counted).
pub logsources_total: usize,
/// Copied from `Observed::events_observed`.
pub events_observed: u64,
/// Copied from `Observed::unique_keys`.
pub observed_unique_keys: usize,
/// Copied from `Observed::present`.
pub has_observed: bool,
}
impl VisibilityAnalysis {
    /// Returns references to every data source flagged as a blind spot,
    /// in the same order as `data_sources`.
    pub(crate) fn blind_spots(&self) -> Vec<&DataSourceVisibility> {
        let mut spots = Vec::new();
        for source in &self.data_sources {
            if source.blind_spot {
                spots.push(source);
            }
        }
        spots
    }
}
/// Per-data-source accumulator that `analyze` fills while walking rules and rule fields.
///
/// Every member is a `BTreeSet`, so repeated inserts are deduplicated and the
/// values come out in ascending order when converted to `Vec`s.
#[derive(Default)]
struct Agg {
/// Data component names contributing to this source.
components: BTreeSet<String>,
/// Products reported by `entry_products` for matching rules.
products: BTreeSet<String>,
/// Display strings of rule logsources that matched this source.
logsources: BTreeSet<String>,
/// Rule fields that map to this source.
mapped_fields: BTreeSet<String>,
/// Mapped fields that were also observed.
observed_fields: BTreeSet<String>,
}
/// Converts an observed/mapped field ratio into a visibility score from 0 to 4.
///
/// * `0`: nothing is mapped, or nothing was observed
/// * `1`: ratio in `(0, 0.25]`
/// * `2`: ratio in `(0.25, 0.5]`
/// * `3`: ratio in `(0.5, 1)`
/// * `4`: ratio of `1` or more
pub(crate) fn score_fraction(observed: usize, mapped: usize) -> u8 {
    // Guard the division: with nothing mapped there is nothing to score.
    if mapped == 0 {
        return 0;
    }
    let ratio = observed as f64 / mapped as f64;
    // Arm order matters: the extremes are tested before the inner bands.
    match ratio {
        r if r <= 0.0 => 0,
        r if r >= 1.0 => 4,
        r if r <= 0.25 => 1,
        r if r <= 0.5 => 2,
        _ => 3,
    }
}
/// Returns the human-readable label for a visibility score.
///
/// Scores `0..=3` map to `"none"`, `"minimal"`, `"medium"` and `"good"`;
/// every other value maps to `"excellent"`.
pub(crate) fn level_name(score: u8) -> &'static str {
    const NAMES: [&str; 4] = ["none", "minimal", "medium", "good"];
    NAMES
        .get(usize::from(score))
        .copied()
        .unwrap_or("excellent")
}
/// Renders a logsource as `category/product/service`, skipping unset parts.
///
/// Returns `None` when none of the three parts is set.
fn logsource_display(ls: &LogSource) -> Option<String> {
    let segments = [
        ls.category.as_deref(),
        ls.product.as_deref(),
        ls.service.as_deref(),
    ];
    let present: Vec<&str> = segments.iter().flatten().copied().collect();
    (!present.is_empty()).then(|| present.join("/"))
}
/// Builds the visibility analysis for a rule collection.
///
/// The work happens in five steps:
///
/// 1. Every rule logsource is matched against `mapping`; matching entries
///    contribute components, products and the logsource display string to
///    their data source, while logsources without a match are reported as
///    unmapped. Rules whose logsource has no category, product or service are
///    skipped entirely.
/// 2. Every field in `rule_field_set` that resolves to a data source through
///    `mapping` is recorded as mapped, and additionally as observed when
///    `observed` reports it as seen.
/// 3. Each data source is scored with [`score_fraction`] and flagged as a
///    blind spot when it has mapped fields but none of them was observed.
/// 4. Techniques inherit the best score among their contributing data sources.
/// 5. Fields in `observed.unknown` that resolve to a data source without a row
///    from steps 1-3 are reported as untapped sources.
///
/// Returns the assembled [`VisibilityAnalysis`]; all lists are sorted by name.
pub(crate) fn analyze(
    collection: &SigmaCollection,
    rule_field_set: &RuleFieldSet,
    observed: &Observed,
    mapping: &MappingTable,
) -> VisibilityAnalysis {
    let mut sources: BTreeMap<String, Agg> = BTreeMap::new();
    let mut logsources_seen: BTreeSet<String> = BTreeSet::new();
    let mut unmapped: BTreeSet<String> = BTreeSet::new();

    // Step 1: attribute rule logsources to data sources.
    for rule in &collection.rules {
        let Some(display) = logsource_display(&rule.logsource) else {
            continue;
        };
        logsources_seen.insert(display.clone());
        let matches = mapping.logsource_matches(&rule.logsource);
        if matches.is_empty() {
            // `display` is not needed past this point, so move it instead of cloning.
            unmapped.insert(display);
            continue;
        }
        for entry in matches {
            let agg = sources.entry(entry.data_source.clone()).or_default();
            agg.components.insert(entry.data_component.clone());
            agg.products.extend(entry_products(&rule.logsource, entry));
            agg.logsources.insert(display.clone());
        }
    }

    // Step 2: attribute rule fields to data sources and mark the observed ones.
    for (field, _origin) in rule_field_set.iter() {
        let Some(component) = mapping.field_component(field) else {
            continue;
        };
        let Some(ds) = mapping.component_source(component) else {
            continue;
        };
        let agg = sources.entry(ds.to_string()).or_default();
        agg.components.insert(component.to_string());
        agg.mapped_fields.insert(field.to_string());
        if observed.is_observed(field) {
            agg.observed_fields.insert(field.to_string());
        }
    }

    // Step 3: `sources` is a `BTreeMap`, so the rows are produced in ascending
    // data-source order and need no further sorting.
    let data_sources: Vec<DataSourceVisibility> = sources
        .into_iter()
        .map(|(name, agg)| finalize_source(name, agg))
        .collect();

    // Steps 4 and 5.
    let techniques = roll_up_techniques(&data_sources, mapping);
    let untapped = untapped_sources(observed, mapping, &data_sources);

    VisibilityAnalysis {
        data_sources,
        techniques,
        untapped,
        unmapped_logsources: unmapped.into_iter().collect(),
        rules_total: collection.rules.len(),
        logsources_total: logsources_seen.len(),
        events_observed: observed.events_observed,
        observed_unique_keys: observed.unique_keys,
        has_observed: observed.present,
    }
}

/// Turns one aggregated bucket into its scored `DataSourceVisibility` row.
fn finalize_source(name: String, agg: Agg) -> DataSourceVisibility {
    let score = score_fraction(agg.observed_fields.len(), agg.mapped_fields.len());
    let blind_spot = !agg.mapped_fields.is_empty() && agg.observed_fields.is_empty();
    DataSourceVisibility {
        data_source: name,
        score,
        data_components: agg.components.into_iter().collect(),
        products: agg.products.into_iter().collect(),
        logsources: agg.logsources.into_iter().collect(),
        mapped_fields: agg.mapped_fields.into_iter().collect(),
        observed_fields: agg.observed_fields.into_iter().collect(),
        blind_spot,
    }
}

/// Rolls data-source scores up to techniques, keeping the best score per technique.
fn roll_up_techniques(
    data_sources: &[DataSourceVisibility],
    mapping: &MappingTable,
) -> Vec<TechniqueVisibility> {
    let mut tech: BTreeMap<String, (u8, BTreeSet<String>)> = BTreeMap::new();
    for ds in data_sources {
        for component in &ds.data_components {
            for technique in mapping.component_techniques(component) {
                // The default entry is `(0, {})`, i.e. the lowest score and no sources.
                let (best, contributors) = tech.entry(technique.clone()).or_default();
                *best = (*best).max(ds.score);
                contributors.insert(ds.data_source.clone());
            }
        }
    }
    tech.into_iter()
        .map(|(technique_id, (score, ds))| TechniqueVisibility {
            technique_id,
            score,
            data_sources: ds.into_iter().collect(),
        })
        .collect()
}

/// Groups `observed.unknown` fields by data sources that have no row in `data_sources`.
fn untapped_sources(
    observed: &Observed,
    mapping: &MappingTable,
    data_sources: &[DataSourceVisibility],
) -> Vec<UntappedSource> {
    let expected: BTreeSet<&str> = data_sources
        .iter()
        .map(|d| d.data_source.as_str())
        .collect();
    let mut untapped_map: BTreeMap<String, Vec<ObservedField>> = BTreeMap::new();
    for uf in &observed.unknown {
        let Some(component) = mapping.field_component(&uf.field) else {
            continue;
        };
        let Some(ds) = mapping.component_source(component) else {
            continue;
        };
        // Sources that already have a row are not "untapped".
        if expected.contains(ds) {
            continue;
        }
        untapped_map
            .entry(ds.to_string())
            .or_default()
            .push(uf.clone());
    }
    untapped_map
        .into_iter()
        .map(|(data_source, mut fields)| {
            // Most frequent fields first; ties are broken by field name.
            fields.sort_by(|a, b| b.count.cmp(&a.count).then_with(|| a.field.cmp(&b.field)));
            UntappedSource {
                data_source,
                observed_fields: fields,
            }
        })
        .collect()
}
// Unit tests for the scoring helper and for `analyze` run against the bundled mapping table.
#[cfg(test)]
mod tests {
use super::*;
// Parses `yaml` as Sigma rules, collects their fields and runs `analyze`
// against the bundled mapping table with the given observation data.
fn analysis(yaml: &str, observed: Observed) -> VisibilityAnalysis {
let collection = rsigma_parser::parse_sigma_yaml(yaml).expect("rules parse");
let rfs = RuleFieldSet::collect(&collection, &[], true);
let mapping = MappingTable::bundled();
analyze(&collection, &rfs, &observed, &mapping)
}
// Fixture: one process_creation/windows rule that references the
// `Image` and `CommandLine` fields.
const RULE: &str = r#"
title: Suspicious Process
id: 00000000-0000-0000-0000-0000000000a1
logsource: {category: process_creation, product: windows}
detection:
sel:
Image|endswith: '\evil.exe'
CommandLine|contains: '--steal'
condition: sel
"#;
// Pins the score at each band boundary, including the "nothing mapped" case.
#[test]
fn score_fraction_boundaries() {
assert_eq!(score_fraction(0, 0), 0);
assert_eq!(score_fraction(0, 4), 0);
assert_eq!(score_fraction(1, 4), 1);
assert_eq!(score_fraction(2, 4), 2);
assert_eq!(score_fraction(3, 4), 3);
assert_eq!(score_fraction(4, 4), 4);
assert_eq!(score_fraction(1, 1), 4);
}
// With default (absent) observation data the Process source scores 0 and is a blind spot.
#[test]
fn no_observed_is_baseline_all_zero() {
let a = analysis(RULE, Observed::default());
let process = a
.data_sources
.iter()
.find(|d| d.data_source == "Process")
.expect("Process data source expected");
assert_eq!(process.score, 0);
assert!(process.blind_spot, "no observation => blind spot");
assert!(!process.available());
assert!(!a.has_observed);
}
// Observation present with nothing missing: the Process source gets the top score.
#[test]
fn all_fields_observed_scores_excellent() {
let observed = Observed {
present: true,
missing: BTreeSet::new(),
unknown: Vec::new(),
events_observed: 100,
unique_keys: 2,
};
let a = analysis(RULE, observed);
let process = a
.data_sources
.iter()
.find(|d| d.data_source == "Process")
.unwrap();
assert_eq!(process.score, 4);
assert!(!process.blind_spot);
assert!(process.available());
}
// Only `Image` is missing: the expected score is 2 and the source is not a blind spot.
#[test]
fn partial_observation_is_a_blind_spot_only_when_all_missing() {
let observed = Observed {
present: true,
missing: BTreeSet::from(["Image".to_string()]),
unknown: Vec::new(),
events_observed: 10,
unique_keys: 1,
};
let a = analysis(RULE, observed);
let process = a
.data_sources
.iter()
.find(|d| d.data_source == "Process")
.unwrap();
assert_eq!(process.score, 2);
assert!(!process.blind_spot);
}
// A logsource category the mapping table does not know must appear in `unmapped_logsources`.
#[test]
fn unmapped_logsource_is_surfaced() {
let yaml = r#"
title: Odd
id: 00000000-0000-0000-0000-0000000000b1
logsource: {category: totally_unknown_thing}
detection: {sel: {Foo: bar}, condition: sel}
"#;
let a = analysis(yaml, Observed::default());
assert!(
a.unmapped_logsources
.contains(&"totally_unknown_thing".to_string())
);
}
// Technique T1059 must inherit the Process source's top score and list it as a contributor.
#[test]
fn techniques_roll_up_from_data_sources() {
let observed = Observed {
present: true,
missing: BTreeSet::new(),
unknown: Vec::new(),
events_observed: 5,
unique_keys: 2,
};
let a = analysis(RULE, observed);
let t = a
.techniques
.iter()
.find(|t| t.technique_id == "T1059")
.expect("T1059 reachable via Process Creation");
assert_eq!(t.score, 4);
assert!(t.data_sources.contains(&"Process".to_string()));
}
// An unknown observed field (`TargetObject`) that no rule references must
// surface "Windows Registry" as an untapped source.
#[test]
fn untapped_source_when_observed_but_unreferenced() {
let observed = Observed {
present: true,
missing: BTreeSet::new(),
unknown: vec![ObservedField {
field: "TargetObject".to_string(),
count: 42,
}],
events_observed: 50,
unique_keys: 3,
};
let a = analysis(RULE, observed);
let untapped = a
.untapped
.iter()
.find(|u| u.data_source == "Windows Registry")
.expect("Windows Registry is untapped");
assert_eq!(untapped.observed_fields[0].field, "TargetObject");
}
}