use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use crate::finding::{DialTier, Finding, FindingBody, Provenance, Severity};
/// One location that belongs to a [`PrimeCluster`].
///
/// `build_cluster` creates these from the findings of a cluster, copying each
/// finding's `file` and `line` and reading `tier` from the finding body.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PrimeSite {
/// File of the finding this site was built from.
pub file: String,
/// Line of the finding this site was built from.
pub line: usize,
/// Dial tier read from the finding body (`DialTier::Suspected` when the body
/// is `FindingBody::MarkedUnknown`).
pub tier: DialTier,
}
/// Summary of the findings that share one cluster key.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PrimeCluster {
/// Key shared by every finding grouped into this cluster.
pub cluster_key: String,
/// Class name, read from the first finding of the cluster.
pub class: String,
/// Provenance with the lowest `provenance_rank` among the cluster's findings.
pub class_provenance: Provenance,
/// Severity with the highest `severity_rank` among the cluster's findings.
pub severity: Severity,
/// Exact number of findings in the cluster; not limited by the cap on `sites`.
pub blast_radius: usize,
/// Sample of sites ordered by file, then line, holding at most
/// [`MAX_SITES_PER_CLUSTER`] entries.
pub sites: Vec<PrimeSite>,
}
/// Digest produced by [`session_prime`] / [`session_prime_top_n`].
///
/// The totals describe every grouped finding, while `top_clusters` holds only
/// the highest-ranked clusters that survived truncation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SessionPrime {
/// Number of fingerprint-match findings that were grouped (other finding
/// bodies are not counted).
pub total_findings: usize,
/// Number of distinct cluster keys among the grouped findings.
pub total_clusters: usize,
/// Clusters ordered by severity (highest first), then blast radius (largest
/// first), then cluster key; truncated to the requested count.
pub top_clusters: Vec<PrimeCluster>,
}
/// Number of clusters [`session_prime`] keeps when no explicit limit is given.
pub const DEFAULT_TOP_N: usize = 10;
/// Upper bound on the number of [`PrimeSite`] entries kept in one cluster's
/// `sites` sample; `blast_radius` still reports the full count.
pub const MAX_SITES_PER_CLUSTER: usize = 5;
/// Builds the session digest with the default cluster limit.
///
/// Equivalent to calling [`session_prime_top_n`] with `n` set to
/// [`DEFAULT_TOP_N`].
#[must_use]
pub fn session_prime(findings: &[Finding]) -> SessionPrime {
    let limit = DEFAULT_TOP_N;
    session_prime_top_n(findings, limit)
}
/// Groups fingerprint-match findings by cluster key and returns the `n`
/// highest-ranked clusters.
///
/// Only findings whose body is `FindingBody::FingerprintMatch` take part;
/// every other finding is skipped and does not contribute to the totals.
///
/// Clusters are ordered by severity (highest first), then by blast radius
/// (largest first), then by cluster key (ascending), which makes the output
/// deterministic.
///
/// `total_findings` and `total_clusters` in the result describe the whole
/// grouped population, not just the `n` clusters that are returned.
#[must_use]
pub fn session_prime_top_n(findings: &[Finding], n: usize) -> SessionPrime {
    // Group by a borrowed key so no `String` is allocated per finding; the
    // owned key is created once per cluster when the cluster is built.
    let mut by_cluster: BTreeMap<&str, Vec<&Finding>> = BTreeMap::new();
    for f in findings {
        if matches!(f.body, FindingBody::FingerprintMatch { .. }) {
            by_cluster.entry(f.cluster_key.as_str()).or_default().push(f);
        }
    }

    // Totals are taken before truncation so they cover every cluster.
    let total_findings: usize = by_cluster.values().map(Vec::len).sum();
    let total_clusters = by_cluster.len();

    let mut clusters: Vec<PrimeCluster> = by_cluster
        .into_iter()
        .map(|(cluster_key, group)| build_cluster(cluster_key.to_owned(), &group))
        .collect();

    // Cluster keys were map keys and are therefore unique, so this comparator
    // is a total order and an unstable sort gives the same result as a stable
    // one. `then_with` skips the later comparisons once the order is decided.
    clusters.sort_unstable_by(|a, b| {
        severity_rank(b.severity)
            .cmp(&severity_rank(a.severity))
            .then_with(|| b.blast_radius.cmp(&a.blast_radius))
            .then_with(|| a.cluster_key.cmp(&b.cluster_key))
    });
    clusters.truncate(n);

    SessionPrime {
        total_findings,
        total_clusters,
        top_clusters: clusters,
    }
}
/// Collapses the findings of one cluster into a [`PrimeCluster`].
///
/// * `cluster_key` - key shared by the findings in `group`; moved into the result.
/// * `group` - the findings of the cluster.
///
/// The result carries:
/// * `blast_radius` - the exact number of findings in `group`;
/// * `severity` - the severity with the highest `severity_rank`
///   (`Severity::Low` for an empty group);
/// * `class_provenance` - the provenance with the lowest `provenance_rank`
///   (`Provenance::DEFAULT` for an empty group);
/// * `class` - the class of the first finding when its body is a
///   `FingerprintMatch`, otherwise the default (empty) value;
/// * `sites` - the first [`MAX_SITES_PER_CLUSTER`] sites in (file, line) order.
fn build_cluster(cluster_key: String, group: &[&Finding]) -> PrimeCluster {
    let blast_radius = group.len();

    let severity = group
        .iter()
        .map(|f| f.severity)
        .max_by_key(|s| severity_rank(*s))
        .unwrap_or(Severity::Low);

    let class_provenance = group
        .iter()
        .map(|f| f.class_provenance)
        .min_by_key(|p| provenance_rank(*p))
        .unwrap_or(Provenance::DEFAULT);

    let class = group
        .first()
        .and_then(|f| match &f.body {
            FindingBody::FingerprintMatch { class, .. } => Some(class.clone()),
            _ => None,
        })
        .unwrap_or_default();

    // Order the borrowed findings first and build sites only for the sample
    // that is kept, so file paths are cloned for at most
    // `MAX_SITES_PER_CLUSTER` findings instead of for the whole cluster.
    // The sort stays stable so findings with equal (file, line) keep their
    // input order.
    let mut ordered: Vec<&Finding> = group.to_vec();
    ordered.sort_by(|a, b| a.file.cmp(&b.file).then_with(|| a.line.cmp(&b.line)));

    let sites: Vec<PrimeSite> = ordered
        .into_iter()
        .take(MAX_SITES_PER_CLUSTER)
        .map(|f| PrimeSite {
            file: f.file.clone(),
            line: f.line,
            tier: match &f.body {
                FindingBody::FingerprintMatch { tier, .. }
                | FindingBody::DialVerdict { tier, .. } => *tier,
                // A body without a tier is reported as `Suspected`.
                FindingBody::MarkedUnknown { .. } => DialTier::Suspected,
            },
        })
        .collect();

    PrimeCluster {
        cluster_key,
        class,
        class_provenance,
        severity,
        blast_radius,
        sites,
    }
}
/// Maps a severity to a sortable rank: the more severe, the larger the number
/// (`Low` = 0, `Medium` = 1, `High` = 2).
const fn severity_rank(s: Severity) -> u8 {
    match s {
        Severity::High => 2,
        Severity::Medium => 1,
        Severity::Low => 0,
    }
}
/// Maps a provenance to a sortable rank, from `Encountered` (0) through
/// `Constructable` (1) and `Heuristic` (2) to `Imagined` (3).
const fn provenance_rank(p: Provenance) -> u8 {
    match p {
        Provenance::Imagined => 3,
        Provenance::Heuristic => 2,
        Provenance::Constructable => 1,
        Provenance::Encountered => 0,
    }
}
// Unit tests for the session digest: grouping, ranking, truncation, the
// per-cluster site sample, and serde round-tripping.
#[cfg(test)]
mod tests {
use super::*;
use crate::finding::{OriginStage, Presentation, cluster_key_of};
// Builds a fingerprint-match `Finding` of `class` at `file`:`line` with
// severity `sev`. The digest is derived from the class name only, so findings
// of the same class pass identical arguments to `cluster_key_of`. Provenance is
// always `Constructable` and the tier is always `Suspected`.
fn match_finding(class: &str, file: &str, line: usize, sev: Severity) -> Finding {
let digest = format!("d-{class}");
Finding {
schema_version: crate::finding::FINDING_SCHEMA_VERSION,
file: file.to_string(),
line,
structural_digest: digest.clone(),
shape_digest: String::new(),
cluster_key: cluster_key_of(&digest, class),
severity: sev,
source: "scan:catalog-match".to_string(),
class_provenance: Provenance::Constructable,
presentation: Presentation::Passive,
timestamp: line as u64,
origin_stage: OriginStage::Scan,
body: FindingBody::FingerprintMatch {
class: class.to_string(),
tier: DialTier::Suspected,
},
}
}
// Two findings of one class and one of another yield two clusters, and the
// shared cluster reports a blast radius of two.
#[test]
fn groups_by_cluster_and_counts_blast_radius() {
let findings = vec![
match_finding("panic-in-drop", "a.rs", 1, Severity::High),
match_finding("panic-in-drop", "b.rs", 2, Severity::High),
match_finding("unbounded-deser", "c.rs", 3, Severity::High),
];
let prime = session_prime(&findings);
assert_eq!(prime.total_findings, 3);
assert_eq!(prime.total_clusters, 2);
let drop_cluster = prime
.top_clusters
.iter()
.find(|c| c.class == "panic-in-drop")
.expect("drop cluster present");
assert_eq!(drop_cluster.blast_radius, 2, "two panic-in-drop sites");
}
// Severity dominates the ordering; blast radius only breaks ties between
// clusters of equal severity.
#[test]
fn ranks_by_severity_then_blast_radius() {
let findings = vec![
match_finding("low-but-wide", "a.rs", 1, Severity::Medium),
match_finding("low-but-wide", "a.rs", 2, Severity::Medium),
match_finding("low-but-wide", "a.rs", 3, Severity::Medium),
match_finding("high-narrow", "b.rs", 1, Severity::High),
match_finding("high-wide", "c.rs", 1, Severity::High),
match_finding("high-wide", "c.rs", 2, Severity::High),
];
let prime = session_prime(&findings);
let order: Vec<&str> = prime
.top_clusters
.iter()
.map(|c| c.class.as_str())
.collect();
assert_eq!(order, vec!["high-wide", "high-narrow", "low-but-wide"]);
}
// Truncating to the top three clusters must not change the reported totals.
#[test]
fn top_n_truncates_but_totals_report_the_full_picture() {
let findings: Vec<Finding> = (0..20)
.map(|i| match_finding(&format!("class-{i}"), "a.rs", i, Severity::High))
.collect();
let prime = session_prime_top_n(&findings, 3);
assert_eq!(prime.top_clusters.len(), 3, "top-3 only");
assert_eq!(prime.total_clusters, 20, "but totals report all 20");
assert_eq!(prime.total_findings, 20);
}
// A single `Constructable` finding produces a `Constructable` cluster.
#[test]
fn carries_provenance_through_never_upgrades() {
let findings = vec![match_finding("c", "a.rs", 1, Severity::High)];
let prime = session_prime(&findings);
assert_eq!(
prime.top_clusters[0].class_provenance,
Provenance::Constructable
);
}
// The tier stored in the finding body is copied onto the site.
#[test]
fn each_site_carries_its_dial_tier_per_site_honesty() {
let findings = vec![match_finding("c", "a.rs", 1, Severity::High)];
let prime = session_prime(&findings);
let site = &prime.top_clusters[0].sites[0];
assert_eq!(
site.tier,
DialTier::Suspected,
"the scan-floor match's Suspected tier must reach the site"
);
}
// Twelve findings in one cluster: the site sample is capped at
// `MAX_SITES_PER_CLUSTER` while the blast radius still reports all twelve.
#[test]
fn sites_sample_is_bounded_but_blast_radius_is_exact() {
let findings: Vec<Finding> = (0..12)
.map(|i| match_finding("wide", "a.rs", i, Severity::High))
.collect();
let prime = session_prime(&findings);
let cluster = &prime.top_clusters[0];
assert_eq!(cluster.blast_radius, 12, "exact total");
assert_eq!(
cluster.sites.len(),
MAX_SITES_PER_CLUSTER,
"sample is bounded"
);
}
// No findings in, zero totals and no clusters out.
#[test]
fn empty_population_is_an_empty_digest() {
let prime = session_prime(&[]);
assert_eq!(prime.total_findings, 0);
assert_eq!(prime.total_clusters, 0);
assert!(prime.top_clusters.is_empty());
}
// The digest serializes to JSON and deserializes back to an equal value.
#[test]
fn serializes_co_natively() {
let findings = vec![match_finding("c", "a.rs", 1, Severity::High)];
let prime = session_prime(&findings);
let json = serde_json::to_string(&prime).expect("SessionPrime serializes");
assert!(json.contains("\"total_findings\":1"));
let back: SessionPrime = serde_json::from_str(&json).expect("round-trips");
assert_eq!(back, prime);
}
}