Skip to main content

mollify_core/
baseline.rs

1//! Regression baselines: snapshot the set of finding fingerprints, then on a
2//! later run report only what's **new** relative to that snapshot. This is the
3//! "no new issues" CI gate (complementary to git-attribution `--gate new-only`):
4//! it works without git and survives file moves, because fingerprints are
5//! content-derived (RESEARCH.md §2.11 — evidence-preserving).
6
7use camino::Utf8Path;
8use mollify_types::Finding;
9use serde::{Deserialize, Serialize};
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct Baseline {
13    pub schema: String,
14    /// Sorted, de-duplicated finding fingerprints captured at snapshot time.
15    pub fingerprints: Vec<String>,
16}
17
/// Schema identifier that `Baseline::from_findings` stamps into every snapshot.
const SCHEMA: &str = "mollify-baseline/1";
19
20impl Baseline {
21    /// Build a baseline from the current findings.
22    pub fn from_findings(findings: &[Finding]) -> Baseline {
23        let mut fingerprints: Vec<String> =
24            findings.iter().map(|f| f.fingerprint.clone()).collect();
25        fingerprints.sort();
26        fingerprints.dedup();
27        Baseline {
28            schema: SCHEMA.into(),
29            fingerprints,
30        }
31    }
32
33    /// Write the baseline to `path` as pretty JSON.
34    pub fn save(&self, path: &Utf8Path) -> std::io::Result<()> {
35        if let Some(parent) = path.parent() {
36            if !parent.as_str().is_empty() {
37                std::fs::create_dir_all(parent)?;
38            }
39        }
40        let json = serde_json::to_string_pretty(self).unwrap();
41        std::fs::write(path, json)
42    }
43
44    /// Load a baseline from `path` (None if missing/invalid).
45    pub fn load(path: &Utf8Path) -> Option<Baseline> {
46        let text = std::fs::read_to_string(path).ok()?;
47        serde_json::from_str(&text).ok()
48    }
49}
50
51/// Partition `findings` into (new, known) relative to a baseline's fingerprints.
52pub fn split_new<'a>(
53    findings: &'a [Finding],
54    baseline: &Baseline,
55) -> (Vec<&'a Finding>, Vec<&'a Finding>) {
56    let known: rustc_hash::FxHashSet<&str> =
57        baseline.fingerprints.iter().map(|s| s.as_str()).collect();
58    findings
59        .iter()
60        .partition(|f| !known.contains(f.fingerprint.as_str()))
61}
62
#[cfg(test)]
mod tests {
    use super::*;
    use mollify_types::{Category, Confidence, Location, Severity};

    /// Minimal `Finding` fixture; only the fingerprint differs between calls.
    fn finding(fp: &str) -> Finding {
        let location = Location {
            path: "a.py".into(),
            line: 1,
            column: 0,
            end_line: None,
        };
        Finding {
            fingerprint: fp.into(),
            rule: "r".into(),
            category: Category::DeadCode,
            severity: Severity::Warn,
            confidence: Confidence::Likely,
            attribution: None,
            reason: "x".into(),
            location,
            actions: vec![],
        }
    }

    /// Only fingerprints absent from the baseline are reported as new.
    #[test]
    fn new_findings_are_those_not_in_baseline() {
        let snapshot = [finding("a:1"), finding("b:2")];
        let base = Baseline::from_findings(&snapshot);
        let current = [finding("a:1"), finding("c:3")];

        let (new, known) = split_new(&current, &base);

        assert_eq!(new.len(), 1);
        assert_eq!(new[0].fingerprint, "c:3");
        assert_eq!(known.len(), 1);
    }

    /// A saved baseline loads back with the same fingerprints.
    #[test]
    fn roundtrips_through_disk() {
        // Key the scratch directory on the process id so concurrent test
        // processes do not share it.
        let scratch = std::env::temp_dir().join(format!("mollify-baseline-{}", std::process::id()));
        std::fs::create_dir_all(&scratch).unwrap();
        let file = camino::Utf8PathBuf::from_path_buf(scratch.join("bl.json")).unwrap();

        let original = Baseline::from_findings(&[finding("a:1")]);
        original.save(&file).unwrap();
        let restored = Baseline::load(&file).unwrap();

        assert_eq!(restored.fingerprints, vec!["a:1".to_string()]);
        // Best-effort cleanup; a failure here should not fail the test.
        std::fs::remove_dir_all(&scratch).ok();
    }
}