Skip to main content

cargo_capsec/
baseline.rs

//! Baseline diffing — track capability changes across runs.
//!
//! The baseline system enables incremental adoption in CI. On the first run,
//! `--baseline` saves all findings to `.capsec-baseline.json`. On subsequent runs,
//! `--diff` compares current findings against the saved baseline and reports only
//! what's new or removed.
//!
//! This lets teams adopt `cargo capsec audit` on existing projects without fixing
//! every finding upfront — `--diff --fail-on high` only fails on *new* high-risk
//! findings introduced by a PR.
11
12use crate::detector::Finding;
13use serde::{Deserialize, Serialize};
14use std::collections::HashSet;
15use std::path::Path;
16
/// File name of the saved baseline; it is joined onto the workspace root both
/// when the baseline is loaded and when it is written.
const BASELINE_FILE: &str = ".capsec-baseline.json";
18
19/// A single entry in the saved baseline file.
20///
21/// Baseline entries are compared by value equality — if a finding's crate, function,
22/// call text, and category all match, it's considered the same finding across runs.
23/// This means code movement (same call, different line) won't trigger a diff.
24#[derive(Serialize, Deserialize, Clone, Debug)]
25pub struct BaselineEntry {
26    /// Name of the crate containing the finding.
27    pub crate_name: String,
28    /// Version of the crate.
29    pub crate_version: String,
30    /// Source file path.
31    pub file: String,
32    /// Function name containing the call.
33    pub function: String,
34    /// The expanded call path (e.g., `"std::fs::read"`).
35    pub call_text: String,
36    /// Category label (e.g., `"FS"`, `"NET"`).
37    pub category: String,
38}
39
40impl PartialEq for BaselineEntry {
41    fn eq(&self, other: &Self) -> bool {
42        self.crate_name == other.crate_name
43            && self.file == other.file
44            && self.function == other.function
45            && self.call_text == other.call_text
46            && self.category == other.category
47    }
48}
49
50impl Eq for BaselineEntry {}
51
52impl std::hash::Hash for BaselineEntry {
53    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
54        self.crate_name.hash(state);
55        self.file.hash(state);
56        self.function.hash(state);
57        self.call_text.hash(state);
58        self.category.hash(state);
59    }
60}
61
62impl From<&Finding> for BaselineEntry {
63    fn from(f: &Finding) -> Self {
64        Self {
65            crate_name: f.crate_name.clone(),
66            crate_version: f.crate_version.clone(),
67            file: f.file.clone(),
68            function: f.function.clone(),
69            call_text: f.call_text.clone(),
70            category: f.category.label().to_string(),
71        }
72    }
73}
74
75/// The result of comparing current findings against a saved baseline.
76pub struct DiffResult {
77    /// Findings that exist now but weren't in the baseline (newly introduced).
78    pub new_findings: Vec<BaselineEntry>,
79    /// Findings that were in the baseline but no longer exist (resolved or removed).
80    pub removed_findings: Vec<BaselineEntry>,
81    /// Number of findings present in both current and baseline.
82    pub unchanged_count: usize,
83}
84
85/// Loads a previously saved baseline from `.capsec-baseline.json` in the workspace root.
86///
87/// Returns `None` if the file doesn't exist or can't be parsed.
88pub fn load_baseline(
89    workspace_root: &Path,
90    cap: &impl capsec_core::has::Has<capsec_core::permission::FsRead>,
91) -> Option<HashSet<BaselineEntry>> {
92    let path = workspace_root.join(BASELINE_FILE);
93    let data = capsec_std::fs::read_to_string(path, cap).ok()?;
94    serde_json::from_str(&data).ok()
95}
96
97/// Saves current findings as the new baseline to `.capsec-baseline.json`.
98pub fn save_baseline(
99    workspace_root: &Path,
100    findings: &[Finding],
101    cap: &impl capsec_core::has::Has<capsec_core::permission::FsWrite>,
102) -> Result<(), String> {
103    let entries: Vec<BaselineEntry> = findings.iter().map(BaselineEntry::from).collect();
104    let json = serde_json::to_string_pretty(&entries)
105        .map_err(|e| format!("Failed to serialize baseline: {e}"))?;
106    capsec_std::fs::write(workspace_root.join(BASELINE_FILE), json, cap)
107        .map_err(|e| format!("Failed to write baseline: {e}"))
108}
109
110/// Computes the difference between current findings and a saved baseline.
111///
112/// Returns which findings are new, which were removed, and how many are unchanged.
113pub fn diff(current: &[Finding], baseline: &HashSet<BaselineEntry>) -> DiffResult {
114    let current_set: HashSet<BaselineEntry> = current.iter().map(BaselineEntry::from).collect();
115
116    let new_findings: Vec<BaselineEntry> = current_set.difference(baseline).cloned().collect();
117    let removed_findings: Vec<BaselineEntry> = baseline.difference(&current_set).cloned().collect();
118    let unchanged_count = current_set.intersection(baseline).count();
119
120    DiffResult {
121        new_findings,
122        removed_findings,
123        unchanged_count,
124    }
125}
126
127/// Prints a human-readable diff summary to stderr.
128pub fn print_diff(diff_result: &DiffResult) {
129    if !diff_result.new_findings.is_empty() {
130        eprintln!(
131            "\n{} new finding(s) since last baseline:",
132            diff_result.new_findings.len()
133        );
134        for entry in &diff_result.new_findings {
135            eprintln!(
136                "  + [{}] {}::{} — {}",
137                entry.category, entry.crate_name, entry.function, entry.call_text
138            );
139        }
140    }
141    if !diff_result.removed_findings.is_empty() {
142        eprintln!(
143            "\n{} finding(s) removed since last baseline:",
144            diff_result.removed_findings.len()
145        );
146        for entry in &diff_result.removed_findings {
147            eprintln!(
148                "  - [{}] {}::{} — {}",
149                entry.category, entry.crate_name, entry.function, entry.call_text
150            );
151        }
152    }
153    eprintln!("\n{} finding(s) unchanged.", diff_result.unchanged_count);
154}
155
#[cfg(test)]
mod tests {
    use super::*;
    use crate::authorities::{Category, Risk};

    /// Builds a medium-risk finding located in `test-crate` 0.1.0, file
    /// `src/lib.rs`, function `test`, for the given call path and category.
    fn make_finding(call: &str, category: Category) -> Finding {
        Finding {
            crate_name: String::from("test-crate"),
            crate_version: String::from("0.1.0"),
            file: String::from("src/lib.rs"),
            function: String::from("test"),
            function_line: 1,
            call_line: 2,
            call_col: 5,
            call_text: String::from(call),
            category,
            subcategory: String::from("test"),
            risk: Risk::Medium,
            description: String::from("test"),
            is_build_script: false,
            is_deny_violation: false,
            is_transitive: false,
        }
    }

    /// Baseline holding exactly the entry derived from `finding`.
    fn baseline_of(finding: &Finding) -> HashSet<BaselineEntry> {
        HashSet::from([BaselineEntry::from(finding)])
    }

    #[test]
    fn diff_detects_new_findings() {
        let current = [make_finding("std::fs::read", Category::Fs)];
        let result = diff(&current, &HashSet::new());

        assert_eq!(result.new_findings.len(), 1);
        assert_eq!(result.removed_findings.len(), 0);
        assert_eq!(result.unchanged_count, 0);
    }

    #[test]
    fn diff_detects_removed_findings() {
        let baseline = HashSet::from([BaselineEntry {
            crate_name: String::from("old-crate"),
            crate_version: String::from("0.1.0"),
            file: String::from("src/lib.rs"),
            function: String::from("old_func"),
            call_text: String::from("std::net::TcpStream::connect"),
            category: String::from("NET"),
        }]);

        let result = diff(&[], &baseline);

        assert_eq!(result.removed_findings.len(), 1);
        assert_eq!(result.new_findings.len(), 0);
    }

    #[test]
    fn version_bump_does_not_cause_spurious_diff() {
        // Same finding, recorded at 0.1.0 and then re-scanned at 0.2.0.
        let mut before = make_finding("std::fs::read", Category::Fs);
        before.crate_version = String::from("0.1.0");
        let baseline = baseline_of(&before);

        let mut after = make_finding("std::fs::read", Category::Fs);
        after.crate_version = String::from("0.2.0");

        let result = diff(&[after], &baseline);

        assert_eq!(
            result.new_findings.len(),
            0,
            "version bump should not create new findings"
        );
        assert_eq!(
            result.removed_findings.len(),
            0,
            "version bump should not remove findings"
        );
        assert_eq!(result.unchanged_count, 1);
    }

    #[test]
    fn diff_detects_unchanged() {
        let finding = make_finding("std::fs::read", Category::Fs);
        let baseline = baseline_of(&finding);

        let result = diff(&[finding], &baseline);

        assert_eq!(result.unchanged_count, 1);
        assert_eq!(result.new_findings.len(), 0);
        assert_eq!(result.removed_findings.len(), 0);
    }
}
241}