Skip to main content

diffguard_analytics/
lib.rs

1//! Analytics helpers for diffguard.
2//!
3//! This crate is intentionally pure (no filesystem/process/env I/O).
4
5use std::collections::BTreeSet;
6
7use diffguard_types::{CheckReceipt, Finding, Scope, VerdictCounts, VerdictStatus};
8use schemars::JsonSchema;
9use serde::{Deserialize, Serialize};
10use sha2::{Digest, Sha256};
11
/// Schema identifier for version 1 of the false-positive baseline document.
pub const FALSE_POSITIVE_BASELINE_SCHEMA_V1: &str = "diffguard.false_positive_baseline.v1";
/// Schema identifier for version 1 of the trend history document.
pub const TREND_HISTORY_SCHEMA_V1: &str = "diffguard.trend_history.v1";
14
/// Persistent collection of findings that were triaged as false positives.
///
/// Serialized with a `schema` tag so readers can reject incompatible versions.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct FalsePositiveBaseline {
    /// Schema identifier; expected to be [`FALSE_POSITIVE_BASELINE_SCHEMA_V1`].
    pub schema: String,
    /// Suppressed findings; omitted from serialization when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub entries: Vec<FalsePositiveEntry>,
}
21
22impl Default for FalsePositiveBaseline {
23    fn default() -> Self {
24        Self {
25            schema: FALSE_POSITIVE_BASELINE_SCHEMA_V1.to_string(),
26            entries: vec![],
27        }
28    }
29}
30
/// One suppressed finding, keyed by its stable fingerprint.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct FalsePositiveEntry {
    /// Stable identity of the finding (see `fingerprint_for_finding`).
    pub fingerprint: String,
    /// Rule that produced the original finding.
    pub rule_id: String,
    /// Path of the file the finding was reported in.
    pub path: String,
    /// 1-based line of the finding at the time it was baselined.
    pub line: u32,
    /// Optional human-written justification; omitted from JSON when absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub note: Option<String>,
}
40
41/// Deterministically normalizes a false-positive baseline:
42/// - ensures schema id is set
43/// - sorts entries
44/// - deduplicates by fingerprint
45pub fn normalize_false_positive_baseline(
46    mut baseline: FalsePositiveBaseline,
47) -> FalsePositiveBaseline {
48    if baseline.schema.is_empty() {
49        baseline.schema = FALSE_POSITIVE_BASELINE_SCHEMA_V1.to_string();
50    }
51    baseline.entries.sort_by(|a, b| {
52        a.fingerprint
53            .cmp(&b.fingerprint)
54            .then_with(|| a.rule_id.cmp(&b.rule_id))
55            .then_with(|| a.path.cmp(&b.path))
56            .then_with(|| a.line.cmp(&b.line))
57    });
58    baseline
59        .entries
60        .dedup_by(|a, b| a.fingerprint == b.fingerprint);
61    baseline
62}
63
64/// Computes the stable finding fingerprint used for baseline tracking.
65///
66/// Format: SHA-256 of `rule_id:path:line:match_text`.
67pub fn fingerprint_for_finding(finding: &Finding) -> String {
68    let input = format!(
69        "{}:{}:{}:{}",
70        finding.rule_id, finding.path, finding.line, finding.match_text
71    );
72    let hash = Sha256::digest(input.as_bytes());
73    hex::encode(hash)
74}
75
76/// Builds a baseline from receipt findings.
77pub fn baseline_from_receipt(receipt: &CheckReceipt) -> FalsePositiveBaseline {
78    let mut baseline = FalsePositiveBaseline {
79        schema: FALSE_POSITIVE_BASELINE_SCHEMA_V1.to_string(),
80        entries: receipt
81            .findings
82            .iter()
83            .map(|f| FalsePositiveEntry {
84                fingerprint: fingerprint_for_finding(f),
85                rule_id: f.rule_id.clone(),
86                path: f.path.clone(),
87                line: f.line,
88                note: None,
89            })
90            .collect(),
91    };
92    baseline = normalize_false_positive_baseline(baseline);
93    baseline
94}
95
/// Merges two baselines into a union keyed by fingerprint.
///
/// Entries from `incoming` form the starting set; `base` entries whose
/// fingerprint is absent are appended. When both sides share a fingerprint,
/// the `incoming` entry wins, but curated metadata from `base` is backfilled
/// into it wherever the incoming side left a field unset (`note` of `None`,
/// empty `rule_id`/`path`, or `line == 0`). The result is re-normalized
/// (schema ensured, sorted, deduplicated).
pub fn merge_false_positive_baselines(
    base: &FalsePositiveBaseline,
    incoming: &FalsePositiveBaseline,
) -> FalsePositiveBaseline {
    // Start from the normalized incoming side so its entries take precedence.
    let mut merged = normalize_false_positive_baseline(incoming.clone());
    let mut seen = merged
        .entries
        .iter()
        .map(|e| e.fingerprint.clone())
        .collect::<BTreeSet<_>>();

    for entry in &base.entries {
        if seen.insert(entry.fingerprint.clone()) {
            // Fingerprint only exists in `base`: carry the entry over as-is.
            merged.entries.push(entry.clone());
        } else if let Some(existing) = merged
            .entries
            .iter_mut()
            .find(|e| e.fingerprint == entry.fingerprint)
        {
            // Preserve manually curated metadata from the existing baseline.
            if existing.note.is_none() && entry.note.is_some() {
                existing.note = entry.note.clone();
            }
            if existing.rule_id.is_empty() {
                existing.rule_id = entry.rule_id.clone();
            }
            if existing.path.is_empty() {
                existing.path = entry.path.clone();
            }
            // line == 0 is treated as "unset" (findings use 1-based lines).
            if existing.line == 0 {
                existing.line = entry.line;
            }
        }
    }

    // Final pass restores sorted order after the appends above.
    normalize_false_positive_baseline(merged)
}
134
135/// Returns the baseline as a fingerprint set for fast lookup.
136pub fn false_positive_fingerprint_set(baseline: &FalsePositiveBaseline) -> BTreeSet<String> {
137    baseline
138        .entries
139        .iter()
140        .map(|e| e.fingerprint.clone())
141        .collect()
142}
143
/// Append-only log of past check runs, used for trend reporting.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct TrendHistory {
    /// Schema identifier; expected to be [`TREND_HISTORY_SCHEMA_V1`].
    pub schema: String,
    /// Run samples in chronological order (oldest first); omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub runs: Vec<TrendRun>,
}
150
151impl Default for TrendHistory {
152    fn default() -> Self {
153        Self {
154            schema: TREND_HISTORY_SCHEMA_V1.to_string(),
155            runs: vec![],
156        }
157    }
158}
159
/// One recorded check run: when it ran, what it scanned, and its verdict.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct TrendRun {
    /// Caller-supplied start timestamp, stored verbatim.
    pub started_at: String,
    /// Caller-supplied end timestamp, stored verbatim.
    pub ended_at: String,
    /// Wall-clock duration of the run in milliseconds.
    pub duration_ms: u64,
    /// Diff base ref (e.g. `origin/main`).
    pub base: String,
    /// Diff head ref (e.g. `HEAD`).
    pub head: String,
    /// Which diff lines were scanned (added-only vs. broader scope).
    pub scope: Scope,
    /// Overall pass/fail status of the run.
    pub status: VerdictStatus,
    /// Per-severity finding counts from the verdict.
    pub counts: VerdictCounts,
    /// Number of files inspected.
    pub files_scanned: u32,
    /// Number of lines inspected.
    pub lines_scanned: u32,
    /// Total findings reported (saturated to `u32::MAX`).
    pub findings: u32,
}
174
/// Aggregate view over a [`TrendHistory`]: totals plus the latest movement.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct TrendSummary {
    /// Number of runs summarized (saturated to `u32::MAX`).
    pub run_count: u32,
    /// Severity counts summed (saturating) over every run.
    pub totals: VerdictCounts,
    /// Finding counts summed (saturating) over every run.
    pub total_findings: u32,
    /// Most recent run, if the history is non-empty.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub latest: Option<TrendRun>,
    /// Latest-minus-previous deltas; present only with two or more runs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub delta_from_previous: Option<TrendDelta>,
}
185
/// Signed change between the two most recent runs (negative = improvement).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct TrendDelta {
    /// Change in total findings.
    pub findings: i64,
    /// Change in info-severity count.
    pub info: i64,
    /// Change in warn-severity count.
    pub warn: i64,
    /// Change in error-severity count.
    pub error: i64,
    /// Change in suppressed-finding count.
    pub suppressed: i64,
}
194
195/// Deterministically normalizes trend history by setting schema id when missing.
196pub fn normalize_trend_history(mut history: TrendHistory) -> TrendHistory {
197    if history.schema.is_empty() {
198        history.schema = TREND_HISTORY_SCHEMA_V1.to_string();
199    }
200    history
201}
202
203/// Converts a check receipt into a trend run sample.
204pub fn trend_run_from_receipt(
205    receipt: &CheckReceipt,
206    started_at: &str,
207    ended_at: &str,
208    duration_ms: u64,
209) -> TrendRun {
210    TrendRun {
211        started_at: started_at.to_string(),
212        ended_at: ended_at.to_string(),
213        duration_ms,
214        base: receipt.diff.base.clone(),
215        head: receipt.diff.head.clone(),
216        scope: receipt.diff.scope,
217        status: receipt.verdict.status,
218        counts: receipt.verdict.counts.clone(),
219        files_scanned: receipt.diff.files_scanned,
220        lines_scanned: receipt.diff.lines_scanned,
221        findings: receipt.findings.len().min(u32::MAX as usize) as u32,
222    }
223}
224
225/// Appends a run to history and optionally trims to `max_runs` newest entries.
226pub fn append_trend_run(
227    mut history: TrendHistory,
228    run: TrendRun,
229    max_runs: Option<usize>,
230) -> TrendHistory {
231    history = normalize_trend_history(history);
232    history.runs.push(run);
233
234    if let Some(limit) = max_runs
235        && limit > 0
236        && history.runs.len() > limit
237    {
238        let drop_count = history.runs.len().saturating_sub(limit);
239        history.runs.drain(0..drop_count);
240    }
241
242    history
243}
244
245/// Summarizes trend history totals and latest delta.
246pub fn summarize_trend_history(history: &TrendHistory) -> TrendSummary {
247    let mut totals = VerdictCounts::default();
248    let mut total_findings = 0u32;
249
250    for run in &history.runs {
251        totals.info = totals.info.saturating_add(run.counts.info);
252        totals.warn = totals.warn.saturating_add(run.counts.warn);
253        totals.error = totals.error.saturating_add(run.counts.error);
254        totals.suppressed = totals.suppressed.saturating_add(run.counts.suppressed);
255        total_findings = total_findings.saturating_add(run.findings);
256    }
257
258    let latest = history.runs.last().cloned();
259    let delta_from_previous = if history.runs.len() >= 2 {
260        let prev = &history.runs[history.runs.len() - 2];
261        let curr = &history.runs[history.runs.len() - 1];
262        Some(TrendDelta {
263            findings: i64::from(curr.findings) - i64::from(prev.findings),
264            info: i64::from(curr.counts.info) - i64::from(prev.counts.info),
265            warn: i64::from(curr.counts.warn) - i64::from(prev.counts.warn),
266            error: i64::from(curr.counts.error) - i64::from(prev.counts.error),
267            suppressed: i64::from(curr.counts.suppressed) - i64::from(prev.counts.suppressed),
268        })
269    } else {
270        None
271    };
272
273    TrendSummary {
274        run_count: history.runs.len().min(u32::MAX as usize) as u32,
275        totals,
276        total_findings,
277        latest,
278        delta_from_previous,
279    }
280}
281
#[cfg(test)]
mod tests {
    use super::*;
    use diffguard_types::{DiffMeta, Severity, ToolMeta, Verdict};

    // Shared fixture: a failing receipt with exactly one error-severity finding.
    fn receipt_with_findings() -> CheckReceipt {
        CheckReceipt {
            schema: diffguard_types::CHECK_SCHEMA_V1.to_string(),
            tool: ToolMeta {
                name: "diffguard".to_string(),
                version: "0.2.0".to_string(),
            },
            diff: DiffMeta {
                base: "origin/main".to_string(),
                head: "HEAD".to_string(),
                context_lines: 0,
                scope: Scope::Added,
                files_scanned: 1,
                lines_scanned: 2,
            },
            findings: vec![Finding {
                rule_id: "rust.no_unwrap".to_string(),
                severity: Severity::Error,
                message: "no unwrap".to_string(),
                path: "src/lib.rs".to_string(),
                line: 12,
                column: Some(4),
                match_text: ".unwrap(".to_string(),
                snippet: "let x = y.unwrap();".to_string(),
            }],
            verdict: Verdict {
                status: VerdictStatus::Fail,
                counts: VerdictCounts {
                    info: 0,
                    warn: 0,
                    error: 1,
                    suppressed: 0,
                },
                reasons: vec![],
            },
            timing: None,
        }
    }

    // Same receipt must always yield the same baseline (stable fingerprints,
    // 64 hex chars = SHA-256).
    #[test]
    fn baseline_from_receipt_is_deterministic() {
        let receipt = receipt_with_findings();
        let a = baseline_from_receipt(&receipt);
        let b = baseline_from_receipt(&receipt);
        assert_eq!(a, b);
        assert_eq!(a.schema, FALSE_POSITIVE_BASELINE_SCHEMA_V1);
        assert_eq!(a.entries.len(), 1);
        assert_eq!(a.entries[0].fingerprint.len(), 64);
    }

    // When incoming lacks a note, the curated note from the existing baseline
    // must survive the merge.
    #[test]
    fn merge_baseline_preserves_existing_note() {
        let mut existing = FalsePositiveBaseline::default();
        existing.entries.push(FalsePositiveEntry {
            fingerprint: "abc".to_string(),
            rule_id: "rule.one".to_string(),
            path: "a.rs".to_string(),
            line: 1,
            note: Some("intentional".to_string()),
        });

        let mut incoming = FalsePositiveBaseline::default();
        incoming.entries.push(FalsePositiveEntry {
            fingerprint: "abc".to_string(),
            rule_id: "rule.one".to_string(),
            path: "a.rs".to_string(),
            line: 1,
            note: None,
        });

        let merged = merge_false_positive_baselines(&existing, &incoming);
        assert_eq!(merged.entries.len(), 1);
        assert_eq!(merged.entries[0].note.as_deref(), Some("intentional"));
    }

    // Appending three runs with max_runs = 2 keeps only the newest two.
    #[test]
    fn append_trend_run_trims_to_max() {
        let receipt = receipt_with_findings();
        let run = trend_run_from_receipt(
            &receipt,
            "2026-01-01T00:00:00Z",
            "2026-01-01T00:00:01Z",
            1000,
        );
        let mut history = TrendHistory::default();
        history = append_trend_run(history, run.clone(), Some(2));
        history = append_trend_run(history, run.clone(), Some(2));
        history = append_trend_run(history, run, Some(2));
        assert_eq!(history.runs.len(), 2);
    }

    // Deltas are signed (latest minus previous), so improvements are negative.
    #[test]
    fn summarize_history_reports_delta() {
        let receipt = receipt_with_findings();
        let mut run1 = trend_run_from_receipt(
            &receipt,
            "2026-01-01T00:00:00Z",
            "2026-01-01T00:00:01Z",
            1000,
        );
        run1.findings = 3;
        run1.counts.warn = 2;

        let mut run2 = run1.clone();
        run2.findings = 1;
        run2.counts.warn = 1;

        let history = TrendHistory {
            schema: TREND_HISTORY_SCHEMA_V1.to_string(),
            runs: vec![run1, run2],
        };
        let summary = summarize_trend_history(&history);
        assert_eq!(summary.run_count, 2);
        assert_eq!(summary.total_findings, 4);
        let delta = summary.delta_from_previous.expect("delta");
        assert_eq!(delta.findings, -2);
        assert_eq!(delta.warn, -1);
    }
}
405}