Skip to main content

taudit_core/
baselines.rs

1//! Per-pipeline baseline files (`.taudit/baselines/<hash>.json`).
2//!
3//! A *baseline* is a snapshot of the findings present on a pipeline at the
4//! moment it was first onboarded into taudit. Subsequent scans diff against
5//! the baseline so reviewers see only NEW findings; pre-existing findings are
6//! summarised. Baselines are the v0.10 mechanism for adopting taudit on
7//! existing repos without forcing upfront triage of historical findings.
8//!
9//! ## Load-bearing decisions (per design council, 2026-04-26)
10//!
11//! 1. **Layout: one file per pipeline keyed by content hash.** A monolithic
12//!    `.taudit/baseline.json` would merge-conflict on every PR. Per-pipeline
13//!    files (`.taudit/baselines/<sha256>.json`) keep blast radius small.
14//! 2. **Fingerprints reuse `Finding::compute_fingerprint` exactly.** Inventing
15//!    a second hashing scheme is a foot-gun — SARIF, JSON, CloudEvents and
16//!    baselines must agree on what "same finding" means. The shared test
17//!    `baseline_fingerprint_matches_sarif_fingerprint` enforces this.
18//! 3. **Critical findings always exit 1** unless the entry carries
19//!    `severity_override: critical` AND a `reason` AND `expires_at <= 90d`.
20//!    This is the security analyst's non-negotiable: any waiver mechanism
21//!    creates a path for risk to be accepted, so critical waivers must be
22//!    conscious, time-bounded and re-reviewed.
23//! 4. **OSS-friendly default.** No `.taudit/` directory means today's
24//!    behaviour. Baselines are strictly opt-in.
25//!
26//! See `docs/baselines.md` for the full workflow and security guarantees.
27
28use crate::finding::{compute_fingerprint, Finding, Severity};
29use crate::graph::{
30    AuthorityGraph, EdgeKind, NodeKind, META_GITLAB_EXTENDS, META_GITLAB_INCLUDES, META_NEEDS,
31    META_REPOSITORIES,
32};
33use chrono::{DateTime, Duration, Utc};
34use serde::{Deserialize, Serialize};
35use sha2::{Digest, Sha256};
36use std::collections::BTreeMap;
37use std::path::{Path, PathBuf};
38
39/// Maximum lifetime allowed for a critical-severity waiver. Council's
40/// load-bearing constraint: a critical may only bypass exit-1 if its waiver
41/// expires within this window. Longer expirations are rejected at validation
42/// time (and pruned at diff time).
43pub const MAX_CRITICAL_WAIVER_DAYS: i64 = 90;
44
/// Minimum length of the `reason` string on a waiver, counted in Unicode
/// scalar values (`str::chars`), not bytes. Empty, `wip`, `todo`,
/// `fix later` strings train the wrong muscle memory; force a sentence's
/// worth of justification.
48pub const MIN_REASON_LENGTH: usize = 10;
49
50/// Schema version emitted by `init` and accepted by `load`. Additive 1.x.y
51/// changes are non-breaking; 2.0.0 means breaking changes.
52pub const BASELINE_SCHEMA_VERSION: &str = "1.1.0";
53
/// Errors returned by baseline I/O and validation.
#[derive(Debug, thiserror::Error)]
pub enum BaselineError {
    /// Reading the baseline file at `path` failed. Produced by
    /// `Baseline::load`.
    #[error("failed to read baseline {path}: {source}")]
    Read {
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
    /// Creating the parent directory or writing the baseline file at `path`
    /// failed. Produced by `Baseline::save`.
    #[error("failed to write baseline {path}: {source}")]
    Write {
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
    /// The bytes read from `path` did not deserialize into a `Baseline`.
    /// Produced by `Baseline::load`.
    #[error("failed to parse baseline {path}: {source}")]
    Parse {
        path: PathBuf,
        #[source]
        source: serde_json::Error,
    },
    /// Serializing a baseline to JSON failed. Converted automatically from
    /// `serde_json::Error` via `?` (see `Baseline::save`).
    #[error("failed to serialize baseline: {0}")]
    Serialize(#[from] serde_json::Error),
    /// The loaded file's `schema_version` does not start with `1.`.
    #[error("baseline schema version {found:?} not supported (expected major 1.x.y)")]
    UnsupportedVersion { found: String },
    /// The waiver reason passed to `Baseline::accept` has fewer than `min`
    /// characters; `got` is the number of characters actually supplied.
    #[error("waiver reason must be at least {min} characters (got {got})")]
    ReasonTooShort { min: usize, got: usize },
    /// A critical override was requested with an expiry more than `days`
    /// days after the acceptance time.
    #[error("critical-severity override requires expires_at <= {days}d from accepted_at")]
    CriticalWaiverTooLong { days: i64 },
    /// A critical override was requested without an `expires_at`.
    #[error("critical-severity override requires expires_at to be set")]
    CriticalWaiverNoExpiry,
    /// A critical override was requested without a reason.
    // NOTE(review): not constructed anywhere in the visible part of this
    // file (`accept` reports short/empty reasons as `ReasonTooShort`) —
    // confirm whether another caller still produces it.
    #[error("critical-severity override requires a reason")]
    CriticalWaiverNoReason,
}
88
/// One entry in a baseline. Keyed on `fingerprint` (16-hex SHA-256 truncation
/// computed by [`compute_fingerprint`](crate::finding::compute_fingerprint)).
///
/// Two waiver shapes:
///
/// * **Plain pre-existing finding.** `reason_waived`, `severity_override`,
///   `expires_at` all `None`. The finding existed at `init` time; it is
///   reported as "pre-existing" rather than a regression. Critical findings
///   in this shape STILL fail exit-1.
/// * **Explicit waiver.** `reason_waived` populated. If the original
///   severity was Critical, `severity_override: "critical"` and
///   `expires_at <= accepted_at + 90d` are mandatory. `Baseline::accept`
///   enforces this when the waiver is created; for entries read from disk
///   the check happens at evaluation time, where
///   [`BaselineFinding::is_valid_critical_waiver`] returns `false` for a
///   malformed or expired waiver and the critical falls through to exit 1.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct BaselineFinding {
    /// 16-hex SHA-256 fingerprint matching the SARIF/JSON/CloudEvents value.
    pub fingerprint: String,
    /// Snake-case rule id (custom rule id if present, else
    /// `FindingCategory` snake_case form).
    pub rule_id: String,
    /// Severity captured at `init` time. Used for the critical-bypass check.
    pub severity: Severity,
    /// When this entry was added to the baseline (`init` or `accept`).
    /// `accept` overwrites it with the acceptance time, so for waivers it
    /// doubles as the start of the critical-waiver window.
    pub first_seen_at: DateTime<Utc>,
    /// Free-form justification. Required on `accept` (>=10 chars). `None`
    /// when the entry was bulk-added by `init`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub reason_waived: Option<String>,
    /// Acknowledges that the original severity was Critical and the waiver
    /// is intentional. Council's hard rule: any critical bypass must declare
    /// itself with this field; missing == critical falls through to exit 1.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub severity_override: Option<Severity>,
    /// Hard deadline. Mandatory for `severity_override: critical`. After
    /// this timestamp the waiver is treated as expired (logs a warning and
    /// the underlying finding counts toward exit-1 again).
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub expires_at: Option<DateTime<Utc>>,
}
128
129impl BaselineFinding {
130    /// True iff this entry waives a critical via the explicit-override
131    /// shape (severity_override + reason + expires_at <= 90d).
132    pub fn is_valid_critical_waiver(&self, now: DateTime<Utc>) -> bool {
133        if self.severity_override != Some(Severity::Critical) {
134            return false;
135        }
136        let Some(expires_at) = self.expires_at else {
137            return false;
138        };
139        if expires_at <= now {
140            return false;
141        }
142        if (expires_at - self.first_seen_at) > Duration::days(MAX_CRITICAL_WAIVER_DAYS) {
143            return false;
144        }
145        matches!(self.reason_waived.as_deref(), Some(r) if r.chars().count() >= MIN_REASON_LENGTH)
146    }
147
148    /// True iff this waiver carries an `expires_at` that has already passed.
149    pub fn is_expired(&self, now: DateTime<Utc>) -> bool {
150        match self.expires_at {
151            Some(t) => t <= now,
152            None => false,
153        }
154    }
155}
156
/// Tool/version provenance captured at `init`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CapturedWith {
    /// taudit version string supplied by the caller at capture time
    /// (e.g. `"0.10.0"`).
    pub taudit_version: String,
    /// Free-form description of the rule set at capture time
    /// (e.g. `"32-builtin"`, `"32-builtin+5-custom"`).
    pub rules_version: String,
}
165
/// One baseline file = one pipeline. Keyed by `pipeline_content_hash` so
/// renames preserve state and merge conflicts only touch the affected file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Baseline {
    /// Schema version of this file. `from_findings` writes
    /// [`BASELINE_SCHEMA_VERSION`]; `load` accepts any value starting
    /// with `1.`.
    pub schema_version: String,
    /// Pipeline path as supplied by the caller at capture time.
    pub pipeline_path: String,
    /// `sha256:<hex>` of the pipeline file's bytes at `init` time.
    pub pipeline_content_hash: String,
    /// Optional additive hardening signal captured at `init` time.
    ///
    /// Hashes parser-emitted dependency-like material (include/template/
    /// repository declarations and delegation edges) so suppression can be
    /// disabled if that material drifts even when the baseline file still
    /// exists. Absent on legacy baseline files written before v1.1.0.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub pipeline_identity_material_hash: Option<String>,
    /// Timestamp passed to `from_findings` when the baseline was captured.
    pub captured_at: DateTime<Utc>,
    /// Caller-supplied identity of whoever captured the baseline.
    pub captured_by: String,
    /// Tool and rule-set versions in effect at capture time.
    pub captured_with: CapturedWith,
    /// Sorted by `fingerprint` ASC for stable git diffs.
    pub baseline_findings: Vec<BaselineFinding>,
}
188
189impl Baseline {
190    /// Load and parse a baseline from disk. Returns `Ok(None)` if `path`
191    /// does not exist (the OSS-friendly default — absent baseline is fine).
192    pub fn load(path: &Path) -> Result<Option<Self>, BaselineError> {
193        if !path.exists() {
194            return Ok(None);
195        }
196        let bytes = std::fs::read(path).map_err(|source| BaselineError::Read {
197            path: path.to_path_buf(),
198            source,
199        })?;
200        let baseline: Baseline =
201            serde_json::from_slice(&bytes).map_err(|source| BaselineError::Parse {
202                path: path.to_path_buf(),
203                source,
204            })?;
205        if !baseline.schema_version.starts_with("1.") {
206            return Err(BaselineError::UnsupportedVersion {
207                found: baseline.schema_version,
208            });
209        }
210        Ok(Some(baseline))
211    }
212
213    /// Write `self` to `path` as pretty JSON with stable key ordering and
214    /// fingerprint-sorted entries. Creates parent directories as needed.
215    pub fn save(&self, path: &Path) -> Result<(), BaselineError> {
216        if let Some(parent) = path.parent() {
217            std::fs::create_dir_all(parent).map_err(|source| BaselineError::Write {
218                path: path.to_path_buf(),
219                source,
220            })?;
221        }
222        let mut sorted = self.clone();
223        sorted
224            .baseline_findings
225            .sort_by(|a, b| a.fingerprint.cmp(&b.fingerprint));
226        let mut bytes = serde_json::to_vec_pretty(&sorted)?;
227        bytes.push(b'\n');
228        std::fs::write(path, bytes).map_err(|source| BaselineError::Write {
229            path: path.to_path_buf(),
230            source,
231        })?;
232        Ok(())
233    }
234
235    /// Produce a fresh baseline from `current_findings` against `graph`.
236    /// Each entry is a plain pre-existing finding (no waiver fields set).
237    /// `pipeline_path` should be the pipeline's filesystem path as the user
238    /// sees it; `content` is the raw bytes used to derive the content hash.
239    #[allow(clippy::too_many_arguments)]
240    pub fn from_findings(
241        pipeline_path: &str,
242        content: &str,
243        graph: &AuthorityGraph,
244        findings: &[Finding],
245        captured_by: &str,
246        taudit_version: &str,
247        rules_version: &str,
248        now: DateTime<Utc>,
249    ) -> Self {
250        let mut baseline_findings: Vec<BaselineFinding> = findings
251            .iter()
252            .map(|f| BaselineFinding {
253                fingerprint: compute_fingerprint(f, graph),
254                rule_id: rule_id_for(f),
255                severity: f.severity,
256                first_seen_at: now,
257                reason_waived: None,
258                severity_override: None,
259                expires_at: None,
260            })
261            .collect();
262        // Dedup on fingerprint (template instances collapse into one entry).
263        baseline_findings.sort_by(|a, b| a.fingerprint.cmp(&b.fingerprint));
264        baseline_findings.dedup_by(|a, b| a.fingerprint == b.fingerprint);
265
266        Baseline {
267            schema_version: BASELINE_SCHEMA_VERSION.to_string(),
268            pipeline_path: pipeline_path.to_string(),
269            pipeline_content_hash: compute_pipeline_hash(content),
270            pipeline_identity_material_hash: Some(compute_pipeline_identity_material_hash(graph)),
271            captured_at: now,
272            captured_by: captured_by.to_string(),
273            captured_with: CapturedWith {
274                taudit_version: taudit_version.to_string(),
275                rules_version: rules_version.to_string(),
276            },
277            baseline_findings,
278        }
279    }
280
281    /// Append a single waiver entry. Validates `reason` length and the
282    /// critical-waiver constraints. Returns the inserted/updated entry.
283    /// If an entry with the same fingerprint already exists, it is replaced
284    /// (idempotent re-acceptance with a refreshed reason / expiry).
285    #[allow(clippy::too_many_arguments)]
286    pub fn accept(
287        &mut self,
288        fingerprint: &str,
289        rule_id: &str,
290        severity: Severity,
291        reason: &str,
292        severity_override: Option<Severity>,
293        expires_at: Option<DateTime<Utc>>,
294        now: DateTime<Utc>,
295    ) -> Result<&BaselineFinding, BaselineError> {
296        let reason_chars = reason.chars().count();
297        if reason_chars < MIN_REASON_LENGTH {
298            return Err(BaselineError::ReasonTooShort {
299                min: MIN_REASON_LENGTH,
300                got: reason_chars,
301            });
302        }
303        if severity_override == Some(Severity::Critical) {
304            let Some(exp) = expires_at else {
305                return Err(BaselineError::CriticalWaiverNoExpiry);
306            };
307            if (exp - now) > Duration::days(MAX_CRITICAL_WAIVER_DAYS) {
308                return Err(BaselineError::CriticalWaiverTooLong {
309                    days: MAX_CRITICAL_WAIVER_DAYS,
310                });
311            }
312        }
313        let entry = BaselineFinding {
314            fingerprint: fingerprint.to_string(),
315            rule_id: rule_id.to_string(),
316            severity,
317            first_seen_at: now,
318            reason_waived: Some(reason.to_string()),
319            severity_override,
320            expires_at,
321        };
322        // Replace existing entry with the same fingerprint, else append.
323        if let Some(slot) = self
324            .baseline_findings
325            .iter_mut()
326            .find(|e| e.fingerprint == entry.fingerprint)
327        {
328            *slot = entry;
329        } else {
330            self.baseline_findings.push(entry);
331        }
332        self.baseline_findings
333            .sort_by(|a, b| a.fingerprint.cmp(&b.fingerprint));
334        Ok(self
335            .baseline_findings
336            .iter()
337            .find(|e| e.fingerprint == fingerprint)
338            .expect("just inserted"))
339    }
340
341    /// Returns true when the captured identity material matches the current
342    /// parsed graph. Legacy baselines that predate this field are considered
343    /// compatible to preserve backward compatibility.
344    pub fn identity_material_matches(&self, graph: &AuthorityGraph) -> bool {
345        match self.pipeline_identity_material_hash.as_deref() {
346            Some(expected) => expected == compute_pipeline_identity_material_hash(graph),
347            None => true,
348        }
349    }
350}
351
/// Result of diffing a fresh scan against a baseline. The `new`, `fixed`
/// and `preexisting` buckets are independently consumable by `verify`'s
/// exit-code logic.
#[derive(Debug, Clone)]
pub struct BaselineDiff {
    /// Findings present in the current scan whose fingerprint is NOT in
    /// the baseline. These are regressions and drive the verify exit code.
    pub new: Vec<Finding>,
    /// Baseline entries whose fingerprint is NOT present in the current
    /// scan — the underlying issue was fixed (or refactored away). Useful
    /// for the `taudit baseline diff` summary.
    pub fixed: Vec<BaselineFinding>,
    /// Findings present in BOTH the current scan and the baseline. Reported
    /// for visibility but do not drive exit-1 unless they are critical-
    /// without-valid-waiver (see [`Self::critical_without_valid_waiver`]).
    pub preexisting: Vec<Finding>,
    /// Number of findings in `preexisting` whose matching baseline entry
    /// carries `reason_waived`. Counted per current finding, so several
    /// findings sharing one waived fingerprint each contribute.
    /// Drives the "X waived, Y unwaived" summary.
    pub waived_count: usize,
}
371
372impl BaselineDiff {
373    /// Critical findings in `preexisting` whose baseline entry does NOT
374    /// carry a valid critical waiver. These ALWAYS count toward exit 1 —
375    /// the council's load-bearing constraint that critical waivers must be
376    /// explicit, time-bounded, and re-reviewed.
377    pub fn critical_without_valid_waiver(
378        &self,
379        baseline: &Baseline,
380        graph: &AuthorityGraph,
381        now: DateTime<Utc>,
382    ) -> Vec<Finding> {
383        self.preexisting
384            .iter()
385            .filter(|f| f.severity == Severity::Critical)
386            .filter(|f| {
387                let fp = compute_fingerprint(f, graph);
388                match baseline
389                    .baseline_findings
390                    .iter()
391                    .find(|e| e.fingerprint == fp)
392                {
393                    Some(entry) => !entry.is_valid_critical_waiver(now),
394                    None => true, // shouldn't happen — preexisting means present in baseline
395                }
396            })
397            .cloned()
398            .collect()
399    }
400}
401
402/// Diff `current_findings` against `baseline` using the SARIF-equivalent
403/// fingerprint computed from `graph`. Entry point for `verify` and the
404/// `taudit baseline diff` subcommand.
405pub fn diff(
406    current_findings: &[Finding],
407    baseline: &Baseline,
408    graph: &AuthorityGraph,
409) -> BaselineDiff {
410    use std::collections::{HashMap, HashSet};
411
412    let baseline_index: HashMap<&str, &BaselineFinding> = baseline
413        .baseline_findings
414        .iter()
415        .map(|e| (e.fingerprint.as_str(), e))
416        .collect();
417
418    let mut new = Vec::new();
419    let mut preexisting = Vec::new();
420    let mut seen_fingerprints: HashSet<String> = HashSet::new();
421    let mut waived_count = 0usize;
422
423    for finding in current_findings {
424        let fp = compute_fingerprint(finding, graph);
425        seen_fingerprints.insert(fp.clone());
426        match baseline_index.get(fp.as_str()) {
427            Some(entry) => {
428                if entry.reason_waived.is_some() {
429                    waived_count += 1;
430                }
431                preexisting.push(finding.clone());
432            }
433            None => new.push(finding.clone()),
434        }
435    }
436
437    let fixed: Vec<BaselineFinding> = baseline
438        .baseline_findings
439        .iter()
440        .filter(|e| !seen_fingerprints.contains(&e.fingerprint))
441        .cloned()
442        .collect();
443
444    BaselineDiff {
445        new,
446        fixed,
447        preexisting,
448        waived_count,
449    }
450}
451
452/// SHA-256 of `content` formatted as `sha256:<64-hex>`. The `sha256:`
453/// prefix mirrors OCI / git object naming so logs and dashboards can
454/// strip the algorithm tag uniformly.
455pub fn compute_pipeline_hash(content: &str) -> String {
456    let digest = Sha256::digest(content.as_bytes());
457    format_digest(digest)
458}
459
460/// SHA-256 over dependency-like parser material (include/template/repository
461/// declarations and delegation edges), formatted as `sha256:<64-hex>`.
462///
463/// This is intentionally additive to `pipeline_content_hash`: content hash
464/// still keys baseline files for backward compatibility, while this material
465/// hash is used to detect include/template drift and disable suppression when
466/// the parser-visible dependency shape changes.
467pub fn compute_pipeline_identity_material_hash(graph: &AuthorityGraph) -> String {
468    let mut metadata: BTreeMap<String, String> = BTreeMap::new();
469    for key in [META_REPOSITORIES, META_GITLAB_INCLUDES] {
470        if let Some(value) = graph.metadata.get(key) {
471            metadata.insert(key.to_string(), value.clone());
472        }
473    }
474
475    let mut delegations: Vec<String> = graph
476        .edges
477        .iter()
478        .filter(|e| e.kind == EdgeKind::DelegatesTo)
479        .filter_map(|e| {
480            let from = graph.node(e.from)?;
481            let to = graph.node(e.to)?;
482            Some(format!(
483                "{}:{}->{}:{}:{:?}",
484                from.id, from.name, to.id, to.name, to.trust_zone
485            ))
486        })
487        .collect();
488    delegations.sort();
489
490    let mut step_dependency_metadata: Vec<String> = graph
491        .nodes
492        .iter()
493        .filter(|n| n.kind == NodeKind::Step)
494        .flat_map(|n| {
495            [META_NEEDS, META_GITLAB_EXTENDS]
496                .iter()
497                .filter_map(move |k| {
498                    n.metadata
499                        .get(*k)
500                        .map(|v| format!("{}:{}={}", n.name, k, v))
501                })
502        })
503        .collect();
504    step_dependency_metadata.sort();
505
506    let canonical = serde_json::json!({
507        "metadata": metadata,
508        "delegates_to": delegations,
509        "step_dependency_metadata": step_dependency_metadata,
510    });
511
512    let bytes = serde_json::to_vec(&canonical).expect("identity material must serialize");
513    let digest = Sha256::digest(bytes);
514    format_digest(digest)
515}
516
/// Render `digest` as lowercase hex, two characters per byte, prefixed with
/// the literal `sha256:` tag.
fn format_digest(digest: impl AsRef<[u8]>) -> String {
    use std::fmt::Write;

    const TAG: &str = "sha256:";
    let raw = digest.as_ref();
    let mut rendered = String::with_capacity(TAG.len() + raw.len() * 2);
    rendered.push_str(TAG);
    for byte in raw {
        // Formatting into a `String` does not fail; the result is ignored.
        let _ = write!(rendered, "{byte:02x}");
    }
    rendered
}
525
/// Directory that holds the per-pipeline baseline files for the working
/// directory `root`, i.e. `<root>/.taudit/baselines/`.
pub fn baselines_dir(root: &Path) -> PathBuf {
    let mut dir = root.to_path_buf();
    dir.push(".taudit");
    dir.push("baselines");
    dir
}
531
/// File name (`<hex>.json`) under which one pipeline's baseline is stored.
/// A leading `sha256:` tag is dropped so the name stays portable on
/// filesystems that disallow `:` (Windows NTFS); input without the tag is
/// used as-is.
pub fn baseline_filename_for(pipeline_content_hash: &str) -> String {
    let stem = match pipeline_content_hash.strip_prefix("sha256:") {
        Some(hex) => hex,
        None => pipeline_content_hash,
    };
    [stem, ".json"].concat()
}
540
541/// Convenience: full `<root>/.taudit/baselines/<hex>.json` path for the
542/// given content hash.
543pub fn baseline_path_for(root: &Path, pipeline_content_hash: &str) -> PathBuf {
544    baselines_dir(root).join(baseline_filename_for(pipeline_content_hash))
545}
546
/// Public alias of [`compute_fingerprint`] — exposed here so the baseline
/// module is the single import point for "what is the fingerprint of this
/// finding for baseline purposes". The shared test
/// `baseline_fingerprint_matches_sarif_fingerprint` asserts these are
/// byte-equal forever.
///
/// Delegates directly to [`compute_fingerprint`] with the same `finding`
/// and `graph`; it adds no logic of its own.
pub fn compute_finding_fingerprint(finding: &Finding, graph: &AuthorityGraph) -> String {
    compute_fingerprint(finding, graph)
}
555
556/// Snake-case rule id for `f`. Mirrors the same logic the SARIF reporter
557/// uses (custom rule id from `[id] message` prefix wins over category).
558fn rule_id_for(f: &Finding) -> String {
559    if let Some(id) = f.message.strip_prefix('[') {
560        if let Some(end) = id.find(']') {
561            let candidate = &id[..end];
562            if !candidate.is_empty() {
563                return candidate.to_string();
564            }
565        }
566    }
567    serde_json::to_value(f.category)
568        .ok()
569        .and_then(|v| v.as_str().map(str::to_string))
570        .unwrap_or_else(|| "unknown".to_string())
571}
572
573// ── Tests ───────────────────────────────────────────────────
574
575#[cfg(test)]
576mod tests {
577    use super::*;
578    use crate::finding::{FindingCategory, FindingExtras, FindingSource, Recommendation};
579    use crate::graph::{AuthorityGraph, NodeKind, PipelineSource, TrustZone};
580
581    fn source(file: &str) -> PipelineSource {
582        PipelineSource {
583            file: file.to_string(),
584            repo: None,
585            git_ref: None,
586            commit_sha: None,
587        }
588    }
589
590    fn make_graph(file: &str) -> (AuthorityGraph, crate::graph::NodeId) {
591        let mut g = AuthorityGraph::new(source(file));
592        let s = g.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
593        (g, s)
594    }
595
596    fn make_finding(
597        category: FindingCategory,
598        severity: Severity,
599        msg: &str,
600        nodes: Vec<crate::graph::NodeId>,
601    ) -> Finding {
602        Finding {
603            severity,
604            category,
605            path: None,
606            nodes_involved: nodes,
607            message: msg.to_string(),
608            recommendation: Recommendation::Manual {
609                action: "fix".to_string(),
610            },
611            source: FindingSource::BuiltIn,
612            extras: FindingExtras::default(),
613        }
614    }
615
616    fn now() -> DateTime<Utc> {
617        DateTime::parse_from_rfc3339("2026-04-26T12:00:00Z")
618            .unwrap()
619            .with_timezone(&Utc)
620    }
621
622    /// COUNCIL-MANDATED SHARED TEST: baseline fingerprint and SARIF
623    /// fingerprint MUST be byte-equal. If this ever fails, suppression
624    /// across SARIF/JSON/CloudEvents/baseline silently drifts. Non-
625    /// negotiable per the council design doc, Section C, item 5.
626    #[test]
627    fn baseline_fingerprint_matches_sarif_fingerprint() {
628        let (graph, s) = make_graph(".github/workflows/release.yml");
629        let f = make_finding(
630            FindingCategory::AuthorityPropagation,
631            Severity::High,
632            "AWS_KEY reaches third party",
633            vec![s],
634        );
635        let baseline_fp = compute_finding_fingerprint(&f, &graph);
636        let sarif_fp = compute_fingerprint(&f, &graph);
637        assert_eq!(
638            baseline_fp, sarif_fp,
639            "baseline and SARIF fingerprints MUST be byte-equal — do not introduce a second fingerprint scheme"
640        );
641    }
642
643    #[test]
644    fn pipeline_hash_is_deterministic_and_prefixed() {
645        let h = compute_pipeline_hash("on: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n");
646        assert!(h.starts_with("sha256:"));
647        assert_eq!(h.len(), 7 + 64);
648        let h2 = compute_pipeline_hash("on: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n");
649        assert_eq!(h, h2, "same content -> same hash");
650        let h3 = compute_pipeline_hash("on: push\n");
651        assert_ne!(h, h3);
652    }
653
654    #[test]
655    fn identity_material_hash_changes_when_dependency_metadata_changes() {
656        let (mut g1, _) = make_graph("ci.yml");
657        g1.metadata.insert(
658            META_REPOSITORIES.to_string(),
659            r#"[{"alias":"templates","used":true}]"#.to_string(),
660        );
661
662        let (mut g2, _) = make_graph("ci.yml");
663        g2.metadata.insert(
664            META_REPOSITORIES.to_string(),
665            r#"[{"alias":"templates","used":false}]"#.to_string(),
666        );
667
668        let h1 = compute_pipeline_identity_material_hash(&g1);
669        let h2 = compute_pipeline_identity_material_hash(&g2);
670        assert_ne!(
671            h1, h2,
672            "repository/include metadata drift must change identity material"
673        );
674    }
675
676    #[test]
677    fn identity_material_hash_changes_when_template_delegation_changes() {
678        let mut g1 = AuthorityGraph::new(source("ci.yml"));
679        let s1 = g1.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
680        let t1 = g1.add_node(
681            NodeKind::Image,
682            "templates/release.yml",
683            TrustZone::FirstParty,
684        );
685        g1.add_edge(s1, t1, EdgeKind::DelegatesTo);
686
687        let mut g2 = AuthorityGraph::new(source("ci.yml"));
688        let s2 = g2.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
689        let t2 = g2.add_node(
690            NodeKind::Image,
691            "templates/release-v2.yml",
692            TrustZone::FirstParty,
693        );
694        g2.add_edge(s2, t2, EdgeKind::DelegatesTo);
695
696        let h1 = compute_pipeline_identity_material_hash(&g1);
697        let h2 = compute_pipeline_identity_material_hash(&g2);
698        assert_ne!(
699            h1, h2,
700            "template delegation target drift must change identity material"
701        );
702    }
703
704    #[test]
705    fn init_captures_current_findings() {
706        let (graph, s) = make_graph("ci.yml");
707        let f1 = make_finding(
708            FindingCategory::UnpinnedAction,
709            Severity::High,
710            "actions/checkout@v4 unpinned",
711            vec![s],
712        );
713        let f2 = make_finding(
714            FindingCategory::AuthorityPropagation,
715            Severity::Critical,
716            "AWS_KEY reaches untrusted",
717            vec![s],
718        );
719        let baseline = Baseline::from_findings(
720            "ci.yml",
721            "on: push\n",
722            &graph,
723            &[f1, f2],
724            "ryan@example.com",
725            "0.10.0",
726            "32-builtin",
727            now(),
728        );
729        assert_eq!(baseline.baseline_findings.len(), 2);
730        assert_eq!(baseline.captured_by, "ryan@example.com");
731        assert_eq!(baseline.captured_with.taudit_version, "0.10.0");
732        assert!(
733            baseline.pipeline_identity_material_hash.is_some(),
734            "new captures should persist identity material hash"
735        );
736        // Sorted by fingerprint
737        let fps: Vec<&str> = baseline
738            .baseline_findings
739            .iter()
740            .map(|e| e.fingerprint.as_str())
741            .collect();
742        let mut sorted = fps.clone();
743        sorted.sort();
744        assert_eq!(fps, sorted, "entries must be fingerprint-sorted");
745        // No waiver fields on init
746        for entry in &baseline.baseline_findings {
747            assert!(entry.reason_waived.is_none());
748            assert!(entry.severity_override.is_none());
749            assert!(entry.expires_at.is_none());
750        }
751    }
752
753    #[test]
754    fn save_then_load_round_trips() {
755        let dir = tempdir();
756        let (graph, s) = make_graph("ci.yml");
757        let f = make_finding(
758            FindingCategory::UnpinnedAction,
759            Severity::High,
760            "actions/checkout@v4 unpinned",
761            vec![s],
762        );
763        let baseline = Baseline::from_findings(
764            "ci.yml",
765            "x",
766            &graph,
767            &[f],
768            "ryan",
769            "0.10.0",
770            "32-builtin",
771            now(),
772        );
773        let path = dir.join("b.json");
774        baseline.save(&path).expect("save");
775        let loaded = Baseline::load(&path).expect("load").expect("present");
776        assert_eq!(baseline, loaded);
777    }
778
779    #[test]
780    fn load_returns_none_when_absent() {
781        let dir = tempdir();
782        let path = dir.join("does-not-exist.json");
783        assert!(Baseline::load(&path).expect("ok").is_none());
784    }
785
786    #[test]
787    fn legacy_baseline_without_identity_material_remains_compatible() {
788        let baseline = empty_baseline();
789        let (graph, _) = make_graph("ci.yml");
790        assert!(
791            baseline.identity_material_matches(&graph),
792            "legacy baseline must remain compatible"
793        );
794    }
795
796    #[test]
797    fn accept_rejects_short_reason() {
798        let mut baseline = empty_baseline();
799        let err = baseline
800            .accept(
801                "abcd1234abcd1234",
802                "unpinned_action",
803                Severity::High,
804                "wip",
805                None,
806                None,
807                now(),
808            )
809            .unwrap_err();
810        assert!(matches!(err, BaselineError::ReasonTooShort { .. }));
811    }
812
813    #[test]
814    fn accept_critical_without_expires_is_rejected() {
815        let mut baseline = empty_baseline();
816        let err = baseline
817            .accept(
818                "deadbeefdeadbeef",
819                "trigger_context_mismatch",
820                Severity::Critical,
821                "Threat-modeled exception per ABC-123",
822                Some(Severity::Critical),
823                None, // no expiry
824                now(),
825            )
826            .unwrap_err();
827        assert!(matches!(err, BaselineError::CriticalWaiverNoExpiry));
828    }
829
830    #[test]
831    fn accept_critical_with_expiry_beyond_90d_is_rejected() {
832        let mut baseline = empty_baseline();
833        let too_long = now() + Duration::days(100);
834        let err = baseline
835            .accept(
836                "deadbeefdeadbeef",
837                "trigger_context_mismatch",
838                Severity::Critical,
839                "Threat-modeled exception per ABC-123",
840                Some(Severity::Critical),
841                Some(too_long),
842                now(),
843            )
844            .unwrap_err();
845        assert!(matches!(
846            err,
847            BaselineError::CriticalWaiverTooLong { days: 90 }
848        ));
849    }
850
851    #[test]
852    fn accept_critical_with_valid_expiry_succeeds() {
853        let mut baseline = empty_baseline();
854        let exp = now() + Duration::days(60);
855        baseline
856            .accept(
857                "deadbeefdeadbeef",
858                "trigger_context_mismatch",
859                Severity::Critical,
860                "Threat-modeled exception per ABC-123",
861                Some(Severity::Critical),
862                Some(exp),
863                now(),
864            )
865            .expect("valid critical waiver");
866        let entry = &baseline.baseline_findings[0];
867        assert!(entry.is_valid_critical_waiver(now()));
868        // After the expiry, the waiver no longer protects.
869        assert!(!entry.is_valid_critical_waiver(exp + Duration::seconds(1)));
870    }
871
872    #[test]
873    fn diff_classifies_new_fixed_preexisting() {
874        let (graph, s) = make_graph("ci.yml");
875        let f_old = make_finding(
876            FindingCategory::UnpinnedAction,
877            Severity::High,
878            "actions/checkout@v4 unpinned",
879            vec![s],
880        );
881        let f_unchanged = make_finding(
882            FindingCategory::AuthorityPropagation,
883            Severity::High,
884            "AWS_KEY reaches untrusted",
885            vec![s],
886        );
887        let baseline = Baseline::from_findings(
888            "ci.yml",
889            "x",
890            &graph,
891            &[f_old.clone(), f_unchanged.clone()],
892            "ryan",
893            "0.10.0",
894            "32-builtin",
895            now(),
896        );
897        // Current scan: keep `unchanged`, drop `old`, add `new`.
898        let f_new = make_finding(
899            FindingCategory::OverPrivilegedIdentity,
900            Severity::Medium,
901            "GITHUB_TOKEN over-privileged",
902            vec![s],
903        );
904        let current = vec![f_unchanged.clone(), f_new.clone()];
905        let diff = diff(&current, &baseline, &graph);
906        assert_eq!(diff.new.len(), 1, "f_new is new");
907        assert_eq!(diff.fixed.len(), 1, "f_old was fixed");
908        assert_eq!(diff.preexisting.len(), 1, "f_unchanged preexisting");
909        assert_eq!(diff.waived_count, 0, "no waivers yet");
910    }
911
912    #[test]
913    fn critical_preexisting_without_waiver_blocks_exit_zero() {
914        let (graph, s) = make_graph("ci.yml");
915        let crit = make_finding(
916            FindingCategory::AuthorityPropagation,
917            Severity::Critical,
918            "AWS_KEY reaches untrusted",
919            vec![s],
920        );
921        let baseline = Baseline::from_findings(
922            "ci.yml",
923            "x",
924            &graph,
925            std::slice::from_ref(&crit),
926            "ryan",
927            "0.10.0",
928            "32-builtin",
929            now(),
930        );
931        let diff = diff(&[crit], &baseline, &graph);
932        assert_eq!(diff.preexisting.len(), 1);
933        // Plain pre-existing entry — no severity_override, no waiver — must
934        // STILL force a critical to count toward exit 1.
935        let blockers = diff.critical_without_valid_waiver(&baseline, &graph, now());
936        assert_eq!(
937            blockers.len(),
938            1,
939            "critical without explicit waiver must always block"
940        );
941    }
942
943    #[test]
944    fn critical_with_explicit_waiver_does_not_block() {
945        let (graph, s) = make_graph("ci.yml");
946        let crit = make_finding(
947            FindingCategory::AuthorityPropagation,
948            Severity::Critical,
949            "AWS_KEY reaches untrusted",
950            vec![s],
951        );
952        let mut baseline = Baseline::from_findings(
953            "ci.yml",
954            "x",
955            &graph,
956            std::slice::from_ref(&crit),
957            "ryan",
958            "0.10.0",
959            "32-builtin",
960            now(),
961        );
962        // Promote the entry to a valid critical waiver.
963        let fp = compute_fingerprint(&crit, &graph);
964        baseline
965            .accept(
966                &fp,
967                "authority_propagation",
968                Severity::Critical,
969                "Threat-modeled; documented exception ABC-123",
970                Some(Severity::Critical),
971                Some(now() + Duration::days(60)),
972                now(),
973            )
974            .expect("valid waiver");
975        let diff = diff(&[crit], &baseline, &graph);
976        let blockers = diff.critical_without_valid_waiver(&baseline, &graph, now());
977        assert_eq!(blockers.len(), 0, "valid waiver bypasses exit 1");
978    }
979
980    #[test]
981    fn expired_critical_waiver_no_longer_protects() {
982        let (graph, s) = make_graph("ci.yml");
983        let crit = make_finding(
984            FindingCategory::AuthorityPropagation,
985            Severity::Critical,
986            "AWS_KEY reaches untrusted",
987            vec![s],
988        );
989        let mut baseline = Baseline::from_findings(
990            "ci.yml",
991            "x",
992            &graph,
993            std::slice::from_ref(&crit),
994            "ryan",
995            "0.10.0",
996            "32-builtin",
997            now(),
998        );
999        let fp = compute_fingerprint(&crit, &graph);
1000        let exp = now() + Duration::days(30);
1001        baseline
1002            .accept(
1003                &fp,
1004                "authority_propagation",
1005                Severity::Critical,
1006                "Threat-modeled; documented exception ABC-123",
1007                Some(Severity::Critical),
1008                Some(exp),
1009                now(),
1010            )
1011            .expect("valid waiver");
1012        // Time passes past the expiry — the waiver no longer protects.
1013        let later = exp + Duration::days(1);
1014        let diff = diff(&[crit], &baseline, &graph);
1015        let blockers = diff.critical_without_valid_waiver(&baseline, &graph, later);
1016        assert_eq!(blockers.len(), 1, "expired waiver must not protect");
1017    }
1018
1019    #[test]
1020    fn baselines_dir_and_filename_layout() {
1021        let root = std::path::Path::new("/tmp/repo");
1022        let dir = baselines_dir(root);
1023        assert_eq!(dir, std::path::PathBuf::from("/tmp/repo/.taudit/baselines"));
1024        let f = baseline_filename_for("sha256:abcdef0123");
1025        assert_eq!(f, "abcdef0123.json");
1026        let p = baseline_path_for(root, "sha256:abcdef0123");
1027        assert_eq!(
1028            p,
1029            std::path::PathBuf::from("/tmp/repo/.taudit/baselines/abcdef0123.json")
1030        );
1031    }
1032
1033    #[test]
1034    fn unsupported_schema_version_rejected() {
1035        let dir = tempdir();
1036        let path = dir.join("b.json");
1037        let body = r#"{"schema_version":"2.0.0","pipeline_path":"x","pipeline_content_hash":"sha256:x","captured_at":"2026-04-26T12:00:00Z","captured_by":"r","captured_with":{"taudit_version":"0.10.0","rules_version":"32-builtin"},"baseline_findings":[]}"#;
1038        std::fs::write(&path, body).unwrap();
1039        let err = Baseline::load(&path).unwrap_err();
1040        assert!(matches!(err, BaselineError::UnsupportedVersion { .. }));
1041    }
1042
1043    // ── Test helpers ─────────────────────────────────────
1044
1045    fn empty_baseline() -> Baseline {
1046        Baseline {
1047            schema_version: BASELINE_SCHEMA_VERSION.to_string(),
1048            pipeline_path: "ci.yml".to_string(),
1049            pipeline_content_hash: compute_pipeline_hash("x"),
1050            pipeline_identity_material_hash: None,
1051            captured_at: now(),
1052            captured_by: "ryan".to_string(),
1053            captured_with: CapturedWith {
1054                taudit_version: "0.10.0".to_string(),
1055                rules_version: "32-builtin".to_string(),
1056            },
1057            baseline_findings: Vec::new(),
1058        }
1059    }
1060
1061    /// Per-process tempdir helper. Avoids pulling in the `tempfile` crate
1062    /// just for tests — we control the cleanup ourselves.
1063    fn tempdir() -> std::path::PathBuf {
1064        let pid = std::process::id();
1065        let nanos = std::time::SystemTime::now()
1066            .duration_since(std::time::UNIX_EPOCH)
1067            .unwrap()
1068            .as_nanos();
1069        let p = std::env::temp_dir().join(format!("taudit-baselines-test-{pid}-{nanos}"));
1070        std::fs::create_dir_all(&p).unwrap();
1071        p
1072    }
1073}