Skip to main content

taudit_core/
baselines.rs

1//! Per-pipeline baseline files (`.taudit/baselines/<hash>.json`).
2//!
3//! A *baseline* is a snapshot of the findings present on a pipeline at the
4//! moment it was first onboarded into taudit. Subsequent scans diff against
5//! the baseline so reviewers see only NEW findings; pre-existing findings are
6//! summarised. Baselines are the v0.10 mechanism for adopting taudit on
7//! existing repos without forcing upfront triage of historical findings.
8//!
9//! ## Load-bearing decisions (per design council, 2026-04-26)
10//!
11//! 1. **Layout: one file per pipeline keyed by content hash.** A monolithic
12//!    `.taudit/baseline.json` would merge-conflict on every PR. Per-pipeline
13//!    files (`.taudit/baselines/<sha256>.json`) keep blast radius small.
14//! 2. **Fingerprints reuse `Finding::compute_fingerprint` exactly.** Inventing
15//!    a second hashing scheme is a foot-gun — SARIF, JSON, CloudEvents and
16//!    baselines must agree on what "same finding" means. The shared test
17//!    `baseline_fingerprint_matches_sarif_fingerprint` enforces this.
18//! 3. **Critical findings always exit 1** unless the entry carries
19//!    `severity_override: critical` AND a `reason` AND `expires_at <= 90d`.
20//!    This is the security analyst's non-negotiable: any waiver mechanism
21//!    creates a path for risk to be accepted, so critical waivers must be
22//!    conscious, time-bounded and re-reviewed.
23//! 4. **OSS-friendly default.** No `.taudit/` directory means today's
24//!    behaviour. Baselines are strictly opt-in.
25//!
26//! See `docs/baselines.md` for the full workflow and security guarantees.
27
28use crate::finding::{compute_fingerprint, rule_id_for, Finding, Severity};
29use crate::graph::{
30    AuthorityGraph, EdgeKind, NodeKind, META_GITLAB_EXTENDS, META_GITLAB_INCLUDES, META_NEEDS,
31    META_REPOSITORIES,
32};
33use chrono::{DateTime, Duration, Utc};
34use serde::{Deserialize, Serialize};
35use sha2::{Digest, Sha256};
36use std::collections::BTreeMap;
37use std::io::Write;
38use std::path::{Path, PathBuf};
39
/// Maximum lifetime, in days, allowed for a critical-severity waiver.
///
/// Council's load-bearing constraint: a critical may only bypass exit-1 if
/// its waiver expires within this window. `Baseline::accept` rejects longer
/// expirations with `BaselineError::CriticalWaiverTooLong`, and
/// `BaselineFinding::is_valid_critical_waiver` reports an over-long waiver
/// as invalid when a diff is evaluated.
pub const MAX_CRITICAL_WAIVER_DAYS: i64 = 90;

/// Minimum length of the `reason` string on a waiver, counted in Unicode
/// scalar values (`chars().count()`), not bytes. Empty, `wip`, `todo`,
/// `fix later` strings train the wrong muscle memory; force a sentence's
/// worth of justification.
pub const MIN_REASON_LENGTH: usize = 10;

/// Schema version emitted by `init` and accepted by `load`. Additive 1.x.y
/// changes are non-breaking; 2.0.0 means breaking changes. `Baseline::load`
/// accepts any version string that starts with `1.`.
pub const BASELINE_SCHEMA_VERSION: &str = "1.1.0";
54
/// Errors returned by baseline I/O and validation.
///
/// `Read`, `Write` and `Parse` carry the path of the baseline file involved;
/// the remaining variants describe serialization or waiver-validation
/// failures.
#[derive(Debug, thiserror::Error)]
pub enum BaselineError {
    /// Reading the baseline file from disk failed.
    #[error("failed to read baseline {path}: {source}")]
    Read {
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
    /// Creating the parent directory or writing the baseline file failed.
    #[error("failed to write baseline {path}: {source}")]
    Write {
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
    /// The file was read but its contents are not a valid baseline JSON
    /// document.
    #[error("failed to parse baseline {path}: {source}")]
    Parse {
        path: PathBuf,
        #[source]
        source: serde_json::Error,
    },
    /// Encoding a baseline to JSON failed. Produced via `?` thanks to the
    /// `#[from]` conversion.
    #[error("failed to serialize baseline: {0}")]
    Serialize(#[from] serde_json::Error),
    /// The file's `schema_version` does not start with `1.`.
    #[error("baseline schema version {found:?} not supported (expected major 1.x.y)")]
    UnsupportedVersion { found: String },
    /// A waiver reason was shorter than `MIN_REASON_LENGTH` characters.
    #[error("waiver reason must be at least {min} characters (got {got})")]
    ReasonTooShort { min: usize, got: usize },
    /// A critical override asked for an expiry further out than
    /// `MAX_CRITICAL_WAIVER_DAYS` from the time of acceptance.
    #[error("critical-severity override requires expires_at <= {days}d from accepted_at")]
    CriticalWaiverTooLong { days: i64 },
    /// A critical override was requested without any `expires_at`.
    #[error("critical-severity override requires expires_at to be set")]
    CriticalWaiverNoExpiry,
    /// A critical override was requested without a reason.
    // NOTE(review): not constructed anywhere in the part of this file under
    // review (`accept` reports short/empty reasons as `ReasonTooShort`);
    // confirm whether another module still produces it.
    #[error("critical-severity override requires a reason")]
    CriticalWaiverNoReason,
}
89
/// One entry in a baseline. Keyed on `fingerprint` (32-hex SHA-256 truncation
/// computed by [`compute_fingerprint`](crate::finding::compute_fingerprint)).
///
/// Two waiver shapes:
///
/// * **Plain pre-existing finding.** `reason_waived`, `severity_override`,
///   `expires_at` all `None`. The finding is reported as "pre-existing"
///   rather than a regression. Critical findings in this shape STILL fail
///   exit-1.
/// * **Explicit waiver.** `reason_waived` populated. If the original
///   severity was Critical, `severity_override: "critical"` and
///   `expires_at <= first_seen_at + 90d` are mandatory; otherwise
///   `is_valid_critical_waiver` returns `false` and the critical falls
///   through to exit 1. The check happens when the waiver is evaluated, not
///   when the file is loaded.
///
/// `Baseline::from_findings` stores Critical findings in the explicit-waiver
/// shape (canned reason, 90-day expiry); only non-critical findings are
/// written in the plain shape at `init`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct BaselineFinding {
    /// 32-hex SHA-256 fingerprint matching the SARIF/JSON/CloudEvents value.
    pub fingerprint: String,
    /// Snake-case rule id (custom rule id if present, else
    /// `FindingCategory` snake_case form).
    pub rule_id: String,
    /// Severity captured when the entry was created. Used for the
    /// critical-bypass check.
    pub severity: Severity,
    /// When this entry was written (`init` or `accept`). `accept` resets it
    /// to the acceptance time when it replaces an existing entry, and the
    /// 90-day critical-waiver window is measured from this timestamp.
    pub first_seen_at: DateTime<Utc>,
    /// Free-form justification. Required on `accept` (>=10 chars). `None`
    /// for non-critical entries bulk-added by `init`; critical entries
    /// bulk-added by `init` carry a canned reason. Omitted from the JSON
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub reason_waived: Option<String>,
    /// Acknowledges that the original severity was Critical and the waiver
    /// is intentional. Council's hard rule: any critical bypass must declare
    /// itself with this field; missing == critical falls through to exit 1.
    /// Omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub severity_override: Option<Severity>,
    /// Hard deadline. Mandatory for `severity_override: critical`. Once
    /// this timestamp is reached `is_expired` returns `true` and the entry
    /// no longer counts as a valid critical waiver. Omitted from the JSON
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub expires_at: Option<DateTime<Utc>>,
}
129
130impl BaselineFinding {
131    /// True iff this entry waives a critical via the explicit-override
132    /// shape (severity_override + reason + expires_at <= 90d).
133    pub fn is_valid_critical_waiver(&self, now: DateTime<Utc>) -> bool {
134        if self.severity_override != Some(Severity::Critical) {
135            return false;
136        }
137        let Some(expires_at) = self.expires_at else {
138            return false;
139        };
140        if expires_at <= now {
141            return false;
142        }
143        if (expires_at - self.first_seen_at) > Duration::days(MAX_CRITICAL_WAIVER_DAYS) {
144            return false;
145        }
146        matches!(self.reason_waived.as_deref(), Some(r) if r.chars().count() >= MIN_REASON_LENGTH)
147    }
148
149    /// True iff this waiver carries an `expires_at` that has already passed.
150    pub fn is_expired(&self, now: DateTime<Utc>) -> bool {
151        match self.expires_at {
152            Some(t) => t <= now,
153            None => false,
154        }
155    }
156}
157
/// Tool/version provenance captured at `init`.
///
/// Both fields are stored verbatim from the strings handed to
/// `Baseline::from_findings`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CapturedWith {
    /// Version string of the taudit build that wrote the baseline.
    pub taudit_version: String,
    /// Free-form description of the rule set at capture time
    /// (e.g. `"32-builtin"`, `"32-builtin+5-custom"`).
    pub rules_version: String,
}
166
/// One baseline file = one pipeline. Keyed by `pipeline_content_hash` so
/// renames preserve state and merge conflicts only touch the affected file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Baseline {
    /// Schema version of the file; `load` accepts values starting with `1.`.
    pub schema_version: String,
    /// Pipeline path as supplied at `init`, stored with forward-slash
    /// separators.
    pub pipeline_path: String,
    /// `sha256:<hex>` of the pipeline file's content at `init` time, with
    /// CRLF normalised to LF before hashing (see `compute_pipeline_hash`).
    pub pipeline_content_hash: String,
    /// Optional additive hardening signal captured at `init` time.
    ///
    /// Hashes parser-emitted dependency-like material (include/template/
    /// repository declarations and delegation edges) so suppression can be
    /// disabled if that material drifts even when the baseline file still
    /// exists. Absent on legacy baseline files written before v1.1.0.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub pipeline_identity_material_hash: Option<String>,
    /// Timestamp at which the baseline was captured.
    pub captured_at: DateTime<Utc>,
    /// Caller-supplied identifier of who/what captured the baseline.
    pub captured_by: String,
    /// Tool and rule-set versions in effect at capture time.
    pub captured_with: CapturedWith,
    /// Sorted by `fingerprint` ASC for stable git diffs (`save` re-sorts
    /// before writing).
    pub baseline_findings: Vec<BaselineFinding>,
}
189
190impl Baseline {
191    /// Load and parse a baseline from disk. Returns `Ok(None)` if `path`
192    /// does not exist (the OSS-friendly default — absent baseline is fine).
193    pub fn load(path: &Path) -> Result<Option<Self>, BaselineError> {
194        if !path.exists() {
195            return Ok(None);
196        }
197        let bytes = std::fs::read(path).map_err(|source| BaselineError::Read {
198            path: path.to_path_buf(),
199            source,
200        })?;
201        let baseline: Baseline =
202            serde_json::from_slice(&bytes).map_err(|source| BaselineError::Parse {
203                path: path.to_path_buf(),
204                source,
205            })?;
206        if !baseline.schema_version.starts_with("1.") {
207            return Err(BaselineError::UnsupportedVersion {
208                found: baseline.schema_version,
209            });
210        }
211        Ok(Some(baseline))
212    }
213
214    /// Write `self` to `path` as pretty JSON with stable key ordering and
215    /// fingerprint-sorted entries. Creates parent directories as needed.
216    ///
217    /// ## Atomicity contract
218    ///
219    /// The write is **atomic at the rename boundary on POSIX**: bytes are
220    /// staged into a `.<name>.tmp.<pid>.<nanos>` file in the same parent
221    /// directory and then `fs::rename`d over the destination. POSIX
222    /// guarantees `rename(2)` is atomic within a single filesystem, so a
223    /// concurrent reader either sees the prior baseline content or the new
224    /// content — never a truncated/partial JSON.
225    ///
226    /// If the process is `SIGKILL`ed (or crashes) **between** the temp-file
227    /// write and the rename, the destination is unchanged and a
228    /// dot-prefixed temp file is left in the parent directory. This is
229    /// acceptable: the next successful `save` overwrites that temp slot,
230    /// and the temp prefix `.tmp.` makes manual cleanup trivial. We do
231    /// **not** call `fsync` here — durability against host crash is a
232    /// premature optimisation absent a measured requirement.
233    pub fn save(&self, path: &Path) -> Result<(), BaselineError> {
234        let parent = path.parent().ok_or_else(|| BaselineError::Write {
235            path: path.to_path_buf(),
236            source: std::io::Error::new(
237                std::io::ErrorKind::InvalidInput,
238                "baseline path has no parent directory",
239            ),
240        })?;
241        std::fs::create_dir_all(parent).map_err(|source| BaselineError::Write {
242            path: path.to_path_buf(),
243            source,
244        })?;
245
246        let mut sorted = self.clone();
247        sorted
248            .baseline_findings
249            .sort_by(|a, b| a.fingerprint.cmp(&b.fingerprint));
250        let mut bytes = serde_json::to_vec_pretty(&sorted)?;
251        bytes.push(b'\n');
252        atomic_write(path, &bytes).map_err(|source| BaselineError::Write {
253            path: path.to_path_buf(),
254            source,
255        })?;
256        Ok(())
257    }
258
259    /// Produce a fresh baseline from `current_findings` against `graph`.
260    /// Each entry is a plain pre-existing finding (no waiver fields set).
261    /// `pipeline_path` should be the pipeline's filesystem path as the user
262    /// sees it; `content` is the raw bytes used to derive the content hash.
263    #[allow(clippy::too_many_arguments)]
264    pub fn from_findings(
265        pipeline_path: &str,
266        content: &str,
267        graph: &AuthorityGraph,
268        findings: &[Finding],
269        captured_by: &str,
270        taudit_version: &str,
271        rules_version: &str,
272        now: DateTime<Utc>,
273    ) -> Self {
274        // BUG-2: baseline init bulk-accepts ALL current findings. For CRITICAL
275        // findings the waiver contract requires a dated expiry ≤ 90 days.
276        // init sets expires_at = now + 90 days automatically so that running
277        // `taudit baseline init` on a fresh repo doesn't leave 100+ critical
278        // findings unwaived and requires 0 per-finding `baseline accept` calls.
279        let critical_expiry = now + chrono::Duration::days(MAX_CRITICAL_WAIVER_DAYS);
280        let mut baseline_findings: Vec<BaselineFinding> = findings
281            .iter()
282            .map(|f| BaselineFinding {
283                fingerprint: compute_fingerprint(f, graph),
284                rule_id: rule_id_for(f),
285                severity: f.severity,
286                first_seen_at: now,
287                // BUG-2: is_valid_critical_waiver requires severity_override,
288                // reason_waived (≥10 chars), and expires_at all set. baseline
289                // init stamps all three for Critical findings automatically.
290                reason_waived: if f.severity == Severity::Critical {
291                    Some("Accepted at baseline init — review before expiry".into())
292                } else {
293                    None
294                },
295                severity_override: if f.severity == Severity::Critical {
296                    Some(Severity::Critical)
297                } else {
298                    None
299                },
300                expires_at: if f.severity == Severity::Critical {
301                    Some(critical_expiry)
302                } else {
303                    None
304                },
305            })
306            .collect();
307        // Dedup on fingerprint (template instances collapse into one entry).
308        baseline_findings.sort_by(|a, b| a.fingerprint.cmp(&b.fingerprint));
309        baseline_findings.dedup_by(|a, b| a.fingerprint == b.fingerprint);
310
311        Baseline {
312            schema_version: BASELINE_SCHEMA_VERSION.to_string(),
313            // v3 fingerprint contract: paths are stored in the baseline
314            // with forward-slash separators so a Windows-captured baseline
315            // and a Linux-captured baseline of the same logical pipeline
316            // are byte-identical. Same normalisation applied in
317            // `compute_fingerprint`.
318            pipeline_path: pipeline_path.replace('\\', "/"),
319            pipeline_content_hash: compute_pipeline_hash(content),
320            pipeline_identity_material_hash: Some(compute_pipeline_identity_material_hash(graph)),
321            captured_at: now,
322            captured_by: captured_by.to_string(),
323            captured_with: CapturedWith {
324                taudit_version: taudit_version.to_string(),
325                rules_version: rules_version.to_string(),
326            },
327            baseline_findings,
328        }
329    }
330
331    /// Append a single waiver entry. Validates `reason` length and the
332    /// critical-waiver constraints. Returns the inserted/updated entry.
333    /// If an entry with the same fingerprint already exists, it is replaced
334    /// (idempotent re-acceptance with a refreshed reason / expiry).
335    #[allow(clippy::too_many_arguments)]
336    pub fn accept(
337        &mut self,
338        fingerprint: &str,
339        rule_id: &str,
340        severity: Severity,
341        reason: &str,
342        severity_override: Option<Severity>,
343        expires_at: Option<DateTime<Utc>>,
344        now: DateTime<Utc>,
345    ) -> Result<&BaselineFinding, BaselineError> {
346        let reason_chars = reason.chars().count();
347        if reason_chars < MIN_REASON_LENGTH {
348            return Err(BaselineError::ReasonTooShort {
349                min: MIN_REASON_LENGTH,
350                got: reason_chars,
351            });
352        }
353        if severity_override == Some(Severity::Critical) {
354            let Some(exp) = expires_at else {
355                return Err(BaselineError::CriticalWaiverNoExpiry);
356            };
357            if (exp - now) > Duration::days(MAX_CRITICAL_WAIVER_DAYS) {
358                return Err(BaselineError::CriticalWaiverTooLong {
359                    days: MAX_CRITICAL_WAIVER_DAYS,
360                });
361            }
362        }
363        let entry = BaselineFinding {
364            fingerprint: fingerprint.to_string(),
365            rule_id: rule_id.to_string(),
366            severity,
367            first_seen_at: now,
368            reason_waived: Some(reason.to_string()),
369            severity_override,
370            expires_at,
371        };
372        // Replace existing entry with the same fingerprint, else append.
373        if let Some(slot) = self
374            .baseline_findings
375            .iter_mut()
376            .find(|e| e.fingerprint == entry.fingerprint)
377        {
378            *slot = entry;
379        } else {
380            self.baseline_findings.push(entry);
381        }
382        self.baseline_findings
383            .sort_by(|a, b| a.fingerprint.cmp(&b.fingerprint));
384        Ok(self
385            .baseline_findings
386            .iter()
387            .find(|e| e.fingerprint == fingerprint)
388            .expect("just inserted"))
389    }
390
391    /// Returns true when the captured identity material matches the current
392    /// parsed graph. Legacy baselines that predate this field are considered
393    /// compatible to preserve backward compatibility.
394    pub fn identity_material_matches(&self, graph: &AuthorityGraph) -> bool {
395        match self.pipeline_identity_material_hash.as_deref() {
396            Some(expected) => expected == compute_pipeline_identity_material_hash(graph),
397            None => true,
398        }
399    }
400}
401
/// Result of diffing a fresh scan against a baseline. All three buckets
/// are independently consumable by `verify`'s exit-code logic.
#[derive(Debug, Clone)]
pub struct BaselineDiff {
    /// Findings present in the current scan whose fingerprint is NOT in
    /// the baseline. These are regressions and drive the verify exit code.
    pub new: Vec<Finding>,
    /// Baseline entries whose fingerprint is NOT present in the current
    /// scan — the underlying issue was fixed (or refactored away). Useful
    /// for the `taudit baseline diff` summary.
    pub fixed: Vec<BaselineFinding>,
    /// Findings present in BOTH the current scan and the baseline. Reported
    /// for visibility but do not drive exit-1 unless they are critical-
    /// without-valid-waiver (see [`Self::critical_without_valid_waiver`]).
    pub preexisting: Vec<Finding>,
    /// Number of findings in `preexisting` whose matching baseline entry
    /// carries `reason_waived`. Counted once per finding, so several
    /// findings sharing one fingerprint each contribute. Drives the
    /// "X waived, Y unwaived" summary.
    pub waived_count: usize,
}
421
422impl BaselineDiff {
423    /// Critical findings in `preexisting` whose baseline entry does NOT
424    /// carry a valid critical waiver. These ALWAYS count toward exit 1 —
425    /// the council's load-bearing constraint that critical waivers must be
426    /// explicit, time-bounded, and re-reviewed.
427    pub fn critical_without_valid_waiver(
428        &self,
429        baseline: &Baseline,
430        graph: &AuthorityGraph,
431        now: DateTime<Utc>,
432    ) -> Vec<Finding> {
433        self.preexisting
434            .iter()
435            .filter(|f| f.severity == Severity::Critical)
436            .filter(|f| {
437                let fp = compute_fingerprint(f, graph);
438                match baseline
439                    .baseline_findings
440                    .iter()
441                    .find(|e| e.fingerprint == fp)
442                {
443                    Some(entry) => !entry.is_valid_critical_waiver(now),
444                    None => true, // shouldn't happen — preexisting means present in baseline
445                }
446            })
447            .cloned()
448            .collect()
449    }
450}
451
452/// Diff `current_findings` against `baseline` using the SARIF-equivalent
453/// fingerprint computed from `graph`. Entry point for `verify` and the
454/// `taudit baseline diff` subcommand.
455pub fn diff(
456    current_findings: &[Finding],
457    baseline: &Baseline,
458    graph: &AuthorityGraph,
459) -> BaselineDiff {
460    use std::collections::{HashMap, HashSet};
461
462    let baseline_index: HashMap<&str, &BaselineFinding> = baseline
463        .baseline_findings
464        .iter()
465        .map(|e| (e.fingerprint.as_str(), e))
466        .collect();
467
468    let mut new = Vec::new();
469    let mut preexisting = Vec::new();
470    let mut seen_fingerprints: HashSet<String> = HashSet::new();
471    let mut waived_count = 0usize;
472
473    for finding in current_findings {
474        let fp = compute_fingerprint(finding, graph);
475        seen_fingerprints.insert(fp.clone());
476        match baseline_index.get(fp.as_str()) {
477            Some(entry) => {
478                if entry.reason_waived.is_some() {
479                    waived_count += 1;
480                }
481                preexisting.push(finding.clone());
482            }
483            None => new.push(finding.clone()),
484        }
485    }
486
487    let fixed: Vec<BaselineFinding> = baseline
488        .baseline_findings
489        .iter()
490        .filter(|e| !seen_fingerprints.contains(&e.fingerprint))
491        .cloned()
492        .collect();
493
494    BaselineDiff {
495        new,
496        fixed,
497        preexisting,
498        waived_count,
499    }
500}
501
502/// SHA-256 of `content` formatted as `sha256:<64-hex>`. The `sha256:`
503/// prefix mirrors OCI / git object naming so logs and dashboards can
504/// strip the algorithm tag uniformly.
505///
506/// CRLF is normalised to LF before hashing so that a pipeline file produces
507/// the same hash regardless of whether git's `core.autocrlf` converted its
508/// line endings (BUG-1: Windows baselines silently suppressed nothing).
509pub fn compute_pipeline_hash(content: &str) -> String {
510    let normalised: std::borrow::Cow<str> = if content.contains('\r') {
511        content.replace("\r\n", "\n").into()
512    } else {
513        content.into()
514    };
515    let digest = Sha256::digest(normalised.as_bytes());
516    format_digest(digest)
517}
518
519/// SHA-256 over dependency-like parser material (include/template/repository
520/// declarations and delegation edges), formatted as `sha256:<64-hex>`.
521///
522/// This is intentionally additive to `pipeline_content_hash`: content hash
523/// still keys baseline files for backward compatibility, while this material
524/// hash is used to detect include/template drift and disable suppression when
525/// the parser-visible dependency shape changes.
526pub fn compute_pipeline_identity_material_hash(graph: &AuthorityGraph) -> String {
527    let mut metadata: BTreeMap<String, String> = BTreeMap::new();
528    for key in [META_REPOSITORIES, META_GITLAB_INCLUDES] {
529        if let Some(value) = graph.metadata.get(key) {
530            metadata.insert(key.to_string(), value.clone());
531        }
532    }
533
534    let mut delegations: Vec<String> = graph
535        .edges
536        .iter()
537        .filter(|e| e.kind == EdgeKind::DelegatesTo)
538        .filter_map(|e| {
539            let from = graph.node(e.from)?;
540            let to = graph.node(e.to)?;
541            // BUG-7: do NOT include NodeId in this canonical string. NodeId
542            // is a `usize` insertion-order index into `graph.nodes`; any
543            // benign parser change (e.g. capturing one extra Image node)
544            // shifts every subsequent id and silently invalidates every
545            // existing field baseline via `identity_material_matches`.
546            // Names + trust zones are sufficient to detect dependency-shape
547            // drift, which is what this hash is contracted to detect.
548            Some(format!("{}->{}:{:?}", from.name, to.name, to.trust_zone))
549        })
550        .collect();
551    delegations.sort();
552
553    let mut step_dependency_metadata: Vec<String> = graph
554        .nodes
555        .iter()
556        .filter(|n| n.kind == NodeKind::Step)
557        .flat_map(|n| {
558            [META_NEEDS, META_GITLAB_EXTENDS]
559                .iter()
560                .filter_map(move |k| {
561                    n.metadata
562                        .get(*k)
563                        .map(|v| format!("{}:{}={}", n.name, k, v))
564                })
565        })
566        .collect();
567    step_dependency_metadata.sort();
568
569    let canonical = serde_json::json!({
570        "metadata": metadata,
571        "delegates_to": delegations,
572        "step_dependency_metadata": step_dependency_metadata,
573    });
574
575    let bytes = serde_json::to_vec(&canonical).expect("identity material must serialize");
576    let digest = Sha256::digest(bytes);
577    format_digest(digest)
578}
579
/// Render raw digest bytes as `sha256:` followed by lowercase hex, two
/// characters per byte.
fn format_digest(digest: impl AsRef<[u8]>) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
    const PREFIX: &str = "sha256:";

    let raw = digest.as_ref();
    let mut rendered = String::with_capacity(PREFIX.len() + raw.len() * 2);
    rendered.push_str(PREFIX);
    for &byte in raw {
        // High nibble first, then low nibble.
        rendered.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        rendered.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    rendered
}
588
589fn atomic_write(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
590    let parent = path.parent().unwrap_or_else(|| Path::new("."));
591    let file_name = path
592        .file_name()
593        .and_then(|s| s.to_str())
594        .unwrap_or("baseline.json");
595    let tmp_path = parent.join(format!(
596        ".{file_name}.{}.{}.tmp",
597        std::process::id(),
598        unique_nanos()
599    ));
600
601    let mut file = std::fs::OpenOptions::new()
602        .write(true)
603        .create_new(true)
604        .open(&tmp_path)?;
605    if let Err(err) = file.write_all(bytes).and_then(|_| file.sync_all()) {
606        let _ = std::fs::remove_file(&tmp_path);
607        return Err(err);
608    }
609    drop(file);
610
611    if let Err(err) = std::fs::rename(&tmp_path, path) {
612        let _ = std::fs::remove_file(&tmp_path);
613        return Err(err);
614    }
615
616    // Best-effort directory sync makes the rename durable on filesystems that
617    // support syncing directories. Some platforms reject opening directories.
618    if let Ok(dir) = std::fs::File::open(parent) {
619        let _ = dir.sync_all();
620    }
621    Ok(())
622}
623
/// Nanoseconds elapsed since the Unix epoch according to the system clock,
/// or `0` when the clock reads earlier than the epoch. Used to make temp
/// file names distinct.
fn unique_nanos() -> u128 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    }
}
630
/// Directory that holds the per-pipeline baseline files for the project
/// rooted at `root`: `<root>/.taudit/baselines/`.
pub fn baselines_dir(root: &Path) -> PathBuf {
    let mut dir = root.to_path_buf();
    dir.push(".taudit");
    dir.push("baselines");
    dir
}
636
/// File name (`<hex>.json`) of the baseline for the pipeline with the given
/// content hash. A leading `sha256:` tag is dropped so the name stays
/// portable on filesystems that disallow `:` (Windows NTFS); input without
/// the tag is used as-is.
pub fn baseline_filename_for(pipeline_content_hash: &str) -> String {
    let stem = pipeline_content_hash
        .strip_prefix("sha256:")
        .unwrap_or(pipeline_content_hash);
    [stem, ".json"].concat()
}
645
646/// Convenience: full `<root>/.taudit/baselines/<hex>.json` path for the
647/// given content hash.
648pub fn baseline_path_for(root: &Path, pipeline_content_hash: &str) -> PathBuf {
649    baselines_dir(root).join(baseline_filename_for(pipeline_content_hash))
650}
651
/// Public alias of [`compute_fingerprint`] — re-exported here so the baseline
/// module is the single import point for "what is the fingerprint of this
/// finding for baseline purposes". The shared test
/// `baseline_fingerprint_matches_sarif_fingerprint` asserts these are
/// byte-equal forever.
///
/// Forwards `finding` and `graph` unchanged and returns the result as-is;
/// no baseline-specific logic may be added here.
pub fn compute_finding_fingerprint(finding: &Finding, graph: &AuthorityGraph) -> String {
    compute_fingerprint(finding, graph)
}
660
661// ── Tests ───────────────────────────────────────────────────
662
663#[cfg(test)]
664mod tests {
665    use super::*;
666    use crate::finding::{FindingCategory, FindingExtras, FindingSource, Recommendation};
667    use crate::graph::{AuthorityGraph, NodeKind, PipelineSource, TrustZone};
668
669    fn source(file: &str) -> PipelineSource {
670        PipelineSource {
671            file: file.to_string(),
672            repo: None,
673            git_ref: None,
674            commit_sha: None,
675        }
676    }
677
678    fn make_graph(file: &str) -> (AuthorityGraph, crate::graph::NodeId) {
679        let mut g = AuthorityGraph::new(source(file));
680        let s = g.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
681        (g, s)
682    }
683
684    fn make_finding(
685        category: FindingCategory,
686        severity: Severity,
687        msg: &str,
688        nodes: Vec<crate::graph::NodeId>,
689    ) -> Finding {
690        Finding {
691            severity,
692            category,
693            path: None,
694            nodes_involved: nodes,
695            message: msg.to_string(),
696            recommendation: Recommendation::Manual {
697                action: "fix".to_string(),
698            },
699            source: FindingSource::BuiltIn,
700            extras: FindingExtras::default(),
701        }
702    }
703
704    fn now() -> DateTime<Utc> {
705        DateTime::parse_from_rfc3339("2026-04-26T12:00:00Z")
706            .unwrap()
707            .with_timezone(&Utc)
708    }
709
710    /// COUNCIL-MANDATED SHARED TEST: baseline fingerprint and SARIF
711    /// fingerprint MUST be byte-equal. If this ever fails, suppression
712    /// across SARIF/JSON/CloudEvents/baseline silently drifts. Non-
713    /// negotiable per the council design doc, Section C, item 5.
714    #[test]
715    fn baseline_fingerprint_matches_sarif_fingerprint() {
716        let (graph, s) = make_graph(".github/workflows/release.yml");
717        let f = make_finding(
718            FindingCategory::AuthorityPropagation,
719            Severity::High,
720            "AWS_KEY reaches third party",
721            vec![s],
722        );
723        let baseline_fp = compute_finding_fingerprint(&f, &graph);
724        let sarif_fp = compute_fingerprint(&f, &graph);
725        assert_eq!(
726            baseline_fp, sarif_fp,
727            "baseline and SARIF fingerprints MUST be byte-equal — do not introduce a second fingerprint scheme"
728        );
729    }
730
731    #[test]
732    fn pipeline_hash_is_deterministic_and_prefixed() {
733        let h = compute_pipeline_hash("on: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n");
734        assert!(h.starts_with("sha256:"));
735        assert_eq!(h.len(), 7 + 64);
736        let h2 = compute_pipeline_hash("on: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n");
737        assert_eq!(h, h2, "same content -> same hash");
738        let h3 = compute_pipeline_hash("on: push\n");
739        assert_ne!(h, h3);
740    }
741
742    #[test]
743    fn pipeline_hash_crlf_equals_lf() {
744        // BUG-1: git core.autocrlf converts \n → \r\n on Windows checkout.
745        // The hash must be identical so baselines created on Unix work on
746        // Windows and vice versa.
747        let lf = "on: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n";
748        let crlf = "on: push\r\njobs:\r\n  build:\r\n    runs-on: ubuntu-latest\r\n";
749        assert_eq!(
750            compute_pipeline_hash(lf),
751            compute_pipeline_hash(crlf),
752            "CRLF and LF content must produce the same pipeline hash"
753        );
754    }
755
756    #[test]
757    fn identity_material_hash_changes_when_dependency_metadata_changes() {
758        let (mut g1, _) = make_graph("ci.yml");
759        g1.metadata.insert(
760            META_REPOSITORIES.to_string(),
761            r#"[{"alias":"templates","used":true}]"#.to_string(),
762        );
763
764        let (mut g2, _) = make_graph("ci.yml");
765        g2.metadata.insert(
766            META_REPOSITORIES.to_string(),
767            r#"[{"alias":"templates","used":false}]"#.to_string(),
768        );
769
770        let h1 = compute_pipeline_identity_material_hash(&g1);
771        let h2 = compute_pipeline_identity_material_hash(&g2);
772        assert_ne!(
773            h1, h2,
774            "repository/include metadata drift must change identity material"
775        );
776    }
777
778    #[test]
779    fn identity_material_hash_changes_when_template_delegation_changes() {
780        let mut g1 = AuthorityGraph::new(source("ci.yml"));
781        let s1 = g1.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
782        let t1 = g1.add_node(
783            NodeKind::Image,
784            "templates/release.yml",
785            TrustZone::FirstParty,
786        );
787        g1.add_edge(s1, t1, EdgeKind::DelegatesTo);
788
789        let mut g2 = AuthorityGraph::new(source("ci.yml"));
790        let s2 = g2.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
791        let t2 = g2.add_node(
792            NodeKind::Image,
793            "templates/release-v2.yml",
794            TrustZone::FirstParty,
795        );
796        g2.add_edge(s2, t2, EdgeKind::DelegatesTo);
797
798        let h1 = compute_pipeline_identity_material_hash(&g1);
799        let h2 = compute_pipeline_identity_material_hash(&g2);
800        assert_ne!(
801            h1, h2,
802            "template delegation target drift must change identity material"
803        );
804    }
805
806    #[test]
807    fn identity_material_hash_is_stable_across_nodeid_shifts() {
808        // regression: NodeId insertion order must not affect identity-material hash.
809        //
810        // BUG-7: `NodeId` is a `usize` index into `graph.nodes` assigned in
811        // strict insertion order by `add_node`. Previously the canonical
812        // string for each delegation edge embedded `from.id`/`to.id`, so any
813        // benign parser change that captured one extra unrelated node (e.g.
814        // an Image for a self-hosted runner that wasn't captured before)
815        // shifted every subsequent NodeId, silently changing the hash and
816        // disabling suppression on every field baseline.
817        //
818        // Graph A inserts `[secret, step, target]` and delegates step→target.
819        // Graph B inserts `[secret, extra_image, step, target]` — the extra
820        // image is benign (no edges) but bumps the NodeIds of `step` and
821        // `target`. The DelegatesTo chain is logically identical in both
822        // graphs, so the identity-material hash MUST be byte-equal.
823
824        // Graph A: [secret, step, target]
825        let mut g_a = AuthorityGraph::new(source("ci.yml"));
826        let _secret_a = g_a.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
827        let step_a = g_a.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
828        let target_a = g_a.add_node(
829            NodeKind::Image,
830            "templates/release.yml",
831            TrustZone::FirstParty,
832        );
833        g_a.add_edge(step_a, target_a, EdgeKind::DelegatesTo);
834
835        // Graph B: [secret, extra_image, step, target] — same logical
836        // delegation chain, but an unrelated Image node is inserted before
837        // `step`, shifting the NodeIds of `step` and `target` by 1.
838        let mut g_b = AuthorityGraph::new(source("ci.yml"));
839        let _secret_b = g_b.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
840        let _extra_b = g_b.add_node(
841            NodeKind::Image,
842            "self-hosted-runner-image",
843            TrustZone::FirstParty,
844        );
845        let step_b = g_b.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
846        let target_b = g_b.add_node(
847            NodeKind::Image,
848            "templates/release.yml",
849            TrustZone::FirstParty,
850        );
851        g_b.add_edge(step_b, target_b, EdgeKind::DelegatesTo);
852
853        // Sanity check the test setup: the NodeIds of the delegation
854        // endpoints really do differ between A and B. If this ever stops
855        // being true the test no longer exercises the original bug.
856        assert_ne!(
857            (step_a, target_a),
858            (step_b, target_b),
859            "test precondition: extra node must shift NodeIds of delegation endpoints"
860        );
861
862        let h_a = compute_pipeline_identity_material_hash(&g_a);
863        let h_b = compute_pipeline_identity_material_hash(&g_b);
864        assert_eq!(
865            h_a, h_b,
866            "identity-material hash must be insensitive to NodeId insertion order; \
867             a benign parser change that captures one extra unrelated node MUST NOT \
868             invalidate existing field baselines"
869        );
870    }
871
872    #[test]
873    fn init_captures_current_findings() {
874        let (graph, s) = make_graph("ci.yml");
875        let f1 = make_finding(
876            FindingCategory::UnpinnedAction,
877            Severity::High,
878            "actions/checkout@v4 unpinned",
879            vec![s],
880        );
881        let f2 = make_finding(
882            FindingCategory::AuthorityPropagation,
883            Severity::Critical,
884            "AWS_KEY reaches untrusted",
885            vec![s],
886        );
887        let baseline = Baseline::from_findings(
888            "ci.yml",
889            "on: push\n",
890            &graph,
891            &[f1, f2],
892            "ryan@example.com",
893            "0.10.0",
894            "32-builtin",
895            now(),
896        );
897        assert_eq!(baseline.baseline_findings.len(), 2);
898        assert_eq!(baseline.captured_by, "ryan@example.com");
899        assert_eq!(baseline.captured_with.taudit_version, "0.10.0");
900        assert!(
901            baseline.pipeline_identity_material_hash.is_some(),
902            "new captures should persist identity material hash"
903        );
904        // Sorted by fingerprint
905        let fps: Vec<&str> = baseline
906            .baseline_findings
907            .iter()
908            .map(|e| e.fingerprint.as_str())
909            .collect();
910        let mut sorted = fps.clone();
911        sorted.sort();
912        assert_eq!(fps, sorted, "entries must be fingerprint-sorted");
913        // BUG-2: baseline init bulk-accepts all findings. Critical findings get
914        // a full valid waiver (reason_waived + severity_override + expires_at).
915        // Non-critical findings have no waiver fields set.
916        for entry in &baseline.baseline_findings {
917            if entry.severity == Severity::Critical {
918                assert!(
919                    entry.reason_waived.is_some(),
920                    "critical finding from baseline init must have auto-reason"
921                );
922                assert_eq!(
923                    entry.severity_override,
924                    Some(Severity::Critical),
925                    "critical finding from baseline init must have severity_override"
926                );
927                assert!(
928                    entry.expires_at.is_some(),
929                    "critical finding from baseline init must have auto-expiry"
930                );
931            } else {
932                assert!(entry.reason_waived.is_none());
933                assert!(entry.severity_override.is_none());
934                assert!(entry.expires_at.is_none());
935            }
936        }
937    }
938
939    #[test]
940    fn save_then_load_round_trips() {
941        let dir = tempdir();
942        let (graph, s) = make_graph("ci.yml");
943        let f = make_finding(
944            FindingCategory::UnpinnedAction,
945            Severity::High,
946            "actions/checkout@v4 unpinned",
947            vec![s],
948        );
949        let baseline = Baseline::from_findings(
950            "ci.yml",
951            "x",
952            &graph,
953            &[f],
954            "ryan",
955            "0.10.0",
956            "32-builtin",
957            now(),
958        );
959        let path = dir.join("b.json");
960        baseline.save(&path).expect("save");
961        let loaded = Baseline::load(&path).expect("load").expect("present");
962        assert_eq!(baseline, loaded);
963    }
964
965    #[test]
966    fn load_returns_none_when_absent() {
967        let dir = tempdir();
968        let path = dir.join("does-not-exist.json");
969        assert!(Baseline::load(&path).expect("ok").is_none());
970    }
971
972    #[test]
973    fn legacy_baseline_without_identity_material_remains_compatible() {
974        let baseline = empty_baseline();
975        let (graph, _) = make_graph("ci.yml");
976        assert!(
977            baseline.identity_material_matches(&graph),
978            "legacy baseline must remain compatible"
979        );
980    }
981
982    #[test]
983    fn accept_rejects_short_reason() {
984        let mut baseline = empty_baseline();
985        let err = baseline
986            .accept(
987                "abcd1234abcd1234",
988                "unpinned_action",
989                Severity::High,
990                "wip",
991                None,
992                None,
993                now(),
994            )
995            .unwrap_err();
996        assert!(matches!(err, BaselineError::ReasonTooShort { .. }));
997    }
998
999    #[test]
1000    fn accept_critical_without_expires_is_rejected() {
1001        let mut baseline = empty_baseline();
1002        let err = baseline
1003            .accept(
1004                "deadbeefdeadbeef",
1005                "trigger_context_mismatch",
1006                Severity::Critical,
1007                "Threat-modeled exception per ABC-123",
1008                Some(Severity::Critical),
1009                None, // no expiry
1010                now(),
1011            )
1012            .unwrap_err();
1013        assert!(matches!(err, BaselineError::CriticalWaiverNoExpiry));
1014    }
1015
1016    #[test]
1017    fn accept_critical_with_expiry_beyond_90d_is_rejected() {
1018        let mut baseline = empty_baseline();
1019        let too_long = now() + Duration::days(100);
1020        let err = baseline
1021            .accept(
1022                "deadbeefdeadbeef",
1023                "trigger_context_mismatch",
1024                Severity::Critical,
1025                "Threat-modeled exception per ABC-123",
1026                Some(Severity::Critical),
1027                Some(too_long),
1028                now(),
1029            )
1030            .unwrap_err();
1031        assert!(matches!(
1032            err,
1033            BaselineError::CriticalWaiverTooLong { days: 90 }
1034        ));
1035    }
1036
1037    #[test]
1038    fn accept_critical_with_valid_expiry_succeeds() {
1039        let mut baseline = empty_baseline();
1040        let exp = now() + Duration::days(60);
1041        baseline
1042            .accept(
1043                "deadbeefdeadbeef",
1044                "trigger_context_mismatch",
1045                Severity::Critical,
1046                "Threat-modeled exception per ABC-123",
1047                Some(Severity::Critical),
1048                Some(exp),
1049                now(),
1050            )
1051            .expect("valid critical waiver");
1052        let entry = &baseline.baseline_findings[0];
1053        assert!(entry.is_valid_critical_waiver(now()));
1054        // After the expiry, the waiver no longer protects.
1055        assert!(!entry.is_valid_critical_waiver(exp + Duration::seconds(1)));
1056    }
1057
1058    #[test]
1059    fn diff_classifies_new_fixed_preexisting() {
1060        let (graph, s) = make_graph("ci.yml");
1061        let f_old = make_finding(
1062            FindingCategory::UnpinnedAction,
1063            Severity::High,
1064            "actions/checkout@v4 unpinned",
1065            vec![s],
1066        );
1067        let f_unchanged = make_finding(
1068            FindingCategory::AuthorityPropagation,
1069            Severity::High,
1070            "AWS_KEY reaches untrusted",
1071            vec![s],
1072        );
1073        let baseline = Baseline::from_findings(
1074            "ci.yml",
1075            "x",
1076            &graph,
1077            &[f_old.clone(), f_unchanged.clone()],
1078            "ryan",
1079            "0.10.0",
1080            "32-builtin",
1081            now(),
1082        );
1083        // Current scan: keep `unchanged`, drop `old`, add `new`.
1084        let f_new = make_finding(
1085            FindingCategory::OverPrivilegedIdentity,
1086            Severity::Medium,
1087            "GITHUB_TOKEN over-privileged",
1088            vec![s],
1089        );
1090        let current = vec![f_unchanged.clone(), f_new.clone()];
1091        let diff = diff(&current, &baseline, &graph);
1092        assert_eq!(diff.new.len(), 1, "f_new is new");
1093        assert_eq!(diff.fixed.len(), 1, "f_old was fixed");
1094        assert_eq!(diff.preexisting.len(), 1, "f_unchanged preexisting");
1095        assert_eq!(diff.waived_count, 0, "no waivers yet");
1096    }
1097
1098    #[test]
1099    fn critical_preexisting_from_init_is_suppressed() {
1100        // BUG-2: baseline init now bulk-accepts Critical findings by setting
1101        // a full valid waiver (severity_override + reason_waived + expires_at).
1102        // Verify that a critical captured by from_findings is NOT a blocker.
1103        let (graph, s) = make_graph("ci.yml");
1104        let crit = make_finding(
1105            FindingCategory::AuthorityPropagation,
1106            Severity::Critical,
1107            "AWS_KEY reaches untrusted",
1108            vec![s],
1109        );
1110        let baseline = Baseline::from_findings(
1111            "ci.yml",
1112            "x",
1113            &graph,
1114            std::slice::from_ref(&crit),
1115            "ryan",
1116            "0.10.0",
1117            "32-builtin",
1118            now(),
1119        );
1120        let diff = diff(&[crit], &baseline, &graph);
1121        assert_eq!(diff.preexisting.len(), 1);
1122        // from_findings now sets a valid waiver — the critical must NOT block.
1123        let blockers = diff.critical_without_valid_waiver(&baseline, &graph, now());
1124        assert_eq!(
1125            blockers.len(),
1126            0,
1127            "critical from baseline init must be suppressed (valid auto-waiver)"
1128        );
1129    }
1130
1131    #[test]
1132    fn critical_preexisting_without_waiver_blocks_exit_zero() {
1133        // A manually constructed baseline entry with no waiver fields set still
1134        // forces a critical to count toward exit 1 (the original constraint).
1135        let (graph, s) = make_graph("ci.yml");
1136        let crit = make_finding(
1137            FindingCategory::AuthorityPropagation,
1138            Severity::Critical,
1139            "AWS_KEY reaches untrusted",
1140            vec![s],
1141        );
1142        let fp = compute_finding_fingerprint(&crit, &graph);
1143        let mut baseline = Baseline::from_findings(
1144            "ci.yml",
1145            "x",
1146            &graph,
1147            std::slice::from_ref(&crit),
1148            "ryan",
1149            "0.10.0",
1150            "32-builtin",
1151            now(),
1152        );
1153        // Strip the auto-waiver fields to simulate a legacy or manually-edited entry.
1154        for entry in &mut baseline.baseline_findings {
1155            if entry.fingerprint == fp {
1156                entry.reason_waived = None;
1157                entry.severity_override = None;
1158                entry.expires_at = None;
1159            }
1160        }
1161        let diff = diff(&[crit], &baseline, &graph);
1162        assert_eq!(diff.preexisting.len(), 1);
1163        let blockers = diff.critical_without_valid_waiver(&baseline, &graph, now());
1164        assert_eq!(
1165            blockers.len(),
1166            1,
1167            "critical without explicit waiver must always block"
1168        );
1169    }
1170
1171    #[test]
1172    fn critical_with_explicit_waiver_does_not_block() {
1173        let (graph, s) = make_graph("ci.yml");
1174        let crit = make_finding(
1175            FindingCategory::AuthorityPropagation,
1176            Severity::Critical,
1177            "AWS_KEY reaches untrusted",
1178            vec![s],
1179        );
1180        let mut baseline = Baseline::from_findings(
1181            "ci.yml",
1182            "x",
1183            &graph,
1184            std::slice::from_ref(&crit),
1185            "ryan",
1186            "0.10.0",
1187            "32-builtin",
1188            now(),
1189        );
1190        // Promote the entry to a valid critical waiver.
1191        let fp = compute_fingerprint(&crit, &graph);
1192        baseline
1193            .accept(
1194                &fp,
1195                "authority_propagation",
1196                Severity::Critical,
1197                "Threat-modeled; documented exception ABC-123",
1198                Some(Severity::Critical),
1199                Some(now() + Duration::days(60)),
1200                now(),
1201            )
1202            .expect("valid waiver");
1203        let diff = diff(&[crit], &baseline, &graph);
1204        let blockers = diff.critical_without_valid_waiver(&baseline, &graph, now());
1205        assert_eq!(blockers.len(), 0, "valid waiver bypasses exit 1");
1206    }
1207
1208    #[test]
1209    fn expired_critical_waiver_no_longer_protects() {
1210        let (graph, s) = make_graph("ci.yml");
1211        let crit = make_finding(
1212            FindingCategory::AuthorityPropagation,
1213            Severity::Critical,
1214            "AWS_KEY reaches untrusted",
1215            vec![s],
1216        );
1217        let mut baseline = Baseline::from_findings(
1218            "ci.yml",
1219            "x",
1220            &graph,
1221            std::slice::from_ref(&crit),
1222            "ryan",
1223            "0.10.0",
1224            "32-builtin",
1225            now(),
1226        );
1227        let fp = compute_fingerprint(&crit, &graph);
1228        let exp = now() + Duration::days(30);
1229        baseline
1230            .accept(
1231                &fp,
1232                "authority_propagation",
1233                Severity::Critical,
1234                "Threat-modeled; documented exception ABC-123",
1235                Some(Severity::Critical),
1236                Some(exp),
1237                now(),
1238            )
1239            .expect("valid waiver");
1240        // Time passes past the expiry — the waiver no longer protects.
1241        let later = exp + Duration::days(1);
1242        let diff = diff(&[crit], &baseline, &graph);
1243        let blockers = diff.critical_without_valid_waiver(&baseline, &graph, later);
1244        assert_eq!(blockers.len(), 1, "expired waiver must not protect");
1245    }
1246
1247    #[test]
1248    fn baselines_dir_and_filename_layout() {
1249        let root = std::path::Path::new("/tmp/repo");
1250        let dir = baselines_dir(root);
1251        assert_eq!(dir, std::path::PathBuf::from("/tmp/repo/.taudit/baselines"));
1252        let f = baseline_filename_for("sha256:abcdef0123");
1253        assert_eq!(f, "abcdef0123.json");
1254        let p = baseline_path_for(root, "sha256:abcdef0123");
1255        assert_eq!(
1256            p,
1257            std::path::PathBuf::from("/tmp/repo/.taudit/baselines/abcdef0123.json")
1258        );
1259    }
1260
1261    /// Atomic-write regression: simulate a `SIGKILL` between the tempfile
1262    /// write and the rename. We hand-roll the simulation by mimicking
1263    /// `Baseline::save`'s tempfile naming convention, dropping a half-
1264    /// written temp file in the baselines parent dir, and verifying the
1265    /// destination path is either the prior content or absent — never a
1266    /// truncated/corrupt JSON. (Forking a real process would require a
1267    /// child binary; we exercise the invariant directly.)
1268    #[test]
1269    fn baseline_save_is_atomic_under_signalled_interruption() {
1270        let dir = tempdir();
1271        let path = dir.join("atomic.json");
1272
1273        // First successful save establishes prior content.
1274        let baseline = empty_baseline();
1275        baseline.save(&path).expect("first save");
1276        let prior = std::fs::read(&path).expect("read prior");
1277        assert!(
1278            !prior.is_empty(),
1279            "post-save content must be non-empty JSON"
1280        );
1281
1282        // Simulate SIGKILL: a second save partially completes — bytes
1283        // land in a temp sibling but the rename never happens.
1284        let nanos = std::time::SystemTime::now()
1285            .duration_since(std::time::UNIX_EPOCH)
1286            .unwrap()
1287            .as_nanos();
1288        let pid = std::process::id();
1289        let temp_path = dir.join(format!(".atomic.json.tmp.{pid}.{nanos}"));
1290        std::fs::write(&temp_path, b"{ \"truncated\": ").expect("stage truncated temp");
1291        // No rename happens — the simulated process was killed.
1292
1293        // The destination MUST still parse as the prior baseline.
1294        let bytes = std::fs::read(&path).expect("read after simulated kill");
1295        assert_eq!(
1296            bytes, prior,
1297            "destination must remain at prior content when rename never executes"
1298        );
1299        let _: Baseline = serde_json::from_slice(&bytes)
1300            .expect("destination must remain valid JSON after simulated SIGKILL");
1301
1302        // Clean up the simulated leftover so we don't leak between tests.
1303        let _ = std::fs::remove_file(&temp_path);
1304    }
1305
1306    /// Cleanup regression: when `rename` fails, no orphan `.tmp` file
1307    /// must remain in the parent directory. We force `rename` to fail
1308    /// by making the destination path point at a non-empty directory
1309    /// (`rename(file, non_empty_dir)` fails with `ENOTDIR`/`EISDIR`/
1310    /// `ENOTEMPTY` depending on the platform).
1311    #[test]
1312    fn baseline_save_cleans_up_temp_on_rename_failure() {
1313        let dir = tempdir();
1314        let dest = dir.join("baseline.json");
1315        // Make `dest` a non-empty directory so `rename` is guaranteed to fail.
1316        std::fs::create_dir(&dest).expect("create dest dir");
1317        std::fs::write(dest.join("decoy.txt"), b"keep").expect("write decoy");
1318
1319        let baseline = empty_baseline();
1320        let err = baseline
1321            .save(&dest)
1322            .expect_err("save must fail when dest is a non-empty directory");
1323        assert!(matches!(err, BaselineError::Write { .. }));
1324
1325        // Walk the parent dir looking for any `.tmp.` temp files.
1326        let stragglers: Vec<_> = std::fs::read_dir(&dir)
1327            .expect("read parent")
1328            .filter_map(|e| e.ok())
1329            .map(|e| e.file_name().to_string_lossy().into_owned())
1330            .filter(|n| n.contains(".tmp."))
1331            .collect();
1332        assert!(
1333            stragglers.is_empty(),
1334            "rename failure must not leak temp files; found: {stragglers:?}"
1335        );
1336    }
1337
1338    #[test]
1339    fn unsupported_schema_version_rejected() {
1340        let dir = tempdir();
1341        let path = dir.join("b.json");
1342        let body = r#"{"schema_version":"2.0.0","pipeline_path":"x","pipeline_content_hash":"sha256:x","captured_at":"2026-04-26T12:00:00Z","captured_by":"r","captured_with":{"taudit_version":"0.10.0","rules_version":"32-builtin"},"baseline_findings":[]}"#;
1343        std::fs::write(&path, body).unwrap();
1344        let err = Baseline::load(&path).unwrap_err();
1345        assert!(matches!(err, BaselineError::UnsupportedVersion { .. }));
1346    }
1347
1348    // ── Test helpers ─────────────────────────────────────
1349
1350    fn empty_baseline() -> Baseline {
1351        Baseline {
1352            schema_version: BASELINE_SCHEMA_VERSION.to_string(),
1353            pipeline_path: "ci.yml".to_string(),
1354            pipeline_content_hash: compute_pipeline_hash("x"),
1355            pipeline_identity_material_hash: None,
1356            captured_at: now(),
1357            captured_by: "ryan".to_string(),
1358            captured_with: CapturedWith {
1359                taudit_version: "0.10.0".to_string(),
1360                rules_version: "32-builtin".to_string(),
1361            },
1362            baseline_findings: Vec::new(),
1363        }
1364    }
1365
1366    /// Per-process tempdir helper. Avoids pulling in the `tempfile` crate
1367    /// just for tests — we control the cleanup ourselves.
1368    fn tempdir() -> std::path::PathBuf {
1369        let pid = std::process::id();
1370        let nanos = std::time::SystemTime::now()
1371            .duration_since(std::time::UNIX_EPOCH)
1372            .unwrap()
1373            .as_nanos();
1374        let p = std::env::temp_dir().join(format!("taudit-baselines-test-{pid}-{nanos}"));
1375        std::fs::create_dir_all(&p).unwrap();
1376        p
1377    }
1378}