Skip to main content

coding_agent_search/
evidence_bundle.rs

//! Repairable evidence-bundle manifests for derived search artifacts.
//!
//! This module is deliberately producer-neutral: lexical generations, semantic
//! shards, and database backups can all describe their files as content-addressed
//! chunks, then ask the same verifier whether the bundle is complete,
//! partially repairable from parity metadata, or unsafe to use.
7
8use anyhow::{Context, Result, anyhow, bail};
9use serde::{Deserialize, Serialize};
10use std::collections::{BTreeMap, BTreeSet};
11use std::fs::{self, File, OpenOptions};
12use std::io::{Read, Write};
13use std::path::{Component, Path, PathBuf};
14use std::sync::atomic::{AtomicU64, Ordering};
15
/// Manifest schema version written by [`EvidenceBundleManifest::new`]; the
/// verifier reports any other version as unsupported.
pub const EVIDENCE_BUNDLE_MANIFEST_VERSION: u32 = 1;

/// File name of the manifest inside a bundle root directory.
pub const EVIDENCE_BUNDLE_MANIFEST_FILE: &str = "evidence-bundle-manifest.json";
18
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
20#[serde(rename_all = "snake_case")]
21pub enum EvidenceBundleKind {
22    LexicalGeneration,
23    SemanticShard,
24    DatabaseBackup,
25}
26
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
28#[serde(rename_all = "snake_case")]
29pub enum EvidenceBundleChunkRole {
30    Manifest,
31    LexicalShard,
32    SemanticShard,
33    DatabaseMain,
34    DatabaseWal,
35    Metadata,
36    Parity,
37    Other,
38}
39
40#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
41#[serde(rename_all = "snake_case")]
42pub enum EvidenceBundleVerificationStatus {
43    Complete,
44    PartiallyRepairable,
45    Unsafe,
46}
47
48#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
49#[serde(rename_all = "snake_case")]
50pub enum EvidenceBundleIssueKind {
51    CorruptManifest,
52    UnsupportedManifestVersion,
53    EmptyManifest,
54    DuplicateChunkPath,
55    UnsafeChunkPath,
56    MissingChunk,
57    SizeMismatch,
58    DigestMismatch,
59    InvalidWalStateChunk,
60    WalMainMismatch,
61}
62
63#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
64pub struct EvidenceBundleChunk {
65    pub path: String,
66    pub role: EvidenceBundleChunkRole,
67    pub size_bytes: u64,
68    pub blake3: String,
69    #[serde(default = "default_required_chunk")]
70    pub required: bool,
71    #[serde(default, skip_serializing_if = "Option::is_none")]
72    pub parity_group: Option<String>,
73}
74
75impl EvidenceBundleChunk {
76    pub fn from_file(
77        bundle_root: &Path,
78        relative_path: impl Into<String>,
79        role: EvidenceBundleChunkRole,
80        required: bool,
81        parity_group: Option<String>,
82    ) -> Result<Self> {
83        let path = relative_path.into();
84        let resolved = resolve_existing_bundle_path(bundle_root, &path)?;
85        let (size_bytes, blake3) = digest_file(&resolved)
86            .with_context(|| format!("digesting bundle chunk {}", resolved.display()))?;
87        Ok(Self {
88            path,
89            role,
90            size_bytes,
91            blake3,
92            required,
93            parity_group,
94        })
95    }
96}
97
98#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
99pub struct EvidenceBundleParityGroup {
100    pub group_id: String,
101    #[serde(default)]
102    pub chunk_paths: Vec<String>,
103    pub repairable_failed_chunks: u32,
104}
105
106#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
107pub struct DatabaseWalStateEvidence {
108    pub main_chunk_path: String,
109    #[serde(default, skip_serializing_if = "Option::is_none")]
110    pub wal_chunk_path: Option<String>,
111    pub main_state_fingerprint: String,
112    #[serde(default, skip_serializing_if = "Option::is_none")]
113    pub wal_base_fingerprint: Option<String>,
114}
115
116#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
117pub struct EvidenceBundleManifest {
118    pub manifest_version: u32,
119    pub bundle_id: String,
120    pub kind: EvidenceBundleKind,
121    pub created_at_ms: i64,
122    #[serde(default, skip_serializing_if = "Option::is_none")]
123    pub source_db_fingerprint: Option<String>,
124    #[serde(default)]
125    pub chunks: Vec<EvidenceBundleChunk>,
126    #[serde(default)]
127    pub parity_groups: Vec<EvidenceBundleParityGroup>,
128    #[serde(default, skip_serializing_if = "Option::is_none")]
129    pub database_wal_state: Option<DatabaseWalStateEvidence>,
130    #[serde(default = "default_explicit_delete_required")]
131    pub explicit_delete_required: bool,
132}
133
134impl EvidenceBundleManifest {
135    pub fn new(bundle_id: impl Into<String>, kind: EvidenceBundleKind, created_at_ms: i64) -> Self {
136        Self {
137            manifest_version: EVIDENCE_BUNDLE_MANIFEST_VERSION,
138            bundle_id: bundle_id.into(),
139            kind,
140            created_at_ms,
141            source_db_fingerprint: None,
142            chunks: Vec::new(),
143            parity_groups: Vec::new(),
144            database_wal_state: None,
145            explicit_delete_required: true,
146        }
147    }
148
149    pub fn path(bundle_root: &Path) -> PathBuf {
150        bundle_root.join(EVIDENCE_BUNDLE_MANIFEST_FILE)
151    }
152
153    pub fn load(path: &Path) -> Result<Self> {
154        let bytes = fs::read(path)
155            .with_context(|| format!("reading evidence bundle manifest {}", path.display()))?;
156        serde_json::from_slice(&bytes)
157            .with_context(|| format!("parsing evidence bundle manifest {}", path.display()))
158    }
159
160    pub fn save(&self, bundle_root: &Path) -> Result<PathBuf> {
161        fs::create_dir_all(bundle_root)
162            .with_context(|| format!("creating evidence bundle root {}", bundle_root.display()))?;
163        let path = Self::path(bundle_root);
164        let bytes = serde_json::to_vec_pretty(self)
165            .with_context(|| "serializing evidence bundle manifest")?;
166        let tmp_path = write_evidence_bundle_manifest_temp(&path, &bytes)?;
167        fs::rename(&tmp_path, &path).with_context(|| {
168            format!(
169                "publishing evidence bundle manifest {} -> {}",
170                tmp_path.display(),
171                path.display()
172            )
173        })?;
174        Ok(path)
175    }
176
177    pub fn verify(&self, bundle_root: &Path) -> EvidenceBundleVerificationReport {
178        verify_manifest(self, bundle_root)
179    }
180}
181
182fn unique_evidence_bundle_manifest_temp_path(path: &Path) -> PathBuf {
183    static NEXT_NONCE: AtomicU64 = AtomicU64::new(0);
184
185    let timestamp = std::time::SystemTime::now()
186        .duration_since(std::time::UNIX_EPOCH)
187        .unwrap_or_default()
188        .as_nanos();
189    let nonce = NEXT_NONCE.fetch_add(1, Ordering::Relaxed);
190    let file_name = path
191        .file_name()
192        .and_then(|name| name.to_str())
193        .unwrap_or(EVIDENCE_BUNDLE_MANIFEST_FILE);
194
195    path.with_file_name(format!(
196        ".{file_name}.{}.{}.{}.tmp",
197        std::process::id(),
198        timestamp,
199        nonce
200    ))
201}
202
203fn write_evidence_bundle_manifest_temp(final_path: &Path, bytes: &[u8]) -> Result<PathBuf> {
204    for _ in 0..100 {
205        let temp_path = unique_evidence_bundle_manifest_temp_path(final_path);
206        match write_evidence_bundle_manifest_temp_at(&temp_path, bytes) {
207            Ok(()) => return Ok(temp_path),
208            Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => continue,
209            Err(err) => {
210                return Err(err).with_context(|| {
211                    format!("writing evidence bundle manifest {}", temp_path.display())
212                });
213            }
214        }
215    }
216
217    bail!(
218        "failed to allocate unique evidence bundle manifest temp path for {}",
219        final_path.display()
220    )
221}
222
/// Creates `path` as a new file, writes `bytes` to it, and syncs it to disk.
///
/// The file is opened with `create_new`, so an existing entry at `path`
/// (including a symlink) is never followed or overwritten; that case returns
/// `ErrorKind::AlreadyExists` and leaves the existing entry untouched.
///
/// # Errors
///
/// Returns the underlying I/O error from creating, writing, or syncing the
/// file. If writing or syncing fails after the file was created, the partial
/// file is removed on a best-effort basis.
fn write_evidence_bundle_manifest_temp_at(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
    let mut file = OpenOptions::new().write(true).create_new(true).open(path)?;
    let outcome = file.write_all(bytes).and_then(|()| file.sync_all());
    if outcome.is_err() {
        // Only reached for a file this call created, so removing it cannot
        // touch a pre-existing entry. Close the handle first so removal also
        // works on platforms that refuse to delete open files.
        drop(file);
        let _ = fs::remove_file(path);
    }
    outcome
}
229
230#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
231pub struct EvidenceBundleIssue {
232    pub kind: EvidenceBundleIssueKind,
233    pub path: Option<String>,
234    pub message: String,
235    pub repairable: bool,
236}
237
238#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
239pub struct EvidenceBundleGcDryRun {
240    pub dry_run: bool,
241    pub explicit_delete_required: bool,
242    pub deletion_allowed: bool,
243    pub retained_chunk_count: usize,
244    pub retained_bytes: u64,
245    pub reason: String,
246}
247
248#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
249pub struct EvidenceBundleVerificationReport {
250    pub manifest_version: Option<u32>,
251    pub bundle_id: Option<String>,
252    pub kind: Option<EvidenceBundleKind>,
253    pub status: EvidenceBundleVerificationStatus,
254    pub issues: Vec<EvidenceBundleIssue>,
255    pub verified_chunk_count: usize,
256    pub repairable_issue_count: usize,
257    pub unsafe_issue_count: usize,
258    pub expected_chunk_count: usize,
259    pub expected_bytes: u64,
260    pub verified_bytes: u64,
261    pub gc_dry_run: EvidenceBundleGcDryRun,
262}
263
264impl EvidenceBundleVerificationReport {
265    pub fn is_complete(&self) -> bool {
266        self.status == EvidenceBundleVerificationStatus::Complete
267    }
268
269    pub fn is_partially_repairable(&self) -> bool {
270        self.status == EvidenceBundleVerificationStatus::PartiallyRepairable
271    }
272
273    pub fn is_unsafe(&self) -> bool {
274        self.status == EvidenceBundleVerificationStatus::Unsafe
275    }
276}
277
278pub fn verify_evidence_bundle_manifest_file(
279    bundle_root: &Path,
280    manifest_path: &Path,
281) -> EvidenceBundleVerificationReport {
282    match EvidenceBundleManifest::load(manifest_path) {
283        Ok(manifest) => manifest.verify(bundle_root),
284        Err(err) => unsafe_report(
285            EvidenceBundleIssueKind::CorruptManifest,
286            None,
287            format!("manifest could not be loaded: {err}"),
288        ),
289    }
290}
291
292fn verify_manifest(
293    manifest: &EvidenceBundleManifest,
294    bundle_root: &Path,
295) -> EvidenceBundleVerificationReport {
296    let mut issues = Vec::new();
297    let mut chunk_failures = Vec::new();
298    let mut verified_chunk_count = 0usize;
299    let mut verified_bytes = 0u64;
300    let expected_bytes = manifest
301        .chunks
302        .iter()
303        .fold(0u64, |sum, chunk| sum.saturating_add(chunk.size_bytes));
304
305    if manifest.manifest_version != EVIDENCE_BUNDLE_MANIFEST_VERSION {
306        issues.push(issue(
307            EvidenceBundleIssueKind::UnsupportedManifestVersion,
308            None,
309            format!(
310                "manifest version {} is not supported by verifier version {}",
311                manifest.manifest_version, EVIDENCE_BUNDLE_MANIFEST_VERSION
312            ),
313            false,
314        ));
315    }
316    if manifest.chunks.is_empty() {
317        issues.push(issue(
318            EvidenceBundleIssueKind::EmptyManifest,
319            None,
320            "manifest contains no chunks".to_string(),
321            false,
322        ));
323    }
324
325    let parity_index = parity_index(manifest);
326    let mut verified_parity_groups = BTreeSet::new();
327    let mut seen_paths = BTreeSet::new();
328    for chunk in &manifest.chunks {
329        if !seen_paths.insert(chunk.path.clone()) {
330            chunk_failures.push(raw_chunk_failure(
331                EvidenceBundleIssueKind::DuplicateChunkPath,
332                chunk.path.clone(),
333                "duplicate chunk path in manifest".to_string(),
334            ));
335            continue;
336        }
337
338        let resolved = match resolve_bundle_path(bundle_root, &chunk.path) {
339            Ok(path) => path,
340            Err(err) => {
341                chunk_failures.push(raw_chunk_failure(
342                    EvidenceBundleIssueKind::UnsafeChunkPath,
343                    chunk.path.clone(),
344                    err.to_string(),
345                ));
346                continue;
347            }
348        };
349        if !resolved.exists() {
350            if !chunk.required {
351                continue;
352            }
353            chunk_failures.push(raw_chunk_failure(
354                EvidenceBundleIssueKind::MissingChunk,
355                chunk.path.clone(),
356                format!("required bundle chunk {} is missing", chunk.path),
357            ));
358            continue;
359        }
360        let resolved = match resolve_existing_bundle_path(bundle_root, &chunk.path) {
361            Ok(path) => path,
362            Err(err) => {
363                chunk_failures.push(raw_chunk_failure(
364                    EvidenceBundleIssueKind::UnsafeChunkPath,
365                    chunk.path.clone(),
366                    err.to_string(),
367                ));
368                continue;
369            }
370        };
371
372        match digest_file(&resolved) {
373            Ok((actual_size, actual_digest)) => {
374                if actual_size != chunk.size_bytes {
375                    chunk_failures.push(raw_chunk_failure(
376                        EvidenceBundleIssueKind::SizeMismatch,
377                        chunk.path.clone(),
378                        format!(
379                            "chunk {} has size {}, expected {}",
380                            chunk.path, actual_size, chunk.size_bytes
381                        ),
382                    ));
383                    continue;
384                }
385                if actual_digest != chunk.blake3 {
386                    chunk_failures.push(raw_chunk_failure(
387                        EvidenceBundleIssueKind::DigestMismatch,
388                        chunk.path.clone(),
389                        format!("chunk {} digest does not match manifest", chunk.path),
390                    ));
391                    continue;
392                }
393                verified_chunk_count = verified_chunk_count.saturating_add(1);
394                verified_bytes = verified_bytes.saturating_add(actual_size);
395                if chunk.role == EvidenceBundleChunkRole::Parity
396                    && let Some(group) = parity_index.get(&chunk.path)
397                {
398                    verified_parity_groups.insert(group.group_id.clone());
399                }
400            }
401            Err(err) => chunk_failures.push(raw_chunk_failure(
402                EvidenceBundleIssueKind::MissingChunk,
403                chunk.path.clone(),
404                format!("chunk {} could not be read: {err}", chunk.path),
405            )),
406        }
407    }
408
409    let failure_counts = chunk_failure_counts_by_parity_group(&chunk_failures, &parity_index);
410    for failure in chunk_failures {
411        let repairable = chunk_failure_is_repairable(
412            failure.kind,
413            &failure.path,
414            &parity_index,
415            &verified_parity_groups,
416            &failure_counts,
417        );
418        issues.push(issue(
419            failure.kind,
420            Some(failure.path),
421            failure.message,
422            repairable,
423        ));
424    }
425
426    if let Some(wal_state) = &manifest.database_wal_state {
427        validate_wal_state_chunk_declaration(
428            &mut issues,
429            manifest,
430            &wal_state.main_chunk_path,
431            "main DB",
432        );
433        if let Some(wal_chunk_path) = wal_state.wal_chunk_path.as_deref() {
434            validate_wal_state_chunk_declaration(&mut issues, manifest, wal_chunk_path, "WAL");
435            if wal_state.wal_base_fingerprint.as_deref()
436                != Some(wal_state.main_state_fingerprint.as_str())
437            {
438                issues.push(issue(
439                    EvidenceBundleIssueKind::WalMainMismatch,
440                    wal_state.wal_chunk_path.clone(),
441                    format!(
442                        "WAL base fingerprint {:?} does not match main DB fingerprint {}",
443                        wal_state.wal_base_fingerprint, wal_state.main_state_fingerprint
444                    ),
445                    false,
446                ));
447            }
448        }
449    }
450
451    let repairable_issue_count = issues.iter().filter(|issue| issue.repairable).count();
452    let unsafe_issue_count = issues.len().saturating_sub(repairable_issue_count);
453    let status = if unsafe_issue_count > 0 {
454        EvidenceBundleVerificationStatus::Unsafe
455    } else if repairable_issue_count > 0 {
456        EvidenceBundleVerificationStatus::PartiallyRepairable
457    } else {
458        EvidenceBundleVerificationStatus::Complete
459    };
460
461    EvidenceBundleVerificationReport {
462        manifest_version: Some(manifest.manifest_version),
463        bundle_id: Some(manifest.bundle_id.clone()),
464        kind: Some(manifest.kind),
465        status,
466        issues,
467        verified_chunk_count,
468        repairable_issue_count,
469        unsafe_issue_count,
470        expected_chunk_count: manifest.chunks.len(),
471        expected_bytes,
472        verified_bytes,
473        gc_dry_run: EvidenceBundleGcDryRun {
474            dry_run: true,
475            explicit_delete_required: manifest.explicit_delete_required,
476            deletion_allowed: false,
477            retained_chunk_count: manifest.chunks.len(),
478            retained_bytes: expected_bytes,
479            reason: "evidence bundle verifier is read-only; deletion requires a separate explicit operator-approved GC path".to_string(),
480        },
481    }
482}
483
484#[derive(Debug)]
485struct RawChunkFailure {
486    kind: EvidenceBundleIssueKind,
487    path: String,
488    message: String,
489}
490
491fn raw_chunk_failure(
492    kind: EvidenceBundleIssueKind,
493    path: String,
494    message: String,
495) -> RawChunkFailure {
496    RawChunkFailure {
497        kind,
498        path,
499        message,
500    }
501}
502
503fn parity_index(manifest: &EvidenceBundleManifest) -> BTreeMap<String, &EvidenceBundleParityGroup> {
504    let mut index = BTreeMap::new();
505    for group in &manifest.parity_groups {
506        for path in &group.chunk_paths {
507            index.insert(path.clone(), group);
508        }
509    }
510    index
511}
512
513fn chunk_failure_counts_by_parity_group(
514    failures: &[RawChunkFailure],
515    parity_index: &BTreeMap<String, &EvidenceBundleParityGroup>,
516) -> BTreeMap<String, u32> {
517    let mut counts = BTreeMap::new();
518    for failure in failures {
519        if let Some(group) = parity_index.get(&failure.path) {
520            *counts.entry(group.group_id.clone()).or_insert(0) += 1;
521        }
522    }
523    counts
524}
525
526fn chunk_failure_is_repairable(
527    kind: EvidenceBundleIssueKind,
528    path: &str,
529    parity_index: &BTreeMap<String, &EvidenceBundleParityGroup>,
530    verified_parity_groups: &BTreeSet<String>,
531    failure_counts: &BTreeMap<String, u32>,
532) -> bool {
533    if !matches!(
534        kind,
535        EvidenceBundleIssueKind::MissingChunk
536            | EvidenceBundleIssueKind::SizeMismatch
537            | EvidenceBundleIssueKind::DigestMismatch
538    ) {
539        return false;
540    }
541    let Some(group) = parity_index.get(path) else {
542        return false;
543    };
544    if !verified_parity_groups.contains(&group.group_id) {
545        return false;
546    }
547    let failures_in_group = failure_counts
548        .get(&group.group_id)
549        .copied()
550        .unwrap_or_default();
551    failures_in_group > 0 && failures_in_group <= group.repairable_failed_chunks
552}
553
554fn validate_wal_state_chunk_declaration(
555    issues: &mut Vec<EvidenceBundleIssue>,
556    manifest: &EvidenceBundleManifest,
557    path: &str,
558    label: &str,
559) {
560    let Some(chunk) = manifest.chunks.iter().find(|chunk| chunk.path == path) else {
561        issues.push(issue(
562            EvidenceBundleIssueKind::InvalidWalStateChunk,
563            Some(path.to_string()),
564            format!("database_wal_state {label} chunk {path} is not declared in manifest chunks"),
565            false,
566        ));
567        return;
568    };
569
570    if !chunk.required {
571        issues.push(issue(
572            EvidenceBundleIssueKind::InvalidWalStateChunk,
573            Some(path.to_string()),
574            format!("database_wal_state {label} chunk {path} must be declared as required"),
575            false,
576        ));
577    }
578}
579
580fn issue(
581    kind: EvidenceBundleIssueKind,
582    path: Option<String>,
583    message: String,
584    repairable: bool,
585) -> EvidenceBundleIssue {
586    EvidenceBundleIssue {
587        kind,
588        path,
589        message,
590        repairable,
591    }
592}
593
594fn unsafe_report(
595    kind: EvidenceBundleIssueKind,
596    path: Option<String>,
597    message: String,
598) -> EvidenceBundleVerificationReport {
599    EvidenceBundleVerificationReport {
600        manifest_version: None,
601        bundle_id: None,
602        kind: None,
603        status: EvidenceBundleVerificationStatus::Unsafe,
604        issues: vec![issue(kind, path, message, false)],
605        verified_chunk_count: 0,
606        repairable_issue_count: 0,
607        unsafe_issue_count: 1,
608        expected_chunk_count: 0,
609        expected_bytes: 0,
610        verified_bytes: 0,
611        gc_dry_run: EvidenceBundleGcDryRun {
612            dry_run: true,
613            explicit_delete_required: true,
614            deletion_allowed: false,
615            retained_chunk_count: 0,
616            retained_bytes: 0,
617            reason: "corrupt or unreadable evidence bundle manifest cannot authorize deletion"
618                .to_string(),
619        },
620    }
621}
622
623fn digest_file(path: &Path) -> Result<(u64, String)> {
624    let mut file = File::open(path).with_context(|| format!("opening {}", path.display()))?;
625    let mut hasher = blake3::Hasher::new();
626    let mut size = 0u64;
627    let mut buffer = [0u8; 64 * 1024];
628    loop {
629        let read = file
630            .read(&mut buffer)
631            .with_context(|| format!("reading {}", path.display()))?;
632        if read == 0 {
633            break;
634        }
635        size = size.saturating_add(read as u64);
636        hasher.update(&buffer[..read]);
637    }
638    Ok((size, hasher.finalize().to_hex().to_string()))
639}
640
641fn resolve_bundle_path(bundle_root: &Path, relative_path: &str) -> Result<PathBuf> {
642    let path = Path::new(relative_path);
643    if path.is_absolute() {
644        bail!("bundle chunk path must be relative: {relative_path}");
645    }
646    for component in path.components() {
647        match component {
648            Component::Normal(_) => {}
649            Component::CurDir
650            | Component::ParentDir
651            | Component::RootDir
652            | Component::Prefix(_) => {
653                bail!("bundle chunk path contains unsafe component: {relative_path}");
654            }
655        }
656    }
657    if relative_path.is_empty() {
658        return Err(anyhow!("bundle chunk path must not be empty"));
659    }
660    Ok(bundle_root.join(path))
661}
662
663fn resolve_existing_bundle_path(bundle_root: &Path, relative_path: &str) -> Result<PathBuf> {
664    let resolved = resolve_bundle_path(bundle_root, relative_path)?;
665    let canonical_root = fs::canonicalize(bundle_root)
666        .with_context(|| format!("canonicalizing bundle root {}", bundle_root.display()))?;
667    let canonical_resolved = fs::canonicalize(&resolved)
668        .with_context(|| format!("canonicalizing bundle chunk {}", resolved.display()))?;
669    if !canonical_resolved.starts_with(&canonical_root) {
670        bail!("bundle chunk path resolves outside bundle root: {relative_path}");
671    }
672    Ok(canonical_resolved)
673}
674
/// Serde default for `EvidenceBundleChunk::required`: chunks are required
/// unless the manifest says otherwise.
fn default_required_chunk() -> bool {
    let required_by_default = true;
    required_by_default
}
678
/// Serde default for `EvidenceBundleManifest::explicit_delete_required`: the
/// flag is `true` unless the manifest says otherwise.
fn default_explicit_delete_required() -> bool {
    let explicit_by_default = true;
    explicit_by_default
}
682
683#[cfg(test)]
684mod tests {
685    use super::*;
686    use tempfile::TempDir;
687
688    fn write_chunk(root: &Path, path: &str, bytes: &[u8]) {
689        let full_path = root.join(path);
690        if let Some(parent) = full_path.parent() {
691            fs::create_dir_all(parent).unwrap();
692        }
693        fs::write(full_path, bytes).unwrap();
694    }
695
696    fn chunk(root: &Path, path: &str, role: EvidenceBundleChunkRole) -> EvidenceBundleChunk {
697        EvidenceBundleChunk::from_file(root, path, role, true, None).unwrap()
698    }
699
700    #[cfg(unix)]
701    #[test]
702    fn manifest_temp_write_refuses_existing_symlink() {
703        use std::os::unix::fs::symlink;
704
705        let tmp = TempDir::new().unwrap();
706        let protected = tmp.path().join("protected.json");
707        let temp_path = tmp.path().join(".manifest.json.tmp");
708
709        fs::write(&protected, b"protected").unwrap();
710        symlink(&protected, &temp_path).unwrap();
711
712        let err = write_evidence_bundle_manifest_temp_at(&temp_path, br#"{"bundle":true}"#)
713            .expect_err("existing temp symlink must be rejected");
714
715        assert_eq!(err.kind(), std::io::ErrorKind::AlreadyExists);
716        assert_eq!(fs::read(&protected).unwrap(), b"protected");
717        assert!(
718            fs::symlink_metadata(&temp_path)
719                .unwrap()
720                .file_type()
721                .is_symlink(),
722            "failed temp write should leave the existing symlink untouched"
723        );
724    }
725
726    #[test]
727    fn verifier_proves_complete_lexical_generation_bundle() {
728        let tmp = TempDir::new().unwrap();
729        write_chunk(tmp.path(), "manifest.json", br#"{"docs":2}"#);
730        write_chunk(tmp.path(), "shards/segment-a", b"lexical shard bytes");
731
732        let mut manifest = EvidenceBundleManifest::new(
733            "lexical-generation-1",
734            EvidenceBundleKind::LexicalGeneration,
735            1_700_000_000_000,
736        );
737        manifest.chunks = vec![
738            chunk(
739                tmp.path(),
740                "manifest.json",
741                EvidenceBundleChunkRole::Manifest,
742            ),
743            chunk(
744                tmp.path(),
745                "shards/segment-a",
746                EvidenceBundleChunkRole::LexicalShard,
747            ),
748        ];
749        manifest.save(tmp.path()).unwrap();
750
751        let report = verify_evidence_bundle_manifest_file(
752            tmp.path(),
753            &EvidenceBundleManifest::path(tmp.path()),
754        );
755        assert!(report.is_complete(), "{report:?}");
756        assert_eq!(report.verified_chunk_count, 2);
757        assert_eq!(report.unsafe_issue_count, 0);
758        assert!(!report.gc_dry_run.deletion_allowed);
759    }
760
761    #[test]
762    fn corrupt_manifest_sidecar_is_unsafe_to_use() {
763        let tmp = TempDir::new().unwrap();
764        let manifest_path = EvidenceBundleManifest::path(tmp.path());
765        fs::write(&manifest_path, b"{not-json").unwrap();
766
767        let report = verify_evidence_bundle_manifest_file(tmp.path(), &manifest_path);
768        assert!(report.is_unsafe(), "{report:?}");
769        assert_eq!(
770            report.issues[0].kind,
771            EvidenceBundleIssueKind::CorruptManifest
772        );
773        assert!(!report.issues[0].repairable);
774        assert!(!report.gc_dry_run.deletion_allowed);
775    }
776
777    #[test]
778    fn missing_semantic_shard_with_parity_is_partially_repairable() {
779        let tmp = TempDir::new().unwrap();
780        write_chunk(tmp.path(), "semantic/shard-0.f16", b"semantic shard zero");
781        write_chunk(tmp.path(), "semantic/parity-0.bin", b"parity bytes");
782
783        let mut shard = chunk(
784            tmp.path(),
785            "semantic/shard-0.f16",
786            EvidenceBundleChunkRole::SemanticShard,
787        );
788        shard.parity_group = Some("semantic-parity-0".to_string());
789        let mut missing = shard.clone();
790        missing.path = "semantic/shard-1.f16".to_string();
791        missing.size_bytes = 19;
792        missing.blake3 = blake3::hash(b"semantic shard one").to_hex().to_string();
793        let mut parity = chunk(
794            tmp.path(),
795            "semantic/parity-0.bin",
796            EvidenceBundleChunkRole::Parity,
797        );
798        parity.parity_group = Some("semantic-parity-0".to_string());
799
800        let mut manifest = EvidenceBundleManifest::new(
801            "semantic-tier-fast-0",
802            EvidenceBundleKind::SemanticShard,
803            1_700_000_000_001,
804        );
805        manifest.chunks = vec![shard, missing, parity];
806        manifest.parity_groups = vec![EvidenceBundleParityGroup {
807            group_id: "semantic-parity-0".to_string(),
808            chunk_paths: vec![
809                "semantic/shard-0.f16".to_string(),
810                "semantic/shard-1.f16".to_string(),
811                "semantic/parity-0.bin".to_string(),
812            ],
813            repairable_failed_chunks: 1,
814        }];
815
816        let report = manifest.verify(tmp.path());
817        assert!(report.is_partially_repairable(), "{report:?}");
818        assert_eq!(report.repairable_issue_count, 1);
819        assert_eq!(report.unsafe_issue_count, 0);
820        assert_eq!(report.issues[0].kind, EvidenceBundleIssueKind::MissingChunk);
821        assert!(report.issues[0].repairable);
822    }
823
824    #[test]
825    fn declared_parity_without_verified_parity_artifact_is_unsafe() {
826        let tmp = TempDir::new().unwrap();
827        write_chunk(tmp.path(), "semantic/shard-0.f16", b"semantic shard zero");
828
829        let mut shard = chunk(
830            tmp.path(),
831            "semantic/shard-0.f16",
832            EvidenceBundleChunkRole::SemanticShard,
833        );
834        shard.parity_group = Some("semantic-parity-0".to_string());
835        let mut missing = shard.clone();
836        missing.path = "semantic/shard-1.f16".to_string();
837        missing.size_bytes = 19;
838        missing.blake3 = blake3::hash(b"semantic shard one").to_hex().to_string();
839
840        let mut manifest = EvidenceBundleManifest::new(
841            "semantic-missing-parity-artifact",
842            EvidenceBundleKind::SemanticShard,
843            1_700_000_000_002,
844        );
845        manifest.chunks = vec![shard, missing];
846        manifest.parity_groups = vec![EvidenceBundleParityGroup {
847            group_id: "semantic-parity-0".to_string(),
848            chunk_paths: vec![
849                "semantic/shard-0.f16".to_string(),
850                "semantic/shard-1.f16".to_string(),
851                "semantic/parity-0.bin".to_string(),
852            ],
853            repairable_failed_chunks: 1,
854        }];
855
856        let report = manifest.verify(tmp.path());
857        assert!(report.is_unsafe(), "{report:?}");
858        assert_eq!(report.repairable_issue_count, 0);
859        assert_eq!(report.unsafe_issue_count, 1);
860        assert_eq!(report.issues[0].kind, EvidenceBundleIssueKind::MissingChunk);
861        assert!(
862            !report.issues[0].repairable,
863            "a parity declaration without a verified parity artifact must not claim repairability"
864        );
865    }
866
867    #[test]
868    fn parity_does_not_repair_manifest_structure_errors() {
869        let tmp = TempDir::new().unwrap();
870        write_chunk(tmp.path(), "semantic/shard-0.f16", b"semantic shard zero");
871
872        let mut shard = chunk(
873            tmp.path(),
874            "semantic/shard-0.f16",
875            EvidenceBundleChunkRole::SemanticShard,
876        );
877        shard.parity_group = Some("semantic-parity-0".to_string());
878
879        let mut manifest = EvidenceBundleManifest::new(
880            "semantic-duplicate-path",
881            EvidenceBundleKind::SemanticShard,
882            1_700_000_000_002,
883        );
884        manifest.chunks = vec![shard.clone(), shard];
885        manifest.parity_groups = vec![EvidenceBundleParityGroup {
886            group_id: "semantic-parity-0".to_string(),
887            chunk_paths: vec!["semantic/shard-0.f16".to_string()],
888            repairable_failed_chunks: 1,
889        }];
890
891        let report = manifest.verify(tmp.path());
892        assert!(report.is_unsafe(), "{report:?}");
893        assert_eq!(
894            report.issues[0].kind,
895            EvidenceBundleIssueKind::DuplicateChunkPath
896        );
897        assert!(!report.issues[0].repairable);
898    }
899
900    #[test]
901    fn mismatched_database_wal_state_is_unsafe_even_when_files_hash() {
902        let tmp = TempDir::new().unwrap();
903        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");
904        write_chunk(tmp.path(), "db/cass.db-wal", b"wal bytes");
905
906        let mut manifest = EvidenceBundleManifest::new(
907            "db-backup-1",
908            EvidenceBundleKind::DatabaseBackup,
909            1_700_000_000_003,
910        );
911        manifest.chunks = vec![
912            chunk(
913                tmp.path(),
914                "db/cass.db",
915                EvidenceBundleChunkRole::DatabaseMain,
916            ),
917            chunk(
918                tmp.path(),
919                "db/cass.db-wal",
920                EvidenceBundleChunkRole::DatabaseWal,
921            ),
922        ];
923        manifest.database_wal_state = Some(DatabaseWalStateEvidence {
924            main_chunk_path: "db/cass.db".to_string(),
925            wal_chunk_path: Some("db/cass.db-wal".to_string()),
926            main_state_fingerprint: "main-fp".to_string(),
927            wal_base_fingerprint: Some("other-main-fp".to_string()),
928        });
929
930        let report = manifest.verify(tmp.path());
931        assert!(report.is_unsafe(), "{report:?}");
932        assert_eq!(report.verified_chunk_count, 2);
933        assert!(
934            report
935                .issues
936                .iter()
937                .any(|issue| issue.kind == EvidenceBundleIssueKind::WalMainMismatch)
938        );
939    }
940
941    #[test]
942    fn database_wal_state_rejects_undeclared_wal_chunk() {
943        let tmp = TempDir::new().unwrap();
944        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");
945
946        let mut manifest = EvidenceBundleManifest::new(
947            "db-backup-undeclared-wal",
948            EvidenceBundleKind::DatabaseBackup,
949            1_700_000_000_003,
950        );
951        manifest.chunks = vec![chunk(
952            tmp.path(),
953            "db/cass.db",
954            EvidenceBundleChunkRole::DatabaseMain,
955        )];
956        manifest.database_wal_state = Some(DatabaseWalStateEvidence {
957            main_chunk_path: "db/cass.db".to_string(),
958            wal_chunk_path: Some("db/cass.db-wal".to_string()),
959            main_state_fingerprint: "main-fp".to_string(),
960            wal_base_fingerprint: Some("main-fp".to_string()),
961        });
962
963        let report = manifest.verify(tmp.path());
964        assert!(report.is_unsafe(), "{report:?}");
965        assert!(
966            report.issues.iter().any(|issue| {
967                issue.kind == EvidenceBundleIssueKind::InvalidWalStateChunk
968                    && issue.path.as_deref() == Some("db/cass.db-wal")
969            }),
970            "database_wal_state must not certify an undeclared WAL chunk: {report:?}"
971        );
972    }
973
974    #[test]
975    fn database_wal_state_rejects_optional_wal_chunk() {
976        let tmp = TempDir::new().unwrap();
977        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");
978
979        let mut manifest = EvidenceBundleManifest::new(
980            "db-backup-optional-wal",
981            EvidenceBundleKind::DatabaseBackup,
982            1_700_000_000_003,
983        );
984        manifest.chunks = vec![
985            chunk(
986                tmp.path(),
987                "db/cass.db",
988                EvidenceBundleChunkRole::DatabaseMain,
989            ),
990            EvidenceBundleChunk {
991                path: "db/cass.db-wal".to_string(),
992                role: EvidenceBundleChunkRole::DatabaseWal,
993                size_bytes: 0,
994                blake3: blake3::hash(b"").to_hex().to_string(),
995                required: false,
996                parity_group: None,
997            },
998        ];
999        manifest.database_wal_state = Some(DatabaseWalStateEvidence {
1000            main_chunk_path: "db/cass.db".to_string(),
1001            wal_chunk_path: Some("db/cass.db-wal".to_string()),
1002            main_state_fingerprint: "main-fp".to_string(),
1003            wal_base_fingerprint: Some("main-fp".to_string()),
1004        });
1005
1006        let report = manifest.verify(tmp.path());
1007        assert!(report.is_unsafe(), "{report:?}");
1008        assert!(
1009            report.issues.iter().any(|issue| {
1010                issue.kind == EvidenceBundleIssueKind::InvalidWalStateChunk
1011                    && issue.path.as_deref() == Some("db/cass.db-wal")
1012            }),
1013            "database_wal_state WAL chunks must not be optional: {report:?}"
1014        );
1015    }
1016
1017    #[test]
1018    fn verifier_gc_surface_is_dry_run_and_does_not_delete_files() {
1019        let tmp = TempDir::new().unwrap();
1020        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");
1021
1022        let mut manifest = EvidenceBundleManifest::new(
1023            "db-backup-retained",
1024            EvidenceBundleKind::DatabaseBackup,
1025            1_700_000_000_004,
1026        );
1027        manifest.chunks = vec![chunk(
1028            tmp.path(),
1029            "db/cass.db",
1030            EvidenceBundleChunkRole::DatabaseMain,
1031        )];
1032
1033        let report = manifest.verify(tmp.path());
1034        assert!(report.is_complete(), "{report:?}");
1035        assert!(report.gc_dry_run.dry_run);
1036        assert!(report.gc_dry_run.explicit_delete_required);
1037        assert!(!report.gc_dry_run.deletion_allowed);
1038        assert!(tmp.path().join("db/cass.db").exists());
1039    }
1040
1041    #[test]
1042    fn missing_optional_chunk_does_not_make_bundle_unsafe() {
1043        let tmp = TempDir::new().unwrap();
1044        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");
1045
1046        let mut manifest = EvidenceBundleManifest::new(
1047            "db-backup-with-optional-sidecar",
1048            EvidenceBundleKind::DatabaseBackup,
1049            1_700_000_000_005,
1050        );
1051        manifest.chunks = vec![
1052            chunk(
1053                tmp.path(),
1054                "db/cass.db",
1055                EvidenceBundleChunkRole::DatabaseMain,
1056            ),
1057            EvidenceBundleChunk {
1058                path: "db/cass.db-shm".to_string(),
1059                role: EvidenceBundleChunkRole::Metadata,
1060                size_bytes: 0,
1061                blake3: blake3::hash(b"").to_hex().to_string(),
1062                required: false,
1063                parity_group: None,
1064            },
1065        ];
1066
1067        let report = manifest.verify(tmp.path());
1068        assert!(report.is_complete(), "{report:?}");
1069        assert_eq!(report.verified_chunk_count, 1);
1070        assert!(report.issues.is_empty());
1071    }
1072
1073    #[test]
1074    fn unsafe_relative_paths_are_rejected() {
1075        let tmp = TempDir::new().unwrap();
1076        let mut manifest = EvidenceBundleManifest::new(
1077            "bad-path",
1078            EvidenceBundleKind::LexicalGeneration,
1079            1_700_000_000_006,
1080        );
1081        manifest.chunks = vec![EvidenceBundleChunk {
1082            path: "../outside".to_string(),
1083            role: EvidenceBundleChunkRole::LexicalShard,
1084            size_bytes: 1,
1085            blake3: blake3::hash(b"x").to_hex().to_string(),
1086            required: true,
1087            parity_group: None,
1088        }];
1089
1090        let report = manifest.verify(tmp.path());
1091        assert!(report.is_unsafe(), "{report:?}");
1092        assert_eq!(
1093            report.issues[0].kind,
1094            EvidenceBundleIssueKind::UnsafeChunkPath
1095        );
1096    }
1097
1098    #[cfg(unix)]
1099    #[test]
1100    fn symlinked_chunk_that_escapes_bundle_root_is_rejected() {
1101        let tmp = TempDir::new().unwrap();
1102        let outside = TempDir::new().unwrap();
1103        let outside_chunk = outside.path().join("segment-a");
1104        fs::write(&outside_chunk, b"outside shard bytes").unwrap();
1105        fs::create_dir_all(tmp.path().join("shards")).unwrap();
1106        std::os::unix::fs::symlink(&outside_chunk, tmp.path().join("shards/segment-a")).unwrap();
1107
1108        let mut manifest = EvidenceBundleManifest::new(
1109            "symlink-escape",
1110            EvidenceBundleKind::LexicalGeneration,
1111            1_700_000_000_007,
1112        );
1113        manifest.chunks = vec![EvidenceBundleChunk {
1114            path: "shards/segment-a".to_string(),
1115            role: EvidenceBundleChunkRole::LexicalShard,
1116            size_bytes: b"outside shard bytes".len() as u64,
1117            blake3: blake3::hash(b"outside shard bytes").to_hex().to_string(),
1118            required: true,
1119            parity_group: None,
1120        }];
1121
1122        let report = manifest.verify(tmp.path());
1123
1124        assert!(report.is_unsafe(), "{report:?}");
1125        assert_eq!(report.verified_chunk_count, 0);
1126        assert_eq!(
1127            report.issues[0].kind,
1128            EvidenceBundleIssueKind::UnsafeChunkPath
1129        );
1130    }
1131}