Skip to main content

coding_agent_search/
evidence_bundle.rs

1//! Repairable evidence-bundle manifests for derived search artifacts.
2//!
3//! This module is deliberately producer-neutral: lexical generations, semantic
4//! shards, and database backups can all describe their files as content-addressed
5//! chunks, then ask the same verifier whether the bundle is complete,
6//! partially repairable from parity metadata, or unsafe to use.
7
8use anyhow::{Context, Result, anyhow, bail};
9use serde::{Deserialize, Serialize};
10use std::collections::{BTreeMap, BTreeSet};
11use std::fs::{self, File};
12use std::io::Read;
13use std::path::{Component, Path, PathBuf};
14
/// Manifest schema version stamped by [`EvidenceBundleManifest::new`]; the
/// verifier reports any other version as unsupported.
pub const EVIDENCE_BUNDLE_MANIFEST_VERSION: u32 = 1;
/// File name of the manifest, joined onto the bundle root by
/// [`EvidenceBundleManifest::path`].
pub const EVIDENCE_BUNDLE_MANIFEST_FILE: &str = "evidence-bundle-manifest.json";
17
18#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
19#[serde(rename_all = "snake_case")]
20pub enum EvidenceBundleKind {
21    LexicalGeneration,
22    SemanticShard,
23    DatabaseBackup,
24}
25
26#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
27#[serde(rename_all = "snake_case")]
28pub enum EvidenceBundleChunkRole {
29    Manifest,
30    LexicalShard,
31    SemanticShard,
32    DatabaseMain,
33    DatabaseWal,
34    Metadata,
35    Parity,
36    Other,
37}
38
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
40#[serde(rename_all = "snake_case")]
41pub enum EvidenceBundleVerificationStatus {
42    Complete,
43    PartiallyRepairable,
44    Unsafe,
45}
46
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
48#[serde(rename_all = "snake_case")]
49pub enum EvidenceBundleIssueKind {
50    CorruptManifest,
51    UnsupportedManifestVersion,
52    EmptyManifest,
53    DuplicateChunkPath,
54    UnsafeChunkPath,
55    MissingChunk,
56    SizeMismatch,
57    DigestMismatch,
58    InvalidWalStateChunk,
59    WalMainMismatch,
60}
61
62#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
63pub struct EvidenceBundleChunk {
64    pub path: String,
65    pub role: EvidenceBundleChunkRole,
66    pub size_bytes: u64,
67    pub blake3: String,
68    #[serde(default = "default_required_chunk")]
69    pub required: bool,
70    #[serde(default, skip_serializing_if = "Option::is_none")]
71    pub parity_group: Option<String>,
72}
73
74impl EvidenceBundleChunk {
75    pub fn from_file(
76        bundle_root: &Path,
77        relative_path: impl Into<String>,
78        role: EvidenceBundleChunkRole,
79        required: bool,
80        parity_group: Option<String>,
81    ) -> Result<Self> {
82        let path = relative_path.into();
83        let resolved = resolve_existing_bundle_path(bundle_root, &path)?;
84        let (size_bytes, blake3) = digest_file(&resolved)
85            .with_context(|| format!("digesting bundle chunk {}", resolved.display()))?;
86        Ok(Self {
87            path,
88            role,
89            size_bytes,
90            blake3,
91            required,
92            parity_group,
93        })
94    }
95}
96
97#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
98pub struct EvidenceBundleParityGroup {
99    pub group_id: String,
100    #[serde(default)]
101    pub chunk_paths: Vec<String>,
102    pub repairable_failed_chunks: u32,
103}
104
105#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
106pub struct DatabaseWalStateEvidence {
107    pub main_chunk_path: String,
108    #[serde(default, skip_serializing_if = "Option::is_none")]
109    pub wal_chunk_path: Option<String>,
110    pub main_state_fingerprint: String,
111    #[serde(default, skip_serializing_if = "Option::is_none")]
112    pub wal_base_fingerprint: Option<String>,
113}
114
115#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
116pub struct EvidenceBundleManifest {
117    pub manifest_version: u32,
118    pub bundle_id: String,
119    pub kind: EvidenceBundleKind,
120    pub created_at_ms: i64,
121    #[serde(default, skip_serializing_if = "Option::is_none")]
122    pub source_db_fingerprint: Option<String>,
123    #[serde(default)]
124    pub chunks: Vec<EvidenceBundleChunk>,
125    #[serde(default)]
126    pub parity_groups: Vec<EvidenceBundleParityGroup>,
127    #[serde(default, skip_serializing_if = "Option::is_none")]
128    pub database_wal_state: Option<DatabaseWalStateEvidence>,
129    #[serde(default = "default_explicit_delete_required")]
130    pub explicit_delete_required: bool,
131}
132
133impl EvidenceBundleManifest {
134    pub fn new(bundle_id: impl Into<String>, kind: EvidenceBundleKind, created_at_ms: i64) -> Self {
135        Self {
136            manifest_version: EVIDENCE_BUNDLE_MANIFEST_VERSION,
137            bundle_id: bundle_id.into(),
138            kind,
139            created_at_ms,
140            source_db_fingerprint: None,
141            chunks: Vec::new(),
142            parity_groups: Vec::new(),
143            database_wal_state: None,
144            explicit_delete_required: true,
145        }
146    }
147
148    pub fn path(bundle_root: &Path) -> PathBuf {
149        bundle_root.join(EVIDENCE_BUNDLE_MANIFEST_FILE)
150    }
151
152    pub fn load(path: &Path) -> Result<Self> {
153        let bytes = fs::read(path)
154            .with_context(|| format!("reading evidence bundle manifest {}", path.display()))?;
155        serde_json::from_slice(&bytes)
156            .with_context(|| format!("parsing evidence bundle manifest {}", path.display()))
157    }
158
159    pub fn save(&self, bundle_root: &Path) -> Result<PathBuf> {
160        fs::create_dir_all(bundle_root)
161            .with_context(|| format!("creating evidence bundle root {}", bundle_root.display()))?;
162        let path = Self::path(bundle_root);
163        let tmp_path = path.with_extension("json.tmp");
164        let bytes = serde_json::to_vec_pretty(self)
165            .with_context(|| "serializing evidence bundle manifest")?;
166        fs::write(&tmp_path, bytes)
167            .with_context(|| format!("writing evidence bundle manifest {}", tmp_path.display()))?;
168        fs::rename(&tmp_path, &path).with_context(|| {
169            format!(
170                "publishing evidence bundle manifest {} -> {}",
171                tmp_path.display(),
172                path.display()
173            )
174        })?;
175        Ok(path)
176    }
177
178    pub fn verify(&self, bundle_root: &Path) -> EvidenceBundleVerificationReport {
179        verify_manifest(self, bundle_root)
180    }
181}
182
183#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
184pub struct EvidenceBundleIssue {
185    pub kind: EvidenceBundleIssueKind,
186    pub path: Option<String>,
187    pub message: String,
188    pub repairable: bool,
189}
190
191#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
192pub struct EvidenceBundleGcDryRun {
193    pub dry_run: bool,
194    pub explicit_delete_required: bool,
195    pub deletion_allowed: bool,
196    pub retained_chunk_count: usize,
197    pub retained_bytes: u64,
198    pub reason: String,
199}
200
201#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
202pub struct EvidenceBundleVerificationReport {
203    pub manifest_version: Option<u32>,
204    pub bundle_id: Option<String>,
205    pub kind: Option<EvidenceBundleKind>,
206    pub status: EvidenceBundleVerificationStatus,
207    pub issues: Vec<EvidenceBundleIssue>,
208    pub verified_chunk_count: usize,
209    pub repairable_issue_count: usize,
210    pub unsafe_issue_count: usize,
211    pub expected_chunk_count: usize,
212    pub expected_bytes: u64,
213    pub verified_bytes: u64,
214    pub gc_dry_run: EvidenceBundleGcDryRun,
215}
216
217impl EvidenceBundleVerificationReport {
218    pub fn is_complete(&self) -> bool {
219        self.status == EvidenceBundleVerificationStatus::Complete
220    }
221
222    pub fn is_partially_repairable(&self) -> bool {
223        self.status == EvidenceBundleVerificationStatus::PartiallyRepairable
224    }
225
226    pub fn is_unsafe(&self) -> bool {
227        self.status == EvidenceBundleVerificationStatus::Unsafe
228    }
229}
230
231pub fn verify_evidence_bundle_manifest_file(
232    bundle_root: &Path,
233    manifest_path: &Path,
234) -> EvidenceBundleVerificationReport {
235    match EvidenceBundleManifest::load(manifest_path) {
236        Ok(manifest) => manifest.verify(bundle_root),
237        Err(err) => unsafe_report(
238            EvidenceBundleIssueKind::CorruptManifest,
239            None,
240            format!("manifest could not be loaded: {err}"),
241        ),
242    }
243}
244
245fn verify_manifest(
246    manifest: &EvidenceBundleManifest,
247    bundle_root: &Path,
248) -> EvidenceBundleVerificationReport {
249    let mut issues = Vec::new();
250    let mut chunk_failures = Vec::new();
251    let mut verified_chunk_count = 0usize;
252    let mut verified_bytes = 0u64;
253    let expected_bytes = manifest
254        .chunks
255        .iter()
256        .fold(0u64, |sum, chunk| sum.saturating_add(chunk.size_bytes));
257
258    if manifest.manifest_version != EVIDENCE_BUNDLE_MANIFEST_VERSION {
259        issues.push(issue(
260            EvidenceBundleIssueKind::UnsupportedManifestVersion,
261            None,
262            format!(
263                "manifest version {} is not supported by verifier version {}",
264                manifest.manifest_version, EVIDENCE_BUNDLE_MANIFEST_VERSION
265            ),
266            false,
267        ));
268    }
269    if manifest.chunks.is_empty() {
270        issues.push(issue(
271            EvidenceBundleIssueKind::EmptyManifest,
272            None,
273            "manifest contains no chunks".to_string(),
274            false,
275        ));
276    }
277
278    let parity_index = parity_index(manifest);
279    let mut verified_parity_groups = BTreeSet::new();
280    let mut seen_paths = BTreeSet::new();
281    for chunk in &manifest.chunks {
282        if !seen_paths.insert(chunk.path.clone()) {
283            chunk_failures.push(raw_chunk_failure(
284                EvidenceBundleIssueKind::DuplicateChunkPath,
285                chunk.path.clone(),
286                "duplicate chunk path in manifest".to_string(),
287            ));
288            continue;
289        }
290
291        let resolved = match resolve_bundle_path(bundle_root, &chunk.path) {
292            Ok(path) => path,
293            Err(err) => {
294                chunk_failures.push(raw_chunk_failure(
295                    EvidenceBundleIssueKind::UnsafeChunkPath,
296                    chunk.path.clone(),
297                    err.to_string(),
298                ));
299                continue;
300            }
301        };
302        if !resolved.exists() {
303            if !chunk.required {
304                continue;
305            }
306            chunk_failures.push(raw_chunk_failure(
307                EvidenceBundleIssueKind::MissingChunk,
308                chunk.path.clone(),
309                format!("required bundle chunk {} is missing", chunk.path),
310            ));
311            continue;
312        }
313        let resolved = match resolve_existing_bundle_path(bundle_root, &chunk.path) {
314            Ok(path) => path,
315            Err(err) => {
316                chunk_failures.push(raw_chunk_failure(
317                    EvidenceBundleIssueKind::UnsafeChunkPath,
318                    chunk.path.clone(),
319                    err.to_string(),
320                ));
321                continue;
322            }
323        };
324
325        match digest_file(&resolved) {
326            Ok((actual_size, actual_digest)) => {
327                if actual_size != chunk.size_bytes {
328                    chunk_failures.push(raw_chunk_failure(
329                        EvidenceBundleIssueKind::SizeMismatch,
330                        chunk.path.clone(),
331                        format!(
332                            "chunk {} has size {}, expected {}",
333                            chunk.path, actual_size, chunk.size_bytes
334                        ),
335                    ));
336                    continue;
337                }
338                if actual_digest != chunk.blake3 {
339                    chunk_failures.push(raw_chunk_failure(
340                        EvidenceBundleIssueKind::DigestMismatch,
341                        chunk.path.clone(),
342                        format!("chunk {} digest does not match manifest", chunk.path),
343                    ));
344                    continue;
345                }
346                verified_chunk_count = verified_chunk_count.saturating_add(1);
347                verified_bytes = verified_bytes.saturating_add(actual_size);
348                if chunk.role == EvidenceBundleChunkRole::Parity
349                    && let Some(group) = parity_index.get(&chunk.path)
350                {
351                    verified_parity_groups.insert(group.group_id.clone());
352                }
353            }
354            Err(err) => chunk_failures.push(raw_chunk_failure(
355                EvidenceBundleIssueKind::MissingChunk,
356                chunk.path.clone(),
357                format!("chunk {} could not be read: {err}", chunk.path),
358            )),
359        }
360    }
361
362    let failure_counts = chunk_failure_counts_by_parity_group(&chunk_failures, &parity_index);
363    for failure in chunk_failures {
364        let repairable = chunk_failure_is_repairable(
365            failure.kind,
366            &failure.path,
367            &parity_index,
368            &verified_parity_groups,
369            &failure_counts,
370        );
371        issues.push(issue(
372            failure.kind,
373            Some(failure.path),
374            failure.message,
375            repairable,
376        ));
377    }
378
379    if let Some(wal_state) = &manifest.database_wal_state {
380        validate_wal_state_chunk_declaration(
381            &mut issues,
382            manifest,
383            &wal_state.main_chunk_path,
384            "main DB",
385        );
386        if let Some(wal_chunk_path) = wal_state.wal_chunk_path.as_deref() {
387            validate_wal_state_chunk_declaration(&mut issues, manifest, wal_chunk_path, "WAL");
388            if wal_state.wal_base_fingerprint.as_deref()
389                != Some(wal_state.main_state_fingerprint.as_str())
390            {
391                issues.push(issue(
392                    EvidenceBundleIssueKind::WalMainMismatch,
393                    wal_state.wal_chunk_path.clone(),
394                    format!(
395                        "WAL base fingerprint {:?} does not match main DB fingerprint {}",
396                        wal_state.wal_base_fingerprint, wal_state.main_state_fingerprint
397                    ),
398                    false,
399                ));
400            }
401        }
402    }
403
404    let repairable_issue_count = issues.iter().filter(|issue| issue.repairable).count();
405    let unsafe_issue_count = issues.len().saturating_sub(repairable_issue_count);
406    let status = if unsafe_issue_count > 0 {
407        EvidenceBundleVerificationStatus::Unsafe
408    } else if repairable_issue_count > 0 {
409        EvidenceBundleVerificationStatus::PartiallyRepairable
410    } else {
411        EvidenceBundleVerificationStatus::Complete
412    };
413
414    EvidenceBundleVerificationReport {
415        manifest_version: Some(manifest.manifest_version),
416        bundle_id: Some(manifest.bundle_id.clone()),
417        kind: Some(manifest.kind),
418        status,
419        issues,
420        verified_chunk_count,
421        repairable_issue_count,
422        unsafe_issue_count,
423        expected_chunk_count: manifest.chunks.len(),
424        expected_bytes,
425        verified_bytes,
426        gc_dry_run: EvidenceBundleGcDryRun {
427            dry_run: true,
428            explicit_delete_required: manifest.explicit_delete_required,
429            deletion_allowed: false,
430            retained_chunk_count: manifest.chunks.len(),
431            retained_bytes: expected_bytes,
432            reason: "evidence bundle verifier is read-only; deletion requires a separate explicit operator-approved GC path".to_string(),
433        },
434    }
435}
436
437#[derive(Debug)]
438struct RawChunkFailure {
439    kind: EvidenceBundleIssueKind,
440    path: String,
441    message: String,
442}
443
444fn raw_chunk_failure(
445    kind: EvidenceBundleIssueKind,
446    path: String,
447    message: String,
448) -> RawChunkFailure {
449    RawChunkFailure {
450        kind,
451        path,
452        message,
453    }
454}
455
456fn parity_index(manifest: &EvidenceBundleManifest) -> BTreeMap<String, &EvidenceBundleParityGroup> {
457    let mut index = BTreeMap::new();
458    for group in &manifest.parity_groups {
459        for path in &group.chunk_paths {
460            index.insert(path.clone(), group);
461        }
462    }
463    index
464}
465
466fn chunk_failure_counts_by_parity_group(
467    failures: &[RawChunkFailure],
468    parity_index: &BTreeMap<String, &EvidenceBundleParityGroup>,
469) -> BTreeMap<String, u32> {
470    let mut counts = BTreeMap::new();
471    for failure in failures {
472        if let Some(group) = parity_index.get(&failure.path) {
473            *counts.entry(group.group_id.clone()).or_insert(0) += 1;
474        }
475    }
476    counts
477}
478
479fn chunk_failure_is_repairable(
480    kind: EvidenceBundleIssueKind,
481    path: &str,
482    parity_index: &BTreeMap<String, &EvidenceBundleParityGroup>,
483    verified_parity_groups: &BTreeSet<String>,
484    failure_counts: &BTreeMap<String, u32>,
485) -> bool {
486    if !matches!(
487        kind,
488        EvidenceBundleIssueKind::MissingChunk
489            | EvidenceBundleIssueKind::SizeMismatch
490            | EvidenceBundleIssueKind::DigestMismatch
491    ) {
492        return false;
493    }
494    let Some(group) = parity_index.get(path) else {
495        return false;
496    };
497    if !verified_parity_groups.contains(&group.group_id) {
498        return false;
499    }
500    let failures_in_group = failure_counts
501        .get(&group.group_id)
502        .copied()
503        .unwrap_or_default();
504    failures_in_group > 0 && failures_in_group <= group.repairable_failed_chunks
505}
506
507fn validate_wal_state_chunk_declaration(
508    issues: &mut Vec<EvidenceBundleIssue>,
509    manifest: &EvidenceBundleManifest,
510    path: &str,
511    label: &str,
512) {
513    let Some(chunk) = manifest.chunks.iter().find(|chunk| chunk.path == path) else {
514        issues.push(issue(
515            EvidenceBundleIssueKind::InvalidWalStateChunk,
516            Some(path.to_string()),
517            format!("database_wal_state {label} chunk {path} is not declared in manifest chunks"),
518            false,
519        ));
520        return;
521    };
522
523    if !chunk.required {
524        issues.push(issue(
525            EvidenceBundleIssueKind::InvalidWalStateChunk,
526            Some(path.to_string()),
527            format!("database_wal_state {label} chunk {path} must be declared as required"),
528            false,
529        ));
530    }
531}
532
533fn issue(
534    kind: EvidenceBundleIssueKind,
535    path: Option<String>,
536    message: String,
537    repairable: bool,
538) -> EvidenceBundleIssue {
539    EvidenceBundleIssue {
540        kind,
541        path,
542        message,
543        repairable,
544    }
545}
546
547fn unsafe_report(
548    kind: EvidenceBundleIssueKind,
549    path: Option<String>,
550    message: String,
551) -> EvidenceBundleVerificationReport {
552    EvidenceBundleVerificationReport {
553        manifest_version: None,
554        bundle_id: None,
555        kind: None,
556        status: EvidenceBundleVerificationStatus::Unsafe,
557        issues: vec![issue(kind, path, message, false)],
558        verified_chunk_count: 0,
559        repairable_issue_count: 0,
560        unsafe_issue_count: 1,
561        expected_chunk_count: 0,
562        expected_bytes: 0,
563        verified_bytes: 0,
564        gc_dry_run: EvidenceBundleGcDryRun {
565            dry_run: true,
566            explicit_delete_required: true,
567            deletion_allowed: false,
568            retained_chunk_count: 0,
569            retained_bytes: 0,
570            reason: "corrupt or unreadable evidence bundle manifest cannot authorize deletion"
571                .to_string(),
572        },
573    }
574}
575
576fn digest_file(path: &Path) -> Result<(u64, String)> {
577    let mut file = File::open(path).with_context(|| format!("opening {}", path.display()))?;
578    let mut hasher = blake3::Hasher::new();
579    let mut size = 0u64;
580    let mut buffer = [0u8; 64 * 1024];
581    loop {
582        let read = file
583            .read(&mut buffer)
584            .with_context(|| format!("reading {}", path.display()))?;
585        if read == 0 {
586            break;
587        }
588        size = size.saturating_add(read as u64);
589        hasher.update(&buffer[..read]);
590    }
591    Ok((size, hasher.finalize().to_hex().to_string()))
592}
593
594fn resolve_bundle_path(bundle_root: &Path, relative_path: &str) -> Result<PathBuf> {
595    let path = Path::new(relative_path);
596    if path.is_absolute() {
597        bail!("bundle chunk path must be relative: {relative_path}");
598    }
599    for component in path.components() {
600        match component {
601            Component::Normal(_) => {}
602            Component::CurDir
603            | Component::ParentDir
604            | Component::RootDir
605            | Component::Prefix(_) => {
606                bail!("bundle chunk path contains unsafe component: {relative_path}");
607            }
608        }
609    }
610    if relative_path.is_empty() {
611        return Err(anyhow!("bundle chunk path must not be empty"));
612    }
613    Ok(bundle_root.join(path))
614}
615
616fn resolve_existing_bundle_path(bundle_root: &Path, relative_path: &str) -> Result<PathBuf> {
617    let resolved = resolve_bundle_path(bundle_root, relative_path)?;
618    let canonical_root = fs::canonicalize(bundle_root)
619        .with_context(|| format!("canonicalizing bundle root {}", bundle_root.display()))?;
620    let canonical_resolved = fs::canonicalize(&resolved)
621        .with_context(|| format!("canonicalizing bundle chunk {}", resolved.display()))?;
622    if !canonical_resolved.starts_with(&canonical_root) {
623        bail!("bundle chunk path resolves outside bundle root: {relative_path}");
624    }
625    Ok(canonical_resolved)
626}
627
/// Serde default for `EvidenceBundleChunk::required`: a chunk is required
/// unless the manifest says otherwise.
fn default_required_chunk() -> bool {
    let required_by_default = true;
    required_by_default
}
631
/// Serde default for `EvidenceBundleManifest::explicit_delete_required`:
/// deletion requires an explicit request unless the manifest says otherwise.
fn default_explicit_delete_required() -> bool {
    let explicit_by_default = true;
    explicit_by_default
}
635
636#[cfg(test)]
637mod tests {
638    use super::*;
639    use tempfile::TempDir;
640
641    fn write_chunk(root: &Path, path: &str, bytes: &[u8]) {
642        let full_path = root.join(path);
643        if let Some(parent) = full_path.parent() {
644            fs::create_dir_all(parent).unwrap();
645        }
646        fs::write(full_path, bytes).unwrap();
647    }
648
649    fn chunk(root: &Path, path: &str, role: EvidenceBundleChunkRole) -> EvidenceBundleChunk {
650        EvidenceBundleChunk::from_file(root, path, role, true, None).unwrap()
651    }
652
653    #[test]
654    fn verifier_proves_complete_lexical_generation_bundle() {
655        let tmp = TempDir::new().unwrap();
656        write_chunk(tmp.path(), "manifest.json", br#"{"docs":2}"#);
657        write_chunk(tmp.path(), "shards/segment-a", b"lexical shard bytes");
658
659        let mut manifest = EvidenceBundleManifest::new(
660            "lexical-generation-1",
661            EvidenceBundleKind::LexicalGeneration,
662            1_700_000_000_000,
663        );
664        manifest.chunks = vec![
665            chunk(
666                tmp.path(),
667                "manifest.json",
668                EvidenceBundleChunkRole::Manifest,
669            ),
670            chunk(
671                tmp.path(),
672                "shards/segment-a",
673                EvidenceBundleChunkRole::LexicalShard,
674            ),
675        ];
676        manifest.save(tmp.path()).unwrap();
677
678        let report = verify_evidence_bundle_manifest_file(
679            tmp.path(),
680            &EvidenceBundleManifest::path(tmp.path()),
681        );
682        assert!(report.is_complete(), "{report:?}");
683        assert_eq!(report.verified_chunk_count, 2);
684        assert_eq!(report.unsafe_issue_count, 0);
685        assert!(!report.gc_dry_run.deletion_allowed);
686    }
687
688    #[test]
689    fn corrupt_manifest_sidecar_is_unsafe_to_use() {
690        let tmp = TempDir::new().unwrap();
691        let manifest_path = EvidenceBundleManifest::path(tmp.path());
692        fs::write(&manifest_path, b"{not-json").unwrap();
693
694        let report = verify_evidence_bundle_manifest_file(tmp.path(), &manifest_path);
695        assert!(report.is_unsafe(), "{report:?}");
696        assert_eq!(
697            report.issues[0].kind,
698            EvidenceBundleIssueKind::CorruptManifest
699        );
700        assert!(!report.issues[0].repairable);
701        assert!(!report.gc_dry_run.deletion_allowed);
702    }
703
704    #[test]
705    fn missing_semantic_shard_with_parity_is_partially_repairable() {
706        let tmp = TempDir::new().unwrap();
707        write_chunk(tmp.path(), "semantic/shard-0.f16", b"semantic shard zero");
708        write_chunk(tmp.path(), "semantic/parity-0.bin", b"parity bytes");
709
710        let mut shard = chunk(
711            tmp.path(),
712            "semantic/shard-0.f16",
713            EvidenceBundleChunkRole::SemanticShard,
714        );
715        shard.parity_group = Some("semantic-parity-0".to_string());
716        let mut missing = shard.clone();
717        missing.path = "semantic/shard-1.f16".to_string();
718        missing.size_bytes = 19;
719        missing.blake3 = blake3::hash(b"semantic shard one").to_hex().to_string();
720        let mut parity = chunk(
721            tmp.path(),
722            "semantic/parity-0.bin",
723            EvidenceBundleChunkRole::Parity,
724        );
725        parity.parity_group = Some("semantic-parity-0".to_string());
726
727        let mut manifest = EvidenceBundleManifest::new(
728            "semantic-tier-fast-0",
729            EvidenceBundleKind::SemanticShard,
730            1_700_000_000_001,
731        );
732        manifest.chunks = vec![shard, missing, parity];
733        manifest.parity_groups = vec![EvidenceBundleParityGroup {
734            group_id: "semantic-parity-0".to_string(),
735            chunk_paths: vec![
736                "semantic/shard-0.f16".to_string(),
737                "semantic/shard-1.f16".to_string(),
738                "semantic/parity-0.bin".to_string(),
739            ],
740            repairable_failed_chunks: 1,
741        }];
742
743        let report = manifest.verify(tmp.path());
744        assert!(report.is_partially_repairable(), "{report:?}");
745        assert_eq!(report.repairable_issue_count, 1);
746        assert_eq!(report.unsafe_issue_count, 0);
747        assert_eq!(report.issues[0].kind, EvidenceBundleIssueKind::MissingChunk);
748        assert!(report.issues[0].repairable);
749    }
750
751    #[test]
752    fn declared_parity_without_verified_parity_artifact_is_unsafe() {
753        let tmp = TempDir::new().unwrap();
754        write_chunk(tmp.path(), "semantic/shard-0.f16", b"semantic shard zero");
755
756        let mut shard = chunk(
757            tmp.path(),
758            "semantic/shard-0.f16",
759            EvidenceBundleChunkRole::SemanticShard,
760        );
761        shard.parity_group = Some("semantic-parity-0".to_string());
762        let mut missing = shard.clone();
763        missing.path = "semantic/shard-1.f16".to_string();
764        missing.size_bytes = 19;
765        missing.blake3 = blake3::hash(b"semantic shard one").to_hex().to_string();
766
767        let mut manifest = EvidenceBundleManifest::new(
768            "semantic-missing-parity-artifact",
769            EvidenceBundleKind::SemanticShard,
770            1_700_000_000_002,
771        );
772        manifest.chunks = vec![shard, missing];
773        manifest.parity_groups = vec![EvidenceBundleParityGroup {
774            group_id: "semantic-parity-0".to_string(),
775            chunk_paths: vec![
776                "semantic/shard-0.f16".to_string(),
777                "semantic/shard-1.f16".to_string(),
778                "semantic/parity-0.bin".to_string(),
779            ],
780            repairable_failed_chunks: 1,
781        }];
782
783        let report = manifest.verify(tmp.path());
784        assert!(report.is_unsafe(), "{report:?}");
785        assert_eq!(report.repairable_issue_count, 0);
786        assert_eq!(report.unsafe_issue_count, 1);
787        assert_eq!(report.issues[0].kind, EvidenceBundleIssueKind::MissingChunk);
788        assert!(
789            !report.issues[0].repairable,
790            "a parity declaration without a verified parity artifact must not claim repairability"
791        );
792    }
793
794    #[test]
795    fn parity_does_not_repair_manifest_structure_errors() {
796        let tmp = TempDir::new().unwrap();
797        write_chunk(tmp.path(), "semantic/shard-0.f16", b"semantic shard zero");
798
799        let mut shard = chunk(
800            tmp.path(),
801            "semantic/shard-0.f16",
802            EvidenceBundleChunkRole::SemanticShard,
803        );
804        shard.parity_group = Some("semantic-parity-0".to_string());
805
806        let mut manifest = EvidenceBundleManifest::new(
807            "semantic-duplicate-path",
808            EvidenceBundleKind::SemanticShard,
809            1_700_000_000_002,
810        );
811        manifest.chunks = vec![shard.clone(), shard];
812        manifest.parity_groups = vec![EvidenceBundleParityGroup {
813            group_id: "semantic-parity-0".to_string(),
814            chunk_paths: vec!["semantic/shard-0.f16".to_string()],
815            repairable_failed_chunks: 1,
816        }];
817
818        let report = manifest.verify(tmp.path());
819        assert!(report.is_unsafe(), "{report:?}");
820        assert_eq!(
821            report.issues[0].kind,
822            EvidenceBundleIssueKind::DuplicateChunkPath
823        );
824        assert!(!report.issues[0].repairable);
825    }
826
827    #[test]
828    fn mismatched_database_wal_state_is_unsafe_even_when_files_hash() {
829        let tmp = TempDir::new().unwrap();
830        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");
831        write_chunk(tmp.path(), "db/cass.db-wal", b"wal bytes");
832
833        let mut manifest = EvidenceBundleManifest::new(
834            "db-backup-1",
835            EvidenceBundleKind::DatabaseBackup,
836            1_700_000_000_003,
837        );
838        manifest.chunks = vec![
839            chunk(
840                tmp.path(),
841                "db/cass.db",
842                EvidenceBundleChunkRole::DatabaseMain,
843            ),
844            chunk(
845                tmp.path(),
846                "db/cass.db-wal",
847                EvidenceBundleChunkRole::DatabaseWal,
848            ),
849        ];
850        manifest.database_wal_state = Some(DatabaseWalStateEvidence {
851            main_chunk_path: "db/cass.db".to_string(),
852            wal_chunk_path: Some("db/cass.db-wal".to_string()),
853            main_state_fingerprint: "main-fp".to_string(),
854            wal_base_fingerprint: Some("other-main-fp".to_string()),
855        });
856
857        let report = manifest.verify(tmp.path());
858        assert!(report.is_unsafe(), "{report:?}");
859        assert_eq!(report.verified_chunk_count, 2);
860        assert!(
861            report
862                .issues
863                .iter()
864                .any(|issue| issue.kind == EvidenceBundleIssueKind::WalMainMismatch)
865        );
866    }
867
868    #[test]
869    fn database_wal_state_rejects_undeclared_wal_chunk() {
870        let tmp = TempDir::new().unwrap();
871        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");
872
873        let mut manifest = EvidenceBundleManifest::new(
874            "db-backup-undeclared-wal",
875            EvidenceBundleKind::DatabaseBackup,
876            1_700_000_000_003,
877        );
878        manifest.chunks = vec![chunk(
879            tmp.path(),
880            "db/cass.db",
881            EvidenceBundleChunkRole::DatabaseMain,
882        )];
883        manifest.database_wal_state = Some(DatabaseWalStateEvidence {
884            main_chunk_path: "db/cass.db".to_string(),
885            wal_chunk_path: Some("db/cass.db-wal".to_string()),
886            main_state_fingerprint: "main-fp".to_string(),
887            wal_base_fingerprint: Some("main-fp".to_string()),
888        });
889
890        let report = manifest.verify(tmp.path());
891        assert!(report.is_unsafe(), "{report:?}");
892        assert!(
893            report.issues.iter().any(|issue| {
894                issue.kind == EvidenceBundleIssueKind::InvalidWalStateChunk
895                    && issue.path.as_deref() == Some("db/cass.db-wal")
896            }),
897            "database_wal_state must not certify an undeclared WAL chunk: {report:?}"
898        );
899    }
900
901    #[test]
902    fn database_wal_state_rejects_optional_wal_chunk() {
903        let tmp = TempDir::new().unwrap();
904        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");
905
906        let mut manifest = EvidenceBundleManifest::new(
907            "db-backup-optional-wal",
908            EvidenceBundleKind::DatabaseBackup,
909            1_700_000_000_003,
910        );
911        manifest.chunks = vec![
912            chunk(
913                tmp.path(),
914                "db/cass.db",
915                EvidenceBundleChunkRole::DatabaseMain,
916            ),
917            EvidenceBundleChunk {
918                path: "db/cass.db-wal".to_string(),
919                role: EvidenceBundleChunkRole::DatabaseWal,
920                size_bytes: 0,
921                blake3: blake3::hash(b"").to_hex().to_string(),
922                required: false,
923                parity_group: None,
924            },
925        ];
926        manifest.database_wal_state = Some(DatabaseWalStateEvidence {
927            main_chunk_path: "db/cass.db".to_string(),
928            wal_chunk_path: Some("db/cass.db-wal".to_string()),
929            main_state_fingerprint: "main-fp".to_string(),
930            wal_base_fingerprint: Some("main-fp".to_string()),
931        });
932
933        let report = manifest.verify(tmp.path());
934        assert!(report.is_unsafe(), "{report:?}");
935        assert!(
936            report.issues.iter().any(|issue| {
937                issue.kind == EvidenceBundleIssueKind::InvalidWalStateChunk
938                    && issue.path.as_deref() == Some("db/cass.db-wal")
939            }),
940            "database_wal_state WAL chunks must not be optional: {report:?}"
941        );
942    }
943
944    #[test]
945    fn verifier_gc_surface_is_dry_run_and_does_not_delete_files() {
946        let tmp = TempDir::new().unwrap();
947        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");
948
949        let mut manifest = EvidenceBundleManifest::new(
950            "db-backup-retained",
951            EvidenceBundleKind::DatabaseBackup,
952            1_700_000_000_004,
953        );
954        manifest.chunks = vec![chunk(
955            tmp.path(),
956            "db/cass.db",
957            EvidenceBundleChunkRole::DatabaseMain,
958        )];
959
960        let report = manifest.verify(tmp.path());
961        assert!(report.is_complete(), "{report:?}");
962        assert!(report.gc_dry_run.dry_run);
963        assert!(report.gc_dry_run.explicit_delete_required);
964        assert!(!report.gc_dry_run.deletion_allowed);
965        assert!(tmp.path().join("db/cass.db").exists());
966    }
967
968    #[test]
969    fn missing_optional_chunk_does_not_make_bundle_unsafe() {
970        let tmp = TempDir::new().unwrap();
971        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");
972
973        let mut manifest = EvidenceBundleManifest::new(
974            "db-backup-with-optional-sidecar",
975            EvidenceBundleKind::DatabaseBackup,
976            1_700_000_000_005,
977        );
978        manifest.chunks = vec![
979            chunk(
980                tmp.path(),
981                "db/cass.db",
982                EvidenceBundleChunkRole::DatabaseMain,
983            ),
984            EvidenceBundleChunk {
985                path: "db/cass.db-shm".to_string(),
986                role: EvidenceBundleChunkRole::Metadata,
987                size_bytes: 0,
988                blake3: blake3::hash(b"").to_hex().to_string(),
989                required: false,
990                parity_group: None,
991            },
992        ];
993
994        let report = manifest.verify(tmp.path());
995        assert!(report.is_complete(), "{report:?}");
996        assert_eq!(report.verified_chunk_count, 1);
997        assert!(report.issues.is_empty());
998    }
999
1000    #[test]
1001    fn unsafe_relative_paths_are_rejected() {
1002        let tmp = TempDir::new().unwrap();
1003        let mut manifest = EvidenceBundleManifest::new(
1004            "bad-path",
1005            EvidenceBundleKind::LexicalGeneration,
1006            1_700_000_000_006,
1007        );
1008        manifest.chunks = vec![EvidenceBundleChunk {
1009            path: "../outside".to_string(),
1010            role: EvidenceBundleChunkRole::LexicalShard,
1011            size_bytes: 1,
1012            blake3: blake3::hash(b"x").to_hex().to_string(),
1013            required: true,
1014            parity_group: None,
1015        }];
1016
1017        let report = manifest.verify(tmp.path());
1018        assert!(report.is_unsafe(), "{report:?}");
1019        assert_eq!(
1020            report.issues[0].kind,
1021            EvidenceBundleIssueKind::UnsafeChunkPath
1022        );
1023    }
1024
1025    #[cfg(unix)]
1026    #[test]
1027    fn symlinked_chunk_that_escapes_bundle_root_is_rejected() {
1028        let tmp = TempDir::new().unwrap();
1029        let outside = TempDir::new().unwrap();
1030        let outside_chunk = outside.path().join("segment-a");
1031        fs::write(&outside_chunk, b"outside shard bytes").unwrap();
1032        fs::create_dir_all(tmp.path().join("shards")).unwrap();
1033        std::os::unix::fs::symlink(&outside_chunk, tmp.path().join("shards/segment-a")).unwrap();
1034
1035        let mut manifest = EvidenceBundleManifest::new(
1036            "symlink-escape",
1037            EvidenceBundleKind::LexicalGeneration,
1038            1_700_000_000_007,
1039        );
1040        manifest.chunks = vec![EvidenceBundleChunk {
1041            path: "shards/segment-a".to_string(),
1042            role: EvidenceBundleChunkRole::LexicalShard,
1043            size_bytes: b"outside shard bytes".len() as u64,
1044            blake3: blake3::hash(b"outside shard bytes").to_hex().to_string(),
1045            required: true,
1046            parity_group: None,
1047        }];
1048
1049        let report = manifest.verify(tmp.path());
1050
1051        assert!(report.is_unsafe(), "{report:?}");
1052        assert_eq!(report.verified_chunk_count, 0);
1053        assert_eq!(
1054            report.issues[0].kind,
1055            EvidenceBundleIssueKind::UnsafeChunkPath
1056        );
1057    }
1058}