1use anyhow::{Context, Result, anyhow, bail};
9use serde::{Deserialize, Serialize};
10use std::collections::{BTreeMap, BTreeSet};
11use std::fs::{self, File};
12use std::io::Read;
13use std::path::{Component, Path, PathBuf};
14
/// Manifest schema version stamped by [`EvidenceBundleManifest::new`]; the verifier in this
/// file reports any other version as unsupported.
pub const EVIDENCE_BUNDLE_MANIFEST_VERSION: u32 = 1;
/// File name of the manifest, joined onto the bundle root by [`EvidenceBundleManifest::path`].
pub const EVIDENCE_BUNDLE_MANIFEST_FILE: &str = "evidence-bundle-manifest.json";
17
/// Kind of artifact an evidence bundle describes.
///
/// Serialized with snake_case variant names (for example `lexical_generation`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EvidenceBundleKind {
    LexicalGeneration,
    SemanticShard,
    DatabaseBackup,
}
25
/// Role a chunk plays inside its bundle.
///
/// Serialized with snake_case variant names. The verifier in this file only gives special
/// treatment to [`EvidenceBundleChunkRole::Parity`]; the other roles are descriptive.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EvidenceBundleChunkRole {
    Manifest,
    LexicalShard,
    SemanticShard,
    DatabaseMain,
    DatabaseWal,
    Metadata,
    /// A verified chunk with this role that is listed in a parity group marks that group as
    /// having a usable parity artifact.
    Parity,
    Other,
}
38
/// Overall outcome of verifying a bundle, derived from the issues that were recorded.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EvidenceBundleVerificationStatus {
    /// No issues were recorded.
    Complete,
    /// Issues were recorded and every one of them is flagged repairable.
    PartiallyRepairable,
    /// At least one recorded issue is not repairable.
    Unsafe,
}
46
/// Classification of a single problem found while verifying a bundle.
///
/// Serialized with snake_case variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EvidenceBundleIssueKind {
    /// The manifest file could not be read or parsed.
    CorruptManifest,
    /// The manifest version differs from `EVIDENCE_BUNDLE_MANIFEST_VERSION`.
    UnsupportedManifestVersion,
    /// The manifest declares no chunks.
    EmptyManifest,
    /// The same chunk path appears more than once in the manifest.
    DuplicateChunkPath,
    /// The chunk path failed path validation or resolves outside the bundle root.
    UnsafeChunkPath,
    /// A required chunk does not exist, or an existing chunk could not be read.
    MissingChunk,
    /// The on-disk size differs from the size declared in the manifest.
    SizeMismatch,
    /// The on-disk BLAKE3 digest differs from the digest declared in the manifest.
    DigestMismatch,
    /// `database_wal_state` names a chunk that is undeclared or not marked required.
    InvalidWalStateChunk,
    /// The WAL base fingerprint does not equal the main DB state fingerprint.
    WalMainMismatch,
}
61
/// One file tracked by an evidence bundle manifest.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EvidenceBundleChunk {
    /// Path relative to the bundle root; absolute paths and `.`/`..` components are rejected
    /// when the path is resolved.
    pub path: String,
    pub role: EvidenceBundleChunkRole,
    /// Expected size of the file in bytes.
    pub size_bytes: u64,
    /// Expected BLAKE3 digest as a hex string; compared verbatim against the computed digest.
    pub blake3: String,
    /// Whether a missing file is an issue. Defaults to `true` when absent from the JSON.
    #[serde(default = "default_required_chunk")]
    pub required: bool,
    // NOTE(review): the verifier in this file determines parity membership from
    // `EvidenceBundleManifest::parity_groups`, not from this field — confirm the intended use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parity_group: Option<String>,
}
73
74impl EvidenceBundleChunk {
75 pub fn from_file(
76 bundle_root: &Path,
77 relative_path: impl Into<String>,
78 role: EvidenceBundleChunkRole,
79 required: bool,
80 parity_group: Option<String>,
81 ) -> Result<Self> {
82 let path = relative_path.into();
83 let resolved = resolve_existing_bundle_path(bundle_root, &path)?;
84 let (size_bytes, blake3) = digest_file(&resolved)
85 .with_context(|| format!("digesting bundle chunk {}", resolved.display()))?;
86 Ok(Self {
87 path,
88 role,
89 size_bytes,
90 blake3,
91 required,
92 parity_group,
93 })
94 }
95}
96
/// A set of chunk paths that share parity protection.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EvidenceBundleParityGroup {
    pub group_id: String,
    /// Paths of the chunks that belong to this group; the verifier maps each path to its group.
    #[serde(default)]
    pub chunk_paths: Vec<String>,
    /// Largest number of failed chunks in this group for which failures are still reported
    /// as repairable.
    pub repairable_failed_chunks: u32,
}
104
/// Declares which chunks hold the main database and its WAL, plus fingerprints tying them
/// together.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DatabaseWalStateEvidence {
    /// Must name a chunk declared in the manifest and marked required.
    pub main_chunk_path: String,
    /// When present, must also name a declared, required chunk.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub wal_chunk_path: Option<String>,
    pub main_state_fingerprint: String,
    /// When a WAL chunk is declared, this must equal `main_state_fingerprint` or verification
    /// records a `WalMainMismatch` issue.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub wal_base_fingerprint: Option<String>,
}
114
/// JSON manifest describing the chunks of an evidence bundle.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EvidenceBundleManifest {
    /// Schema version; the verifier accepts only `EVIDENCE_BUNDLE_MANIFEST_VERSION`.
    pub manifest_version: u32,
    pub bundle_id: String,
    pub kind: EvidenceBundleKind,
    // NOTE(review): presumably milliseconds since the Unix epoch — confirm with producers.
    pub created_at_ms: i64,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_db_fingerprint: Option<String>,
    /// Chunks to verify; an empty list is reported as an `EmptyManifest` issue.
    #[serde(default)]
    pub chunks: Vec<EvidenceBundleChunk>,
    #[serde(default)]
    pub parity_groups: Vec<EvidenceBundleParityGroup>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub database_wal_state: Option<DatabaseWalStateEvidence>,
    /// Copied into the GC dry-run section of the report. Defaults to `true` when absent.
    #[serde(default = "default_explicit_delete_required")]
    pub explicit_delete_required: bool,
}
132
133impl EvidenceBundleManifest {
134 pub fn new(bundle_id: impl Into<String>, kind: EvidenceBundleKind, created_at_ms: i64) -> Self {
135 Self {
136 manifest_version: EVIDENCE_BUNDLE_MANIFEST_VERSION,
137 bundle_id: bundle_id.into(),
138 kind,
139 created_at_ms,
140 source_db_fingerprint: None,
141 chunks: Vec::new(),
142 parity_groups: Vec::new(),
143 database_wal_state: None,
144 explicit_delete_required: true,
145 }
146 }
147
148 pub fn path(bundle_root: &Path) -> PathBuf {
149 bundle_root.join(EVIDENCE_BUNDLE_MANIFEST_FILE)
150 }
151
152 pub fn load(path: &Path) -> Result<Self> {
153 let bytes = fs::read(path)
154 .with_context(|| format!("reading evidence bundle manifest {}", path.display()))?;
155 serde_json::from_slice(&bytes)
156 .with_context(|| format!("parsing evidence bundle manifest {}", path.display()))
157 }
158
159 pub fn save(&self, bundle_root: &Path) -> Result<PathBuf> {
160 fs::create_dir_all(bundle_root)
161 .with_context(|| format!("creating evidence bundle root {}", bundle_root.display()))?;
162 let path = Self::path(bundle_root);
163 let tmp_path = path.with_extension("json.tmp");
164 let bytes = serde_json::to_vec_pretty(self)
165 .with_context(|| "serializing evidence bundle manifest")?;
166 fs::write(&tmp_path, bytes)
167 .with_context(|| format!("writing evidence bundle manifest {}", tmp_path.display()))?;
168 fs::rename(&tmp_path, &path).with_context(|| {
169 format!(
170 "publishing evidence bundle manifest {} -> {}",
171 tmp_path.display(),
172 path.display()
173 )
174 })?;
175 Ok(path)
176 }
177
178 pub fn verify(&self, bundle_root: &Path) -> EvidenceBundleVerificationReport {
179 verify_manifest(self, bundle_root)
180 }
181}
182
/// A single problem recorded during verification.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EvidenceBundleIssue {
    pub kind: EvidenceBundleIssueKind,
    /// Chunk path the issue refers to; `None` for manifest-level issues.
    pub path: Option<String>,
    /// Human-readable description of the problem.
    pub message: String,
    /// Whether the verifier considers the problem recoverable through parity. Any issue with
    /// this set to `false` makes the overall status `Unsafe`.
    pub repairable: bool,
}
190
/// Garbage-collection summary attached to every verification report.
///
/// Both report constructors in this file set `dry_run` to `true` and `deletion_allowed` to
/// `false`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EvidenceBundleGcDryRun {
    pub dry_run: bool,
    /// Copied from the manifest, or `true` when the manifest could not be loaded.
    pub explicit_delete_required: bool,
    pub deletion_allowed: bool,
    /// Number of chunks declared by the manifest (zero when the manifest could not be loaded).
    pub retained_chunk_count: usize,
    /// Sum of the declared chunk sizes (zero when the manifest could not be loaded).
    pub retained_bytes: u64,
    /// Human-readable explanation of why deletion is not allowed.
    pub reason: String,
}
200
/// Result of verifying an evidence bundle.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EvidenceBundleVerificationReport {
    /// `None` when the manifest could not be loaded; the same applies to `bundle_id` and `kind`.
    pub manifest_version: Option<u32>,
    pub bundle_id: Option<String>,
    pub kind: Option<EvidenceBundleKind>,
    pub status: EvidenceBundleVerificationStatus,
    pub issues: Vec<EvidenceBundleIssue>,
    /// Chunks whose on-disk size and digest both matched the manifest.
    pub verified_chunk_count: usize,
    /// Number of issues flagged repairable.
    pub repairable_issue_count: usize,
    /// Number of issues not flagged repairable.
    pub unsafe_issue_count: usize,
    /// Number of chunks declared by the manifest.
    pub expected_chunk_count: usize,
    /// Saturating sum of the declared chunk sizes.
    pub expected_bytes: u64,
    /// Saturating sum of the on-disk sizes of the verified chunks.
    pub verified_bytes: u64,
    pub gc_dry_run: EvidenceBundleGcDryRun,
}
216
217impl EvidenceBundleVerificationReport {
218 pub fn is_complete(&self) -> bool {
219 self.status == EvidenceBundleVerificationStatus::Complete
220 }
221
222 pub fn is_partially_repairable(&self) -> bool {
223 self.status == EvidenceBundleVerificationStatus::PartiallyRepairable
224 }
225
226 pub fn is_unsafe(&self) -> bool {
227 self.status == EvidenceBundleVerificationStatus::Unsafe
228 }
229}
230
231pub fn verify_evidence_bundle_manifest_file(
232 bundle_root: &Path,
233 manifest_path: &Path,
234) -> EvidenceBundleVerificationReport {
235 match EvidenceBundleManifest::load(manifest_path) {
236 Ok(manifest) => manifest.verify(bundle_root),
237 Err(err) => unsafe_report(
238 EvidenceBundleIssueKind::CorruptManifest,
239 None,
240 format!("manifest could not be loaded: {err}"),
241 ),
242 }
243}
244
245fn verify_manifest(
246 manifest: &EvidenceBundleManifest,
247 bundle_root: &Path,
248) -> EvidenceBundleVerificationReport {
249 let mut issues = Vec::new();
250 let mut chunk_failures = Vec::new();
251 let mut verified_chunk_count = 0usize;
252 let mut verified_bytes = 0u64;
253 let expected_bytes = manifest
254 .chunks
255 .iter()
256 .fold(0u64, |sum, chunk| sum.saturating_add(chunk.size_bytes));
257
258 if manifest.manifest_version != EVIDENCE_BUNDLE_MANIFEST_VERSION {
259 issues.push(issue(
260 EvidenceBundleIssueKind::UnsupportedManifestVersion,
261 None,
262 format!(
263 "manifest version {} is not supported by verifier version {}",
264 manifest.manifest_version, EVIDENCE_BUNDLE_MANIFEST_VERSION
265 ),
266 false,
267 ));
268 }
269 if manifest.chunks.is_empty() {
270 issues.push(issue(
271 EvidenceBundleIssueKind::EmptyManifest,
272 None,
273 "manifest contains no chunks".to_string(),
274 false,
275 ));
276 }
277
278 let parity_index = parity_index(manifest);
279 let mut verified_parity_groups = BTreeSet::new();
280 let mut seen_paths = BTreeSet::new();
281 for chunk in &manifest.chunks {
282 if !seen_paths.insert(chunk.path.clone()) {
283 chunk_failures.push(raw_chunk_failure(
284 EvidenceBundleIssueKind::DuplicateChunkPath,
285 chunk.path.clone(),
286 "duplicate chunk path in manifest".to_string(),
287 ));
288 continue;
289 }
290
291 let resolved = match resolve_bundle_path(bundle_root, &chunk.path) {
292 Ok(path) => path,
293 Err(err) => {
294 chunk_failures.push(raw_chunk_failure(
295 EvidenceBundleIssueKind::UnsafeChunkPath,
296 chunk.path.clone(),
297 err.to_string(),
298 ));
299 continue;
300 }
301 };
302 if !resolved.exists() {
303 if !chunk.required {
304 continue;
305 }
306 chunk_failures.push(raw_chunk_failure(
307 EvidenceBundleIssueKind::MissingChunk,
308 chunk.path.clone(),
309 format!("required bundle chunk {} is missing", chunk.path),
310 ));
311 continue;
312 }
313 let resolved = match resolve_existing_bundle_path(bundle_root, &chunk.path) {
314 Ok(path) => path,
315 Err(err) => {
316 chunk_failures.push(raw_chunk_failure(
317 EvidenceBundleIssueKind::UnsafeChunkPath,
318 chunk.path.clone(),
319 err.to_string(),
320 ));
321 continue;
322 }
323 };
324
325 match digest_file(&resolved) {
326 Ok((actual_size, actual_digest)) => {
327 if actual_size != chunk.size_bytes {
328 chunk_failures.push(raw_chunk_failure(
329 EvidenceBundleIssueKind::SizeMismatch,
330 chunk.path.clone(),
331 format!(
332 "chunk {} has size {}, expected {}",
333 chunk.path, actual_size, chunk.size_bytes
334 ),
335 ));
336 continue;
337 }
338 if actual_digest != chunk.blake3 {
339 chunk_failures.push(raw_chunk_failure(
340 EvidenceBundleIssueKind::DigestMismatch,
341 chunk.path.clone(),
342 format!("chunk {} digest does not match manifest", chunk.path),
343 ));
344 continue;
345 }
346 verified_chunk_count = verified_chunk_count.saturating_add(1);
347 verified_bytes = verified_bytes.saturating_add(actual_size);
348 if chunk.role == EvidenceBundleChunkRole::Parity
349 && let Some(group) = parity_index.get(&chunk.path)
350 {
351 verified_parity_groups.insert(group.group_id.clone());
352 }
353 }
354 Err(err) => chunk_failures.push(raw_chunk_failure(
355 EvidenceBundleIssueKind::MissingChunk,
356 chunk.path.clone(),
357 format!("chunk {} could not be read: {err}", chunk.path),
358 )),
359 }
360 }
361
362 let failure_counts = chunk_failure_counts_by_parity_group(&chunk_failures, &parity_index);
363 for failure in chunk_failures {
364 let repairable = chunk_failure_is_repairable(
365 failure.kind,
366 &failure.path,
367 &parity_index,
368 &verified_parity_groups,
369 &failure_counts,
370 );
371 issues.push(issue(
372 failure.kind,
373 Some(failure.path),
374 failure.message,
375 repairable,
376 ));
377 }
378
379 if let Some(wal_state) = &manifest.database_wal_state {
380 validate_wal_state_chunk_declaration(
381 &mut issues,
382 manifest,
383 &wal_state.main_chunk_path,
384 "main DB",
385 );
386 if let Some(wal_chunk_path) = wal_state.wal_chunk_path.as_deref() {
387 validate_wal_state_chunk_declaration(&mut issues, manifest, wal_chunk_path, "WAL");
388 if wal_state.wal_base_fingerprint.as_deref()
389 != Some(wal_state.main_state_fingerprint.as_str())
390 {
391 issues.push(issue(
392 EvidenceBundleIssueKind::WalMainMismatch,
393 wal_state.wal_chunk_path.clone(),
394 format!(
395 "WAL base fingerprint {:?} does not match main DB fingerprint {}",
396 wal_state.wal_base_fingerprint, wal_state.main_state_fingerprint
397 ),
398 false,
399 ));
400 }
401 }
402 }
403
404 let repairable_issue_count = issues.iter().filter(|issue| issue.repairable).count();
405 let unsafe_issue_count = issues.len().saturating_sub(repairable_issue_count);
406 let status = if unsafe_issue_count > 0 {
407 EvidenceBundleVerificationStatus::Unsafe
408 } else if repairable_issue_count > 0 {
409 EvidenceBundleVerificationStatus::PartiallyRepairable
410 } else {
411 EvidenceBundleVerificationStatus::Complete
412 };
413
414 EvidenceBundleVerificationReport {
415 manifest_version: Some(manifest.manifest_version),
416 bundle_id: Some(manifest.bundle_id.clone()),
417 kind: Some(manifest.kind),
418 status,
419 issues,
420 verified_chunk_count,
421 repairable_issue_count,
422 unsafe_issue_count,
423 expected_chunk_count: manifest.chunks.len(),
424 expected_bytes,
425 verified_bytes,
426 gc_dry_run: EvidenceBundleGcDryRun {
427 dry_run: true,
428 explicit_delete_required: manifest.explicit_delete_required,
429 deletion_allowed: false,
430 retained_chunk_count: manifest.chunks.len(),
431 retained_bytes: expected_bytes,
432 reason: "evidence bundle verifier is read-only; deletion requires a separate explicit operator-approved GC path".to_string(),
433 },
434 }
435}
436
/// A chunk-level failure recorded during the per-chunk pass, before its repairability is
/// known.
#[derive(Debug)]
struct RawChunkFailure {
    kind: EvidenceBundleIssueKind,
    /// Chunk path exactly as written in the manifest.
    path: String,
    message: String,
}
443
/// Convenience constructor for [`RawChunkFailure`].
fn raw_chunk_failure(
    kind: EvidenceBundleIssueKind,
    path: String,
    message: String,
) -> RawChunkFailure {
    RawChunkFailure {
        kind,
        path,
        message,
    }
}
455
456fn parity_index(manifest: &EvidenceBundleManifest) -> BTreeMap<String, &EvidenceBundleParityGroup> {
457 let mut index = BTreeMap::new();
458 for group in &manifest.parity_groups {
459 for path in &group.chunk_paths {
460 index.insert(path.clone(), group);
461 }
462 }
463 index
464}
465
466fn chunk_failure_counts_by_parity_group(
467 failures: &[RawChunkFailure],
468 parity_index: &BTreeMap<String, &EvidenceBundleParityGroup>,
469) -> BTreeMap<String, u32> {
470 let mut counts = BTreeMap::new();
471 for failure in failures {
472 if let Some(group) = parity_index.get(&failure.path) {
473 *counts.entry(group.group_id.clone()).or_insert(0) += 1;
474 }
475 }
476 counts
477}
478
479fn chunk_failure_is_repairable(
480 kind: EvidenceBundleIssueKind,
481 path: &str,
482 parity_index: &BTreeMap<String, &EvidenceBundleParityGroup>,
483 verified_parity_groups: &BTreeSet<String>,
484 failure_counts: &BTreeMap<String, u32>,
485) -> bool {
486 if !matches!(
487 kind,
488 EvidenceBundleIssueKind::MissingChunk
489 | EvidenceBundleIssueKind::SizeMismatch
490 | EvidenceBundleIssueKind::DigestMismatch
491 ) {
492 return false;
493 }
494 let Some(group) = parity_index.get(path) else {
495 return false;
496 };
497 if !verified_parity_groups.contains(&group.group_id) {
498 return false;
499 }
500 let failures_in_group = failure_counts
501 .get(&group.group_id)
502 .copied()
503 .unwrap_or_default();
504 failures_in_group > 0 && failures_in_group <= group.repairable_failed_chunks
505}
506
507fn validate_wal_state_chunk_declaration(
508 issues: &mut Vec<EvidenceBundleIssue>,
509 manifest: &EvidenceBundleManifest,
510 path: &str,
511 label: &str,
512) {
513 let Some(chunk) = manifest.chunks.iter().find(|chunk| chunk.path == path) else {
514 issues.push(issue(
515 EvidenceBundleIssueKind::InvalidWalStateChunk,
516 Some(path.to_string()),
517 format!("database_wal_state {label} chunk {path} is not declared in manifest chunks"),
518 false,
519 ));
520 return;
521 };
522
523 if !chunk.required {
524 issues.push(issue(
525 EvidenceBundleIssueKind::InvalidWalStateChunk,
526 Some(path.to_string()),
527 format!("database_wal_state {label} chunk {path} must be declared as required"),
528 false,
529 ));
530 }
531}
532
/// Convenience constructor for [`EvidenceBundleIssue`].
fn issue(
    kind: EvidenceBundleIssueKind,
    path: Option<String>,
    message: String,
    repairable: bool,
) -> EvidenceBundleIssue {
    EvidenceBundleIssue {
        kind,
        path,
        message,
        repairable,
    }
}
546
547fn unsafe_report(
548 kind: EvidenceBundleIssueKind,
549 path: Option<String>,
550 message: String,
551) -> EvidenceBundleVerificationReport {
552 EvidenceBundleVerificationReport {
553 manifest_version: None,
554 bundle_id: None,
555 kind: None,
556 status: EvidenceBundleVerificationStatus::Unsafe,
557 issues: vec![issue(kind, path, message, false)],
558 verified_chunk_count: 0,
559 repairable_issue_count: 0,
560 unsafe_issue_count: 1,
561 expected_chunk_count: 0,
562 expected_bytes: 0,
563 verified_bytes: 0,
564 gc_dry_run: EvidenceBundleGcDryRun {
565 dry_run: true,
566 explicit_delete_required: true,
567 deletion_allowed: false,
568 retained_chunk_count: 0,
569 retained_bytes: 0,
570 reason: "corrupt or unreadable evidence bundle manifest cannot authorize deletion"
571 .to_string(),
572 },
573 }
574}
575
576fn digest_file(path: &Path) -> Result<(u64, String)> {
577 let mut file = File::open(path).with_context(|| format!("opening {}", path.display()))?;
578 let mut hasher = blake3::Hasher::new();
579 let mut size = 0u64;
580 let mut buffer = [0u8; 64 * 1024];
581 loop {
582 let read = file
583 .read(&mut buffer)
584 .with_context(|| format!("reading {}", path.display()))?;
585 if read == 0 {
586 break;
587 }
588 size = size.saturating_add(read as u64);
589 hasher.update(&buffer[..read]);
590 }
591 Ok((size, hasher.finalize().to_hex().to_string()))
592}
593
594fn resolve_bundle_path(bundle_root: &Path, relative_path: &str) -> Result<PathBuf> {
595 let path = Path::new(relative_path);
596 if path.is_absolute() {
597 bail!("bundle chunk path must be relative: {relative_path}");
598 }
599 for component in path.components() {
600 match component {
601 Component::Normal(_) => {}
602 Component::CurDir
603 | Component::ParentDir
604 | Component::RootDir
605 | Component::Prefix(_) => {
606 bail!("bundle chunk path contains unsafe component: {relative_path}");
607 }
608 }
609 }
610 if relative_path.is_empty() {
611 return Err(anyhow!("bundle chunk path must not be empty"));
612 }
613 Ok(bundle_root.join(path))
614}
615
616fn resolve_existing_bundle_path(bundle_root: &Path, relative_path: &str) -> Result<PathBuf> {
617 let resolved = resolve_bundle_path(bundle_root, relative_path)?;
618 let canonical_root = fs::canonicalize(bundle_root)
619 .with_context(|| format!("canonicalizing bundle root {}", bundle_root.display()))?;
620 let canonical_resolved = fs::canonicalize(&resolved)
621 .with_context(|| format!("canonicalizing bundle chunk {}", resolved.display()))?;
622 if !canonical_resolved.starts_with(&canonical_root) {
623 bail!("bundle chunk path resolves outside bundle root: {relative_path}");
624 }
625 Ok(canonical_resolved)
626}
627
/// Serde default for `EvidenceBundleChunk::required`: chunks are required unless the
/// manifest says otherwise.
fn default_required_chunk() -> bool {
    true
}
631
/// Serde default for `EvidenceBundleManifest::explicit_delete_required`: a manifest that
/// omits the field is treated as requiring explicit deletion.
fn default_explicit_delete_required() -> bool {
    true
}
635
636#[cfg(test)]
637mod tests {
638 use super::*;
639 use tempfile::TempDir;
640
    // Test helper: writes `bytes` to `root/path`, creating parent directories as needed.
    fn write_chunk(root: &Path, path: &str, bytes: &[u8]) {
        let full_path = root.join(path);
        if let Some(parent) = full_path.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(full_path, bytes).unwrap();
    }
648
    // Test helper: builds a required chunk with no parity group from an existing file.
    fn chunk(root: &Path, path: &str, role: EvidenceBundleChunkRole) -> EvidenceBundleChunk {
        EvidenceBundleChunk::from_file(root, path, role, true, None).unwrap()
    }
652
    // Saving a manifest and verifying it from disk reports a complete bundle when every
    // chunk matches, and the report still never allows deletion.
    #[test]
    fn verifier_proves_complete_lexical_generation_bundle() {
        let tmp = TempDir::new().unwrap();
        write_chunk(tmp.path(), "manifest.json", br#"{"docs":2}"#);
        write_chunk(tmp.path(), "shards/segment-a", b"lexical shard bytes");

        let mut manifest = EvidenceBundleManifest::new(
            "lexical-generation-1",
            EvidenceBundleKind::LexicalGeneration,
            1_700_000_000_000,
        );
        manifest.chunks = vec![
            chunk(
                tmp.path(),
                "manifest.json",
                EvidenceBundleChunkRole::Manifest,
            ),
            chunk(
                tmp.path(),
                "shards/segment-a",
                EvidenceBundleChunkRole::LexicalShard,
            ),
        ];
        manifest.save(tmp.path()).unwrap();

        let report = verify_evidence_bundle_manifest_file(
            tmp.path(),
            &EvidenceBundleManifest::path(tmp.path()),
        );
        assert!(report.is_complete(), "{report:?}");
        assert_eq!(report.verified_chunk_count, 2);
        assert_eq!(report.unsafe_issue_count, 0);
        assert!(!report.gc_dry_run.deletion_allowed);
    }
687
    // A manifest file that is not valid JSON yields an unsafe report with one
    // non-repairable CorruptManifest issue.
    #[test]
    fn corrupt_manifest_sidecar_is_unsafe_to_use() {
        let tmp = TempDir::new().unwrap();
        let manifest_path = EvidenceBundleManifest::path(tmp.path());
        fs::write(&manifest_path, b"{not-json").unwrap();

        let report = verify_evidence_bundle_manifest_file(tmp.path(), &manifest_path);
        assert!(report.is_unsafe(), "{report:?}");
        assert_eq!(
            report.issues[0].kind,
            EvidenceBundleIssueKind::CorruptManifest
        );
        assert!(!report.issues[0].repairable);
        assert!(!report.gc_dry_run.deletion_allowed);
    }
703
    // One missing shard in a parity group that tolerates one failure, with the parity chunk
    // verified, is reported as a repairable MissingChunk.
    #[test]
    fn missing_semantic_shard_with_parity_is_partially_repairable() {
        let tmp = TempDir::new().unwrap();
        write_chunk(tmp.path(), "semantic/shard-0.f16", b"semantic shard zero");
        write_chunk(tmp.path(), "semantic/parity-0.bin", b"parity bytes");

        let mut shard = chunk(
            tmp.path(),
            "semantic/shard-0.f16",
            EvidenceBundleChunkRole::SemanticShard,
        );
        shard.parity_group = Some("semantic-parity-0".to_string());
        // This chunk is declared but never written to disk.
        // NOTE(review): 19 is the length of "semantic shard zero", not of the 18-byte
        // "semantic shard one" hashed below; it is never compared because the file is absent.
        let mut missing = shard.clone();
        missing.path = "semantic/shard-1.f16".to_string();
        missing.size_bytes = 19;
        missing.blake3 = blake3::hash(b"semantic shard one").to_hex().to_string();
        let mut parity = chunk(
            tmp.path(),
            "semantic/parity-0.bin",
            EvidenceBundleChunkRole::Parity,
        );
        parity.parity_group = Some("semantic-parity-0".to_string());

        let mut manifest = EvidenceBundleManifest::new(
            "semantic-tier-fast-0",
            EvidenceBundleKind::SemanticShard,
            1_700_000_000_001,
        );
        manifest.chunks = vec![shard, missing, parity];
        manifest.parity_groups = vec![EvidenceBundleParityGroup {
            group_id: "semantic-parity-0".to_string(),
            chunk_paths: vec![
                "semantic/shard-0.f16".to_string(),
                "semantic/shard-1.f16".to_string(),
                "semantic/parity-0.bin".to_string(),
            ],
            repairable_failed_chunks: 1,
        }];

        let report = manifest.verify(tmp.path());
        assert!(report.is_partially_repairable(), "{report:?}");
        assert_eq!(report.repairable_issue_count, 1);
        assert_eq!(report.unsafe_issue_count, 0);
        assert_eq!(report.issues[0].kind, EvidenceBundleIssueKind::MissingChunk);
        assert!(report.issues[0].repairable);
    }
750
    // The parity group lists a parity file, but no parity chunk is declared or verified, so
    // the missing shard must not be reported as repairable.
    #[test]
    fn declared_parity_without_verified_parity_artifact_is_unsafe() {
        let tmp = TempDir::new().unwrap();
        write_chunk(tmp.path(), "semantic/shard-0.f16", b"semantic shard zero");

        let mut shard = chunk(
            tmp.path(),
            "semantic/shard-0.f16",
            EvidenceBundleChunkRole::SemanticShard,
        );
        shard.parity_group = Some("semantic-parity-0".to_string());
        // Declared but never written to disk.
        let mut missing = shard.clone();
        missing.path = "semantic/shard-1.f16".to_string();
        missing.size_bytes = 19;
        missing.blake3 = blake3::hash(b"semantic shard one").to_hex().to_string();

        let mut manifest = EvidenceBundleManifest::new(
            "semantic-missing-parity-artifact",
            EvidenceBundleKind::SemanticShard,
            1_700_000_000_002,
        );
        manifest.chunks = vec![shard, missing];
        manifest.parity_groups = vec![EvidenceBundleParityGroup {
            group_id: "semantic-parity-0".to_string(),
            chunk_paths: vec![
                "semantic/shard-0.f16".to_string(),
                "semantic/shard-1.f16".to_string(),
                "semantic/parity-0.bin".to_string(),
            ],
            repairable_failed_chunks: 1,
        }];

        let report = manifest.verify(tmp.path());
        assert!(report.is_unsafe(), "{report:?}");
        assert_eq!(report.repairable_issue_count, 0);
        assert_eq!(report.unsafe_issue_count, 1);
        assert_eq!(report.issues[0].kind, EvidenceBundleIssueKind::MissingChunk);
        assert!(
            !report.issues[0].repairable,
            "a parity declaration without a verified parity artifact must not claim repairability"
        );
    }
793
    // A duplicated chunk path is a manifest structure error and stays non-repairable even
    // though the path belongs to a parity group.
    #[test]
    fn parity_does_not_repair_manifest_structure_errors() {
        let tmp = TempDir::new().unwrap();
        write_chunk(tmp.path(), "semantic/shard-0.f16", b"semantic shard zero");

        let mut shard = chunk(
            tmp.path(),
            "semantic/shard-0.f16",
            EvidenceBundleChunkRole::SemanticShard,
        );
        shard.parity_group = Some("semantic-parity-0".to_string());

        let mut manifest = EvidenceBundleManifest::new(
            "semantic-duplicate-path",
            EvidenceBundleKind::SemanticShard,
            1_700_000_000_002,
        );
        manifest.chunks = vec![shard.clone(), shard];
        manifest.parity_groups = vec![EvidenceBundleParityGroup {
            group_id: "semantic-parity-0".to_string(),
            chunk_paths: vec!["semantic/shard-0.f16".to_string()],
            repairable_failed_chunks: 1,
        }];

        let report = manifest.verify(tmp.path());
        assert!(report.is_unsafe(), "{report:?}");
        assert_eq!(
            report.issues[0].kind,
            EvidenceBundleIssueKind::DuplicateChunkPath
        );
        assert!(!report.issues[0].repairable);
    }
826
    // Both database files verify byte-for-byte, but the WAL base fingerprint differs from
    // the main DB fingerprint, so the bundle is unsafe.
    #[test]
    fn mismatched_database_wal_state_is_unsafe_even_when_files_hash() {
        let tmp = TempDir::new().unwrap();
        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");
        write_chunk(tmp.path(), "db/cass.db-wal", b"wal bytes");

        let mut manifest = EvidenceBundleManifest::new(
            "db-backup-1",
            EvidenceBundleKind::DatabaseBackup,
            1_700_000_000_003,
        );
        manifest.chunks = vec![
            chunk(
                tmp.path(),
                "db/cass.db",
                EvidenceBundleChunkRole::DatabaseMain,
            ),
            chunk(
                tmp.path(),
                "db/cass.db-wal",
                EvidenceBundleChunkRole::DatabaseWal,
            ),
        ];
        manifest.database_wal_state = Some(DatabaseWalStateEvidence {
            main_chunk_path: "db/cass.db".to_string(),
            wal_chunk_path: Some("db/cass.db-wal".to_string()),
            main_state_fingerprint: "main-fp".to_string(),
            wal_base_fingerprint: Some("other-main-fp".to_string()),
        });

        let report = manifest.verify(tmp.path());
        assert!(report.is_unsafe(), "{report:?}");
        assert_eq!(report.verified_chunk_count, 2);
        assert!(
            report
                .issues
                .iter()
                .any(|issue| issue.kind == EvidenceBundleIssueKind::WalMainMismatch)
        );
    }
867
    // The WAL state names a WAL chunk that is absent from the manifest's chunk list, which
    // must produce an InvalidWalStateChunk issue for that path.
    #[test]
    fn database_wal_state_rejects_undeclared_wal_chunk() {
        let tmp = TempDir::new().unwrap();
        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");

        let mut manifest = EvidenceBundleManifest::new(
            "db-backup-undeclared-wal",
            EvidenceBundleKind::DatabaseBackup,
            1_700_000_000_003,
        );
        manifest.chunks = vec![chunk(
            tmp.path(),
            "db/cass.db",
            EvidenceBundleChunkRole::DatabaseMain,
        )];
        manifest.database_wal_state = Some(DatabaseWalStateEvidence {
            main_chunk_path: "db/cass.db".to_string(),
            wal_chunk_path: Some("db/cass.db-wal".to_string()),
            main_state_fingerprint: "main-fp".to_string(),
            wal_base_fingerprint: Some("main-fp".to_string()),
        });

        let report = manifest.verify(tmp.path());
        assert!(report.is_unsafe(), "{report:?}");
        assert!(
            report.issues.iter().any(|issue| {
                issue.kind == EvidenceBundleIssueKind::InvalidWalStateChunk
                    && issue.path.as_deref() == Some("db/cass.db-wal")
            }),
            "database_wal_state must not certify an undeclared WAL chunk: {report:?}"
        );
    }
900
    // The WAL chunk is declared but marked optional (and is absent on disk); referencing it
    // from the WAL state must produce an InvalidWalStateChunk issue.
    #[test]
    fn database_wal_state_rejects_optional_wal_chunk() {
        let tmp = TempDir::new().unwrap();
        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");

        let mut manifest = EvidenceBundleManifest::new(
            "db-backup-optional-wal",
            EvidenceBundleKind::DatabaseBackup,
            1_700_000_000_003,
        );
        manifest.chunks = vec![
            chunk(
                tmp.path(),
                "db/cass.db",
                EvidenceBundleChunkRole::DatabaseMain,
            ),
            EvidenceBundleChunk {
                path: "db/cass.db-wal".to_string(),
                role: EvidenceBundleChunkRole::DatabaseWal,
                size_bytes: 0,
                blake3: blake3::hash(b"").to_hex().to_string(),
                required: false,
                parity_group: None,
            },
        ];
        manifest.database_wal_state = Some(DatabaseWalStateEvidence {
            main_chunk_path: "db/cass.db".to_string(),
            wal_chunk_path: Some("db/cass.db-wal".to_string()),
            main_state_fingerprint: "main-fp".to_string(),
            wal_base_fingerprint: Some("main-fp".to_string()),
        });

        let report = manifest.verify(tmp.path());
        assert!(report.is_unsafe(), "{report:?}");
        assert!(
            report.issues.iter().any(|issue| {
                issue.kind == EvidenceBundleIssueKind::InvalidWalStateChunk
                    && issue.path.as_deref() == Some("db/cass.db-wal")
            }),
            "database_wal_state WAL chunks must not be optional: {report:?}"
        );
    }
943
    // Even for a complete bundle the GC section stays a dry run with deletion disallowed,
    // and the chunk file is still on disk after verification.
    #[test]
    fn verifier_gc_surface_is_dry_run_and_does_not_delete_files() {
        let tmp = TempDir::new().unwrap();
        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");

        let mut manifest = EvidenceBundleManifest::new(
            "db-backup-retained",
            EvidenceBundleKind::DatabaseBackup,
            1_700_000_000_004,
        );
        manifest.chunks = vec![chunk(
            tmp.path(),
            "db/cass.db",
            EvidenceBundleChunkRole::DatabaseMain,
        )];

        let report = manifest.verify(tmp.path());
        assert!(report.is_complete(), "{report:?}");
        assert!(report.gc_dry_run.dry_run);
        assert!(report.gc_dry_run.explicit_delete_required);
        assert!(!report.gc_dry_run.deletion_allowed);
        assert!(tmp.path().join("db/cass.db").exists());
    }
967
    // An optional chunk that is absent on disk produces no issue and is not counted as
    // verified.
    #[test]
    fn missing_optional_chunk_does_not_make_bundle_unsafe() {
        let tmp = TempDir::new().unwrap();
        write_chunk(tmp.path(), "db/cass.db", b"main db bytes");

        let mut manifest = EvidenceBundleManifest::new(
            "db-backup-with-optional-sidecar",
            EvidenceBundleKind::DatabaseBackup,
            1_700_000_000_005,
        );
        manifest.chunks = vec![
            chunk(
                tmp.path(),
                "db/cass.db",
                EvidenceBundleChunkRole::DatabaseMain,
            ),
            EvidenceBundleChunk {
                path: "db/cass.db-shm".to_string(),
                role: EvidenceBundleChunkRole::Metadata,
                size_bytes: 0,
                blake3: blake3::hash(b"").to_hex().to_string(),
                required: false,
                parity_group: None,
            },
        ];

        let report = manifest.verify(tmp.path());
        assert!(report.is_complete(), "{report:?}");
        assert_eq!(report.verified_chunk_count, 1);
        assert!(report.issues.is_empty());
    }
999
    // A chunk path containing `..` is rejected as an UnsafeChunkPath.
    #[test]
    fn unsafe_relative_paths_are_rejected() {
        let tmp = TempDir::new().unwrap();
        let mut manifest = EvidenceBundleManifest::new(
            "bad-path",
            EvidenceBundleKind::LexicalGeneration,
            1_700_000_000_006,
        );
        manifest.chunks = vec![EvidenceBundleChunk {
            path: "../outside".to_string(),
            role: EvidenceBundleChunkRole::LexicalShard,
            size_bytes: 1,
            blake3: blake3::hash(b"x").to_hex().to_string(),
            required: true,
            parity_group: None,
        }];

        let report = manifest.verify(tmp.path());
        assert!(report.is_unsafe(), "{report:?}");
        assert_eq!(
            report.issues[0].kind,
            EvidenceBundleIssueKind::UnsafeChunkPath
        );
    }
1024
    // A chunk path that looks safe lexically but is a symlink to a file outside the bundle
    // root is rejected, even though the target's size and digest match the manifest.
    #[cfg(unix)]
    #[test]
    fn symlinked_chunk_that_escapes_bundle_root_is_rejected() {
        let tmp = TempDir::new().unwrap();
        let outside = TempDir::new().unwrap();
        let outside_chunk = outside.path().join("segment-a");
        fs::write(&outside_chunk, b"outside shard bytes").unwrap();
        fs::create_dir_all(tmp.path().join("shards")).unwrap();
        std::os::unix::fs::symlink(&outside_chunk, tmp.path().join("shards/segment-a")).unwrap();

        let mut manifest = EvidenceBundleManifest::new(
            "symlink-escape",
            EvidenceBundleKind::LexicalGeneration,
            1_700_000_000_007,
        );
        manifest.chunks = vec![EvidenceBundleChunk {
            path: "shards/segment-a".to_string(),
            role: EvidenceBundleChunkRole::LexicalShard,
            size_bytes: b"outside shard bytes".len() as u64,
            blake3: blake3::hash(b"outside shard bytes").to_hex().to_string(),
            required: true,
            parity_group: None,
        }];

        let report = manifest.verify(tmp.path());

        assert!(report.is_unsafe(), "{report:?}");
        assert_eq!(report.verified_chunk_count, 0);
        assert_eq!(
            report.issues[0].kind,
            EvidenceBundleIssueKind::UnsafeChunkPath
        );
    }
1058}