Skip to main content

assay_core/replay/
bundle.rs

1//! Replay bundle container writer (E9).
2//!
3//! Writes a hermetic .tar.gz with canonical layout: manifest.json, then
4//! files under files/, outputs/, cassettes/ in deterministic order.
5//! No user-facing CLI here (E9c); this is the core library for bundle creation.
6
7use crate::replay::manifest::ReplayManifest;
8use anyhow::{Context, Result};
9use flate2::read::GzDecoder;
10use flate2::Compression;
11use flate2::GzBuilder;
12use serde_json;
13use sha2::{Digest, Sha256};
14use std::collections::BTreeMap;
15use std::io::{Read, Write};
16use std::path::Path;
17use tar::{Archive, Builder, Header};
18
/// Well-known locations inside a bundle, as POSIX paths relative to the archive root.
pub mod paths {
    /// Directory prefix for scrubbed VCR cassettes.
    pub const CASSETTES_PREFIX: &str = "cassettes/";
    /// Directory prefix for run outputs (run.json, summary.json, sarif, junit).
    pub const OUTPUTS_PREFIX: &str = "outputs/";
    /// Directory prefix for run inputs (config, trace, etc.).
    pub const FILES_PREFIX: &str = "files/";
    /// Name of the manifest, stored at the archive root.
    pub const MANIFEST: &str = "manifest.json";
}
30
/// One file destined for the bundle: where it goes and what it contains.
#[derive(Clone, Debug)]
pub struct BundleEntry {
    /// Bundle-relative location using forward slashes (e.g. "files/trace.jsonl").
    pub path: String,
    /// Raw bytes stored at `path`.
    pub data: Vec<u8>,
}
39
40/// Write a replay bundle to `w` as .tar.gz: manifest first, then entries in sorted order.
41/// Uses deterministic tar headers (mtime 0, fixed mode) for reproducible archives.
42pub fn write_bundle_tar_gz<W: Write>(
43    w: W,
44    manifest: &ReplayManifest,
45    entries: &[BundleEntry],
46) -> Result<()> {
47    let manifest_json = serde_json::to_vec(manifest).context("serialize manifest")?;
48
49    let gz = GzBuilder::new().mtime(0).write(w, Compression::default());
50    let mut tar = Builder::new(gz);
51    tar.mode(tar::HeaderMode::Deterministic);
52
53    write_tar_entry(&mut tar, paths::MANIFEST, &manifest_json)?;
54
55    let mut sorted: Vec<_> = entries.iter().collect();
56    sorted.sort_by(|a, b| a.path.as_str().cmp(b.path.as_str()));
57
58    for e in &sorted {
59        normalize_path_and_append(&mut tar, &e.path, &e.data)?;
60    }
61
62    let gz = tar.into_inner().context("finalize tar")?;
63    gz.finish().context("finish gzip")?;
64    Ok(())
65}
66
67/// Compute SHA256 of the entire archive (for provenance.bundle_digest).
68/// Caller must pass the same manifest + set of entries as write_bundle_tar_gz; entry order is irrelevant.
69pub fn bundle_digest(manifest: &ReplayManifest, entries: &[BundleEntry]) -> Result<String> {
70    let mut buf = Vec::new();
71    write_bundle_tar_gz(&mut buf, manifest, entries)?;
72    let hash = Sha256::digest(&buf);
73    Ok(hex::encode(hash))
74}
75
76fn write_tar_entry<T: Write>(tar: &mut Builder<T>, path: &str, data: &[u8]) -> Result<()> {
77    let mut header = Header::new_gnu();
78    header.set_path(path).context("set_path")?;
79    header.set_size(data.len() as u64);
80    header.set_mode(0o644);
81    header.set_uid(0);
82    header.set_gid(0);
83    header.set_mtime(0);
84    header.set_cksum();
85    tar.append(&header, data).context("append entry")?;
86    Ok(())
87}
88
89/// Validates and normalizes a bundle **entry** path. Fail-closed: returns Ok(normalized) or Err.
90/// Applies only to entry paths (files under files/, outputs/, cassettes/). The manifest file
91/// (`manifest.json`) is written via [write_tar_entry] with [paths::MANIFEST] and never goes
92/// through this validator.
93///
94/// Rules: POSIX (backslash → slash, no leading slash); no empty path or empty segments (e.g.
95/// `files//x` rejected); no segment "." or ".." (segment check, so `files/a..b.txt` is allowed);
96/// no drive letter (':' in first segment); canonical prefix required: files/, outputs/, or cassettes/.
97fn validate_entry_path(path: &str) -> Result<String> {
98    let normalized = path.replace('\\', "/").trim_start_matches('/').to_string();
99    if normalized.is_empty() {
100        anyhow::bail!("invalid bundle path: empty path");
101    }
102    let segments: Vec<&str> = normalized.split('/').collect();
103    if segments[0].contains(':') {
104        anyhow::bail!(
105            "invalid bundle path: drive-letter or ':' in first segment (path: {})",
106            path
107        );
108    }
109    for seg in &segments {
110        if seg.is_empty() {
111            anyhow::bail!("invalid bundle path: empty segment (path: {})", path);
112        }
113        if *seg == "." || *seg == ".." {
114            anyhow::bail!(
115                "invalid bundle path: traversal segment '.' or '..' (path: {})",
116                path
117            );
118        }
119    }
120    let has_canonical_prefix = normalized.starts_with(paths::FILES_PREFIX)
121        || normalized.starts_with(paths::OUTPUTS_PREFIX)
122        || normalized.starts_with(paths::CASSETTES_PREFIX);
123    if !has_canonical_prefix {
124        anyhow::bail!(
125            "invalid bundle path prefix: must be files/, outputs/, or cassettes/ (path: {})",
126            path
127        );
128    }
129    Ok(normalized)
130}
131
132/// Normalize path (validate by segment + canonical prefix), then append to tar.
133fn normalize_path_and_append<T: Write>(
134    tar: &mut Builder<T>,
135    path: &str,
136    data: &[u8],
137) -> Result<()> {
138    let normalized = validate_entry_path(path)?;
139    write_tar_entry(tar, &normalized, data)
140}
141
142/// Build a file manifest (path -> FileManifestEntry) from entries. Fail-closed: invalid path → Error
143/// (same policy as writer). Paths must be valid and under files/, outputs/, or cassettes/.
144pub fn build_file_manifest(
145    entries: &[BundleEntry],
146) -> Result<BTreeMap<String, crate::replay::manifest::FileManifestEntry>> {
147    let mut out = BTreeMap::new();
148    for e in entries {
149        let path = validate_entry_path(&e.path)?;
150        let hash = Sha256::digest(&e.data);
151        out.insert(
152            path.clone(),
153            crate::replay::manifest::FileManifestEntry {
154                sha256: format!("sha256:{}", hex::encode(hash)),
155                size: e.data.len() as u64,
156                mode: Some(0o644),
157                content_type: content_type_hint(Path::new(&path)),
158            },
159        );
160    }
161    Ok(out)
162}
163
/// Result of reading a bundle: the parsed manifest plus every other file in the archive.
///
/// Produced by `read_bundle_tar_gz`.
#[derive(Debug)]
pub struct ReadBundle {
    /// Manifest parsed from `manifest.json` at the bundle root.
    pub manifest: ReplayManifest,
    /// `(path, contents)` pairs for all entries except `manifest.json`. Paths are POSIX and
    /// relative to the bundle root.
    pub entries: Vec<(String, Vec<u8>)>,
}
171
172/// Read a replay bundle from .tar.gz: parse manifest and collect all entry (path, data).
173/// Paths normalized to POSIX. Enforces same path policy as writer: only manifest.json or
174/// files/, outputs/, cassettes/ (no empty segment, no . or .., no drive letter). Duplicate
175/// paths in tar → Error. Missing manifest.json → Error.
176pub fn read_bundle_tar_gz<R: Read>(r: R) -> Result<ReadBundle> {
177    let dec = GzDecoder::new(r);
178    let mut ar = Archive::new(dec);
179    let mut manifest_data: Option<Vec<u8>> = None;
180    let mut seen = BTreeMap::new();
181    for entry in ar.entries().context("list tar entries")? {
182        let mut e = entry.context("read tar entry")?;
183        let path = e.path().context("entry path")?;
184        let path_str = path.to_string_lossy().replace('\\', "/");
185        if path_str == paths::MANIFEST {
186            let mut data = Vec::new();
187            e.read_to_end(&mut data).context("read manifest body")?;
188            manifest_data = Some(data);
189            continue;
190        }
191        validate_entry_path(&path_str)?;
192        let mut data = Vec::new();
193        e.read_to_end(&mut data).context("read entry body")?;
194        if seen.insert(path_str.clone(), data).is_some() {
195            anyhow::bail!("duplicate path in bundle: {}", path_str);
196        }
197    }
198    let manifest_json = manifest_data.context("manifest.json missing in bundle")?;
199    let manifest: ReplayManifest =
200        serde_json::from_slice(&manifest_json).context("parse manifest.json")?;
201    let entries = seen.into_iter().collect();
202    Ok(ReadBundle { manifest, entries })
203}
204
/// Maps a file extension to a MIME-type hint.
///
/// Recognizes `json`, `jsonl`, `xml`, `yaml` and `yml` (case-sensitive). Returns `None` for
/// any other extension, for a path without an extension, or for a non-UTF-8 extension.
fn content_type_hint(path: &Path) -> Option<String> {
    let mime = match path.extension().and_then(|ext| ext.to_str()) {
        Some("json") => "application/json",
        Some("jsonl") => "application/x-ndjson",
        Some("xml") => "application/xml",
        Some("yaml") | Some("yml") => "application/x-yaml",
        _ => return None,
    };
    Some(mime.to_string())
}
215
216#[cfg(test)]
217mod tests {
218    use super::*;
219    use crate::replay::manifest::{
220        ReplayCoverage, ReplayManifest, ReplayOutputs, ReplaySeeds, ScrubPolicy,
221    };
222    use std::collections::BTreeMap;
223
224    #[test]
225    fn write_bundle_minimal_roundtrip() {
226        let manifest = ReplayManifest::minimal("2.15.0".into());
227        let entries = vec![BundleEntry {
228            path: "outputs/summary.json".into(),
229            data: br#"{"schema_version":1}"#.to_vec(),
230        }];
231        let mut buf = Vec::new();
232        write_bundle_tar_gz(&mut buf, &manifest, &entries).unwrap();
233        assert!(!buf.is_empty());
234        let digest = bundle_digest(&manifest, &entries).unwrap();
235        assert_eq!(digest.len(), 64);
236    }
237
238    #[test]
239    fn read_bundle_roundtrip() {
240        let manifest = ReplayManifest::minimal("2.15.0".into());
241        let entries = vec![
242            BundleEntry {
243                path: "files/trace.jsonl".into(),
244                data: b"[]".to_vec(),
245            },
246            BundleEntry {
247                path: "outputs/summary.json".into(),
248                data: br#"{"schema_version":1}"#.to_vec(),
249            },
250        ];
251        let mut buf = Vec::new();
252        write_bundle_tar_gz(&mut buf, &manifest, &entries).unwrap();
253        let read = read_bundle_tar_gz(std::io::Cursor::new(&buf)).unwrap();
254        assert_eq!(read.manifest.schema_version, manifest.schema_version);
255        assert_eq!(read.manifest.assay_version, manifest.assay_version);
256        let paths: std::collections::BTreeSet<_> =
257            read.entries.iter().map(|(p, _)| p.as_str()).collect();
258        assert!(paths.contains("files/trace.jsonl"));
259        assert!(paths.contains("outputs/summary.json"));
260        let data: std::collections::BTreeMap<_, _> = read.entries.into_iter().collect();
261        assert_eq!(data.get("files/trace.jsonl").unwrap(), &b"[]"[..]);
262    }
263
264    /// Reader fails when manifest.json is absent (same policy: bundle must be valid).
265    #[test]
266    fn read_bundle_fails_manifest_missing() {
267        let mut buf = Vec::new();
268        let gz = GzBuilder::new()
269            .mtime(0)
270            .write(&mut buf, flate2::Compression::default());
271        let mut tar = Builder::new(gz);
272        let mut header = Header::new_gnu();
273        header.set_path("files/x").unwrap();
274        header.set_size(0);
275        header.set_mode(0o644);
276        header.set_cksum();
277        tar.append(&header, &[] as &[u8]).unwrap();
278        let gz = tar.into_inner().unwrap();
279        gz.finish().unwrap();
280        let err = read_bundle_tar_gz(std::io::Cursor::new(&buf)).unwrap_err();
281        assert!(err.to_string().contains("manifest.json missing"), "{}", err);
282    }
283
284    /// Duplicate path in tar → Error (avoids zip-slip style confusion; last-wins undefined).
285    #[test]
286    fn read_bundle_fails_duplicate_path() {
287        let manifest = ReplayManifest::minimal("2.15.0".into());
288        let manifest_json = serde_json::to_vec(&manifest).unwrap();
289        let mut buf = Vec::new();
290        let gz = GzBuilder::new()
291            .mtime(0)
292            .write(&mut buf, flate2::Compression::default());
293        let mut tar = Builder::new(gz);
294        tar.mode(tar::HeaderMode::Deterministic);
295        let mut h = Header::new_gnu();
296        h.set_path(paths::MANIFEST).unwrap();
297        h.set_size(manifest_json.len() as u64);
298        h.set_mode(0o644);
299        h.set_cksum();
300        tar.append(&h, &manifest_json[..]).unwrap();
301        for _ in 0..2 {
302            let mut h2 = Header::new_gnu();
303            h2.set_path("files/x").unwrap();
304            h2.set_size(1);
305            h2.set_mode(0o644);
306            h2.set_cksum();
307            tar.append(&h2, &b"x"[..]).unwrap();
308        }
309        let gz = tar.into_inner().unwrap();
310        gz.finish().unwrap();
311        let err = read_bundle_tar_gz(std::io::Cursor::new(&buf)).unwrap_err();
312        assert!(err.to_string().contains("duplicate path"), "{}", err);
313    }
314
315    #[test]
316    fn build_file_manifest_normalizes_paths() {
317        let entries = vec![BundleEntry {
318            path: "files/trace.jsonl".into(),
319            data: vec![1, 2, 3],
320        }];
321        let manifest_map = build_file_manifest(&entries).unwrap();
322        assert_eq!(manifest_map.len(), 1);
323        let entry = manifest_map.get("files/trace.jsonl").unwrap();
324        assert_eq!(entry.size, 3);
325        assert!(entry.sha256.starts_with("sha256:"));
326    }
327
328    /// Legitimate filename with ".." in segment (not traversal) is allowed.
329    #[test]
330    fn path_segment_dotdot_allows_literal_dotdot_in_filename() {
331        let manifest = ReplayManifest::minimal("2.15.0".into());
332        let entries = vec![BundleEntry {
333            path: "files/a..b.txt".into(),
334            data: b"ok".to_vec(),
335        }];
336        let mut buf = Vec::new();
337        write_bundle_tar_gz(&mut buf, &manifest, &entries).unwrap();
338        let names = list_tar_gz_paths(&buf);
339        assert!(names.contains(&"files/a..b.txt".to_string()));
340    }
341
342    /// Non-canonical prefix (evil.txt, x/y) rejected.
343    #[test]
344    fn path_must_have_canonical_prefix() {
345        let manifest = ReplayManifest::minimal("2.15.0".into());
346        for bad in ["evil.txt", "x/y/z", "output/run.json"] {
347            let entries = vec![BundleEntry {
348                path: bad.to_string(),
349                data: vec![],
350            }];
351            let err = write_bundle_tar_gz(&mut Vec::new(), &manifest, &entries).unwrap_err();
352            assert!(
353                err.to_string().contains("invalid bundle path prefix"),
354                "{}",
355                bad
356            );
357        }
358    }
359
360    /// Empty segment (duplicate slash) rejected.
361    #[test]
362    fn path_rejects_empty_segment() {
363        let manifest = ReplayManifest::minimal("2.15.0".into());
364        let entries = vec![BundleEntry {
365            path: "files//x.json".into(),
366            data: vec![],
367        }];
368        let err = write_bundle_tar_gz(&mut Vec::new(), &manifest, &entries).unwrap_err();
369        assert!(err.to_string().contains("empty segment"), "files//x");
370    }
371
372    /// Windows drive-letter-like path rejected.
373    #[test]
374    fn path_rejects_drive_letter() {
375        let manifest = ReplayManifest::minimal("2.15.0".into());
376        for bad in ["C:/foo", "C:\\foo", "D:bar"] {
377            let entries = vec![BundleEntry {
378                path: bad.to_string(),
379                data: vec![],
380            }];
381            let err = write_bundle_tar_gz(&mut Vec::new(), &manifest, &entries).unwrap_err();
382            assert!(
383                err.to_string().contains("drive-letter")
384                    || err.to_string().contains("first segment"),
385                "{}",
386                bad
387            );
388        }
389    }
390
391    /// build_file_manifest fail-closed: invalid path returns Err (same policy as writer).
392    #[test]
393    fn build_file_manifest_fail_closed_on_invalid_path() {
394        let entries = vec![
395            BundleEntry {
396                path: "files/ok.json".into(),
397                data: vec![],
398            },
399            BundleEntry {
400                path: "../secrets.txt".into(),
401                data: vec![],
402            },
403        ];
404        let err = build_file_manifest(&entries).unwrap_err();
405        assert!(err.to_string().contains("invalid bundle path"));
406    }
407
408    /// Audit: digest of written bytes equals bundle_digest(manifest, entries).
409    #[test]
410    fn bundle_digest_equals_sha256_of_written_bytes() {
411        let manifest = ReplayManifest::minimal("2.15.0".into());
412        let entries = vec![
413            BundleEntry {
414                path: "files/trace.jsonl".into(),
415                data: b"[]".to_vec(),
416            },
417            BundleEntry {
418                path: "outputs/summary.json".into(),
419                data: b"{}".to_vec(),
420            },
421        ];
422        let mut buf = Vec::new();
423        write_bundle_tar_gz(&mut buf, &manifest, &entries).unwrap();
424        let digest_from_fn = bundle_digest(&manifest, &entries).unwrap();
425        let hash_of_bytes = hex::encode(Sha256::digest(&buf));
426        assert_eq!(
427            digest_from_fn, hash_of_bytes,
428            "bundle_digest must equal sha256(written bytes)"
429        );
430    }
431
432    /// Audit: path traversal (..) and empty path rejected; no .. or absolute in output.
433    #[test]
434    fn path_traversal_rejected_and_output_has_no_traversal() {
435        let manifest = ReplayManifest::minimal("2.15.0".into());
436        for bad_path in [
437            "../secrets.txt",
438            "files/../../etc/passwd",
439            "outputs/../leak",
440            "",
441        ] {
442            let entries = vec![BundleEntry {
443                path: bad_path.to_string(),
444                data: vec![],
445            }];
446            let mut buf = Vec::new();
447            let err = write_bundle_tar_gz(&mut buf, &manifest, &entries).unwrap_err();
448            assert!(
449                err.to_string().contains("invalid bundle path"),
450                "{}",
451                bad_path
452            );
453        }
454        // Leading slash and backslash are normalized; result must not be in archive as absolute/traversal
455        let entries = vec![
456            BundleEntry {
457                path: "files/trace.jsonl".into(),
458                data: b"[]".to_vec(),
459            },
460            BundleEntry {
461                path: "outputs/run.json".into(),
462                data: b"{}".to_vec(),
463            },
464        ];
465        let mut buf = Vec::new();
466        write_bundle_tar_gz(&mut buf, &manifest, &entries).unwrap();
467        let names = list_tar_gz_paths(&buf);
468        for name in &names {
469            assert!(!name.contains(".."), "no .. in archive path: {}", name);
470            assert!(
471                !name.starts_with('/'),
472                "no leading / in archive path: {}",
473                name
474            );
475        }
476        assert!(names.iter().any(|s| s == "manifest.json"));
477        assert!(names.iter().any(|s| s.starts_with("files/")));
478        assert!(names.iter().any(|s| s.starts_with("outputs/")));
479    }
480
481    /// Audit: full manifest (replay_coverage, seeds, scrub_policy) and canonical layout.
482    #[test]
483    fn audit_full_manifest_and_canonical_layout() {
484        let mut reason = BTreeMap::new();
485        reason.insert(
486            "test_b".to_string(),
487            "judge response not cached".to_string(),
488        );
489        let manifest = ReplayManifest {
490            schema_version: 1,
491            assay_version: "2.15.0".to_string(),
492            created_at: Some("2025-01-27T12:00:00Z".to_string()),
493            source_run_path: Some(".assay/run_abc123".to_string()),
494            selection_method: Some("run-id".to_string()),
495            git_sha: Some("a1b2c3d4e5f6".to_string()),
496            git_dirty: Some(false),
497            workflow_run_id: None,
498            config_digest: None,
499            policy_digest: None,
500            baseline_digest: None,
501            trace_digest: None,
502            trace_path: Some("files/trace.jsonl".to_string()),
503            outputs: Some(ReplayOutputs {
504                run: Some("outputs/run.json".to_string()),
505                summary: Some("outputs/summary.json".to_string()),
506                junit: None,
507                sarif: None,
508            }),
509            toolchain: None,
510            seeds: Some(ReplaySeeds {
511                seed_version: Some(1),
512                order_seed: Some("42".to_string()),
513                judge_seed: None,
514            }),
515            replay_coverage: Some(ReplayCoverage {
516                complete_tests: vec!["test_a".to_string()],
517                incomplete_tests: vec!["test_b".to_string()],
518                reason: Some(reason),
519            }),
520            scrub_policy: Some(ScrubPolicy::default()),
521            files: None,
522            env: None,
523        };
524        let entries = vec![
525            BundleEntry {
526                path: "files/trace.jsonl".into(),
527                data: b"[]".to_vec(),
528            },
529            BundleEntry {
530                path: "outputs/run.json".into(),
531                data: b"{}".to_vec(),
532            },
533            BundleEntry {
534                path: "outputs/summary.json".into(),
535                data: b"{}".to_vec(),
536            },
537            BundleEntry {
538                path: "cassettes/.gitkeep".into(),
539                data: vec![],
540            },
541        ];
542        let mut buf = Vec::new();
543        write_bundle_tar_gz(&mut buf, &manifest, &entries).unwrap();
544        let names = list_tar_gz_paths(&buf);
545        assert!(
546            names.contains(&"manifest.json".to_string()),
547            "canonical: manifest at root"
548        );
549        assert!(names
550            .iter()
551            .all(|p| !p.contains("..") && !p.starts_with('/')));
552        assert!(names.contains(&"manifest.json".to_string()));
553        assert!(names.iter().any(|p| p.starts_with("files/")));
554        assert!(names.iter().any(|p| p.starts_with("outputs/")));
555        assert!(names.iter().any(|p| p.starts_with("cassettes/")));
556    }
557
558    // --- Gap 1: Golden-value snapshot test ---
559
560    /// Pinned digest: catches silent reproducibility regressions (serde field order,
561    /// flate2 compression defaults, tar header changes).
562    #[test]
563    fn golden_digest_snapshot() {
564        let manifest = ReplayManifest::minimal("2.15.0".into());
565        let entries = vec![BundleEntry {
566            path: "files/trace.jsonl".into(),
567            data: b"[]".to_vec(),
568        }];
569        let digest = bundle_digest(&manifest, &entries).unwrap();
570        assert_eq!(
571            digest, "e982d2dd1d7cf56df6b417c7af1bc3f7f334ecfc47298bf5d240f4485f3b7a7c",
572            "Golden digest changed — if intentional, update this value after verifying \
573             that the new output is still deterministic across platforms"
574        );
575    }
576
577    // --- Gap 2: Fix helper + sort-order test ---
578
579    /// Returns tar entry paths in **archive order** (no sorting).
580    fn list_tar_gz_paths(gz: &[u8]) -> Vec<String> {
581        let dec = flate2::read::GzDecoder::new(gz);
582        let mut ar = tar::Archive::new(dec);
583        let mut names = Vec::new();
584        for e in ar.entries().unwrap() {
585            let e = e.unwrap();
586            let path = e.path().unwrap();
587            names.push(path.to_string_lossy().replace('\\', "/"));
588        }
589        names
590    }
591
592    /// Writer must emit entries in sorted order (after manifest). Entries given
593    /// out-of-order must appear sorted in the archive.
594    #[test]
595    fn entries_written_in_sorted_order() {
596        let manifest = ReplayManifest::minimal("2.15.0".into());
597        // Provide entries deliberately out of sorted order.
598        let entries = vec![
599            BundleEntry {
600                path: "outputs/z.json".into(),
601                data: b"{}".to_vec(),
602            },
603            BundleEntry {
604                path: "files/a.jsonl".into(),
605                data: b"[]".to_vec(),
606            },
607            BundleEntry {
608                path: "cassettes/m.json".into(),
609                data: b"{}".to_vec(),
610            },
611        ];
612        let mut buf = Vec::new();
613        write_bundle_tar_gz(&mut buf, &manifest, &entries).unwrap();
614        let names = list_tar_gz_paths(&buf);
615        assert_eq!(names[0], "manifest.json", "manifest must be first");
616        let data_entries: Vec<_> = names[1..].to_vec();
617        let mut expected = data_entries.clone();
618        expected.sort();
619        assert_eq!(
620            data_entries, expected,
621            "entries after manifest must be in sorted order"
622        );
623    }
624
625    // --- Gap 3: Direct unit tests for validate_entry_path ---
626
627    #[test]
628    fn validate_entry_path_accepts_valid_paths() {
629        for good in [
630            "files/trace.jsonl",
631            "outputs/run.json",
632            "cassettes/openai/embed.json",
633            "files/a..b.txt",
634            "files/deep/nested/dir/file.json",
635        ] {
636            let result = validate_entry_path(good);
637            assert!(result.is_ok(), "should accept: {}", good);
638            assert_eq!(result.unwrap(), good, "valid path returned unchanged");
639        }
640    }
641
642    #[test]
643    fn validate_entry_path_normalizes_backslash_and_leading_slash() {
644        assert_eq!(
645            validate_entry_path("files\\trace.jsonl").unwrap(),
646            "files/trace.jsonl"
647        );
648        assert_eq!(
649            validate_entry_path("/files/trace.jsonl").unwrap(),
650            "files/trace.jsonl"
651        );
652        assert_eq!(
653            validate_entry_path("\\files\\trace.jsonl").unwrap(),
654            "files/trace.jsonl"
655        );
656    }
657
658    #[test]
659    fn validate_entry_path_rejects_empty() {
660        let err = validate_entry_path("").unwrap_err();
661        assert!(err.to_string().contains("empty path"));
662    }
663
664    #[test]
665    fn validate_entry_path_rejects_empty_segment() {
666        let err = validate_entry_path("files//x.json").unwrap_err();
667        assert!(err.to_string().contains("empty segment"));
668    }
669
670    #[test]
671    fn validate_entry_path_rejects_dot_segments() {
672        for bad in ["files/./x.json", "files/../x.json", "outputs/.."] {
673            let err = validate_entry_path(bad).unwrap_err();
674            assert!(
675                err.to_string().contains("traversal segment"),
676                "should reject: {}",
677                bad
678            );
679        }
680    }
681
682    #[test]
683    fn validate_entry_path_rejects_drive_letter() {
684        for bad in ["C:/foo", "D:bar"] {
685            let err = validate_entry_path(bad).unwrap_err();
686            assert!(
687                err.to_string().contains("drive-letter"),
688                "should reject: {}",
689                bad
690            );
691        }
692    }
693
694    #[test]
695    fn validate_entry_path_rejects_non_canonical_prefix() {
696        for bad in ["evil.txt", "x/y/z", "output/run.json", "file/x.json"] {
697            let err = validate_entry_path(bad).unwrap_err();
698            assert!(
699                err.to_string().contains("invalid bundle path prefix"),
700                "should reject: {}",
701                bad
702            );
703        }
704    }
705}