Skip to main content

git_closure/snapshot/
serial.rs

//! S-expression serialization and deserialization for `.gcl` snapshot files.
2use std::fs;
3use std::path::Path;
4
5use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
6use base64::Engine;
7
8use crate::error::GitClosureError;
9use crate::utils::io_error_with_path;
10
11use super::{ListEntry, Result, SnapshotFile, SnapshotHeader};
12
13// ── Serialization ─────────────────────────────────────────────────────────────
14
15/// Serializes `files` into the canonical `.gcl` S-expression format.
16///
17/// `files` must be in lexicographic path order (the caller is responsible).
18/// `header.git_rev` and `header.git_branch` are emitted as informational
19/// comments but are **not** included in the structural `snapshot_hash`.
20pub(crate) fn serialize_snapshot(files: &[SnapshotFile], header: &SnapshotHeader) -> String {
21    let mut output = String::new();
22
23    output.push_str(";; git-closure snapshot v0.1\n");
24    output.push_str(&format!(";; snapshot-hash: {}\n", header.snapshot_hash));
25    output.push_str(&format!(";; file-count: {}\n", files.len()));
26    if let Some(rev) = &header.git_rev {
27        output.push_str(&format!(";; git-rev: {rev}\n"));
28    }
29    if let Some(branch) = &header.git_branch {
30        output.push_str(&format!(";; git-branch: {branch}\n"));
31    }
32    for (key, value) in &header.extra_headers {
33        output.push_str(&format!(";; {key}: {value}\n"));
34    }
35    output.push('\n');
36    output.push_str("(\n");
37
38    for file in files {
39        output.push_str("  (\n");
40        output.push_str("    (:path ");
41        output.push_str(&quote_string(&file.path));
42        if let Some(target) = &file.symlink_target {
43            output.push('\n');
44            output.push_str("     :type ");
45            output.push_str(&quote_string("symlink"));
46            output.push('\n');
47            output.push_str("     :target ");
48            output.push_str(&quote_string(target));
49            output.push_str(")\n");
50            output.push_str("\"\"\n");
51            output.push_str("  )\n");
52            continue;
53        }
54        output.push('\n');
55        output.push_str("     :sha256 ");
56        output.push_str(&quote_string(&file.sha256));
57        output.push('\n');
58        output.push_str("     :mode ");
59        output.push_str(&quote_string(&file.mode));
60        output.push('\n');
61        output.push_str("     :size ");
62        output.push_str(&file.size.to_string());
63        if let Some(encoding) = &file.encoding {
64            output.push('\n');
65            output.push_str("     :encoding ");
66            output.push_str(&quote_string(encoding));
67        }
68        output.push_str(")\n");
69
70        let quoted_content = if file.encoding.as_deref() == Some("base64") {
71            quote_string(&BASE64_STANDARD.encode(&file.content))
72        } else {
73            // INVARIANT: files without base64 encoding were validated as valid UTF-8
74            // during collection via `std::str::from_utf8` in collect_file_attributes.
75            // `from_utf8_lossy` would silently corrupt data by substituting U+FFFD —
76            // an undetectable data-loss bug.  Panic loudly instead so the invariant
77            // violation is surfaced immediately during development/testing.
78            quote_string(
79                std::str::from_utf8(&file.content)
80                    .expect("non-base64 file content must be valid UTF-8 (invariant violated)"),
81            )
82        };
83
84        output.push_str(&quoted_content);
85        output.push('\n');
86        output.push_str("  )\n");
87    }
88
89    output.push_str(")\n");
90    output
91}
92
93/// Serializes a lexpr `Value` as a quoted S-expression string.
94pub(crate) fn quote_string(input: &str) -> String {
95    lexpr::to_string(&lexpr::Value::string(input))
96        .expect("lexpr string serialization should not fail")
97}
98
99// ── Deserialization ───────────────────────────────────────────────────────────
100
/// Optional resource limits enforced while parsing a snapshot body.
///
/// Every field defaults to `None`, which disables that particular check.
/// A violated limit is reported as a `GitClosureError::Parse` error.
#[derive(Debug, Clone, Default)]
pub struct ParseLimits {
    /// Maximum number of entries allowed in the snapshot body list.
    pub max_entry_count: Option<usize>,
    /// Maximum declared `:size`, in bytes, of any single regular-file entry.
    pub max_file_bytes: Option<u64>,
    /// Maximum running sum of the `:size` values of regular-file entries.
    pub max_total_bytes: Option<u64>,
}
112
113pub fn parse_snapshot(input: &str) -> Result<(SnapshotHeader, Vec<SnapshotFile>)> {
114    parse_snapshot_with_limits(input, None)
115}
116
117pub fn parse_snapshot_with_limits(
118    input: &str,
119    limits: Option<&ParseLimits>,
120) -> Result<(SnapshotHeader, Vec<SnapshotFile>)> {
121    let (header, body) = split_header_body(input)?;
122    let parsed = lexpr::from_str(body).map_err(|err| {
123        GitClosureError::Parse(format!("failed to parse S-expression body: {err}"))
124    })?;
125    let files = parse_files_value(&parsed, limits)?;
126
127    if files.len() != header.file_count {
128        return Err(GitClosureError::Parse(format!(
129            "file count mismatch: header says {}, parsed {}",
130            header.file_count,
131            files.len()
132        )));
133    }
134
135    Ok((header, files))
136}
137
138fn split_header_body(input: &str) -> Result<(SnapshotHeader, &str)> {
139    let mut snapshot_hash = None;
140    let mut file_count = None;
141    let mut git_rev = None;
142    let mut git_branch = None;
143    let mut extra_headers = Vec::new();
144    let mut body_start = None;
145    let mut cursor = 0usize;
146
147    for line in input.lines() {
148        let line_len = line.len();
149        if line.starts_with(";;") {
150            if line.strip_prefix(";; format-hash:").is_some() {
151                return Err(GitClosureError::LegacyHeader);
152            }
153            if let Some(value) = line.strip_prefix(";; snapshot-hash:") {
154                snapshot_hash = Some(value.trim().to_string());
155            }
156            if let Some(value) = line.strip_prefix(";; file-count:") {
157                file_count = Some(value.trim().parse::<usize>().map_err(|err| {
158                    GitClosureError::Parse(format!("invalid file-count header: {err}"))
159                })?);
160            }
161            if let Some(value) = line.strip_prefix(";; git-rev:") {
162                git_rev = Some(value.trim().to_string());
163            }
164            if let Some(value) = line.strip_prefix(";; git-branch:") {
165                git_branch = Some(value.trim().to_string());
166            }
167            if let Some(rest) = line.strip_prefix(";; ") {
168                if let Some((raw_key, raw_value)) = rest.split_once(':') {
169                    let key = raw_key.trim();
170                    if key != "snapshot-hash"
171                        && key != "file-count"
172                        && key != "git-rev"
173                        && key != "git-branch"
174                        && key != "format-hash"
175                        && !key.is_empty()
176                    {
177                        extra_headers.push((key.to_string(), raw_value.trim().to_string()));
178                    }
179                }
180            }
181            cursor += line_len + 1;
182            continue;
183        }
184
185        if line.trim().is_empty() {
186            cursor += line_len + 1;
187            continue;
188        }
189
190        body_start = Some(cursor);
191        break;
192    }
193
194    let snapshot_hash = snapshot_hash.ok_or(GitClosureError::MissingHeader("snapshot-hash"))?;
195    let file_count = file_count.ok_or(GitClosureError::MissingHeader("file-count"))?;
196    let body_start = body_start.ok_or(GitClosureError::MissingHeader("S-expression body"))?;
197
198    let body = &input[body_start..];
199
200    Ok((
201        SnapshotHeader {
202            snapshot_hash,
203            file_count,
204            git_rev,
205            git_branch,
206            extra_headers,
207        },
208        body,
209    ))
210}
211
212fn parse_files_value(
213    value: &lexpr::Value,
214    limits: Option<&ParseLimits>,
215) -> Result<Vec<SnapshotFile>> {
216    let root = value
217        .to_ref_vec()
218        .ok_or_else(|| GitClosureError::Parse("snapshot body must be a list".to_string()))?;
219
220    if let Some(limit) = limits.and_then(|l| l.max_entry_count) {
221        if root.len() > limit {
222            return Err(GitClosureError::Parse(format!(
223                "snapshot entry count {} exceeds max_entry_count limit {}",
224                root.len(),
225                limit
226            )));
227        }
228    }
229
230    let mut files = Vec::with_capacity(root.len());
231    let mut total_bytes = 0u64;
232
233    for entry in root {
234        let pair = entry.to_ref_vec().ok_or_else(|| {
235            GitClosureError::Parse("each entry must be a 2-item list".to_string())
236        })?;
237        if pair.len() != 2 {
238            return Err(GitClosureError::Parse(
239                "each entry must contain plist and content".to_string(),
240            ));
241        }
242
243        let plist = pair[0]
244            .to_ref_vec()
245            .ok_or_else(|| GitClosureError::Parse("entry plist must be a list".to_string()))?;
246
247        let content_field = pair[1]
248            .as_str()
249            .ok_or_else(|| GitClosureError::Parse("entry content must be a string".to_string()))?;
250
251        let mut path = None;
252        let mut sha256 = None;
253        let mut mode = None;
254        let mut size = None;
255        let mut encoding = None;
256        let mut entry_type = None;
257        let mut target = None;
258
259        if plist.len() % 2 != 0 {
260            return Err(GitClosureError::Parse(
261                "plist key/value pairs are malformed".to_string(),
262            ));
263        }
264
265        let mut idx = 0usize;
266        while idx < plist.len() {
267            let key = if let Some(keyword) = plist[idx].as_keyword() {
268                keyword
269            } else if let Some(symbol) = plist[idx].as_symbol() {
270                symbol.strip_prefix(':').ok_or_else(|| {
271                    GitClosureError::Parse("plist symbol keys must start with ':'".to_string())
272                })?
273            } else {
274                return Err(GitClosureError::Parse(
275                    "plist keys must be keywords or :symbol values".to_string(),
276                ));
277            };
278            let value = &plist[idx + 1];
279
280            match key {
281                "path" => {
282                    path = Some(
283                        value
284                            .as_str()
285                            .ok_or_else(|| {
286                                GitClosureError::Parse(":path must be a string".to_string())
287                            })?
288                            .to_string(),
289                    );
290                }
291                "sha256" => {
292                    sha256 = Some(
293                        value
294                            .as_str()
295                            .ok_or_else(|| {
296                                GitClosureError::Parse(":sha256 must be a string".to_string())
297                            })?
298                            .to_string(),
299                    );
300                }
301                "mode" => {
302                    mode = Some(
303                        value
304                            .as_str()
305                            .ok_or_else(|| {
306                                GitClosureError::Parse(":mode must be a string".to_string())
307                            })?
308                            .to_string(),
309                    );
310                }
311                "size" => {
312                    size = Some(value.as_u64().ok_or_else(|| {
313                        GitClosureError::Parse(":size must be a u64".to_string())
314                    })?);
315                }
316                "encoding" => {
317                    encoding = Some(
318                        value
319                            .as_str()
320                            .ok_or_else(|| {
321                                GitClosureError::Parse(":encoding must be a string".to_string())
322                            })?
323                            .to_string(),
324                    );
325                }
326                "type" => {
327                    entry_type = Some(
328                        value
329                            .as_str()
330                            .ok_or_else(|| {
331                                GitClosureError::Parse(":type must be a string".to_string())
332                            })?
333                            .to_string(),
334                    );
335                }
336                "target" => {
337                    target = Some(
338                        value
339                            .as_str()
340                            .ok_or_else(|| {
341                                GitClosureError::Parse(":target must be a string".to_string())
342                            })?
343                            .to_string(),
344                    );
345                }
346                _other => {
347                    // Unknown keys are intentionally ignored for forward compatibility.
348                    // README: "unknown plist keys are silently ignored by any conformant reader."
349                    // A future version of git-closure may emit :mtime, :git-object-id, etc.
350                    idx += 2;
351                    continue;
352                }
353            }
354
355            idx += 2;
356        }
357
358        let path = path.ok_or_else(|| GitClosureError::Parse("missing :path".to_string()))?;
359        if entry_type.as_deref() == Some("symlink") {
360            if sha256.as_deref().map(|s| !s.is_empty()).unwrap_or(false) {
361                return Err(GitClosureError::Parse(format!(
362                    "symlink entry {} has unexpected :sha256 field",
363                    path
364                )));
365            }
366            if size.map(|s| s != 0).unwrap_or(false) {
367                return Err(GitClosureError::Parse(format!(
368                    "symlink entry {} has unexpected non-zero :size",
369                    path
370                )));
371            }
372            if encoding.is_some() {
373                return Err(GitClosureError::Parse(format!(
374                    "symlink entry {} has unexpected :encoding field",
375                    path
376                )));
377            }
378            let target = target
379                .ok_or_else(|| GitClosureError::Parse("missing :target for symlink".to_string()))?;
380            files.push(SnapshotFile {
381                path,
382                sha256: String::new(),
383                mode: "120000".to_string(),
384                size: 0,
385                encoding: None,
386                symlink_target: Some(target),
387                content: Vec::new(),
388            });
389            continue;
390        }
391
392        let sha256 = sha256.ok_or_else(|| GitClosureError::Parse("missing :sha256".to_string()))?;
393        let mode = mode.ok_or_else(|| GitClosureError::Parse("missing :mode".to_string()))?;
394        let size = size.ok_or_else(|| GitClosureError::Parse("missing :size".to_string()))?;
395
396        if let Some(limit) = limits.and_then(|l| l.max_file_bytes) {
397            if size > limit {
398                return Err(GitClosureError::Parse(format!(
399                    "entry {} exceeds max_file_bytes limit ({size} > {limit})",
400                    path
401                )));
402            }
403        }
404
405        let content = match encoding.as_deref() {
406            Some("base64") => BASE64_STANDARD.decode(content_field).map_err(|err| {
407                GitClosureError::Parse(format!("invalid base64 content for {path}: {err}"))
408            })?,
409            Some(other) => {
410                return Err(GitClosureError::Parse(format!(
411                    "unsupported encoding for {path}: {other}"
412                )));
413            }
414            None => content_field.as_bytes().to_vec(),
415        };
416
417        if content.len() as u64 != size {
418            return Err(GitClosureError::SizeMismatch {
419                path,
420                expected: size,
421                actual: content.len() as u64,
422            });
423        }
424
425        total_bytes = total_bytes.saturating_add(size);
426        if let Some(limit) = limits.and_then(|l| l.max_total_bytes) {
427            if total_bytes > limit {
428                return Err(GitClosureError::Parse(format!(
429                    "snapshot content exceeds max_total_bytes limit ({total_bytes} > {limit})"
430                )));
431            }
432        }
433
434        files.push(SnapshotFile {
435            path,
436            sha256,
437            mode,
438            size,
439            encoding,
440            symlink_target: None,
441            content,
442        });
443    }
444
445    files.sort_by(|a, b| a.path.cmp(&b.path));
446    for window in files.windows(2) {
447        if window[0].path == window[1].path {
448            return Err(GitClosureError::Parse(format!(
449                "duplicate :path in snapshot: {}",
450                window[0].path
451            )));
452        }
453    }
454    Ok(files)
455}
456
457// ── Public high-level operations ─────────────────────────────────────────────
458
459/// Parses a `.gcl` snapshot file and returns a `ListEntry` for each recorded
460/// file, in lexicographic path order.
461pub fn list_snapshot(snapshot: &Path) -> Result<Vec<ListEntry>> {
462    let text = fs::read_to_string(snapshot).map_err(|err| io_error_with_path(err, snapshot))?;
463    list_snapshot_str(&text)
464}
465
466/// Parses snapshot text and returns a `ListEntry` for each recorded file.
467pub fn list_snapshot_str(text: &str) -> Result<Vec<ListEntry>> {
468    let (_header, files) = parse_snapshot(text)?;
469    Ok(files
470        .into_iter()
471        .map(|f| ListEntry {
472            is_symlink: f.symlink_target.is_some(),
473            symlink_target: f.symlink_target,
474            sha256: f.sha256,
475            mode: f.mode,
476            size: f.size,
477            path: f.path,
478        })
479        .collect())
480}
481
/// Formatting behavior toggles for [`fmt_snapshot_with_options`].
///
/// The derived `Default` leaves every toggle off.
#[derive(Debug, Clone, Copy, Default)]
pub struct FmtOptions {
    /// Recompute and overwrite a mismatched header `snapshot-hash`.
    ///
    /// When `false`, a header hash that differs from the recomputed one makes
    /// formatting fail with `GitClosureError::HashMismatch`.
    pub repair_hash: bool,
}
488
489/// Reads and canonicalizes a snapshot file using default [`FmtOptions`].
490///
491/// The result is byte-identical to what [`crate::build_snapshot`] would
492/// produce for the same content — modulo the structural hash which is
493/// recomputed from the parsed file list. Use `--check` mode in the `fmt`
494/// subcommand to detect snapshots that are not yet in canonical form.
495pub fn fmt_snapshot(snapshot: &Path) -> Result<String> {
496    fmt_snapshot_with_options(snapshot, FmtOptions::default())
497}
498
499/// Reads and canonicalizes a snapshot file with explicit formatting options.
500pub fn fmt_snapshot_with_options(snapshot: &Path, options: FmtOptions) -> Result<String> {
501    let text = fs::read_to_string(snapshot).map_err(|err| io_error_with_path(err, snapshot))?;
502    let (mut header, mut files) = parse_snapshot(&text)?;
503    files.sort_by(|a, b| a.path.cmp(&b.path));
504    let computed_hash = super::hash::compute_snapshot_hash(&files);
505    if header.snapshot_hash != computed_hash && !options.repair_hash {
506        return Err(GitClosureError::HashMismatch {
507            expected: header.snapshot_hash,
508            actual: computed_hash,
509        });
510    }
511    header.snapshot_hash = computed_hash;
512    header.file_count = files.len();
513    Ok(serialize_snapshot(&files, &header))
514}
515
516#[cfg(test)]
517mod tests {
518    use super::*;
519    use crate::snapshot::hash::compute_snapshot_hash;
520    use proptest::prelude::*;
521    use std::collections::BTreeMap;
522
523    /// Build a minimal SnapshotHeader (no git metadata) for use in tests.
524    fn make_header(files: &[SnapshotFile]) -> SnapshotHeader {
525        SnapshotHeader {
526            snapshot_hash: compute_snapshot_hash(files),
527            file_count: files.len(),
528            git_rev: None,
529            git_branch: None,
530            extra_headers: Vec::new(),
531        }
532    }
533
534    fn make_text_file(path: &str, content: &str) -> SnapshotFile {
535        use crate::snapshot::hash::sha256_hex;
536        let bytes = content.as_bytes().to_vec();
537        SnapshotFile {
538            path: path.to_string(),
539            sha256: sha256_hex(&bytes),
540            mode: "644".to_string(),
541            size: bytes.len() as u64,
542            encoding: None,
543            symlink_target: None,
544            content: bytes,
545        }
546    }
547
548    fn path_strategy() -> impl Strategy<Value = String> {
549        proptest::string::string_regex(r"[A-Za-z0-9_.-]{1,12}(/[A-Za-z0-9_.-]{1,12}){0,2}")
550            .expect("valid path regex")
551            .prop_filter("path must be safe and relative", |path| {
552                !path.starts_with('/')
553                    && !path
554                        .split('/')
555                        .any(|segment| segment == "." || segment == "..")
556            })
557    }
558
559    fn symlink_target_strategy() -> impl Strategy<Value = String> {
560        proptest::string::string_regex(r"[A-Za-z0-9_.-]{1,16}(/[A-Za-z0-9_.-]{1,16}){0,2}")
561            .expect("valid symlink target regex")
562            .prop_filter("symlink target must not be empty", |target| {
563                !target.is_empty()
564            })
565    }
566
567    fn snapshot_file_strategy() -> impl Strategy<Value = SnapshotFile> {
568        let regular_utf8 = (
569            path_strategy(),
570            prop::sample::select(vec!["644".to_string(), "755".to_string()]),
571            proptest::string::string_regex("[ -~]{0,64}").expect("valid UTF-8 content regex"),
572        )
573            .prop_map(|(path, mode, content)| {
574                let bytes = content.into_bytes();
575                SnapshotFile {
576                    path,
577                    sha256: crate::snapshot::hash::sha256_hex(&bytes),
578                    mode,
579                    size: bytes.len() as u64,
580                    encoding: None,
581                    symlink_target: None,
582                    content: bytes,
583                }
584            });
585
586        let regular_binary = (
587            path_strategy(),
588            prop::sample::select(vec!["644".to_string(), "755".to_string()]),
589            prop::collection::vec(any::<u8>(), 0..64),
590        )
591            .prop_map(|(path, mode, bytes)| SnapshotFile {
592                path,
593                sha256: crate::snapshot::hash::sha256_hex(&bytes),
594                mode,
595                size: bytes.len() as u64,
596                encoding: Some("base64".to_string()),
597                symlink_target: None,
598                content: bytes,
599            });
600
601        let symlink =
602            (path_strategy(), symlink_target_strategy()).prop_map(|(path, target)| SnapshotFile {
603                path,
604                sha256: String::new(),
605                mode: "120000".to_string(),
606                size: 0,
607                encoding: None,
608                symlink_target: Some(target),
609                content: Vec::new(),
610            });
611
612        prop_oneof![regular_utf8, regular_binary, symlink]
613    }
614
615    fn canonicalize_generated_files(files: Vec<SnapshotFile>) -> Vec<SnapshotFile> {
616        let mut by_path = BTreeMap::new();
617        for file in files {
618            by_path.entry(file.path.clone()).or_insert(file);
619        }
620        by_path.into_values().collect()
621    }
622
623    #[test]
624    fn serialize_then_parse_roundtrip_single_text_file() {
625        let file = make_text_file("readme.txt", "hello\n");
626        let files_arr = [file.clone()];
627        let header = make_header(&files_arr);
628        let text = serialize_snapshot(&files_arr, &header);
629        let expected_hash = header.snapshot_hash.clone();
630        let (header, files) = parse_snapshot(&text).expect("parse serialized snapshot");
631        assert_eq!(header.snapshot_hash, expected_hash);
632        assert_eq!(files.len(), 1);
633        assert_eq!(files[0].path, file.path);
634        assert_eq!(files[0].content, file.content);
635    }
636
637    #[test]
638    fn serialize_then_parse_roundtrip_binary_file() {
639        use crate::snapshot::hash::sha256_hex;
640        let bytes: Vec<u8> = (0u8..=255).collect();
641        let file = SnapshotFile {
642            path: "all-bytes.bin".to_string(),
643            sha256: sha256_hex(&bytes),
644            mode: "644".to_string(),
645            size: bytes.len() as u64,
646            encoding: Some("base64".to_string()),
647            symlink_target: None,
648            content: bytes.clone(),
649        };
650        let files_arr = [file];
651        let header = make_header(&files_arr);
652        let text = serialize_snapshot(&files_arr, &header);
653        let (_, files) = parse_snapshot(&text).expect("parse binary snapshot");
654        assert_eq!(files[0].content, bytes);
655    }
656
    // Property tests over generated file lists of up to 15 entries.
    proptest! {
        // Serializing a de-duplicated, path-ordered file list and parsing it
        // back must reproduce the same count, hash, and files.
        #[test]
        fn proptest_parse_serialize_roundtrip(files in prop::collection::vec(snapshot_file_strategy(), 0..16)) {
            let files = canonicalize_generated_files(files);
            let header = make_header(&files);
            let serialized = serialize_snapshot(&files, &header);
            let (parsed_header, parsed_files) = parse_snapshot(&serialized)
                .expect("generated snapshot should parse");

            prop_assert_eq!(parsed_header.file_count, files.len());
            prop_assert_eq!(parsed_header.snapshot_hash, compute_snapshot_hash(&files));
            prop_assert_eq!(parsed_files, files);
        }

        // Formatting an already formatted snapshot must not change it.
        #[test]
        fn proptest_fmt_is_idempotent(files in prop::collection::vec(snapshot_file_strategy(), 0..16)) {
            let files = canonicalize_generated_files(files);
            let header = make_header(&files);
            let serialized = serialize_snapshot(&files, &header);

            // `fmt_snapshot` reads from disk, so go through a temporary file.
            let tmp = tempfile::TempDir::new().expect("create tempdir");
            let snapshot = tmp.path().join("proptest.gcl");
            std::fs::write(&snapshot, serialized).expect("write generated snapshot");

            let once = fmt_snapshot(&snapshot).expect("first fmt pass");
            std::fs::write(&snapshot, &once).expect("write first fmt result");
            let twice = fmt_snapshot(&snapshot).expect("second fmt pass");

            prop_assert_eq!(twice, once);
        }
    }
688
689    #[test]
690    fn parse_snapshot_unknown_plist_key_is_ignored() {
691        let file = make_text_file("a.txt", "hi");
692        let files_arr = [file];
693        let header = make_header(&files_arr);
694        let text = serialize_snapshot(&files_arr, &header);
695        // Inject a future unknown key.
696        let modified = text.replace(":mode ", ":future-key \"v\"\n     :mode ");
697        let (_, files) = parse_snapshot(&modified).expect("unknown key must be silently ignored");
698        assert_eq!(files[0].path, "a.txt");
699    }
700
701    #[test]
702    fn parse_snapshot_rejects_duplicate_regular_paths() {
703        let content_a = "a";
704        let content_b = "b";
705        let digest_a = crate::snapshot::hash::sha256_hex(content_a.as_bytes());
706        let digest_b = crate::snapshot::hash::sha256_hex(content_b.as_bytes());
707        let snapshot_hash = crate::snapshot::hash::sha256_hex(b"placeholder");
708        let input = format!(
709            ";; git-closure snapshot v0.1\n;; snapshot-hash: {snapshot_hash}\n;; file-count: 2\n\n(\n  ((:path \"dup.txt\" :sha256 \"{digest_a}\" :mode \"644\" :size 1) \"{content_a}\")\n  ((:path \"dup.txt\" :sha256 \"{digest_b}\" :mode \"644\" :size 1) \"{content_b}\")\n)\n"
710        );
711
712        let err = parse_snapshot(&input).expect_err("duplicate paths must be rejected");
713        match err {
714            GitClosureError::Parse(msg) => assert!(
715                msg.contains("duplicate :path") && msg.contains("dup.txt"),
716                "parse error should mention duplicate path, got: {msg}"
717            ),
718            other => panic!("expected Parse error, got {other:?}"),
719        }
720    }
721
722    #[test]
723    fn parse_snapshot_rejects_duplicate_regular_and_symlink_paths() {
724        let content = "x";
725        let digest = crate::snapshot::hash::sha256_hex(content.as_bytes());
726        let snapshot_hash = crate::snapshot::hash::sha256_hex(b"placeholder");
727        let input = format!(
728            ";; git-closure snapshot v0.1\n;; snapshot-hash: {snapshot_hash}\n;; file-count: 2\n\n(\n  ((:path \"dup.txt\" :sha256 \"{digest}\" :mode \"644\" :size 1) \"{content}\")\n  ((:path \"dup.txt\" :type \"symlink\" :target \"target.txt\") \"\")\n)\n"
729        );
730
731        let err = parse_snapshot(&input)
732            .expect_err("duplicate path between regular and symlink must be rejected");
733        match err {
734            GitClosureError::Parse(msg) => assert!(
735                msg.contains("duplicate :path") && msg.contains("dup.txt"),
736                "parse error should mention duplicate path, got: {msg}"
737            ),
738            other => panic!("expected Parse error, got {other:?}"),
739        }
740    }
741
742    #[test]
743    fn verify_snapshot_rejects_duplicate_paths_via_parse() {
744        use tempfile::TempDir;
745
746        let dir = TempDir::new().expect("create tempdir");
747        let snapshot = dir.path().join("duplicate.gcl");
748
749        let content_a = "a";
750        let content_b = "b";
751        let digest_a = crate::snapshot::hash::sha256_hex(content_a.as_bytes());
752        let digest_b = crate::snapshot::hash::sha256_hex(content_b.as_bytes());
753        let files = vec![
754            SnapshotFile {
755                path: "dup.txt".to_string(),
756                sha256: digest_a.clone(),
757                mode: "644".to_string(),
758                size: 1,
759                encoding: None,
760                symlink_target: None,
761                content: content_a.as_bytes().to_vec(),
762            },
763            SnapshotFile {
764                path: "dup.txt".to_string(),
765                sha256: digest_b.clone(),
766                mode: "644".to_string(),
767                size: 1,
768                encoding: None,
769                symlink_target: None,
770                content: content_b.as_bytes().to_vec(),
771            },
772        ];
773        let snapshot_hash = crate::snapshot::hash::compute_snapshot_hash(&files);
774        let input = format!(
775            ";; git-closure snapshot v0.1\n;; snapshot-hash: {snapshot_hash}\n;; file-count: 2\n\n(\n  ((:path \"dup.txt\" :sha256 \"{digest_a}\" :mode \"644\" :size 1) \"{content_a}\")\n  ((:path \"dup.txt\" :sha256 \"{digest_b}\" :mode \"644\" :size 1) \"{content_b}\")\n)\n"
776        );
777        std::fs::write(&snapshot, input).expect("write duplicate snapshot");
778
779        let err = crate::materialize::verify_snapshot(&snapshot)
780            .expect_err("verify must reject snapshots with duplicate paths");
781        assert!(matches!(err, GitClosureError::Parse(_)));
782    }
783
784    #[test]
785    fn parse_snapshot_with_limits_rejects_entry_count_limit() {
786        let file_a = make_text_file("a.txt", "a");
787        let file_b = make_text_file("b.txt", "b");
788        let files = vec![file_a, file_b];
789        let header = make_header(&files);
790        let text = serialize_snapshot(&files, &header);
791
792        let limits = ParseLimits {
793            max_entry_count: Some(1),
794            max_file_bytes: None,
795            max_total_bytes: None,
796        };
797        let err = parse_snapshot_with_limits(&text, Some(&limits))
798            .expect_err("entry count limit must reject oversized snapshot");
799        assert!(matches!(err, GitClosureError::Parse(_)));
800    }
801
802    #[test]
803    fn parse_snapshot_with_limits_rejects_file_bytes_limit() {
804        let file = make_text_file("a.txt", "hello");
805        let files = vec![file];
806        let header = make_header(&files);
807        let text = serialize_snapshot(&files, &header);
808
809        let limits = ParseLimits {
810            max_entry_count: None,
811            max_file_bytes: Some(4),
812            max_total_bytes: None,
813        };
814        let err = parse_snapshot_with_limits(&text, Some(&limits))
815            .expect_err("file bytes limit must reject oversized entry");
816        assert!(matches!(err, GitClosureError::Parse(_)));
817    }
818
819    #[test]
820    fn parse_snapshot_with_limits_rejects_total_bytes_limit() {
821        let file_a = make_text_file("a.txt", "abc");
822        let file_b = make_text_file("b.txt", "def");
823        let files = vec![file_a, file_b];
824        let header = make_header(&files);
825        let text = serialize_snapshot(&files, &header);
826
827        let limits = ParseLimits {
828            max_entry_count: None,
829            max_file_bytes: None,
830            max_total_bytes: Some(5),
831        };
832        let err = parse_snapshot_with_limits(&text, Some(&limits))
833            .expect_err("total bytes limit must reject oversized aggregate");
834        assert!(matches!(err, GitClosureError::Parse(_)));
835    }
836
837    #[test]
838    fn parse_snapshot_rejects_symlink_with_nonempty_sha256() {
839        let files = vec![SnapshotFile {
840            path: "link".to_string(),
841            sha256: String::new(),
842            mode: "120000".to_string(),
843            size: 0,
844            encoding: None,
845            symlink_target: Some("target.txt".to_string()),
846            content: Vec::new(),
847        }];
848        let header = make_header(&files);
849        let text = serialize_snapshot(&files, &header);
850        let modified = text.replace(
851            ":type \"symlink\"",
852            ":sha256 \"deadbeef\"\n     :type \"symlink\"",
853        );
854
855        let err = parse_snapshot(&modified)
856            .expect_err("symlink entries must reject non-empty sha256 field");
857        assert!(matches!(err, GitClosureError::Parse(_)));
858    }
859
860    #[test]
861    fn parse_snapshot_rejects_symlink_with_nonzero_size() {
862        let files = vec![SnapshotFile {
863            path: "link".to_string(),
864            sha256: String::new(),
865            mode: "120000".to_string(),
866            size: 0,
867            encoding: None,
868            symlink_target: Some("target.txt".to_string()),
869            content: Vec::new(),
870        }];
871        let header = make_header(&files);
872        let text = serialize_snapshot(&files, &header);
873        let modified = text.replace(":type \"symlink\"", ":size 1\n     :type \"symlink\"");
874
875        let err =
876            parse_snapshot(&modified).expect_err("symlink entries must reject non-zero size field");
877        assert!(matches!(err, GitClosureError::Parse(_)));
878    }
879
880    #[test]
881    fn parse_snapshot_rejects_legacy_format_hash_header() {
882        let input = ";; format-hash: abc\n;; file-count: 0\n\n()\n";
883        let err = parse_snapshot(input).expect_err("legacy header must be rejected");
884        assert!(matches!(err, GitClosureError::LegacyHeader));
885    }
886
887    #[test]
888    fn quote_string_matches_lexpr_printer() {
889        let sample = "line1\nline2\u{0000}\u{fffd}\u{1f642}\\\"";
890        let expected = lexpr::to_string(&lexpr::Value::string(sample)).expect("print with lexpr");
891        assert_eq!(quote_string(sample), expected);
892    }
893
894    // ── list_snapshot tests ───────────────────────────────────────────────────
895
896    #[test]
897    fn list_snapshot_returns_entries_in_path_order() {
898        use std::fs;
899        use tempfile::TempDir;
900
901        let file_b = make_text_file("b.txt", "b");
902        let file_a = make_text_file("a.txt", "a");
903        // Intentionally unsorted to verify output is sorted.
904        let mut files = vec![file_b.clone(), file_a.clone()];
905        files.sort_by(|x, y| x.path.cmp(&y.path));
906        let header = make_header(&files);
907        let text = serialize_snapshot(&files, &header);
908
909        let dir = TempDir::new().unwrap();
910        let snap = dir.path().join("snap.gcl");
911        fs::write(&snap, text.as_bytes()).unwrap();
912
913        let entries = list_snapshot(&snap).expect("list_snapshot must succeed");
914        assert_eq!(entries.len(), 2);
915        assert_eq!(entries[0].path, "a.txt");
916        assert_eq!(entries[1].path, "b.txt");
917        assert!(!entries[0].is_symlink);
918        assert_eq!(entries[0].size, 1);
919    }
920
921    #[test]
922    fn list_snapshot_symlink_entry_has_correct_fields() {
923        use crate::snapshot::hash::sha256_hex;
924        use std::fs;
925        use tempfile::TempDir;
926
927        let symlink_file = SnapshotFile {
928            path: "link".to_string(),
929            sha256: String::new(),
930            mode: "120000".to_string(),
931            size: 0,
932            encoding: None,
933            symlink_target: Some("target.txt".to_string()),
934            content: Vec::new(),
935        };
936        let regular = make_text_file("target.txt", "content");
937        let files = vec![symlink_file, regular];
938        let header = make_header(&files);
939        let text = serialize_snapshot(&files, &header);
940
941        let dir = TempDir::new().unwrap();
942        let snap = dir.path().join("snap.gcl");
943        fs::write(&snap, text.as_bytes()).unwrap();
944
945        let entries = list_snapshot(&snap).expect("list_snapshot must succeed");
946        let link_entry = entries.iter().find(|e| e.path == "link").unwrap();
947        assert!(link_entry.is_symlink);
948        assert_eq!(link_entry.symlink_target.as_deref(), Some("target.txt"));
949        assert_eq!(link_entry.sha256, "");
950        assert_eq!(link_entry.size, 0);
951
952        // Suppress unused import warning in non-unix builds.
953        let _ = sha256_hex;
954    }
955
956    #[test]
957    fn list_snapshot_str_returns_expected_entries() {
958        let files = vec![make_text_file("a.txt", "a"), make_text_file("b.txt", "bb")];
959        let header = make_header(&files);
960        let text = serialize_snapshot(&files, &header);
961
962        let entries =
963            list_snapshot_str(&text).expect("list_snapshot_str should parse valid snapshot");
964        assert_eq!(entries.len(), 2);
965        assert_eq!(entries[0].path, "a.txt");
966        assert_eq!(entries[1].path, "b.txt");
967        assert_eq!(entries[0].size, 1);
968        assert_eq!(entries[1].size, 2);
969    }
970
971    // ── fmt_snapshot tests ────────────────────────────────────────────────────
972
973    #[test]
974    fn fmt_snapshot_is_idempotent() {
975        use std::fs;
976        use tempfile::TempDir;
977
978        let file = make_text_file("src/lib.rs", "fn main() {}\n");
979        let files_arr = [file];
980        let header = make_header(&files_arr);
981        let original = serialize_snapshot(&files_arr, &header);
982
983        let dir = TempDir::new().unwrap();
984        let snap = dir.path().join("snap.gcl");
985        fs::write(&snap, original.as_bytes()).unwrap();
986
987        let formatted = fmt_snapshot(&snap).expect("fmt_snapshot must succeed");
988        assert_eq!(
989            formatted, original,
990            "fmt_snapshot on already-canonical snapshot must be idempotent"
991        );
992
993        // Write the formatted version and format again — must still be equal.
994        fs::write(&snap, formatted.as_bytes()).unwrap();
995        let formatted2 = fmt_snapshot(&snap).expect("second fmt_snapshot must succeed");
996        assert_eq!(formatted2, formatted);
997    }
998
999    #[test]
1000    fn fmt_snapshot_sorts_files_canonically() {
1001        use std::fs;
1002        use tempfile::TempDir;
1003
1004        let file_z = make_text_file("z.txt", "z");
1005        let file_a = make_text_file("a.txt", "a");
1006        // Build with files in reverse order (z before a) to create an out-of-order snapshot.
1007        let mut files_sorted = vec![file_z.clone(), file_a.clone()];
1008        files_sorted.sort_by(|x, y| x.path.cmp(&y.path));
1009        let header = make_header(&files_sorted);
1010        let canonical = serialize_snapshot(&files_sorted, &header);
1011
1012        let dir = TempDir::new().unwrap();
1013        let snap = dir.path().join("snap.gcl");
1014        fs::write(&snap, canonical.as_bytes()).unwrap();
1015
1016        let formatted = fmt_snapshot(&snap).expect("fmt_snapshot must succeed");
1017        // Paths must appear in order in the formatted output.
1018        let a_pos = formatted.find("\"a.txt\"").unwrap();
1019        let z_pos = formatted.find("\"z.txt\"").unwrap();
1020        assert!(
1021            a_pos < z_pos,
1022            "a.txt must appear before z.txt in canonical output"
1023        );
1024    }
1025
1026    #[test]
1027    fn fmt_snapshot_preserves_unknown_headers_in_order() {
1028        use std::fs;
1029        use tempfile::TempDir;
1030
1031        let file = make_text_file("a.txt", "a");
1032        let files = vec![file];
1033        let header = make_header(&files);
1034        let mut text = serialize_snapshot(&files, &header);
1035        text = text.replacen(
1036            ";; file-count: 1\n",
1037            ";; file-count: 1\n;; source-uri: gh:owner/repo@main\n;; x-custom: abc\n",
1038            1,
1039        );
1040
1041        let dir = TempDir::new().unwrap();
1042        let snap = dir.path().join("snap.gcl");
1043        fs::write(&snap, text.as_bytes()).unwrap();
1044
1045        let formatted = fmt_snapshot(&snap).expect("fmt_snapshot must succeed");
1046        let source_pos = formatted
1047            .find(";; source-uri: gh:owner/repo@main")
1048            .expect("source-uri header retained");
1049        let custom_pos = formatted
1050            .find(";; x-custom: abc")
1051            .expect("x-custom header retained");
1052        assert!(
1053            source_pos < custom_pos,
1054            "unknown headers must keep input order"
1055        );
1056
1057        fs::write(&snap, formatted.as_bytes()).unwrap();
1058        let formatted_again = fmt_snapshot(&snap).expect("second fmt_snapshot must succeed");
1059        assert_eq!(formatted_again, formatted, "fmt(fmt(x)) must be idempotent");
1060    }
1061
1062    #[test]
1063    fn fmt_snapshot_rejects_hash_mismatch_by_default() {
1064        use std::fs;
1065        use tempfile::TempDir;
1066
1067        let file = make_text_file("a.txt", "a");
1068        let mut header = make_header(std::slice::from_ref(&file));
1069        header.snapshot_hash =
1070            "0000000000000000000000000000000000000000000000000000000000000000".to_string();
1071        let text = serialize_snapshot(std::slice::from_ref(&file), &header);
1072
1073        let dir = TempDir::new().unwrap();
1074        let snap = dir.path().join("tampered.gcl");
1075        fs::write(&snap, text.as_bytes()).unwrap();
1076
1077        let err = fmt_snapshot(&snap).expect_err("fmt must reject hash mismatch by default");
1078        assert!(matches!(err, GitClosureError::HashMismatch { .. }));
1079    }
1080
1081    #[test]
1082    fn fmt_snapshot_repair_hash_allows_recanonicalization() {
1083        use std::fs;
1084        use tempfile::TempDir;
1085
1086        let file = make_text_file("a.txt", "a");
1087        let mut header = make_header(std::slice::from_ref(&file));
1088        header.snapshot_hash =
1089            "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff".to_string();
1090        let text = serialize_snapshot(std::slice::from_ref(&file), &header);
1091
1092        let dir = TempDir::new().unwrap();
1093        let snap = dir.path().join("repair.gcl");
1094        fs::write(&snap, text.as_bytes()).unwrap();
1095
1096        let repaired = fmt_snapshot_with_options(&snap, FmtOptions { repair_hash: true })
1097            .expect("fmt --repair-hash should succeed");
1098        assert!(
1099            !repaired.contains("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"),
1100            "repaired output must contain a recomputed hash"
1101        );
1102    }
1103
1104    #[test]
1105    fn serialize_snapshot_avoids_content_clone_in_utf8_path() {
1106        let source = include_str!("serial.rs");
1107        let needle = ["String::from_utf8(", "file.content.clone()", ")"].join("");
1108        assert!(
1109            !source.contains(&needle),
1110            "utf8 serialization path should avoid cloning file.content"
1111        );
1112    }
1113
1114    // ── git metadata header tests (T-32) ─────────────────────────────────────
1115
1116    #[test]
1117    fn serialize_with_git_metadata_emits_header_comments() {
1118        let file = make_text_file("src/lib.rs", "fn main() {}\n");
1119        let files = [file];
1120        let hash = compute_snapshot_hash(&files);
1121        let header = SnapshotHeader {
1122            snapshot_hash: hash,
1123            file_count: files.len(),
1124            git_rev: Some("deadbeef1234567890abcdef1234567890abcdef".to_string()),
1125            git_branch: Some("main".to_string()),
1126            extra_headers: Vec::new(),
1127        };
1128        let text = serialize_snapshot(&files, &header);
1129        assert!(
1130            text.contains(";; git-rev: deadbeef1234567890abcdef1234567890abcdef\n"),
1131            "serialized text must contain git-rev comment, got: {text}"
1132        );
1133        assert!(
1134            text.contains(";; git-branch: main\n"),
1135            "serialized text must contain git-branch comment, got: {text}"
1136        );
1137    }
1138
1139    #[test]
1140    fn git_metadata_not_included_in_snapshot_hash() {
1141        let file = make_text_file("src/lib.rs", "fn main() {}\n");
1142        let files = [file];
1143        let hash = compute_snapshot_hash(&files);
1144
1145        let header_without_meta = SnapshotHeader {
1146            snapshot_hash: hash.clone(),
1147            file_count: files.len(),
1148            git_rev: None,
1149            git_branch: None,
1150            extra_headers: Vec::new(),
1151        };
1152        let header_with_meta = SnapshotHeader {
1153            snapshot_hash: hash.clone(),
1154            file_count: files.len(),
1155            git_rev: Some("abc123".to_string()),
1156            git_branch: Some("feature-branch".to_string()),
1157            extra_headers: Vec::new(),
1158        };
1159
1160        let text_without = serialize_snapshot(&files, &header_without_meta);
1161        let text_with = serialize_snapshot(&files, &header_with_meta);
1162
1163        // The snapshot-hash comment must be identical in both.
1164        let hash_line = format!(";; snapshot-hash: {hash}\n");
1165        assert!(
1166            text_without.contains(&hash_line),
1167            "snapshot without meta must contain hash line"
1168        );
1169        assert!(
1170            text_with.contains(&hash_line),
1171            "snapshot with meta must contain same hash line"
1172        );
1173
1174        // The two serializations must differ only in the metadata lines.
1175        assert_ne!(
1176            text_without, text_with,
1177            "snapshots with and without git metadata must differ in text"
1178        );
1179    }
1180
1181    #[test]
1182    fn git_metadata_roundtrips_through_parse() {
1183        use std::fs;
1184        use tempfile::TempDir;
1185
1186        let file = make_text_file("readme.txt", "hello\n");
1187        let files = [file];
1188        let hash = compute_snapshot_hash(&files);
1189        let header = SnapshotHeader {
1190            snapshot_hash: hash,
1191            file_count: files.len(),
1192            git_rev: Some("cafebabe".to_string()),
1193            git_branch: Some("release/v1".to_string()),
1194            extra_headers: Vec::new(),
1195        };
1196        let text = serialize_snapshot(&files, &header);
1197
1198        let dir = TempDir::new().unwrap();
1199        let snap = dir.path().join("snap.gcl");
1200        fs::write(&snap, text.as_bytes()).unwrap();
1201
1202        // Parse the file back; metadata fields must survive the round-trip.
1203        let (parsed_header, _) = parse_snapshot(&text).expect("parse must succeed");
1204        assert_eq!(parsed_header.git_rev.as_deref(), Some("cafebabe"));
1205        assert_eq!(parsed_header.git_branch.as_deref(), Some("release/v1"));
1206
1207        // fmt_snapshot must preserve metadata.
1208        let formatted = fmt_snapshot(&snap).expect("fmt_snapshot must succeed");
1209        assert!(
1210            formatted.contains(";; git-rev: cafebabe\n"),
1211            "fmt_snapshot must preserve git-rev, got: {formatted}"
1212        );
1213        assert!(
1214            formatted.contains(";; git-branch: release/v1\n"),
1215            "fmt_snapshot must preserve git-branch, got: {formatted}"
1216        );
1217    }
1218}