1use std::fs;
3use std::path::Path;
4
5use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
6use base64::Engine;
7
8use crate::error::GitClosureError;
9use crate::utils::io_error_with_path;
10
11use super::{ListEntry, Result, SnapshotFile, SnapshotHeader};
12
13pub(crate) fn serialize_snapshot(files: &[SnapshotFile], header: &SnapshotHeader) -> String {
21 let mut output = String::new();
22
23 output.push_str(";; git-closure snapshot v0.1\n");
24 output.push_str(&format!(";; snapshot-hash: {}\n", header.snapshot_hash));
25 output.push_str(&format!(";; file-count: {}\n", files.len()));
26 if let Some(rev) = &header.git_rev {
27 output.push_str(&format!(";; git-rev: {rev}\n"));
28 }
29 if let Some(branch) = &header.git_branch {
30 output.push_str(&format!(";; git-branch: {branch}\n"));
31 }
32 for (key, value) in &header.extra_headers {
33 output.push_str(&format!(";; {key}: {value}\n"));
34 }
35 output.push('\n');
36 output.push_str("(\n");
37
38 for file in files {
39 output.push_str(" (\n");
40 output.push_str(" (:path ");
41 output.push_str("e_string(&file.path));
42 if let Some(target) = &file.symlink_target {
43 output.push('\n');
44 output.push_str(" :type ");
45 output.push_str("e_string("symlink"));
46 output.push('\n');
47 output.push_str(" :target ");
48 output.push_str("e_string(target));
49 output.push_str(")\n");
50 output.push_str("\"\"\n");
51 output.push_str(" )\n");
52 continue;
53 }
54 output.push('\n');
55 output.push_str(" :sha256 ");
56 output.push_str("e_string(&file.sha256));
57 output.push('\n');
58 output.push_str(" :mode ");
59 output.push_str("e_string(&file.mode));
60 output.push('\n');
61 output.push_str(" :size ");
62 output.push_str(&file.size.to_string());
63 if let Some(encoding) = &file.encoding {
64 output.push('\n');
65 output.push_str(" :encoding ");
66 output.push_str("e_string(encoding));
67 }
68 output.push_str(")\n");
69
70 let quoted_content = if file.encoding.as_deref() == Some("base64") {
71 quote_string(&BASE64_STANDARD.encode(&file.content))
72 } else {
73 quote_string(
79 std::str::from_utf8(&file.content)
80 .expect("non-base64 file content must be valid UTF-8 (invariant violated)"),
81 )
82 };
83
84 output.push_str("ed_content);
85 output.push('\n');
86 output.push_str(" )\n");
87 }
88
89 output.push_str(")\n");
90 output
91}
92
93pub(crate) fn quote_string(input: &str) -> String {
95 lexpr::to_string(&lexpr::Value::string(input))
96 .expect("lexpr string serialization should not fail")
97}
98
/// Optional resource caps enforced while parsing a snapshot body.
///
/// Every field defaults to `None`, which disables the corresponding check.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ParseLimits {
    /// Maximum number of entries permitted in the snapshot body list.
    pub max_entry_count: Option<usize>,
    /// Maximum declared `:size`, in bytes, permitted for one regular-file entry.
    pub max_file_bytes: Option<u64>,
    /// Maximum cumulative declared `:size`, in bytes, across parsed entries.
    pub max_total_bytes: Option<u64>,
}
112
113pub fn parse_snapshot(input: &str) -> Result<(SnapshotHeader, Vec<SnapshotFile>)> {
114 parse_snapshot_with_limits(input, None)
115}
116
117pub fn parse_snapshot_with_limits(
118 input: &str,
119 limits: Option<&ParseLimits>,
120) -> Result<(SnapshotHeader, Vec<SnapshotFile>)> {
121 let (header, body) = split_header_body(input)?;
122 let parsed = lexpr::from_str(body).map_err(|err| {
123 GitClosureError::Parse(format!("failed to parse S-expression body: {err}"))
124 })?;
125 let files = parse_files_value(&parsed, limits)?;
126
127 if files.len() != header.file_count {
128 return Err(GitClosureError::Parse(format!(
129 "file count mismatch: header says {}, parsed {}",
130 header.file_count,
131 files.len()
132 )));
133 }
134
135 Ok((header, files))
136}
137
138fn split_header_body(input: &str) -> Result<(SnapshotHeader, &str)> {
139 let mut snapshot_hash = None;
140 let mut file_count = None;
141 let mut git_rev = None;
142 let mut git_branch = None;
143 let mut extra_headers = Vec::new();
144 let mut body_start = None;
145 let mut cursor = 0usize;
146
147 for line in input.lines() {
148 let line_len = line.len();
149 if line.starts_with(";;") {
150 if line.strip_prefix(";; format-hash:").is_some() {
151 return Err(GitClosureError::LegacyHeader);
152 }
153 if let Some(value) = line.strip_prefix(";; snapshot-hash:") {
154 snapshot_hash = Some(value.trim().to_string());
155 }
156 if let Some(value) = line.strip_prefix(";; file-count:") {
157 file_count = Some(value.trim().parse::<usize>().map_err(|err| {
158 GitClosureError::Parse(format!("invalid file-count header: {err}"))
159 })?);
160 }
161 if let Some(value) = line.strip_prefix(";; git-rev:") {
162 git_rev = Some(value.trim().to_string());
163 }
164 if let Some(value) = line.strip_prefix(";; git-branch:") {
165 git_branch = Some(value.trim().to_string());
166 }
167 if let Some(rest) = line.strip_prefix(";; ") {
168 if let Some((raw_key, raw_value)) = rest.split_once(':') {
169 let key = raw_key.trim();
170 if key != "snapshot-hash"
171 && key != "file-count"
172 && key != "git-rev"
173 && key != "git-branch"
174 && key != "format-hash"
175 && !key.is_empty()
176 {
177 extra_headers.push((key.to_string(), raw_value.trim().to_string()));
178 }
179 }
180 }
181 cursor += line_len + 1;
182 continue;
183 }
184
185 if line.trim().is_empty() {
186 cursor += line_len + 1;
187 continue;
188 }
189
190 body_start = Some(cursor);
191 break;
192 }
193
194 let snapshot_hash = snapshot_hash.ok_or(GitClosureError::MissingHeader("snapshot-hash"))?;
195 let file_count = file_count.ok_or(GitClosureError::MissingHeader("file-count"))?;
196 let body_start = body_start.ok_or(GitClosureError::MissingHeader("S-expression body"))?;
197
198 let body = &input[body_start..];
199
200 Ok((
201 SnapshotHeader {
202 snapshot_hash,
203 file_count,
204 git_rev,
205 git_branch,
206 extra_headers,
207 },
208 body,
209 ))
210}
211
212fn parse_files_value(
213 value: &lexpr::Value,
214 limits: Option<&ParseLimits>,
215) -> Result<Vec<SnapshotFile>> {
216 let root = value
217 .to_ref_vec()
218 .ok_or_else(|| GitClosureError::Parse("snapshot body must be a list".to_string()))?;
219
220 if let Some(limit) = limits.and_then(|l| l.max_entry_count) {
221 if root.len() > limit {
222 return Err(GitClosureError::Parse(format!(
223 "snapshot entry count {} exceeds max_entry_count limit {}",
224 root.len(),
225 limit
226 )));
227 }
228 }
229
230 let mut files = Vec::with_capacity(root.len());
231 let mut total_bytes = 0u64;
232
233 for entry in root {
234 let pair = entry.to_ref_vec().ok_or_else(|| {
235 GitClosureError::Parse("each entry must be a 2-item list".to_string())
236 })?;
237 if pair.len() != 2 {
238 return Err(GitClosureError::Parse(
239 "each entry must contain plist and content".to_string(),
240 ));
241 }
242
243 let plist = pair[0]
244 .to_ref_vec()
245 .ok_or_else(|| GitClosureError::Parse("entry plist must be a list".to_string()))?;
246
247 let content_field = pair[1]
248 .as_str()
249 .ok_or_else(|| GitClosureError::Parse("entry content must be a string".to_string()))?;
250
251 let mut path = None;
252 let mut sha256 = None;
253 let mut mode = None;
254 let mut size = None;
255 let mut encoding = None;
256 let mut entry_type = None;
257 let mut target = None;
258
259 if plist.len() % 2 != 0 {
260 return Err(GitClosureError::Parse(
261 "plist key/value pairs are malformed".to_string(),
262 ));
263 }
264
265 let mut idx = 0usize;
266 while idx < plist.len() {
267 let key = if let Some(keyword) = plist[idx].as_keyword() {
268 keyword
269 } else if let Some(symbol) = plist[idx].as_symbol() {
270 symbol.strip_prefix(':').ok_or_else(|| {
271 GitClosureError::Parse("plist symbol keys must start with ':'".to_string())
272 })?
273 } else {
274 return Err(GitClosureError::Parse(
275 "plist keys must be keywords or :symbol values".to_string(),
276 ));
277 };
278 let value = &plist[idx + 1];
279
280 match key {
281 "path" => {
282 path = Some(
283 value
284 .as_str()
285 .ok_or_else(|| {
286 GitClosureError::Parse(":path must be a string".to_string())
287 })?
288 .to_string(),
289 );
290 }
291 "sha256" => {
292 sha256 = Some(
293 value
294 .as_str()
295 .ok_or_else(|| {
296 GitClosureError::Parse(":sha256 must be a string".to_string())
297 })?
298 .to_string(),
299 );
300 }
301 "mode" => {
302 mode = Some(
303 value
304 .as_str()
305 .ok_or_else(|| {
306 GitClosureError::Parse(":mode must be a string".to_string())
307 })?
308 .to_string(),
309 );
310 }
311 "size" => {
312 size = Some(value.as_u64().ok_or_else(|| {
313 GitClosureError::Parse(":size must be a u64".to_string())
314 })?);
315 }
316 "encoding" => {
317 encoding = Some(
318 value
319 .as_str()
320 .ok_or_else(|| {
321 GitClosureError::Parse(":encoding must be a string".to_string())
322 })?
323 .to_string(),
324 );
325 }
326 "type" => {
327 entry_type = Some(
328 value
329 .as_str()
330 .ok_or_else(|| {
331 GitClosureError::Parse(":type must be a string".to_string())
332 })?
333 .to_string(),
334 );
335 }
336 "target" => {
337 target = Some(
338 value
339 .as_str()
340 .ok_or_else(|| {
341 GitClosureError::Parse(":target must be a string".to_string())
342 })?
343 .to_string(),
344 );
345 }
346 _other => {
347 idx += 2;
351 continue;
352 }
353 }
354
355 idx += 2;
356 }
357
358 let path = path.ok_or_else(|| GitClosureError::Parse("missing :path".to_string()))?;
359 if entry_type.as_deref() == Some("symlink") {
360 if sha256.as_deref().map(|s| !s.is_empty()).unwrap_or(false) {
361 return Err(GitClosureError::Parse(format!(
362 "symlink entry {} has unexpected :sha256 field",
363 path
364 )));
365 }
366 if size.map(|s| s != 0).unwrap_or(false) {
367 return Err(GitClosureError::Parse(format!(
368 "symlink entry {} has unexpected non-zero :size",
369 path
370 )));
371 }
372 if encoding.is_some() {
373 return Err(GitClosureError::Parse(format!(
374 "symlink entry {} has unexpected :encoding field",
375 path
376 )));
377 }
378 let target = target
379 .ok_or_else(|| GitClosureError::Parse("missing :target for symlink".to_string()))?;
380 files.push(SnapshotFile {
381 path,
382 sha256: String::new(),
383 mode: "120000".to_string(),
384 size: 0,
385 encoding: None,
386 symlink_target: Some(target),
387 content: Vec::new(),
388 });
389 continue;
390 }
391
392 let sha256 = sha256.ok_or_else(|| GitClosureError::Parse("missing :sha256".to_string()))?;
393 let mode = mode.ok_or_else(|| GitClosureError::Parse("missing :mode".to_string()))?;
394 let size = size.ok_or_else(|| GitClosureError::Parse("missing :size".to_string()))?;
395
396 if let Some(limit) = limits.and_then(|l| l.max_file_bytes) {
397 if size > limit {
398 return Err(GitClosureError::Parse(format!(
399 "entry {} exceeds max_file_bytes limit ({size} > {limit})",
400 path
401 )));
402 }
403 }
404
405 let content = match encoding.as_deref() {
406 Some("base64") => BASE64_STANDARD.decode(content_field).map_err(|err| {
407 GitClosureError::Parse(format!("invalid base64 content for {path}: {err}"))
408 })?,
409 Some(other) => {
410 return Err(GitClosureError::Parse(format!(
411 "unsupported encoding for {path}: {other}"
412 )));
413 }
414 None => content_field.as_bytes().to_vec(),
415 };
416
417 if content.len() as u64 != size {
418 return Err(GitClosureError::SizeMismatch {
419 path,
420 expected: size,
421 actual: content.len() as u64,
422 });
423 }
424
425 total_bytes = total_bytes.saturating_add(size);
426 if let Some(limit) = limits.and_then(|l| l.max_total_bytes) {
427 if total_bytes > limit {
428 return Err(GitClosureError::Parse(format!(
429 "snapshot content exceeds max_total_bytes limit ({total_bytes} > {limit})"
430 )));
431 }
432 }
433
434 files.push(SnapshotFile {
435 path,
436 sha256,
437 mode,
438 size,
439 encoding,
440 symlink_target: None,
441 content,
442 });
443 }
444
445 files.sort_by(|a, b| a.path.cmp(&b.path));
446 for window in files.windows(2) {
447 if window[0].path == window[1].path {
448 return Err(GitClosureError::Parse(format!(
449 "duplicate :path in snapshot: {}",
450 window[0].path
451 )));
452 }
453 }
454 Ok(files)
455}
456
457pub fn list_snapshot(snapshot: &Path) -> Result<Vec<ListEntry>> {
462 let text = fs::read_to_string(snapshot).map_err(|err| io_error_with_path(err, snapshot))?;
463 list_snapshot_str(&text)
464}
465
466pub fn list_snapshot_str(text: &str) -> Result<Vec<ListEntry>> {
468 let (_header, files) = parse_snapshot(text)?;
469 Ok(files
470 .into_iter()
471 .map(|f| ListEntry {
472 is_symlink: f.symlink_target.is_some(),
473 symlink_target: f.symlink_target,
474 sha256: f.sha256,
475 mode: f.mode,
476 size: f.size,
477 path: f.path,
478 })
479 .collect())
480}
481
/// Options controlling how `fmt_snapshot_with_options` treats its input.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FmtOptions {
    /// When `true`, a header hash that disagrees with the recomputed hash is
    /// replaced instead of being reported as `GitClosureError::HashMismatch`.
    pub repair_hash: bool,
}
488
489pub fn fmt_snapshot(snapshot: &Path) -> Result<String> {
496 fmt_snapshot_with_options(snapshot, FmtOptions::default())
497}
498
499pub fn fmt_snapshot_with_options(snapshot: &Path, options: FmtOptions) -> Result<String> {
501 let text = fs::read_to_string(snapshot).map_err(|err| io_error_with_path(err, snapshot))?;
502 let (mut header, mut files) = parse_snapshot(&text)?;
503 files.sort_by(|a, b| a.path.cmp(&b.path));
504 let computed_hash = super::hash::compute_snapshot_hash(&files);
505 if header.snapshot_hash != computed_hash && !options.repair_hash {
506 return Err(GitClosureError::HashMismatch {
507 expected: header.snapshot_hash,
508 actual: computed_hash,
509 });
510 }
511 header.snapshot_hash = computed_hash;
512 header.file_count = files.len();
513 Ok(serialize_snapshot(&files, &header))
514}
515
516#[cfg(test)]
517mod tests {
518 use super::*;
519 use crate::snapshot::hash::compute_snapshot_hash;
520 use proptest::prelude::*;
521 use std::collections::BTreeMap;
522
523 fn make_header(files: &[SnapshotFile]) -> SnapshotHeader {
525 SnapshotHeader {
526 snapshot_hash: compute_snapshot_hash(files),
527 file_count: files.len(),
528 git_rev: None,
529 git_branch: None,
530 extra_headers: Vec::new(),
531 }
532 }
533
534 fn make_text_file(path: &str, content: &str) -> SnapshotFile {
535 use crate::snapshot::hash::sha256_hex;
536 let bytes = content.as_bytes().to_vec();
537 SnapshotFile {
538 path: path.to_string(),
539 sha256: sha256_hex(&bytes),
540 mode: "644".to_string(),
541 size: bytes.len() as u64,
542 encoding: None,
543 symlink_target: None,
544 content: bytes,
545 }
546 }
547
548 fn path_strategy() -> impl Strategy<Value = String> {
549 proptest::string::string_regex(r"[A-Za-z0-9_.-]{1,12}(/[A-Za-z0-9_.-]{1,12}){0,2}")
550 .expect("valid path regex")
551 .prop_filter("path must be safe and relative", |path| {
552 !path.starts_with('/')
553 && !path
554 .split('/')
555 .any(|segment| segment == "." || segment == "..")
556 })
557 }
558
559 fn symlink_target_strategy() -> impl Strategy<Value = String> {
560 proptest::string::string_regex(r"[A-Za-z0-9_.-]{1,16}(/[A-Za-z0-9_.-]{1,16}){0,2}")
561 .expect("valid symlink target regex")
562 .prop_filter("symlink target must not be empty", |target| {
563 !target.is_empty()
564 })
565 }
566
567 fn snapshot_file_strategy() -> impl Strategy<Value = SnapshotFile> {
568 let regular_utf8 = (
569 path_strategy(),
570 prop::sample::select(vec!["644".to_string(), "755".to_string()]),
571 proptest::string::string_regex("[ -~]{0,64}").expect("valid UTF-8 content regex"),
572 )
573 .prop_map(|(path, mode, content)| {
574 let bytes = content.into_bytes();
575 SnapshotFile {
576 path,
577 sha256: crate::snapshot::hash::sha256_hex(&bytes),
578 mode,
579 size: bytes.len() as u64,
580 encoding: None,
581 symlink_target: None,
582 content: bytes,
583 }
584 });
585
586 let regular_binary = (
587 path_strategy(),
588 prop::sample::select(vec!["644".to_string(), "755".to_string()]),
589 prop::collection::vec(any::<u8>(), 0..64),
590 )
591 .prop_map(|(path, mode, bytes)| SnapshotFile {
592 path,
593 sha256: crate::snapshot::hash::sha256_hex(&bytes),
594 mode,
595 size: bytes.len() as u64,
596 encoding: Some("base64".to_string()),
597 symlink_target: None,
598 content: bytes,
599 });
600
601 let symlink =
602 (path_strategy(), symlink_target_strategy()).prop_map(|(path, target)| SnapshotFile {
603 path,
604 sha256: String::new(),
605 mode: "120000".to_string(),
606 size: 0,
607 encoding: None,
608 symlink_target: Some(target),
609 content: Vec::new(),
610 });
611
612 prop_oneof![regular_utf8, regular_binary, symlink]
613 }
614
615 fn canonicalize_generated_files(files: Vec<SnapshotFile>) -> Vec<SnapshotFile> {
616 let mut by_path = BTreeMap::new();
617 for file in files {
618 by_path.entry(file.path.clone()).or_insert(file);
619 }
620 by_path.into_values().collect()
621 }
622
623 #[test]
624 fn serialize_then_parse_roundtrip_single_text_file() {
625 let file = make_text_file("readme.txt", "hello\n");
626 let files_arr = [file.clone()];
627 let header = make_header(&files_arr);
628 let text = serialize_snapshot(&files_arr, &header);
629 let expected_hash = header.snapshot_hash.clone();
630 let (header, files) = parse_snapshot(&text).expect("parse serialized snapshot");
631 assert_eq!(header.snapshot_hash, expected_hash);
632 assert_eq!(files.len(), 1);
633 assert_eq!(files[0].path, file.path);
634 assert_eq!(files[0].content, file.content);
635 }
636
637 #[test]
638 fn serialize_then_parse_roundtrip_binary_file() {
639 use crate::snapshot::hash::sha256_hex;
640 let bytes: Vec<u8> = (0u8..=255).collect();
641 let file = SnapshotFile {
642 path: "all-bytes.bin".to_string(),
643 sha256: sha256_hex(&bytes),
644 mode: "644".to_string(),
645 size: bytes.len() as u64,
646 encoding: Some("base64".to_string()),
647 symlink_target: None,
648 content: bytes.clone(),
649 };
650 let files_arr = [file];
651 let header = make_header(&files_arr);
652 let text = serialize_snapshot(&files_arr, &header);
653 let (_, files) = parse_snapshot(&text).expect("parse binary snapshot");
654 assert_eq!(files[0].content, bytes);
655 }
656
657 proptest! {
658 #[test]
659 fn proptest_parse_serialize_roundtrip(files in prop::collection::vec(snapshot_file_strategy(), 0..16)) {
660 let files = canonicalize_generated_files(files);
661 let header = make_header(&files);
662 let serialized = serialize_snapshot(&files, &header);
663 let (parsed_header, parsed_files) = parse_snapshot(&serialized)
664 .expect("generated snapshot should parse");
665
666 prop_assert_eq!(parsed_header.file_count, files.len());
667 prop_assert_eq!(parsed_header.snapshot_hash, compute_snapshot_hash(&files));
668 prop_assert_eq!(parsed_files, files);
669 }
670
671 #[test]
672 fn proptest_fmt_is_idempotent(files in prop::collection::vec(snapshot_file_strategy(), 0..16)) {
673 let files = canonicalize_generated_files(files);
674 let header = make_header(&files);
675 let serialized = serialize_snapshot(&files, &header);
676
677 let tmp = tempfile::TempDir::new().expect("create tempdir");
678 let snapshot = tmp.path().join("proptest.gcl");
679 std::fs::write(&snapshot, serialized).expect("write generated snapshot");
680
681 let once = fmt_snapshot(&snapshot).expect("first fmt pass");
682 std::fs::write(&snapshot, &once).expect("write first fmt result");
683 let twice = fmt_snapshot(&snapshot).expect("second fmt pass");
684
685 prop_assert_eq!(twice, once);
686 }
687 }
688
689 #[test]
690 fn parse_snapshot_unknown_plist_key_is_ignored() {
691 let file = make_text_file("a.txt", "hi");
692 let files_arr = [file];
693 let header = make_header(&files_arr);
694 let text = serialize_snapshot(&files_arr, &header);
695 let modified = text.replace(":mode ", ":future-key \"v\"\n :mode ");
697 let (_, files) = parse_snapshot(&modified).expect("unknown key must be silently ignored");
698 assert_eq!(files[0].path, "a.txt");
699 }
700
701 #[test]
702 fn parse_snapshot_rejects_duplicate_regular_paths() {
703 let content_a = "a";
704 let content_b = "b";
705 let digest_a = crate::snapshot::hash::sha256_hex(content_a.as_bytes());
706 let digest_b = crate::snapshot::hash::sha256_hex(content_b.as_bytes());
707 let snapshot_hash = crate::snapshot::hash::sha256_hex(b"placeholder");
708 let input = format!(
709 ";; git-closure snapshot v0.1\n;; snapshot-hash: {snapshot_hash}\n;; file-count: 2\n\n(\n ((:path \"dup.txt\" :sha256 \"{digest_a}\" :mode \"644\" :size 1) \"{content_a}\")\n ((:path \"dup.txt\" :sha256 \"{digest_b}\" :mode \"644\" :size 1) \"{content_b}\")\n)\n"
710 );
711
712 let err = parse_snapshot(&input).expect_err("duplicate paths must be rejected");
713 match err {
714 GitClosureError::Parse(msg) => assert!(
715 msg.contains("duplicate :path") && msg.contains("dup.txt"),
716 "parse error should mention duplicate path, got: {msg}"
717 ),
718 other => panic!("expected Parse error, got {other:?}"),
719 }
720 }
721
722 #[test]
723 fn parse_snapshot_rejects_duplicate_regular_and_symlink_paths() {
724 let content = "x";
725 let digest = crate::snapshot::hash::sha256_hex(content.as_bytes());
726 let snapshot_hash = crate::snapshot::hash::sha256_hex(b"placeholder");
727 let input = format!(
728 ";; git-closure snapshot v0.1\n;; snapshot-hash: {snapshot_hash}\n;; file-count: 2\n\n(\n ((:path \"dup.txt\" :sha256 \"{digest}\" :mode \"644\" :size 1) \"{content}\")\n ((:path \"dup.txt\" :type \"symlink\" :target \"target.txt\") \"\")\n)\n"
729 );
730
731 let err = parse_snapshot(&input)
732 .expect_err("duplicate path between regular and symlink must be rejected");
733 match err {
734 GitClosureError::Parse(msg) => assert!(
735 msg.contains("duplicate :path") && msg.contains("dup.txt"),
736 "parse error should mention duplicate path, got: {msg}"
737 ),
738 other => panic!("expected Parse error, got {other:?}"),
739 }
740 }
741
742 #[test]
743 fn verify_snapshot_rejects_duplicate_paths_via_parse() {
744 use tempfile::TempDir;
745
746 let dir = TempDir::new().expect("create tempdir");
747 let snapshot = dir.path().join("duplicate.gcl");
748
749 let content_a = "a";
750 let content_b = "b";
751 let digest_a = crate::snapshot::hash::sha256_hex(content_a.as_bytes());
752 let digest_b = crate::snapshot::hash::sha256_hex(content_b.as_bytes());
753 let files = vec![
754 SnapshotFile {
755 path: "dup.txt".to_string(),
756 sha256: digest_a.clone(),
757 mode: "644".to_string(),
758 size: 1,
759 encoding: None,
760 symlink_target: None,
761 content: content_a.as_bytes().to_vec(),
762 },
763 SnapshotFile {
764 path: "dup.txt".to_string(),
765 sha256: digest_b.clone(),
766 mode: "644".to_string(),
767 size: 1,
768 encoding: None,
769 symlink_target: None,
770 content: content_b.as_bytes().to_vec(),
771 },
772 ];
773 let snapshot_hash = crate::snapshot::hash::compute_snapshot_hash(&files);
774 let input = format!(
775 ";; git-closure snapshot v0.1\n;; snapshot-hash: {snapshot_hash}\n;; file-count: 2\n\n(\n ((:path \"dup.txt\" :sha256 \"{digest_a}\" :mode \"644\" :size 1) \"{content_a}\")\n ((:path \"dup.txt\" :sha256 \"{digest_b}\" :mode \"644\" :size 1) \"{content_b}\")\n)\n"
776 );
777 std::fs::write(&snapshot, input).expect("write duplicate snapshot");
778
779 let err = crate::materialize::verify_snapshot(&snapshot)
780 .expect_err("verify must reject snapshots with duplicate paths");
781 assert!(matches!(err, GitClosureError::Parse(_)));
782 }
783
784 #[test]
785 fn parse_snapshot_with_limits_rejects_entry_count_limit() {
786 let file_a = make_text_file("a.txt", "a");
787 let file_b = make_text_file("b.txt", "b");
788 let files = vec![file_a, file_b];
789 let header = make_header(&files);
790 let text = serialize_snapshot(&files, &header);
791
792 let limits = ParseLimits {
793 max_entry_count: Some(1),
794 max_file_bytes: None,
795 max_total_bytes: None,
796 };
797 let err = parse_snapshot_with_limits(&text, Some(&limits))
798 .expect_err("entry count limit must reject oversized snapshot");
799 assert!(matches!(err, GitClosureError::Parse(_)));
800 }
801
802 #[test]
803 fn parse_snapshot_with_limits_rejects_file_bytes_limit() {
804 let file = make_text_file("a.txt", "hello");
805 let files = vec![file];
806 let header = make_header(&files);
807 let text = serialize_snapshot(&files, &header);
808
809 let limits = ParseLimits {
810 max_entry_count: None,
811 max_file_bytes: Some(4),
812 max_total_bytes: None,
813 };
814 let err = parse_snapshot_with_limits(&text, Some(&limits))
815 .expect_err("file bytes limit must reject oversized entry");
816 assert!(matches!(err, GitClosureError::Parse(_)));
817 }
818
819 #[test]
820 fn parse_snapshot_with_limits_rejects_total_bytes_limit() {
821 let file_a = make_text_file("a.txt", "abc");
822 let file_b = make_text_file("b.txt", "def");
823 let files = vec![file_a, file_b];
824 let header = make_header(&files);
825 let text = serialize_snapshot(&files, &header);
826
827 let limits = ParseLimits {
828 max_entry_count: None,
829 max_file_bytes: None,
830 max_total_bytes: Some(5),
831 };
832 let err = parse_snapshot_with_limits(&text, Some(&limits))
833 .expect_err("total bytes limit must reject oversized aggregate");
834 assert!(matches!(err, GitClosureError::Parse(_)));
835 }
836
837 #[test]
838 fn parse_snapshot_rejects_symlink_with_nonempty_sha256() {
839 let files = vec![SnapshotFile {
840 path: "link".to_string(),
841 sha256: String::new(),
842 mode: "120000".to_string(),
843 size: 0,
844 encoding: None,
845 symlink_target: Some("target.txt".to_string()),
846 content: Vec::new(),
847 }];
848 let header = make_header(&files);
849 let text = serialize_snapshot(&files, &header);
850 let modified = text.replace(
851 ":type \"symlink\"",
852 ":sha256 \"deadbeef\"\n :type \"symlink\"",
853 );
854
855 let err = parse_snapshot(&modified)
856 .expect_err("symlink entries must reject non-empty sha256 field");
857 assert!(matches!(err, GitClosureError::Parse(_)));
858 }
859
860 #[test]
861 fn parse_snapshot_rejects_symlink_with_nonzero_size() {
862 let files = vec![SnapshotFile {
863 path: "link".to_string(),
864 sha256: String::new(),
865 mode: "120000".to_string(),
866 size: 0,
867 encoding: None,
868 symlink_target: Some("target.txt".to_string()),
869 content: Vec::new(),
870 }];
871 let header = make_header(&files);
872 let text = serialize_snapshot(&files, &header);
873 let modified = text.replace(":type \"symlink\"", ":size 1\n :type \"symlink\"");
874
875 let err =
876 parse_snapshot(&modified).expect_err("symlink entries must reject non-zero size field");
877 assert!(matches!(err, GitClosureError::Parse(_)));
878 }
879
880 #[test]
881 fn parse_snapshot_rejects_legacy_format_hash_header() {
882 let input = ";; format-hash: abc\n;; file-count: 0\n\n()\n";
883 let err = parse_snapshot(input).expect_err("legacy header must be rejected");
884 assert!(matches!(err, GitClosureError::LegacyHeader));
885 }
886
887 #[test]
888 fn quote_string_matches_lexpr_printer() {
889 let sample = "line1\nline2\u{0000}\u{fffd}\u{1f642}\\\"";
890 let expected = lexpr::to_string(&lexpr::Value::string(sample)).expect("print with lexpr");
891 assert_eq!(quote_string(sample), expected);
892 }
893
894 #[test]
897 fn list_snapshot_returns_entries_in_path_order() {
898 use std::fs;
899 use tempfile::TempDir;
900
901 let file_b = make_text_file("b.txt", "b");
902 let file_a = make_text_file("a.txt", "a");
903 let mut files = vec![file_b.clone(), file_a.clone()];
905 files.sort_by(|x, y| x.path.cmp(&y.path));
906 let header = make_header(&files);
907 let text = serialize_snapshot(&files, &header);
908
909 let dir = TempDir::new().unwrap();
910 let snap = dir.path().join("snap.gcl");
911 fs::write(&snap, text.as_bytes()).unwrap();
912
913 let entries = list_snapshot(&snap).expect("list_snapshot must succeed");
914 assert_eq!(entries.len(), 2);
915 assert_eq!(entries[0].path, "a.txt");
916 assert_eq!(entries[1].path, "b.txt");
917 assert!(!entries[0].is_symlink);
918 assert_eq!(entries[0].size, 1);
919 }
920
921 #[test]
922 fn list_snapshot_symlink_entry_has_correct_fields() {
923 use crate::snapshot::hash::sha256_hex;
924 use std::fs;
925 use tempfile::TempDir;
926
927 let symlink_file = SnapshotFile {
928 path: "link".to_string(),
929 sha256: String::new(),
930 mode: "120000".to_string(),
931 size: 0,
932 encoding: None,
933 symlink_target: Some("target.txt".to_string()),
934 content: Vec::new(),
935 };
936 let regular = make_text_file("target.txt", "content");
937 let files = vec![symlink_file, regular];
938 let header = make_header(&files);
939 let text = serialize_snapshot(&files, &header);
940
941 let dir = TempDir::new().unwrap();
942 let snap = dir.path().join("snap.gcl");
943 fs::write(&snap, text.as_bytes()).unwrap();
944
945 let entries = list_snapshot(&snap).expect("list_snapshot must succeed");
946 let link_entry = entries.iter().find(|e| e.path == "link").unwrap();
947 assert!(link_entry.is_symlink);
948 assert_eq!(link_entry.symlink_target.as_deref(), Some("target.txt"));
949 assert_eq!(link_entry.sha256, "");
950 assert_eq!(link_entry.size, 0);
951
952 let _ = sha256_hex;
954 }
955
956 #[test]
957 fn list_snapshot_str_returns_expected_entries() {
958 let files = vec![make_text_file("a.txt", "a"), make_text_file("b.txt", "bb")];
959 let header = make_header(&files);
960 let text = serialize_snapshot(&files, &header);
961
962 let entries =
963 list_snapshot_str(&text).expect("list_snapshot_str should parse valid snapshot");
964 assert_eq!(entries.len(), 2);
965 assert_eq!(entries[0].path, "a.txt");
966 assert_eq!(entries[1].path, "b.txt");
967 assert_eq!(entries[0].size, 1);
968 assert_eq!(entries[1].size, 2);
969 }
970
971 #[test]
974 fn fmt_snapshot_is_idempotent() {
975 use std::fs;
976 use tempfile::TempDir;
977
978 let file = make_text_file("src/lib.rs", "fn main() {}\n");
979 let files_arr = [file];
980 let header = make_header(&files_arr);
981 let original = serialize_snapshot(&files_arr, &header);
982
983 let dir = TempDir::new().unwrap();
984 let snap = dir.path().join("snap.gcl");
985 fs::write(&snap, original.as_bytes()).unwrap();
986
987 let formatted = fmt_snapshot(&snap).expect("fmt_snapshot must succeed");
988 assert_eq!(
989 formatted, original,
990 "fmt_snapshot on already-canonical snapshot must be idempotent"
991 );
992
993 fs::write(&snap, formatted.as_bytes()).unwrap();
995 let formatted2 = fmt_snapshot(&snap).expect("second fmt_snapshot must succeed");
996 assert_eq!(formatted2, formatted);
997 }
998
999 #[test]
1000 fn fmt_snapshot_sorts_files_canonically() {
1001 use std::fs;
1002 use tempfile::TempDir;
1003
1004 let file_z = make_text_file("z.txt", "z");
1005 let file_a = make_text_file("a.txt", "a");
1006 let mut files_sorted = vec![file_z.clone(), file_a.clone()];
1008 files_sorted.sort_by(|x, y| x.path.cmp(&y.path));
1009 let header = make_header(&files_sorted);
1010 let canonical = serialize_snapshot(&files_sorted, &header);
1011
1012 let dir = TempDir::new().unwrap();
1013 let snap = dir.path().join("snap.gcl");
1014 fs::write(&snap, canonical.as_bytes()).unwrap();
1015
1016 let formatted = fmt_snapshot(&snap).expect("fmt_snapshot must succeed");
1017 let a_pos = formatted.find("\"a.txt\"").unwrap();
1019 let z_pos = formatted.find("\"z.txt\"").unwrap();
1020 assert!(
1021 a_pos < z_pos,
1022 "a.txt must appear before z.txt in canonical output"
1023 );
1024 }
1025
1026 #[test]
1027 fn fmt_snapshot_preserves_unknown_headers_in_order() {
1028 use std::fs;
1029 use tempfile::TempDir;
1030
1031 let file = make_text_file("a.txt", "a");
1032 let files = vec![file];
1033 let header = make_header(&files);
1034 let mut text = serialize_snapshot(&files, &header);
1035 text = text.replacen(
1036 ";; file-count: 1\n",
1037 ";; file-count: 1\n;; source-uri: gh:owner/repo@main\n;; x-custom: abc\n",
1038 1,
1039 );
1040
1041 let dir = TempDir::new().unwrap();
1042 let snap = dir.path().join("snap.gcl");
1043 fs::write(&snap, text.as_bytes()).unwrap();
1044
1045 let formatted = fmt_snapshot(&snap).expect("fmt_snapshot must succeed");
1046 let source_pos = formatted
1047 .find(";; source-uri: gh:owner/repo@main")
1048 .expect("source-uri header retained");
1049 let custom_pos = formatted
1050 .find(";; x-custom: abc")
1051 .expect("x-custom header retained");
1052 assert!(
1053 source_pos < custom_pos,
1054 "unknown headers must keep input order"
1055 );
1056
1057 fs::write(&snap, formatted.as_bytes()).unwrap();
1058 let formatted_again = fmt_snapshot(&snap).expect("second fmt_snapshot must succeed");
1059 assert_eq!(formatted_again, formatted, "fmt(fmt(x)) must be idempotent");
1060 }
1061
/// A snapshot whose header hash has been overwritten with all zeros must be
/// rejected by plain `fmt_snapshot` with `GitClosureError::HashMismatch`.
#[test]
fn fmt_snapshot_rejects_hash_mismatch_by_default() {
    use std::fs;
    use tempfile::TempDir;

    let file = make_text_file("a.txt", "a");
    let files = std::slice::from_ref(&file);

    // Start from a valid header and replace only the hash field.
    let tampered_header = SnapshotHeader {
        snapshot_hash: "0000000000000000000000000000000000000000000000000000000000000000"
            .to_string(),
        ..make_header(files)
    };
    let tampered_text = serialize_snapshot(files, &tampered_header);

    let workdir = TempDir::new().unwrap();
    let snapshot_path = workdir.path().join("tampered.gcl");
    fs::write(&snapshot_path, &tampered_text).unwrap();

    let outcome = fmt_snapshot(&snapshot_path);
    let err = outcome.expect_err("fmt must reject hash mismatch by default");
    assert!(matches!(err, GitClosureError::HashMismatch { .. }));
}
1080
/// With `repair_hash: true`, formatting a snapshot whose header hash is wrong
/// must succeed and emit the hash recomputed from the file list.
#[test]
fn fmt_snapshot_repair_hash_allows_recanonicalization() {
    use std::fs;
    use tempfile::TempDir;

    // Deliberately wrong header hash written into the snapshot under test.
    const BOGUS_HASH: &str = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff";

    let file = make_text_file("a.txt", "a");
    let files = std::slice::from_ref(&file);
    // The hash the repaired header is expected to carry.
    let expected_hash = compute_snapshot_hash(files);

    let mut header = make_header(files);
    header.snapshot_hash = BOGUS_HASH.to_string();
    let text = serialize_snapshot(files, &header);

    let dir = TempDir::new().unwrap();
    let snap = dir.path().join("repair.gcl");
    fs::write(&snap, text.as_bytes()).unwrap();

    let repaired = fmt_snapshot_with_options(&snap, FmtOptions { repair_hash: true })
        .expect("fmt --repair-hash should succeed");

    // The bogus value must be gone ...
    assert!(
        !repaired.contains(BOGUS_HASH),
        "repaired output must contain a recomputed hash"
    );
    // ... and the correct one must be present. Checking only for absence would
    // also pass if the hash header were dropped or replaced by another wrong
    // value.
    assert!(
        repaired.contains(&format!(";; snapshot-hash: {expected_hash}\n")),
        "repaired output must carry the hash recomputed from the files, got: {repaired}"
    );
}
1103
/// Source-level guard: the text of `serial.rs` must not contain the pattern
/// that converts a cloned `file.content` into a UTF-8 string.
#[test]
fn serialize_snapshot_avoids_content_clone_in_utf8_path() {
    // The forbidden pattern is assembled from fragments rather than written as
    // one literal; presumably this keeps the scanned file from matching itself
    // if this test lives in `serial.rs` (confirm against the module layout).
    let forbidden = ["String::from_utf8(", "file.content.clone()", ")"].concat();
    let scanned = include_str!("serial.rs");
    let clones_content = scanned.contains(&forbidden);
    assert!(
        !clones_content,
        "utf8 serialization path should avoid cloning file.content"
    );
}
1113
/// When the header carries `git_rev` and `git_branch`, `serialize_snapshot`
/// must emit them as `;; git-rev:` and `;; git-branch:` comment lines.
#[test]
fn serialize_with_git_metadata_emits_header_comments() {
    let files = [make_text_file("src/lib.rs", "fn main() {}\n")];
    let header = SnapshotHeader {
        snapshot_hash: compute_snapshot_hash(&files),
        file_count: files.len(),
        git_rev: Some(String::from("deadbeef1234567890abcdef1234567890abcdef")),
        git_branch: Some(String::from("main")),
        extra_headers: vec![],
    };

    let text = serialize_snapshot(&files, &header);

    // Each metadata field must appear as a complete, newline-terminated line.
    let has_rev_line = text.contains(";; git-rev: deadbeef1234567890abcdef1234567890abcdef\n");
    assert!(
        has_rev_line,
        "serialized text must contain git-rev comment, got: {text}"
    );
    let has_branch_line = text.contains(";; git-branch: main\n");
    assert!(
        has_branch_line,
        "serialized text must contain git-branch comment, got: {text}"
    );
}
1138
/// Serializing the same files under the same hash, once without and once with
/// git metadata, must yield the same `snapshot-hash` line but different text.
#[test]
fn git_metadata_not_included_in_snapshot_hash() {
    let files = [make_text_file("src/lib.rs", "fn main() {}\n")];
    let hash = compute_snapshot_hash(&files);

    // Both headers share the hash and file count; only the git fields vary.
    let header_for = |rev: Option<&str>, branch: Option<&str>| SnapshotHeader {
        snapshot_hash: hash.clone(),
        file_count: files.len(),
        git_rev: rev.map(str::to_string),
        git_branch: branch.map(str::to_string),
        extra_headers: Vec::new(),
    };
    let plain_header = header_for(None, None);
    let annotated_header = header_for(Some("abc123"), Some("feature-branch"));

    let text_without = serialize_snapshot(&files, &plain_header);
    let text_with = serialize_snapshot(&files, &annotated_header);

    // The hash line is identical in both renderings.
    let hash_line = format!(";; snapshot-hash: {hash}\n");
    let plain_has_hash = text_without.contains(&hash_line);
    assert!(
        plain_has_hash,
        "snapshot without meta must contain hash line"
    );
    let annotated_has_hash = text_with.contains(&hash_line);
    assert!(
        annotated_has_hash,
        "snapshot with meta must contain same hash line"
    );

    // The overall text still differs, because of the extra metadata lines.
    assert_ne!(
        text_without, text_with,
        "snapshots with and without git metadata must differ in text"
    );
}
1180
/// `git_rev` and `git_branch` written by `serialize_snapshot` must be
/// recovered by `parse_snapshot` and still be present after `fmt_snapshot`.
#[test]
fn git_metadata_roundtrips_through_parse() {
    use std::fs;
    use tempfile::TempDir;

    let files = [make_text_file("readme.txt", "hello\n")];
    let header = SnapshotHeader {
        snapshot_hash: compute_snapshot_hash(&files),
        file_count: files.len(),
        git_rev: Some(String::from("cafebabe")),
        git_branch: Some(String::from("release/v1")),
        extra_headers: vec![],
    };
    let serialized = serialize_snapshot(&files, &header);

    let workdir = TempDir::new().unwrap();
    let snapshot_path = workdir.path().join("snap.gcl");
    fs::write(&snapshot_path, &serialized).unwrap();

    // Parsing the in-memory text must yield the same metadata values.
    let (reparsed, _) = parse_snapshot(&serialized).expect("parse must succeed");
    assert_eq!(reparsed.git_rev.as_deref(), Some("cafebabe"));
    assert_eq!(reparsed.git_branch.as_deref(), Some("release/v1"));

    // Formatting the on-disk copy must keep both metadata lines intact.
    let formatted = fmt_snapshot(&snapshot_path).expect("fmt_snapshot must succeed");
    let keeps_rev = formatted.contains(";; git-rev: cafebabe\n");
    assert!(
        keeps_rev,
        "fmt_snapshot must preserve git-rev, got: {formatted}"
    );
    let keeps_branch = formatted.contains(";; git-branch: release/v1\n");
    assert!(
        keeps_branch,
        "fmt_snapshot must preserve git-branch, got: {formatted}"
    );
}
1218}