Skip to main content

snapdir_core/
manifest.rs

1//! Manifest line format model and (de)serialization.
2//!
3//! A snapdir manifest is a UTF-8 text file listing files and directories, one
4//! per line, sorted by path. Each line has the exact shape:
5//!
6//! ```text
7//! PATH_TYPE PERMISSIONS CHECKSUM SIZE PATH
8//! ```
9//!
10//! single-space separated, where:
11//!
12//! - `PATH_TYPE` is `F` for files, `D` for directories (directory paths end
13//!   with `/`).
14//! - `PERMISSIONS` is the octal permission string (e.g. `700`, `600`).
15//! - `CHECKSUM` is the hex checksum of the entry.
16//! - `SIZE` is the content size in bytes.
17//! - `PATH` is the entry path. In relative mode paths are prefixed with `./`;
18//!   with `--absolute` the full path is kept.
19//!
20//! This module owns only the *format* (the line model, [`Display`], and
21//! parsing). It does NOT compute checksums, walk the filesystem, or stat
22//! files — those land in later gates. Per the library-purity principle it
23//! performs no terminal I/O and reads no environment.
24//!
25//! [`Display`]: std::fmt::Display
26
27use core::fmt;
28use core::str::FromStr;
29
30use thiserror::Error;
31
/// The type of a manifest entry's path.
///
/// Mirrors the oracle's `PATH_TYPE` column: `F` for files, `D` for
/// directories. Symbolic links are recorded as the type of their target, so
/// only these two variants exist in a manifest.
///
/// The derived ordering follows declaration order, so `File` compares less
/// than `Directory`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PathType {
    /// A regular file, written as `F` in the `PATH_TYPE` column.
    File,
    /// A directory, written as `D` in the `PATH_TYPE` column. Its rendered
    /// path always ends with `/`.
    Directory,
}
44
45impl PathType {
46    /// Returns the single-character tag used in a manifest line (`"F"` or
47    /// `"D"`).
48    #[must_use]
49    pub const fn as_str(self) -> &'static str {
50        match self {
51            PathType::File => "F",
52            PathType::Directory => "D",
53        }
54    }
55}
56
57impl fmt::Display for PathType {
58    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
59        f.write_str(self.as_str())
60    }
61}
62
/// Errors raised while parsing a manifest line or document.
///
/// Each variant carries the offending text so the rendered message can quote
/// it.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum ParseError {
    /// The line did not contain the five space-separated fields
    /// `PATH_TYPE PERMISSIONS CHECKSUM SIZE PATH`, or its permissions,
    /// checksum, or path field was empty. Carries the whole line.
    #[error("malformed manifest line (expected 'TYPE PERM CHECKSUM SIZE PATH'): {0:?}")]
    MalformedLine(String),
    /// The `PATH_TYPE` field was neither `F` nor `D`. Carries the field text.
    #[error("invalid path type {0:?} (expected 'F' or 'D')")]
    InvalidPathType(String),
    /// The `SIZE` field was not accepted as a byte count (a non-negative
    /// integer that fits in a `u64`). Carries the field text.
    #[error("invalid size field {0:?}")]
    InvalidSize(String),
}
77
/// A single manifest entry: one line of the manifest.
///
/// Field order matches the on-disk format exactly. `path` is stored verbatim
/// as it should be rendered (already `./`-prefixed in relative mode, or
/// absolute, and already trailing-`/` for directories).
///
/// All fields are public and the type itself enforces no invariants between
/// them (for example, it does not check that a directory path ends in `/`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ManifestEntry {
    /// `F` (file) or `D` (directory).
    pub path_type: PathType,
    /// Octal permission string, e.g. `"700"`. Kept as text, not parsed.
    pub permissions: String,
    /// Hex checksum of the entry's content (file bytes or directory merkle).
    /// Kept as text, not decoded.
    pub checksum: String,
    /// Content size in bytes.
    pub size: u64,
    /// The rendered path. Directories end with `/`; relative paths start
    /// with `./`. May contain spaces.
    pub path: String,
}
97
98impl ManifestEntry {
99    /// Builds an entry from its parts, taking the `path` exactly as it should
100    /// be rendered.
101    #[must_use]
102    pub fn new(
103        path_type: PathType,
104        permissions: impl Into<String>,
105        checksum: impl Into<String>,
106        size: u64,
107        path: impl Into<String>,
108    ) -> Self {
109        Self {
110            path_type,
111            permissions: permissions.into(),
112            checksum: checksum.into(),
113            size,
114            path: path.into(),
115        }
116    }
117
118    /// Parses a single, non-empty, non-comment manifest line.
119    ///
120    /// The line is split into exactly five fields on the first four spaces;
121    /// the fifth field (the path) is taken verbatim so paths may contain
122    /// spaces.
123    ///
124    /// # Errors
125    ///
126    /// Returns [`ParseError`] if the line has fewer than five fields, an
127    /// unknown path type, or a non-integer size.
128    pub fn parse_line(line: &str) -> Result<Self, ParseError> {
129        // Split on the first four spaces only; the path (field 5) keeps any
130        // remaining spaces verbatim. `splitn(5, ' ')` gives at most 5 pieces.
131        let mut parts = line.splitn(5, ' ');
132        let type_str = parts
133            .next()
134            .ok_or_else(|| ParseError::MalformedLine(line.to_owned()))?;
135        let permissions = parts
136            .next()
137            .ok_or_else(|| ParseError::MalformedLine(line.to_owned()))?;
138        let checksum = parts
139            .next()
140            .ok_or_else(|| ParseError::MalformedLine(line.to_owned()))?;
141        let size_str = parts
142            .next()
143            .ok_or_else(|| ParseError::MalformedLine(line.to_owned()))?;
144        let path = parts
145            .next()
146            .ok_or_else(|| ParseError::MalformedLine(line.to_owned()))?;
147
148        let path_type = match type_str {
149            "F" => PathType::File,
150            "D" => PathType::Directory,
151            other => return Err(ParseError::InvalidPathType(other.to_owned())),
152        };
153
154        // Reject empty fields that `splitn` would otherwise tolerate, e.g. a
155        // line with the right number of spaces but a blank permission/path.
156        if permissions.is_empty() || checksum.is_empty() || path.is_empty() {
157            return Err(ParseError::MalformedLine(line.to_owned()));
158        }
159
160        let size = size_str
161            .parse::<u64>()
162            .map_err(|_| ParseError::InvalidSize(size_str.to_owned()))?;
163
164        Ok(Self::new(path_type, permissions, checksum, size, path))
165    }
166}
167
168impl fmt::Display for ManifestEntry {
169    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
170        write!(
171            f,
172            "{} {} {} {} {}",
173            self.path_type, self.permissions, self.checksum, self.size, self.path
174        )
175    }
176}
177
/// An ordered collection of manifest entries.
///
/// [`Display`] reproduces the exact manifest text: entries sorted by path
/// (`sort -k5` semantics), one per line, joined by `\n` with no trailing
/// newline. Parsing strips empty lines and excludes `#`-comment lines.
///
/// [`Display`]: std::fmt::Display
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Manifest {
    // Kept private so entries are only added through the methods of this
    // type. Held in insertion order until sorted; equality compares this
    // stored order.
    entries: Vec<ManifestEntry>,
}
189
190impl Manifest {
191    /// Creates an empty manifest.
192    #[must_use]
193    pub fn new() -> Self {
194        Self {
195            entries: Vec::new(),
196        }
197    }
198
199    /// Builds a manifest from entries, sorting them by path (`sort -k5`).
200    #[must_use]
201    pub fn from_entries(entries: Vec<ManifestEntry>) -> Self {
202        let mut manifest = Self { entries };
203        manifest.sort();
204        manifest
205    }
206
207    /// Appends an entry. Call [`Manifest::sort`] (or use [`Manifest::display`]
208    /// via [`Display`]) to restore path ordering afterwards.
209    ///
210    /// [`Display`]: std::fmt::Display
211    pub fn push(&mut self, entry: ManifestEntry) {
212        self.entries.push(entry);
213    }
214
215    /// Returns the entries in their current order.
216    #[must_use]
217    pub fn entries(&self) -> &[ManifestEntry] {
218        &self.entries
219    }
220
221    /// Sorts entries by path, matching the oracle's `sort -k5` (a byte-wise
222    /// ordering on the path field).
223    pub fn sort(&mut self) {
224        self.entries
225            .sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes()));
226    }
227
228    /// Parses a manifest document: splits on newlines, strips empty lines,
229    /// and excludes `#`-comment lines (which are also excluded from the
230    /// checksum by the oracle). The remaining lines are parsed and sorted by
231    /// path.
232    ///
233    /// # Errors
234    ///
235    /// Returns the first [`ParseError`] encountered on a non-empty,
236    /// non-comment line.
237    pub fn parse(text: &str) -> Result<Self, ParseError> {
238        let mut entries = Vec::new();
239        for line in text.lines() {
240            if line.is_empty() || line.starts_with('#') {
241                continue;
242            }
243            entries.push(ManifestEntry::parse_line(line)?);
244        }
245        Ok(Self::from_entries(entries))
246    }
247}
248
249impl fmt::Display for Manifest {
250    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
251        // Render in path order without mutating self.
252        let mut order: Vec<&ManifestEntry> = self.entries.iter().collect();
253        order.sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes()));
254        let mut first = true;
255        for entry in order {
256            if first {
257                first = false;
258            } else {
259                f.write_str("\n")?;
260            }
261            write!(f, "{entry}")?;
262        }
263        Ok(())
264    }
265}
266
267impl FromStr for Manifest {
268    type Err = ParseError;
269
270    fn from_str(s: &str) -> Result<Self, Self::Err> {
271        Manifest::parse(s)
272    }
273}
274
// Unit tests pinning the manifest line format, path ordering, and parsing
// rules against fixtures taken from the oracle.
#[cfg(test)]
mod tests {
    use super::*;

    // The canonical multi-level b3sum fixture from `snapdir-manifest`'s own
    // test suite (the frozen oracle). Used to pin the exact line format and
    // ordering.
    const ORACLE_B3SUM_MANIFEST: &str = "\
D 700 207d090daf06217a0920593ee642a90fcad85b9dccec02725e85311005f64327 43 ./
D 700 ed23cfd2037d23cf8c6b67497425e7a06d5e40ea2bd8e43fc434006022dafe86 21 ./a/
F 600 3c9cb8b8c8f3588f8e59e18d284330b0a951be644fbef2b9784b56e15d1c6096 4 ./a/a1f
D 700 ee795476bff6c1816b4c7558a74ee0b44ec600c3cde6b02564508f67d536a656 17 ./a/aa/
F 600 a2951028421deef48d1ba185f4c497c2d986f1dd76079baf2f5eb8479f132b5a 5 ./a/aa/aa1f
D 700 8aed4caf45b22aa4c8a195945136e3a01f77864e91fabe2d9272feeee87ae334 12 ./a/aa/aaa/
F 600 5cfee4fb4074748633b4ccbddb6b184a9b5e2f5ce74df6d2803f5fea0392a197 6 ./a/aa/aaa/aaa1f
F 600 3791f11a017feedffd24c2656e18d5c4ca9d6c404c8f40ccc511b6351c8575a6 6 ./a/aa/aaa/aaa2f
D 700 9a8b0e35c000df69893648b91d15cc30ab88ae5a40af48228caf5fa443dafc9b 12 ./b/
D 700 d41c2090167e6f546a510f0da98d8a8355d6bd2b61666644604c73b3a8f5b5d9 12 ./b/bb/
D 700 3b9023fa454aa22466feeb8cbf55a2c764dd79de0e93c9a793e8b54caec227da 12 ./b/bb/bbb/
F 600 8d18b7f3aabbef192a524fa2549d1d36b48c9030d234c9bdf87caa267fb09933 6 ./b/bb/bbb/bbb1f
F 600 2e16e172b6e337325f271d4eae00bc1ea20e41609ef78665710cada1477005cc 6 ./b/bb/bbb/bbb2f
D 700 15eb2657c1e6f5a24023c10429bb6f1b7d81b2cc2057eedee2192fbf3e7b892c 6 ./c/
D 700 e711f4e76ae9b3e25ad9a32b5f115cc9a81e55a428c552aa0bcab8543967f51a 6 ./c/cc/
D 700 31a1955d5a65328f31014650cf79b5c0c3d9b82de19352ade8d299cc22f6ec40 6 ./c/cc/ccc/
F 600 24f0cf3553e0dac0ce8aead4279e0fc368899e89ef776999d0d7e812b5ca0f3b 6 ./c/cc/ccc/ccc1f
F 600 27a55588c59999fd686667c4b186af08161b95c287216f0cde723f0e191d1974 4 ./r1f";

    // A file entry renders as the five fields joined by single spaces.
    #[test]
    fn manifest_entry_display_line_format() {
        let entry = ManifestEntry::new(
            PathType::File,
            "600",
            "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262",
            0,
            "./bar.txt",
        );
        assert_eq!(
            entry.to_string(),
            "F 600 af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 0 ./bar.txt"
        );
    }

    // A directory path is rendered verbatim, trailing `/` included.
    #[test]
    fn manifest_directory_entry_display_keeps_trailing_slash() {
        let entry = ManifestEntry::new(
            PathType::Directory,
            "700",
            "dba5865c0d91b17958e4d2cac98c338f85cbbda07b71a020ab16c391b5e7af4b",
            0,
            "./",
        );
        assert_eq!(
            entry.to_string(),
            "D 700 dba5865c0d91b17958e4d2cac98c338f85cbbda07b71a020ab16c391b5e7af4b 0 ./"
        );
    }

    #[test]
    fn manifest_display_round_trips_oracle_b3sum_fixture() {
        // Parsing then displaying must reproduce the oracle byte-for-byte.
        let manifest = Manifest::parse(ORACLE_B3SUM_MANIFEST).expect("oracle parses");
        assert_eq!(manifest.to_string(), ORACLE_B3SUM_MANIFEST);
    }

    #[test]
    fn manifest_display_round_trips_empty_dir_guide_fixture() {
        // The empty-dir guide fixture (two duplicate empty files).
        let fixture = "\
D 700 dba5865c0d91b17958e4d2cac98c338f85cbbda07b71a020ab16c391b5e7af4b 0 ./
F 600 af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 0 ./bar.txt
F 600 af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 0 ./foo.txt";
        let manifest = Manifest::parse(fixture).expect("guide fixture parses");
        assert_eq!(manifest.to_string(), fixture);
    }

    #[test]
    fn manifest_sorts_entries_by_path_sort_k5() {
        // Insert out of order; Display must sort by path field.
        let mut manifest = Manifest::new();
        manifest.push(ManifestEntry::new(PathType::File, "600", "ccc", 4, "./r1f"));
        manifest.push(ManifestEntry::new(
            PathType::Directory,
            "700",
            "aaa",
            0,
            "./",
        ));
        manifest.push(ManifestEntry::new(
            PathType::Directory,
            "700",
            "bbb",
            21,
            "./a/",
        ));
        manifest.push(ManifestEntry::new(
            PathType::File,
            "600",
            "ddd",
            4,
            "./a/a1f",
        ));

        let rendered = manifest.to_string();
        let expected = "\
D 700 aaa 0 ./
D 700 bbb 21 ./a/
F 600 ddd 4 ./a/a1f
F 600 ccc 4 ./r1f";
        assert_eq!(rendered, expected);
    }

    #[test]
    fn manifest_sort_k5_orders_by_path_not_type_or_checksum() {
        // `sort -k5` keys on the path; a 'D' line can follow an 'F' line and a
        // larger checksum can precede a smaller one when the paths demand it.
        let parsed = Manifest::parse(ORACLE_B3SUM_MANIFEST).expect("parses");
        let paths: Vec<&str> = parsed.entries().iter().map(|e| e.path.as_str()).collect();
        let mut sorted = paths.clone();
        sorted.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
        assert_eq!(paths, sorted, "entries must be in sort -k5 path order");
        // ./a/ (dir) sorts before ./a/a1f (file) because of the path bytes.
        let idx_a_dir = paths.iter().position(|p| *p == "./a/").unwrap();
        let idx_a1f = paths.iter().position(|p| *p == "./a/a1f").unwrap();
        assert!(idx_a_dir < idx_a1f);
    }

    // Leading, interior, and trailing blank lines are all dropped.
    #[test]
    fn manifest_parse_strips_empty_lines() {
        let with_blanks = "\n\nD 700 aaa 0 ./\n\nF 600 bbb 4 ./r1f\n\n";
        let manifest = Manifest::parse(with_blanks).expect("parses with blanks");
        assert_eq!(manifest.entries().len(), 2);
        assert_eq!(manifest.to_string(), "D 700 aaa 0 ./\nF 600 bbb 4 ./r1f");
    }

    #[test]
    fn manifest_parse_excludes_comment_lines() {
        // `#` lines are comments: excluded from the manifest (and the oracle's
        // checksum). They must not appear in the parsed entries or output.
        let with_comments = "\
# this is a comment header
D 700 aaa 0 ./
# another comment in the middle
F 600 bbb 4 ./r1f
#trailing comment without space";
        let manifest = Manifest::parse(with_comments).expect("parses with comments");
        assert_eq!(manifest.entries().len(), 2);
        assert_eq!(manifest.to_string(), "D 700 aaa 0 ./\nF 600 bbb 4 ./r1f");
    }

    #[test]
    fn manifest_relative_vs_absolute_path_rendering() {
        // Relative mode: paths prefixed with `./`.
        let relative = ManifestEntry::new(PathType::Directory, "700", "aaa", 43, "./");
        assert!(relative.to_string().ends_with(" ./"));

        // Absolute mode: the full path is kept verbatim (no `./` rewrite).
        let absolute = ManifestEntry::new(
            PathType::Directory,
            "700",
            "207d090daf06217a0920593ee642a90fcad85b9dccec02725e85311005f64327",
            43,
            "/tmp/files/",
        );
        assert_eq!(
            absolute.to_string(),
            "D 700 207d090daf06217a0920593ee642a90fcad85b9dccec02725e85311005f64327 43 /tmp/files/"
        );
        let abs_file = ManifestEntry::new(PathType::File, "600", "abc", 4, "/tmp/files/r1f");
        assert_eq!(abs_file.to_string(), "F 600 abc 4 /tmp/files/r1f");
    }

    // A parsed line exposes each field and renders back to the same text.
    #[test]
    fn manifest_entry_parse_line_round_trips() {
        let line =
            "F 600 a2951028421deef48d1ba185f4c497c2d986f1dd76079baf2f5eb8479f132b5a 5 ./a/aa/aa1f";
        let entry = ManifestEntry::parse_line(line).expect("parses");
        assert_eq!(entry.path_type, PathType::File);
        assert_eq!(entry.permissions, "600");
        assert_eq!(entry.size, 5);
        assert_eq!(entry.path, "./a/aa/aa1f");
        assert_eq!(entry.to_string(), line);
    }

    #[test]
    fn manifest_entry_parse_line_allows_spaces_in_path() {
        // Only the first four spaces delimit fields; the path keeps the rest.
        let line = "F 600 abc 4 ./a file with spaces.txt";
        let entry = ManifestEntry::parse_line(line).expect("parses");
        assert_eq!(entry.path, "./a file with spaces.txt");
        assert_eq!(entry.to_string(), line);
    }

    // An unknown type tag is reported with the offending field text.
    #[test]
    fn manifest_entry_parse_line_rejects_bad_type() {
        let err = ManifestEntry::parse_line("X 600 abc 4 ./x").unwrap_err();
        assert_eq!(err, ParseError::InvalidPathType("X".to_owned()));
    }

    // A non-numeric size is reported with the offending field text.
    #[test]
    fn manifest_entry_parse_line_rejects_bad_size() {
        let err = ManifestEntry::parse_line("F 600 abc notanumber ./x").unwrap_err();
        assert_eq!(err, ParseError::InvalidSize("notanumber".to_owned()));
    }

    // A line missing the path field is reported with the whole line.
    #[test]
    fn manifest_entry_parse_line_rejects_too_few_fields() {
        let err = ManifestEntry::parse_line("F 600 abc 4").unwrap_err();
        assert_eq!(err, ParseError::MalformedLine("F 600 abc 4".to_owned()));
    }

    // `str::parse` (via `FromStr`) behaves like `Manifest::parse`.
    #[test]
    fn manifest_from_str_matches_parse() {
        let parsed: Manifest = ORACLE_B3SUM_MANIFEST.parse().expect("FromStr parses");
        assert_eq!(parsed.to_string(), ORACLE_B3SUM_MANIFEST);
    }
}