Skip to main content

snapdir_core/
walk.rs

1//! In-process filesystem walk producing a frozen-format [`Manifest`].
2//!
3//! This module reproduces the original `snapdir-manifest generate` behavior in
4//! pure Rust, consuming the frozen [`manifest`](crate::manifest),
5//! [`merkle`](crate::merkle) and [`excludes`](crate::excludes) APIs without
6//! changing any of them. It walks a directory tree and emits one
7//! [`ManifestEntry`] per file (`F`) and directory (`D`), computing per-file
8//! content checksums with a [`Hasher`] and per-directory checksums/sizes with
9//! [`directory_checksum`].
10//!
11//! ## Behaviors matched against the oracle
12//!
13//! - **Traversal** mirrors `find`/`find -L`: every directory becomes a `D`
14//!   entry (path ending `/`) and every regular file directly inside it becomes
15//!   an `F` entry. Directories are recorded even when empty.
16//! - **Symlinks** are *followed by default* ([`FollowMode::Follow`], the
17//!   oracle's `find -L`): a symlink to a directory is reported as a directory
18//!   and descended into, a symlink to a file as a file, inheriting the
19//!   target's type/permissions/size/checksum. [`FollowMode::NoFollow`] (plain
20//!   `find`) drops symlinks entirely — they appear as neither `D` nor `F`.
21//! - **Permissions** are the octal mode bits, matching `stat -f '%A'` (macOS)
22//!   / `stat -c '%a'` (Linux): the low 12 bits of `st_mode` rendered in octal
23//!   with no leading zero (e.g. `755`, `644`, `700`).
24//! - **File size** is the content byte length (`%z` / `%s`). **Directory size**
25//!   is the *sum of its direct members' sizes* (files and subdirectories),
26//!   excluding the directory's own `stat` size — matching the oracle's
27//!   `_snapdir_manifest_sum_lines` over the direct children.
28//! - **Excludes** are applied via [`ExcludeMatcher`] against the *absolute*
29//!   path of each candidate directory and file, mirroring the oracle's
30//!   `find … | grep -E -v "$EXCLUDE"` (the filter runs before the relative
31//!   `./` rewrite). A `%system%` expansion forces [`FollowMode::NoFollow`];
32//!   the caller resolves that via [`expand_excludes`](crate::excludes::expand_excludes).
33//! - **Paths** are absolute under [`PathMode::Absolute`], or rewritten to a
34//!   leading `./` under [`PathMode::Relative`] (the oracle's
35//!   `sed -E "s| \.?${root_dir}| .|"`). Directory paths always end with `/`.
36//! - **Ordering** is `sort -k5` (byte-wise on the path), delegated to
37//!   [`Manifest`]'s own sort.
38//!
39//! Per the library-purity principle this module reads the filesystem at the
40//! *given* root path (that is its job) but reads no `$HOME`/config/environment
41//! for behavior: the root, options, excludes and hasher all arrive as
42//! parameters, and errors surface as the typed [`WalkError`].
43
44use std::collections::BTreeMap;
45use std::io;
46use std::os::unix::fs::PermissionsExt;
47use std::path::{Path, PathBuf};
48
49use thiserror::Error;
50
51use crate::excludes::{ExcludeMatcher, FollowMode};
52use crate::manifest::{Manifest, ManifestEntry, PathType};
53use crate::merkle::Hasher;
54use crate::progress::{Meter, Phase};
55
56/// Whether emitted paths are absolute or rewritten relative to the root.
57///
58/// Mirrors the oracle's `--absolute` flag: the default is
59/// [`Relative`](PathMode::Relative) (paths prefixed with `./`), and
60/// `--absolute` keeps the full absolute path.
61#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
62pub enum PathMode {
63    /// Rewrite paths to a leading `./` relative to the root (the default).
64    #[default]
65    Relative,
66    /// Keep absolute paths (`--absolute`).
67    Absolute,
68}
69
70/// Options controlling a [`walk`].
71///
72/// All inputs are parameters: this struct carries the symlink-follow setting,
73/// the relative/absolute path mode, and the optional compiled exclude matcher.
74/// The root path and [`Hasher`] are passed to [`walk`] directly.
75#[derive(Debug, Clone, Default)]
76pub struct WalkOptions {
77    /// Whether to follow symlinks ([`FollowMode::Follow`] by default).
78    pub follow: FollowMode,
79    /// Whether to emit absolute or relative (`./`) paths.
80    pub path_mode: PathMode,
81    /// An optional compiled exclude matcher. When `Some`, any directory or
82    /// file whose absolute path matches is dropped (`grep -E -v`).
83    pub exclude: Option<ExcludeMatcher>,
84}
85
86/// Errors raised while walking the filesystem.
87#[derive(Debug, Error)]
88pub enum WalkError {
89    /// The root path is not absolute. The walk needs an absolute root so it can
90    /// rewrite relative paths exactly as the oracle does (it `readlink`s the
91    /// argument to an absolute path first); the CLI lane resolves the user's
92    /// argument before calling [`walk`].
93    #[error("walk root must be an absolute path, got {0:?}")]
94    RootNotAbsolute(PathBuf),
95
96    /// The root path does not resolve to a directory.
97    #[error("walk root is not a directory: {0:?}")]
98    RootNotDirectory(PathBuf),
99
100    /// An I/O error occurred while reading the tree at `path`.
101    #[error("i/o error while walking {path:?}: {source}")]
102    Io {
103        /// The path being read when the error occurred.
104        path: PathBuf,
105        /// The underlying I/O error.
106        #[source]
107        source: io::Error,
108    },
109
110    /// A path could not be rendered as UTF-8. The frozen manifest format is
111    /// UTF-8 text; non-UTF-8 paths cannot be represented.
112    #[error("path is not valid UTF-8: {0:?}")]
113    NonUtf8Path(PathBuf),
114}
115
116impl WalkError {
117    fn io(path: impl Into<PathBuf>, source: io::Error) -> Self {
118        WalkError::Io {
119            path: path.into(),
120            source,
121        }
122    }
123}
124
125/// Renders the octal permission string for a file mode, matching
126/// `stat -f '%A'` (macOS) / `stat -c '%a'` (Linux): the low 12 mode bits in
127/// octal with no leading zero (e.g. `755`, `644`, `4755`).
128fn octal_permissions(mode: u32) -> String {
129    format!("{:o}", mode & 0o7777)
130}
131
132/// Returns a path as `&str`, or a [`WalkError::NonUtf8Path`].
133fn path_str(path: &Path) -> Result<&str, WalkError> {
134    path.to_str()
135        .ok_or_else(|| WalkError::NonUtf8Path(path.to_path_buf()))
136}
137
138/// A discovered file entry, before path rewriting.
139struct FileRecord {
140    /// Absolute path of the file.
141    abs_path: String,
142    permissions: String,
143    checksum: String,
144    size: u64,
145}
146
147/// A discovered directory, holding its absolute path and (filled during the
148/// post-order pass) its computed checksum and member-size total.
149struct DirRecord {
150    /// Absolute path of the directory (no trailing slash, except root `/`).
151    abs_path: String,
152    permissions: String,
153    /// Absolute paths of direct child directories, in discovery order.
154    child_dirs: Vec<String>,
155    /// Direct child files.
156    files: Vec<FileRecord>,
157}
158
159/// Walks the directory tree rooted at `root`, producing a [`Manifest`] that
160/// matches the original `snapdir-manifest` output byte-for-byte for the same
161/// tree and checksum function.
162///
163/// `root` must be an **absolute** path to a directory (the CLI lane resolves
164/// the user's argument first, mirroring the oracle's `readlink`). `hasher`
165/// supplies the content/merkle checksum function (BLAKE3 by default; the
166/// `--checksum-bin` matrix swaps in [`Md5Hasher`](crate::merkle::Md5Hasher) /
167/// [`Sha256Hasher`](crate::merkle::Sha256Hasher) / keyed BLAKE3). `options`
168/// carries the follow mode, path mode and optional exclude matcher.
169///
170/// # Errors
171///
172/// Returns [`WalkError`] if `root` is not absolute, is not a directory, holds a
173/// non-UTF-8 path, or if an I/O error occurs while reading the tree.
174pub fn walk<H: Hasher>(
175    root: &Path,
176    options: &WalkOptions,
177    hasher: &H,
178) -> Result<Manifest, WalkError> {
179    walk_with_meter(root, options, hasher, None)
180}
181
182/// Like [`walk`], but records hashing progress into an optional [`Meter`].
183///
184/// When `meter` is `Some`, the phase is set to [`Phase::Hashing`] and, for each
185/// regular file whose bytes are read and hashed, the file's byte length is added
186/// to the meter's bytes-in counter and the file is counted as one finished
187/// object. When `meter` is `None` this behaves exactly like [`walk`].
188///
189/// The recording is purely advisory: the returned [`Manifest`] is
190/// **byte-identical** whether or not a meter is supplied — the meter is updated
191/// with a couple of cheap [`Ordering::Relaxed`](std::sync::atomic::Ordering)
192/// atomic ops per file and never influences traversal or output.
193///
194/// # Errors
195///
196/// Returns [`WalkError`] under the same conditions as [`walk`].
197pub fn walk_with_meter<H: Hasher>(
198    root: &Path,
199    options: &WalkOptions,
200    hasher: &H,
201    meter: Option<&Meter>,
202) -> Result<Manifest, WalkError> {
203    if let Some(meter) = meter {
204        meter.set_phase(Phase::Hashing);
205    }
206    if !root.is_absolute() {
207        return Err(WalkError::RootNotAbsolute(root.to_path_buf()));
208    }
209
210    // Resolve the root's metadata following symlinks (the oracle always works
211    // on the resolved root directory).
212    let root_meta = std::fs::metadata(root).map_err(|e| WalkError::io(root, e))?;
213    if !root_meta.is_dir() {
214        return Err(WalkError::RootNotDirectory(root.to_path_buf()));
215    }
216    // The oracle's `stat -f '%A'` / `stat -c '%a'` does NOT follow symlinks, so
217    // a directory's PERMISSIONS column always comes from its own `lstat`. For
218    // the root we `lstat` it directly (it is normally a real directory; if it
219    // is itself a symlink the user passed, its own perms still apply).
220    let root_lstat = std::fs::symlink_metadata(root).map_err(|e| WalkError::io(root, e))?;
221    let root_permissions = octal_permissions(root_lstat.permissions().mode());
222
223    let root_str = path_str(root)?.to_owned();
224
225    // Discover every directory (depth-first, following symlinks per `follow`),
226    // recording its direct files and direct child directories. We collect into
227    // an ordered map keyed by absolute path so the post-order pass can compute
228    // directory checksums bottom-up.
229    let mut dirs: BTreeMap<String, DirRecord> = BTreeMap::new();
230    discover_dir(
231        root,
232        &root_str,
233        root_permissions,
234        options,
235        hasher,
236        meter,
237        &mut dirs,
238    )?;
239
240    // Compute each directory's checksum + member-size bottom-up. `dirs` is keyed
241    // by path in a BTreeMap (lexicographic), so a child path always sorts after
242    // its parent prefix; processing in reverse key order guarantees children are
243    // finalized before their parents. We memoize finalized (checksum, size).
244    let keys: Vec<String> = dirs.keys().cloned().collect();
245    let mut finalized: BTreeMap<String, (String, u64)> = BTreeMap::new();
246    for key in keys.iter().rev() {
247        let record = &dirs[key];
248
249        // Direct children's checksums (files + subdirs) for the merkle rule,
250        // and their sizes for the member-size sum.
251        let mut child_checksums: Vec<String> = Vec::new();
252        let mut member_size: u64 = 0;
253        for file in &record.files {
254            child_checksums.push(file.checksum.clone());
255            member_size += file.size;
256        }
257        for child in &record.child_dirs {
258            let (csum, size) = finalized
259                .get(child)
260                .expect("child dir finalized before parent (reverse key order)");
261            child_checksums.push(csum.clone());
262            member_size += size;
263        }
264
265        let checksum =
266            crate::merkle::directory_checksum(child_checksums.iter().map(String::as_str), hasher);
267        finalized.insert(key.clone(), (checksum, member_size));
268    }
269
270    // Emit manifest entries. Files first, then their directory, in any order —
271    // the Manifest sorts by path (`sort -k5`) on Display.
272    let mut manifest = Manifest::new();
273    for (key, record) in &dirs {
274        let (checksum, size) = &finalized[key];
275        let dir_path = render_dir_path(key, &root_str, options.path_mode);
276        manifest.push(ManifestEntry::new(
277            PathType::Directory,
278            record.permissions.clone(),
279            checksum.clone(),
280            *size,
281            dir_path,
282        ));
283        for file in &record.files {
284            let file_path = rewrite_path(&file.abs_path, &root_str, options.path_mode);
285            manifest.push(ManifestEntry::new(
286                PathType::File,
287                file.permissions.clone(),
288                file.checksum.clone(),
289                file.size,
290                file_path,
291            ));
292        }
293    }
294    manifest.sort();
295    Ok(manifest)
296}
297
298/// Recursively discovers the directory at `abs_path` (already known to be a
299/// directory), recording its direct files and child directories, then recurses
300/// into each child directory.
301fn discover_dir<H: Hasher>(
302    dir: &Path,
303    abs_path: &str,
304    permissions: String,
305    options: &WalkOptions,
306    hasher: &H,
307    meter: Option<&Meter>,
308    dirs: &mut BTreeMap<String, DirRecord>,
309) -> Result<(), WalkError> {
310    // `permissions` is the directory's own `lstat` octal mode (a symlinked
311    // directory keeps the symlink's perms, matching the oracle's non-following
312    // `stat -f '%A'` / `stat -c '%a'`).
313    let mut record = DirRecord {
314        abs_path: abs_path.to_owned(),
315        permissions,
316        child_dirs: Vec::new(),
317        files: Vec::new(),
318    };
319
320    let read_dir = std::fs::read_dir(dir).map_err(|e| WalkError::io(dir, e))?;
321    for entry in read_dir {
322        let entry = entry.map_err(|e| WalkError::io(dir, e))?;
323        let entry_path = entry.path();
324        let entry_abs = path_str(&entry_path)?.to_owned();
325
326        // Excludes run on the absolute path (`grep -E -v` over `find` output),
327        // before any relative rewrite. A matching path is dropped for both the
328        // directory listing and the file listing.
329        if let Some(matcher) = &options.exclude {
330            if matcher.is_excluded(&entry_abs) {
331                continue;
332            }
333        }
334
335        // `symlink_metadata` does not traverse the final symlink, so we can
336        // detect symlinks and honor the follow mode like plain `find` vs
337        // `find -L`.
338        let link_meta = entry
339            .metadata()
340            .or_else(|_| std::fs::symlink_metadata(&entry_path))
341            .map_err(|e| WalkError::io(&entry_path, e))?;
342        let is_symlink = link_meta.file_type().is_symlink();
343
344        if is_symlink && !options.follow.follows_symlinks() {
345            // Plain `find` lists a symlink as type `l`; it is neither a `-type d`
346            // nor a `-type f`, so it never enters the manifest under no-follow.
347            continue;
348        }
349
350        // Resolve the (possibly symlinked) target's metadata. Following symlinks
351        // (`find -L`) makes a symlink-to-dir a directory and a symlink-to-file a
352        // file, inheriting the target's type/perms/size/checksum.
353        let target_meta = match std::fs::metadata(&entry_path) {
354            Ok(m) => m,
355            Err(e) => {
356                // A broken symlink (or a symlink loop on some platforms) cannot
357                // be stat'd through. `find -L` likewise cannot classify it as a
358                // file or directory, so it is omitted. Surface real I/O errors
359                // on non-symlink entries.
360                if is_symlink && (e.kind() == io::ErrorKind::NotFound || is_loop_error(&e)) {
361                    continue;
362                }
363                return Err(WalkError::io(&entry_path, e));
364            }
365        };
366        let file_type = target_meta.file_type();
367
368        // PERMISSIONS (and, for files, SIZE) come from the entry's own `lstat`,
369        // because the oracle's `stat` is non-following: a symlinked entry keeps
370        // the symlink's perms/size while its CHECKSUM is read through the link
371        // (b3sum/md5sum/sha256sum all follow symlinks). For a real (non-symlink)
372        // entry `lstat` == `stat`, so this is identical there.
373        let own_permissions = octal_permissions(link_meta.permissions().mode());
374
375        if file_type.is_dir() {
376            record.child_dirs.push(entry_abs.clone());
377            discover_dir(
378                &entry_path,
379                &entry_abs,
380                own_permissions,
381                options,
382                hasher,
383                meter,
384                dirs,
385            )?;
386        } else if file_type.is_file() {
387            // Read content through the link for the checksum; take SIZE from the
388            // entry's own `lstat` (for a symlink that is the target-path length,
389            // matching the oracle's `%z` / `%s` on the un-dereferenced symlink).
390            let bytes = std::fs::read(&entry_path).map_err(|e| WalkError::io(&entry_path, e))?;
391            let checksum = hasher.hash_hex(&bytes);
392            // Advisory progress: record the bytes hashed and count this file as
393            // one finished object. This never affects the manifest output.
394            if let Some(meter) = meter {
395                meter.add_in(bytes.len() as u64);
396                meter.object_finished();
397            }
398            record.files.push(FileRecord {
399                abs_path: entry_abs,
400                permissions: own_permissions,
401                checksum,
402                size: link_meta.len(),
403            });
404        }
405        // Anything else (sockets, fifos, devices) is neither `-type d` nor
406        // `-type f`, so it is skipped — matching `find`.
407    }
408
409    dirs.insert(record.abs_path.clone(), record);
410    Ok(())
411}
412
413/// Detects a symlink-loop I/O error (`ELOOP`) so the walk can skip it the way
414/// `find -L` halts on / omits a self-referential symlink.
415fn is_loop_error(error: &io::Error) -> bool {
416    error.raw_os_error() == Some(libc_eloop())
417}
418
419/// `ELOOP` is 40 on Linux and 62 on macOS/BSD. We avoid a `libc` dependency by
420/// matching on the message kind via the raw errno of both platforms.
421const fn libc_eloop() -> i32 {
422    #[cfg(target_os = "linux")]
423    {
424        40
425    }
426    #[cfg(not(target_os = "linux"))]
427    {
428        62
429    }
430}
431
432/// Renders a directory's path for the manifest: always trailing-`/`, and either
433/// absolute or rewritten to a leading `./` relative to `root`.
434fn render_dir_path(abs_path: &str, root: &str, mode: PathMode) -> String {
435    let rewritten = rewrite_path(abs_path, root, mode);
436    // Directory paths always end with `/`. The root rewrites to "." -> "./";
437    // a nested dir "./a" -> "./a/". Absolute "/abs/a" -> "/abs/a/".
438    if rewritten.ends_with('/') {
439        rewritten
440    } else {
441        format!("{rewritten}/")
442    }
443}
444
445/// Applies the oracle's relative rewrite `sed -E "s| \.?${root_dir}| .|"`:
446/// the leading `root` prefix of an absolute path becomes `.`. In absolute mode
447/// the path is returned unchanged.
448fn rewrite_path(abs_path: &str, root: &str, mode: PathMode) -> String {
449    match mode {
450        PathMode::Absolute => abs_path.to_owned(),
451        PathMode::Relative => {
452            if abs_path == root {
453                // The root directory itself becomes ".".
454                ".".to_owned()
455            } else if let Some(rest) = abs_path.strip_prefix(root) {
456                // rest starts with '/': "/a/aa/f1" -> "./a/aa/f1".
457                format!(".{rest}")
458            } else {
459                // Defensive: not under root (should not happen). Leave as-is.
460                abs_path.to_owned()
461            }
462        }
463    }
464}
465
466#[cfg(test)]
467mod tests {
468    //! Pure-Rust walk tests.
469    //!
470    //! Originally these shelled out to the legacy Bash oracle
471    //! (the `snapdir-manifest` script) and asserted byte-identity. The oracle
472    //! has since been deleted from the branch, so each case is now pinned
473    //! against an
474    //! **embedded golden manifest constant** (or, where a column is
475    //! platform-dependent, a structural assertion). The golden bytes were
476    //! captured once from this very `walk` implementation over fixtures with
477    //! **explicit, fixed permissions** (dirs `0o700`/`0o755`, files `0o600`),
478    //! which makes the `TYPE PERMS CHECKSUM SIZE PATH` output fully
479    //! deterministic. The content/size/checksum/merkle columns were
480    //! cross-checked against the recorded oracle vectors in
481    //! `crates/snapdir-core/tests/compat_golden.rs` (e.g. the empty-file
482    //! `af1349b9…` checksum and the `./a/aa/aaa/` merkle `8aed4caf…`).
483    //!
484    //! Symlink rows (`./a_link/`, `./r1f_link`) carry the symlink's *own* lstat
485    //! permissions, which differ across platforms (macOS reports `755`, Linux
486    //! `777`), so those tests assert structure (presence/absence + materialized
487    //! subtree) rather than a byte-exact perm column.
488    use super::*;
489    use crate::merkle::Blake3Hasher;
490    use crate::progress::{Meter, Phase};
491    use std::fs;
492    use std::os::unix::fs::PermissionsExt;
493    use std::path::PathBuf;
494    use std::sync::atomic::{AtomicU64, Ordering};
495
496    /// A self-cleaning scratch directory under the system temp dir. Avoids a
497    /// `tempfile` dev-dependency; the walk is library-pure and never reads the
498    /// environment itself — only this test harness builds fixtures on disk. The
499    /// root is chmod'd to a fixed `0o755` so the root `D` line's perm column is
500    /// deterministic across umasks.
501    struct Scratch {
502        path: PathBuf,
503    }
504
505    impl Scratch {
506        fn new(tag: &str) -> Self {
507            static COUNTER: AtomicU64 = AtomicU64::new(0);
508            let n = COUNTER.fetch_add(1, Ordering::Relaxed);
509            let pid = std::process::id();
510            // Resolve through canonicalize so macOS's /var -> /private/var (and
511            // any other symlinked temp prefix) is already normalized.
512            let base = std::env::temp_dir()
513                .canonicalize()
514                .expect("temp dir canonicalizes");
515            let path = base.join(format!("snapdir-walk-test-{tag}-{pid}-{n}"));
516            let _ = fs::remove_dir_all(&path);
517            fs::create_dir_all(&path).expect("create scratch dir");
518            fs::set_permissions(&path, fs::Permissions::from_mode(0o755))
519                .expect("chmod scratch root");
520            Scratch { path }
521        }
522
523        fn root(&self) -> &Path {
524            &self.path
525        }
526    }
527
528    impl Drop for Scratch {
529        fn drop(&mut self) {
530            let _ = fs::remove_dir_all(&self.path);
531        }
532    }
533
534    /// Writes a file (creating parents) with a fixed `0o600` mode so the `F`
535    /// line's perm column is deterministic.
536    fn write_file(path: &Path, contents: &[u8]) {
537        if let Some(parent) = path.parent() {
538            fs::create_dir_all(parent).expect("create parent dir");
539        }
540        fs::write(path, contents).expect("write file");
541        fs::set_permissions(path, fs::Permissions::from_mode(0o600)).expect("chmod file");
542    }
543
544    /// Recursively chmods `root` and every descendant directory to `mode`, so
545    /// every `D` line's perm column is pinned (independent of the process umask).
546    fn chmod_dirs(root: &Path, mode: u32) {
547        fs::set_permissions(root, fs::Permissions::from_mode(mode)).expect("chmod dir");
548        for entry in fs::read_dir(root).expect("read_dir").flatten() {
549            let ft = entry.file_type().expect("file_type");
550            // `is_dir()` here is lstat-based via DirEntry::file_type, so a
551            // symlink-to-dir is NOT recursed into (its own perms stay as-is).
552            if ft.is_dir() {
553                chmod_dirs(&entry.path(), mode);
554            }
555        }
556    }
557
558    /// Builds a [`WalkOptions`] for the given follow/path/exclude combination.
559    fn opts(follow: FollowMode, path_mode: PathMode, exclude: Option<&str>) -> WalkOptions {
560        WalkOptions {
561            follow,
562            path_mode,
563            exclude: exclude.map(|p| ExcludeMatcher::new(p).expect("valid exclude regex")),
564        }
565    }
566
567    /// Runs the walk and returns its `Display` manifest text (no trailing
568    /// newline — `Manifest`'s `Display` does not emit one).
569    fn manifest_text(root: &Path, options: &WalkOptions) -> String {
570        walk(root, options, &Blake3Hasher::new())
571            .expect("walk")
572            .to_string()
573    }
574
575    // -- Empty-string / empty-file checksum reused from the oracle vectors -----
576    // (matches compat_golden.rs::EMPTY_FILE_B3).
577    const EMPTY_B3: &str = "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262";
578
579    #[test]
580    fn walk_root_must_be_absolute() {
581        let err = walk(
582            Path::new("relative/path"),
583            &WalkOptions::default(),
584            &Blake3Hasher::new(),
585        )
586        .unwrap_err();
587        assert!(matches!(err, WalkError::RootNotAbsolute(_)));
588    }
589
590    #[test]
591    fn walk_empty_directory_golden() {
592        // An empty directory: a single `D` line whose checksum is the merkle of
593        // zero children == blake3("") and whose size is 0. Root chmod'd to 755.
594        let scratch = Scratch::new("empty-dir");
595        let expected = format!("D 755 {EMPTY_B3} 0 ./");
596        assert_eq!(
597            manifest_text(scratch.root(), &WalkOptions::default()),
598            expected
599        );
600    }
601
602    #[test]
603    fn walk_single_empty_file_golden() {
604        // Root `D` line (its merkle == single empty-file child) plus the `F`
605        // line for the empty file. Both content checksums are blake3("").
606        let scratch = Scratch::new("empty-file");
607        write_file(&scratch.root().join("empty.txt"), b"");
608        let expected = format!(
609            "D 755 dba5865c0d91b17958e4d2cac98c338f85cbbda07b71a020ab16c391b5e7af4b 0 ./\n\
610             F 600 {EMPTY_B3} 0 ./empty.txt"
611        );
612        assert_eq!(
613            manifest_text(scratch.root(), &WalkOptions::default()),
614            expected
615        );
616    }
617
618    /// The deep guide tree under [`PathMode::Relative`]. Dirs are `0o700`, files
619    /// `0o600`; every checksum/merkle value matches the recorded oracle vectors
620    /// (cf. `compat_golden.rs::MULTILEVEL_MANIFEST` — same `./a/…`/`./b/…`/`./c/…`
621    /// subtree). The extra empty `./d/` dir carries the blake3("") merkle.
622    const NESTED_RELATIVE_GOLDEN: &str = "\
623D 700 3f938f681dcbd616d00d42f704d525c05e7ed2746888c35c8214127c632587c3 43 ./
624D 700 ed23cfd2037d23cf8c6b67497425e7a06d5e40ea2bd8e43fc434006022dafe86 21 ./a/
625F 600 3c9cb8b8c8f3588f8e59e18d284330b0a951be644fbef2b9784b56e15d1c6096 4 ./a/a1f
626D 700 ee795476bff6c1816b4c7558a74ee0b44ec600c3cde6b02564508f67d536a656 17 ./a/aa/
627F 600 a2951028421deef48d1ba185f4c497c2d986f1dd76079baf2f5eb8479f132b5a 5 ./a/aa/aa1f
628D 700 8aed4caf45b22aa4c8a195945136e3a01f77864e91fabe2d9272feeee87ae334 12 ./a/aa/aaa/
629F 600 5cfee4fb4074748633b4ccbddb6b184a9b5e2f5ce74df6d2803f5fea0392a197 6 ./a/aa/aaa/aaa1f
630F 600 3791f11a017feedffd24c2656e18d5c4ca9d6c404c8f40ccc511b6351c8575a6 6 ./a/aa/aaa/aaa2f
631D 700 9a8b0e35c000df69893648b91d15cc30ab88ae5a40af48228caf5fa443dafc9b 12 ./b/
632D 700 d41c2090167e6f546a510f0da98d8a8355d6bd2b61666644604c73b3a8f5b5d9 12 ./b/bb/
633D 700 3b9023fa454aa22466feeb8cbf55a2c764dd79de0e93c9a793e8b54caec227da 12 ./b/bb/bbb/
634F 600 8d18b7f3aabbef192a524fa2549d1d36b48c9030d234c9bdf87caa267fb09933 6 ./b/bb/bbb/bbb1f
635F 600 2e16e172b6e337325f271d4eae00bc1ea20e41609ef78665710cada1477005cc 6 ./b/bb/bbb/bbb2f
636D 700 15eb2657c1e6f5a24023c10429bb6f1b7d81b2cc2057eedee2192fbf3e7b892c 6 ./c/
637D 700 e711f4e76ae9b3e25ad9a32b5f115cc9a81e55a428c552aa0bcab8543967f51a 6 ./c/cc/
638D 700 31a1955d5a65328f31014650cf79b5c0c3d9b82de19352ade8d299cc22f6ec40 6 ./c/cc/ccc/
639F 600 24f0cf3553e0dac0ce8aead4279e0fc368899e89ef776999d0d7e812b5ca0f3b 6 ./c/cc/ccc/ccc1f
640D 700 af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 0 ./d/
641F 600 27a55588c59999fd686667c4b186af08161b95c287216f0cde723f0e191d1974 4 ./r1f";
642
643    fn build_nested(root: &Path) {
644        write_file(&root.join("a/aa/aaa/aaa1f"), b"aaa1f\n");
645        write_file(&root.join("a/aa/aaa/aaa2f"), b"aaa2f\n");
646        write_file(&root.join("a/aa/aa1f"), b"aa1f\n");
647        write_file(&root.join("a/a1f"), b"a1f\n");
648        write_file(&root.join("r1f"), b"r1f\n");
649        write_file(&root.join("b/bb/bbb/bbb1f"), b"bbb1f\n");
650        write_file(&root.join("b/bb/bbb/bbb2f"), b"bbb2f\n");
651        write_file(&root.join("c/cc/ccc/ccc1f"), b"ccc1f\n");
652        // Empty subdirectory with no files.
653        fs::create_dir_all(root.join("d")).unwrap();
654        chmod_dirs(root, 0o700);
655    }
656
657    #[test]
658    fn walk_nested_tree_relative_golden() {
659        let scratch = Scratch::new("nested-rel");
660        build_nested(scratch.root());
661        assert_eq!(
662            manifest_text(
663                scratch.root(),
664                &opts(FollowMode::Follow, PathMode::Relative, None)
665            ),
666            NESTED_RELATIVE_GOLDEN
667        );
668    }
669
670    #[test]
671    fn walk_nested_tree_absolute_golden() {
672        // Under PathMode::Absolute every PATH column is the scratch root prefix
673        // + the relative tail; the TYPE/PERMS/CHECKSUM/SIZE columns are
674        // identical to the relative golden. We reconstruct the expected text by
675        // rewriting the relative golden's `./` prefix to the absolute root,
676        // proving the only difference is the path rendering.
677        let scratch = Scratch::new("nested-abs");
678        let r = scratch.root();
679        build_nested(r);
680        let root_str = r.to_str().unwrap();
681        let expected: String = NESTED_RELATIVE_GOLDEN
682            .lines()
683            .map(|line| {
684                // Replace the leading "./" of the PATH (last field) with the
685                // absolute root. The path is everything after the 4th space.
686                let (head, path) = line.rsplit_once(' ').unwrap();
687                let abs_path = if path == "./" {
688                    format!("{root_str}/")
689                } else {
690                    format!("{root_str}/{}", path.strip_prefix("./").unwrap())
691                };
692                format!("{head} {abs_path}")
693            })
694            .collect::<Vec<_>>()
695            .join("\n");
696        assert_eq!(
697            manifest_text(r, &opts(FollowMode::Follow, PathMode::Absolute, None)),
698            expected
699        );
700    }
701
702    #[test]
703    fn walk_directory_size_is_sum_of_members_golden() {
704        // Cross-check dir-size summation: each `D` line's SIZE is the sum of its
705        // members (recursively), independent of the directory's own stat size.
706        let scratch = Scratch::new("dir-size");
707        let r = scratch.root();
708        write_file(&r.join("f1"), b"hello"); // 5
709        write_file(&r.join("sub/f2"), b"world!!"); // 7
710        write_file(&r.join("sub/f3"), b"x"); // 1
711        chmod_dirs(r, 0o700);
712
713        let expected = "\
714D 700 5681c72cfd0ddea4f54683365bc4082b92147bf33976875653133cc4aed0f96a 13 ./
715F 600 ea8f163db38682925e4491c5e58d4bb3506ef8c14eb78a86e908c5624a67200f 5 ./f1
716D 700 2ac73ec4f4ec2ef21ebfba467be499a58aef80a34d7001d68bdeb14cb58a954d 8 ./sub/
717F 600 8bafa24d36bc2aa6edc0d041e763cb59ebadb71b6e63ab4ac9314de95e9a0de7 7 ./sub/f2
718F 600 3ae7d805f6789a6402acb70ad4096a85a56bf6804eaf25c0493ac697548d30b5 1 ./sub/f3";
719        let manifest = walk(r, &WalkOptions::default(), &Blake3Hasher::new()).expect("walk");
720        assert_eq!(manifest.to_string(), expected);
721
722        // Structural cross-check of the summation rule independent of the bytes.
723        let root_dir = manifest.entries().iter().find(|e| e.path == "./").unwrap();
724        let sub_dir = manifest
725            .entries()
726            .iter()
727            .find(|e| e.path == "./sub/")
728            .unwrap();
729        assert_eq!(sub_dir.size, 8, "sub = f2(7) + f3(1)");
730        assert_eq!(root_dir.size, 13, "root = f1(5) + sub(8)");
731    }
732
733    /// Builds the symlink fixture: a real `a/` subtree plus a dir-symlink
734    /// `a_link -> a` and a file-symlink `r1f_link -> r1f`. Real dirs chmod'd to
735    /// `0o700`; files `0o600`. The symlinks' own perms are left platform-default
736    /// (NOT chmod'd) — hence the structural (not byte-golden) assertions below.
737    fn build_symlinks(root: &Path) {
738        write_file(&root.join("a/aa/f1"), b"hello");
739        write_file(&root.join("a/f2"), b"world!!");
740        write_file(&root.join("r1f"), b"r");
741        std::os::unix::fs::symlink("a", root.join("a_link")).expect("symlink dir");
742        std::os::unix::fs::symlink("r1f", root.join("r1f_link")).expect("symlink file");
743        chmod_dirs(root, 0o700);
744    }
745
746    #[test]
747    fn walk_symlink_followed_by_default() {
748        let scratch = Scratch::new("symlink-follow");
749        let r = scratch.root();
750        build_symlinks(r);
751
752        let manifest = manifest_text(r, &opts(FollowMode::Follow, PathMode::Relative, None));
753
754        // The dir symlink is followed: it materializes as its own `D ./a_link/`
755        // row whose CHECKSUM equals the real `./a/` directory's merkle, plus the
756        // full target subtree mirrored under ./a_link/.
757        let a_dir_b3 = "0c862ed8e62262f84e7fc0fe4a6c566adec4a85ef22f8a46b7ad4c9344146701";
758        assert!(
759            manifest
760                .lines()
761                .any(|l| l.starts_with("D ") && l.contains(a_dir_b3) && l.ends_with(" ./a/")),
762            "real ./a/ dir present with its merkle: {manifest}"
763        );
764        assert!(
765            manifest
766                .lines()
767                .any(|l| l.starts_with("D ") && l.contains(a_dir_b3) && l.ends_with(" ./a_link/")),
768            "followed symlink dir ./a_link/ mirrors ./a/'s merkle: {manifest}"
769        );
770        // Mirrored target subtree entries (content checksums are deterministic).
771        assert!(manifest.lines().any(|l| l.ends_with(" ./a_link/aa/")));
772        assert!(manifest.lines().any(|l| {
773            l.starts_with("F ")
774                && l.contains("ea8f163db38682925e4491c5e58d4bb3506ef8c14eb78a86e908c5624a67200f")
775                && l.ends_with(" ./a_link/aa/f1")
776        }));
777        // The file symlink is followed: it appears as an `F` row pointing at the
778        // target's content (blake3("r")), ending in ./r1f_link.
779        let r1f_b3 = "b2dea48d667b2821a9bcf69eded39a2458a1d8165ca7fcac64c3557b69a7ea08";
780        assert!(
781            manifest
782                .lines()
783                .any(|l| l.starts_with("F ") && l.contains(r1f_b3) && l.ends_with(" ./r1f_link")),
784            "followed symlink file ./r1f_link present: {manifest}"
785        );
786        assert!(
787            manifest
788                .lines()
789                .any(|l| l.starts_with("F ") && l.contains(r1f_b3) && l.ends_with(" ./r1f")),
790            "real ./r1f present: {manifest}"
791        );
792    }
793
794    #[test]
795    fn walk_no_follow_drops_symlinks() {
796        let scratch = Scratch::new("symlink-nofollow");
797        let r = scratch.root();
798        build_symlinks(r);
799
800        // With --no-follow the symlinks are dropped entirely; the manifest is a
801        // byte-exact golden over only the real entries (no `_link` rows). Note
802        // the root `D` SIZE is 13 (= sum of real members), not the 28 of the
803        // followed case (which double-counts via a_link/).
804        let expected = "\
805D 700 61a8f1898844a17eeed84d34c2e3b5fd9c7fef136dba5f7036ae70294595a085 13 ./
806D 700 0c862ed8e62262f84e7fc0fe4a6c566adec4a85ef22f8a46b7ad4c9344146701 12 ./a/
807D 700 6cd17c61c7e42c50586ee5f3f54dbc4f809f71073fc176ed2ae865103dd33625 5 ./a/aa/
808F 600 ea8f163db38682925e4491c5e58d4bb3506ef8c14eb78a86e908c5624a67200f 5 ./a/aa/f1
809F 600 8bafa24d36bc2aa6edc0d041e763cb59ebadb71b6e63ab4ac9314de95e9a0de7 7 ./a/f2
810F 600 b2dea48d667b2821a9bcf69eded39a2458a1d8165ca7fcac64c3557b69a7ea08 1 ./r1f";
811        let manifest = manifest_text(r, &opts(FollowMode::NoFollow, PathMode::Relative, None));
812        assert_eq!(manifest, expected);
813        assert!(!manifest.contains("_link"), "no-follow drops all symlinks");
814    }
815
816    #[test]
817    fn walk_exclude_regex_golden() {
818        let scratch = Scratch::new("exclude-regex");
819        let r = scratch.root();
820        write_file(&r.join("keep/k"), b"x");
821        write_file(&r.join("drop/d"), b"y");
822        write_file(&r.join("top.txt"), b"top");
823        chmod_dirs(r, 0o700);
824
825        // The matcher runs against the ABSOLUTE find path, so the exclude is
826        // anchored at the absolute root + "/drop". `drop/` is dropped entirely;
827        // `keep/` and `top.txt` remain (byte-exact golden over the survivors).
828        let abs = r.to_str().unwrap();
829        let pattern = format!("{abs}/drop");
830        let manifest = manifest_text(
831            r,
832            &opts(FollowMode::Follow, PathMode::Relative, Some(&pattern)),
833        );
834        let expected = "\
835D 700 b6f1055a5f14fdd55fa831ff6d2e2f433c7ca7fa2cc43e63a8cd0a4542d3010a 4 ./
836D 700 b9030f201b43e2a72e62951476c0bcfafe3b020ece221d2254d8610ea9e88fb5 1 ./keep/
837F 600 3ae7d805f6789a6402acb70ad4096a85a56bf6804eaf25c0493ac697548d30b5 1 ./keep/k
838F 600 ef854702aa94ba4f60c67d731671c9e0e49a031be6ce475489e91f7a33cb5243 3 ./top.txt";
839        assert_eq!(manifest, expected);
840        assert!(!manifest.contains("drop"), "drop/ excluded");
841    }
842
843    #[test]
844    fn walk_exclude_common_golden() {
845        let scratch = Scratch::new("exclude-common");
846        let r = scratch.root();
847        write_file(&r.join("src/main.rs"), b"fn main() {}\n");
848        write_file(&r.join(".git/objects/secret"), b"secret");
849        write_file(&r.join("node_modules/pkg/index.js"), b"//js\n");
850        chmod_dirs(r, 0o700);
851
852        // %common% expands to the regex that drops .git, node_modules, etc.
853        // (the CLI lane uses the same expansion; core never reads the env).
854        let expanded = crate::excludes::expand_excludes(
855            "%common%",
856            "/nonexistent/.cache/",
857            "/nonexistent/cache",
858        );
859        let pattern = expanded.pattern.expect("non-empty");
860        let manifest = manifest_text(
861            r,
862            &opts(FollowMode::Follow, PathMode::Relative, Some(&pattern)),
863        );
864        // Only ./src survives — byte-exact golden over the survivors.
865        let expected = "\
866D 700 ad5409ad5f97a26c908382b379b23971ee143e6bcd29a7d663175936d2cd4e94 13 ./
867D 700 069cd5e102d7dd39faa7093b5b2d784c32e19b01f829a902c14aa10b7182debc 13 ./src/
868F 600 2d1ebfa706ba230165250f744796a92accba5e1b6fa357983b65319da33f8e93 13 ./src/main.rs";
869        assert_eq!(manifest, expected);
870        assert!(!manifest.contains(".git"), "%common% excludes .git");
871        assert!(
872            !manifest.contains("node_modules"),
873            "%common% excludes node_modules"
874        );
875    }
876
877    #[test]
878    fn progress_meter_walk_records_files_and_bytes() {
879        // A small tree with known file sizes; the meter records the total bytes
880        // hashed and one finished object per file.
881        let scratch = Scratch::new("meter-records");
882        let r = scratch.root();
883        write_file(&r.join("f1"), b"hello"); // 5
884        write_file(&r.join("sub/f2"), b"world!!"); // 7
885        write_file(&r.join("sub/f3"), b"x"); // 1
886        chmod_dirs(r, 0o700);
887
888        let meter = Meter::new();
889        let _ = walk_with_meter(
890            r,
891            &WalkOptions::default(),
892            &Blake3Hasher::new(),
893            Some(&meter),
894        )
895        .expect("walk");
896
897        let snap = meter.snapshot();
898        assert_eq!(snap.bytes_in, 5 + 7 + 1, "sum of file byte lengths");
899        assert_eq!(snap.objects_done, 3, "one finished object per file");
900        assert_eq!(snap.in_flight, 0, "no object left in flight");
901        assert_eq!(snap.phase, Phase::Hashing, "walk sets the Hashing phase");
902    }
903
904    #[test]
905    fn progress_meter_walk_output_unchanged() {
906        // Recording into a meter must not change the manifest: walk(None) and
907        // walk_with_meter(Some) over the same tree are byte-identical.
908        let scratch = Scratch::new("meter-unchanged");
909        let r = scratch.root();
910        build_nested(r);
911        let opts = opts(FollowMode::Follow, PathMode::Relative, None);
912
913        let without = walk(r, &opts, &Blake3Hasher::new()).expect("walk");
914        let meter = Meter::new();
915        let with = walk_with_meter(r, &opts, &Blake3Hasher::new(), Some(&meter)).expect("walk");
916
917        assert_eq!(
918            without.to_string(),
919            with.to_string(),
920            "meter recording must not change the manifest"
921        );
922        // And it really did record (sanity: nine files in build_nested).
923        assert_eq!(meter.snapshot().objects_done, 8);
924    }
925
926    #[test]
927    fn walk_snapshot_id_is_blake3_of_manifest_text() {
928        // The snapshot id is BLAKE3 of the manifest text + a trailing newline
929        // (comment lines stripped). Cross-check the public derivation against an
930        // explicit recomputation over the walk's own output.
931        let scratch = Scratch::new("snapshot-id");
932        let r = scratch.root();
933        write_file(&r.join("a/f1"), b"hello\n");
934        write_file(&r.join("b/f2"), b"world\n");
935        chmod_dirs(r, 0o700);
936        let hasher = Blake3Hasher::new();
937        let manifest = walk(r, &WalkOptions::default(), &hasher).expect("walk");
938        let id = crate::merkle::snapshot_id(&manifest, &hasher);
939
940        let mut bytes = manifest.to_string().into_bytes();
941        bytes.push(b'\n');
942        let expected = hasher.hash_hex(&bytes);
943        assert_eq!(
944            id, expected,
945            "snapshot id == blake3(manifest_text + \"\\n\")"
946        );
947        assert_eq!(id.len(), 64, "id is 64 lowercase hex chars");
948        assert!(id.chars().all(|c| c.is_ascii_hexdigit()));
949    }
950}