snapdir_core/walk.rs
1//! In-process filesystem walk producing a frozen-format [`Manifest`].
2//!
3//! This module reproduces the original `snapdir-manifest generate` behavior in
4//! pure Rust, consuming the frozen [`manifest`](crate::manifest),
5//! [`merkle`](crate::merkle) and [`excludes`](crate::excludes) APIs without
6//! changing any of them. It walks a directory tree and emits one
7//! [`ManifestEntry`] per file (`F`) and directory (`D`), computing per-file
8//! content checksums with a [`Hasher`] and per-directory checksums/sizes with
9//! [`directory_checksum`].
10//!
11//! ## Behaviors matched against the oracle
12//!
13//! - **Traversal** mirrors `find`/`find -L`: every directory becomes a `D`
14//! entry (path ending `/`) and every regular file directly inside it becomes
15//! an `F` entry. Directories are recorded even when empty.
//! - **Symlinks** are *followed by default* ([`FollowMode::Follow`], the
//!   oracle's `find -L`): a symlink to a directory is reported as a directory
//!   and descended into, a symlink to a file as a file. A followed symlink
//!   takes its type and checksum from the target, while its permissions (and,
//!   for a file, its size) come from the link's own `lstat`, because the
//!   oracle's `stat` does not follow links. [`FollowMode::NoFollow`] (plain
//!   `find`) drops symlinks entirely — they appear as neither `D` nor `F`.
21//! - **Permissions** are the octal mode bits, matching `stat -f '%A'` (macOS)
22//! / `stat -c '%a'` (Linux): the low 12 bits of `st_mode` rendered in octal
23//! with no leading zero (e.g. `755`, `644`, `700`).
24//! - **File size** is the content byte length (`%z` / `%s`). **Directory size**
25//! is the *sum of its direct members' sizes* (files and subdirectories),
26//! excluding the directory's own `stat` size — matching the oracle's
27//! `_snapdir_manifest_sum_lines` over the direct children.
28//! - **Excludes** are applied via [`ExcludeMatcher`] against the *absolute*
29//! path of each candidate directory and file, mirroring the oracle's
30//! `find … | grep -E -v "$EXCLUDE"` (the filter runs before the relative
31//! `./` rewrite). A `%system%` expansion forces [`FollowMode::NoFollow`];
32//! the caller resolves that via [`expand_excludes`](crate::excludes::expand_excludes).
33//! - **Paths** are absolute under [`PathMode::Absolute`], or rewritten to a
34//! leading `./` under [`PathMode::Relative`] (the oracle's
35//! `sed -E "s| \.?${root_dir}| .|"`). Directory paths always end with `/`.
36//! - **Ordering** is `sort -k5` (byte-wise on the path), delegated to
37//! [`Manifest`]'s own sort.
38//!
39//! Per the library-purity principle this module reads the filesystem at the
40//! *given* root path (that is its job) but reads no `$HOME`/config/environment
41//! for behavior: the root, options, excludes and hasher all arrive as
42//! parameters, and errors surface as the typed [`WalkError`].
43
44use std::collections::BTreeMap;
45use std::io;
46use std::os::unix::fs::PermissionsExt;
47use std::path::{Path, PathBuf};
48
49use thiserror::Error;
50
51use crate::excludes::{ExcludeMatcher, FollowMode};
52use crate::manifest::{Manifest, ManifestEntry, PathType};
53use crate::merkle::Hasher;
54
/// Selects how paths are rendered in the emitted manifest: absolute, or
/// rewritten relative to the walk root.
///
/// Mirrors the oracle's `--absolute` flag: the default is
/// [`Relative`](PathMode::Relative) (paths prefixed with `./`), and
/// `--absolute` keeps the full absolute path.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PathMode {
    /// Rewrite paths to a leading `./` relative to the root (the default).
    #[default]
    Relative,
    /// Keep absolute paths exactly as discovered (`--absolute`).
    Absolute,
}
68
/// Options controlling a [`walk`].
///
/// All inputs are parameters: this struct carries the symlink-follow setting,
/// the relative/absolute path mode, and the optional compiled exclude matcher.
/// The root path and [`Hasher`] are passed to [`walk`] directly.
///
/// `Default` yields each field's own default: the default [`FollowMode`], the
/// default [`PathMode`] ([`PathMode::Relative`]) and no exclude matcher.
#[derive(Debug, Clone, Default)]
pub struct WalkOptions {
    /// Whether to follow symlinks ([`FollowMode::Follow`] by default).
    pub follow: FollowMode,
    /// Whether to emit absolute or relative (`./`) paths.
    pub path_mode: PathMode,
    /// An optional compiled exclude matcher. When `Some`, any directory or
    /// file whose absolute path matches is dropped (`grep -E -v`). The root
    /// itself is never tested against the matcher; only entries found while
    /// reading directories are.
    pub exclude: Option<ExcludeMatcher>,
}
84
/// Errors raised while walking the filesystem.
///
/// Every failure of [`walk`] is reported through one of these variants; the
/// walk stops at the first error it encounters.
#[derive(Debug, Error)]
pub enum WalkError {
    /// The root path is not absolute. The walk needs an absolute root so it can
    /// rewrite relative paths exactly as the oracle does (it `readlink`s the
    /// argument to an absolute path first); the CLI lane resolves the user's
    /// argument before calling [`walk`].
    #[error("walk root must be an absolute path, got {0:?}")]
    RootNotAbsolute(PathBuf),

    /// The root path does not resolve to a directory (checked after following
    /// symlinks, so a symlink to a directory is accepted as a root).
    #[error("walk root is not a directory: {0:?}")]
    RootNotDirectory(PathBuf),

    /// An I/O error occurred while reading the tree at `path` (stat, directory
    /// listing or file read).
    #[error("i/o error while walking {path:?}: {source}")]
    Io {
        /// The path being read when the error occurred.
        path: PathBuf,
        /// The underlying I/O error.
        #[source]
        source: io::Error,
    },

    /// A path could not be rendered as UTF-8. The frozen manifest format is
    /// UTF-8 text; non-UTF-8 paths cannot be represented.
    #[error("path is not valid UTF-8: {0:?}")]
    NonUtf8Path(PathBuf),
}
114
115impl WalkError {
116 fn io(path: impl Into<PathBuf>, source: io::Error) -> Self {
117 WalkError::Io {
118 path: path.into(),
119 source,
120 }
121 }
122}
123
/// Formats a raw `st_mode` value as the manifest's PERMISSIONS column.
///
/// Only the low 12 bits (permission bits plus setuid/setgid/sticky) are kept
/// and rendered in octal without a leading zero, which is what
/// `stat -f '%A'` (macOS) / `stat -c '%a'` (Linux) print — e.g. `755`, `644`,
/// `4755`. The file-type bits above them are discarded.
fn octal_permissions(mode: u32) -> String {
    let permission_bits = mode & 0o7777;
    format!("{permission_bits:o}")
}
130
131/// Returns a path as `&str`, or a [`WalkError::NonUtf8Path`].
132fn path_str(path: &Path) -> Result<&str, WalkError> {
133 path.to_str()
134 .ok_or_else(|| WalkError::NonUtf8Path(path.to_path_buf()))
135}
136
/// A discovered file entry, before path rewriting.
///
/// One record is created per regular file (or followed symlink to a regular
/// file) found directly inside a directory.
struct FileRecord {
    /// Absolute path of the file.
    abs_path: String,
    /// Octal permission string taken from the entry's own `lstat` (for a
    /// followed symlink these are the link's permissions, not the target's).
    permissions: String,
    /// Hex checksum of the file's content, read through the path (so through
    /// the link for a followed symlink).
    checksum: String,
    /// Size in bytes as reported by the entry's own `lstat`.
    size: u64,
}
145
/// A discovered directory: its absolute path, its own permissions and its
/// direct children.
///
/// The directory's checksum and member-size total are *not* stored here;
/// `walk` computes them in its bottom-up pass and keeps them in a separate map
/// keyed by the same absolute path.
struct DirRecord {
    /// Absolute path of the directory. Paths of discovered children carry no
    /// trailing slash; the root is stored exactly as it was passed to `walk`.
    abs_path: String,
    /// Octal permission string from the directory entry's own `lstat`.
    permissions: String,
    /// Absolute paths of direct child directories, in discovery order.
    child_dirs: Vec<String>,
    /// Direct child files.
    files: Vec<FileRecord>,
}
157
158/// Walks the directory tree rooted at `root`, producing a [`Manifest`] that
159/// matches the original `snapdir-manifest` output byte-for-byte for the same
160/// tree and checksum function.
161///
162/// `root` must be an **absolute** path to a directory (the CLI lane resolves
163/// the user's argument first, mirroring the oracle's `readlink`). `hasher`
164/// supplies the content/merkle checksum function (BLAKE3 by default; the
165/// `--checksum-bin` matrix swaps in [`Md5Hasher`](crate::merkle::Md5Hasher) /
166/// [`Sha256Hasher`](crate::merkle::Sha256Hasher) / keyed BLAKE3). `options`
167/// carries the follow mode, path mode and optional exclude matcher.
168///
169/// # Errors
170///
171/// Returns [`WalkError`] if `root` is not absolute, is not a directory, holds a
172/// non-UTF-8 path, or if an I/O error occurs while reading the tree.
173pub fn walk<H: Hasher>(
174 root: &Path,
175 options: &WalkOptions,
176 hasher: &H,
177) -> Result<Manifest, WalkError> {
178 if !root.is_absolute() {
179 return Err(WalkError::RootNotAbsolute(root.to_path_buf()));
180 }
181
182 // Resolve the root's metadata following symlinks (the oracle always works
183 // on the resolved root directory).
184 let root_meta = std::fs::metadata(root).map_err(|e| WalkError::io(root, e))?;
185 if !root_meta.is_dir() {
186 return Err(WalkError::RootNotDirectory(root.to_path_buf()));
187 }
188 // The oracle's `stat -f '%A'` / `stat -c '%a'` does NOT follow symlinks, so
189 // a directory's PERMISSIONS column always comes from its own `lstat`. For
190 // the root we `lstat` it directly (it is normally a real directory; if it
191 // is itself a symlink the user passed, its own perms still apply).
192 let root_lstat = std::fs::symlink_metadata(root).map_err(|e| WalkError::io(root, e))?;
193 let root_permissions = octal_permissions(root_lstat.permissions().mode());
194
195 let root_str = path_str(root)?.to_owned();
196
197 // Discover every directory (depth-first, following symlinks per `follow`),
198 // recording its direct files and direct child directories. We collect into
199 // an ordered map keyed by absolute path so the post-order pass can compute
200 // directory checksums bottom-up.
201 let mut dirs: BTreeMap<String, DirRecord> = BTreeMap::new();
202 discover_dir(
203 root,
204 &root_str,
205 root_permissions,
206 options,
207 hasher,
208 &mut dirs,
209 )?;
210
211 // Compute each directory's checksum + member-size bottom-up. `dirs` is keyed
212 // by path in a BTreeMap (lexicographic), so a child path always sorts after
213 // its parent prefix; processing in reverse key order guarantees children are
214 // finalized before their parents. We memoize finalized (checksum, size).
215 let keys: Vec<String> = dirs.keys().cloned().collect();
216 let mut finalized: BTreeMap<String, (String, u64)> = BTreeMap::new();
217 for key in keys.iter().rev() {
218 let record = &dirs[key];
219
220 // Direct children's checksums (files + subdirs) for the merkle rule,
221 // and their sizes for the member-size sum.
222 let mut child_checksums: Vec<String> = Vec::new();
223 let mut member_size: u64 = 0;
224 for file in &record.files {
225 child_checksums.push(file.checksum.clone());
226 member_size += file.size;
227 }
228 for child in &record.child_dirs {
229 let (csum, size) = finalized
230 .get(child)
231 .expect("child dir finalized before parent (reverse key order)");
232 child_checksums.push(csum.clone());
233 member_size += size;
234 }
235
236 let checksum =
237 crate::merkle::directory_checksum(child_checksums.iter().map(String::as_str), hasher);
238 finalized.insert(key.clone(), (checksum, member_size));
239 }
240
241 // Emit manifest entries. Files first, then their directory, in any order —
242 // the Manifest sorts by path (`sort -k5`) on Display.
243 let mut manifest = Manifest::new();
244 for (key, record) in &dirs {
245 let (checksum, size) = &finalized[key];
246 let dir_path = render_dir_path(key, &root_str, options.path_mode);
247 manifest.push(ManifestEntry::new(
248 PathType::Directory,
249 record.permissions.clone(),
250 checksum.clone(),
251 *size,
252 dir_path,
253 ));
254 for file in &record.files {
255 let file_path = rewrite_path(&file.abs_path, &root_str, options.path_mode);
256 manifest.push(ManifestEntry::new(
257 PathType::File,
258 file.permissions.clone(),
259 file.checksum.clone(),
260 file.size,
261 file_path,
262 ));
263 }
264 }
265 manifest.sort();
266 Ok(manifest)
267}
268
269/// Recursively discovers the directory at `abs_path` (already known to be a
270/// directory), recording its direct files and child directories, then recurses
271/// into each child directory.
272fn discover_dir<H: Hasher>(
273 dir: &Path,
274 abs_path: &str,
275 permissions: String,
276 options: &WalkOptions,
277 hasher: &H,
278 dirs: &mut BTreeMap<String, DirRecord>,
279) -> Result<(), WalkError> {
280 // `permissions` is the directory's own `lstat` octal mode (a symlinked
281 // directory keeps the symlink's perms, matching the oracle's non-following
282 // `stat -f '%A'` / `stat -c '%a'`).
283 let mut record = DirRecord {
284 abs_path: abs_path.to_owned(),
285 permissions,
286 child_dirs: Vec::new(),
287 files: Vec::new(),
288 };
289
290 let read_dir = std::fs::read_dir(dir).map_err(|e| WalkError::io(dir, e))?;
291 for entry in read_dir {
292 let entry = entry.map_err(|e| WalkError::io(dir, e))?;
293 let entry_path = entry.path();
294 let entry_abs = path_str(&entry_path)?.to_owned();
295
296 // Excludes run on the absolute path (`grep -E -v` over `find` output),
297 // before any relative rewrite. A matching path is dropped for both the
298 // directory listing and the file listing.
299 if let Some(matcher) = &options.exclude {
300 if matcher.is_excluded(&entry_abs) {
301 continue;
302 }
303 }
304
305 // `symlink_metadata` does not traverse the final symlink, so we can
306 // detect symlinks and honor the follow mode like plain `find` vs
307 // `find -L`.
308 let link_meta = entry
309 .metadata()
310 .or_else(|_| std::fs::symlink_metadata(&entry_path))
311 .map_err(|e| WalkError::io(&entry_path, e))?;
312 let is_symlink = link_meta.file_type().is_symlink();
313
314 if is_symlink && !options.follow.follows_symlinks() {
315 // Plain `find` lists a symlink as type `l`; it is neither a `-type d`
316 // nor a `-type f`, so it never enters the manifest under no-follow.
317 continue;
318 }
319
320 // Resolve the (possibly symlinked) target's metadata. Following symlinks
321 // (`find -L`) makes a symlink-to-dir a directory and a symlink-to-file a
322 // file, inheriting the target's type/perms/size/checksum.
323 let target_meta = match std::fs::metadata(&entry_path) {
324 Ok(m) => m,
325 Err(e) => {
326 // A broken symlink (or a symlink loop on some platforms) cannot
327 // be stat'd through. `find -L` likewise cannot classify it as a
328 // file or directory, so it is omitted. Surface real I/O errors
329 // on non-symlink entries.
330 if is_symlink && (e.kind() == io::ErrorKind::NotFound || is_loop_error(&e)) {
331 continue;
332 }
333 return Err(WalkError::io(&entry_path, e));
334 }
335 };
336 let file_type = target_meta.file_type();
337
338 // PERMISSIONS (and, for files, SIZE) come from the entry's own `lstat`,
339 // because the oracle's `stat` is non-following: a symlinked entry keeps
340 // the symlink's perms/size while its CHECKSUM is read through the link
341 // (b3sum/md5sum/sha256sum all follow symlinks). For a real (non-symlink)
342 // entry `lstat` == `stat`, so this is identical there.
343 let own_permissions = octal_permissions(link_meta.permissions().mode());
344
345 if file_type.is_dir() {
346 record.child_dirs.push(entry_abs.clone());
347 discover_dir(
348 &entry_path,
349 &entry_abs,
350 own_permissions,
351 options,
352 hasher,
353 dirs,
354 )?;
355 } else if file_type.is_file() {
356 // Read content through the link for the checksum; take SIZE from the
357 // entry's own `lstat` (for a symlink that is the target-path length,
358 // matching the oracle's `%z` / `%s` on the un-dereferenced symlink).
359 let bytes = std::fs::read(&entry_path).map_err(|e| WalkError::io(&entry_path, e))?;
360 let checksum = hasher.hash_hex(&bytes);
361 record.files.push(FileRecord {
362 abs_path: entry_abs,
363 permissions: own_permissions,
364 checksum,
365 size: link_meta.len(),
366 });
367 }
368 // Anything else (sockets, fifos, devices) is neither `-type d` nor
369 // `-type f`, so it is skipped — matching `find`.
370 }
371
372 dirs.insert(record.abs_path.clone(), record);
373 Ok(())
374}
375
376/// Detects a symlink-loop I/O error (`ELOOP`) so the walk can skip it the way
377/// `find -L` halts on / omits a self-referential symlink.
378fn is_loop_error(error: &io::Error) -> bool {
379 error.raw_os_error() == Some(libc_eloop())
380}
381
/// Returns the raw `ELOOP` errno for the compilation target, so the walk can
/// recognize a symlink loop without taking a `libc` dependency.
///
/// The value is not portable:
///
/// * Linux and Android use 40 on most architectures, but MIPS uses 90 and
///   SPARC keeps the BSD-derived 62.
/// * Solaris and illumos use 90.
/// * macOS and the BSDs use 62, which is also the fallback for any other
///   target (NOTE(review): unverified for targets not listed here).
const fn libc_eloop() -> i32 {
    if cfg!(any(target_os = "linux", target_os = "android")) {
        // Android reports `target_os = "android"`, not "linux", but shares the
        // Linux kernel's errno numbering.
        if cfg!(any(
            target_arch = "mips",
            target_arch = "mips32r6",
            target_arch = "mips64",
            target_arch = "mips64r6"
        )) {
            90
        } else if cfg!(any(target_arch = "sparc", target_arch = "sparc64")) {
            62
        } else {
            40
        }
    } else if cfg!(any(target_os = "solaris", target_os = "illumos")) {
        90
    } else {
        62
    }
}
394
395/// Renders a directory's path for the manifest: always trailing-`/`, and either
396/// absolute or rewritten to a leading `./` relative to `root`.
397fn render_dir_path(abs_path: &str, root: &str, mode: PathMode) -> String {
398 let rewritten = rewrite_path(abs_path, root, mode);
399 // Directory paths always end with `/`. The root rewrites to "." -> "./";
400 // a nested dir "./a" -> "./a/". Absolute "/abs/a" -> "/abs/a/".
401 if rewritten.ends_with('/') {
402 rewritten
403 } else {
404 format!("{rewritten}/")
405 }
406}
407
408/// Applies the oracle's relative rewrite `sed -E "s| \.?${root_dir}| .|"`:
409/// the leading `root` prefix of an absolute path becomes `.`. In absolute mode
410/// the path is returned unchanged.
411fn rewrite_path(abs_path: &str, root: &str, mode: PathMode) -> String {
412 match mode {
413 PathMode::Absolute => abs_path.to_owned(),
414 PathMode::Relative => {
415 if abs_path == root {
416 // The root directory itself becomes ".".
417 ".".to_owned()
418 } else if let Some(rest) = abs_path.strip_prefix(root) {
419 // rest starts with '/': "/a/aa/f1" -> "./a/aa/f1".
420 format!(".{rest}")
421 } else {
422 // Defensive: not under root (should not happen). Leave as-is.
423 abs_path.to_owned()
424 }
425 }
426 }
427}
428
#[cfg(test)]
mod tests {
    //! Pure-Rust walk tests.
    //!
    //! Originally these shelled out to the legacy Bash oracle
    //! (the `snapdir-manifest` script) and asserted byte-identity. The oracle
    //! has since been deleted from the branch, so each case is now pinned
    //! against an **embedded golden manifest constant** (or, where a column is
    //! platform-dependent, a structural assertion). The golden bytes were
    //! captured once from this very `walk` implementation over fixtures with
    //! **explicit, fixed permissions** (dirs `0o700`/`0o755`, files `0o600`),
    //! which makes the `TYPE PERMS CHECKSUM SIZE PATH` output fully
    //! deterministic. The content/size/checksum/merkle columns were
    //! cross-checked against the recorded oracle vectors in
    //! `crates/snapdir-core/tests/compat_golden.rs` (e.g. the empty-file
    //! `af1349b9…` checksum and the `./a/aa/aaa/` merkle `8aed4caf…`).
    //!
    //! Symlink rows (`./a_link/`, `./r1f_link`) carry the symlink's *own* lstat
    //! permissions, which differ across platforms (macOS reports `755`, Linux
    //! `777`), so those tests assert structure (presence/absence + materialized
    //! subtree) rather than a byte-exact perm column.
    use super::*;
    use crate::merkle::Blake3Hasher;
    use std::fs;
    use std::os::unix::fs::PermissionsExt;
    use std::path::PathBuf;
    use std::sync::atomic::{AtomicU64, Ordering};

    /// A self-cleaning scratch directory under the system temp dir. Avoids a
    /// `tempfile` dev-dependency; the walk is library-pure and never reads the
    /// environment itself — only this test harness builds fixtures on disk. The
    /// root is chmod'd to a fixed `0o755` so the root `D` line's perm column is
    /// deterministic across umasks.
    struct Scratch {
        path: PathBuf,
    }

    impl Scratch {
        /// Creates a fresh, empty scratch directory whose name combines `tag`,
        /// the process id and a per-process counter, so tests running in
        /// parallel never share a directory.
        fn new(tag: &str) -> Self {
            static COUNTER: AtomicU64 = AtomicU64::new(0);
            let n = COUNTER.fetch_add(1, Ordering::Relaxed);
            let pid = std::process::id();
            // Resolve through canonicalize so macOS's /var -> /private/var (and
            // any other symlinked temp prefix) is already normalized.
            let base = std::env::temp_dir()
                .canonicalize()
                .expect("temp dir canonicalizes");
            let path = base.join(format!("snapdir-walk-test-{tag}-{pid}-{n}"));
            // Best-effort removal of leftovers from an earlier run that reused
            // this pid/counter pair.
            let _ = fs::remove_dir_all(&path);
            fs::create_dir_all(&path).expect("create scratch dir");
            fs::set_permissions(&path, fs::Permissions::from_mode(0o755))
                .expect("chmod scratch root");
            Scratch { path }
        }

        /// The absolute path of the scratch directory.
        fn root(&self) -> &Path {
            &self.path
        }
    }

    impl Drop for Scratch {
        /// Removes the scratch tree; failures are ignored (best-effort cleanup).
        fn drop(&mut self) {
            let _ = fs::remove_dir_all(&self.path);
        }
    }

    /// Writes a file (creating parents) with a fixed `0o600` mode so the `F`
    /// line's perm column is deterministic.
    fn write_file(path: &Path, contents: &[u8]) {
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent).expect("create parent dir");
        }
        fs::write(path, contents).expect("write file");
        fs::set_permissions(path, fs::Permissions::from_mode(0o600)).expect("chmod file");
    }

    /// Recursively chmods `root` and every descendant directory to `mode`, so
    /// every `D` line's perm column is pinned (independent of the process umask).
    fn chmod_dirs(root: &Path, mode: u32) {
        fs::set_permissions(root, fs::Permissions::from_mode(mode)).expect("chmod dir");
        // `flatten()` skips directory entries that fail to read.
        for entry in fs::read_dir(root).expect("read_dir").flatten() {
            let ft = entry.file_type().expect("file_type");
            // `is_dir()` here is lstat-based via DirEntry::file_type, so a
            // symlink-to-dir is NOT recursed into (its own perms stay as-is).
            if ft.is_dir() {
                chmod_dirs(&entry.path(), mode);
            }
        }
    }

    /// Builds a [`WalkOptions`] for the given follow/path/exclude combination.
    /// Panics if `exclude` is not a valid pattern.
    fn opts(follow: FollowMode, path_mode: PathMode, exclude: Option<&str>) -> WalkOptions {
        WalkOptions {
            follow,
            path_mode,
            exclude: exclude.map(|p| ExcludeMatcher::new(p).expect("valid exclude regex")),
        }
    }

    /// Runs the walk and returns its `Display` manifest text (no trailing
    /// newline — `Manifest`'s `Display` does not emit one).
    fn manifest_text(root: &Path, options: &WalkOptions) -> String {
        walk(root, options, &Blake3Hasher::new())
            .expect("walk")
            .to_string()
    }

    // -- Empty-string / empty-file checksum reused from the oracle vectors -----
    // (matches compat_golden.rs::EMPTY_FILE_B3).
    const EMPTY_B3: &str = "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262";

    /// A relative root is rejected before any filesystem access.
    #[test]
    fn walk_root_must_be_absolute() {
        let err = walk(
            Path::new("relative/path"),
            &WalkOptions::default(),
            &Blake3Hasher::new(),
        )
        .unwrap_err();
        assert!(matches!(err, WalkError::RootNotAbsolute(_)));
    }

    #[test]
    fn walk_empty_directory_golden() {
        // An empty directory: a single `D` line whose checksum is the merkle of
        // zero children == blake3("") and whose size is 0. Root chmod'd to 755.
        let scratch = Scratch::new("empty-dir");
        let expected = format!("D 755 {EMPTY_B3} 0 ./");
        assert_eq!(
            manifest_text(scratch.root(), &WalkOptions::default()),
            expected
        );
    }

    #[test]
    fn walk_single_empty_file_golden() {
        // Root `D` line (its checksum is the merkle over the single empty-file
        // child) plus the `F` line for the empty file, whose content checksum
        // is blake3("").
        let scratch = Scratch::new("empty-file");
        write_file(&scratch.root().join("empty.txt"), b"");
        let expected = format!(
            "D 755 dba5865c0d91b17958e4d2cac98c338f85cbbda07b71a020ab16c391b5e7af4b 0 ./\n\
             F 600 {EMPTY_B3} 0 ./empty.txt"
        );
        assert_eq!(
            manifest_text(scratch.root(), &WalkOptions::default()),
            expected
        );
    }

    /// The deep guide tree under [`PathMode::Relative`]. Dirs are `0o700`, files
    /// `0o600`; every checksum/merkle value matches the recorded oracle vectors
    /// (cf. `compat_golden.rs::MULTILEVEL_MANIFEST` — same `./a/…`/`./b/…`/`./c/…`
    /// subtree). The extra empty `./d/` dir carries the blake3("") merkle.
    const NESTED_RELATIVE_GOLDEN: &str = "\
D 700 3f938f681dcbd616d00d42f704d525c05e7ed2746888c35c8214127c632587c3 43 ./
D 700 ed23cfd2037d23cf8c6b67497425e7a06d5e40ea2bd8e43fc434006022dafe86 21 ./a/
F 600 3c9cb8b8c8f3588f8e59e18d284330b0a951be644fbef2b9784b56e15d1c6096 4 ./a/a1f
D 700 ee795476bff6c1816b4c7558a74ee0b44ec600c3cde6b02564508f67d536a656 17 ./a/aa/
F 600 a2951028421deef48d1ba185f4c497c2d986f1dd76079baf2f5eb8479f132b5a 5 ./a/aa/aa1f
D 700 8aed4caf45b22aa4c8a195945136e3a01f77864e91fabe2d9272feeee87ae334 12 ./a/aa/aaa/
F 600 5cfee4fb4074748633b4ccbddb6b184a9b5e2f5ce74df6d2803f5fea0392a197 6 ./a/aa/aaa/aaa1f
F 600 3791f11a017feedffd24c2656e18d5c4ca9d6c404c8f40ccc511b6351c8575a6 6 ./a/aa/aaa/aaa2f
D 700 9a8b0e35c000df69893648b91d15cc30ab88ae5a40af48228caf5fa443dafc9b 12 ./b/
D 700 d41c2090167e6f546a510f0da98d8a8355d6bd2b61666644604c73b3a8f5b5d9 12 ./b/bb/
D 700 3b9023fa454aa22466feeb8cbf55a2c764dd79de0e93c9a793e8b54caec227da 12 ./b/bb/bbb/
F 600 8d18b7f3aabbef192a524fa2549d1d36b48c9030d234c9bdf87caa267fb09933 6 ./b/bb/bbb/bbb1f
F 600 2e16e172b6e337325f271d4eae00bc1ea20e41609ef78665710cada1477005cc 6 ./b/bb/bbb/bbb2f
D 700 15eb2657c1e6f5a24023c10429bb6f1b7d81b2cc2057eedee2192fbf3e7b892c 6 ./c/
D 700 e711f4e76ae9b3e25ad9a32b5f115cc9a81e55a428c552aa0bcab8543967f51a 6 ./c/cc/
D 700 31a1955d5a65328f31014650cf79b5c0c3d9b82de19352ade8d299cc22f6ec40 6 ./c/cc/ccc/
F 600 24f0cf3553e0dac0ce8aead4279e0fc368899e89ef776999d0d7e812b5ca0f3b 6 ./c/cc/ccc/ccc1f
D 700 af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 0 ./d/
F 600 27a55588c59999fd686667c4b186af08161b95c287216f0cde723f0e191d1974 4 ./r1f";

    /// Builds the fixture behind [`NESTED_RELATIVE_GOLDEN`]: three nested
    /// subtrees `a/`, `b/`, `c/`, an empty `d/`, and a root-level file `r1f`.
    /// Every directory (including `root`) ends up `0o700`.
    fn build_nested(root: &Path) {
        write_file(&root.join("a/aa/aaa/aaa1f"), b"aaa1f\n");
        write_file(&root.join("a/aa/aaa/aaa2f"), b"aaa2f\n");
        write_file(&root.join("a/aa/aa1f"), b"aa1f\n");
        write_file(&root.join("a/a1f"), b"a1f\n");
        write_file(&root.join("r1f"), b"r1f\n");
        write_file(&root.join("b/bb/bbb/bbb1f"), b"bbb1f\n");
        write_file(&root.join("b/bb/bbb/bbb2f"), b"bbb2f\n");
        write_file(&root.join("c/cc/ccc/ccc1f"), b"ccc1f\n");
        // Empty subdirectory with no files.
        fs::create_dir_all(root.join("d")).unwrap();
        chmod_dirs(root, 0o700);
    }

    #[test]
    fn walk_nested_tree_relative_golden() {
        let scratch = Scratch::new("nested-rel");
        build_nested(scratch.root());
        assert_eq!(
            manifest_text(
                scratch.root(),
                &opts(FollowMode::Follow, PathMode::Relative, None)
            ),
            NESTED_RELATIVE_GOLDEN
        );
    }

    #[test]
    fn walk_nested_tree_absolute_golden() {
        // Under PathMode::Absolute every PATH column is the scratch root prefix
        // + the relative tail; the TYPE/PERMS/CHECKSUM/SIZE columns are
        // identical to the relative golden. We reconstruct the expected text by
        // rewriting the relative golden's `./` prefix to the absolute root,
        // proving the only difference is the path rendering.
        let scratch = Scratch::new("nested-abs");
        let r = scratch.root();
        build_nested(r);
        let root_str = r.to_str().unwrap();
        let expected: String = NESTED_RELATIVE_GOLDEN
            .lines()
            .map(|line| {
                // Replace the leading "./" of the PATH (last field) with the
                // absolute root. The path is taken as everything after the
                // LAST space, which works because no golden path contains a
                // space.
                let (head, path) = line.rsplit_once(' ').unwrap();
                let abs_path = if path == "./" {
                    format!("{root_str}/")
                } else {
                    format!("{root_str}/{}", path.strip_prefix("./").unwrap())
                };
                format!("{head} {abs_path}")
            })
            .collect::<Vec<_>>()
            .join("\n");
        assert_eq!(
            manifest_text(r, &opts(FollowMode::Follow, PathMode::Absolute, None)),
            expected
        );
    }

    #[test]
    fn walk_directory_size_is_sum_of_members_golden() {
        // Cross-check dir-size summation: each `D` line's SIZE is the sum of its
        // members (recursively), independent of the directory's own stat size.
        let scratch = Scratch::new("dir-size");
        let r = scratch.root();
        write_file(&r.join("f1"), b"hello"); // 5
        write_file(&r.join("sub/f2"), b"world!!"); // 7
        write_file(&r.join("sub/f3"), b"x"); // 1
        chmod_dirs(r, 0o700);

        let expected = "\
D 700 5681c72cfd0ddea4f54683365bc4082b92147bf33976875653133cc4aed0f96a 13 ./
F 600 ea8f163db38682925e4491c5e58d4bb3506ef8c14eb78a86e908c5624a67200f 5 ./f1
D 700 2ac73ec4f4ec2ef21ebfba467be499a58aef80a34d7001d68bdeb14cb58a954d 8 ./sub/
F 600 8bafa24d36bc2aa6edc0d041e763cb59ebadb71b6e63ab4ac9314de95e9a0de7 7 ./sub/f2
F 600 3ae7d805f6789a6402acb70ad4096a85a56bf6804eaf25c0493ac697548d30b5 1 ./sub/f3";
        let manifest = walk(r, &WalkOptions::default(), &Blake3Hasher::new()).expect("walk");
        assert_eq!(manifest.to_string(), expected);

        // Structural cross-check of the summation rule independent of the bytes.
        let root_dir = manifest.entries().iter().find(|e| e.path == "./").unwrap();
        let sub_dir = manifest
            .entries()
            .iter()
            .find(|e| e.path == "./sub/")
            .unwrap();
        assert_eq!(sub_dir.size, 8, "sub = f2(7) + f3(1)");
        assert_eq!(root_dir.size, 13, "root = f1(5) + sub(8)");
    }

    /// Builds the symlink fixture: a real `a/` subtree plus a dir-symlink
    /// `a_link -> a` and a file-symlink `r1f_link -> r1f`. Real dirs chmod'd to
    /// `0o700`; files `0o600`. The symlinks' own perms are left platform-default
    /// (NOT chmod'd) — hence the structural (not byte-golden) assertions below.
    fn build_symlinks(root: &Path) {
        write_file(&root.join("a/aa/f1"), b"hello");
        write_file(&root.join("a/f2"), b"world!!");
        write_file(&root.join("r1f"), b"r");
        std::os::unix::fs::symlink("a", root.join("a_link")).expect("symlink dir");
        std::os::unix::fs::symlink("r1f", root.join("r1f_link")).expect("symlink file");
        chmod_dirs(root, 0o700);
    }

    #[test]
    fn walk_symlink_followed_by_default() {
        let scratch = Scratch::new("symlink-follow");
        let r = scratch.root();
        build_symlinks(r);

        let manifest = manifest_text(r, &opts(FollowMode::Follow, PathMode::Relative, None));

        // The dir symlink is followed: it materializes as its own `D ./a_link/`
        // row whose CHECKSUM equals the real `./a/` directory's merkle, plus the
        // full target subtree mirrored under ./a_link/.
        let a_dir_b3 = "0c862ed8e62262f84e7fc0fe4a6c566adec4a85ef22f8a46b7ad4c9344146701";
        assert!(
            manifest
                .lines()
                .any(|l| l.starts_with("D ") && l.contains(a_dir_b3) && l.ends_with(" ./a/")),
            "real ./a/ dir present with its merkle: {manifest}"
        );
        assert!(
            manifest
                .lines()
                .any(|l| l.starts_with("D ") && l.contains(a_dir_b3) && l.ends_with(" ./a_link/")),
            "followed symlink dir ./a_link/ mirrors ./a/'s merkle: {manifest}"
        );
        // Mirrored target subtree entries (content checksums are deterministic).
        assert!(manifest.lines().any(|l| l.ends_with(" ./a_link/aa/")));
        assert!(manifest.lines().any(|l| {
            l.starts_with("F ")
                && l.contains("ea8f163db38682925e4491c5e58d4bb3506ef8c14eb78a86e908c5624a67200f")
                && l.ends_with(" ./a_link/aa/f1")
        }));
        // The file symlink is followed: it appears as an `F` row pointing at the
        // target's content (blake3("r")), ending in ./r1f_link.
        let r1f_b3 = "b2dea48d667b2821a9bcf69eded39a2458a1d8165ca7fcac64c3557b69a7ea08";
        assert!(
            manifest
                .lines()
                .any(|l| l.starts_with("F ") && l.contains(r1f_b3) && l.ends_with(" ./r1f_link")),
            "followed symlink file ./r1f_link present: {manifest}"
        );
        assert!(
            manifest
                .lines()
                .any(|l| l.starts_with("F ") && l.contains(r1f_b3) && l.ends_with(" ./r1f")),
            "real ./r1f present: {manifest}"
        );
    }

    #[test]
    fn walk_no_follow_drops_symlinks() {
        let scratch = Scratch::new("symlink-nofollow");
        let r = scratch.root();
        build_symlinks(r);

        // With --no-follow the symlinks are dropped entirely; the manifest is a
        // byte-exact golden over only the real entries (no `_link` rows). Note
        // the root `D` SIZE is 13 (= sum of real members), not the 28 of the
        // followed case (which double-counts via a_link/).
        let expected = "\
D 700 61a8f1898844a17eeed84d34c2e3b5fd9c7fef136dba5f7036ae70294595a085 13 ./
D 700 0c862ed8e62262f84e7fc0fe4a6c566adec4a85ef22f8a46b7ad4c9344146701 12 ./a/
D 700 6cd17c61c7e42c50586ee5f3f54dbc4f809f71073fc176ed2ae865103dd33625 5 ./a/aa/
F 600 ea8f163db38682925e4491c5e58d4bb3506ef8c14eb78a86e908c5624a67200f 5 ./a/aa/f1
F 600 8bafa24d36bc2aa6edc0d041e763cb59ebadb71b6e63ab4ac9314de95e9a0de7 7 ./a/f2
F 600 b2dea48d667b2821a9bcf69eded39a2458a1d8165ca7fcac64c3557b69a7ea08 1 ./r1f";
        let manifest = manifest_text(r, &opts(FollowMode::NoFollow, PathMode::Relative, None));
        assert_eq!(manifest, expected);
        assert!(!manifest.contains("_link"), "no-follow drops all symlinks");
    }

    #[test]
    fn walk_exclude_regex_golden() {
        let scratch = Scratch::new("exclude-regex");
        let r = scratch.root();
        write_file(&r.join("keep/k"), b"x");
        write_file(&r.join("drop/d"), b"y");
        write_file(&r.join("top.txt"), b"top");
        chmod_dirs(r, 0o700);

        // The matcher runs against the ABSOLUTE find path, so the exclude is
        // anchored at the absolute root + "/drop". `drop/` is dropped entirely;
        // `keep/` and `top.txt` remain (byte-exact golden over the survivors).
        let abs = r.to_str().unwrap();
        let pattern = format!("{abs}/drop");
        let manifest = manifest_text(
            r,
            &opts(FollowMode::Follow, PathMode::Relative, Some(&pattern)),
        );
        let expected = "\
D 700 b6f1055a5f14fdd55fa831ff6d2e2f433c7ca7fa2cc43e63a8cd0a4542d3010a 4 ./
D 700 b9030f201b43e2a72e62951476c0bcfafe3b020ece221d2254d8610ea9e88fb5 1 ./keep/
F 600 3ae7d805f6789a6402acb70ad4096a85a56bf6804eaf25c0493ac697548d30b5 1 ./keep/k
F 600 ef854702aa94ba4f60c67d731671c9e0e49a031be6ce475489e91f7a33cb5243 3 ./top.txt";
        assert_eq!(manifest, expected);
        assert!(!manifest.contains("drop"), "drop/ excluded");
    }

    #[test]
    fn walk_exclude_common_golden() {
        let scratch = Scratch::new("exclude-common");
        let r = scratch.root();
        write_file(&r.join("src/main.rs"), b"fn main() {}\n");
        write_file(&r.join(".git/objects/secret"), b"secret");
        write_file(&r.join("node_modules/pkg/index.js"), b"//js\n");
        chmod_dirs(r, 0o700);

        // %common% expands to the regex that drops .git, node_modules, etc.
        // (the CLI lane uses the same expansion; core never reads the env).
        let expanded = crate::excludes::expand_excludes(
            "%common%",
            "/nonexistent/.cache/",
            "/nonexistent/cache",
        );
        let pattern = expanded.pattern.expect("non-empty");
        let manifest = manifest_text(
            r,
            &opts(FollowMode::Follow, PathMode::Relative, Some(&pattern)),
        );
        // Only ./src survives — byte-exact golden over the survivors.
        let expected = "\
D 700 ad5409ad5f97a26c908382b379b23971ee143e6bcd29a7d663175936d2cd4e94 13 ./
D 700 069cd5e102d7dd39faa7093b5b2d784c32e19b01f829a902c14aa10b7182debc 13 ./src/
F 600 2d1ebfa706ba230165250f744796a92accba5e1b6fa357983b65319da33f8e93 13 ./src/main.rs";
        assert_eq!(manifest, expected);
        assert!(!manifest.contains(".git"), "%common% excludes .git");
        assert!(
            !manifest.contains("node_modules"),
            "%common% excludes node_modules"
        );
    }

    #[test]
    fn walk_snapshot_id_is_blake3_of_manifest_text() {
        // The snapshot id is BLAKE3 of the manifest text + a trailing newline
        // (comment lines stripped). Cross-check the public derivation against an
        // explicit recomputation over the walk's own output.
        let scratch = Scratch::new("snapshot-id");
        let r = scratch.root();
        write_file(&r.join("a/f1"), b"hello\n");
        write_file(&r.join("b/f2"), b"world\n");
        chmod_dirs(r, 0o700);
        let hasher = Blake3Hasher::new();
        let manifest = walk(r, &WalkOptions::default(), &hasher).expect("walk");
        let id = crate::merkle::snapshot_id(&manifest, &hasher);

        let mut bytes = manifest.to_string().into_bytes();
        bytes.push(b'\n');
        let expected = hasher.hash_hex(&bytes);
        assert_eq!(
            id, expected,
            "snapshot id == blake3(manifest_text + \"\\n\")"
        );
        assert_eq!(id.len(), 64, "id is 64 lowercase hex chars");
        assert!(id.chars().all(|c| c.is_ascii_hexdigit()));
    }
}