Skip to main content

substrate/tree/
walk.rs

1//! Directory traversal with filtering for tree hashing, archive packing, and mtime computation.
2//!
3//! Substrate defines the traversal logic and filtering rules; callers provide I/O
4//! via the [`DirReader`] trait. This keeps substrate zero-I/O and WASM-compatible
5//! while ensuring consistent filtering everywhere.
6
7use std::path::Path;
8
9use anyhow::Result;
10
11use super::blob_tree_blake3_nfc::TreeEntry;
12
13// Re-use the exact same predicate the hash layer uses.
14/// Check whether a filename should be excluded (exact name match).
15///
16/// This is the canonical predicate used by [`walk_dir`] and the hash layer.
17pub fn should_exclude(name: &str, exclude_names: &[String]) -> bool {
18    exclude_names.iter().any(|pattern| name == pattern)
19}
20
21/// A single entry returned by [`DirReader::read_dir`].
22pub struct DirEntry {
23    pub name: String,
24    pub is_dir: bool,
25    pub is_file: bool,
26}
27
28/// Filesystem abstraction so substrate can drive traversal without doing I/O itself.
29pub trait DirReader {
30    /// List immediate children of a directory.
31    fn read_dir(&self, path: &Path) -> Result<Vec<DirEntry>>;
32    /// Read the full contents of a file.
33    fn read_file(&self, path: &Path) -> Result<Vec<u8>>;
34    /// Check whether a file has the executable bit set.
35    fn is_executable(&self, path: &Path) -> Result<bool>;
36    /// Check whether a path is ignored by follow_rules (gitignore-style).
37    fn is_ignored(&self, path: &Path, is_dir: bool) -> bool;
38    /// Return the file's modification time as milliseconds since the Unix epoch.
39    fn mtime_ms(&self, path: &Path) -> Result<Option<u64>>;
40}
41
42/// Walk a directory tree and produce [`TreeEntry`] values.
43///
44/// `exclude_names` filters by exact filename match (same rule as the hash layer).
45/// `reader.is_ignored()` handles follow_rules / gitignore-style filtering.
46pub fn walk_dir(
47    reader: &dyn DirReader,
48    dir_path: &Path,
49    exclude_names: &[String],
50) -> Result<Vec<TreeEntry>> {
51    let children = reader.read_dir(dir_path)?;
52    let mut entries = Vec::new();
53
54    for child in children {
55        if should_exclude(&child.name, exclude_names) {
56            continue;
57        }
58
59        let child_path = dir_path.join(&child.name);
60
61        if reader.is_ignored(&child_path, child.is_dir) {
62            continue;
63        }
64
65        if child.is_file {
66            let content = reader.read_file(&child_path)?;
67            let executable = reader.is_executable(&child_path)?;
68            entries.push(TreeEntry::File {
69                name: child.name,
70                content,
71                executable,
72            });
73        } else if child.is_dir {
74            let sub = walk_dir(reader, &child_path, exclude_names)?;
75            entries.push(TreeEntry::Directory {
76                name: child.name,
77                children: sub,
78            });
79        }
80    }
81
82    Ok(entries)
83}
84
85/// Flatten a recursive `TreeEntry` tree into a flat list of `(path, content, executable)`.
86///
87/// Only files appear in the output. Paths use `/` as separator.
88pub fn flatten_entries(entries: &[TreeEntry]) -> Vec<(String, Vec<u8>, bool)> {
89    let mut result = Vec::new();
90    flatten_inner(entries, "", &mut result);
91    result
92}
93
94fn flatten_inner(entries: &[TreeEntry], prefix: &str, result: &mut Vec<(String, Vec<u8>, bool)>) {
95    for entry in entries {
96        match entry {
97            TreeEntry::File {
98                name,
99                content,
100                executable,
101            } => {
102                let path = if prefix.is_empty() {
103                    name.clone()
104                } else {
105                    format!("{}/{}", prefix, name)
106                };
107                result.push((path, content.clone(), *executable));
108            }
109            TreeEntry::Directory { name, children } => {
110                let dir_prefix = if prefix.is_empty() {
111                    name.clone()
112                } else {
113                    format!("{}/{}", prefix, name)
114                };
115                flatten_inner(children, &dir_prefix, result);
116            }
117        }
118    }
119}
120
121/// Walk a directory and return the maximum file modification time (epoch ms).
122///
123/// Uses the same `exclude_names` + `is_ignored` filtering as [`walk_dir`].
124pub fn max_mtime(reader: &dyn DirReader, dir_path: &Path, exclude_names: &[String]) -> Result<u64> {
125    let children = reader.read_dir(dir_path)?;
126    let mut max_ms: u64 = 0;
127
128    for child in children {
129        if should_exclude(&child.name, exclude_names) {
130            continue;
131        }
132
133        let child_path = dir_path.join(&child.name);
134
135        if reader.is_ignored(&child_path, child.is_dir) {
136            continue;
137        }
138
139        if child.is_file {
140            if let Some(ms) = reader.mtime_ms(&child_path)? {
141                if ms > max_ms {
142                    max_ms = ms;
143                }
144            }
145        } else if child.is_dir {
146            let sub = max_mtime(reader, &child_path, exclude_names)?;
147            if sub > max_ms {
148                max_ms = sub;
149            }
150        }
151    }
152
153    Ok(max_ms)
154}
155
156// ═══════════════════════════════════════════
157// Tests
158// ═══════════════════════════════════════════
159
160#[cfg(test)]
161#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
162mod tests {
163    use super::*;
164    use std::collections::BTreeMap;
165    use std::path::PathBuf;
166
167    /// In-memory filesystem for testing.
168    struct MockFs {
169        /// path → (content, executable, mtime_ms)
170        files: BTreeMap<PathBuf, (Vec<u8>, bool, u64)>,
171        /// path → list of child names
172        dirs: BTreeMap<PathBuf, Vec<String>>,
173        /// paths that should be "ignored" (gitignore simulation)
174        ignored: Vec<PathBuf>,
175    }
176
177    impl MockFs {
178        fn new() -> Self {
179            Self {
180                files: BTreeMap::new(),
181                dirs: BTreeMap::new(),
182                ignored: Vec::new(),
183            }
184        }
185
186        fn add_file(&mut self, path: &str, content: &[u8], executable: bool, mtime_ms: u64) {
187            let p = PathBuf::from(path);
188            self.files
189                .insert(p.clone(), (content.to_vec(), executable, mtime_ms));
190            // Register in parent dir
191            if let Some(parent) = p.parent() {
192                let name = p.file_name().unwrap().to_string_lossy().to_string();
193                self.dirs
194                    .entry(parent.to_path_buf())
195                    .or_default()
196                    .push(name);
197                // Ensure parent dirs exist up the chain
198                self.ensure_dir(parent);
199            }
200        }
201
202        fn add_dir(&mut self, path: &str) {
203            let p = PathBuf::from(path);
204            self.ensure_dir(&p);
205        }
206
207        fn ensure_dir(&mut self, path: &Path) {
208            if !self.dirs.contains_key(path) {
209                self.dirs.insert(path.to_path_buf(), Vec::new());
210            }
211            if let Some(parent) = path.parent() {
212                if parent != path {
213                    let name = path.file_name().unwrap().to_string_lossy().to_string();
214                    let siblings = self.dirs.entry(parent.to_path_buf()).or_default();
215                    if !siblings.contains(&name) {
216                        siblings.push(name);
217                    }
218                    self.ensure_dir(parent);
219                }
220            }
221        }
222
223        fn ignore(&mut self, path: &str) {
224            self.ignored.push(PathBuf::from(path));
225        }
226    }
227
228    impl DirReader for MockFs {
229        fn read_dir(&self, path: &Path) -> Result<Vec<DirEntry>> {
230            let children = self.dirs.get(path).cloned().unwrap_or_default();
231            Ok(children
232                .into_iter()
233                .map(|name| {
234                    let child_path = path.join(&name);
235                    DirEntry {
236                        name,
237                        is_dir: self.dirs.contains_key(&child_path),
238                        is_file: self.files.contains_key(&child_path),
239                    }
240                })
241                .collect())
242        }
243
244        fn read_file(&self, path: &Path) -> Result<Vec<u8>> {
245            self.files
246                .get(path)
247                .map(|(content, _, _)| content.clone())
248                .ok_or_else(|| anyhow::anyhow!("file not found: {}", path.display()))
249        }
250
251        fn is_executable(&self, path: &Path) -> Result<bool> {
252            Ok(self
253                .files
254                .get(path)
255                .map(|(_, exec, _)| *exec)
256                .unwrap_or(false))
257        }
258
259        fn is_ignored(&self, path: &Path, _is_dir: bool) -> bool {
260            self.ignored.iter().any(|p| p == path)
261        }
262
263        fn mtime_ms(&self, path: &Path) -> Result<Option<u64>> {
264            Ok(self.files.get(path).map(|(_, _, mtime)| *mtime))
265        }
266    }
267
268    // ─── flatten_entries tests ───
269
270    #[test]
271    fn flatten_empty_tree() {
272        let result = flatten_entries(&[]);
273        assert!(result.is_empty());
274    }
275
276    #[test]
277    fn flatten_single_file() {
278        let entries = vec![TreeEntry::File {
279            name: "hello.txt".to_string(),
280            content: b"hello".to_vec(),
281            executable: false,
282        }];
283        let flat = flatten_entries(&entries);
284        assert_eq!(flat.len(), 1);
285        assert_eq!(flat[0].0, "hello.txt");
286        assert_eq!(flat[0].1, b"hello");
287        assert!(!flat[0].2);
288    }
289
290    #[test]
291    fn flatten_executable_flag() {
292        let entries = vec![TreeEntry::File {
293            name: "run.sh".to_string(),
294            content: b"#!/bin/sh".to_vec(),
295            executable: true,
296        }];
297        let flat = flatten_entries(&entries);
298        assert!(flat[0].2);
299    }
300
301    #[test]
302    fn flatten_multiple_files_no_dirs() {
303        let entries = vec![
304            TreeEntry::File {
305                name: "a.txt".to_string(),
306                content: b"a".to_vec(),
307                executable: false,
308            },
309            TreeEntry::File {
310                name: "b.txt".to_string(),
311                content: b"b".to_vec(),
312                executable: false,
313            },
314        ];
315        let flat = flatten_entries(&entries);
316        assert_eq!(flat.len(), 2);
317        assert_eq!(flat[0].0, "a.txt");
318        assert_eq!(flat[1].0, "b.txt");
319    }
320
321    #[test]
322    fn flatten_nested_dir() {
323        let entries = vec![TreeEntry::Directory {
324            name: "src".to_string(),
325            children: vec![TreeEntry::File {
326                name: "lib.rs".to_string(),
327                content: b"pub fn foo() {}".to_vec(),
328                executable: false,
329            }],
330        }];
331        let flat = flatten_entries(&entries);
332        assert_eq!(flat.len(), 1);
333        assert_eq!(flat[0].0, "src/lib.rs");
334    }
335
336    #[test]
337    fn flatten_deep_nesting() {
338        let entries = vec![TreeEntry::Directory {
339            name: "a".to_string(),
340            children: vec![TreeEntry::Directory {
341                name: "b".to_string(),
342                children: vec![TreeEntry::File {
343                    name: "c.txt".to_string(),
344                    content: b"deep".to_vec(),
345                    executable: false,
346                }],
347            }],
348        }];
349        let flat = flatten_entries(&entries);
350        assert_eq!(flat.len(), 1);
351        assert_eq!(flat[0].0, "a/b/c.txt");
352        assert_eq!(flat[0].1, b"deep");
353    }
354
355    #[test]
356    fn flatten_mixed_files_and_dirs() {
357        let entries = vec![
358            TreeEntry::File {
359                name: "README.md".to_string(),
360                content: b"# Hello".to_vec(),
361                executable: false,
362            },
363            TreeEntry::Directory {
364                name: "src".to_string(),
365                children: vec![
366                    TreeEntry::File {
367                        name: "main.rs".to_string(),
368                        content: b"fn main() {}".to_vec(),
369                        executable: false,
370                    },
371                    TreeEntry::File {
372                        name: "lib.rs".to_string(),
373                        content: b"pub mod foo;".to_vec(),
374                        executable: false,
375                    },
376                ],
377            },
378        ];
379        let flat = flatten_entries(&entries);
380        assert_eq!(flat.len(), 3);
381        assert_eq!(flat[0].0, "README.md");
382        assert_eq!(flat[1].0, "src/main.rs");
383        assert_eq!(flat[2].0, "src/lib.rs");
384    }
385
386    #[test]
387    fn flatten_empty_directory_produces_nothing() {
388        let entries = vec![TreeEntry::Directory {
389            name: "empty".to_string(),
390            children: vec![],
391        }];
392        let flat = flatten_entries(&entries);
393        assert!(flat.is_empty());
394    }
395
396    #[test]
397    fn flatten_paths_use_forward_slash() {
398        let entries = vec![TreeEntry::Directory {
399            name: "dir".to_string(),
400            children: vec![TreeEntry::Directory {
401                name: "sub".to_string(),
402                children: vec![TreeEntry::File {
403                    name: "f.txt".to_string(),
404                    content: vec![],
405                    executable: false,
406                }],
407            }],
408        }];
409        let flat = flatten_entries(&entries);
410        assert_eq!(flat[0].0, "dir/sub/f.txt");
411        assert!(!flat[0].0.contains('\\'));
412    }
413
414    #[test]
415    fn flatten_preserves_content() {
416        let content = vec![0u8, 1, 2, 255, 128, 64];
417        let entries = vec![TreeEntry::File {
418            name: "binary.bin".to_string(),
419            content: content.clone(),
420            executable: false,
421        }];
422        let flat = flatten_entries(&entries);
423        assert_eq!(flat[0].1, content);
424    }
425
426    // ─── walk_dir tests ───
427
428    #[test]
429    fn walk_basic() {
430        let mut fs = MockFs::new();
431        fs.add_file("/root/a.txt", b"aaa", false, 1000);
432        fs.add_file("/root/b.txt", b"bbb", false, 2000);
433
434        let entries = walk_dir(&fs, Path::new("/root"), &[]).unwrap();
435        assert_eq!(entries.len(), 2);
436    }
437
438    #[test]
439    fn walk_exclude_names_exact_match() {
440        let mut fs = MockFs::new();
441        fs.add_file("/root/keep.txt", b"keep", false, 1000);
442        fs.add_dir("/root/.git");
443        fs.add_file("/root/.git/config", b"git", false, 1000);
444
445        let entries = walk_dir(&fs, Path::new("/root"), &[".git".to_string()]).unwrap();
446        // .git dir should be excluded
447        assert_eq!(entries.len(), 1);
448        match &entries[0] {
449            TreeEntry::File { name, .. } => assert_eq!(name, "keep.txt"),
450            _ => panic!("expected file"),
451        }
452    }
453
454    #[test]
455    fn walk_exclude_names_no_substring_match() {
456        // Regression test: exclude_names ".git" must NOT exclude ".gitignore"
457        let mut fs = MockFs::new();
458        fs.add_file("/root/.gitignore", b"*.tmp", false, 1000);
459        fs.add_file("/root/file.txt", b"ok", false, 1000);
460
461        let entries = walk_dir(&fs, Path::new("/root"), &[".git".to_string()]).unwrap();
462        let names: Vec<&str> = entries
463            .iter()
464            .map(|e| match e {
465                TreeEntry::File { name, .. } | TreeEntry::Directory { name, .. } => name.as_str(),
466            })
467            .collect();
468        assert!(
469            names.contains(&".gitignore"),
470            "should NOT exclude .gitignore when excluding .git"
471        );
472        assert_eq!(entries.len(), 2);
473    }
474
475    #[test]
476    fn walk_exclude_names_nested_dir() {
477        // .git inside a subdir should also be excluded
478        let mut fs = MockFs::new();
479        fs.add_file("/root/src/code.rs", b"fn main(){}", false, 1000);
480        fs.add_dir("/root/src/.git");
481        fs.add_file("/root/src/.git/HEAD", b"ref", false, 1000);
482
483        let entries = walk_dir(&fs, Path::new("/root"), &[".git".to_string()]).unwrap();
484        let flat = flatten_entries(&entries);
485        assert_eq!(flat.len(), 1);
486        assert_eq!(flat[0].0, "src/code.rs");
487    }
488
489    #[test]
490    fn walk_is_ignored_respected() {
491        let mut fs = MockFs::new();
492        fs.add_file("/root/keep.txt", b"keep", false, 1000);
493        fs.add_file("/root/ignored.tmp", b"tmp", false, 1000);
494        fs.ignore("/root/ignored.tmp");
495
496        let entries = walk_dir(&fs, Path::new("/root"), &[]).unwrap();
497        assert_eq!(entries.len(), 1);
498        match &entries[0] {
499            TreeEntry::File { name, .. } => assert_eq!(name, "keep.txt"),
500            _ => panic!("expected file"),
501        }
502    }
503
504    #[test]
505    fn walk_executable_detected() {
506        let mut fs = MockFs::new();
507        fs.add_file("/root/script.sh", b"#!/bin/sh", true, 1000);
508        fs.add_file("/root/data.txt", b"data", false, 1000);
509
510        let entries = walk_dir(&fs, Path::new("/root"), &[]).unwrap();
511        for entry in &entries {
512            if let TreeEntry::File {
513                name, executable, ..
514            } = entry
515            {
516                if name == "script.sh" {
517                    assert!(*executable);
518                } else {
519                    assert!(!*executable);
520                }
521            }
522        }
523    }
524
525    #[test]
526    fn walk_nested_structure() {
527        let mut fs = MockFs::new();
528        fs.add_file("/root/README.md", b"# Hi", false, 1000);
529        fs.add_file("/root/src/main.rs", b"fn main(){}", false, 2000);
530        fs.add_file("/root/src/util/helpers.rs", b"pub fn help(){}", false, 3000);
531
532        let entries = walk_dir(&fs, Path::new("/root"), &[]).unwrap();
533        let flat = flatten_entries(&entries);
534        let paths: Vec<&str> = flat.iter().map(|(p, _, _)| p.as_str()).collect();
535        assert!(paths.contains(&"README.md"));
536        assert!(paths.contains(&"src/main.rs"));
537        assert!(paths.contains(&"src/util/helpers.rs"));
538    }
539
540    // ─── max_mtime tests ───
541
542    #[test]
543    fn max_mtime_returns_largest() {
544        let mut fs = MockFs::new();
545        fs.add_file("/root/old.txt", b"old", false, 1000);
546        fs.add_file("/root/new.txt", b"new", false, 5000);
547        fs.add_file("/root/mid.txt", b"mid", false, 3000);
548
549        let ms = max_mtime(&fs, Path::new("/root"), &[]).unwrap();
550        assert_eq!(ms, 5000);
551    }
552
553    #[test]
554    fn max_mtime_respects_exclude_names() {
555        let mut fs = MockFs::new();
556        fs.add_file("/root/code.rs", b"fn main(){}", false, 1000);
557        fs.add_dir("/root/.git");
558        fs.add_file("/root/.git/index", b"git-index", false, 9999);
559
560        let ms = max_mtime(&fs, Path::new("/root"), &[".git".to_string()]).unwrap();
561        // .git/index has mtime 9999 but should be excluded
562        assert_eq!(ms, 1000);
563    }
564
565    #[test]
566    fn max_mtime_respects_is_ignored() {
567        let mut fs = MockFs::new();
568        fs.add_file("/root/code.rs", b"fn main(){}", false, 1000);
569        fs.add_file("/root/build.tmp", b"tmp", false, 9999);
570        fs.ignore("/root/build.tmp");
571
572        let ms = max_mtime(&fs, Path::new("/root"), &[]).unwrap();
573        assert_eq!(ms, 1000);
574    }
575
576    #[test]
577    fn max_mtime_nested() {
578        let mut fs = MockFs::new();
579        fs.add_file("/root/a.txt", b"a", false, 100);
580        fs.add_file("/root/sub/b.txt", b"b", false, 500);
581
582        let ms = max_mtime(&fs, Path::new("/root"), &[]).unwrap();
583        assert_eq!(ms, 500);
584    }
585
586    #[test]
587    fn max_mtime_empty_dir() {
588        let mut fs = MockFs::new();
589        fs.add_dir("/root");
590
591        let ms = max_mtime(&fs, Path::new("/root"), &[]).unwrap();
592        assert_eq!(ms, 0);
593    }
594}