Skip to main content

coding_tools/
walk.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! Shared file-selection traversal.
5//!
6//! The predicate vocabulary every tool uses to choose *which* entries to act on
7//! — search root, name, kind, size, and whether to descend dot-entries or follow
8//! symlinks — lives here so it is identical across the suite: what you learn
9//! about targeting from `ct-search` transfers verbatim to `ct-edit`. A
10//! [`Selector`] holds the resolved predicates; [`Selector::walk`] yields the
11//! entries that pass them, leaving content-level work (grep, replace) to the
12//! caller.
13
14use std::ffi::OsStr;
15use std::path::PathBuf;
16
17use ignore::{DirEntry, WalkBuilder};
18use regex::Regex;
19
20/// Entry-kind selector for `--type`.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
22pub enum EntryType {
23    /// Regular file.
24    F,
25    /// Directory.
26    D,
27    /// Symbolic link.
28    L,
29}
30
/// A parsed `--size` predicate; every bound is expressed in bytes.
///
/// The variant records which comparison the user asked for, and the payload is
/// the byte count to compare a file's length against. Values are small and
/// `Copy`, and compare equal when both the direction and the bound agree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SizeCmp {
    /// Strictly larger than N bytes (`+N`).
    Gt(u64),
    /// Strictly smaller than N bytes (`-N`).
    Lt(u64),
    /// At least N bytes (bare `N`).
    Ge(u64),
}
41
42/// Parse a `--size` spec `[+|-]N[k|m|g|b]` into a [`SizeCmp`].
43///
44/// `+N` is "larger than", `-N` is "smaller than", a bare `N` is "at least N";
45/// a trailing `k`/`m`/`g` multiplies by 1024/1024²/1024³.
46///
47/// # Examples
48///
49/// ```
50/// use coding_tools::walk::{parse_size, size_matches, SizeCmp};
51///
52/// let cmp = parse_size("+4k").unwrap();        // larger than 4 KiB
53/// assert!(matches!(cmp, SizeCmp::Gt(4096)));
54/// assert!(size_matches(&cmp, 5000));
55/// assert!(!size_matches(&cmp, 4096));
56///
57/// assert!(matches!(parse_size("10").unwrap(), SizeCmp::Ge(10)));
58/// assert!(parse_size("+x").is_err());
59/// ```
60pub fn parse_size(spec: &str) -> Result<SizeCmp, String> {
61    let spec = spec.trim();
62    let (ctor, body): (fn(u64) -> SizeCmp, &str) = if let Some(r) = spec.strip_prefix('+') {
63        (SizeCmp::Gt, r)
64    } else if let Some(r) = spec.strip_prefix('-') {
65        (SizeCmp::Lt, r)
66    } else {
67        (SizeCmp::Ge, spec)
68    };
69    let body = body.trim();
70    if body.is_empty() {
71        return Err(format!("empty size value in '{spec}'"));
72    }
73    let last = body.chars().last().unwrap();
74    let (num_part, mult): (&str, u64) = match last.to_ascii_lowercase() {
75        'k' => (&body[..body.len() - 1], 1024),
76        'm' => (&body[..body.len() - 1], 1024 * 1024),
77        'g' => (&body[..body.len() - 1], 1024 * 1024 * 1024),
78        'b' => (&body[..body.len() - 1], 1),
79        _ => (body, 1),
80    };
81    let n: u64 = num_part
82        .trim()
83        .parse()
84        .map_err(|_| format!("invalid size number '{num_part}' in '{spec}'"))?;
85    let bytes = n
86        .checked_mul(mult)
87        .ok_or_else(|| format!("size too large: '{spec}'"))?;
88    Ok(ctor(bytes))
89}
90
91/// Whether a byte length satisfies a [`SizeCmp`].
92pub fn size_matches(cmp: &SizeCmp, len: u64) -> bool {
93    match *cmp {
94        SizeCmp::Gt(n) => len > n,
95        SizeCmp::Lt(n) => len < n,
96        SizeCmp::Ge(n) => len >= n,
97    }
98}
99
100/// Whether an entry's kind is among `types` (empty `types` means "any kind").
101fn entry_kind_matches(types: &[EntryType], entry: &DirEntry) -> bool {
102    if types.is_empty() {
103        return true;
104    }
105    let Some(ft) = entry.file_type() else {
106        return false; // only stdin has no file type; never matches a kind
107    };
108    types.iter().any(|t| match t {
109        EntryType::F => ft.is_file(),
110        EntryType::D => ft.is_dir(),
111        EntryType::L => ft.is_symlink(),
112    })
113}
114
115/// Resolved file-selection predicates. Build one, then iterate [`walk`].
116///
117/// [`walk`]: Selector::walk
118pub struct Selector {
119    /// Traversal root (a file yields just itself; a directory is descended).
120    pub base: PathBuf,
121    /// Whole-name alternatives; `None` matches any name.
122    pub names: Option<Vec<Regex>>,
123    /// Allowed entry kinds; empty matches any kind.
124    pub types: Vec<EntryType>,
125    /// Size predicate (applies to regular files only).
126    pub size: Option<SizeCmp>,
127    /// Include dot-entries and descend dot-directories.
128    pub hidden: bool,
129    /// Follow symlinks while traversing.
130    pub follow: bool,
131    /// Walk every file, ignoring `.gitignore`/`.ignore` rules (the `.git`
132    /// directory is always skipped regardless). Default `false`: like git, the
133    /// walk skips what the project has chosen to ignore.
134    pub no_ignore: bool,
135}
136
137impl Selector {
138    /// Yield every entry under [`base`](Selector::base) that passes the
139    /// structural predicates (kind, name, size, hidden). By default the walk
140    /// honors `.gitignore`/`.ignore` (and always skips `.git`), so a build tree
141    /// like `target/` is not descended; `no_ignore` disables that filtering.
142    /// Traversal errors and per-entry `stat` failures surface as `Err` items
143    /// rather than panicking.
144    pub fn walk(&self) -> impl Iterator<Item = Result<DirEntry, String>> + '_ {
145        let respect = !self.no_ignore;
146        WalkBuilder::new(&self.base)
147            .follow_links(self.follow)
148            .hidden(!self.hidden) // hidden(true) = skip dot-entries
149            .ignore(respect)
150            .git_ignore(respect)
151            .git_global(respect)
152            .git_exclude(respect)
153            .parents(respect)
154            // The VCS directory is never useful to these tools; skip it even
155            // under --hidden / --no-ignore.
156            .filter_entry(|e| e.file_name() != OsStr::new(".git"))
157            .build()
158            .filter_map(move |res| self.evaluate(res))
159    }
160
161    /// Apply the structural predicates to one raw traversal result. `None` drops
162    /// the entry; `Some(Ok)` keeps it; `Some(Err)` reports a hard failure.
163    fn evaluate(&self, res: Result<DirEntry, ignore::Error>) -> Option<Result<DirEntry, String>> {
164        let entry = match res {
165            Ok(e) => e,
166            Err(e) => return Some(Err(format!("traversal error: {e}"))),
167        };
168        if !entry_kind_matches(&self.types, &entry) {
169            return None;
170        }
171        if let Some(names) = &self.names {
172            let nm = entry.file_name().to_string_lossy();
173            if !names.iter().any(|r| r.is_match(&nm)) {
174                return None;
175            }
176        }
177        if let Some(cmp) = &self.size {
178            if !entry.file_type().is_some_and(|t| t.is_file()) {
179                return None;
180            }
181            match entry.metadata() {
182                Ok(m) => {
183                    if !size_matches(cmp, m.len()) {
184                        return None;
185                    }
186                }
187                Err(e) => return Some(Err(format!("stat {}: {e}", entry.path().display()))),
188            }
189        }
190        Some(Ok(entry))
191    }
192}
193
#[cfg(test)]
mod tests {
    use super::*;

    /// The sign picks the comparison, the unit scales the bound, and a
    /// non-numeric value is rejected.
    #[test]
    fn size_grammar_directions() {
        let larger = parse_size("+4k").unwrap();
        assert!(matches!(larger, SizeCmp::Gt(4096)));

        let smaller = parse_size("-2m").unwrap();
        assert!(matches!(smaller, SizeCmp::Lt(2097152)));

        let at_least = parse_size("10").unwrap();
        assert!(matches!(at_least, SizeCmp::Ge(10)));

        let garbage = parse_size("+x");
        assert!(garbage.is_err());
    }

    /// Each comparison is checked at or next to its boundary value.
    #[test]
    fn size_matches_compares() {
        let cases = [
            (SizeCmp::Gt(10), 11, true),
            (SizeCmp::Gt(10), 10, false),
            (SizeCmp::Ge(10), 10, true),
            (SizeCmp::Lt(10), 9, true),
        ];
        for (cmp, len, expected) in cases {
            assert_eq!(size_matches(&cmp, len), expected, "{cmp:?} against {len}");
        }
    }
}