Skip to main content

coding_tools/
walk.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! Shared file-selection traversal.
5//!
6//! The predicate vocabulary every tool uses to choose *which* entries to act on
7//! — search root, name, kind, size, and whether to descend dot-entries or follow
8//! symlinks — lives here so it is identical across the suite: what you learn
9//! about targeting from `ct-search` transfers verbatim to `ct-edit`. A
10//! [`Selector`] holds the resolved predicates; [`Selector::walk`] yields the
11//! entries that pass them, leaving content-level work (grep, replace) to the
12//! caller.
13
14use std::path::PathBuf;
15
16use regex::Regex;
17use walkdir::{DirEntry, WalkDir};
18
19/// Entry-kind selector for `--type`.
20#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
21pub enum EntryType {
22    /// Regular file.
23    F,
24    /// Directory.
25    D,
26    /// Symbolic link.
27    L,
28}
29
/// A parsed `--size` predicate; every payload is a byte count.
///
/// The variant records which comparison the spec's sign selected, and the
/// payload is the threshold after any unit multiplier has been applied.
/// Values compare equal only when both the comparison and the threshold match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SizeCmp {
    /// Strictly larger than N bytes (`+N`).
    Gt(u64),
    /// Strictly smaller than N bytes (`-N`).
    Lt(u64),
    /// At least N bytes (bare `N`).
    Ge(u64),
}
40
41/// Parse a `--size` spec `[+|-]N[k|m|g|b]` into a [`SizeCmp`].
42///
43/// `+N` is "larger than", `-N` is "smaller than", a bare `N` is "at least N";
44/// a trailing `k`/`m`/`g` multiplies by 1024/1024²/1024³.
45///
46/// # Examples
47///
48/// ```
49/// use coding_tools::walk::{parse_size, size_matches, SizeCmp};
50///
51/// let cmp = parse_size("+4k").unwrap();        // larger than 4 KiB
52/// assert!(matches!(cmp, SizeCmp::Gt(4096)));
53/// assert!(size_matches(&cmp, 5000));
54/// assert!(!size_matches(&cmp, 4096));
55///
56/// assert!(matches!(parse_size("10").unwrap(), SizeCmp::Ge(10)));
57/// assert!(parse_size("+x").is_err());
58/// ```
59pub fn parse_size(spec: &str) -> Result<SizeCmp, String> {
60    let spec = spec.trim();
61    let (ctor, body): (fn(u64) -> SizeCmp, &str) = if let Some(r) = spec.strip_prefix('+') {
62        (SizeCmp::Gt, r)
63    } else if let Some(r) = spec.strip_prefix('-') {
64        (SizeCmp::Lt, r)
65    } else {
66        (SizeCmp::Ge, spec)
67    };
68    let body = body.trim();
69    if body.is_empty() {
70        return Err(format!("empty size value in '{spec}'"));
71    }
72    let last = body.chars().last().unwrap();
73    let (num_part, mult): (&str, u64) = match last.to_ascii_lowercase() {
74        'k' => (&body[..body.len() - 1], 1024),
75        'm' => (&body[..body.len() - 1], 1024 * 1024),
76        'g' => (&body[..body.len() - 1], 1024 * 1024 * 1024),
77        'b' => (&body[..body.len() - 1], 1),
78        _ => (body, 1),
79    };
80    let n: u64 = num_part
81        .trim()
82        .parse()
83        .map_err(|_| format!("invalid size number '{num_part}' in '{spec}'"))?;
84    let bytes = n
85        .checked_mul(mult)
86        .ok_or_else(|| format!("size too large: '{spec}'"))?;
87    Ok(ctor(bytes))
88}
89
90/// Whether a byte length satisfies a [`SizeCmp`].
91pub fn size_matches(cmp: &SizeCmp, len: u64) -> bool {
92    match *cmp {
93        SizeCmp::Gt(n) => len > n,
94        SizeCmp::Lt(n) => len < n,
95        SizeCmp::Ge(n) => len >= n,
96    }
97}
98
99/// True for dot-entries below the search root (the root itself is never hidden).
100fn is_hidden(entry: &DirEntry) -> bool {
101    entry.depth() > 0 && entry.file_name().to_string_lossy().starts_with('.')
102}
103
104/// Whether an entry's kind is among `types` (empty `types` means "any kind").
105fn entry_kind_matches(types: &[EntryType], entry: &DirEntry) -> bool {
106    if types.is_empty() {
107        return true;
108    }
109    let ft = entry.file_type();
110    types.iter().any(|t| match t {
111        EntryType::F => ft.is_file(),
112        EntryType::D => ft.is_dir(),
113        EntryType::L => ft.is_symlink(),
114    })
115}
116
117/// Resolved file-selection predicates. Build one, then iterate [`walk`].
118///
119/// [`walk`]: Selector::walk
120pub struct Selector {
121    /// Traversal root (a file yields just itself; a directory is descended).
122    pub base: PathBuf,
123    /// Whole-name alternatives; `None` matches any name.
124    pub names: Option<Vec<Regex>>,
125    /// Allowed entry kinds; empty matches any kind.
126    pub types: Vec<EntryType>,
127    /// Size predicate (applies to regular files only).
128    pub size: Option<SizeCmp>,
129    /// Include dot-entries and descend dot-directories.
130    pub hidden: bool,
131    /// Follow symlinks while traversing.
132    pub follow: bool,
133}
134
135impl Selector {
136    /// Yield every entry under [`base`](Selector::base) that passes the
137    /// structural predicates (kind, name, size, hidden). Traversal errors and
138    /// per-entry `stat` failures surface as `Err` items rather than panicking.
139    pub fn walk(&self) -> impl Iterator<Item = Result<DirEntry, String>> + '_ {
140        WalkDir::new(&self.base)
141            .follow_links(self.follow)
142            .into_iter()
143            .filter_entry(move |e| self.hidden || !is_hidden(e))
144            .filter_map(move |res| self.evaluate(res))
145    }
146
147    /// Apply the structural predicates to one raw traversal result. `None` drops
148    /// the entry; `Some(Ok)` keeps it; `Some(Err)` reports a hard failure.
149    fn evaluate(&self, res: walkdir::Result<DirEntry>) -> Option<Result<DirEntry, String>> {
150        let entry = match res {
151            Ok(e) => e,
152            Err(e) => return Some(Err(format!("traversal error: {e}"))),
153        };
154        if !entry_kind_matches(&self.types, &entry) {
155            return None;
156        }
157        if let Some(names) = &self.names {
158            let nm = entry.file_name().to_string_lossy();
159            if !names.iter().any(|r| r.is_match(&nm)) {
160                return None;
161            }
162        }
163        if let Some(cmp) = &self.size {
164            if !entry.file_type().is_file() {
165                return None;
166            }
167            match entry.metadata() {
168                Ok(m) => {
169                    if !size_matches(cmp, m.len()) {
170                        return None;
171                    }
172                }
173                Err(e) => return Some(Err(format!("stat {}: {e}", entry.path().display()))),
174            }
175        }
176        Some(Ok(entry))
177    }
178}
179
#[cfg(test)]
mod tests {
    use super::*;

    /// The sign prefix selects the comparison; a bare number means "at least".
    #[test]
    fn size_grammar_directions() {
        assert!(matches!(parse_size("+4k").unwrap(), SizeCmp::Gt(4096)));
        assert!(matches!(parse_size("-2m").unwrap(), SizeCmp::Lt(2097152)));
        assert!(matches!(parse_size("10").unwrap(), SizeCmp::Ge(10)));
        assert!(parse_size("+x").is_err());
    }

    /// Unit suffixes are case-insensitive and surrounding whitespace is ignored.
    #[test]
    fn size_grammar_units_and_whitespace() {
        assert!(matches!(parse_size("4K").unwrap(), SizeCmp::Ge(4096)));
        assert!(matches!(parse_size("10b").unwrap(), SizeCmp::Ge(10)));
        assert!(matches!(parse_size("+1g").unwrap(), SizeCmp::Gt(1073741824)));
        assert!(matches!(parse_size(" +4k ").unwrap(), SizeCmp::Gt(4096)));
    }

    /// Empty values, non-numeric bodies, and overflowing sizes are errors.
    #[test]
    fn size_grammar_rejects_malformed() {
        for bad in ["", "+", "-", "k", "1.5k", "4kb"] {
            assert!(parse_size(bad).is_err(), "expected error for {bad:?}");
        }
        assert!(parse_size("18446744073709551615k").is_err());
    }

    /// `Gt`/`Lt` are strict; `Ge` includes the threshold itself.
    #[test]
    fn size_matches_compares() {
        assert!(size_matches(&SizeCmp::Gt(10), 11));
        assert!(!size_matches(&SizeCmp::Gt(10), 10));
        assert!(size_matches(&SizeCmp::Ge(10), 10));
        assert!(!size_matches(&SizeCmp::Ge(10), 9));
        assert!(size_matches(&SizeCmp::Lt(10), 9));
        assert!(!size_matches(&SizeCmp::Lt(10), 10));
    }
}