coding_tools/walk.rs
1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! Shared file-selection traversal.
5//!
6//! The predicate vocabulary every tool uses to choose *which* entries to act on
7//! — search root, name, kind, size, and whether to descend dot-entries or follow
8//! symlinks — lives here so it is identical across the suite: what you learn
9//! about targeting from `ct-search` transfers verbatim to `ct-edit`. A
10//! [`Selector`] holds the resolved predicates; [`Selector::walk`] yields the
11//! entries that pass them, leaving content-level work (grep, replace) to the
12//! caller.
13
14use std::ffi::OsStr;
15use std::path::PathBuf;
16
17use ignore::{DirEntry, WalkBuilder};
18use regex::Regex;
19
20/// Entry-kind selector for `--type`.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
22pub enum EntryType {
23 /// Regular file.
24 F,
25 /// Directory.
26 D,
27 /// Symbolic link.
28 L,
29}
30
/// A parsed `--size` predicate; every payload is a byte count.
///
/// The variant records which comparison the spec's prefix asked for, and the
/// payload is the threshold after any unit suffix has been applied. Equality
/// is derived so values can be compared directly (`==`, `assert_eq!`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SizeCmp {
    /// Strictly larger than N bytes (`+N`).
    Gt(u64),
    /// Strictly smaller than N bytes (`-N`).
    Lt(u64),
    /// At least N bytes (bare `N`).
    Ge(u64),
}
41
42/// Parse a `--size` spec `[+|-]N[k|m|g|b]` into a [`SizeCmp`].
43///
44/// `+N` is "larger than", `-N` is "smaller than", a bare `N` is "at least N";
45/// a trailing `k`/`m`/`g` multiplies by 1024/1024²/1024³.
46///
47/// # Examples
48///
49/// ```
50/// use coding_tools::walk::{parse_size, size_matches, SizeCmp};
51///
52/// let cmp = parse_size("+4k").unwrap(); // larger than 4 KiB
53/// assert!(matches!(cmp, SizeCmp::Gt(4096)));
54/// assert!(size_matches(&cmp, 5000));
55/// assert!(!size_matches(&cmp, 4096));
56///
57/// assert!(matches!(parse_size("10").unwrap(), SizeCmp::Ge(10)));
58/// assert!(parse_size("+x").is_err());
59/// ```
60pub fn parse_size(spec: &str) -> Result<SizeCmp, String> {
61 let spec = spec.trim();
62 let (ctor, body): (fn(u64) -> SizeCmp, &str) = if let Some(r) = spec.strip_prefix('+') {
63 (SizeCmp::Gt, r)
64 } else if let Some(r) = spec.strip_prefix('-') {
65 (SizeCmp::Lt, r)
66 } else {
67 (SizeCmp::Ge, spec)
68 };
69 let body = body.trim();
70 if body.is_empty() {
71 return Err(format!("empty size value in '{spec}'"));
72 }
73 let last = body.chars().last().unwrap();
74 let (num_part, mult): (&str, u64) = match last.to_ascii_lowercase() {
75 'k' => (&body[..body.len() - 1], 1024),
76 'm' => (&body[..body.len() - 1], 1024 * 1024),
77 'g' => (&body[..body.len() - 1], 1024 * 1024 * 1024),
78 'b' => (&body[..body.len() - 1], 1),
79 _ => (body, 1),
80 };
81 let n: u64 = num_part
82 .trim()
83 .parse()
84 .map_err(|_| format!("invalid size number '{num_part}' in '{spec}'"))?;
85 let bytes = n
86 .checked_mul(mult)
87 .ok_or_else(|| format!("size too large: '{spec}'"))?;
88 Ok(ctor(bytes))
89}
90
91/// Whether a byte length satisfies a [`SizeCmp`].
92pub fn size_matches(cmp: &SizeCmp, len: u64) -> bool {
93 match *cmp {
94 SizeCmp::Gt(n) => len > n,
95 SizeCmp::Lt(n) => len < n,
96 SizeCmp::Ge(n) => len >= n,
97 }
98}
99
100/// Whether an entry's kind is among `types` (empty `types` means "any kind").
101fn entry_kind_matches(types: &[EntryType], entry: &DirEntry) -> bool {
102 if types.is_empty() {
103 return true;
104 }
105 let Some(ft) = entry.file_type() else {
106 return false; // only stdin has no file type; never matches a kind
107 };
108 types.iter().any(|t| match t {
109 EntryType::F => ft.is_file(),
110 EntryType::D => ft.is_dir(),
111 EntryType::L => ft.is_symlink(),
112 })
113}
114
115/// Resolved file-selection predicates. Build one, then iterate [`walk`].
116///
117/// [`walk`]: Selector::walk
118pub struct Selector {
119 /// Traversal root (a file yields just itself; a directory is descended).
120 pub base: PathBuf,
121 /// Whole-name alternatives; `None` matches any name.
122 pub names: Option<Vec<Regex>>,
123 /// Allowed entry kinds; empty matches any kind.
124 pub types: Vec<EntryType>,
125 /// Size predicate (applies to regular files only).
126 pub size: Option<SizeCmp>,
127 /// Include dot-entries and descend dot-directories.
128 pub hidden: bool,
129 /// Follow symlinks while traversing.
130 pub follow: bool,
131 /// Walk every file, ignoring `.gitignore`/`.ignore` rules (the `.git`
132 /// directory is always skipped regardless). Default `false`: like git, the
133 /// walk skips what the project has chosen to ignore.
134 pub no_ignore: bool,
135}
136
137impl Selector {
138 /// Yield every entry under [`base`](Selector::base) that passes the
139 /// structural predicates (kind, name, size, hidden). By default the walk
140 /// honors `.gitignore`/`.ignore` (and always skips `.git`), so a build tree
141 /// like `target/` is not descended; `no_ignore` disables that filtering.
142 /// Traversal errors and per-entry `stat` failures surface as `Err` items
143 /// rather than panicking.
144 pub fn walk(&self) -> impl Iterator<Item = Result<DirEntry, String>> + '_ {
145 let respect = !self.no_ignore;
146 WalkBuilder::new(&self.base)
147 .follow_links(self.follow)
148 .hidden(!self.hidden) // hidden(true) = skip dot-entries
149 .ignore(respect)
150 .git_ignore(respect)
151 .git_global(respect)
152 .git_exclude(respect)
153 .parents(respect)
154 // The VCS directory is never useful to these tools; skip it even
155 // under --hidden / --no-ignore.
156 .filter_entry(|e| e.file_name() != OsStr::new(".git"))
157 .build()
158 .filter_map(move |res| self.evaluate(res))
159 }
160
161 /// Apply the structural predicates to one raw traversal result. `None` drops
162 /// the entry; `Some(Ok)` keeps it; `Some(Err)` reports a hard failure.
163 fn evaluate(&self, res: Result<DirEntry, ignore::Error>) -> Option<Result<DirEntry, String>> {
164 let entry = match res {
165 Ok(e) => e,
166 Err(e) => return Some(Err(format!("traversal error: {e}"))),
167 };
168 if !entry_kind_matches(&self.types, &entry) {
169 return None;
170 }
171 if let Some(names) = &self.names {
172 let nm = entry.file_name().to_string_lossy();
173 if !names.iter().any(|r| r.is_match(&nm)) {
174 return None;
175 }
176 }
177 if let Some(cmp) = &self.size {
178 if !entry.file_type().is_some_and(|t| t.is_file()) {
179 return None;
180 }
181 match entry.metadata() {
182 Ok(m) => {
183 if !size_matches(cmp, m.len()) {
184 return None;
185 }
186 }
187 Err(e) => return Some(Err(format!("stat {}: {e}", entry.path().display()))),
188 }
189 }
190 Some(Ok(entry))
191 }
192}
193
#[cfg(test)]
mod tests {
    use super::*;

    /// Each prefix selects its comparison; a non-numeric body is rejected.
    #[test]
    fn size_grammar_directions() {
        assert!(matches!(parse_size("+4k").unwrap(), SizeCmp::Gt(4096)));
        assert!(matches!(parse_size("-2m").unwrap(), SizeCmp::Lt(2097152)));
        assert!(matches!(parse_size("10").unwrap(), SizeCmp::Ge(10)));
        assert!(parse_size("+x").is_err());
    }

    /// Unit suffixes scale the number, in either case, and surrounding
    /// whitespace is ignored.
    #[test]
    fn size_grammar_suffixes() {
        assert!(matches!(parse_size("1g"), Ok(SizeCmp::Ge(1_073_741_824))));
        assert!(matches!(parse_size("7b"), Ok(SizeCmp::Ge(7))));
        assert!(matches!(parse_size("-3K"), Ok(SizeCmp::Lt(3072))));
        assert!(matches!(parse_size("  +4k "), Ok(SizeCmp::Gt(4096))));
    }

    /// Missing numbers and values that overflow `u64` are errors, not panics.
    #[test]
    fn size_grammar_rejects_empty_and_overflow() {
        assert!(parse_size("").is_err());
        assert!(parse_size("+").is_err());
        assert!(parse_size("k").is_err());
        assert!(parse_size("18446744073709551615k").is_err());
    }

    /// Both the accepting and the rejecting side of every comparison.
    #[test]
    fn size_matches_compares() {
        assert!(size_matches(&SizeCmp::Gt(10), 11));
        assert!(!size_matches(&SizeCmp::Gt(10), 10));
        assert!(size_matches(&SizeCmp::Ge(10), 10));
        assert!(!size_matches(&SizeCmp::Ge(10), 9));
        assert!(size_matches(&SizeCmp::Lt(10), 9));
        assert!(!size_matches(&SizeCmp::Lt(10), 10));
    }
}