rusty_detox/lib.rs
1//! # rusty-detox
2//!
3//! A Rust port of Doug Harple's `detox(1)` v3.0.1: sanitize messy filenames
4//! through a configurable filter pipeline.
5//!
6//! ## Quick start
7//!
8//! ```
9//! use rusty_detox::{DetoxBuilder, Sequence};
10//!
11//! let detox = DetoxBuilder::new()
12//! .sequence(Sequence::utf_8())
13//! .build();
14//! let clean = detox.sanitize("My Résumé (final v2).pdf");
15//! // The closing paren before `.pdf` becomes `_`; trailing-trim only
16//! // strips runs at the very ends of the basename, not before the extension.
17//! assert_eq!(clean, "My_Resume_final_v2_.pdf");
18//! ```
19//!
20//! ## Stability (lockstep SemVer)
21//!
22//! Library and binary share a single crate version. The vendored upstream
23//! translation tables (`Table.utf_8`, `Table.iso8859_1`) are **frozen at
24//! v3.0.1** — re-vendoring is a MAJOR semver bump.
25
26#![deny(missing_docs)]
27
28pub mod config;
29pub mod error;
30pub mod filter;
31pub mod planner;
32pub mod renamer;
33pub mod sequence;
34pub mod tables;
35
36#[cfg(feature = "cli")]
37pub mod walker;
38
39pub use error::DetoxError;
40pub use filter::Filter;
41pub use sequence::Sequence;
42
43use std::path::PathBuf;
44
45/// Configured detox pipeline runner. Construct via [`DetoxBuilder`].
46#[non_exhaustive]
47#[derive(Debug, Clone)]
48pub struct Detox {
49 sequence: Sequence,
50 /// Verbose: emit one rename line per change to stdout.
51 pub verbose: bool,
52 /// Dry-run: plan and report renames without issuing any rename syscalls.
53 pub dry_run: bool,
54 /// Recursive: descend into directories depth-first leaves-up.
55 pub recursive: bool,
56 /// Maximum collision-resolution suffix attempts before giving up.
57 pub collision_cap: u32,
58}
59
60impl Detox {
61 /// Sanitize one UTF-8 input string, returning UTF-8 output (FR-040).
62 ///
63 /// Lossy reconstruction via [`String::from_utf8_lossy`] handles the rare
64 /// case where the active sequence includes [`Filter::Uncgi`] and the
65 /// input contains percent-escapes that decode to invalid UTF-8 fragments
66 /// (e.g., a lone `%C3`). In that case the invalid byte is replaced with
67 /// U+FFFD in the `&str` path. Callers needing byte-exact round-tripping
68 /// for arbitrary inputs MUST use [`Detox::sanitize_bytes`].
69 pub fn sanitize(&self, input: &str) -> String {
70 let bytes = self.sanitize_bytes(input.as_bytes());
71 String::from_utf8_lossy(&bytes).into_owned()
72 }
73
74 /// Canonical byte-oriented sanitization entry point (FR-040).
75 ///
76 /// Applies the configured [`Sequence`] to `input` and returns the
77 /// transformed bytes. No filesystem I/O.
78 pub fn sanitize_bytes(&self, input: &[u8]) -> Vec<u8> {
79 self.sequence.apply(input)
80 }
81
82 /// Planned rename(s) for `path` without executing them (FR-041).
83 ///
84 /// Performs filesystem READS (`readdir`, `stat`) as required to enumerate
85 /// directory contents and check for pre-existing collision targets, but
86 /// no filesystem MUTATION. Side-effect-free with respect to writes/logs.
87 pub fn plan(&self, path: &std::path::Path) -> Vec<RenamePlanEntry> {
88 let entries = if self.recursive && path.is_dir() {
89 #[cfg(feature = "cli")]
90 {
91 walker::recursive_walk(path)
92 .into_iter()
93 .map(|w| w.path)
94 .collect()
95 }
96 #[cfg(not(feature = "cli"))]
97 {
98 vec![path.to_path_buf()]
99 }
100 } else {
101 vec![path.to_path_buf()]
102 };
103 planner::plan_directory(&entries, &self.sequence, self.collision_cap).unwrap_or_default()
104 }
105
106 /// Execute the rename(s) for `path` (FR-042).
107 ///
108 /// Produces the same plan as [`Detox::plan`] and iterates it issuing
109 /// rename syscalls (with EXDEV fallback per FR-025). Returns a
110 /// [`DetoxReport`] on success or a [`DetoxError`] on the first
111 /// unrecoverable failure.
112 pub fn execute(&self, path: &std::path::Path) -> Result<DetoxReport, DetoxError> {
113 let plan = self.plan(path);
114 let mut report = DetoxReport {
115 planned: plan.len(),
116 renamed: 0,
117 skipped: 0,
118 errored: 0,
119 };
120 if self.dry_run {
121 report.skipped = plan.len();
122 return Ok(report);
123 }
124 for entry in &plan {
125 match renamer::rename_with_fallback(&entry.source, &entry.target) {
126 Ok(_) => report.renamed += 1,
127 Err(e) => {
128 report.errored += 1;
129 return Err(e);
130 }
131 }
132 }
133 Ok(report)
134 }
135}
136
137/// Builder for [`Detox`] (FR-037).
138///
139/// All builder methods are OPTIONAL with documented defaults; `build()` is
140/// INFALLIBLE. Set the active [`Sequence`] with [`DetoxBuilder::sequence`]
141/// (default [`Sequence::default()`]).
142#[derive(Debug, Clone)]
143pub struct DetoxBuilder {
144 sequence: Sequence,
145 verbose: bool,
146 dry_run: bool,
147 recursive: bool,
148 collision_cap: u32,
149}
150
151impl DetoxBuilder {
152 /// Fresh builder with all defaults applied.
153 #[must_use]
154 pub fn new() -> Self {
155 DetoxBuilder {
156 sequence: Sequence::default(),
157 verbose: false,
158 dry_run: false,
159 recursive: false,
160 collision_cap: 1000,
161 }
162 }
163
164 /// Set the active sequence (default [`Sequence::default()`]).
165 #[must_use]
166 pub fn sequence(mut self, s: Sequence) -> Self {
167 self.sequence = s;
168 self
169 }
170
171 /// Set verbose flag (default `false`).
172 #[must_use]
173 pub fn verbose(mut self, on: bool) -> Self {
174 self.verbose = on;
175 self
176 }
177
178 /// Set dry-run flag (default `false`).
179 #[must_use]
180 pub fn dry_run(mut self, on: bool) -> Self {
181 self.dry_run = on;
182 self
183 }
184
185 /// Set recursive flag (default `false`).
186 #[must_use]
187 pub fn recursive(mut self, on: bool) -> Self {
188 self.recursive = on;
189 self
190 }
191
192 /// Set the collision-resolution attempt cap (default `1000`).
193 #[must_use]
194 pub fn collision_cap(mut self, cap: u32) -> Self {
195 self.collision_cap = cap;
196 self
197 }
198
199 /// Build a configured [`Detox`]. INFALLIBLE.
200 #[must_use]
201 pub fn build(self) -> Detox {
202 Detox {
203 sequence: self.sequence,
204 verbose: self.verbose,
205 dry_run: self.dry_run,
206 recursive: self.recursive,
207 collision_cap: self.collision_cap,
208 }
209 }
210}
211
212impl Default for DetoxBuilder {
213 fn default() -> Self {
214 Self::new()
215 }
216}
217
/// One planned rename: where an entry currently lives and where it would be
/// moved to (FR-041).
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub struct RenamePlanEntry {
    /// Path of the entry as it exists before the rename.
    pub source: PathBuf,
    /// Path the entry is renamed to once the pipeline has been applied.
    pub target: PathBuf,
    /// Numeric suffix added to resolve a name collision, or `None` when no
    /// suffix was applied.
    pub collision_suffix: Option<u32>,
}
229
/// Outcome counters handed back by [`Detox::execute`] (FR-042).
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[non_exhaustive]
pub struct DetoxReport {
    /// How many entries the planner produced.
    pub planned: usize,
    /// How many entries were renamed successfully.
    pub renamed: usize,
    /// How many entries were skipped (e.g., dry-run or already-clean).
    pub skipped: usize,
    /// How many entries errored mid-execution.
    pub errored: usize,
}
243
#[cfg(test)]
mod tests {
    use super::*;
    use static_assertions::{assert_impl_all, const_assert};

    // Compile-time guarantees: the public API types stay thread-safe and
    // (where promised) cloneable.
    assert_impl_all!(Detox: Clone, Send, Sync);
    assert_impl_all!(DetoxBuilder: Clone, Send, Sync);
    assert_impl_all!(Sequence: Clone, Send, Sync);
    assert_impl_all!(Filter: Clone, Send, Sync);
    assert_impl_all!(DetoxError: Send, Sync);

    // Compile-time check that DetoxReport has a Default impl.
    const _: fn() = || drop(DetoxReport::default());
    const_assert!(std::mem::size_of::<RenamePlanEntry>() > 0);

    /// Pipeline built with every builder default.
    fn default_detox() -> Detox {
        DetoxBuilder::new().build()
    }

    /// Pipeline using the UTF-8 sequence.
    fn utf8_detox() -> Detox {
        DetoxBuilder::new().sequence(Sequence::utf_8()).build()
    }

    #[test]
    fn sanitize_default_sequence() {
        let cleaned = default_detox().sanitize("hello world.txt");
        assert_eq!(cleaned, "hello_world.txt");
    }

    #[test]
    fn sanitize_utf8_sequence() {
        let cleaned = utf8_detox().sanitize("café.pdf");
        assert_eq!(cleaned, "cafe.pdf");
    }

    #[test]
    fn sanitize_bytes_parity_with_str_for_utf8_clean() {
        // SC-028: parity-by-construction for UTF-8-clean input
        let detox = utf8_detox();
        let input = "café.pdf";
        let via_str = detox.sanitize(input);
        let via_bytes = detox.sanitize_bytes(input.as_bytes());
        assert_eq!(via_str.as_bytes(), via_bytes.as_slice());
    }

    #[test]
    fn clean_filename_unchanged() {
        let cleaned = default_detox().sanitize("clean_already.txt");
        assert_eq!(cleaned, "clean_already.txt");
    }
}