Skip to main content

rusty_detox/
lib.rs

1//! # rusty-detox
2//!
3//! A Rust port of Doug Harple's `detox(1)` v3.0.1: sanitize messy filenames
4//! through a configurable filter pipeline.
5//!
6//! ## Quick start
7//!
8//! ```
9//! use rusty_detox::{DetoxBuilder, Sequence};
10//!
11//! let detox = DetoxBuilder::new()
12//!     .sequence(Sequence::utf_8())
13//!     .build();
14//! let clean = detox.sanitize("My Résumé (final v2).pdf");
15//! // The closing paren before `.pdf` becomes `_`; trailing-trim only
16//! // strips runs at the very ends of the basename, not before the extension.
17//! assert_eq!(clean, "My_Resume_final_v2_.pdf");
18//! ```
19//!
20//! ## Stability (lockstep SemVer)
21//!
22//! Library and binary share a single crate version. The vendored upstream
23//! translation tables (`Table.utf_8`, `Table.iso8859_1`) are **frozen at
24//! v3.0.1** — re-vendoring is a MAJOR semver bump.
25
26#![deny(missing_docs)]
27
28pub mod config;
29pub mod error;
30pub mod filter;
31pub mod planner;
32pub mod renamer;
33pub mod sequence;
34pub mod tables;
35
36#[cfg(feature = "cli")]
37pub mod walker;
38
39pub use error::DetoxError;
40pub use filter::Filter;
41pub use sequence::Sequence;
42
43use std::path::PathBuf;
44
45/// Configured detox pipeline runner. Construct via [`DetoxBuilder`].
46#[non_exhaustive]
47#[derive(Debug, Clone)]
48pub struct Detox {
49    sequence: Sequence,
50    /// Verbose: emit one rename line per change to stdout.
51    pub verbose: bool,
52    /// Dry-run: plan and report renames without issuing any rename syscalls.
53    pub dry_run: bool,
54    /// Recursive: descend into directories depth-first leaves-up.
55    pub recursive: bool,
56    /// Maximum collision-resolution suffix attempts before giving up.
57    pub collision_cap: u32,
58}
59
60impl Detox {
61    /// Sanitize one UTF-8 input string, returning UTF-8 output (FR-040).
62    ///
63    /// Lossy reconstruction via [`String::from_utf8_lossy`] handles the rare
64    /// case where the active sequence includes [`Filter::Uncgi`] and the
65    /// input contains percent-escapes that decode to invalid UTF-8 fragments
66    /// (e.g., a lone `%C3`). In that case the invalid byte is replaced with
67    /// U+FFFD in the `&str` path. Callers needing byte-exact round-tripping
68    /// for arbitrary inputs MUST use [`Detox::sanitize_bytes`].
69    pub fn sanitize(&self, input: &str) -> String {
70        let bytes = self.sanitize_bytes(input.as_bytes());
71        String::from_utf8_lossy(&bytes).into_owned()
72    }
73
74    /// Canonical byte-oriented sanitization entry point (FR-040).
75    ///
76    /// Applies the configured [`Sequence`] to `input` and returns the
77    /// transformed bytes. No filesystem I/O.
78    pub fn sanitize_bytes(&self, input: &[u8]) -> Vec<u8> {
79        self.sequence.apply(input)
80    }
81
82    /// Planned rename(s) for `path` without executing them (FR-041).
83    ///
84    /// Performs filesystem READS (`readdir`, `stat`) as required to enumerate
85    /// directory contents and check for pre-existing collision targets, but
86    /// no filesystem MUTATION. Side-effect-free with respect to writes/logs.
87    pub fn plan(&self, path: &std::path::Path) -> Vec<RenamePlanEntry> {
88        let entries = if self.recursive && path.is_dir() {
89            #[cfg(feature = "cli")]
90            {
91                walker::recursive_walk(path)
92                    .into_iter()
93                    .map(|w| w.path)
94                    .collect()
95            }
96            #[cfg(not(feature = "cli"))]
97            {
98                vec![path.to_path_buf()]
99            }
100        } else {
101            vec![path.to_path_buf()]
102        };
103        planner::plan_directory(&entries, &self.sequence, self.collision_cap).unwrap_or_default()
104    }
105
106    /// Execute the rename(s) for `path` (FR-042).
107    ///
108    /// Produces the same plan as [`Detox::plan`] and iterates it issuing
109    /// rename syscalls (with EXDEV fallback per FR-025). Returns a
110    /// [`DetoxReport`] on success or a [`DetoxError`] on the first
111    /// unrecoverable failure.
112    pub fn execute(&self, path: &std::path::Path) -> Result<DetoxReport, DetoxError> {
113        let plan = self.plan(path);
114        let mut report = DetoxReport {
115            planned: plan.len(),
116            renamed: 0,
117            skipped: 0,
118            errored: 0,
119        };
120        if self.dry_run {
121            report.skipped = plan.len();
122            return Ok(report);
123        }
124        for entry in &plan {
125            match renamer::rename_with_fallback(&entry.source, &entry.target) {
126                Ok(_) => report.renamed += 1,
127                Err(e) => {
128                    report.errored += 1;
129                    return Err(e);
130                }
131            }
132        }
133        Ok(report)
134    }
135}
136
137/// Builder for [`Detox`] (FR-037).
138///
139/// All builder methods are OPTIONAL with documented defaults; `build()` is
140/// INFALLIBLE. Set the active [`Sequence`] with [`DetoxBuilder::sequence`]
141/// (default [`Sequence::default()`]).
142#[derive(Debug, Clone)]
143pub struct DetoxBuilder {
144    sequence: Sequence,
145    verbose: bool,
146    dry_run: bool,
147    recursive: bool,
148    collision_cap: u32,
149}
150
151impl DetoxBuilder {
152    /// Fresh builder with all defaults applied.
153    #[must_use]
154    pub fn new() -> Self {
155        DetoxBuilder {
156            sequence: Sequence::default(),
157            verbose: false,
158            dry_run: false,
159            recursive: false,
160            collision_cap: 1000,
161        }
162    }
163
164    /// Set the active sequence (default [`Sequence::default()`]).
165    #[must_use]
166    pub fn sequence(mut self, s: Sequence) -> Self {
167        self.sequence = s;
168        self
169    }
170
171    /// Set verbose flag (default `false`).
172    #[must_use]
173    pub fn verbose(mut self, on: bool) -> Self {
174        self.verbose = on;
175        self
176    }
177
178    /// Set dry-run flag (default `false`).
179    #[must_use]
180    pub fn dry_run(mut self, on: bool) -> Self {
181        self.dry_run = on;
182        self
183    }
184
185    /// Set recursive flag (default `false`).
186    #[must_use]
187    pub fn recursive(mut self, on: bool) -> Self {
188        self.recursive = on;
189        self
190    }
191
192    /// Set the collision-resolution attempt cap (default `1000`).
193    #[must_use]
194    pub fn collision_cap(mut self, cap: u32) -> Self {
195        self.collision_cap = cap;
196        self
197    }
198
199    /// Build a configured [`Detox`]. INFALLIBLE.
200    #[must_use]
201    pub fn build(self) -> Detox {
202        Detox {
203            sequence: self.sequence,
204            verbose: self.verbose,
205            dry_run: self.dry_run,
206            recursive: self.recursive,
207            collision_cap: self.collision_cap,
208        }
209    }
210}
211
212impl Default for DetoxBuilder {
213    fn default() -> Self {
214        Self::new()
215    }
216}
217
/// One planned rename: where a path is now and where it should end up (FR-041).
///
/// Values of this type are returned by [`Detox::plan`] and consumed by
/// [`Detox::execute`].
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub struct RenamePlanEntry {
    /// Path as it currently exists.
    pub source: PathBuf,
    /// Path the entry should have once the pipeline has been applied.
    pub target: PathBuf,
    /// Numeric collision suffix that was applied, or `None` when no suffix
    /// was needed.
    pub collision_suffix: Option<u32>,
}
229
/// Counters describing one [`Detox::execute`] run (FR-042).
///
/// The derived `Default` yields a report with every counter at zero.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
#[non_exhaustive]
pub struct DetoxReport {
    /// How many entries the planner produced.
    pub planned: usize,
    /// How many entries were renamed successfully.
    pub renamed: usize,
    /// How many entries were skipped (e.g., dry-run or already-clean).
    pub skipped: usize,
    /// How many entries errored mid-execution.
    pub errored: usize,
}
243
#[cfg(test)]
mod tests {
    use super::*;
    use static_assertions::{assert_impl_all, const_assert};

    // Compile-time guarantees about the public types: thread-safety
    // everywhere, plus `Clone` / `Default` where the API promises them.
    assert_impl_all!(Detox: Clone, Send, Sync);
    assert_impl_all!(DetoxBuilder: Clone, Send, Sync);
    assert_impl_all!(Sequence: Clone, Send, Sync);
    assert_impl_all!(Filter: Clone, Send, Sync);
    assert_impl_all!(DetoxError: Send, Sync);
    assert_impl_all!(DetoxReport: Default);
    const_assert!(std::mem::size_of::<RenamePlanEntry>() > 0);

    /// Pipeline configured with the UTF-8 sequence, shared by several tests.
    fn utf8_detox() -> Detox {
        DetoxBuilder::new().sequence(Sequence::utf_8()).build()
    }

    #[test]
    fn sanitize_default_sequence() {
        let cleaned = DetoxBuilder::new().build().sanitize("hello world.txt");
        assert_eq!(cleaned, "hello_world.txt");
    }

    #[test]
    fn sanitize_utf8_sequence() {
        assert_eq!(utf8_detox().sanitize("café.pdf"), "cafe.pdf");
    }

    #[test]
    fn sanitize_bytes_parity_with_str_for_utf8_clean() {
        // SC-028: for UTF-8-clean input the `&str` and byte entry points
        // must produce the same bytes.
        let pipeline = utf8_detox();
        let name = "café.pdf";
        let via_str = pipeline.sanitize(name);
        let via_bytes = pipeline.sanitize_bytes(name.as_bytes());
        assert_eq!(via_str.as_bytes(), via_bytes.as_slice());
    }

    #[test]
    fn clean_filename_unchanged() {
        let already_clean = "clean_already.txt";
        let cleaned = DetoxBuilder::new().build().sanitize(already_clean);
        assert_eq!(cleaned, "clean_already.txt");
    }
}