Skip to main content

rusty_detox/
sequence.rs

1//! [`Sequence`] — ordered list of [`Filter`]s applied left-to-right (FR-008/FR-038).
2
3use crate::filter::{DEFAULT_SEPARATOR, DEFAULT_UNSAFE_CHARS, Filter};
4
5/// Ordered list of [`Filter`]s applied left-to-right to a name byte sequence.
6///
7/// Three built-in constructors mirror upstream's named sequences:
8/// - [`Sequence::default`] (= `safe` + `wipeup`)
9/// - [`Sequence::iso8859_1`] (= `iso8859_1` + `safe` + `wipeup`)
10/// - [`Sequence::utf_8`] (= `utf_8` + `safe` + `wipeup`)
11///
12/// `Sequence::new()` + `push(Filter)` enables ad-hoc construction; `push`
13/// consumes and returns owned `Self` so chained calls compile as builder-style
14/// fluent code (FR-038 + clarification Q3).
15#[derive(Debug, Clone, PartialEq, Eq)]
16pub struct Sequence {
17    pub(crate) filters: Vec<Filter>,
18    pub(crate) name: String,
19}
20
21impl Sequence {
22    /// Canonical inherent default constructor — returns the `default` sequence
23    /// (`safe` + `wipeup`). [`impl Default for Sequence`] delegates here so
24    /// both call sites produce identical values (FR-038).
25    ///
26    /// The inherent-method-with-same-name-as-trait pattern is a deliberate
27    /// FR-038 design choice (clippy::should_implement_trait suppressed) so
28    /// that `Sequence::default()` reads naturally as a named-sequence
29    /// constructor parallel to `Sequence::utf_8()` and `Sequence::iso8859_1()`.
30    ///
31    /// # Examples
32    ///
33    /// ```
34    /// use rusty_detox::Sequence;
35    /// let s = Sequence::default();
36    /// assert_eq!(s.name(), "default");
37    /// ```
38    #[must_use]
39    #[allow(clippy::should_implement_trait)]
40    pub fn default() -> Self {
41        Sequence {
42            name: "default".to_string(),
43            filters: vec![
44                Filter::Safe {
45                    replacement: b'_',
46                    unsafe_chars: DEFAULT_UNSAFE_CHARS.to_vec(),
47                },
48                Filter::Wipeup {
49                    separator: DEFAULT_SEPARATOR,
50                    remove_trailing: true,
51                },
52            ],
53        }
54    }
55
56    /// Built-in `iso8859_1` sequence: `iso8859_1` + `safe` + `wipeup`.
57    #[must_use]
58    pub fn iso8859_1() -> Self {
59        Sequence {
60            name: "iso8859_1".to_string(),
61            filters: vec![
62                Filter::Iso8859_1,
63                Filter::Safe {
64                    replacement: b'_',
65                    unsafe_chars: DEFAULT_UNSAFE_CHARS.to_vec(),
66                },
67                Filter::Wipeup {
68                    separator: DEFAULT_SEPARATOR,
69                    remove_trailing: true,
70                },
71            ],
72        }
73    }
74
75    /// Built-in `utf_8` sequence: `utf_8` + `safe` + `wipeup`.
76    #[must_use]
77    pub fn utf_8() -> Self {
78        Sequence {
79            name: "utf_8".to_string(),
80            filters: vec![
81                Filter::Utf8,
82                Filter::Safe {
83                    replacement: b'_',
84                    unsafe_chars: DEFAULT_UNSAFE_CHARS.to_vec(),
85                },
86                Filter::Wipeup {
87                    separator: DEFAULT_SEPARATOR,
88                    remove_trailing: true,
89                },
90            ],
91        }
92    }
93
94    /// Empty named sequence — push filters with [`Sequence::push`].
95    ///
96    /// # Examples
97    ///
98    /// ```
99    /// use rusty_detox::{Sequence, Filter};
100    ///
101    /// let seq = Sequence::new("custom")
102    ///     .push(Filter::safe_default())
103    ///     .push(Filter::wipeup_default());
104    /// assert_eq!(seq.name(), "custom");
105    /// ```
106    #[must_use]
107    pub fn new(name: impl Into<String>) -> Self {
108        Sequence {
109            name: name.into(),
110            filters: Vec::new(),
111        }
112    }
113
114    /// Append a filter and return the (consumed) sequence for fluent chaining.
115    #[must_use]
116    pub fn push(mut self, filter: Filter) -> Self {
117        self.filters.push(filter);
118        self
119    }
120
121    /// Sequence name (used by `-L` listing and `-s` resolution).
122    pub fn name(&self) -> &str {
123        &self.name
124    }
125
126    /// Slice of filters in pipeline order.
127    pub fn filters(&self) -> &[Filter] {
128        &self.filters
129    }
130
131    /// Apply all filters in order to `input`. Returns the transformed bytes.
132    pub(crate) fn apply(&self, input: &[u8]) -> Vec<u8> {
133        let mut buf = input.to_vec();
134        for filter in &self.filters {
135            buf = filter.apply(&buf);
136        }
137        buf
138    }
139}
140
141impl Default for Sequence {
142    /// Trait default delegates to inherent [`Sequence::default()`] — both
143    /// paths produce identical values (FR-038).
144    fn default() -> Self {
145        Self::default()
146    }
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// The inherent constructor and the `Default` trait impl must agree.
    #[test]
    fn default_inherent_matches_trait_default() {
        let via_trait = <Sequence as Default>::default();
        let via_inherent = Sequence::default();
        assert_eq!(via_inherent, via_trait);
    }

    /// The `default` sequence turns a space into an underscore.
    #[test]
    fn default_sanitizes_space_to_underscore() {
        let output = Sequence::default().apply(b"hello world.txt");
        assert_eq!(output, b"hello_world.txt");
    }

    /// The `utf_8` sequence maps `é` to a plain `e`.
    #[test]
    fn utf_8_strips_e_acute() {
        let input = "café résumé.pdf";
        let output = Sequence::utf_8().apply(input.as_bytes());
        assert_eq!(output, b"cafe_resume.pdf");
    }

    /// `push` hands the sequence back, so calls can be chained.
    #[test]
    fn push_returns_self_for_chaining() {
        let chained = Sequence::new("custom")
            .push(Filter::safe_default())
            .push(Filter::wipeup_default());
        let filter_count = chained.filters().len();
        assert_eq!(filter_count, 2);
    }
}