Skip to main content

test_better_snapshot/
redact.rs

1//! Redactions: stabilizing non-deterministic content before a snapshot
2//! comparison.
3//!
4//! A snapshot is only worth keeping if it is stable run to run, but rendered
5//! values often carry content that is not: a freshly minted UUID, a wall-clock
6//! timestamp. A [`Redactions`] set rewrites those spans to a fixed placeholder
7//! *before* the value is compared or stored, so the run-to-run noise never
8//! reaches the snapshot file. Because the same redactions run on every
9//! comparison, the placeholder is what lives in the `.snap` file and what later
10//! runs compare against.
11//!
12//! The built-in rules are hand-written scanners ([`redact_uuids`] for the
13//! 8-4-4-4-12 hex form, [`redact_rfc3339_timestamps`] for ISO-8601-style
14//! date-times), so this crate stays `std`-only. [`replace`] handles a known
15//! literal, and [`redact_with`] is the escape hatch for anything the built-ins
16//! do not cover.
17//!
18//! [`redact_uuids`]: Redactions::redact_uuids
19//! [`redact_rfc3339_timestamps`]: Redactions::redact_rfc3339_timestamps
20//! [`replace`]: Redactions::replace
21//! [`redact_with`]: Redactions::redact_with
22
23use std::fmt;
24
/// A single redaction step: a function from the text as it currently stands
/// to the rewritten text.
///
/// The function is boxed as a trait object so that closures of different
/// concrete types (a captured literal replacement, a built-in scanner, a
/// caller-supplied function) can all sit in the same `Vec`. The `Send + Sync`
/// bounds are part of the alias, so every stored rule must satisfy them.
type RedactionRule = Box<dyn Fn(&str) -> String + Send + Sync>;
29
30/// An ordered set of text rewrites applied to a value before it is compared
31/// against (or written to) a snapshot.
32///
33/// Build one with the chained methods, then hand it to
34/// `check!(value).matches_snapshot_with(name, &redactions)` (or the inline
35/// variant). Rules run in the order they were added, each on the output of the
36/// last.
37///
38/// ```
39/// use test_better_core::TestResult;
40/// use test_better_matchers::{eq, check};
41/// use test_better_snapshot::Redactions;
42///
43/// # fn main() -> TestResult {
44/// let redactions = Redactions::new().redact_uuids();
45/// let rendered = "user 550e8400-e29b-41d4-a716-446655440000 logged in";
46/// check!(redactions.apply(rendered))
47///     .satisfies(eq("user [uuid] logged in".to_string()))?;
48/// # Ok(())
49/// # }
50/// ```
51#[derive(Default)]
52pub struct Redactions {
53    /// The rules, run in order, each on the output of the last.
54    rules: Vec<RedactionRule>,
55}
56
57impl fmt::Debug for Redactions {
58    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
59        // The rules are closures, so there is nothing useful to print but how
60        // many there are.
61        f.debug_struct("Redactions")
62            .field("rules", &self.rules.len())
63            .finish()
64    }
65}
66
67impl Redactions {
68    /// An empty set: [`apply`](Self::apply) returns its input unchanged until a
69    /// rule is added.
70    #[must_use]
71    pub fn new() -> Self {
72        Self { rules: Vec::new() }
73    }
74
75    /// Replaces every occurrence of the literal `needle` with `placeholder`.
76    ///
77    /// This is the rule for a value you already know, e.g. a generated id you
78    /// captured earlier in the test. An empty `needle` is a no-op rule rather
79    /// than a rule that matches everywhere.
80    #[must_use]
81    pub fn replace(mut self, needle: impl Into<String>, placeholder: impl Into<String>) -> Self {
82        let needle = needle.into();
83        let placeholder = placeholder.into();
84        self.rules.push(Box::new(move |input| {
85            if needle.is_empty() {
86                input.to_string()
87            } else {
88                input.replace(needle.as_str(), placeholder.as_str())
89            }
90        }));
91        self
92    }
93
94    /// Replaces every UUID (the canonical 8-4-4-4-12 hex form, either case)
95    /// with `[uuid]`.
96    #[must_use]
97    pub fn redact_uuids(mut self) -> Self {
98        self.rules
99            .push(Box::new(|input| scan_replace(input, "[uuid]", uuid_at)));
100        self
101    }
102
103    /// Replaces every RFC 3339 / ISO 8601 date-time (e.g.
104    /// `2026-05-14T12:34:56Z`, with optional fractional seconds and either a
105    /// `Z` or a `±hh:mm` offset) with `[timestamp]`.
106    #[must_use]
107    pub fn redact_rfc3339_timestamps(mut self) -> Self {
108        self.rules.push(Box::new(|input| {
109            scan_replace(input, "[timestamp]", rfc3339_at)
110        }));
111        self
112    }
113
114    /// Adds an arbitrary rewriting rule: the escape hatch for content the
115    /// built-ins do not cover.
116    ///
117    /// The closure is handed the running text and returns its rewritten form.
118    #[must_use]
119    pub fn redact_with(mut self, rule: impl Fn(&str) -> String + Send + Sync + 'static) -> Self {
120        self.rules.push(Box::new(rule));
121        self
122    }
123
124    /// Runs every rule, in order, and returns the rewritten text. With no rules
125    /// added this returns `input` unchanged.
126    #[must_use]
127    pub fn apply(&self, input: &str) -> String {
128        let mut text = input.to_string();
129        for rule in &self.rules {
130            text = rule(&text);
131        }
132        text
133    }
134
135    /// Whether any rule has been added. An empty set is worth skipping: its
136    /// [`apply`](Self::apply) is an allocation that changes nothing.
137    #[must_use]
138    pub fn is_empty(&self) -> bool {
139        self.rules.is_empty()
140    }
141}
142
/// Walks `input` left to right, replacing every span `matcher` accepts with
/// `placeholder` and copying everything else through verbatim.
///
/// `matcher` is called with the remaining tail; it returns the byte length of
/// a match starting at the front, or `None`. A match advances past the whole
/// span, so matches never overlap.
///
/// The function is total for any `matcher`: a reported length is only
/// honoured when it is non-zero and `rest[len..]` is a valid slice. A zero
/// length (which would never advance), a length past the end of the tail, or
/// a length that lands inside a multi-byte character is treated as "no match
/// here", and the scan moves on by one character.
fn scan_replace(input: &str, placeholder: &str, matcher: impl Fn(&str) -> Option<usize>) -> String {
    let mut out = String::with_capacity(input.len());
    let mut rest = input;
    // The loop ends exactly when `rest` is empty: that is the only time
    // `chars().next()` yields `None`.
    while let Some(ch) = rest.chars().next() {
        // `str::get` returns `None` for an out-of-range or non-boundary
        // start, so a misbehaving matcher cannot cause a slicing panic.
        let after_match = match matcher(rest) {
            Some(len) if len > 0 => rest.get(len..),
            _ => None,
        };
        match after_match {
            Some(tail) => {
                out.push_str(placeholder);
                rest = tail;
            }
            None => {
                // No usable match at this position: copy one character and
                // retry from the next one.
                out.push(ch);
                rest = &rest[ch.len_utf8()..];
            }
        }
    }
    out
}
170
/// Returns `Some(36)` when `s` begins with a canonical UUID, `None` otherwise.
///
/// A canonical UUID is 36 bytes: hex digits (either case) everywhere except
/// byte offsets 8, 13, 18 and 23, which must be `-`. That is the 8-4-4-4-12
/// grouping. If the byte right after those 36 is itself a hex digit the
/// candidate is rejected, so the front of a longer hex run is never split
/// off. Only the text from the start of `s` onwards is inspected.
fn uuid_at(s: &str) -> Option<usize> {
    const UUID_LEN: usize = 36;
    const DASH_OFFSETS: [usize; 4] = [8, 13, 18, 23];

    let bytes = s.as_bytes();
    // Fewer than 36 bytes available: cannot be a UUID.
    let candidate = bytes.get(..UUID_LEN)?;

    let well_formed = candidate.iter().enumerate().all(|(offset, byte)| {
        if DASH_OFFSETS.contains(&offset) {
            *byte == b'-'
        } else {
            byte.is_ascii_hexdigit()
        }
    });
    if !well_formed {
        return None;
    }

    // A 37th hex digit means this is the start of something longer.
    match bytes.get(UUID_LEN) {
        Some(next) if next.is_ascii_hexdigit() => None,
        _ => Some(UUID_LEN),
    }
}
199
200/// If `s` starts with an RFC 3339 date-time, returns its byte length.
201fn rfc3339_at(s: &str) -> Option<usize> {
202    let bytes = s.as_bytes();
203    let mut pos = 0usize;
204
205    // date: yyyy-mm-dd
206    take_digits(bytes, &mut pos, 4)?;
207    take_byte(bytes, &mut pos, b'-')?;
208    take_digits(bytes, &mut pos, 2)?;
209    take_byte(bytes, &mut pos, b'-')?;
210    take_digits(bytes, &mut pos, 2)?;
211
212    // date-time separator: `T`, `t`, or a space (RFC 3339 §5.6 allows all).
213    match bytes.get(pos) {
214        Some(b'T' | b't' | b' ') => pos += 1,
215        _ => return None,
216    }
217
218    // time: hh:mm:ss
219    take_digits(bytes, &mut pos, 2)?;
220    take_byte(bytes, &mut pos, b':')?;
221    take_digits(bytes, &mut pos, 2)?;
222    take_byte(bytes, &mut pos, b':')?;
223    take_digits(bytes, &mut pos, 2)?;
224
225    // optional fractional seconds: a dot followed by one or more digits.
226    if bytes.get(pos) == Some(&b'.') {
227        pos += 1;
228        let frac_start = pos;
229        while bytes.get(pos).is_some_and(u8::is_ascii_digit) {
230            pos += 1;
231        }
232        if pos == frac_start {
233            return None;
234        }
235    }
236
237    // offset: `Z`/`z` or `±hh:mm`.
238    match bytes.get(pos) {
239        Some(b'Z' | b'z') => pos += 1,
240        Some(b'+' | b'-') => {
241            pos += 1;
242            take_digits(bytes, &mut pos, 2)?;
243            take_byte(bytes, &mut pos, b':')?;
244            take_digits(bytes, &mut pos, 2)?;
245        }
246        _ => return None,
247    }
248
249    Some(pos)
250}
251
/// Consumes exactly `count` ASCII digits starting at `*pos`.
///
/// On success `*pos` is moved forward by `count` and `Some(())` is returned.
/// If any of those positions is past the end of `bytes` or holds a non-digit,
/// the result is `None` and `*pos` keeps its original value.
fn take_digits(bytes: &[u8], pos: &mut usize, count: usize) -> Option<()> {
    let start = *pos;
    let all_digits = (0..count).all(|offset| {
        bytes
            .get(start + offset)
            .is_some_and(u8::is_ascii_digit)
    });
    // Only commit the advance once the whole run has been verified.
    all_digits.then(|| *pos = start + count)
}
263
/// Consumes the single byte `expected` at `*pos`.
///
/// Returns `Some(())` and moves `*pos` forward by one when the byte at `*pos`
/// equals `expected`. Otherwise (a different byte, or `*pos` past the end of
/// `bytes`) returns `None` and leaves `*pos` as it was.
fn take_byte(bytes: &[u8], pos: &mut usize, expected: u8) -> Option<()> {
    match bytes.get(*pos) {
        Some(&found) if found == expected => {
            *pos += 1;
            Some(())
        }
        _ => None,
    }
}
274
#[cfg(test)]
mod tests {
    use test_better_core::TestResult;
    use test_better_matchers::{check, eq, is_true};

    use super::*;

    /// With no rules, the set reports itself empty and `apply` is the
    /// identity on the text.
    #[test]
    fn an_empty_set_returns_its_input_unchanged() -> TestResult {
        let no_rules = Redactions::new();
        check!(no_rules.is_empty()).satisfies(is_true())?;
        check!(no_rules.apply("untouched")).satisfies(eq("untouched".to_string()))
    }

    /// Both an upper-case UUID and the all-zero UUID are rewritten.
    #[test]
    fn redact_uuids_replaces_every_canonical_uuid() -> TestResult {
        let input = "from 550E8400-E29B-41D4-A716-446655440000 to \
                     00000000-0000-0000-0000-000000000000";
        let expected = "from [uuid] to [uuid]".to_string();
        let uuids_only = Redactions::new().redact_uuids();
        check!(uuids_only.apply(input)).satisfies(eq(expected))
    }

    /// Text that merely resembles a UUID is copied through verbatim.
    #[test]
    fn redact_uuids_leaves_a_near_miss_alone() -> TestResult {
        // First candidate: last group is one hex digit short. Second
        // candidate: starts with non-hex characters.
        let input = "550e8400-e29b-41d4-a716-44665544000 and zzze8400-e29b";
        let uuids_only = Redactions::new().redact_uuids();
        check!(uuids_only.apply(input)).satisfies(eq(input.to_string()))
    }

    /// A `Z`-suffixed timestamp and one with a fraction plus a numeric
    /// offset are both rewritten.
    #[test]
    fn redact_rfc3339_timestamps_handles_z_and_offset_and_fractions() -> TestResult {
        let input = "at 2026-05-14T12:34:56Z and 2026-01-02T03:04:05.678-05:00 done";
        let expected = "at [timestamp] and [timestamp] done".to_string();
        let timestamps_only = Redactions::new().redact_rfc3339_timestamps();
        check!(timestamps_only.apply(input)).satisfies(eq(expected))
    }

    /// Later rules see the output of earlier ones: the `[uuid]` placeholder
    /// is itself replaced, and the final rule upper-cases the result.
    #[test]
    fn rules_run_in_order_and_compose() -> TestResult {
        let pipeline = Redactions::new()
            .redact_uuids()
            .replace("[uuid]", "<id>")
            .redact_with(|text| text.to_uppercase());
        let expected = "ID <ID>".to_string();
        check!(pipeline.apply("id 550e8400-e29b-41d4-a716-446655440000")).satisfies(eq(expected))
    }

    /// An empty needle must not insert the placeholder anywhere.
    #[test]
    fn replace_ignores_an_empty_needle() -> TestResult {
        let empty_needle = Redactions::new().replace("", "X");
        check!(empty_needle.apply("abc")).satisfies(eq("abc".to_string()))
    }

    /// A UUID directly followed by another hex digit is left alone, so the
    /// output is never `[uuid]f`. Also checks that `Debug` reports the rule
    /// count.
    #[test]
    fn a_uuid_glued_to_more_hex_is_not_redacted() -> TestResult {
        let input = "550e8400-e29b-41d4-a716-446655440000f";
        let uuids_only = Redactions::new().redact_uuids();
        check!(uuids_only.apply(input)).satisfies(eq(input.to_string()))?;
        // One rule was added above, so the Debug output names exactly one.
        check!(format!("{uuids_only:?}").contains("rules: 1")).satisfies(is_true())
    }
}