Skip to main content

snapbox/filter/
redactions.rs

1use std::borrow::Cow;
2use std::path::Path;
3use std::path::PathBuf;
4
/// Replace data with placeholders
///
/// This can be used for:
/// - Handling test-run dependent data like temp directories or elapsed time
/// - Making special characters more obvious (e.g. redacting a tab as `[TAB]`)
/// - Normalizing platform-specific data like [`std::env::consts::EXE_SUFFIX`]
///
/// # Examples
///
/// ```rust
/// let mut subst = snapbox::Redactions::new();
/// subst.insert("[LOCATION]", "World");
/// assert_eq!(subst.redact("Hello World!"), "Hello [LOCATION]!");
/// ```
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct Redactions {
    /// Values to search for, each mapped to the placeholder(s) that replace it.
    ///
    /// Created on demand; the map's ordering (see `RedactedValueInner`'s `Ord`)
    /// is the order in which `redact` applies the values.
    vars: Option<
        std::collections::BTreeMap<RedactedValueInner, std::collections::BTreeSet<&'static str>>,
    >,
    /// Placeholders that were inserted with an empty value, stored as `Str` entries.
    ///
    /// `clear_unused` strips these from expected patterns.
    unused: Option<std::collections::BTreeSet<RedactedValueInner>>,
}
26
27impl Redactions {
28    pub const fn new() -> Self {
29        Self {
30            vars: None,
31            unused: None,
32        }
33    }
34
35    pub(crate) fn with_exe() -> Self {
36        let mut redactions = Self::new();
37        redactions
38            .insert("[EXE]", std::env::consts::EXE_SUFFIX)
39            .unwrap();
40        redactions
41    }
42
43    /// Insert an additional match pattern
44    ///
45    /// `placeholder` must be enclosed in `[` and `]`.
46    ///
47    /// ```rust
48    /// let mut subst = snapbox::Redactions::new();
49    /// subst.insert("[EXE]", std::env::consts::EXE_SUFFIX);
50    /// ```
51    ///
52    /// With the `regex` feature, you can define patterns using regexes.
53    /// You can choose to replace a subset of the regex by giving it the named capture group
54    /// `redacted`.
55    ///
56    /// ```rust
57    /// # #[cfg(feature = "regex")] {
58    /// let mut subst = snapbox::Redactions::new();
59    /// subst.insert("[OBJECT]", regex::Regex::new("(?<redacted>(world|moon))").unwrap());
60    /// assert_eq!(subst.redact("Hello world!"), "Hello [OBJECT]!");
61    /// assert_eq!(subst.redact("Hello moon!"), "Hello [OBJECT]!");
62    /// assert_eq!(subst.redact("Hello other!"), "Hello other!");
63    /// # }
64    /// ```
65    pub fn insert(
66        &mut self,
67        placeholder: &'static str,
68        value: impl Into<RedactedValue>,
69    ) -> crate::assert::Result<()> {
70        let placeholder = validate_placeholder(placeholder)?;
71        let value = value.into();
72        if let Some(value) = value.inner {
73            self.vars
74                .get_or_insert(std::collections::BTreeMap::new())
75                .entry(value)
76                .or_default()
77                .insert(placeholder);
78        } else {
79            self.unused
80                .get_or_insert(std::collections::BTreeSet::new())
81                .insert(RedactedValueInner::Str(placeholder));
82        }
83        Ok(())
84    }
85
86    /// Insert additional match patterns
87    ///
88    /// Placeholders must be enclosed in `[` and `]`.
89    pub fn extend(
90        &mut self,
91        vars: impl IntoIterator<Item = (&'static str, impl Into<RedactedValue>)>,
92    ) -> crate::assert::Result<()> {
93        for (placeholder, value) in vars {
94            self.insert(placeholder, value)?;
95        }
96        Ok(())
97    }
98
99    pub fn remove(&mut self, placeholder: &'static str) -> crate::assert::Result<()> {
100        let placeholder = validate_placeholder(placeholder)?;
101        self.vars
102            .get_or_insert(std::collections::BTreeMap::new())
103            .retain(|_value, placeholders| {
104                placeholders.retain(|p| *p != placeholder);
105                !placeholders.is_empty()
106            });
107        Ok(())
108    }
109
110    /// Apply redaction only, no pattern-dependent globs
111    ///
112    /// # Examples
113    ///
114    /// ```rust
115    /// let mut subst = snapbox::Redactions::new();
116    /// subst.insert("[LOCATION]", "World");
117    /// let output = subst.redact("Hello World!");
118    /// assert_eq!(output, "Hello [LOCATION]!");
119    /// ```
120    pub fn redact(&self, input: &str) -> String {
121        let mut input = input.to_owned();
122        replace_many(
123            &mut input,
124            self.vars
125                .iter()
126                .flatten()
127                .flat_map(|(value, placeholders)| {
128                    placeholders
129                        .iter()
130                        .map(move |placeholder| (value, *placeholder))
131                }),
132        );
133        input
134    }
135
136    /// Clear unused redactions from expected data
137    ///
138    /// Some redactions can be conditionally present, like redacting [`std::env::consts::EXE_SUFFIX`].
139    /// When the redaction is not present, it needs to be removed from the expected data so it can
140    /// be matched against the actual data.
141    pub fn clear_unused<'v>(&self, pattern: &'v str) -> Cow<'v, str> {
142        if !self.unused.as_ref().map(|s| s.is_empty()).unwrap_or(false) && pattern.contains('[') {
143            let mut pattern = pattern.to_owned();
144            replace_many(
145                &mut pattern,
146                self.unused.iter().flatten().map(|var| (var, "")),
147            );
148            Cow::Owned(pattern)
149        } else {
150            Cow::Borrowed(pattern)
151        }
152    }
153}
154
/// A value that [`Redactions`] replaces with a placeholder
///
/// Built through the `From` conversions in this file. `inner` is `None` when
/// the source string or path was empty, which `Redactions::insert` records as
/// an unused placeholder instead of a value to search for.
#[derive(Clone)]
pub struct RedactedValue {
    inner: Option<RedactedValueInner>,
}
159
/// Concrete form of a value that is searched for and replaced
#[derive(Clone, Debug)]
enum RedactedValueInner {
    /// Borrowed literal text
    Str(&'static str),
    /// Owned literal text
    String(String),
    /// A path, matched in either of two spellings
    Path {
        /// The path text as it was supplied
        native: String,
        /// The path text after `normalize_paths`
        normalized: String,
    },
    /// A regular expression (only with the `regex` feature)
    #[cfg(feature = "regex")]
    Regex(regex::Regex),
}

impl RedactedValueInner {
    /// Locate the first occurrence of this value in `buffer`
    ///
    /// Returns the byte range to replace, or `None` when nothing matches.
    /// - `Path`: whichever spelling occurs first wins; on a tie the `native`
    ///   spelling is used.
    /// - `Regex`: the range of the `redacted` capture group when it took part
    ///   in the match, otherwise the range of the whole match.
    fn find_in(&self, buffer: &str) -> Option<std::ops::Range<usize>> {
        let locate = |needle: &str| {
            buffer
                .find(needle)
                .map(|start| start..(start + needle.len()))
        };
        match self {
            Self::Str(s) => locate(*s),
            Self::String(s) => locate(s.as_str()),
            // `min_by_key` keeps the first of equal minima, so `native` wins ties.
            Self::Path { native, normalized } => locate(native.as_str())
                .into_iter()
                .chain(locate(normalized.as_str()))
                .min_by_key(|hit| hit.start),
            #[cfg(feature = "regex")]
            Self::Regex(r) => {
                let captures = r.captures(buffer)?;
                let hit = match captures.name("redacted") {
                    Some(group) => group,
                    None => captures.get(0)?,
                };
                Some(hit.range())
            }
        }
    }

    /// Key used for equality and ordering
    ///
    /// The tuple is `(rank, Reverse(len), text)`: literal and path values
    /// (rank 0) sort before regexes (rank 1), and within a rank longer text
    /// sorts first. Paths are keyed by their normalized spelling.
    fn as_cmp(&self) -> (usize, std::cmp::Reverse<usize>, &str) {
        let (rank, text): (usize, &str) = match self {
            Self::Str(s) => (0, *s),
            Self::String(s) => (0, s.as_str()),
            Self::Path { normalized, .. } => (0, normalized.as_str()),
            #[cfg(feature = "regex")]
            Self::Regex(r) => (1, r.as_str()),
        };
        (rank, std::cmp::Reverse(text.len()), text)
    }
}
213
214impl From<&'static str> for RedactedValue {
215    fn from(inner: &'static str) -> Self {
216        if inner.is_empty() {
217            Self { inner: None }
218        } else {
219            Self {
220                inner: Some(RedactedValueInner::Str(inner)),
221            }
222        }
223    }
224}
225
226impl From<String> for RedactedValue {
227    fn from(inner: String) -> Self {
228        if inner.is_empty() {
229            Self { inner: None }
230        } else {
231            Self {
232                inner: Some(RedactedValueInner::String(inner)),
233            }
234        }
235    }
236}
237
238impl From<&'_ String> for RedactedValue {
239    fn from(inner: &'_ String) -> Self {
240        inner.clone().into()
241    }
242}
243
244impl From<Cow<'static, str>> for RedactedValue {
245    fn from(inner: Cow<'static, str>) -> Self {
246        match inner {
247            Cow::Borrowed(s) => s.into(),
248            Cow::Owned(s) => s.into(),
249        }
250    }
251}
252
253impl From<&'static Path> for RedactedValue {
254    fn from(inner: &'static Path) -> Self {
255        inner.to_owned().into()
256    }
257}
258
259impl From<PathBuf> for RedactedValue {
260    fn from(inner: PathBuf) -> Self {
261        if inner.as_os_str().is_empty() {
262            Self { inner: None }
263        } else {
264            let native = match inner.into_os_string().into_string() {
265                Ok(s) => s,
266                Err(os) => PathBuf::from(os).display().to_string(),
267            };
268            let normalized = crate::filter::normalize_paths(&native);
269            Self {
270                inner: Some(RedactedValueInner::Path { native, normalized }),
271            }
272        }
273    }
274}
275
276impl From<&'_ PathBuf> for RedactedValue {
277    fn from(inner: &'_ PathBuf) -> Self {
278        inner.clone().into()
279    }
280}
281
#[cfg(feature = "regex")]
impl From<regex::Regex> for RedactedValue {
    /// Wrap a regex; unlike the text conversions this always yields a searchable value
    fn from(inner: regex::Regex) -> Self {
        let inner = Some(RedactedValueInner::Regex(inner));
        Self { inner }
    }
}
290
#[cfg(feature = "regex")]
impl From<&'_ regex::Regex> for RedactedValue {
    /// Clone the regex and convert it the same way as an owned `Regex`
    fn from(inner: &'_ regex::Regex) -> Self {
        Self::from(regex::Regex::clone(inner))
    }
}
297
298impl PartialOrd for RedactedValueInner {
299    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
300        Some(self.cmp(other))
301    }
302}
303
304impl Ord for RedactedValueInner {
305    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
306        self.as_cmp().cmp(&other.as_cmp())
307    }
308}
309
310impl PartialEq for RedactedValueInner {
311    fn eq(&self, other: &Self) -> bool {
312        self.as_cmp().eq(&other.as_cmp())
313    }
314}
315
316impl Eq for RedactedValueInner {}
317
318/// Replacements is `(from, to)`
319fn replace_many<'a>(
320    buffer: &mut String,
321    replacements: impl IntoIterator<Item = (&'a RedactedValueInner, &'a str)>,
322) {
323    for (var, replace) in replacements {
324        let mut index = 0;
325        while let Some(offset) = var.find_in(&buffer[index..]) {
326            let old_range = (index + offset.start)..(index + offset.end);
327            buffer.replace_range(old_range, replace);
328            index += offset.start + replace.len();
329        }
330    }
331}
332
333fn validate_placeholder(placeholder: &'static str) -> crate::assert::Result<&'static str> {
334    if !placeholder.starts_with('[') || !placeholder.ends_with(']') {
335        return Err(format!("Key `{placeholder}` is not enclosed in []").into());
336    }
337
338    if placeholder[1..(placeholder.len() - 1)]
339        .find(|c: char| !c.is_ascii_uppercase() && c != '_')
340        .is_some()
341    {
342        return Err(format!("Key `{placeholder}` can only be A-Z but ").into());
343    }
344
345    Ok(placeholder)
346}
347
#[cfg(test)]
mod test {
    use super::*;

    /// Each case pairs a placeholder with whether `validate_placeholder` must accept it
    #[test]
    fn test_validate_placeholder() {
        const CASES: &[(&str, bool)] = &[
            // Missing one of the brackets
            ("[HELLO", false),
            ("HELLO]", false),
            // Well-formed
            ("[HELLO]", true),
            ("[HELLO_WORLD]", true),
            // Characters other than `A-Z` and `_`
            ("[hello]", false),
            ("[HE  O]", false),
        ];
        for &(placeholder, expected) in CASES {
            let actual = validate_placeholder(placeholder).is_ok();
            assert_eq!(expected, actual, "placeholder={placeholder:?}");
        }
    }
}