Skip to main content

zeph_common/
sanitize.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Shared sanitization primitives.
5//!
6//! Domain-specific sanitization belongs in the respective crates. This module
7//! only provides the shared low-level primitives (control char stripping,
8//! null byte removal) that multiple crates need.
9
/// Strip Unicode control characters and `BiDi` formatting codepoints from `s`.
///
/// Two classes of characters are removed:
///
/// * Everything [`char::is_control`] reports as a control character, i.e. the
///   Unicode general category `Cc`: the C0 controls (U+0000–U+001F), DEL
///   (U+007F) **and** the C1 controls (U+0080–U+009F). Note that this
///   includes `\t`, `\n` and `\r`.
/// * The `BiDi` embedding/override codepoints (U+202A–U+202E) and the `BiDi`
///   isolate codepoints (U+2066–U+2069), which can be used to visually
///   obscure malicious content.
///
/// Every other character is kept, in its original order.
#[must_use]
pub fn strip_control_chars(s: &str) -> String {
    // The output is never longer than the input, so reserve once up front:
    // `filter` discards the iterator's size hint, which would otherwise make
    // the `String` grow step by step.
    let mut out = String::with_capacity(s.len());
    out.extend(s.chars().filter(|&c| {
        !c.is_control() && !matches!(c, '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}')
    }));
    out
}
20
/// Return a copy of `s` with every NUL character (`\0`, U+0000) dropped.
///
/// All other characters are preserved in their original order.
#[must_use]
pub fn strip_null_bytes(s: &str) -> String {
    s.replace('\0', "")
}
26
#[cfg(test)]
mod tests {
    use super::*;

    /// C0 controls and DEL are dropped while the surrounding text survives.
    #[test]
    fn control_chars_removed() {
        let s = "hello\x00\x01\x1f world\x7f";
        assert_eq!(strip_control_chars(s), "hello world");
    }

    /// `char::is_control` also matches the C1 range (U+0080–U+009F).
    #[test]
    fn c1_control_chars_removed() {
        assert_eq!(strip_control_chars("a\u{0080}b\u{0085}c\u{009F}d"), "abcd");
    }

    /// Tab, newline and carriage return are control characters too.
    #[test]
    fn whitespace_controls_removed() {
        assert_eq!(strip_control_chars("line1\r\n\tline2"), "line1line2");
    }

    /// The override codepoints disappear and the visible text is left intact.
    #[test]
    fn bidi_overrides_removed() {
        let bidi = "\u{202A}hidden\u{202C}text";
        let result = strip_control_chars(bidi);
        assert!(!result.contains('\u{202A}'));
        assert!(!result.contains('\u{202C}'));
        assert_eq!(result, "hiddentext");
    }

    /// Every codepoint in both stripped `BiDi` ranges is removed.
    #[test]
    fn all_bidi_codepoints_removed() {
        let s = "a\u{202A}\u{202B}\u{202C}\u{202D}\u{202E}b\u{2066}\u{2067}\u{2068}\u{2069}c";
        assert_eq!(strip_control_chars(s), "abc");
    }

    /// Codepoints directly adjacent to the stripped ranges are not touched.
    #[test]
    fn bidi_range_neighbours_kept() {
        let s = "\u{2029}\u{202F}\u{206A}";
        assert_eq!(strip_control_chars(s), s);
    }

    #[test]
    fn normal_text_unchanged() {
        assert_eq!(strip_control_chars("hello world"), "hello world");
    }

    #[test]
    fn control_chars_empty_string() {
        assert_eq!(strip_control_chars(""), "");
    }

    #[test]
    fn null_bytes_removed() {
        assert_eq!(strip_null_bytes("hel\0lo"), "hello");
    }

    /// Leading, trailing and consecutive NULs are all dropped.
    #[test]
    fn multiple_null_bytes_removed() {
        assert_eq!(strip_null_bytes("\0a\0\0b\0"), "ab");
        assert_eq!(strip_null_bytes("\0\0"), "");
    }

    /// Only NUL is removed; other control characters pass through.
    #[test]
    fn null_bytes_keeps_other_controls() {
        assert_eq!(strip_null_bytes("a\tb\nc"), "a\tb\nc");
    }

    #[test]
    fn null_bytes_empty_string() {
        assert_eq!(strip_null_bytes(""), "");
    }
}