Skip to main content

shell_sanitize_rules/
control_char.rs

1use crate::charset::CharSet;
2use shell_sanitize::{Rule, RuleResult, RuleViolation};
3
4/// Rejects input containing control characters (U+0000–U+001F, U+007F, U+0080–U+009F).
5///
6/// Allowed exceptions can be configured (e.g. tab `\t`).
7///
8/// # Rationale
9///
10/// Control characters (especially newline, NUL, ESC) can cause:
11/// - Command splitting when injected into shell strings
12/// - Terminal escape sequence attacks
13/// - NUL-byte truncation in C-backed APIs
14pub struct ControlCharRule {
15    /// Characters in this set are allowed even though they are control chars.
16    allowed: CharSet,
17}
18
19impl Default for ControlCharRule {
20    fn default() -> Self {
21        Self {
22            allowed: CharSet::from_chars(&[]),
23        }
24    }
25}
26
27impl ControlCharRule {
28    /// Create a rule that permits specific control characters.
29    ///
30    /// ```
31    /// use shell_sanitize_rules::ControlCharRule;
32    ///
33    /// // Allow tab but reject everything else
34    /// let rule = ControlCharRule::allowing(&['\t']);
35    /// ```
36    pub fn allowing(chars: &[char]) -> Self {
37        Self {
38            allowed: CharSet::from_chars(chars),
39        }
40    }
41}
42
43impl Rule for ControlCharRule {
44    fn name(&self) -> &'static str {
45        "control_char"
46    }
47
48    fn check(&self, input: &str) -> RuleResult {
49        let violations: Vec<_> = input
50            .char_indices()
51            .filter(|(_, c)| is_control(*c) && !self.allowed.contains(*c))
52            .map(|(i, c)| {
53                RuleViolation::new(
54                    self.name(),
55                    format!("control character U+{:04X} found", c as u32),
56                )
57                .at(i)
58                .with_fragment(format!("U+{:04X}", c as u32))
59            })
60            .collect();
61
62        if violations.is_empty() {
63            Ok(())
64        } else {
65            Err(violations)
66        }
67    }
68}
69
/// Returns `true` when `c` is a control character:
///
/// - C0 controls: U+0000–U+001F
/// - DEL: U+007F
/// - C1 controls: U+0080–U+009F
///
/// These ranges are exactly Unicode general category `Cc`, which is what
/// [`char::is_control`] tests, so the standard-library predicate is used
/// instead of a hand-maintained range list.
fn is_control(c: char) -> bool {
    c.is_control()
}
75}