shell_sanitize_rules/control_char.rs
1use crate::charset::CharSet;
2use shell_sanitize::{Rule, RuleResult, RuleViolation};
3
4/// Rejects input containing control characters (U+0000–U+001F, U+007F, U+0080–U+009F).
5///
6/// Allowed exceptions can be configured (e.g. tab `\t`).
7///
8/// # Rationale
9///
10/// Control characters (especially newline, NUL, ESC) can cause:
11/// - Command splitting when injected into shell strings
12/// - Terminal escape sequence attacks
13/// - NUL-byte truncation in C-backed APIs
14pub struct ControlCharRule {
15 /// Characters in this set are allowed even though they are control chars.
16 allowed: CharSet,
17}
18
19impl Default for ControlCharRule {
20 fn default() -> Self {
21 Self {
22 allowed: CharSet::from_chars(&[]),
23 }
24 }
25}
26
27impl ControlCharRule {
28 /// Create a rule that permits specific control characters.
29 ///
30 /// ```
31 /// use shell_sanitize_rules::ControlCharRule;
32 ///
33 /// // Allow tab but reject everything else
34 /// let rule = ControlCharRule::allowing(&['\t']);
35 /// ```
36 pub fn allowing(chars: &[char]) -> Self {
37 Self {
38 allowed: CharSet::from_chars(chars),
39 }
40 }
41}
42
43impl Rule for ControlCharRule {
44 fn name(&self) -> &'static str {
45 "control_char"
46 }
47
48 fn check(&self, input: &str) -> RuleResult {
49 let violations: Vec<_> = input
50 .char_indices()
51 .filter(|(_, c)| is_control(*c) && !self.allowed.contains(*c))
52 .map(|(i, c)| {
53 RuleViolation::new(
54 self.name(),
55 format!("control character U+{:04X} found", c as u32),
56 )
57 .at(i)
58 .with_fragment(format!("U+{:04X}", c as u32))
59 })
60 .collect();
61
62 if violations.is_empty() {
63 Ok(())
64 } else {
65 Err(violations)
66 }
67 }
68}
69
/// Returns `true` when `c` is a C0 control (U+0000–U+001F), DEL (U+007F),
/// or a C1 control (U+0080–U+009F).
fn is_control(c: char) -> bool {
    let code_point = u32::from(c);
    // DEL and the C1 block are adjacent, so they form one contiguous range.
    code_point <= 0x1F || (0x7F..=0x9F).contains(&code_point)
}