Skip to main content

ftui_render/
sanitize.rs

1#![forbid(unsafe_code)]
2
3//! Sanitization for untrusted terminal output.
4//!
5//! This module implements the sanitize-by-default policy (ADR-006) to protect
6//! against terminal escape injection attacks. Any untrusted bytes displayed
7//! as logs, tool output, or LLM streams must be treated as **data**, not
8//! executed as terminal control sequences.
9//!
10//! # Threat Model
11//!
12//! Malicious content in logs could:
13//! 1. Manipulate cursor position (break inline mode)
14//! 2. Change terminal colors/modes persistently
15//! 3. Hide text or show fake prompts (social engineering)
16//! 4. Trigger terminal queries that exfiltrate data
17//! 5. Set window title to misleading values
18//!
19//! # Performance
20//!
//! - **Fast path (95%+ of cases)**: Scan for ESC and DEL using memchr, then
//!   for forbidden C0 controls and C1 controls. If none are found, the
//!   content is safe - return borrowed slice.
//!   Zero allocation in common case, < 100ns for typical log line.
24//!
25//! - **Slow path**: Allocate output buffer, strip control sequences,
26//!   return owned String. Linear in input size.
27//!
28//! # Usage
29//!
30//! ```
31//! use ftui_render::sanitize::sanitize;
32//! use std::borrow::Cow;
33//!
34//! // Fast path - no escapes, returns borrowed
35//! let safe = sanitize("Normal log message");
36//! assert!(matches!(safe, Cow::Borrowed(_)));
37//!
38//! // Slow path - escapes stripped, returns owned
39//! let malicious = sanitize("Evil \x1b[31mred\x1b[0m text");
40//! assert!(matches!(malicious, Cow::Owned(_)));
41//! assert_eq!(malicious.as_ref(), "Evil red text");
42//! ```
43
44use std::borrow::Cow;
45
46use memchr::memchr;
47
48/// Sanitize untrusted text for safe terminal display.
49///
50/// # Fast Path
51/// If no ESC (0x1B) found and no forbidden C0 controls, returns borrowed input
52/// with zero allocation.
53///
54/// # Slow Path
55/// Strips all escape sequences and forbidden C0 controls, returns owned String.
56///
57/// # What Gets Stripped
58/// - ESC (0x1B) and all following CSI/OSC/DCS/APC sequences
59/// - C0 controls except: TAB (0x09), LF (0x0A), CR (0x0D)
60/// - C1 controls (U+0080..U+009F) — these are the 8-bit equivalents of
61///   ESC-prefixed sequences and some terminals honor them
62/// - DEL (0x7F)
63///
64/// # What Gets Preserved
65/// - TAB, LF, CR (allowed control characters)
66/// - All printable ASCII (0x20-0x7E)
67/// - All valid UTF-8 sequences above U+009F
68#[inline]
69pub fn sanitize(input: &str) -> Cow<'_, str> {
70    let bytes = input.as_bytes();
71
72    // Fast path: check for any ESC byte, forbidden C0 controls, DEL, or C1 controls.
73    // C1 controls (U+0080..U+009F) are encoded in UTF-8 as \xC2\x80..\xC2\x9F.
74    if memchr(0x1B, bytes).is_none()
75        && memchr(0x7F, bytes).is_none()
76        && !has_forbidden_c0(bytes)
77        && !has_c1_controls(bytes)
78    {
79        return Cow::Borrowed(input);
80    }
81
82    // Slow path: strip escape sequences
83    Cow::Owned(sanitize_slow(input))
84}
85
86/// Check if any forbidden C0 control characters are present.
87///
88/// Forbidden: 0x00-0x08, 0x0B-0x0C, 0x0E-0x1A, 0x1C-0x1F
89/// Allowed: TAB (0x09), LF (0x0A), CR (0x0D)
90#[inline]
91fn has_forbidden_c0(bytes: &[u8]) -> bool {
92    bytes.iter().any(|&b| is_forbidden_c0(b))
93}
94
/// Classify a single byte: `true` when it is a C0 control that must be stripped.
///
/// Every byte below SPACE (0x20) is a C0 control. TAB, LF and CR are kept, and
/// ESC (0x1B) is excluded because escape sequences are handled separately.
#[inline]
const fn is_forbidden_c0(b: u8) -> bool {
    b < 0x20 && !matches!(b, 0x09 | 0x0A | 0x0D | 0x1B)
}
103
/// Report whether `bytes` contains a C1 control character (U+0080..U+009F).
///
/// UTF-8 encodes these code points as the two-byte sequence
/// \xC2\x80..\xC2\x9F, so the scan looks for a 0xC2 byte immediately followed
/// by a byte in 0x80..=0x9F. C1 controls include CSI (U+009B), OSC (U+009D),
/// DCS (U+0090), APC (U+009F), etc. — some terminals honor these as
/// equivalent to their ESC-prefixed forms.
#[inline]
fn has_c1_controls(bytes: &[u8]) -> bool {
    bytes
        .windows(2)
        .any(|pair| matches!(pair, [0xC2, 0x80..=0x9F]))
}
115
116/// Slow path: strip escape sequences and forbidden controls.
117fn sanitize_slow(input: &str) -> String {
118    let bytes = input.as_bytes();
119    let mut output = String::with_capacity(input.len());
120    let mut i = 0;
121
122    while i < bytes.len() {
123        let b = bytes[i];
124        match b {
125            // ESC - start of escape sequence
126            0x1B => {
127                i = skip_escape_sequence(bytes, i);
128            }
129            // Allowed C0 controls: TAB, LF, CR
130            0x09 | 0x0A | 0x0D => {
131                output.push(b as char);
132                i += 1;
133            }
134            // Forbidden C0 controls - skip
135            0x00..=0x08 | 0x0B..=0x0C | 0x0E..=0x1A | 0x1C..=0x1F => {
136                i += 1;
137            }
138            // DEL - skip
139            0x7F => {
140                i += 1;
141            }
142            // Printable ASCII
143            0x20..=0x7E => {
144                output.push(b as char);
145                i += 1;
146            }
147            // Start of UTF-8 sequence (high bit set)
148            0x80..=0xFF => {
149                if let Some((c, len)) = decode_utf8_char(&bytes[i..]) {
150                    // Skip C1 controls (U+0080..U+009F) — these are the 8-bit
151                    // equivalents of ESC-prefixed sequences (CSI, OSC, DCS, etc.)
152                    if !('\u{0080}'..='\u{009F}').contains(&c) {
153                        output.push(c);
154                    }
155                    i += len;
156                } else {
157                    // Invalid UTF-8, skip byte
158                    i += 1;
159                }
160            }
161        }
162    }
163
164    output
165}
166
/// Given the index of an ESC byte, return the index of the first byte after
/// the escape sequence it introduces.
///
/// Handles:
/// - CSI: ESC [ ... final_byte (0x40-0x7E)
/// - OSC: ESC ] ... (BEL or ST)
/// - DCS: ESC P ... ST
/// - PM: ESC ^ ... ST
/// - APC: ESC _ ... ST
/// - Single-char escapes: ESC char
///
/// The returned index is always at least `start + 1`, so the ESC itself is
/// consumed even when nothing recognizable follows it.
fn skip_escape_sequence(bytes: &[u8], start: usize) -> usize {
    let after_esc = start + 1;
    let introducer = match bytes.get(after_esc) {
        Some(&byte) => byte,
        // ESC was the last byte of the input.
        None => return after_esc,
    };
    let payload = after_esc + 1;

    match introducer {
        // CSI sequence: ESC [ params... final_byte
        b'[' => csi_end(bytes, payload),
        // OSC sequence: ESC ] ... (BEL or ST)
        b']' => control_string_end(bytes, payload, true),
        // DCS/PM/APC: ESC P/^/_ ... ST (BEL is not a terminator here)
        b'P' | b'^' | b'_' => control_string_end(bytes, payload, false),
        // Single-char escape sequences (ESC followed by 0x20-0x7E)
        0x20..=0x7E => payload,
        // Unknown or invalid - just skip the ESC
        _ => after_esc,
    }
}

/// Find the end of a CSI body that begins at `from` (just past `ESC [`).
///
/// Parameter and intermediate bytes (0x20-0x3F) are consumed. A final byte
/// (0x40-0x7E) ends the sequence and is consumed too. Any other byte (e.g.
/// newline, control char, or high byte) aborts the sequence and is left
/// unconsumed so valid text is not eaten. Input that ends first is consumed
/// entirely.
fn csi_end(bytes: &[u8], from: usize) -> usize {
    let stop = bytes[from..]
        .iter()
        .position(|b| !(0x20..=0x3F).contains(b));

    match stop {
        Some(offset) => {
            let at = from + offset;
            if (0x40..=0x7E).contains(&bytes[at]) {
                at + 1
            } else {
                at
            }
        }
        None => bytes.len(),
    }
}

/// Find the end of an OSC/DCS/PM/APC payload that begins at `from`.
///
/// The payload runs up to the first byte below 0x20:
/// - BEL ends the sequence and is consumed, but only when `bel_terminates`
///   is set (OSC);
/// - ESC followed by `\` (ST) ends the sequence and both bytes are consumed;
/// - a lone ESC or any other C0 control (e.g. newline) aborts the sequence
///   and is left unconsumed, so the caller re-processes that byte and the
///   rest of the text is not swallowed.
///
/// Input that ends first is consumed entirely.
fn control_string_end(bytes: &[u8], from: usize, bel_terminates: bool) -> usize {
    match bytes[from..].iter().position(|&b| b < 0x20) {
        None => bytes.len(),
        Some(offset) => {
            let at = from + offset;
            match bytes[at] {
                0x07 if bel_terminates => at + 1,
                0x1B if bytes.get(at + 1) == Some(&b'\\') => at + 2,
                _ => at,
            }
        }
    }
}
258
/// Decode the single UTF-8 character at the start of `bytes`.
///
/// Returns the character and the number of bytes it occupies, or `None` when
/// the slice is empty, begins with a byte that cannot start a character, is
/// too short for the announced length, or is not well-formed UTF-8
/// (bad continuation byte, overlong encoding, surrogate, or a value above
/// U+10FFFF).
fn decode_utf8_char(bytes: &[u8]) -> Option<(char, usize)> {
    let lead = *bytes.first()?;

    // The lead byte alone announces how many bytes the character spans.
    let width = match lead {
        0x00..=0x7F => return Some((char::from(lead), 1)),
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF7 => 4,
        _ => return None, // Continuation byte or invalid lead byte
    };

    // Validate exactly the announced bytes. The standard validator enforces
    // the continuation, overlong (RFC 3629) and scalar-value rules at once.
    let encoded = bytes.get(..width)?;
    let ch = std::str::from_utf8(encoded).ok()?.chars().next()?;
    Some((ch, width))
}
302
/// A string tagged with how far its contents can be trusted.
///
/// The variant records whether the text went through sanitization or was
/// supplied by a source the caller vouches for.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Text<'a> {
    /// Text whose escape sequences have been stripped.
    Sanitized(Cow<'a, str>),

    /// Text taken as-is; it may still contain ANSI sequences.
    /// Use only for content that comes from a trusted source.
    Trusted(Cow<'a, str>),
}
316
317impl<'a> Text<'a> {
318    /// Create sanitized text from an untrusted source.
319    #[inline]
320    pub fn sanitized(s: &'a str) -> Self {
321        Text::Sanitized(sanitize(s))
322    }
323
324    /// Create from a trusted source (ANSI sequences allowed).
325    ///
326    /// # Safety
327    /// Only use with content from trusted sources. Untrusted content
328    /// can corrupt terminal state or deceive users.
329    #[inline]
330    pub fn trusted(s: &'a str) -> Self {
331        Text::Trusted(Cow::Borrowed(s))
332    }
333
334    /// Create owned sanitized text.
335    #[inline]
336    pub fn sanitized_owned(s: String) -> Self {
337        match sanitize(&s) {
338            Cow::Borrowed(_) => Text::Sanitized(Cow::Owned(s)),
339            Cow::Owned(owned) => Text::Sanitized(Cow::Owned(owned)),
340        }
341    }
342
343    /// Create owned trusted text.
344    #[inline]
345    pub fn trusted_owned(s: String) -> Self {
346        Text::Trusted(Cow::Owned(s))
347    }
348
349    /// Get the inner string slice.
350    #[inline]
351    pub fn as_str(&self) -> &str {
352        match self {
353            Text::Sanitized(cow) => cow.as_ref(),
354            Text::Trusted(cow) => cow.as_ref(),
355        }
356    }
357
358    /// Check if this text is sanitized.
359    #[inline]
360    pub fn is_sanitized(&self) -> bool {
361        matches!(self, Text::Sanitized(_))
362    }
363
364    /// Check if this text is trusted.
365    #[inline]
366    pub fn is_trusted(&self) -> bool {
367        matches!(self, Text::Trusted(_))
368    }
369
370    /// Convert to owned version.
371    pub fn into_owned(self) -> Text<'static> {
372        match self {
373            Text::Sanitized(cow) => Text::Sanitized(Cow::Owned(cow.into_owned())),
374            Text::Trusted(cow) => Text::Trusted(Cow::Owned(cow.into_owned())),
375        }
376    }
377}
378
379impl AsRef<str> for Text<'_> {
380    fn as_ref(&self) -> &str {
381        self.as_str()
382    }
383}
384
385impl std::fmt::Display for Text<'_> {
386    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
387        write!(f, "{}", self.as_str())
388    }
389}
390
391#[cfg(test)]
392mod tests {
393    use super::*;
394
395    // ============== Fast Path Tests ==============
396
397    #[test]
398    fn fast_path_no_escape() {
399        let input = "Normal log message without escapes";
400        let result = sanitize(input);
401        assert!(matches!(result, Cow::Borrowed(_)));
402        assert_eq!(result.as_ref(), input);
403    }
404
405    #[test]
406    fn fast_path_with_allowed_controls() {
407        let input = "Line1\nLine2\tTabbed\rCarriage";
408        let result = sanitize(input);
409        assert!(matches!(result, Cow::Borrowed(_)));
410        assert_eq!(result.as_ref(), input);
411    }
412
413    #[test]
414    fn fast_path_unicode() {
415        let input = "Hello \u{4e16}\u{754c} \u{1f468}\u{200d}\u{1f469}\u{200d}\u{1f467}";
416        let result = sanitize(input);
417        assert!(matches!(result, Cow::Borrowed(_)));
418        assert_eq!(result.as_ref(), input);
419    }
420
421    #[test]
422    fn fast_path_empty() {
423        let input = "";
424        let result = sanitize(input);
425        assert!(matches!(result, Cow::Borrowed(_)));
426        assert_eq!(result.as_ref(), "");
427    }
428
429    #[test]
430    fn fast_path_printable_ascii() {
431        let input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()";
432        let result = sanitize(input);
433        assert!(matches!(result, Cow::Borrowed(_)));
434        assert_eq!(result.as_ref(), input);
435    }
436
437    // ============== Slow Path: CSI Sequences ==============
438
439    #[test]
440    fn slow_path_strips_sgr_color() {
441        let input = "Hello \x1b[31mred\x1b[0m world";
442        let result = sanitize(input);
443        assert!(matches!(result, Cow::Owned(_)));
444        assert_eq!(result.as_ref(), "Hello red world");
445    }
446
447    #[test]
448    fn slow_path_strips_cursor_movement() {
449        let input = "Before\x1b[2;5HAfter";
450        let result = sanitize(input);
451        assert_eq!(result.as_ref(), "BeforeAfter");
452    }
453
454    #[test]
455    fn slow_path_strips_erase() {
456        let input = "Text\x1b[2JCleared";
457        let result = sanitize(input);
458        assert_eq!(result.as_ref(), "TextCleared");
459    }
460
461    #[test]
462    fn slow_path_strips_multiple_sequences() {
463        let input = "\x1b[1mBold\x1b[0m \x1b[4mUnderline\x1b[24m \x1b[38;5;196mColor\x1b[0m";
464        let result = sanitize(input);
465        assert_eq!(result.as_ref(), "Bold Underline Color");
466    }
467
468    // ============== Slow Path: OSC Sequences ==============
469
470    #[test]
471    fn slow_path_strips_osc_title_bel() {
472        // OSC 0: set title, terminated by BEL
473        let input = "Text\x1b]0;Evil Title\x07More";
474        let result = sanitize(input);
475        assert_eq!(result.as_ref(), "TextMore");
476    }
477
478    #[test]
479    fn slow_path_strips_osc_title_st() {
480        // OSC 0: set title, terminated by ST
481        let input = "Text\x1b]0;Evil Title\x1b\\More";
482        let result = sanitize(input);
483        assert_eq!(result.as_ref(), "TextMore");
484    }
485
486    #[test]
487    fn slow_path_strips_osc8_hyperlink() {
488        // OSC 8: hyperlink
489        let input = "Click \x1b]8;;https://evil.com\x07here\x1b]8;;\x07 please";
490        let result = sanitize(input);
491        assert_eq!(result.as_ref(), "Click here please");
492    }
493
494    // ============== Slow Path: DCS/PM/APC ==============
495
496    #[test]
497    fn slow_path_strips_dcs() {
498        let input = "Before\x1bPdevice control string\x1b\\After";
499        let result = sanitize(input);
500        assert_eq!(result.as_ref(), "BeforeAfter");
501    }
502
503    #[test]
504    fn slow_path_strips_apc() {
505        let input = "Before\x1b_application program command\x1b\\After";
506        let result = sanitize(input);
507        assert_eq!(result.as_ref(), "BeforeAfter");
508    }
509
510    #[test]
511    fn slow_path_strips_pm() {
512        let input = "Before\x1b^privacy message\x1b\\After";
513        let result = sanitize(input);
514        assert_eq!(result.as_ref(), "BeforeAfter");
515    }
516
517    #[test]
518    fn slow_path_strips_osc52_clipboard() {
519        let input = "Before\x1b]52;c;SGVsbG8=\x07After";
520        let result = sanitize(input);
521        assert_eq!(result.as_ref(), "BeforeAfter");
522    }
523
524    #[test]
525    fn slow_path_strips_osc52_clipboard_st() {
526        let input = "Before\x1b]52;c;SGVsbG8=\x1b\\After";
527        let result = sanitize(input);
528        assert_eq!(result.as_ref(), "BeforeAfter");
529    }
530
531    #[test]
532    fn slow_path_strips_private_modes() {
533        let input = "A\x1b[?1049hB\x1b[?1000hC\x1b[?2004hD";
534        let result = sanitize(input);
535        assert_eq!(result.as_ref(), "ABCD");
536    }
537
538    // ============== Slow Path: C0 Controls ==============
539
540    #[test]
541    fn slow_path_strips_nul() {
542        let input = "Hello\x00World";
543        let result = sanitize(input);
544        assert_eq!(result.as_ref(), "HelloWorld");
545    }
546
547    #[test]
548    fn slow_path_strips_bel() {
549        // BEL (0x07) outside of OSC should be stripped
550        let input = "Hello\x07World";
551        let result = sanitize(input);
552        assert_eq!(result.as_ref(), "HelloWorld");
553    }
554
555    #[test]
556    fn slow_path_strips_backspace() {
557        let input = "Hello\x08World";
558        let result = sanitize(input);
559        assert_eq!(result.as_ref(), "HelloWorld");
560    }
561
562    #[test]
563    fn slow_path_strips_form_feed() {
564        let input = "Hello\x0CWorld";
565        let result = sanitize(input);
566        assert_eq!(result.as_ref(), "HelloWorld");
567    }
568
569    #[test]
570    fn slow_path_strips_vertical_tab() {
571        let input = "Hello\x0BWorld";
572        let result = sanitize(input);
573        assert_eq!(result.as_ref(), "HelloWorld");
574    }
575
576    #[test]
577    fn slow_path_strips_del() {
578        let input = "Hello\x7FWorld";
579        let result = sanitize(input);
580        assert_eq!(result.as_ref(), "HelloWorld");
581    }
582
583    #[test]
584    fn slow_path_preserves_tab_lf_cr() {
585        let input = "Line1\nLine2\tTabbed\rReturn";
586        // This should trigger slow path due to needing to scan
587        // but preserve tab/lf/cr
588        let result = sanitize(input);
589        assert_eq!(result.as_ref(), "Line1\nLine2\tTabbed\rReturn");
590    }
591
592    // ============== Edge Cases ==============
593
594    #[test]
595    fn handles_truncated_csi() {
596        let input = "Hello\x1b[";
597        let result = sanitize(input);
598        assert!(!result.contains('\x1b'));
599        assert_eq!(result.as_ref(), "Hello");
600    }
601
602    #[test]
603    fn handles_truncated_dcs() {
604        let input = "Hello\x1bP1;2;3";
605        let result = sanitize(input);
606        assert!(!result.contains('\x1b'));
607        assert_eq!(result.as_ref(), "Hello");
608    }
609
610    #[test]
611    fn handles_truncated_apc() {
612        let input = "Hello\x1b_test";
613        let result = sanitize(input);
614        assert!(!result.contains('\x1b'));
615        assert_eq!(result.as_ref(), "Hello");
616    }
617
618    #[test]
619    fn handles_truncated_pm() {
620        let input = "Hello\x1b^secret";
621        let result = sanitize(input);
622        assert!(!result.contains('\x1b'));
623        assert_eq!(result.as_ref(), "Hello");
624    }
625
626    #[test]
627    fn handles_truncated_osc() {
628        let input = "Hello\x1b]0;Title";
629        let result = sanitize(input);
630        assert!(!result.contains('\x1b'));
631        assert_eq!(result.as_ref(), "Hello");
632    }
633
634    #[test]
635    fn handles_esc_at_end() {
636        let input = "Hello\x1b";
637        let result = sanitize(input);
638        assert_eq!(result.as_ref(), "Hello");
639    }
640
641    #[test]
642    fn handles_lone_esc() {
643        let input = "\x1b";
644        let result = sanitize(input);
645        assert_eq!(result.as_ref(), "");
646    }
647
648    #[test]
649    fn handles_single_char_escape() {
650        // ESC 7 (save cursor) and ESC 8 (restore cursor)
651        let input = "Before\x1b7Middle\x1b8After";
652        let result = sanitize(input);
653        assert_eq!(result.as_ref(), "BeforeMiddleAfter");
654    }
655
656    #[test]
657    fn handles_unknown_escape() {
658        // ESC followed by a byte that's not a valid escape introducer
659        // Using a valid printable byte that's not a known escape char
660        let input = "Before\x1b!After";
661        let result = sanitize(input);
662        // Single-char escape: ESC ! gets stripped
663        assert_eq!(result.as_ref(), "BeforeAfter");
664    }
665
666    // ============== Unicode Tests ==============
667
668    #[test]
669    fn preserves_unicode_characters() {
670        let input = "\u{4e16}\u{754c}"; // Chinese characters
671        let result = sanitize(input);
672        assert_eq!(result.as_ref(), "\u{4e16}\u{754c}");
673    }
674
675    #[test]
676    fn preserves_emoji() {
677        let input = "\u{1f600}\u{1f389}\u{1f680}"; // Emoji
678        let result = sanitize(input);
679        assert_eq!(result.as_ref(), "\u{1f600}\u{1f389}\u{1f680}");
680    }
681
682    #[test]
683    fn preserves_combining_characters() {
684        // e with combining acute accent
685        let input = "e\u{0301}";
686        let result = sanitize(input);
687        assert_eq!(result.as_ref(), "e\u{0301}");
688    }
689
690    #[test]
691    fn mixed_unicode_and_escapes() {
692        let input = "\u{4e16}\x1b[31m\u{754c}\x1b[0m";
693        let result = sanitize(input);
694        assert_eq!(result.as_ref(), "\u{4e16}\u{754c}");
695    }
696
697    // ============== Text Type Tests ==============
698
699    #[test]
700    fn text_sanitized() {
701        let text = Text::sanitized("Hello \x1b[31mWorld\x1b[0m");
702        assert!(text.is_sanitized());
703        assert!(!text.is_trusted());
704        assert_eq!(text.as_str(), "Hello World");
705    }
706
707    #[test]
708    fn text_trusted() {
709        let text = Text::trusted("Hello \x1b[31mWorld\x1b[0m");
710        assert!(!text.is_sanitized());
711        assert!(text.is_trusted());
712        assert_eq!(text.as_str(), "Hello \x1b[31mWorld\x1b[0m");
713    }
714
715    #[test]
716    fn text_into_owned() {
717        let text = Text::sanitized("Hello");
718        let owned = text.into_owned();
719        assert!(owned.is_sanitized());
720        assert_eq!(owned.as_str(), "Hello");
721    }
722
723    #[test]
724    fn text_display() {
725        let text = Text::sanitized("Hello");
726        assert_eq!(format!("{text}"), "Hello");
727    }
728
729    // ============== Property Tests (basic) ==============
730
731    #[test]
732    fn output_never_contains_esc() {
733        let inputs = [
734            "Normal text",
735            "\x1b[31mRed\x1b[0m",
736            "\x1b]0;Title\x07",
737            "\x1bPDCS\x1b\\",
738            "Mixed\x1b[1m\x1b]8;;url\x07text\x1b]8;;\x07\x1b[0m",
739            "",
740            "\x1b",
741            "\x1b[",
742            "\x1b]",
743        ];
744
745        for input in inputs {
746            let result = sanitize(input);
747            assert!(
748                !result.contains('\x1b'),
749                "Output contains ESC for input: {input:?}"
750            );
751        }
752    }
753
754    #[test]
755    fn output_never_contains_forbidden_c0() {
756        let inputs = [
757            "\x00\x01\x02\x03\x04\x05\x06\x07",
758            "\x08\x0B\x0C\x0E\x0F",
759            "\x10\x11\x12\x13\x14\x15\x16\x17",
760            "\x18\x19\x1A\x1C\x1D\x1E\x1F",
761            "Mixed\x00text\x07with\x0Ccontrols",
762        ];
763
764        for input in inputs {
765            let result = sanitize(input);
766            for b in result.as_bytes() {
767                assert!(
768                    !is_forbidden_c0(*b),
769                    "Output contains forbidden C0 0x{b:02X} for input: {input:?}"
770                );
771            }
772        }
773    }
774
775    #[test]
776    fn allowed_controls_preserved_in_output() {
777        let input = "Tab\there\nNewline\rCarriage";
778        let result = sanitize(input);
779        assert!(result.contains('\t'));
780        assert!(result.contains('\n'));
781        assert!(result.contains('\r'));
782    }
783
784    // ============== Decode UTF-8 Tests ==============
785
786    #[test]
787    fn decode_ascii() {
788        let bytes = b"A";
789        let result = decode_utf8_char(bytes);
790        assert_eq!(result, Some(('A', 1)));
791    }
792
793    #[test]
794    fn decode_two_byte() {
795        let bytes = "\u{00E9}".as_bytes(); // é
796        let result = decode_utf8_char(bytes);
797        assert_eq!(result, Some(('\u{00E9}', 2)));
798    }
799
800    #[test]
801    fn decode_three_byte() {
802        let bytes = "\u{4e16}".as_bytes(); // Chinese
803        let result = decode_utf8_char(bytes);
804        assert_eq!(result, Some(('\u{4e16}', 3)));
805    }
806
807    #[test]
808    fn decode_four_byte() {
809        let bytes = "\u{1f600}".as_bytes(); // Emoji
810        let result = decode_utf8_char(bytes);
811        assert_eq!(result, Some(('\u{1f600}', 4)));
812    }
813
814    #[test]
815    fn decode_invalid_lead() {
816        let bytes = &[0xFF];
817        let result = decode_utf8_char(bytes);
818        assert_eq!(result, None);
819    }
820
821    #[test]
822    fn decode_truncated() {
823        let bytes = &[0xC2]; // Incomplete 2-byte sequence
824        let result = decode_utf8_char(bytes);
825        assert_eq!(result, None);
826    }
827
828    #[test]
829    fn decode_invalid_continuation() {
830        let bytes = &[0xC2, 0x00]; // Invalid continuation byte
831        let result = decode_utf8_char(bytes);
832        assert_eq!(result, None);
833    }
834
835    // ================================================================
836    // Adversarial Security Tests (bd-397)
837    //
838    // Tests below exercise the specific threat model from ADR-006:
839    //   1. Log injection / cursor corruption
840    //   2. Title injection (OSC 0)
841    //   3. Clipboard hijacking (OSC 52)
842    //   4. Terminal mode hijacking
843    //   5. Data exfiltration via terminal queries
844    //   6. Social engineering via fake prompts
845    //   7. C1 control code injection
846    //   8. Sequence terminator confusion
847    //   9. DoS via large / deeply nested payloads
848    //  10. Combined / chained attacks
849    // ================================================================
850
851    // ---- 1. Log injection / cursor corruption ----
852
853    #[test]
854    fn adversarial_clear_screen() {
855        let input = "\x1b[2J";
856        let result = sanitize(input);
857        assert_eq!(result.as_ref(), "");
858    }
859
860    #[test]
861    fn adversarial_home_cursor() {
862        let input = "visible\x1b[Hhidden";
863        let result = sanitize(input);
864        assert_eq!(result.as_ref(), "visiblehidden");
865    }
866
867    #[test]
868    fn adversarial_cursor_absolute_position() {
869        let input = "ok\x1b[999;999Hmalicious";
870        let result = sanitize(input);
871        assert_eq!(result.as_ref(), "okmalicious");
872    }
873
874    #[test]
875    fn adversarial_scroll_up() {
876        let input = "text\x1b[5Smore";
877        let result = sanitize(input);
878        assert_eq!(result.as_ref(), "textmore");
879    }
880
881    #[test]
882    fn adversarial_scroll_down() {
883        let input = "text\x1b[5Tmore";
884        let result = sanitize(input);
885        assert_eq!(result.as_ref(), "textmore");
886    }
887
888    #[test]
889    fn adversarial_erase_line() {
890        let input = "secret\x1b[2Koverwrite";
891        let result = sanitize(input);
892        assert_eq!(result.as_ref(), "secretoverwrite");
893    }
894
895    #[test]
896    fn adversarial_insert_delete_lines() {
897        let input = "text\x1b[10Linserted\x1b[5Mdeleted";
898        let result = sanitize(input);
899        assert_eq!(result.as_ref(), "textinserteddeleted");
900    }
901
902    // ---- 2. Title injection (OSC 0, 1, 2) ----
903
904    #[test]
905    fn adversarial_osc0_title_injection() {
906        let input = "\x1b]0;PWNED - Enter Password\x07";
907        let result = sanitize(input);
908        assert_eq!(result.as_ref(), "");
909        assert!(!result.contains('\x1b'));
910        assert!(!result.contains('\x07'));
911    }
912
913    #[test]
914    fn adversarial_osc1_icon_title() {
915        let input = "\x1b]1;evil-icon\x07";
916        let result = sanitize(input);
917        assert_eq!(result.as_ref(), "");
918    }
919
920    #[test]
921    fn adversarial_osc2_window_title() {
922        let input = "\x1b]2;sudo password required\x1b\\";
923        let result = sanitize(input);
924        assert_eq!(result.as_ref(), "");
925    }
926
927    // ---- 3. Clipboard hijacking (OSC 52) ----
928
929    #[test]
930    fn adversarial_osc52_clipboard_set_bel() {
931        // Set clipboard to "rm -rf /" encoded in base64
932        let input = "safe\x1b]52;c;cm0gLXJmIC8=\x07text";
933        let result = sanitize(input);
934        assert_eq!(result.as_ref(), "safetext");
935    }
936
937    #[test]
938    fn adversarial_osc52_clipboard_set_st() {
939        let input = "safe\x1b]52;c;cm0gLXJmIC8=\x1b\\text";
940        let result = sanitize(input);
941        assert_eq!(result.as_ref(), "safetext");
942    }
943
944    #[test]
945    fn adversarial_osc52_clipboard_query() {
946        // Query clipboard (could exfiltrate data)
947        let input = "\x1b]52;c;?\x07";
948        let result = sanitize(input);
949        assert_eq!(result.as_ref(), "");
950    }
951
952    // ---- 4. Terminal mode hijacking ----
953
954    #[test]
955    fn adversarial_alt_screen_enable() {
956        let input = "\x1b[?1049h";
957        let result = sanitize(input);
958        assert_eq!(result.as_ref(), "");
959    }
960
961    #[test]
962    fn adversarial_alt_screen_disable() {
963        let input = "\x1b[?1049l";
964        let result = sanitize(input);
965        assert_eq!(result.as_ref(), "");
966    }
967
968    #[test]
969    fn adversarial_mouse_enable() {
970        let input = "\x1b[?1000h\x1b[?1002h\x1b[?1003h\x1b[?1006h";
971        let result = sanitize(input);
972        assert_eq!(result.as_ref(), "");
973    }
974
975    #[test]
976    fn adversarial_bracketed_paste_enable() {
977        let input = "\x1b[?2004h";
978        let result = sanitize(input);
979        assert_eq!(result.as_ref(), "");
980    }
981
982    #[test]
983    fn adversarial_focus_events_enable() {
984        let input = "\x1b[?1004h";
985        let result = sanitize(input);
986        assert_eq!(result.as_ref(), "");
987    }
988
989    #[test]
990    fn adversarial_raw_mode_sequence() {
991        // Attempt to set raw mode
992        let input = "\x1b[?7727h";
993        let result = sanitize(input);
994        assert_eq!(result.as_ref(), "");
995    }
996
997    #[test]
998    fn adversarial_cursor_hide_show() {
999        let input = "\x1b[?25l\x1b[?25h";
1000        let result = sanitize(input);
1001        assert_eq!(result.as_ref(), "");
1002    }
1003
1004    // ---- 5. Data exfiltration via terminal queries ----
1005
1006    #[test]
1007    fn adversarial_device_attributes_query_da1() {
1008        let input = "\x1b[c";
1009        let result = sanitize(input);
1010        assert_eq!(result.as_ref(), "");
1011    }
1012
1013    #[test]
1014    fn adversarial_device_attributes_query_da2() {
1015        let input = "\x1b[>c";
1016        let result = sanitize(input);
1017        assert_eq!(result.as_ref(), "");
1018    }
1019
1020    #[test]
1021    fn adversarial_device_status_report() {
1022        let input = "\x1b[6n";
1023        let result = sanitize(input);
1024        assert_eq!(result.as_ref(), "");
1025    }
1026
1027    #[test]
1028    fn adversarial_osc_color_query() {
1029        // Query background color (OSC 11)
1030        let input = "\x1b]11;?\x07";
1031        let result = sanitize(input);
1032        assert_eq!(result.as_ref(), "");
1033    }
1034
1035    #[test]
1036    fn adversarial_decrpm_query() {
1037        let input = "\x1b[?2026$p";
1038        let result = sanitize(input);
1039        assert_eq!(result.as_ref(), "");
1040    }
1041
1042    // ---- 6. Social engineering via fake prompts ----
1043
1044    #[test]
1045    fn adversarial_fake_shell_prompt() {
1046        // Try to move cursor to create a fake prompt
1047        let input = "\x1b[999;1H\x1b[2K$ sudo rm -rf /\x1b[A";
1048        let result = sanitize(input);
1049        assert!(!result.contains('\x1b'));
1050        // Only text content should survive
1051        assert_eq!(result.as_ref(), "$ sudo rm -rf /");
1052    }
1053
1054    #[test]
1055    fn adversarial_fake_password_prompt() {
1056        // Combine title set + cursor move + fake prompt
1057        let input = "\x1b]0;Terminal\x07\x1b[2J\x1b[HPassword: ";
1058        let result = sanitize(input);
1059        assert_eq!(result.as_ref(), "Password: ");
1060    }
1061
1062    #[test]
1063    fn adversarial_overwrite_existing_content() {
1064        // Try to use backspaces + CR to overwrite existing output
1065        let input = "safe output\r\x1b[2Kmalicious replacement";
1066        let result = sanitize(input);
1067        assert_eq!(result.as_ref(), "safe output\rmalicious replacement");
1068    }
1069
    // ---- 7. C1 control codes (U+0080..=U+009F) ----
    //
    // In ISO-8859-1, bytes 0x80-0x9F are the C1 control characters.
    // In UTF-8, those raw byte values are continuation bytes (invalid as
    // leading bytes), so inside a `&str` a C1 control appears as the
    // two-byte encoding of its code point (U+0080..=U+009F).
    // The sanitizer should not let them through as control codes.
1076
1077    #[test]
1078    fn adversarial_c1_single_byte_csi() {
1079        // U+009B is the C1 equivalent of ESC [ (CSI)
1080        // Some terminals treat this as a CSI introducer, so it MUST be stripped.
1081        let input = "text\u{009B}31mmalicious";
1082        let result = sanitize(input);
1083        assert!(!result.contains('\x1b'));
1084        assert!(
1085            !result.contains('\u{009B}'),
1086            "C1 CSI (U+009B) must be stripped"
1087        );
1088    }
1089
1090    #[test]
1091    fn adversarial_c1_osc_byte() {
1092        // U+009D is the C1 equivalent of ESC ] (OSC)
1093        let input = "text\u{009D}0;Evil Title\x07malicious";
1094        let result = sanitize(input);
1095        assert!(!result.contains('\x1b'));
1096        assert!(
1097            !result.contains('\u{009D}'),
1098            "C1 OSC (U+009D) must be stripped"
1099        );
1100    }
1101
1102    #[test]
1103    fn adversarial_c1_dcs_byte() {
1104        // U+0090 (DCS)
1105        let input = "A\u{0090}device control\x1b\\B";
1106        let result = sanitize(input);
1107        assert!(!result.contains('\u{0090}'));
1108    }
1109
1110    #[test]
1111    fn adversarial_c1_apc_byte() {
1112        // U+009F (APC)
1113        let input = "A\u{009F}app command\x1b\\B";
1114        let result = sanitize(input);
1115        assert!(!result.contains('\u{009F}'));
1116    }
1117
1118    #[test]
1119    fn adversarial_c1_pm_byte() {
1120        // U+009E (PM)
1121        let input = "A\u{009E}private msg\x1b\\B";
1122        let result = sanitize(input);
1123        assert!(!result.contains('\u{009E}'));
1124    }
1125
1126    #[test]
1127    fn adversarial_c1_st_byte() {
1128        // U+009C (ST = String Terminator)
1129        let input = "A\u{009C}B";
1130        let result = sanitize(input);
1131        assert!(!result.contains('\u{009C}'));
1132    }
1133
1134    #[test]
1135    fn adversarial_all_c1_controls_stripped() {
1136        // Every C1 control (U+0080..U+009F) must be stripped
1137        for cp in 0x0080..=0x009F_u32 {
1138            let c = char::from_u32(cp).unwrap();
1139            let input = format!("A{c}B");
1140            let result = sanitize(&input);
1141            assert!(
1142                !result
1143                    .chars()
1144                    .any(|ch| ('\u{0080}'..='\u{009F}').contains(&ch)),
1145                "C1 control U+{cp:04X} passed through sanitizer"
1146            );
1147            // The surrounding text must survive
1148            assert!(result.contains('A'), "Text before C1 U+{cp:04X} lost");
1149            assert!(result.contains('B'), "Text after C1 U+{cp:04X} lost");
1150        }
1151    }
1152
1153    #[test]
1154    fn adversarial_c1_fast_path_triggers_slow_path() {
1155        // C1 controls must trigger the slow path even without ESC/DEL/C0
1156        let input = "clean\u{0085}text"; // U+0085 = NEL (Next Line)
1157        let result = sanitize(input);
1158        assert!(
1159            matches!(result, Cow::Owned(_)),
1160            "C1 should trigger slow path"
1161        );
1162        assert!(!result.contains('\u{0085}'));
1163        assert_eq!(result.as_ref(), "cleantext");
1164    }
1165
1166    // ---- 8. Sequence terminator confusion ----
1167
1168    #[test]
1169    fn adversarial_nested_osc_in_osc() {
1170        // OSC within OSC - inner should not terminate outer
1171        let input = "safe\x1b]8;;\x1b]0;evil\x07https://ok.com\x07text";
1172        let result = sanitize(input);
1173        assert!(!result.contains('\x1b'));
1174        assert!(!result.contains('\x07'));
1175    }
1176
1177    #[test]
1178    fn adversarial_st_inside_dcs() {
1179        // DCS with lone ESC (not followed by \) in body: aborts the DCS handler.
1180        // The lone ESC is re-processed by the main loop as ESC d (single-char escape),
1181        // and the remaining "ata" appears as text before ESC \ (another single-char escape).
1182        let input = "A\x1bPsome\x1bdata\x1b\\B";
1183        let result = sanitize(input);
1184        assert_eq!(result.as_ref(), "AataB");
1185    }
1186
1187    #[test]
1188    fn dcs_with_proper_st_fully_consumed() {
1189        // DCS properly terminated by ST (no lone ESC in body)
1190        let input = "A\x1bPsomedata\x1b\\B";
1191        let result = sanitize(input);
1192        assert_eq!(result.as_ref(), "AB");
1193    }
1194
1195    #[test]
1196    fn adversarial_bel_vs_st_terminator() {
1197        // OSC terminated by BEL, then more text, then ST
1198        let input = "A\x1b]0;title\x07B\x1b\\C";
1199        let result = sanitize(input);
1200        // BEL terminates the OSC; "B" is text; ESC \ is a single-char escape
1201        assert!(!result.contains('\x1b'));
1202        assert!(!result.contains('\x07'));
1203    }
1204
1205    #[test]
1206    fn adversarial_csi_without_final_byte() {
1207        // CSI with only parameter bytes, never reaching a final byte
1208        let input = "A\x1b[0;0;0;0;0;0;0;0;0;0B";
1209        let result = sanitize(input);
1210        // The 'B' (0x42) IS a valid CSI final byte, so entire CSI is consumed
1211        assert_eq!(result.as_ref(), "A");
1212    }
1213
1214    #[test]
1215    fn adversarial_csi_many_params_then_final() {
1216        // CSI with many parameters followed by a valid final byte
1217        let input = "X\x1b[1;2;3;4;5;6;7;8;9;10mY";
1218        let result = sanitize(input);
1219        assert_eq!(result.as_ref(), "XY");
1220    }
1221
1222    // ---- 9. DoS-style payloads ----
1223
1224    #[test]
1225    fn adversarial_very_long_csi_params() {
1226        // Very long CSI parameter string
1227        let params: String = std::iter::repeat_n("0;", 10_000).collect();
1228        let input = format!("start\x1b[{params}mend");
1229        let result = sanitize(&input);
1230        assert_eq!(result.as_ref(), "startend");
1231    }
1232
1233    #[test]
1234    fn adversarial_many_short_sequences() {
1235        // Many small CSI sequences back to back
1236        let input: String = (0..10_000).map(|_| "\x1b[0m").collect();
1237        let input = format!("start{input}end");
1238        let result = sanitize(&input);
1239        assert_eq!(result.as_ref(), "startend");
1240    }
1241
1242    #[test]
1243    fn adversarial_very_long_osc_content() {
1244        // Very long OSC payload (could be used to cause memory issues)
1245        let payload: String = std::iter::repeat_n('A', 100_000).collect();
1246        let input = format!("text\x1b]0;{payload}\x07more");
1247        let result = sanitize(&input);
1248        assert_eq!(result.as_ref(), "textmore");
1249    }
1250
1251    #[test]
1252    fn adversarial_very_long_dcs_content() {
1253        let payload: String = std::iter::repeat_n('X', 100_000).collect();
1254        let input = format!("text\x1bP{payload}\x1b\\more");
1255        let result = sanitize(&input);
1256        assert_eq!(result.as_ref(), "textmore");
1257    }
1258
1259    #[test]
1260    fn adversarial_only_escape_bytes() {
1261        // Input composed entirely of ESC bytes
1262        let input: String = std::iter::repeat_n('\x1b', 1000).collect();
1263        let result = sanitize(&input);
1264        assert_eq!(result.as_ref(), "");
1265    }
1266
1267    #[test]
1268    fn adversarial_alternating_esc_and_text() {
1269        // ESC-char-ESC-char pattern
1270        let input: String = (0..1000)
1271            .map(|i| if i % 2 == 0 { "\x1b[m" } else { "a" })
1272            .collect();
1273        let result = sanitize(&input);
1274        // Only the "a" chars survive
1275        let expected: String = std::iter::repeat_n('a', 500).collect();
1276        assert_eq!(result.as_ref(), expected);
1277    }
1278
1279    #[test]
1280    fn adversarial_all_forbidden_c0_in_sequence() {
1281        // Every forbidden C0 byte
1282        let mut input = String::from("start");
1283        for b in 0x00u8..=0x1F {
1284            if b != 0x09 && b != 0x0A && b != 0x0D && b != 0x1B {
1285                input.push(b as char);
1286            }
1287        }
1288        input.push_str("end");
1289        let result = sanitize(&input);
1290        assert_eq!(result.as_ref(), "startend");
1291    }
1292
1293    // ---- 10. Combined / chained attacks ----
1294
1295    #[test]
1296    fn adversarial_combined_title_clear_clipboard() {
1297        // Chain: set title + clear screen + set clipboard + fake prompt
1298        let input = concat!(
1299            "\x1b]0;Terminal\x07",    // set title
1300            "\x1b[2J",                // clear screen
1301            "\x1b[H",                 // home cursor
1302            "\x1b]52;c;cm0gLXJm\x07", // set clipboard
1303            "Password: ",             // fake prompt
1304        );
1305        let result = sanitize(input);
1306        assert_eq!(result.as_ref(), "Password: ");
1307        assert!(!result.contains('\x1b'));
1308        assert!(!result.contains('\x07'));
1309    }
1310
1311    #[test]
1312    fn adversarial_sgr_color_soup() {
1313        // Many SGR sequences interspersed with text to try to leak colors
1314        let input = "\x1b[31m\x1b[1m\x1b[4m\x1b[7m\x1b[38;2;255;0;0mred\x1b[0m";
1315        let result = sanitize(input);
1316        assert_eq!(result.as_ref(), "red");
1317    }
1318
1319    #[test]
1320    fn adversarial_hyperlink_wrapping_attack() {
1321        // Try to create a clickable region that covers existing content
1322        let input = concat!(
1323            "\x1b]8;;https://evil.com\x07",
1324            "Click here for info",
1325            "\x1b]8;;\x07",
1326        );
1327        let result = sanitize(input);
1328        assert_eq!(result.as_ref(), "Click here for info");
1329    }
1330
1331    #[test]
1332    fn adversarial_kitty_graphics_protocol() {
1333        // Kitty graphics protocol uses APC
1334        let input = "img\x1b_Gf=100,s=1,v=1;AAAA\x1b\\text";
1335        let result = sanitize(input);
1336        assert_eq!(result.as_ref(), "imgtext");
1337    }
1338
1339    #[test]
1340    fn adversarial_sixel_data() {
1341        // Sixel graphics data via DCS
1342        let input = "pre\x1bPq#0;2;0;0;0#1;2;100;100;100~-\x1b\\post";
1343        let result = sanitize(input);
1344        assert_eq!(result.as_ref(), "prepost");
1345    }
1346
1347    #[test]
1348    fn adversarial_mixed_valid_utf8_and_escapes() {
1349        // Unicode text interspersed with escape sequences
1350        let input = "\u{1f512}\x1b[31m\u{26a0}\x1b[0m secure\x1b]0;evil\x07\u{2705}";
1351        let result = sanitize(input);
1352        assert_eq!(result.as_ref(), "\u{1f512}\u{26a0} secure\u{2705}");
1353    }
1354
1355    #[test]
1356    fn adversarial_control_char_near_escape() {
1357        // Control chars adjacent to escape sequences
1358        let input = "\x01\x1b[31m\x02text\x03\x1b[0m\x04";
1359        let result = sanitize(input);
1360        assert!(!result.contains('\x1b'));
1361        assert_eq!(result.as_ref(), "text");
1362    }
1363
1364    #[test]
1365    fn adversarial_save_restore_cursor_attack() {
1366        // Save cursor, write fake content, restore cursor to hide it
1367        let input = "\x1b7fake prompt\x1b8real content";
1368        let result = sanitize(input);
1369        assert_eq!(result.as_ref(), "fake promptreal content");
1370    }
1371
1372    #[test]
1373    fn adversarial_dec_set_reset_barrage() {
1374        // Barrage of DEC private mode set/reset sequences
1375        let input = (1..100)
1376            .map(|i| format!("\x1b[?{i}h\x1b[?{i}l"))
1377            .collect::<String>();
1378        let input = format!("A{input}B");
1379        let result = sanitize(&input);
1380        assert_eq!(result.as_ref(), "AB");
1381    }
1382
1383    // ---- Property-based tests via proptest ----
1384
1385    mod proptest_adversarial {
1386        use super::*;
1387        use proptest::prelude::*;
1388
1389        proptest! {
1390            #[test]
1391            fn sanitize_never_panics(input in ".*") {
1392                let _ = sanitize(&input);
1393            }
1394
1395            #[test]
1396            fn sanitize_output_never_contains_esc(input in ".*") {
1397                let result = sanitize(&input);
1398                prop_assert!(
1399                    !result.contains('\x1b'),
1400                    "Output contained ESC for input {:?}", input
1401                );
1402            }
1403
1404            #[test]
1405            fn sanitize_output_never_contains_del(input in ".*") {
1406                let result = sanitize(&input);
1407                prop_assert!(
1408                    !result.contains('\x7f'),
1409                    "Output contained DEL for input {:?}", input
1410                );
1411            }
1412
1413            #[test]
1414            fn sanitize_output_no_forbidden_c0(input in ".*") {
1415                let result = sanitize(&input);
1416                for &b in result.as_bytes() {
1417                    prop_assert!(
1418                        !is_forbidden_c0(b),
1419                        "Output contains forbidden C0 0x{:02X}", b
1420                    );
1421                }
1422            }
1423
1424            #[test]
1425            fn sanitize_preserves_clean_input(input in "[a-zA-Z0-9 .,!?\\n\\t]+") {
1426                let result = sanitize(&input);
1427                prop_assert_eq!(result.as_ref(), input.as_str());
1428            }
1429
1430            #[test]
1431            fn sanitize_idempotent(input in ".*") {
1432                let first = sanitize(&input);
1433                let second = sanitize(first.as_ref());
1434                prop_assert_eq!(
1435                    first.as_ref(),
1436                    second.as_ref(),
1437                    "Sanitize is not idempotent"
1438                );
1439            }
1440
1441            #[test]
1442            fn sanitize_output_len_lte_input(input in ".*") {
1443                let result = sanitize(&input);
1444                prop_assert!(
1445                    result.len() <= input.len(),
1446                    "Output ({}) longer than input ({})", result.len(), input.len()
1447                );
1448            }
1449
1450            #[test]
1451            fn sanitize_output_is_valid_utf8(input in ".*") {
1452                let result = sanitize(&input);
1453                // The return type is Cow<str> so it's guaranteed valid UTF-8,
1454                // but verify the invariant explicitly.
1455                prop_assert!(std::str::from_utf8(result.as_bytes()).is_ok());
1456            }
1457
1458            #[test]
1459            fn sanitize_output_no_c1_controls(input in ".*") {
1460                let result = sanitize(&input);
1461                for c in result.as_ref().chars() {
1462                    prop_assert!(
1463                        !('\u{0080}'..='\u{009F}').contains(&c),
1464                        "Output contains C1 control U+{:04X}", c as u32
1465                    );
1466                }
1467            }
1468        }
1469
1470        // Targeted generators for adversarial byte patterns
1471
1472        fn escape_sequence() -> impl Strategy<Value = String> {
1473            prop_oneof![
1474                // CSI sequences with random params and final bytes
1475                (
1476                    proptest::collection::vec(0x30u8..=0x3F, 0..20),
1477                    0x40u8..=0x7E,
1478                )
1479                    .prop_map(|(params, final_byte)| {
1480                        let mut s = String::from("\x1b[");
1481                        for b in params {
1482                            s.push(b as char);
1483                        }
1484                        s.push(final_byte as char);
1485                        s
1486                    }),
1487                // OSC with BEL terminator
1488                proptest::string::string_regex("[^\x07\x1b]{0,50}")
1489                    .unwrap()
1490                    .prop_map(|content| format!("\x1b]{content}\x07")),
1491                // OSC with ST terminator
1492                proptest::string::string_regex("[^\x1b]{0,50}")
1493                    .unwrap()
1494                    .prop_map(|content| format!("\x1b]{content}\x1b\\")),
1495                // DCS
1496                proptest::string::string_regex("[^\x1b]{0,50}")
1497                    .unwrap()
1498                    .prop_map(|content| format!("\x1bP{content}\x1b\\")),
1499                // APC
1500                proptest::string::string_regex("[^\x1b]{0,50}")
1501                    .unwrap()
1502                    .prop_map(|content| format!("\x1b_{content}\x1b\\")),
1503                // PM
1504                proptest::string::string_regex("[^\x1b]{0,50}")
1505                    .unwrap()
1506                    .prop_map(|content| format!("\x1b^{content}\x1b\\")),
1507                // Single-char escapes
1508                (0x20u8..=0x7E).prop_map(|b| format!("\x1b{}", b as char)),
1509            ]
1510        }
1511
1512        fn mixed_adversarial_input() -> impl Strategy<Value = String> {
1513            proptest::collection::vec(
1514                prop_oneof![
1515                    // Clean text
1516                    proptest::string::string_regex("[a-zA-Z0-9 ]{1,10}").unwrap(),
1517                    // Escape sequences
1518                    escape_sequence(),
1519                    // Forbidden C0 controls
1520                    (0x00u8..=0x1F)
1521                        .prop_filter("not allowed control", |b| {
1522                            *b != 0x09 && *b != 0x0A && *b != 0x0D
1523                        })
1524                        .prop_map(|b| String::from(b as char)),
1525                ],
1526                1..20,
1527            )
1528            .prop_map(|parts| parts.join(""))
1529        }
1530
1531        proptest! {
1532            #[test]
1533            fn adversarial_mixed_input_safe(input in mixed_adversarial_input()) {
1534                let result = sanitize(&input);
1535                prop_assert!(!result.contains('\x1b'));
1536                prop_assert!(!result.contains('\x7f'));
1537                for &b in result.as_bytes() {
1538                    prop_assert!(!is_forbidden_c0(b));
1539                }
1540            }
1541
1542            #[test]
1543            fn escape_sequences_fully_stripped(seq in escape_sequence()) {
1544                let input = format!("before{seq}after");
1545                let result = sanitize(&input);
1546                prop_assert!(
1547                    !result.contains('\x1b'),
1548                    "Output contains ESC for sequence {:?}", seq
1549                );
1550                prop_assert!(
1551                    result.starts_with("before"),
1552                    "Output doesn't start with 'before' for {:?}: got {:?}", seq, result
1553                );
1554                // Note: unterminated DCS/APC/PM/OSC sequences consume to
1555                // end of input, so "after" may be absorbed. This is correct
1556                // security behavior — consuming unterminated sequences is
1557                // safer than letting potential payload through.
1558            }
1559        }
1560    }
1561}