Skip to main content

ftui_render/
sanitize.rs

1#![forbid(unsafe_code)]
2
3//! Sanitization for untrusted terminal output.
4//!
5//! This module implements the sanitize-by-default policy (ADR-006) to protect
6//! against terminal escape injection attacks. Any untrusted bytes displayed
7//! as logs, tool output, or LLM streams must be treated as **data**, not
8//! executed as terminal control sequences.
9//!
10//! # Threat Model
11//!
12//! Malicious content in logs could:
13//! 1. Manipulate cursor position (break inline mode)
14//! 2. Change terminal colors/modes persistently
15//! 3. Hide text or show fake prompts (social engineering)
16//! 4. Trigger terminal queries that exfiltrate data
17//! 5. Set window title to misleading values
18//!
19//! # Performance
20//!
//! - **Fast path (95%+ of cases)**: Scan for ESC and DEL using memchr, then
//!   for forbidden C0 controls and C1 controls. If none are found, content
//!   is safe - return borrowed slice.
//!   Zero allocation in common case, < 100ns for typical log line.
24//!
25//! - **Slow path**: Allocate output buffer, strip control sequences,
26//!   return owned String. Linear in input size.
27//!
28//! # Usage
29//!
30//! ```
31//! use ftui_render::sanitize::sanitize;
32//! use std::borrow::Cow;
33//!
34//! // Fast path - no escapes, returns borrowed
35//! let safe = sanitize("Normal log message");
36//! assert!(matches!(safe, Cow::Borrowed(_)));
37//!
38//! // Slow path - escapes stripped, returns owned
39//! let malicious = sanitize("Evil \x1b[31mred\x1b[0m text");
40//! assert!(matches!(malicious, Cow::Owned(_)));
41//! assert_eq!(malicious.as_ref(), "Evil red text");
42//! ```
43
44use std::borrow::Cow;
45
46use memchr::memchr;
47
48/// Sanitize untrusted text for safe terminal display.
49///
50/// # Fast Path
51/// If no ESC (0x1B) found and no forbidden C0 controls, returns borrowed input
52/// with zero allocation.
53///
54/// # Slow Path
55/// Strips all escape sequences and forbidden C0 controls, returns owned String.
56///
57/// # What Gets Stripped
58/// - ESC (0x1B) and all following CSI/OSC/DCS/APC sequences
59/// - C0 controls except: TAB (0x09), LF (0x0A), CR (0x0D)
60/// - C1 controls (U+0080..U+009F) — these are the 8-bit equivalents of
61///   ESC-prefixed sequences and some terminals honor them
62/// - DEL (0x7F)
63///
64/// # What Gets Preserved
65/// - TAB, LF, CR (allowed control characters)
66/// - All printable ASCII (0x20-0x7E)
67/// - All valid UTF-8 sequences above U+009F
68#[inline]
69pub fn sanitize(input: &str) -> Cow<'_, str> {
70    let bytes = input.as_bytes();
71
72    // Fast path: check for any ESC byte, forbidden C0 controls, DEL, or C1 controls.
73    // C1 controls (U+0080..U+009F) are encoded in UTF-8 as \xC2\x80..\xC2\x9F.
74    if memchr(0x1B, bytes).is_none()
75        && memchr(0x7F, bytes).is_none()
76        && !has_forbidden_c0(bytes)
77        && !has_c1_controls(bytes)
78    {
79        return Cow::Borrowed(input);
80    }
81
82    // Slow path: strip escape sequences
83    Cow::Owned(sanitize_slow(input))
84}
85
86/// Check if any forbidden C0 control characters are present.
87///
88/// Forbidden: 0x00-0x08, 0x0B-0x0C, 0x0E-0x1A, 0x1C-0x1F
89/// Allowed: TAB (0x09), LF (0x0A), CR (0x0D)
90#[inline]
91fn has_forbidden_c0(bytes: &[u8]) -> bool {
92    bytes.iter().any(|&b| is_forbidden_c0(b))
93}
94
/// Check if a single byte is a forbidden C0 control.
///
/// The forbidden set is every byte below 0x20 except TAB (0x09), LF (0x0A),
/// CR (0x0D) and ESC (0x1B). ESC is left out because it is handled as the
/// start of an escape sequence rather than as a lone control byte.
#[inline]
const fn is_forbidden_c0(b: u8) -> bool {
    matches!(b, 0x00..=0x08 | 0x0B | 0x0C | 0x0E..=0x1A | 0x1C..=0x1F)
}
103
/// Check if any C1 control characters (U+0080..U+009F) are present.
///
/// In UTF-8, these are encoded as the two-byte sequence \xC2\x80..\xC2\x9F,
/// so the scan looks at every adjacent byte pair for that shape.
/// C1 controls include CSI (U+009B), OSC (U+009D), DCS (U+0090), APC (U+009F),
/// etc. — some terminals honor these as equivalent to their ESC-prefixed forms.
#[inline]
fn has_c1_controls(bytes: &[u8]) -> bool {
    bytes
        .windows(2)
        .any(|pair| matches!(pair, [0xC2, 0x80..=0x9F]))
}
115
116/// Slow path: strip escape sequences and forbidden controls.
117fn sanitize_slow(input: &str) -> String {
118    let bytes = input.as_bytes();
119    let mut output = String::with_capacity(input.len());
120    let mut i = 0;
121
122    while i < bytes.len() {
123        let b = bytes[i];
124        match b {
125            // ESC - start of escape sequence
126            0x1B => {
127                i = skip_escape_sequence(bytes, i);
128            }
129            // Allowed C0 controls: TAB, LF, CR
130            0x09 | 0x0A | 0x0D => {
131                output.push(b as char);
132                i += 1;
133            }
134            // Forbidden C0 controls - skip
135            0x00..=0x08 | 0x0B..=0x0C | 0x0E..=0x1A | 0x1C..=0x1F => {
136                i += 1;
137            }
138            // DEL - skip
139            0x7F => {
140                i += 1;
141            }
142            // Printable ASCII
143            0x20..=0x7E => {
144                output.push(b as char);
145                i += 1;
146            }
147            // Start of UTF-8 sequence (high bit set)
148            0x80..=0xFF => {
149                if let Some((c, len)) = decode_utf8_char(&bytes[i..]) {
150                    // Skip C1 controls (U+0080..U+009F) — these are the 8-bit
151                    // equivalents of ESC-prefixed sequences (CSI, OSC, DCS, etc.)
152                    if !('\u{0080}'..='\u{009F}').contains(&c) {
153                        output.push(c);
154                    }
155                    i += len;
156                } else {
157                    // Invalid UTF-8, skip byte
158                    i += 1;
159                }
160            }
161        }
162    }
163
164    output
165}
166
/// Skip over the escape sequence beginning at `start`, returning the index
/// of the first byte after it.
///
/// `bytes[start]` is taken to be ESC (0x1B); it is stepped over without
/// being inspected, so the returned index is always greater than `start`.
///
/// Handles:
/// - CSI: ESC [ ... final_byte (0x40-0x7E)
/// - OSC: ESC ] ... (BEL or ST)
/// - DCS: ESC P ... ST
/// - PM: ESC ^ ... ST
/// - APC: ESC _ ... ST
/// - Single-char escapes: ESC char (0x20-0x7E)
///
/// Malformed sequences are cut short instead of being followed to the end
/// of the input: the scan stops *at* (without consuming) the first byte that
/// cannot belong to the sequence, so the caller decides what to do with it.
fn skip_escape_sequence(bytes: &[u8], start: usize) -> usize {
    // Scan the payload of a string-type sequence (OSC/DCS/PM/APC) starting at
    // `from`. ST (ESC \) always terminates; BEL terminates only when
    // `bel_terminates` is set (OSC). Terminators are consumed.
    fn skip_control_string(bytes: &[u8], from: usize, bel_terminates: bool) -> usize {
        let mut pos = from;
        while let Some(&byte) = bytes.get(pos) {
            match byte {
                0x07 if bel_terminates => return pos + 1,
                0x1B => {
                    return if bytes.get(pos + 1) == Some(&b'\\') {
                        // ST (ESC \) terminates the string.
                        pos + 2
                    } else {
                        // Lone ESC: abort here so the caller re-processes it
                        // as a potential new escape sequence.
                        pos
                    };
                }
                // Abort on other C0 controls (e.g. newline) to prevent
                // swallowing following log lines.
                0x00..=0x1F => return pos,
                _ => pos += 1,
            }
        }
        // Unterminated: the whole remainder belonged to the sequence.
        pos
    }

    let after_esc = start + 1;
    let introducer = match bytes.get(after_esc) {
        Some(&byte) => byte,
        // ESC was the last byte of the input.
        None => return after_esc,
    };
    let body_start = after_esc + 1;

    match introducer {
        // CSI sequence: ESC [ params... final_byte
        b'[' => {
            let mut pos = body_start;
            while let Some(&byte) = bytes.get(pos) {
                match byte {
                    // Final byte ends the sequence and is consumed.
                    0x40..=0x7E => return pos + 1,
                    // Parameter (0x30-0x3F) and intermediate (0x20-0x2F) bytes.
                    0x20..=0x3F => pos += 1,
                    // Invalid byte in CSI (e.g. newline, control char, or
                    // high byte): abort to avoid eating valid text.
                    _ => return pos,
                }
            }
            pos
        }
        // OSC sequence: ESC ] ... (BEL or ST)
        b']' => skip_control_string(bytes, body_start, true),
        // DCS/PM/APC: ESC P/^/_ ... ST
        b'P' | b'^' | b'_' => skip_control_string(bytes, body_start, false),
        // Single-char escape sequences (ESC followed by 0x20-0x7E)
        0x20..=0x7E => body_start,
        // Unknown or invalid - just skip the ESC
        _ => after_esc,
    }
}
258
/// Decode a single UTF-8 character from the front of a byte slice.
///
/// Returns the character and the number of bytes it occupies, or `None` if
/// the slice is empty or does not start with a well-formed encoding. Bytes
/// after the first character are ignored.
///
/// Rejected as invalid: stray continuation bytes and lead bytes above 0xF7,
/// truncated sequences, bad continuation bytes, overlong encodings, and
/// values that are not Unicode scalar values (surrogates, > U+10FFFF).
fn decode_utf8_char(bytes: &[u8]) -> Option<(char, usize)> {
    let (&lead, rest) = bytes.split_first()?;

    // For each lead-byte class: total sequence length, payload bits carried
    // by the lead byte, and the smallest scalar that genuinely needs that
    // length (anything below it is an overlong encoding, RFC 3629).
    let (width, seed, floor): (usize, u32, u32) = match lead {
        0x00..=0x7F => return Some((char::from(lead), 1)),
        0xC0..=0xDF => (2, u32::from(lead & 0x1F), 0x80),
        0xE0..=0xEF => (3, u32::from(lead & 0x0F), 0x800),
        0xF0..=0xF7 => (4, u32::from(lead & 0x07), 0x1_0000),
        _ => return None, // Continuation byte or invalid lead byte
    };

    // `get` fails when the sequence is truncated.
    let continuation = rest.get(..width - 1)?;

    // Each continuation byte must look like 10xxxxxx and contributes 6 bits.
    let scalar = continuation.iter().try_fold(seed, |acc, &byte| {
        ((byte & 0xC0) == 0x80).then(|| (acc << 6) | u32::from(byte & 0x3F))
    })?;

    if scalar < floor {
        return None;
    }

    // `from_u32` rejects surrogates and values beyond U+10FFFF.
    char::from_u32(scalar).map(|ch| (ch, width))
}
302
/// Text with trust level annotation.
///
/// Use this to explicitly mark whether text has been sanitized or comes
/// from a trusted source. Both variants wrap a `Cow<str>`, so the text may
/// be borrowed from the caller or owned by the value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Text<'a> {
    /// Sanitized text (escape sequences stripped).
    Sanitized(Cow<'a, str>),

    /// Trusted text (may contain ANSI sequences).
    ///
    /// The content is stored as-is, so only use this variant with content
    /// from trusted sources.
    Trusted(Cow<'a, str>),
}
316
317impl<'a> Text<'a> {
318    /// Create sanitized text from an untrusted source.
319    #[inline]
320    pub fn sanitized(s: &'a str) -> Self {
321        Text::Sanitized(sanitize(s))
322    }
323
324    /// Create from a trusted source (ANSI sequences allowed).
325    ///
326    /// # Safety
327    /// Only use with content from trusted sources. Untrusted content
328    /// can corrupt terminal state or deceive users.
329    #[inline]
330    pub fn trusted(s: &'a str) -> Self {
331        Text::Trusted(Cow::Borrowed(s))
332    }
333
334    /// Create owned sanitized text.
335    #[inline]
336    pub fn sanitized_owned(s: String) -> Self {
337        match sanitize(&s) {
338            Cow::Borrowed(_) => Text::Sanitized(Cow::Owned(s)),
339            Cow::Owned(owned) => Text::Sanitized(Cow::Owned(owned)),
340        }
341    }
342
343    /// Create owned trusted text.
344    #[inline]
345    pub fn trusted_owned(s: String) -> Self {
346        Text::Trusted(Cow::Owned(s))
347    }
348
349    /// Get the inner string slice.
350    #[inline]
351    #[must_use]
352    pub fn as_str(&self) -> &str {
353        match self {
354            Text::Sanitized(cow) => cow.as_ref(),
355            Text::Trusted(cow) => cow.as_ref(),
356        }
357    }
358
359    /// Check if this text is sanitized.
360    #[inline]
361    #[must_use]
362    pub fn is_sanitized(&self) -> bool {
363        matches!(self, Text::Sanitized(_))
364    }
365
366    /// Check if this text is trusted.
367    #[inline]
368    #[must_use]
369    pub fn is_trusted(&self) -> bool {
370        matches!(self, Text::Trusted(_))
371    }
372
373    /// Convert to owned version.
374    pub fn into_owned(self) -> Text<'static> {
375        match self {
376            Text::Sanitized(cow) => Text::Sanitized(Cow::Owned(cow.into_owned())),
377            Text::Trusted(cow) => Text::Trusted(Cow::Owned(cow.into_owned())),
378        }
379    }
380}
381
382impl AsRef<str> for Text<'_> {
383    fn as_ref(&self) -> &str {
384        self.as_str()
385    }
386}
387
388impl std::fmt::Display for Text<'_> {
389    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
390        write!(f, "{}", self.as_str())
391    }
392}
393
394#[cfg(test)]
395mod tests {
396    use super::*;
397
398    // ============== Fast Path Tests ==============
399
400    #[test]
401    fn fast_path_no_escape() {
402        let input = "Normal log message without escapes";
403        let result = sanitize(input);
404        assert!(matches!(result, Cow::Borrowed(_)));
405        assert_eq!(result.as_ref(), input);
406    }
407
408    #[test]
409    fn fast_path_with_allowed_controls() {
410        let input = "Line1\nLine2\tTabbed\rCarriage";
411        let result = sanitize(input);
412        assert!(matches!(result, Cow::Borrowed(_)));
413        assert_eq!(result.as_ref(), input);
414    }
415
416    #[test]
417    fn fast_path_unicode() {
418        let input = "Hello \u{4e16}\u{754c} \u{1f468}\u{200d}\u{1f469}\u{200d}\u{1f467}";
419        let result = sanitize(input);
420        assert!(matches!(result, Cow::Borrowed(_)));
421        assert_eq!(result.as_ref(), input);
422    }
423
424    #[test]
425    fn fast_path_empty() {
426        let input = "";
427        let result = sanitize(input);
428        assert!(matches!(result, Cow::Borrowed(_)));
429        assert_eq!(result.as_ref(), "");
430    }
431
432    #[test]
433    fn fast_path_printable_ascii() {
434        let input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()";
435        let result = sanitize(input);
436        assert!(matches!(result, Cow::Borrowed(_)));
437        assert_eq!(result.as_ref(), input);
438    }
439
440    // ============== Slow Path: CSI Sequences ==============
441
442    #[test]
443    fn slow_path_strips_sgr_color() {
444        let input = "Hello \x1b[31mred\x1b[0m world";
445        let result = sanitize(input);
446        assert!(matches!(result, Cow::Owned(_)));
447        assert_eq!(result.as_ref(), "Hello red world");
448    }
449
450    #[test]
451    fn slow_path_strips_cursor_movement() {
452        let input = "Before\x1b[2;5HAfter";
453        let result = sanitize(input);
454        assert_eq!(result.as_ref(), "BeforeAfter");
455    }
456
457    #[test]
458    fn slow_path_strips_erase() {
459        let input = "Text\x1b[2JCleared";
460        let result = sanitize(input);
461        assert_eq!(result.as_ref(), "TextCleared");
462    }
463
464    #[test]
465    fn slow_path_strips_multiple_sequences() {
466        let input = "\x1b[1mBold\x1b[0m \x1b[4mUnderline\x1b[24m \x1b[38;5;196mColor\x1b[0m";
467        let result = sanitize(input);
468        assert_eq!(result.as_ref(), "Bold Underline Color");
469    }
470
471    // ============== Slow Path: OSC Sequences ==============
472
473    #[test]
474    fn slow_path_strips_osc_title_bel() {
475        // OSC 0: set title, terminated by BEL
476        let input = "Text\x1b]0;Evil Title\x07More";
477        let result = sanitize(input);
478        assert_eq!(result.as_ref(), "TextMore");
479    }
480
481    #[test]
482    fn slow_path_strips_osc_title_st() {
483        // OSC 0: set title, terminated by ST
484        let input = "Text\x1b]0;Evil Title\x1b\\More";
485        let result = sanitize(input);
486        assert_eq!(result.as_ref(), "TextMore");
487    }
488
489    #[test]
490    fn slow_path_strips_osc8_hyperlink() {
491        // OSC 8: hyperlink
492        let input = "Click \x1b]8;;https://evil.com\x07here\x1b]8;;\x07 please";
493        let result = sanitize(input);
494        assert_eq!(result.as_ref(), "Click here please");
495    }
496
497    // ============== Slow Path: DCS/PM/APC ==============
498
499    #[test]
500    fn slow_path_strips_dcs() {
501        let input = "Before\x1bPdevice control string\x1b\\After";
502        let result = sanitize(input);
503        assert_eq!(result.as_ref(), "BeforeAfter");
504    }
505
506    #[test]
507    fn slow_path_strips_apc() {
508        let input = "Before\x1b_application program command\x1b\\After";
509        let result = sanitize(input);
510        assert_eq!(result.as_ref(), "BeforeAfter");
511    }
512
513    #[test]
514    fn slow_path_strips_pm() {
515        let input = "Before\x1b^privacy message\x1b\\After";
516        let result = sanitize(input);
517        assert_eq!(result.as_ref(), "BeforeAfter");
518    }
519
520    #[test]
521    fn slow_path_strips_osc52_clipboard() {
522        let input = "Before\x1b]52;c;SGVsbG8=\x07After";
523        let result = sanitize(input);
524        assert_eq!(result.as_ref(), "BeforeAfter");
525    }
526
527    #[test]
528    fn slow_path_strips_osc52_clipboard_st() {
529        let input = "Before\x1b]52;c;SGVsbG8=\x1b\\After";
530        let result = sanitize(input);
531        assert_eq!(result.as_ref(), "BeforeAfter");
532    }
533
534    #[test]
535    fn slow_path_strips_private_modes() {
536        let input = "A\x1b[?1049hB\x1b[?1000hC\x1b[?2004hD";
537        let result = sanitize(input);
538        assert_eq!(result.as_ref(), "ABCD");
539    }
540
541    // ============== Slow Path: C0 Controls ==============
542
543    #[test]
544    fn slow_path_strips_nul() {
545        let input = "Hello\x00World";
546        let result = sanitize(input);
547        assert_eq!(result.as_ref(), "HelloWorld");
548    }
549
550    #[test]
551    fn slow_path_strips_bel() {
552        // BEL (0x07) outside of OSC should be stripped
553        let input = "Hello\x07World";
554        let result = sanitize(input);
555        assert_eq!(result.as_ref(), "HelloWorld");
556    }
557
558    #[test]
559    fn slow_path_strips_backspace() {
560        let input = "Hello\x08World";
561        let result = sanitize(input);
562        assert_eq!(result.as_ref(), "HelloWorld");
563    }
564
565    #[test]
566    fn slow_path_strips_form_feed() {
567        let input = "Hello\x0CWorld";
568        let result = sanitize(input);
569        assert_eq!(result.as_ref(), "HelloWorld");
570    }
571
572    #[test]
573    fn slow_path_strips_vertical_tab() {
574        let input = "Hello\x0BWorld";
575        let result = sanitize(input);
576        assert_eq!(result.as_ref(), "HelloWorld");
577    }
578
579    #[test]
580    fn slow_path_strips_del() {
581        let input = "Hello\x7FWorld";
582        let result = sanitize(input);
583        assert_eq!(result.as_ref(), "HelloWorld");
584    }
585
586    #[test]
587    fn slow_path_preserves_tab_lf_cr() {
588        let input = "Line1\nLine2\tTabbed\rReturn";
589        // This should trigger slow path due to needing to scan
590        // but preserve tab/lf/cr
591        let result = sanitize(input);
592        assert_eq!(result.as_ref(), "Line1\nLine2\tTabbed\rReturn");
593    }
594
595    // ============== Edge Cases ==============
596
597    #[test]
598    fn handles_truncated_csi() {
599        let input = "Hello\x1b[";
600        let result = sanitize(input);
601        assert!(!result.contains('\x1b'));
602        assert_eq!(result.as_ref(), "Hello");
603    }
604
605    #[test]
606    fn handles_truncated_dcs() {
607        let input = "Hello\x1bP1;2;3";
608        let result = sanitize(input);
609        assert!(!result.contains('\x1b'));
610        assert_eq!(result.as_ref(), "Hello");
611    }
612
613    #[test]
614    fn handles_truncated_apc() {
615        let input = "Hello\x1b_test";
616        let result = sanitize(input);
617        assert!(!result.contains('\x1b'));
618        assert_eq!(result.as_ref(), "Hello");
619    }
620
621    #[test]
622    fn handles_truncated_pm() {
623        let input = "Hello\x1b^secret";
624        let result = sanitize(input);
625        assert!(!result.contains('\x1b'));
626        assert_eq!(result.as_ref(), "Hello");
627    }
628
629    #[test]
630    fn handles_truncated_osc() {
631        let input = "Hello\x1b]0;Title";
632        let result = sanitize(input);
633        assert!(!result.contains('\x1b'));
634        assert_eq!(result.as_ref(), "Hello");
635    }
636
637    #[test]
638    fn handles_esc_at_end() {
639        let input = "Hello\x1b";
640        let result = sanitize(input);
641        assert_eq!(result.as_ref(), "Hello");
642    }
643
644    #[test]
645    fn handles_lone_esc() {
646        let input = "\x1b";
647        let result = sanitize(input);
648        assert_eq!(result.as_ref(), "");
649    }
650
651    #[test]
652    fn handles_single_char_escape() {
653        // ESC 7 (save cursor) and ESC 8 (restore cursor)
654        let input = "Before\x1b7Middle\x1b8After";
655        let result = sanitize(input);
656        assert_eq!(result.as_ref(), "BeforeMiddleAfter");
657    }
658
659    #[test]
660    fn handles_unknown_escape() {
661        // ESC followed by a byte that's not a valid escape introducer
662        // Using a valid printable byte that's not a known escape char
663        let input = "Before\x1b!After";
664        let result = sanitize(input);
665        // Single-char escape: ESC ! gets stripped
666        assert_eq!(result.as_ref(), "BeforeAfter");
667    }
668
669    // ============== Unicode Tests ==============
670
671    #[test]
672    fn preserves_unicode_characters() {
673        let input = "\u{4e16}\u{754c}"; // Chinese characters
674        let result = sanitize(input);
675        assert_eq!(result.as_ref(), "\u{4e16}\u{754c}");
676    }
677
678    #[test]
679    fn preserves_emoji() {
680        let input = "\u{1f600}\u{1f389}\u{1f680}"; // Emoji
681        let result = sanitize(input);
682        assert_eq!(result.as_ref(), "\u{1f600}\u{1f389}\u{1f680}");
683    }
684
685    #[test]
686    fn preserves_combining_characters() {
687        // e with combining acute accent
688        let input = "e\u{0301}";
689        let result = sanitize(input);
690        assert_eq!(result.as_ref(), "e\u{0301}");
691    }
692
693    #[test]
694    fn mixed_unicode_and_escapes() {
695        let input = "\u{4e16}\x1b[31m\u{754c}\x1b[0m";
696        let result = sanitize(input);
697        assert_eq!(result.as_ref(), "\u{4e16}\u{754c}");
698    }
699
700    // ============== Text Type Tests ==============
701
702    #[test]
703    fn text_sanitized() {
704        let text = Text::sanitized("Hello \x1b[31mWorld\x1b[0m");
705        assert!(text.is_sanitized());
706        assert!(!text.is_trusted());
707        assert_eq!(text.as_str(), "Hello World");
708    }
709
710    #[test]
711    fn text_trusted() {
712        let text = Text::trusted("Hello \x1b[31mWorld\x1b[0m");
713        assert!(!text.is_sanitized());
714        assert!(text.is_trusted());
715        assert_eq!(text.as_str(), "Hello \x1b[31mWorld\x1b[0m");
716    }
717
718    #[test]
719    fn text_into_owned() {
720        let text = Text::sanitized("Hello");
721        let owned = text.into_owned();
722        assert!(owned.is_sanitized());
723        assert_eq!(owned.as_str(), "Hello");
724    }
725
726    #[test]
727    fn text_display() {
728        let text = Text::sanitized("Hello");
729        assert_eq!(format!("{text}"), "Hello");
730    }
731
732    // ============== Property Tests (basic) ==============
733
734    #[test]
735    fn output_never_contains_esc() {
736        let inputs = [
737            "Normal text",
738            "\x1b[31mRed\x1b[0m",
739            "\x1b]0;Title\x07",
740            "\x1bPDCS\x1b\\",
741            "Mixed\x1b[1m\x1b]8;;url\x07text\x1b]8;;\x07\x1b[0m",
742            "",
743            "\x1b",
744            "\x1b[",
745            "\x1b]",
746        ];
747
748        for input in inputs {
749            let result = sanitize(input);
750            assert!(
751                !result.contains('\x1b'),
752                "Output contains ESC for input: {input:?}"
753            );
754        }
755    }
756
757    #[test]
758    fn output_never_contains_forbidden_c0() {
759        let inputs = [
760            "\x00\x01\x02\x03\x04\x05\x06\x07",
761            "\x08\x0B\x0C\x0E\x0F",
762            "\x10\x11\x12\x13\x14\x15\x16\x17",
763            "\x18\x19\x1A\x1C\x1D\x1E\x1F",
764            "Mixed\x00text\x07with\x0Ccontrols",
765        ];
766
767        for input in inputs {
768            let result = sanitize(input);
769            for b in result.as_bytes() {
770                assert!(
771                    !is_forbidden_c0(*b),
772                    "Output contains forbidden C0 0x{b:02X} for input: {input:?}"
773                );
774            }
775        }
776    }
777
778    #[test]
779    fn allowed_controls_preserved_in_output() {
780        let input = "Tab\there\nNewline\rCarriage";
781        let result = sanitize(input);
782        assert!(result.contains('\t'));
783        assert!(result.contains('\n'));
784        assert!(result.contains('\r'));
785    }
786
787    // ============== Decode UTF-8 Tests ==============
788
789    #[test]
790    fn decode_ascii() {
791        let bytes = b"A";
792        let result = decode_utf8_char(bytes);
793        assert_eq!(result, Some(('A', 1)));
794    }
795
796    #[test]
797    fn decode_two_byte() {
798        let bytes = "\u{00E9}".as_bytes(); // é
799        let result = decode_utf8_char(bytes);
800        assert_eq!(result, Some(('\u{00E9}', 2)));
801    }
802
803    #[test]
804    fn decode_three_byte() {
805        let bytes = "\u{4e16}".as_bytes(); // Chinese
806        let result = decode_utf8_char(bytes);
807        assert_eq!(result, Some(('\u{4e16}', 3)));
808    }
809
810    #[test]
811    fn decode_four_byte() {
812        let bytes = "\u{1f600}".as_bytes(); // Emoji
813        let result = decode_utf8_char(bytes);
814        assert_eq!(result, Some(('\u{1f600}', 4)));
815    }
816
817    #[test]
818    fn decode_invalid_lead() {
819        let bytes = &[0xFF];
820        let result = decode_utf8_char(bytes);
821        assert_eq!(result, None);
822    }
823
824    #[test]
825    fn decode_truncated() {
826        let bytes = &[0xC2]; // Incomplete 2-byte sequence
827        let result = decode_utf8_char(bytes);
828        assert_eq!(result, None);
829    }
830
831    #[test]
832    fn decode_invalid_continuation() {
833        let bytes = &[0xC2, 0x00]; // Invalid continuation byte
834        let result = decode_utf8_char(bytes);
835        assert_eq!(result, None);
836    }
837
838    // ================================================================
839    // Adversarial Security Tests (bd-397)
840    //
841    // Tests below exercise the specific threat model from ADR-006:
842    //   1. Log injection / cursor corruption
843    //   2. Title injection (OSC 0)
844    //   3. Clipboard hijacking (OSC 52)
845    //   4. Terminal mode hijacking
846    //   5. Data exfiltration via terminal queries
847    //   6. Social engineering via fake prompts
848    //   7. C1 control code injection
849    //   8. Sequence terminator confusion
850    //   9. DoS via large / deeply nested payloads
851    //  10. Combined / chained attacks
852    // ================================================================
853
854    // ---- 1. Log injection / cursor corruption ----
855
856    #[test]
857    fn adversarial_clear_screen() {
858        let input = "\x1b[2J";
859        let result = sanitize(input);
860        assert_eq!(result.as_ref(), "");
861    }
862
863    #[test]
864    fn adversarial_home_cursor() {
865        let input = "visible\x1b[Hhidden";
866        let result = sanitize(input);
867        assert_eq!(result.as_ref(), "visiblehidden");
868    }
869
870    #[test]
871    fn adversarial_cursor_absolute_position() {
872        let input = "ok\x1b[999;999Hmalicious";
873        let result = sanitize(input);
874        assert_eq!(result.as_ref(), "okmalicious");
875    }
876
877    #[test]
878    fn adversarial_scroll_up() {
879        let input = "text\x1b[5Smore";
880        let result = sanitize(input);
881        assert_eq!(result.as_ref(), "textmore");
882    }
883
884    #[test]
885    fn adversarial_scroll_down() {
886        let input = "text\x1b[5Tmore";
887        let result = sanitize(input);
888        assert_eq!(result.as_ref(), "textmore");
889    }
890
891    #[test]
892    fn adversarial_erase_line() {
893        let input = "secret\x1b[2Koverwrite";
894        let result = sanitize(input);
895        assert_eq!(result.as_ref(), "secretoverwrite");
896    }
897
898    #[test]
899    fn adversarial_insert_delete_lines() {
900        let input = "text\x1b[10Linserted\x1b[5Mdeleted";
901        let result = sanitize(input);
902        assert_eq!(result.as_ref(), "textinserteddeleted");
903    }
904
905    // ---- 2. Title injection (OSC 0, 1, 2) ----
906
907    #[test]
908    fn adversarial_osc0_title_injection() {
909        let input = "\x1b]0;PWNED - Enter Password\x07";
910        let result = sanitize(input);
911        assert_eq!(result.as_ref(), "");
912        assert!(!result.contains('\x1b'));
913        assert!(!result.contains('\x07'));
914    }
915
916    #[test]
917    fn adversarial_osc1_icon_title() {
918        let input = "\x1b]1;evil-icon\x07";
919        let result = sanitize(input);
920        assert_eq!(result.as_ref(), "");
921    }
922
923    #[test]
924    fn adversarial_osc2_window_title() {
925        let input = "\x1b]2;sudo password required\x1b\\";
926        let result = sanitize(input);
927        assert_eq!(result.as_ref(), "");
928    }
929
930    // ---- 3. Clipboard hijacking (OSC 52) ----
931
932    #[test]
933    fn adversarial_osc52_clipboard_set_bel() {
934        // Set clipboard to "rm -rf /" encoded in base64
935        let input = "safe\x1b]52;c;cm0gLXJmIC8=\x07text";
936        let result = sanitize(input);
937        assert_eq!(result.as_ref(), "safetext");
938    }
939
940    #[test]
941    fn adversarial_osc52_clipboard_set_st() {
942        let input = "safe\x1b]52;c;cm0gLXJmIC8=\x1b\\text";
943        let result = sanitize(input);
944        assert_eq!(result.as_ref(), "safetext");
945    }
946
947    #[test]
948    fn adversarial_osc52_clipboard_query() {
949        // Query clipboard (could exfiltrate data)
950        let input = "\x1b]52;c;?\x07";
951        let result = sanitize(input);
952        assert_eq!(result.as_ref(), "");
953    }
954
955    // ---- 4. Terminal mode hijacking ----
956
957    #[test]
958    fn adversarial_alt_screen_enable() {
959        let input = "\x1b[?1049h";
960        let result = sanitize(input);
961        assert_eq!(result.as_ref(), "");
962    }
963
964    #[test]
965    fn adversarial_alt_screen_disable() {
966        let input = "\x1b[?1049l";
967        let result = sanitize(input);
968        assert_eq!(result.as_ref(), "");
969    }
970
971    #[test]
972    fn adversarial_mouse_enable() {
973        let input = "\x1b[?1000h\x1b[?1002h\x1b[?1003h\x1b[?1006h";
974        let result = sanitize(input);
975        assert_eq!(result.as_ref(), "");
976    }
977
#[test]
fn adversarial_bracketed_paste_enable() {
    // Private mode 2004 set: must be stripped, leaving nothing behind.
    let cleaned = sanitize("\x1b[?2004h");
    assert_eq!(&*cleaned, "");
}
984
#[test]
fn adversarial_focus_events_enable() {
    // Private mode 1004 set: must be stripped, leaving nothing behind.
    let enable_focus_events = "\x1b[?1004h";
    let cleaned = sanitize(enable_focus_events);
    assert_eq!(&*cleaned, "");
}
991
#[test]
fn adversarial_raw_mode_sequence() {
    // Private mode 7727 set, used here as a raw-mode switching attempt.
    let cleaned = sanitize("\x1b[?7727h");
    assert_eq!(&*cleaned, "");
}
999
#[test]
fn adversarial_cursor_hide_show() {
    // Private mode 25 reset followed immediately by set: both must go.
    let hide_then_show = "\x1b[?25l\x1b[?25h";
    let cleaned = sanitize(hide_then_show);
    assert_eq!(&*cleaned, "");
}
1006
1007    // ---- 5. Data exfiltration via terminal queries ----
1008
#[test]
fn adversarial_device_attributes_query_da1() {
    // Parameterless CSI ending in "c" (DA1 query): must be stripped.
    let cleaned = sanitize("\x1b[c");
    assert_eq!(&*cleaned, "");
}
1015
#[test]
fn adversarial_device_attributes_query_da2() {
    // CSI with a ">" prefix ending in "c" (DA2 query): must be stripped.
    let da2_query = "\x1b[>c";
    let cleaned = sanitize(da2_query);
    assert_eq!(&*cleaned, "");
}
1022
#[test]
fn adversarial_device_status_report() {
    // CSI 6 n (device status report request): must be stripped.
    let cleaned = sanitize("\x1b[6n");
    assert_eq!(&*cleaned, "");
}
1029
#[test]
fn adversarial_osc_color_query() {
    // OSC 11 with a "?" argument queries the background colour.
    let color_query = "\x1b]11;?\x07";
    let cleaned = sanitize(color_query);
    assert_eq!(&*cleaned, "");
}
1037
#[test]
fn adversarial_decrpm_query() {
    // Mode-report query: CSI with a "$" intermediate byte before the final "p".
    let cleaned = sanitize("\x1b[?2026$p");
    assert_eq!(&*cleaned, "");
}
1044
1045    // ---- 6. Social engineering via fake prompts ----
1046
#[test]
fn adversarial_fake_shell_prompt() {
    // Cursor-positioning sequences wrapped around text that imitates a
    // shell prompt.
    let spoofed_prompt = "\x1b[999;1H\x1b[2K$ sudo rm -rf /\x1b[A";
    let cleaned = sanitize(spoofed_prompt);
    assert!(!cleaned.contains('\x1b'));
    // The plain text is all that is allowed through.
    assert_eq!(&*cleaned, "$ sudo rm -rf /");
}
1056
#[test]
fn adversarial_fake_password_prompt() {
    // Title change, screen clear and cursor-home in front of a fake prompt:
    // only the prompt text itself may remain.
    let cleaned = sanitize("\x1b]0;Terminal\x07\x1b[2J\x1b[HPassword: ");
    assert_eq!(&*cleaned, "Password: ");
}
1064
#[test]
fn adversarial_overwrite_existing_content() {
    // A carriage return followed by a CSI sequence tries to replace text
    // that was already printed. The CSI is stripped; the CR itself is kept.
    let overwrite_attempt = "safe output\r\x1b[2Kmalicious replacement";
    let cleaned = sanitize(overwrite_attempt);
    assert_eq!(&*cleaned, "safe output\rmalicious replacement");
}
1072
1073    // ---- 7. C1 control codes (single-byte, 0x80-0x9F) ----
1074    //
1075    // In ISO-8859-1, 0x80-0x9F are C1 control characters.
1076    // In UTF-8, these byte values are continuation bytes and should
1077    // be handled by the UTF-8 decoder (invalid as leading bytes).
1078    // The sanitizer should not let them through as control codes.
1079
#[test]
fn adversarial_c1_single_byte_csi() {
    // U+009B is the C1 form of the CSI introducer (ESC [). Some terminals
    // honour it, so it must never reach the output.
    let cleaned = sanitize("text\u{009B}31mmalicious");
    assert!(!cleaned.contains('\x1b'));
    assert!(
        !cleaned.contains('\u{009B}'),
        "C1 CSI (U+009B) must be stripped"
    );
}
1092
#[test]
fn adversarial_c1_osc_byte() {
    // U+009D is the C1 form of the OSC introducer (ESC ]).
    let c1_osc = "text\u{009D}0;Evil Title\x07malicious";
    let cleaned = sanitize(c1_osc);
    assert!(!cleaned.contains('\x1b'));
    assert!(
        !cleaned.contains('\u{009D}'),
        "C1 OSC (U+009D) must be stripped"
    );
}
1104
#[test]
fn adversarial_c1_dcs_byte() {
    // U+0090 is the C1 DCS introducer; it must not appear in the output.
    let cleaned = sanitize("A\u{0090}device control\x1b\\B");
    assert!(!cleaned.contains('\u{0090}'));
}
1112
#[test]
fn adversarial_c1_apc_byte() {
    // U+009F is the C1 APC introducer; it must not appear in the output.
    let c1_apc = "A\u{009F}app command\x1b\\B";
    let cleaned = sanitize(c1_apc);
    assert!(!cleaned.contains('\u{009F}'));
}
1120
#[test]
fn adversarial_c1_pm_byte() {
    // U+009E is the C1 PM introducer; it must not appear in the output.
    let cleaned = sanitize("A\u{009E}private msg\x1b\\B");
    assert!(!cleaned.contains('\u{009E}'));
}
1128
#[test]
fn adversarial_c1_st_byte() {
    // U+009C is the C1 String Terminator; it must not appear in the output.
    let c1_st = "A\u{009C}B";
    let cleaned = sanitize(c1_st);
    assert!(!cleaned.contains('\u{009C}'));
}
1136
#[test]
fn adversarial_all_c1_controls_stripped() {
    // Exhaustive sweep: wrap each C1 control (U+0080..=U+009F) in "A…B" and
    // check that the control disappears while both neighbours are kept.
    for control in '\u{0080}'..='\u{009F}' {
        let cp = u32::from(control);
        let wrapped = format!("A{control}B");
        let cleaned = sanitize(&wrapped);

        let leaked = cleaned
            .chars()
            .any(|ch| matches!(ch, '\u{0080}'..='\u{009F}'));
        assert!(!leaked, "C1 control U+{cp:04X} passed through sanitizer");

        // Stripping the control must not eat the adjacent text.
        assert!(cleaned.contains('A'), "Text before C1 U+{cp:04X} lost");
        assert!(cleaned.contains('B'), "Text after C1 U+{cp:04X} lost");
    }
}
1155
#[test]
fn adversarial_c1_fast_path_triggers_slow_path() {
    // The input has no ESC, DEL or C0 byte, only U+0085 (NEL, Next Line).
    // A C1 control alone must still force the allocating slow path.
    let with_nel = "clean\u{0085}text";
    let cleaned = sanitize(with_nel);
    assert!(
        matches!(&cleaned, Cow::Owned(_)),
        "C1 should trigger slow path"
    );
    assert!(!cleaned.contains('\u{0085}'));
    assert_eq!(&*cleaned, "cleantext");
}
1168
1169    // ---- 8. Sequence terminator confusion ----
1170
#[test]
fn adversarial_nested_osc_in_osc() {
    // A second OSC introducer appears inside the body of the first one.
    // However the nesting is resolved, no ESC or BEL may leak out.
    let nested = "safe\x1b]8;;\x1b]0;evil\x07https://ok.com\x07text";
    let cleaned = sanitize(nested);
    assert!(!cleaned.contains('\x1b'));
    assert!(!cleaned.contains('\x07'));
}
1179
#[test]
fn adversarial_st_inside_dcs() {
    // The DCS body contains an ESC that is NOT followed by `\`, which aborts
    // the DCS handler. The main loop then sees `ESC d` and drops it as a
    // single-character escape, lets "ata" through as plain text, and finally
    // drops the trailing `ESC \` as another single-character escape.
    let aborted_dcs = "A\x1bPsome\x1bdata\x1b\\B";
    let cleaned = sanitize(aborted_dcs);
    assert_eq!(&*cleaned, "AataB");
}
1189
#[test]
fn dcs_with_proper_st_fully_consumed() {
    // With no stray ESC in the body, the DCS runs up to its ST and the whole
    // sequence is swallowed.
    let cleaned = sanitize("A\x1bPsomedata\x1b\\B");
    assert_eq!(&*cleaned, "AB");
}
1197
#[test]
fn adversarial_bel_vs_st_terminator() {
    // The OSC ends at the BEL, so "B" is ordinary text and the later
    // `ESC \` is handled as a stand-alone single-character escape.
    let mixed_terminators = "A\x1b]0;title\x07B\x1b\\C";
    let cleaned = sanitize(mixed_terminators);
    assert!(!cleaned.contains('\x1b'));
    assert!(!cleaned.contains('\x07'));
}
1207
#[test]
fn adversarial_csi_without_final_byte() {
    // A long run of parameter bytes that looks like it never terminates.
    // The trailing 'B' (0x42) is in fact a valid CSI final byte, so the
    // sequence, 'B' included, is consumed and only "A" remains.
    let long_params = "A\x1b[0;0;0;0;0;0;0;0;0;0B";
    let cleaned = sanitize(long_params);
    assert_eq!(&*cleaned, "A");
}
1216
#[test]
fn adversarial_csi_many_params_then_final() {
    // Ten parameters and then a proper final byte ("m"): the text on both
    // sides must join up with nothing in between.
    let cleaned = sanitize("X\x1b[1;2;3;4;5;6;7;8;9;10mY");
    assert_eq!(&*cleaned, "XY");
}
1224
1225    // ---- 9. DoS-style payloads ----
1226
#[test]
fn adversarial_very_long_csi_params() {
    // A single CSI carrying 10,000 "0;" parameter groups before its final byte.
    let params = "0;".repeat(10_000);
    let input = format!("start\x1b[{params}mend");
    let cleaned = sanitize(&input);
    assert_eq!(&*cleaned, "startend");
}
1235
#[test]
fn adversarial_many_short_sequences() {
    // 10,000 tiny CSI sequences packed back to back between two words.
    let input = "\x1b[0m".repeat(10_000);
    let input = format!("start{input}end");
    let cleaned = sanitize(&input);
    assert_eq!(&*cleaned, "startend");
}
1244
#[test]
fn adversarial_very_long_osc_content() {
    // 100,000-character OSC body, the kind of payload that might be used to
    // stress memory handling.
    let payload = "A".repeat(100_000);
    let input = format!("text\x1b]0;{payload}\x07more");
    let cleaned = sanitize(&input);
    assert_eq!(&*cleaned, "textmore");
}
1253
#[test]
fn adversarial_very_long_dcs_content() {
    // 100,000-character DCS body terminated by ST.
    let payload = "X".repeat(100_000);
    let input = format!("text\x1bP{payload}\x1b\\more");
    let cleaned = sanitize(&input);
    assert_eq!(&*cleaned, "textmore");
}
1261
#[test]
fn adversarial_only_escape_bytes() {
    // One thousand ESC bytes and nothing else: everything must be dropped.
    let all_esc = "\x1b".repeat(1000);
    let cleaned = sanitize(&all_esc);
    assert_eq!(&*cleaned, "");
}
1269
#[test]
fn adversarial_alternating_esc_and_text() {
    // 500 repetitions of "<CSI m>a": a short CSI sequence, then one letter.
    let interleaved = "\x1b[ma".repeat(500);
    let cleaned = sanitize(&interleaved);
    // Every sequence is removed, leaving exactly the 500 letters.
    let expected = "a".repeat(500);
    assert_eq!(&*cleaned, expected);
}
1281
#[test]
fn adversarial_all_forbidden_c0_in_sequence() {
    // Every C0 byte except TAB (0x09), LF (0x0A), CR (0x0D) and ESC (0x1B),
    // in ascending order, sandwiched between "start" and "end".
    let forbidden = (0x00u8..=0x1F)
        .filter(|&b| !matches!(b, 0x09 | 0x0A | 0x0D | 0x1B))
        .map(char::from);

    let mut input = String::from("start");
    input.extend(forbidden);
    input.push_str("end");

    let cleaned = sanitize(&input);
    assert_eq!(&*cleaned, "startend");
}
1295
1296    // ---- 10. Combined / chained attacks ----
1297
#[test]
fn adversarial_combined_title_clear_clipboard() {
    // Several attacks chained in one payload; only the visible prompt text
    // at the end is allowed to survive.
    let chained = concat!(
        "\x1b]0;Terminal\x07",    // OSC 0: window title
        "\x1b[2J",                // CSI: clear screen
        "\x1b[H",                 // CSI: cursor home
        "\x1b]52;c;cm0gLXJm\x07", // OSC 52: clipboard write
        "Password: ",             // plain text posing as a prompt
    );
    let cleaned = sanitize(chained);
    assert_eq!(&*cleaned, "Password: ");
    assert!(!cleaned.contains('\x1b'));
    assert!(!cleaned.contains('\x07'));
}
1313
#[test]
fn adversarial_sgr_color_soup() {
    // A pile of SGR sequences around a single word, trying to leak styling.
    let styled = "\x1b[31m\x1b[1m\x1b[4m\x1b[7m\x1b[38;2;255;0;0mred\x1b[0m";
    let cleaned = sanitize(styled);
    assert_eq!(&*cleaned, "red");
}
1321
#[test]
fn adversarial_hyperlink_wrapping_attack() {
    // OSC 8 open/close pair wrapped around innocent-looking link text.
    // Both OSC sequences are removed; the text between them is kept.
    let wrapped_link = concat!(
        "\x1b]8;;https://evil.com\x07",
        "Click here for info",
        "\x1b]8;;\x07",
    );
    let cleaned = sanitize(wrapped_link);
    assert_eq!(&*cleaned, "Click here for info");
}
1333
#[test]
fn adversarial_kitty_graphics_protocol() {
    // Kitty graphics commands travel inside an APC string (ESC _ … ST).
    let cleaned = sanitize("img\x1b_Gf=100,s=1,v=1;AAAA\x1b\\text");
    assert_eq!(&*cleaned, "imgtext");
}
1341
#[test]
fn adversarial_sixel_data() {
    // Sixel image data is delivered inside a DCS string (ESC P … ST).
    let sixel = "pre\x1bPq#0;2;0;0;0#1;2;100;100;100~-\x1b\\post";
    let cleaned = sanitize(sixel);
    assert_eq!(&*cleaned, "prepost");
}
1349
#[test]
fn adversarial_mixed_valid_utf8_and_escapes() {
    // Multi-byte Unicode characters interleaved with CSI and OSC sequences:
    // the characters must come through intact and in order.
    let mixed = "\u{1f512}\x1b[31m\u{26a0}\x1b[0m secure\x1b]0;evil\x07\u{2705}";
    let cleaned = sanitize(mixed);
    assert_eq!(&*cleaned, "\u{1f512}\u{26a0} secure\u{2705}");
}
1357
#[test]
fn adversarial_control_char_near_escape() {
    // C0 control bytes (0x01-0x04) sit directly next to CSI sequences.
    let cleaned = sanitize("\x01\x1b[31m\x02text\x03\x1b[0m\x04");
    assert!(!cleaned.contains('\x1b'));
    assert_eq!(&*cleaned, "text");
}
1366
#[test]
fn adversarial_save_restore_cursor_attack() {
    // ESC 7 / ESC 8 (save / restore cursor) bracket fake content so that it
    // could later be hidden. With both escapes removed, the two text runs
    // are simply concatenated.
    let save_restore = "\x1b7fake prompt\x1b8real content";
    let cleaned = sanitize(save_restore);
    assert_eq!(&*cleaned, "fake promptreal content");
}
1374
#[test]
fn adversarial_dec_set_reset_barrage() {
    // For every mode number 1..=99, emit a private-mode set followed by the
    // matching reset, all wedged between "A" and "B".
    let mut input = String::from("A");
    for i in 1..100 {
        input.push_str(&format!("\x1b[?{i}h\x1b[?{i}l"));
    }
    input.push('B');

    let cleaned = sanitize(&input);
    assert_eq!(&*cleaned, "AB");
}
1385
1386    // ---- Property-based tests via proptest ----
1387
1388    mod proptest_adversarial {
1389        use super::*;
1390        use proptest::prelude::*;
1391
1392        proptest! {
1393            #[test]
1394            fn sanitize_never_panics(input in ".*") {
1395                let _ = sanitize(&input);
1396            }
1397
1398            #[test]
1399            fn sanitize_output_never_contains_esc(input in ".*") {
1400                let result = sanitize(&input);
1401                prop_assert!(
1402                    !result.contains('\x1b'),
1403                    "Output contained ESC for input {:?}", input
1404                );
1405            }
1406
1407            #[test]
1408            fn sanitize_output_never_contains_del(input in ".*") {
1409                let result = sanitize(&input);
1410                prop_assert!(
1411                    !result.contains('\x7f'),
1412                    "Output contained DEL for input {:?}", input
1413                );
1414            }
1415
1416            #[test]
1417            fn sanitize_output_no_forbidden_c0(input in ".*") {
1418                let result = sanitize(&input);
1419                for &b in result.as_bytes() {
1420                    prop_assert!(
1421                        !is_forbidden_c0(b),
1422                        "Output contains forbidden C0 0x{:02X}", b
1423                    );
1424                }
1425            }
1426
1427            #[test]
1428            fn sanitize_preserves_clean_input(input in "[a-zA-Z0-9 .,!?\\n\\t]+") {
1429                let result = sanitize(&input);
1430                prop_assert_eq!(result.as_ref(), input.as_str());
1431            }
1432
1433            #[test]
1434            fn sanitize_idempotent(input in ".*") {
1435                let first = sanitize(&input);
1436                let second = sanitize(first.as_ref());
1437                prop_assert_eq!(
1438                    first.as_ref(),
1439                    second.as_ref(),
1440                    "Sanitize is not idempotent"
1441                );
1442            }
1443
1444            #[test]
1445            fn sanitize_output_len_lte_input(input in ".*") {
1446                let result = sanitize(&input);
1447                prop_assert!(
1448                    result.len() <= input.len(),
1449                    "Output ({}) longer than input ({})", result.len(), input.len()
1450                );
1451            }
1452
1453            #[test]
1454            fn sanitize_output_is_valid_utf8(input in ".*") {
1455                let result = sanitize(&input);
1456                // The return type is Cow<str> so it's guaranteed valid UTF-8,
1457                // but verify the invariant explicitly.
1458                prop_assert!(std::str::from_utf8(result.as_bytes()).is_ok());
1459            }
1460
1461            #[test]
1462            fn sanitize_output_no_c1_controls(input in ".*") {
1463                let result = sanitize(&input);
1464                for c in result.as_ref().chars() {
1465                    prop_assert!(
1466                        !('\u{0080}'..='\u{009F}').contains(&c),
1467                        "Output contains C1 control U+{:04X}", c as u32
1468                    );
1469                }
1470            }
1471        }
1472
1473        // Targeted generators for adversarial byte patterns
1474
1475        fn escape_sequence() -> impl Strategy<Value = String> {
1476            prop_oneof![
1477                // CSI sequences with random params and final bytes
1478                (
1479                    proptest::collection::vec(0x30u8..=0x3F, 0..20),
1480                    0x40u8..=0x7E,
1481                )
1482                    .prop_map(|(params, final_byte)| {
1483                        let mut s = String::from("\x1b[");
1484                        for b in params {
1485                            s.push(b as char);
1486                        }
1487                        s.push(final_byte as char);
1488                        s
1489                    }),
1490                // OSC with BEL terminator
1491                proptest::string::string_regex("[^\x07\x1b]{0,50}")
1492                    .unwrap()
1493                    .prop_map(|content| format!("\x1b]{content}\x07")),
1494                // OSC with ST terminator
1495                proptest::string::string_regex("[^\x1b]{0,50}")
1496                    .unwrap()
1497                    .prop_map(|content| format!("\x1b]{content}\x1b\\")),
1498                // DCS
1499                proptest::string::string_regex("[^\x1b]{0,50}")
1500                    .unwrap()
1501                    .prop_map(|content| format!("\x1bP{content}\x1b\\")),
1502                // APC
1503                proptest::string::string_regex("[^\x1b]{0,50}")
1504                    .unwrap()
1505                    .prop_map(|content| format!("\x1b_{content}\x1b\\")),
1506                // PM
1507                proptest::string::string_regex("[^\x1b]{0,50}")
1508                    .unwrap()
1509                    .prop_map(|content| format!("\x1b^{content}\x1b\\")),
1510                // Single-char escapes
1511                (0x20u8..=0x7E).prop_map(|b| format!("\x1b{}", b as char)),
1512            ]
1513        }
1514
1515        fn mixed_adversarial_input() -> impl Strategy<Value = String> {
1516            proptest::collection::vec(
1517                prop_oneof![
1518                    // Clean text
1519                    proptest::string::string_regex("[a-zA-Z0-9 ]{1,10}").unwrap(),
1520                    // Escape sequences
1521                    escape_sequence(),
1522                    // Forbidden C0 controls
1523                    (0x00u8..=0x1F)
1524                        .prop_filter("not allowed control", |b| {
1525                            *b != 0x09 && *b != 0x0A && *b != 0x0D
1526                        })
1527                        .prop_map(|b| String::from(b as char)),
1528                ],
1529                1..20,
1530            )
1531            .prop_map(|parts| parts.join(""))
1532        }
1533
1534        proptest! {
1535            #[test]
1536            fn adversarial_mixed_input_safe(input in mixed_adversarial_input()) {
1537                let result = sanitize(&input);
1538                prop_assert!(!result.contains('\x1b'));
1539                prop_assert!(!result.contains('\x7f'));
1540                for &b in result.as_bytes() {
1541                    prop_assert!(!is_forbidden_c0(b));
1542                }
1543            }
1544
1545            #[test]
1546            fn escape_sequences_fully_stripped(seq in escape_sequence()) {
1547                let input = format!("before{seq}after");
1548                let result = sanitize(&input);
1549                prop_assert!(
1550                    !result.contains('\x1b'),
1551                    "Output contains ESC for sequence {:?}", seq
1552                );
1553                prop_assert!(
1554                    result.starts_with("before"),
1555                    "Output doesn't start with 'before' for {:?}: got {:?}", seq, result
1556                );
1557                // Note: unterminated DCS/APC/PM/OSC sequences consume to
1558                // end of input, so "after" may be absorbed. This is correct
1559                // security behavior — consuming unterminated sequences is
1560                // safer than letting potential payload through.
1561            }
1562        }
1563    }
1564}