1#![forbid(unsafe_code)]
2
3use std::borrow::Cow;
45
46use memchr::memchr;
47
48#[inline]
69pub fn sanitize(input: &str) -> Cow<'_, str> {
70 let bytes = input.as_bytes();
71
72 if memchr(0x1B, bytes).is_none()
75 && memchr(0x7F, bytes).is_none()
76 && !has_forbidden_c0(bytes)
77 && !has_c1_controls(bytes)
78 {
79 return Cow::Borrowed(input);
80 }
81
82 Cow::Owned(sanitize_slow(input))
84}
85
86#[inline]
91fn has_forbidden_c0(bytes: &[u8]) -> bool {
92 bytes.iter().any(|&b| is_forbidden_c0(b))
93}
94
/// Reports whether `b` is a C0 control byte that this predicate rejects.
///
/// Every byte below 0x20 qualifies except TAB (0x09), LF (0x0A), CR (0x0D)
/// and ESC (0x1B), which are not part of the forbidden set.
#[inline]
const fn is_forbidden_c0(b: u8) -> bool {
    b < 0x20 && !matches!(b, 0x09 | 0x0A | 0x0D | 0x1B)
}
103
/// Detects the two-byte UTF-8 encoding of a C1 control (U+0080..=U+009F).
///
/// Returns `true` when some byte equal to `0xC2` is immediately followed by a
/// byte in `0x80..=0x9F`. Slices shorter than two bytes never match.
#[inline]
fn has_c1_controls(bytes: &[u8]) -> bool {
    // Pair every byte with its successor and test each adjacent pair.
    bytes
        .iter()
        .zip(bytes.iter().skip(1))
        .any(|(&lead, &trail)| lead == 0xC2 && (0x80..=0x9F).contains(&trail))
}
115
116fn sanitize_slow(input: &str) -> String {
118 let bytes = input.as_bytes();
119 let mut output = String::with_capacity(input.len());
120 let mut i = 0;
121
122 while i < bytes.len() {
123 let b = bytes[i];
124 match b {
125 0x1B => {
127 i = skip_escape_sequence(bytes, i);
128 }
129 0x09 | 0x0A | 0x0D => {
131 output.push(b as char);
132 i += 1;
133 }
134 0x00..=0x08 | 0x0B..=0x0C | 0x0E..=0x1A | 0x1C..=0x1F => {
136 i += 1;
137 }
138 0x7F => {
140 i += 1;
141 }
142 0x20..=0x7E => {
144 output.push(b as char);
145 i += 1;
146 }
147 0x80..=0xFF => {
149 if let Some((c, len)) = decode_utf8_char(&bytes[i..]) {
150 if !('\u{0080}'..='\u{009F}').contains(&c) {
153 output.push(c);
154 }
155 i += len;
156 } else {
157 i += 1;
159 }
160 }
161 }
162 }
163
164 output
165}
166
/// Returns the index of the first byte after the escape sequence at `start`.
///
/// `bytes[start]` is taken to be the ESC byte; it is never inspected. The byte
/// after it selects the sequence shape:
///
/// * `[` — bytes in 0x20..=0x3F are skipped until a byte in 0x40..=0x7E ends
///   the sequence (consumed). Any other byte stops the scan and is left in place.
/// * `]` — skipped up to and including BEL (0x07) or `ESC \`. Any other byte
///   below 0x20 (a lone ESC included) stops the scan and is left in place.
/// * `P`, `^`, `_` — as for `]`, except that BEL is not a terminator and
///   therefore stops the scan like any other byte below 0x20.
/// * any other byte in 0x20..=0x7E — consumed as a two-byte escape.
/// * anything else — only the ESC itself is consumed.
///
/// If the input ends before a terminator is found, `bytes.len()` is returned.
/// The result is always at least `start + 1`.
fn skip_escape_sequence(bytes: &[u8], start: usize) -> usize {
    // Shared scanner for the `]` and `P`/`^`/`_` payloads; they differ only in
    // whether BEL ends the sequence.
    fn skip_string_payload(bytes: &[u8], from: usize, bel_terminates: bool) -> usize {
        let mut cursor = from;
        while let Some(&byte) = bytes.get(cursor) {
            match byte {
                0x07 if bel_terminates => return cursor + 1,
                0x1B if bytes.get(cursor + 1) == Some(&b'\\') => return cursor + 2,
                // Remaining control bytes end the payload without being consumed.
                0x00..=0x1F => return cursor,
                _ => cursor += 1,
            }
        }
        cursor
    }

    let introducer_at = start + 1;
    let introducer = match bytes.get(introducer_at) {
        Some(&byte) => byte,
        None => return introducer_at,
    };
    let payload_at = introducer_at + 1;

    match introducer {
        b'[' => bytes[payload_at..]
            .iter()
            .enumerate()
            .find_map(|(offset, &byte)| match byte {
                0x40..=0x7E => Some(payload_at + offset + 1),
                0x20..=0x3F => None,
                _ => Some(payload_at + offset),
            })
            .unwrap_or(bytes.len()),
        b']' => skip_string_payload(bytes, payload_at, true),
        b'P' | b'^' | b'_' => skip_string_payload(bytes, payload_at, false),
        0x20..=0x7E => payload_at,
        _ => introducer_at,
    }
}
258
/// Decodes the single UTF-8 character at the front of `bytes`.
///
/// Returns the character together with the number of bytes it occupies, or
/// `None` when `bytes` is empty, the lead byte is not a valid sequence start
/// (0x80..=0xBF or 0xF8..=0xFF), the sequence is truncated, or the bytes do not
/// form a well-formed encoding of a Unicode scalar value (bad continuation
/// byte, overlong form, surrogate, or value above U+10FFFF).
fn decode_utf8_char(bytes: &[u8]) -> Option<(char, usize)> {
    let lead = *bytes.first()?;

    // The lead byte alone fixes how many bytes the sequence must span.
    let width = match lead {
        0x00..=0x7F => return Some((char::from(lead), 1)),
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF7 => 4,
        _ => return None,
    };

    // Strict validation of exactly `width` bytes: a success means the prefix
    // holds one complete character of that length.
    let encoded = bytes.get(..width)?;
    let decoded = std::str::from_utf8(encoded).ok()?.chars().next()?;
    Some((decoded, width))
}
302
/// A string tagged with how it should be treated with respect to sanitization.
///
/// Both variants hold a `Cow<str>`, so the text may be borrowed or owned.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Text<'a> {
    /// Text tagged as sanitized. `Text::sanitized` and `Text::sanitized_owned`
    /// store the output of `sanitize` here.
    Sanitized(Cow<'a, str>),

    /// Text tagged as trusted. `Text::trusted` and `Text::trusted_owned` store
    /// their argument here unmodified.
    Trusted(Cow<'a, str>),
}
316
317impl<'a> Text<'a> {
318 #[inline]
320 pub fn sanitized(s: &'a str) -> Self {
321 Text::Sanitized(sanitize(s))
322 }
323
324 #[inline]
330 pub fn trusted(s: &'a str) -> Self {
331 Text::Trusted(Cow::Borrowed(s))
332 }
333
334 #[inline]
336 pub fn sanitized_owned(s: String) -> Self {
337 match sanitize(&s) {
338 Cow::Borrowed(_) => Text::Sanitized(Cow::Owned(s)),
339 Cow::Owned(owned) => Text::Sanitized(Cow::Owned(owned)),
340 }
341 }
342
343 #[inline]
345 pub fn trusted_owned(s: String) -> Self {
346 Text::Trusted(Cow::Owned(s))
347 }
348
349 #[inline]
351 #[must_use]
352 pub fn as_str(&self) -> &str {
353 match self {
354 Text::Sanitized(cow) => cow.as_ref(),
355 Text::Trusted(cow) => cow.as_ref(),
356 }
357 }
358
359 #[inline]
361 #[must_use]
362 pub fn is_sanitized(&self) -> bool {
363 matches!(self, Text::Sanitized(_))
364 }
365
366 #[inline]
368 #[must_use]
369 pub fn is_trusted(&self) -> bool {
370 matches!(self, Text::Trusted(_))
371 }
372
373 pub fn into_owned(self) -> Text<'static> {
375 match self {
376 Text::Sanitized(cow) => Text::Sanitized(Cow::Owned(cow.into_owned())),
377 Text::Trusted(cow) => Text::Trusted(Cow::Owned(cow.into_owned())),
378 }
379 }
380}
381
382impl AsRef<str> for Text<'_> {
383 fn as_ref(&self) -> &str {
384 self.as_str()
385 }
386}
387
388impl std::fmt::Display for Text<'_> {
389 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
390 write!(f, "{}", self.as_str())
391 }
392}
393
// Unit and property tests for `sanitize`, `decode_utf8_char` and `Text`.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Fast path: clean input must come back borrowed and unchanged ----

    #[test]
    fn fast_path_no_escape() {
        let input = "Normal log message without escapes";
        let result = sanitize(input);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), input);
    }

    #[test]
    fn fast_path_with_allowed_controls() {
        let input = "Line1\nLine2\tTabbed\rCarriage";
        let result = sanitize(input);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), input);
    }

    #[test]
    fn fast_path_unicode() {
        let input = "Hello \u{4e16}\u{754c} \u{1f468}\u{200d}\u{1f469}\u{200d}\u{1f467}";
        let result = sanitize(input);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), input);
    }

    #[test]
    fn fast_path_empty() {
        let input = "";
        let result = sanitize(input);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn fast_path_printable_ascii() {
        let input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()";
        let result = sanitize(input);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), input);
    }

    // ---- Slow path: `ESC [` (CSI) sequences ----

    #[test]
    fn slow_path_strips_sgr_color() {
        let input = "Hello \x1b[31mred\x1b[0m world";
        let result = sanitize(input);
        assert!(matches!(result, Cow::Owned(_)));
        assert_eq!(result.as_ref(), "Hello red world");
    }

    #[test]
    fn slow_path_strips_cursor_movement() {
        let input = "Before\x1b[2;5HAfter";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    #[test]
    fn slow_path_strips_erase() {
        let input = "Text\x1b[2JCleared";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "TextCleared");
    }

    #[test]
    fn slow_path_strips_multiple_sequences() {
        let input = "\x1b[1mBold\x1b[0m \x1b[4mUnderline\x1b[24m \x1b[38;5;196mColor\x1b[0m";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Bold Underline Color");
    }

    // ---- Slow path: `ESC ]` (OSC) sequences, terminated by BEL or `ESC \` ----

    #[test]
    fn slow_path_strips_osc_title_bel() {
        let input = "Text\x1b]0;Evil Title\x07More";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "TextMore");
    }

    #[test]
    fn slow_path_strips_osc_title_st() {
        let input = "Text\x1b]0;Evil Title\x1b\\More";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "TextMore");
    }

    // The text between the two OSC 8 sequences is ordinary text and is kept.
    #[test]
    fn slow_path_strips_osc8_hyperlink() {
        let input = "Click \x1b]8;;https://evil.com\x07here\x1b]8;;\x07 please";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Click here please");
    }

    // ---- Slow path: `ESC P` / `ESC _` / `ESC ^` strings, terminated by `ESC \` ----

    #[test]
    fn slow_path_strips_dcs() {
        let input = "Before\x1bPdevice control string\x1b\\After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    #[test]
    fn slow_path_strips_apc() {
        let input = "Before\x1b_application program command\x1b\\After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    #[test]
    fn slow_path_strips_pm() {
        let input = "Before\x1b^privacy message\x1b\\After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    #[test]
    fn slow_path_strips_osc52_clipboard() {
        let input = "Before\x1b]52;c;SGVsbG8=\x07After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    #[test]
    fn slow_path_strips_osc52_clipboard_st() {
        let input = "Before\x1b]52;c;SGVsbG8=\x1b\\After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    #[test]
    fn slow_path_strips_private_modes() {
        let input = "A\x1b[?1049hB\x1b[?1000hC\x1b[?2004hD";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "ABCD");
    }

    // ---- Slow path: individual C0 controls and DEL ----

    #[test]
    fn slow_path_strips_nul() {
        let input = "Hello\x00World";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "HelloWorld");
    }

    #[test]
    fn slow_path_strips_bel() {
        let input = "Hello\x07World";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "HelloWorld");
    }

    #[test]
    fn slow_path_strips_backspace() {
        let input = "Hello\x08World";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "HelloWorld");
    }

    #[test]
    fn slow_path_strips_form_feed() {
        let input = "Hello\x0CWorld";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "HelloWorld");
    }

    #[test]
    fn slow_path_strips_vertical_tab() {
        let input = "Hello\x0BWorld";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "HelloWorld");
    }

    #[test]
    fn slow_path_strips_del() {
        let input = "Hello\x7FWorld";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "HelloWorld");
    }

    // Despite the name, this input contains nothing that needs stripping.
    #[test]
    fn slow_path_preserves_tab_lf_cr() {
        let input = "Line1\nLine2\tTabbed\rReturn";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Line1\nLine2\tTabbed\rReturn");
    }

    // ---- Truncated / malformed sequences: an unterminated sequence swallows the rest ----

    #[test]
    fn handles_truncated_csi() {
        let input = "Hello\x1b[";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "Hello");
    }

    #[test]
    fn handles_truncated_dcs() {
        let input = "Hello\x1bP1;2;3";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "Hello");
    }

    #[test]
    fn handles_truncated_apc() {
        let input = "Hello\x1b_test";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "Hello");
    }

    #[test]
    fn handles_truncated_pm() {
        let input = "Hello\x1b^secret";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "Hello");
    }

    #[test]
    fn handles_truncated_osc() {
        let input = "Hello\x1b]0;Title";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "Hello");
    }

    #[test]
    fn handles_esc_at_end() {
        let input = "Hello\x1b";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Hello");
    }

    #[test]
    fn handles_lone_esc() {
        let input = "\x1b";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    // ESC followed by one printable byte is removed as a two-byte escape.
    #[test]
    fn handles_single_char_escape() {
        let input = "Before\x1b7Middle\x1b8After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeMiddleAfter");
    }

    // `!` is consumed together with the ESC; the following text is kept.
    #[test]
    fn handles_unknown_escape() {
        let input = "Before\x1b!After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    // ---- Unicode preservation ----

    #[test]
    fn preserves_unicode_characters() {
        let input = "\u{4e16}\u{754c}"; let result = sanitize(input);
        assert_eq!(result.as_ref(), "\u{4e16}\u{754c}");
    }

    #[test]
    fn preserves_emoji() {
        let input = "\u{1f600}\u{1f389}\u{1f680}"; let result = sanitize(input);
        assert_eq!(result.as_ref(), "\u{1f600}\u{1f389}\u{1f680}");
    }

    #[test]
    fn preserves_combining_characters() {
        let input = "e\u{0301}";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "e\u{0301}");
    }

    #[test]
    fn mixed_unicode_and_escapes() {
        let input = "\u{4e16}\x1b[31m\u{754c}\x1b[0m";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "\u{4e16}\u{754c}");
    }

    // ---- `Text` wrapper ----

    #[test]
    fn text_sanitized() {
        let text = Text::sanitized("Hello \x1b[31mWorld\x1b[0m");
        assert!(text.is_sanitized());
        assert!(!text.is_trusted());
        assert_eq!(text.as_str(), "Hello World");
    }

    // Trusted text keeps its escape sequences untouched.
    #[test]
    fn text_trusted() {
        let text = Text::trusted("Hello \x1b[31mWorld\x1b[0m");
        assert!(!text.is_sanitized());
        assert!(text.is_trusted());
        assert_eq!(text.as_str(), "Hello \x1b[31mWorld\x1b[0m");
    }

    #[test]
    fn text_into_owned() {
        let text = Text::sanitized("Hello");
        let owned = text.into_owned();
        assert!(owned.is_sanitized());
        assert_eq!(owned.as_str(), "Hello");
    }

    #[test]
    fn text_display() {
        let text = Text::sanitized("Hello");
        assert_eq!(format!("{text}"), "Hello");
    }

    // ---- Output invariants over fixed input tables ----

    #[test]
    fn output_never_contains_esc() {
        let inputs = [
            "Normal text",
            "\x1b[31mRed\x1b[0m",
            "\x1b]0;Title\x07",
            "\x1bPDCS\x1b\\",
            "Mixed\x1b[1m\x1b]8;;url\x07text\x1b]8;;\x07\x1b[0m",
            "",
            "\x1b",
            "\x1b[",
            "\x1b]",
        ];

        for input in inputs {
            let result = sanitize(input);
            assert!(
                !result.contains('\x1b'),
                "Output contains ESC for input: {input:?}"
            );
        }
    }

    #[test]
    fn output_never_contains_forbidden_c0() {
        let inputs = [
            "\x00\x01\x02\x03\x04\x05\x06\x07",
            "\x08\x0B\x0C\x0E\x0F",
            "\x10\x11\x12\x13\x14\x15\x16\x17",
            "\x18\x19\x1A\x1C\x1D\x1E\x1F",
            "Mixed\x00text\x07with\x0Ccontrols",
        ];

        for input in inputs {
            let result = sanitize(input);
            for b in result.as_bytes() {
                assert!(
                    !is_forbidden_c0(*b),
                    "Output contains forbidden C0 0x{b:02X} for input: {input:?}"
                );
            }
        }
    }

    #[test]
    fn allowed_controls_preserved_in_output() {
        let input = "Tab\there\nNewline\rCarriage";
        let result = sanitize(input);
        assert!(result.contains('\t'));
        assert!(result.contains('\n'));
        assert!(result.contains('\r'));
    }

    // ---- `decode_utf8_char` ----

    #[test]
    fn decode_ascii() {
        let bytes = b"A";
        let result = decode_utf8_char(bytes);
        assert_eq!(result, Some(('A', 1)));
    }

    #[test]
    fn decode_two_byte() {
        let bytes = "\u{00E9}".as_bytes(); let result = decode_utf8_char(bytes);
        assert_eq!(result, Some(('\u{00E9}', 2)));
    }

    #[test]
    fn decode_three_byte() {
        let bytes = "\u{4e16}".as_bytes(); let result = decode_utf8_char(bytes);
        assert_eq!(result, Some(('\u{4e16}', 3)));
    }

    #[test]
    fn decode_four_byte() {
        let bytes = "\u{1f600}".as_bytes(); let result = decode_utf8_char(bytes);
        assert_eq!(result, Some(('\u{1f600}', 4)));
    }

    #[test]
    fn decode_invalid_lead() {
        let bytes = &[0xFF];
        let result = decode_utf8_char(bytes);
        assert_eq!(result, None);
    }

    // A two-byte lead with no continuation byte.
    #[test]
    fn decode_truncated() {
        let bytes = &[0xC2]; let result = decode_utf8_char(bytes);
        assert_eq!(result, None);
    }

    // 0x00 does not have the `10xxxxxx` continuation-byte form.
    #[test]
    fn decode_invalid_continuation() {
        let bytes = &[0xC2, 0x00]; let result = decode_utf8_char(bytes);
        assert_eq!(result, None);
    }

    // ---- Adversarial: CSI screen / cursor manipulation ----

    #[test]
    fn adversarial_clear_screen() {
        let input = "\x1b[2J";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_home_cursor() {
        let input = "visible\x1b[Hhidden";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "visiblehidden");
    }

    #[test]
    fn adversarial_cursor_absolute_position() {
        let input = "ok\x1b[999;999Hmalicious";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "okmalicious");
    }

    #[test]
    fn adversarial_scroll_up() {
        let input = "text\x1b[5Smore";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "textmore");
    }

    #[test]
    fn adversarial_scroll_down() {
        let input = "text\x1b[5Tmore";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "textmore");
    }

    #[test]
    fn adversarial_erase_line() {
        let input = "secret\x1b[2Koverwrite";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "secretoverwrite");
    }

    #[test]
    fn adversarial_insert_delete_lines() {
        let input = "text\x1b[10Linserted\x1b[5Mdeleted";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "textinserteddeleted");
    }

    // ---- Adversarial: OSC title injection ----

    #[test]
    fn adversarial_osc0_title_injection() {
        let input = "\x1b]0;PWNED - Enter Password\x07";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
        assert!(!result.contains('\x1b'));
        assert!(!result.contains('\x07'));
    }

    #[test]
    fn adversarial_osc1_icon_title() {
        let input = "\x1b]1;evil-icon\x07";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_osc2_window_title() {
        let input = "\x1b]2;sudo password required\x1b\\";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    // ---- Adversarial: OSC 52 clipboard access ----

    #[test]
    fn adversarial_osc52_clipboard_set_bel() {
        let input = "safe\x1b]52;c;cm0gLXJmIC8=\x07text";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "safetext");
    }

    #[test]
    fn adversarial_osc52_clipboard_set_st() {
        let input = "safe\x1b]52;c;cm0gLXJmIC8=\x1b\\text";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "safetext");
    }

    #[test]
    fn adversarial_osc52_clipboard_query() {
        let input = "\x1b]52;c;?\x07";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    // ---- Adversarial: `ESC [ ?` private mode set / reset ----

    #[test]
    fn adversarial_alt_screen_enable() {
        let input = "\x1b[?1049h";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_alt_screen_disable() {
        let input = "\x1b[?1049l";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_mouse_enable() {
        let input = "\x1b[?1000h\x1b[?1002h\x1b[?1003h\x1b[?1006h";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_bracketed_paste_enable() {
        let input = "\x1b[?2004h";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_focus_events_enable() {
        let input = "\x1b[?1004h";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_raw_mode_sequence() {
        let input = "\x1b[?7727h";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_cursor_hide_show() {
        let input = "\x1b[?25l\x1b[?25h";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    // ---- Adversarial: query sequences ----

    #[test]
    fn adversarial_device_attributes_query_da1() {
        let input = "\x1b[c";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_device_attributes_query_da2() {
        let input = "\x1b[>c";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_device_status_report() {
        let input = "\x1b[6n";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_osc_color_query() {
        let input = "\x1b]11;?\x07";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    // `$` (0x24) is skipped inside the CSI body; `p` is the final byte.
    #[test]
    fn adversarial_decrpm_query() {
        let input = "\x1b[?2026$p";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    // ---- Adversarial: spoofed prompts and overwritten content ----

    #[test]
    fn adversarial_fake_shell_prompt() {
        let input = "\x1b[999;1H\x1b[2K$ sudo rm -rf /\x1b[A";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "$ sudo rm -rf /");
    }

    #[test]
    fn adversarial_fake_password_prompt() {
        let input = "\x1b]0;Terminal\x07\x1b[2J\x1b[HPassword: ";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Password: ");
    }

    // CR is an allowed control, so it survives; only the CSI sequence is removed.
    #[test]
    fn adversarial_overwrite_existing_content() {
        let input = "safe output\r\x1b[2Kmalicious replacement";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "safe output\rmalicious replacement");
    }

    // ---- Adversarial: C1 controls (U+0080..=U+009F) encoded as UTF-8 ----

    #[test]
    fn adversarial_c1_single_byte_csi() {
        let input = "text\u{009B}31mmalicious";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert!(
            !result.contains('\u{009B}'),
            "C1 CSI (U+009B) must be stripped"
        );
    }

    #[test]
    fn adversarial_c1_osc_byte() {
        let input = "text\u{009D}0;Evil Title\x07malicious";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert!(
            !result.contains('\u{009D}'),
            "C1 OSC (U+009D) must be stripped"
        );
    }

    #[test]
    fn adversarial_c1_dcs_byte() {
        let input = "A\u{0090}device control\x1b\\B";
        let result = sanitize(input);
        assert!(!result.contains('\u{0090}'));
    }

    #[test]
    fn adversarial_c1_apc_byte() {
        let input = "A\u{009F}app command\x1b\\B";
        let result = sanitize(input);
        assert!(!result.contains('\u{009F}'));
    }

    #[test]
    fn adversarial_c1_pm_byte() {
        let input = "A\u{009E}private msg\x1b\\B";
        let result = sanitize(input);
        assert!(!result.contains('\u{009E}'));
    }

    #[test]
    fn adversarial_c1_st_byte() {
        let input = "A\u{009C}B";
        let result = sanitize(input);
        assert!(!result.contains('\u{009C}'));
    }

    // Exhaustive sweep: every C1 code point is removed and its neighbours survive.
    #[test]
    fn adversarial_all_c1_controls_stripped() {
        for cp in 0x0080..=0x009F_u32 {
            let c = char::from_u32(cp).unwrap();
            let input = format!("A{c}B");
            let result = sanitize(&input);
            assert!(
                !result
                    .chars()
                    .any(|ch| ('\u{0080}'..='\u{009F}').contains(&ch)),
                "C1 control U+{cp:04X} passed through sanitizer"
            );
            assert!(result.contains('A'), "Text before C1 U+{cp:04X} lost");
            assert!(result.contains('B'), "Text after C1 U+{cp:04X} lost");
        }
    }

    // A C1 control alone must be enough to leave the borrowed fast path.
    #[test]
    fn adversarial_c1_fast_path_triggers_slow_path() {
        let input = "clean\u{0085}text"; let result = sanitize(input);
        assert!(
            matches!(result, Cow::Owned(_)),
            "C1 should trigger slow path"
        );
        assert!(!result.contains('\u{0085}'));
        assert_eq!(result.as_ref(), "cleantext");
    }

    // ---- Adversarial: nested, interrupted and ambiguous sequences ----

    #[test]
    fn adversarial_nested_osc_in_osc() {
        let input = "safe\x1b]8;;\x1b]0;evil\x07https://ok.com\x07text";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert!(!result.contains('\x07'));
    }

    // A bare ESC inside the DCS payload ends the DCS early. That ESC then
    // consumes `d` as a two-byte escape, so `ata` is emitted as plain text
    // before the trailing `ESC \` is removed.
    #[test]
    fn adversarial_st_inside_dcs() {
        let input = "A\x1bPsome\x1bdata\x1b\\B";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "AataB");
    }

    #[test]
    fn dcs_with_proper_st_fully_consumed() {
        let input = "A\x1bPsomedata\x1b\\B";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "AB");
    }

    #[test]
    fn adversarial_bel_vs_st_terminator() {
        let input = "A\x1b]0;title\x07B\x1b\\C";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert!(!result.contains('\x07'));
    }

    // Despite the name, the trailing `B` (0x42) acts as the CSI final byte and
    // is consumed with the sequence, leaving only `A`.
    #[test]
    fn adversarial_csi_without_final_byte() {
        let input = "A\x1b[0;0;0;0;0;0;0;0;0;0B";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "A");
    }

    #[test]
    fn adversarial_csi_many_params_then_final() {
        let input = "X\x1b[1;2;3;4;5;6;7;8;9;10mY";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "XY");
    }

    // ---- Adversarial: very large inputs ----

    #[test]
    fn adversarial_very_long_csi_params() {
        let params: String = std::iter::repeat_n("0;", 10_000).collect();
        let input = format!("start\x1b[{params}mend");
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "startend");
    }

    #[test]
    fn adversarial_many_short_sequences() {
        let input: String = (0..10_000).map(|_| "\x1b[0m").collect();
        let input = format!("start{input}end");
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "startend");
    }

    #[test]
    fn adversarial_very_long_osc_content() {
        let payload: String = std::iter::repeat_n('A', 100_000).collect();
        let input = format!("text\x1b]0;{payload}\x07more");
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "textmore");
    }

    #[test]
    fn adversarial_very_long_dcs_content() {
        let payload: String = std::iter::repeat_n('X', 100_000).collect();
        let input = format!("text\x1bP{payload}\x1b\\more");
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "textmore");
    }

    #[test]
    fn adversarial_only_escape_bytes() {
        let input: String = std::iter::repeat_n('\x1b', 1000).collect();
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_alternating_esc_and_text() {
        let input: String = (0..1000)
            .map(|i| if i % 2 == 0 { "\x1b[m" } else { "a" })
            .collect();
        let result = sanitize(&input);
        let expected: String = std::iter::repeat_n('a', 500).collect();
        assert_eq!(result.as_ref(), expected);
    }

    // Every C0 byte except TAB, LF, CR and ESC, back to back.
    #[test]
    fn adversarial_all_forbidden_c0_in_sequence() {
        let mut input = String::from("start");
        for b in 0x00u8..=0x1F {
            if b != 0x09 && b != 0x0A && b != 0x0D && b != 0x1B {
                input.push(b as char);
            }
        }
        input.push_str("end");
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "startend");
    }

    // ---- Adversarial: combined payloads ----

    #[test]
    fn adversarial_combined_title_clear_clipboard() {
        let input = concat!(
            "\x1b]0;Terminal\x07", "\x1b[2J", "\x1b[H", "\x1b]52;c;cm0gLXJm\x07", "Password: ", );
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Password: ");
        assert!(!result.contains('\x1b'));
        assert!(!result.contains('\x07'));
    }

    #[test]
    fn adversarial_sgr_color_soup() {
        let input = "\x1b[31m\x1b[1m\x1b[4m\x1b[7m\x1b[38;2;255;0;0mred\x1b[0m";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "red");
    }

    #[test]
    fn adversarial_hyperlink_wrapping_attack() {
        let input = concat!(
            "\x1b]8;;https://evil.com\x07",
            "Click here for info",
            "\x1b]8;;\x07",
        );
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Click here for info");
    }

    #[test]
    fn adversarial_kitty_graphics_protocol() {
        let input = "img\x1b_Gf=100,s=1,v=1;AAAA\x1b\\text";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "imgtext");
    }

    #[test]
    fn adversarial_sixel_data() {
        let input = "pre\x1bPq#0;2;0;0;0#1;2;100;100;100~-\x1b\\post";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "prepost");
    }

    #[test]
    fn adversarial_mixed_valid_utf8_and_escapes() {
        let input = "\u{1f512}\x1b[31m\u{26a0}\x1b[0m secure\x1b]0;evil\x07\u{2705}";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "\u{1f512}\u{26a0} secure\u{2705}");
    }

    #[test]
    fn adversarial_control_char_near_escape() {
        let input = "\x01\x1b[31m\x02text\x03\x1b[0m\x04";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "text");
    }

    #[test]
    fn adversarial_save_restore_cursor_attack() {
        let input = "\x1b7fake prompt\x1b8real content";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "fake promptreal content");
    }

    #[test]
    fn adversarial_dec_set_reset_barrage() {
        let input = (1..100)
            .map(|i| format!("\x1b[?{i}h\x1b[?{i}l"))
            .collect::<String>();
        let input = format!("A{input}B");
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "AB");
    }

    // Property-based tests (proptest) over arbitrary and structured inputs.
    mod proptest_adversarial {
        use super::*;
        use proptest::prelude::*;

        // Invariants checked against strings generated from the regex `.*`.
        proptest! {
            #[test]
            fn sanitize_never_panics(input in ".*") {
                let _ = sanitize(&input);
            }

            #[test]
            fn sanitize_output_never_contains_esc(input in ".*") {
                let result = sanitize(&input);
                prop_assert!(
                    !result.contains('\x1b'),
                    "Output contained ESC for input {:?}", input
                );
            }

            #[test]
            fn sanitize_output_never_contains_del(input in ".*") {
                let result = sanitize(&input);
                prop_assert!(
                    !result.contains('\x7f'),
                    "Output contained DEL for input {:?}", input
                );
            }

            #[test]
            fn sanitize_output_no_forbidden_c0(input in ".*") {
                let result = sanitize(&input);
                for &b in result.as_bytes() {
                    prop_assert!(
                        !is_forbidden_c0(b),
                        "Output contains forbidden C0 0x{:02X}", b
                    );
                }
            }

            #[test]
            fn sanitize_preserves_clean_input(input in "[a-zA-Z0-9 .,!?\\n\\t]+") {
                let result = sanitize(&input);
                prop_assert_eq!(result.as_ref(), input.as_str());
            }

            // Sanitizing already-sanitized output must change nothing.
            #[test]
            fn sanitize_idempotent(input in ".*") {
                let first = sanitize(&input);
                let second = sanitize(first.as_ref());
                prop_assert_eq!(
                    first.as_ref(),
                    second.as_ref(),
                    "Sanitize is not idempotent"
                );
            }

            // The sanitizer only removes bytes; it never adds any.
            #[test]
            fn sanitize_output_len_lte_input(input in ".*") {
                let result = sanitize(&input);
                prop_assert!(
                    result.len() <= input.len(),
                    "Output ({}) longer than input ({})", result.len(), input.len()
                );
            }

            #[test]
            fn sanitize_output_is_valid_utf8(input in ".*") {
                let result = sanitize(&input);
                prop_assert!(std::str::from_utf8(result.as_bytes()).is_ok());
            }

            #[test]
            fn sanitize_output_no_c1_controls(input in ".*") {
                let result = sanitize(&input);
                for c in result.as_ref().chars() {
                    prop_assert!(
                        !('\u{0080}'..='\u{009F}').contains(&c),
                        "Output contains C1 control U+{:04X}", c as u32
                    );
                }
            }
        }

        // Strategy producing one escape sequence of a randomly chosen shape.
        fn escape_sequence() -> impl Strategy<Value = String> {
            prop_oneof![
                // `ESC [` + up to 19 bytes from 0x30..=0x3F + one final byte from 0x40..=0x7E.
                (
                    proptest::collection::vec(0x30u8..=0x3F, 0..20),
                    0x40u8..=0x7E,
                )
                    .prop_map(|(params, final_byte)| {
                        let mut s = String::from("\x1b[");
                        for b in params {
                            s.push(b as char);
                        }
                        s.push(final_byte as char);
                        s
                    }),
                // `ESC ]` payload terminated by BEL.
                proptest::string::string_regex("[^\x07\x1b]{0,50}")
                    .unwrap()
                    .prop_map(|content| format!("\x1b]{content}\x07")),
                // `ESC ]` payload terminated by `ESC \`.
                proptest::string::string_regex("[^\x1b]{0,50}")
                    .unwrap()
                    .prop_map(|content| format!("\x1b]{content}\x1b\\")),
                // `ESC P` payload terminated by `ESC \`.
                proptest::string::string_regex("[^\x1b]{0,50}")
                    .unwrap()
                    .prop_map(|content| format!("\x1bP{content}\x1b\\")),
                // `ESC _` payload terminated by `ESC \`.
                proptest::string::string_regex("[^\x1b]{0,50}")
                    .unwrap()
                    .prop_map(|content| format!("\x1b_{content}\x1b\\")),
                // `ESC ^` payload terminated by `ESC \`.
                proptest::string::string_regex("[^\x1b]{0,50}")
                    .unwrap()
                    .prop_map(|content| format!("\x1b^{content}\x1b\\")),
                // ESC followed by a single printable ASCII byte.
                (0x20u8..=0x7E).prop_map(|b| format!("\x1b{}", b as char)),
            ]
        }

        // Strategy joining 1 to 19 parts, each being plain text, an escape
        // sequence, or a single C0 byte other than TAB, LF and CR (ESC included).
        fn mixed_adversarial_input() -> impl Strategy<Value = String> {
            proptest::collection::vec(
                prop_oneof![
                    proptest::string::string_regex("[a-zA-Z0-9 ]{1,10}").unwrap(),
                    escape_sequence(),
                    (0x00u8..=0x1F)
                        .prop_filter("not allowed control", |b| {
                            *b != 0x09 && *b != 0x0A && *b != 0x0D
                        })
                        .prop_map(|b| String::from(b as char)),
                ],
                1..20,
            )
            .prop_map(|parts| parts.join(""))
        }

        // Invariants checked against the structured strategies above.
        proptest! {
            #[test]
            fn adversarial_mixed_input_safe(input in mixed_adversarial_input()) {
                let result = sanitize(&input);
                prop_assert!(!result.contains('\x1b'));
                prop_assert!(!result.contains('\x7f'));
                for &b in result.as_bytes() {
                    prop_assert!(!is_forbidden_c0(b));
                }
            }

            // Only the prefix is asserted: a generated payload may end its
            // sequence early, so the text after it is not pinned.
            #[test]
            fn escape_sequences_fully_stripped(seq in escape_sequence()) {
                let input = format!("before{seq}after");
                let result = sanitize(&input);
                prop_assert!(
                    !result.contains('\x1b'),
                    "Output contains ESC for sequence {:?}", seq
                );
                prop_assert!(
                    result.starts_with("before"),
                    "Output doesn't start with 'before' for {:?}: got {:?}", seq, result
                );
            }
        }
    }
}