1#![forbid(unsafe_code)]
2
3use std::borrow::Cow;
45
46use memchr::memchr;
47
48#[inline]
69pub fn sanitize(input: &str) -> Cow<'_, str> {
70 let bytes = input.as_bytes();
71
72 if memchr(0x1B, bytes).is_none()
75 && memchr(0x7F, bytes).is_none()
76 && !has_forbidden_c0(bytes)
77 && !has_c1_controls(bytes)
78 {
79 return Cow::Borrowed(input);
80 }
81
82 Cow::Owned(sanitize_slow(input))
84}
85
86#[inline]
91fn has_forbidden_c0(bytes: &[u8]) -> bool {
92 bytes.iter().any(|&b| is_forbidden_c0(b))
93}
94
/// Reports whether `b` is a C0 control byte (below 0x20) other than TAB (0x09),
/// LF (0x0A), CR (0x0D) and ESC (0x1B).
///
/// Bytes at or above 0x20, including DEL (0x7F), are never reported by this function.
#[inline]
const fn is_forbidden_c0(b: u8) -> bool {
    b < 0x20 && !matches!(b, 0x09 | 0x0A | 0x0D | 0x1B)
}
103
/// Returns `true` when `bytes` contains the pair 0xC2 followed by a byte in
/// 0x80..=0x9F, which is how UTF-8 encodes the C1 controls U+0080..=U+009F.
///
/// Slices shorter than two bytes always yield `false`.
#[inline]
fn has_c1_controls(bytes: &[u8]) -> bool {
    bytes
        .iter()
        .zip(bytes.iter().skip(1))
        .any(|(&lead, &next)| lead == 0xC2 && matches!(next, 0x80..=0x9F))
}
115
116fn sanitize_slow(input: &str) -> String {
118 let bytes = input.as_bytes();
119 let mut output = String::with_capacity(input.len());
120 let mut i = 0;
121
122 while i < bytes.len() {
123 let b = bytes[i];
124 match b {
125 0x1B => {
127 i = skip_escape_sequence(bytes, i);
128 }
129 0x09 | 0x0A | 0x0D => {
131 output.push(b as char);
132 i += 1;
133 }
134 0x00..=0x08 | 0x0B..=0x0C | 0x0E..=0x1A | 0x1C..=0x1F => {
136 i += 1;
137 }
138 0x7F => {
140 i += 1;
141 }
142 0x20..=0x7E => {
144 output.push(b as char);
145 i += 1;
146 }
147 0x80..=0xFF => {
149 if let Some((c, len)) = decode_utf8_char(&bytes[i..]) {
150 if !('\u{0080}'..='\u{009F}').contains(&c) {
153 output.push(c);
154 }
155 i += len;
156 } else {
157 i += 1;
159 }
160 }
161 }
162 }
163
164 output
165}
166
167fn skip_escape_sequence(bytes: &[u8], start: usize) -> usize {
177 let mut i = start + 1; if i >= bytes.len() {
179 return i;
180 }
181
182 match bytes[i] {
183 b'[' => {
185 i += 1;
186 while i < bytes.len() {
189 let b = bytes[i];
190 if (0x40..=0x7E).contains(&b) {
191 return i + 1;
192 }
193 if !(0x20..=0x3F).contains(&b) {
195 return i;
198 }
199 i += 1;
200 }
201 }
202 b']' => {
204 i += 1;
205 while i < bytes.len() {
206 let b = bytes[i];
207 if b == 0x07 {
209 return i + 1;
210 }
211 if b == 0x1B && i + 1 < bytes.len() && bytes[i + 1] == b'\\' {
213 return i + 2;
214 }
215 if b == 0x1B {
218 return i;
219 }
220 if b < 0x20 {
222 return i;
223 }
224 i += 1;
225 }
226 }
227 b'P' | b'^' | b'_' => {
229 i += 1;
230 while i < bytes.len() {
231 let b = bytes[i];
232 if b == 0x1B && i + 1 < bytes.len() && bytes[i + 1] == b'\\' {
234 return i + 2;
235 }
236 if b == 0x1B {
239 return i;
240 }
241 if b < 0x20 {
243 return i;
244 }
245 i += 1;
246 }
247 }
248 0x20..=0x7E => {
250 return i + 1;
251 }
252 _ => {}
254 }
255
256 i
257}
258
/// Decodes the single UTF-8 encoded character at the start of `bytes`.
///
/// Returns the character together with the number of bytes it occupies, or `None`
/// when `bytes` is empty, starts with a byte that cannot lead a sequence
/// (0x80..=0xBF or 0xF8..=0xFF), is shorter than the lead byte requires, contains a
/// malformed continuation byte, is an overlong encoding, or decodes to a value that
/// is not a valid `char` (surrogates, values above U+10FFFF).
fn decode_utf8_char(bytes: &[u8]) -> Option<(char, usize)> {
    let (&lead, _) = bytes.split_first()?;

    // Per lead byte: total sequence length, mask for its payload bits, and the
    // smallest scalar value that legitimately needs that many bytes.
    let (width, payload_mask, smallest): (usize, u8, u32) = match lead {
        0x00..=0x7F => return Some((char::from(lead), 1)),
        0xC0..=0xDF => (2, 0x1F, 0x80),
        0xE0..=0xEF => (3, 0x0F, 0x800),
        0xF0..=0xF7 => (4, 0x07, 0x1_0000),
        _ => return None,
    };

    // `get` fails when the slice is too short for the announced width.
    let continuation = bytes.get(1..width)?;
    let scalar = continuation
        .iter()
        .try_fold(u32::from(lead & payload_mask), |acc, &byte| {
            ((byte & 0xC0) == 0x80).then(|| (acc << 6) | u32::from(byte & 0x3F))
        })?;

    // Reject overlong encodings.
    if scalar < smallest {
        return None;
    }

    char::from_u32(scalar).map(|ch| (ch, width))
}
302
/// A string tagged with how it was admitted: cleaned by the sanitizer, or accepted
/// verbatim.
///
/// Both variants hold a `Cow<'a, str>`, so a value may either borrow from its source
/// or own its contents.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Text<'a> {
    /// Content stored by the `sanitized*` constructors, which pass it through
    /// `sanitize` first.
    Sanitized(Cow<'a, str>),

    /// Content stored by the `trusted*` constructors exactly as supplied.
    Trusted(Cow<'a, str>),
}
316
317impl<'a> Text<'a> {
318 #[inline]
320 pub fn sanitized(s: &'a str) -> Self {
321 Text::Sanitized(sanitize(s))
322 }
323
324 #[inline]
330 pub fn trusted(s: &'a str) -> Self {
331 Text::Trusted(Cow::Borrowed(s))
332 }
333
334 #[inline]
336 pub fn sanitized_owned(s: String) -> Self {
337 match sanitize(&s) {
338 Cow::Borrowed(_) => Text::Sanitized(Cow::Owned(s)),
339 Cow::Owned(owned) => Text::Sanitized(Cow::Owned(owned)),
340 }
341 }
342
343 #[inline]
345 pub fn trusted_owned(s: String) -> Self {
346 Text::Trusted(Cow::Owned(s))
347 }
348
349 #[inline]
351 pub fn as_str(&self) -> &str {
352 match self {
353 Text::Sanitized(cow) => cow.as_ref(),
354 Text::Trusted(cow) => cow.as_ref(),
355 }
356 }
357
358 #[inline]
360 pub fn is_sanitized(&self) -> bool {
361 matches!(self, Text::Sanitized(_))
362 }
363
364 #[inline]
366 pub fn is_trusted(&self) -> bool {
367 matches!(self, Text::Trusted(_))
368 }
369
370 pub fn into_owned(self) -> Text<'static> {
372 match self {
373 Text::Sanitized(cow) => Text::Sanitized(Cow::Owned(cow.into_owned())),
374 Text::Trusted(cow) => Text::Trusted(Cow::Owned(cow.into_owned())),
375 }
376 }
377}
378
379impl AsRef<str> for Text<'_> {
380 fn as_ref(&self) -> &str {
381 self.as_str()
382 }
383}
384
385impl std::fmt::Display for Text<'_> {
386 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
387 write!(f, "{}", self.as_str())
388 }
389}
390
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Fast path: input with nothing to strip is returned borrowed ----

    #[test]
    fn fast_path_no_escape() {
        let input = "Normal log message without escapes";
        let result = sanitize(input);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), input);
    }

    #[test]
    fn fast_path_with_allowed_controls() {
        // TAB, LF and CR are not forbidden, so they must not force a rewrite.
        let input = "Line1\nLine2\tTabbed\rCarriage";
        let result = sanitize(input);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), input);
    }

    #[test]
    fn fast_path_unicode() {
        let input = "Hello \u{4e16}\u{754c} \u{1f468}\u{200d}\u{1f469}\u{200d}\u{1f467}";
        let result = sanitize(input);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), input);
    }

    #[test]
    fn fast_path_empty() {
        let input = "";
        let result = sanitize(input);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn fast_path_printable_ascii() {
        let input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()";
        let result = sanitize(input);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), input);
    }

    // ---- Slow path: CSI sequences (ESC [ ... final byte) ----

    #[test]
    fn slow_path_strips_sgr_color() {
        let input = "Hello \x1b[31mred\x1b[0m world";
        let result = sanitize(input);
        assert!(matches!(result, Cow::Owned(_)));
        assert_eq!(result.as_ref(), "Hello red world");
    }

    #[test]
    fn slow_path_strips_cursor_movement() {
        let input = "Before\x1b[2;5HAfter";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    #[test]
    fn slow_path_strips_erase() {
        let input = "Text\x1b[2JCleared";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "TextCleared");
    }

    #[test]
    fn slow_path_strips_multiple_sequences() {
        let input = "\x1b[1mBold\x1b[0m \x1b[4mUnderline\x1b[24m \x1b[38;5;196mColor\x1b[0m";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Bold Underline Color");
    }

    // ---- Slow path: OSC sequences (ESC ] ... terminated by BEL or ESC \) ----

    #[test]
    fn slow_path_strips_osc_title_bel() {
        let input = "Text\x1b]0;Evil Title\x07More";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "TextMore");
    }

    #[test]
    fn slow_path_strips_osc_title_st() {
        let input = "Text\x1b]0;Evil Title\x1b\\More";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "TextMore");
    }

    #[test]
    fn slow_path_strips_osc8_hyperlink() {
        // Only the OSC wrappers are removed; the link text between them is kept.
        let input = "Click \x1b]8;;https://evil.com\x07here\x1b]8;;\x07 please";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Click here please");
    }

    // ---- Slow path: DCS / APC / PM strings, clipboard OSC, private modes ----

    #[test]
    fn slow_path_strips_dcs() {
        let input = "Before\x1bPdevice control string\x1b\\After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    #[test]
    fn slow_path_strips_apc() {
        let input = "Before\x1b_application program command\x1b\\After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    #[test]
    fn slow_path_strips_pm() {
        let input = "Before\x1b^privacy message\x1b\\After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    #[test]
    fn slow_path_strips_osc52_clipboard() {
        let input = "Before\x1b]52;c;SGVsbG8=\x07After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    #[test]
    fn slow_path_strips_osc52_clipboard_st() {
        let input = "Before\x1b]52;c;SGVsbG8=\x1b\\After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    #[test]
    fn slow_path_strips_private_modes() {
        let input = "A\x1b[?1049hB\x1b[?1000hC\x1b[?2004hD";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "ABCD");
    }

    // ---- Slow path: individual C0 controls and DEL ----

    #[test]
    fn slow_path_strips_nul() {
        let input = "Hello\x00World";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "HelloWorld");
    }

    #[test]
    fn slow_path_strips_bel() {
        let input = "Hello\x07World";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "HelloWorld");
    }

    #[test]
    fn slow_path_strips_backspace() {
        let input = "Hello\x08World";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "HelloWorld");
    }

    #[test]
    fn slow_path_strips_form_feed() {
        let input = "Hello\x0CWorld";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "HelloWorld");
    }

    #[test]
    fn slow_path_strips_vertical_tab() {
        let input = "Hello\x0BWorld";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "HelloWorld");
    }

    #[test]
    fn slow_path_strips_del() {
        let input = "Hello\x7FWorld";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "HelloWorld");
    }

    #[test]
    fn slow_path_preserves_tab_lf_cr() {
        let input = "Line1\nLine2\tTabbed\rReturn";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Line1\nLine2\tTabbed\rReturn");
    }

    // ---- Truncated and unusual escape sequences ----

    #[test]
    fn handles_truncated_csi() {
        let input = "Hello\x1b[";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "Hello");
    }

    #[test]
    fn handles_truncated_dcs() {
        // An unterminated string sequence is consumed to the end of the input.
        let input = "Hello\x1bP1;2;3";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "Hello");
    }

    #[test]
    fn handles_truncated_apc() {
        let input = "Hello\x1b_test";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "Hello");
    }

    #[test]
    fn handles_truncated_pm() {
        let input = "Hello\x1b^secret";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "Hello");
    }

    #[test]
    fn handles_truncated_osc() {
        let input = "Hello\x1b]0;Title";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "Hello");
    }

    #[test]
    fn handles_esc_at_end() {
        let input = "Hello\x1b";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Hello");
    }

    #[test]
    fn handles_lone_esc() {
        let input = "\x1b";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn handles_single_char_escape() {
        // ESC followed by one printable byte is removed as a two-byte unit.
        let input = "Before\x1b7Middle\x1b8After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeMiddleAfter");
    }

    #[test]
    fn handles_unknown_escape() {
        // `!` is in 0x20..=0x7E, so it is consumed together with the ESC and the
        // following `A` is kept.
        let input = "Before\x1b!After";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "BeforeAfter");
    }

    // ---- Non-ASCII text is preserved ----

    #[test]
    fn preserves_unicode_characters() {
        let input = "\u{4e16}\u{754c}";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "\u{4e16}\u{754c}");
    }

    #[test]
    fn preserves_emoji() {
        let input = "\u{1f600}\u{1f389}\u{1f680}";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "\u{1f600}\u{1f389}\u{1f680}");
    }

    #[test]
    fn preserves_combining_characters() {
        let input = "e\u{0301}";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "e\u{0301}");
    }

    #[test]
    fn mixed_unicode_and_escapes() {
        let input = "\u{4e16}\x1b[31m\u{754c}\x1b[0m";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "\u{4e16}\u{754c}");
    }

    // ---- `Text` wrapper ----

    #[test]
    fn text_sanitized() {
        let text = Text::sanitized("Hello \x1b[31mWorld\x1b[0m");
        assert!(text.is_sanitized());
        assert!(!text.is_trusted());
        assert_eq!(text.as_str(), "Hello World");
    }

    #[test]
    fn text_trusted() {
        // Trusted text is stored verbatim, escape sequences included.
        let text = Text::trusted("Hello \x1b[31mWorld\x1b[0m");
        assert!(!text.is_sanitized());
        assert!(text.is_trusted());
        assert_eq!(text.as_str(), "Hello \x1b[31mWorld\x1b[0m");
    }

    #[test]
    fn text_into_owned() {
        let text = Text::sanitized("Hello");
        let owned = text.into_owned();
        assert!(owned.is_sanitized());
        assert_eq!(owned.as_str(), "Hello");
    }

    #[test]
    fn text_display() {
        let text = Text::sanitized("Hello");
        assert_eq!(format!("{text}"), "Hello");
    }

    // ---- Output invariants over fixed input sets ----

    #[test]
    fn output_never_contains_esc() {
        let inputs = [
            "Normal text",
            "\x1b[31mRed\x1b[0m",
            "\x1b]0;Title\x07",
            "\x1bPDCS\x1b\\",
            "Mixed\x1b[1m\x1b]8;;url\x07text\x1b]8;;\x07\x1b[0m",
            "",
            "\x1b",
            "\x1b[",
            "\x1b]",
        ];

        for input in inputs {
            let result = sanitize(input);
            assert!(
                !result.contains('\x1b'),
                "Output contains ESC for input: {input:?}"
            );
        }
    }

    #[test]
    fn output_never_contains_forbidden_c0() {
        let inputs = [
            "\x00\x01\x02\x03\x04\x05\x06\x07",
            "\x08\x0B\x0C\x0E\x0F",
            "\x10\x11\x12\x13\x14\x15\x16\x17",
            "\x18\x19\x1A\x1C\x1D\x1E\x1F",
            "Mixed\x00text\x07with\x0Ccontrols",
        ];

        for input in inputs {
            let result = sanitize(input);
            for b in result.as_bytes() {
                assert!(
                    !is_forbidden_c0(*b),
                    "Output contains forbidden C0 0x{b:02X} for input: {input:?}"
                );
            }
        }
    }

    #[test]
    fn allowed_controls_preserved_in_output() {
        let input = "Tab\there\nNewline\rCarriage";
        let result = sanitize(input);
        assert!(result.contains('\t'));
        assert!(result.contains('\n'));
        assert!(result.contains('\r'));
    }

    // ---- `decode_utf8_char` ----

    #[test]
    fn decode_ascii() {
        let bytes = b"A";
        let result = decode_utf8_char(bytes);
        assert_eq!(result, Some(('A', 1)));
    }

    #[test]
    fn decode_two_byte() {
        let bytes = "\u{00E9}".as_bytes();
        let result = decode_utf8_char(bytes);
        assert_eq!(result, Some(('\u{00E9}', 2)));
    }

    #[test]
    fn decode_three_byte() {
        let bytes = "\u{4e16}".as_bytes();
        let result = decode_utf8_char(bytes);
        assert_eq!(result, Some(('\u{4e16}', 3)));
    }

    #[test]
    fn decode_four_byte() {
        let bytes = "\u{1f600}".as_bytes();
        let result = decode_utf8_char(bytes);
        assert_eq!(result, Some(('\u{1f600}', 4)));
    }

    #[test]
    fn decode_invalid_lead() {
        let bytes = &[0xFF];
        let result = decode_utf8_char(bytes);
        assert_eq!(result, None);
    }

    #[test]
    fn decode_truncated() {
        // 0xC2 announces a two-byte sequence but no continuation byte follows.
        let bytes = &[0xC2];
        let result = decode_utf8_char(bytes);
        assert_eq!(result, None);
    }

    #[test]
    fn decode_invalid_continuation() {
        // 0x00 does not have the 10xxxxxx continuation-byte shape.
        let bytes = &[0xC2, 0x00];
        let result = decode_utf8_char(bytes);
        assert_eq!(result, None);
    }

    // ---- Adversarial: screen and cursor manipulation ----

    #[test]
    fn adversarial_clear_screen() {
        let input = "\x1b[2J";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_home_cursor() {
        let input = "visible\x1b[Hhidden";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "visiblehidden");
    }

    #[test]
    fn adversarial_cursor_absolute_position() {
        let input = "ok\x1b[999;999Hmalicious";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "okmalicious");
    }

    #[test]
    fn adversarial_scroll_up() {
        let input = "text\x1b[5Smore";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "textmore");
    }

    #[test]
    fn adversarial_scroll_down() {
        let input = "text\x1b[5Tmore";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "textmore");
    }

    #[test]
    fn adversarial_erase_line() {
        let input = "secret\x1b[2Koverwrite";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "secretoverwrite");
    }

    #[test]
    fn adversarial_insert_delete_lines() {
        let input = "text\x1b[10Linserted\x1b[5Mdeleted";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "textinserteddeleted");
    }

    // ---- Adversarial: title injection via OSC 0 / 1 / 2 ----

    #[test]
    fn adversarial_osc0_title_injection() {
        let input = "\x1b]0;PWNED - Enter Password\x07";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
        assert!(!result.contains('\x1b'));
        assert!(!result.contains('\x07'));
    }

    #[test]
    fn adversarial_osc1_icon_title() {
        let input = "\x1b]1;evil-icon\x07";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_osc2_window_title() {
        let input = "\x1b]2;sudo password required\x1b\\";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    // ---- Adversarial: clipboard access via OSC 52 ----

    #[test]
    fn adversarial_osc52_clipboard_set_bel() {
        let input = "safe\x1b]52;c;cm0gLXJmIC8=\x07text";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "safetext");
    }

    #[test]
    fn adversarial_osc52_clipboard_set_st() {
        let input = "safe\x1b]52;c;cm0gLXJmIC8=\x1b\\text";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "safetext");
    }

    #[test]
    fn adversarial_osc52_clipboard_query() {
        let input = "\x1b]52;c;?\x07";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    // ---- Adversarial: private mode set/reset (ESC [ ? ... h / l) ----

    #[test]
    fn adversarial_alt_screen_enable() {
        let input = "\x1b[?1049h";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_alt_screen_disable() {
        let input = "\x1b[?1049l";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_mouse_enable() {
        let input = "\x1b[?1000h\x1b[?1002h\x1b[?1003h\x1b[?1006h";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_bracketed_paste_enable() {
        let input = "\x1b[?2004h";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_focus_events_enable() {
        let input = "\x1b[?1004h";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_raw_mode_sequence() {
        let input = "\x1b[?7727h";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_cursor_hide_show() {
        let input = "\x1b[?25l\x1b[?25h";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    // ---- Adversarial: device and status queries ----

    #[test]
    fn adversarial_device_attributes_query_da1() {
        let input = "\x1b[c";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_device_attributes_query_da2() {
        let input = "\x1b[>c";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_device_status_report() {
        let input = "\x1b[6n";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_osc_color_query() {
        let input = "\x1b]11;?\x07";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_decrpm_query() {
        // `$` (0x24) is skipped as a CSI body byte; `p` is the final byte.
        let input = "\x1b[?2026$p";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "");
    }

    // ---- Adversarial: spoofed prompts and overwritten content ----

    #[test]
    fn adversarial_fake_shell_prompt() {
        // The visible text survives; only the positioning sequences are removed.
        let input = "\x1b[999;1H\x1b[2K$ sudo rm -rf /\x1b[A";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "$ sudo rm -rf /");
    }

    #[test]
    fn adversarial_fake_password_prompt() {
        let input = "\x1b]0;Terminal\x07\x1b[2J\x1b[HPassword: ";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Password: ");
    }

    #[test]
    fn adversarial_overwrite_existing_content() {
        // CR is an allowed control and is kept; only the erase-line CSI is removed.
        let input = "safe output\r\x1b[2Kmalicious replacement";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "safe output\rmalicious replacement");
    }

    // ---- Adversarial: C1 controls (U+0080..=U+009F) ----

    #[test]
    fn adversarial_c1_single_byte_csi() {
        let input = "text\u{009B}31mmalicious";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert!(
            !result.contains('\u{009B}'),
            "C1 CSI (U+009B) must be stripped"
        );
    }

    #[test]
    fn adversarial_c1_osc_byte() {
        let input = "text\u{009D}0;Evil Title\x07malicious";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert!(
            !result.contains('\u{009D}'),
            "C1 OSC (U+009D) must be stripped"
        );
    }

    #[test]
    fn adversarial_c1_dcs_byte() {
        let input = "A\u{0090}device control\x1b\\B";
        let result = sanitize(input);
        assert!(!result.contains('\u{0090}'));
    }

    #[test]
    fn adversarial_c1_apc_byte() {
        let input = "A\u{009F}app command\x1b\\B";
        let result = sanitize(input);
        assert!(!result.contains('\u{009F}'));
    }

    #[test]
    fn adversarial_c1_pm_byte() {
        let input = "A\u{009E}private msg\x1b\\B";
        let result = sanitize(input);
        assert!(!result.contains('\u{009E}'));
    }

    #[test]
    fn adversarial_c1_st_byte() {
        let input = "A\u{009C}B";
        let result = sanitize(input);
        assert!(!result.contains('\u{009C}'));
    }

    #[test]
    fn adversarial_all_c1_controls_stripped() {
        // Exhaustive over the C1 range; surrounding text must survive each removal.
        for cp in 0x0080..=0x009F_u32 {
            let c = char::from_u32(cp).unwrap();
            let input = format!("A{c}B");
            let result = sanitize(&input);
            assert!(
                !result
                    .chars()
                    .any(|ch| ('\u{0080}'..='\u{009F}').contains(&ch)),
                "C1 control U+{cp:04X} passed through sanitizer"
            );
            assert!(result.contains('A'), "Text before C1 U+{cp:04X} lost");
            assert!(result.contains('B'), "Text after C1 U+{cp:04X} lost");
        }
    }

    #[test]
    fn adversarial_c1_fast_path_triggers_slow_path() {
        // A C1 control alone (no ESC, DEL or C0) must still defeat the fast path.
        let input = "clean\u{0085}text";
        let result = sanitize(input);
        assert!(
            matches!(result, Cow::Owned(_)),
            "C1 should trigger slow path"
        );
        assert!(!result.contains('\u{0085}'));
        assert_eq!(result.as_ref(), "cleantext");
    }

    // ---- Adversarial: nested and malformed sequences ----

    #[test]
    fn adversarial_nested_osc_in_osc() {
        let input = "safe\x1b]8;;\x1b]0;evil\x07https://ok.com\x07text";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert!(!result.contains('\x07'));
    }

    #[test]
    fn adversarial_st_inside_dcs() {
        // The bare ESC after "some" aborts the DCS. That ESC then pairs with `d` as
        // a two-byte escape, leaving "ata"; the trailing ESC `\` is likewise removed
        // as a two-byte escape.
        let input = "A\x1bPsome\x1bdata\x1b\\B";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "AataB");
    }

    #[test]
    fn dcs_with_proper_st_fully_consumed() {
        let input = "A\x1bPsomedata\x1b\\B";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "AB");
    }

    #[test]
    fn adversarial_bel_vs_st_terminator() {
        let input = "A\x1b]0;title\x07B\x1b\\C";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert!(!result.contains('\x07'));
    }

    #[test]
    fn adversarial_csi_without_final_byte() {
        // Despite the test name, the trailing `B` (0x42) lies in the CSI final-byte
        // range 0x40..=0x7E, so it terminates the sequence and is consumed with it.
        let input = "A\x1b[0;0;0;0;0;0;0;0;0;0B";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "A");
    }

    #[test]
    fn adversarial_csi_many_params_then_final() {
        let input = "X\x1b[1;2;3;4;5;6;7;8;9;10mY";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "XY");
    }

    // ---- Adversarial: very large or highly repetitive inputs ----

    #[test]
    fn adversarial_very_long_csi_params() {
        let params: String = std::iter::repeat_n("0;", 10_000).collect();
        let input = format!("start\x1b[{params}mend");
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "startend");
    }

    #[test]
    fn adversarial_many_short_sequences() {
        let input: String = (0..10_000).map(|_| "\x1b[0m").collect();
        let input = format!("start{input}end");
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "startend");
    }

    #[test]
    fn adversarial_very_long_osc_content() {
        let payload: String = std::iter::repeat_n('A', 100_000).collect();
        let input = format!("text\x1b]0;{payload}\x07more");
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "textmore");
    }

    #[test]
    fn adversarial_very_long_dcs_content() {
        let payload: String = std::iter::repeat_n('X', 100_000).collect();
        let input = format!("text\x1bP{payload}\x1b\\more");
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "textmore");
    }

    #[test]
    fn adversarial_only_escape_bytes() {
        let input: String = std::iter::repeat_n('\x1b', 1000).collect();
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "");
    }

    #[test]
    fn adversarial_alternating_esc_and_text() {
        // 500 sequences interleaved with 500 `a` characters; only the `a`s remain.
        let input: String = (0..1000)
            .map(|i| if i % 2 == 0 { "\x1b[m" } else { "a" })
            .collect();
        let result = sanitize(&input);
        let expected: String = std::iter::repeat_n('a', 500).collect();
        assert_eq!(result.as_ref(), expected);
    }

    #[test]
    fn adversarial_all_forbidden_c0_in_sequence() {
        let mut input = String::from("start");
        // Every C0 byte except the three allowed controls and ESC.
        for b in 0x00u8..=0x1F {
            if b != 0x09 && b != 0x0A && b != 0x0D && b != 0x1B {
                input.push(b as char);
            }
        }
        input.push_str("end");
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "startend");
    }

    // ---- Adversarial: combined and protocol-specific payloads ----

    #[test]
    fn adversarial_combined_title_clear_clipboard() {
        let input = concat!(
            "\x1b]0;Terminal\x07",
            "\x1b[2J",
            "\x1b[H",
            "\x1b]52;c;cm0gLXJm\x07",
            "Password: ",
        );
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Password: ");
        assert!(!result.contains('\x1b'));
        assert!(!result.contains('\x07'));
    }

    #[test]
    fn adversarial_sgr_color_soup() {
        let input = "\x1b[31m\x1b[1m\x1b[4m\x1b[7m\x1b[38;2;255;0;0mred\x1b[0m";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "red");
    }

    #[test]
    fn adversarial_hyperlink_wrapping_attack() {
        let input = concat!(
            "\x1b]8;;https://evil.com\x07",
            "Click here for info",
            "\x1b]8;;\x07",
        );
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "Click here for info");
    }

    #[test]
    fn adversarial_kitty_graphics_protocol() {
        let input = "img\x1b_Gf=100,s=1,v=1;AAAA\x1b\\text";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "imgtext");
    }

    #[test]
    fn adversarial_sixel_data() {
        let input = "pre\x1bPq#0;2;0;0;0#1;2;100;100;100~-\x1b\\post";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "prepost");
    }

    #[test]
    fn adversarial_mixed_valid_utf8_and_escapes() {
        let input = "\u{1f512}\x1b[31m\u{26a0}\x1b[0m secure\x1b]0;evil\x07\u{2705}";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "\u{1f512}\u{26a0} secure\u{2705}");
    }

    #[test]
    fn adversarial_control_char_near_escape() {
        let input = "\x01\x1b[31m\x02text\x03\x1b[0m\x04";
        let result = sanitize(input);
        assert!(!result.contains('\x1b'));
        assert_eq!(result.as_ref(), "text");
    }

    #[test]
    fn adversarial_save_restore_cursor_attack() {
        let input = "\x1b7fake prompt\x1b8real content";
        let result = sanitize(input);
        assert_eq!(result.as_ref(), "fake promptreal content");
    }

    #[test]
    fn adversarial_dec_set_reset_barrage() {
        let input = (1..100)
            .map(|i| format!("\x1b[?{i}h\x1b[?{i}l"))
            .collect::<String>();
        let input = format!("A{input}B");
        let result = sanitize(&input);
        assert_eq!(result.as_ref(), "AB");
    }

    // ---- Property-based tests ----

    mod proptest_adversarial {
        use super::*;
        use proptest::prelude::*;

        // Invariants that must hold for arbitrary string input.
        proptest! {
            #[test]
            fn sanitize_never_panics(input in ".*") {
                let _ = sanitize(&input);
            }

            #[test]
            fn sanitize_output_never_contains_esc(input in ".*") {
                let result = sanitize(&input);
                prop_assert!(
                    !result.contains('\x1b'),
                    "Output contained ESC for input {:?}", input
                );
            }

            #[test]
            fn sanitize_output_never_contains_del(input in ".*") {
                let result = sanitize(&input);
                prop_assert!(
                    !result.contains('\x7f'),
                    "Output contained DEL for input {:?}", input
                );
            }

            #[test]
            fn sanitize_output_no_forbidden_c0(input in ".*") {
                let result = sanitize(&input);
                for &b in result.as_bytes() {
                    prop_assert!(
                        !is_forbidden_c0(b),
                        "Output contains forbidden C0 0x{:02X}", b
                    );
                }
            }

            #[test]
            fn sanitize_preserves_clean_input(input in "[a-zA-Z0-9 .,!?\\n\\t]+") {
                let result = sanitize(&input);
                prop_assert_eq!(result.as_ref(), input.as_str());
            }

            #[test]
            fn sanitize_idempotent(input in ".*") {
                // Sanitizing already-sanitized output must change nothing.
                let first = sanitize(&input);
                let second = sanitize(first.as_ref());
                prop_assert_eq!(
                    first.as_ref(),
                    second.as_ref(),
                    "Sanitize is not idempotent"
                );
            }

            #[test]
            fn sanitize_output_len_lte_input(input in ".*") {
                // The sanitizer only removes bytes; it never adds any.
                let result = sanitize(&input);
                prop_assert!(
                    result.len() <= input.len(),
                    "Output ({}) longer than input ({})", result.len(), input.len()
                );
            }

            #[test]
            fn sanitize_output_is_valid_utf8(input in ".*") {
                let result = sanitize(&input);
                prop_assert!(std::str::from_utf8(result.as_bytes()).is_ok());
            }

            #[test]
            fn sanitize_output_no_c1_controls(input in ".*") {
                let result = sanitize(&input);
                for c in result.as_ref().chars() {
                    prop_assert!(
                        !('\u{0080}'..='\u{009F}').contains(&c),
                        "Output contains C1 control U+{:04X}", c as u32
                    );
                }
            }
        }

        /// Strategy producing one escape sequence of a randomly chosen kind.
        fn escape_sequence() -> impl Strategy<Value = String> {
            prop_oneof![
                // CSI: ESC [ + up to 19 bytes from 0x30..=0x3F + a final byte.
                (
                    proptest::collection::vec(0x30u8..=0x3F, 0..20),
                    0x40u8..=0x7E,
                )
                    .prop_map(|(params, final_byte)| {
                        let mut s = String::from("\x1b[");
                        for b in params {
                            s.push(b as char);
                        }
                        s.push(final_byte as char);
                        s
                    }),
                // OSC terminated by BEL.
                proptest::string::string_regex("[^\x07\x1b]{0,50}")
                    .unwrap()
                    .prop_map(|content| format!("\x1b]{content}\x07")),
                // OSC terminated by ESC \.
                proptest::string::string_regex("[^\x1b]{0,50}")
                    .unwrap()
                    .prop_map(|content| format!("\x1b]{content}\x1b\\")),
                // DCS (ESC P) terminated by ESC \.
                proptest::string::string_regex("[^\x1b]{0,50}")
                    .unwrap()
                    .prop_map(|content| format!("\x1bP{content}\x1b\\")),
                // APC (ESC _) terminated by ESC \.
                proptest::string::string_regex("[^\x1b]{0,50}")
                    .unwrap()
                    .prop_map(|content| format!("\x1b_{content}\x1b\\")),
                // PM (ESC ^) terminated by ESC \.
                proptest::string::string_regex("[^\x1b]{0,50}")
                    .unwrap()
                    .prop_map(|content| format!("\x1b^{content}\x1b\\")),
                // ESC followed by a single printable ASCII byte.
                (0x20u8..=0x7E).prop_map(|b| format!("\x1b{}", b as char)),
            ]
        }

        /// Strategy concatenating 1 to 19 parts, each one of: short alphanumeric
        /// text, an escape sequence from `escape_sequence`, or a single C0 byte
        /// other than TAB, LF and CR (ESC itself is not filtered out).
        fn mixed_adversarial_input() -> impl Strategy<Value = String> {
            proptest::collection::vec(
                prop_oneof![
                    proptest::string::string_regex("[a-zA-Z0-9 ]{1,10}").unwrap(),
                    escape_sequence(),
                    (0x00u8..=0x1F)
                        .prop_filter("not allowed control", |b| {
                            *b != 0x09 && *b != 0x0A && *b != 0x0D
                        })
                        .prop_map(|b| String::from(b as char)),
                ],
                1..20,
            )
            .prop_map(|parts| parts.join(""))
        }

        // Invariants checked against the structured generators above.
        proptest! {
            #[test]
            fn adversarial_mixed_input_safe(input in mixed_adversarial_input()) {
                let result = sanitize(&input);
                prop_assert!(!result.contains('\x1b'));
                prop_assert!(!result.contains('\x7f'));
                for &b in result.as_bytes() {
                    prop_assert!(!is_forbidden_c0(b));
                }
            }

            #[test]
            fn escape_sequences_fully_stripped(seq in escape_sequence()) {
                // Only the prefix is asserted: a generated body may contain a
                // control byte that ends the sequence early.
                let input = format!("before{seq}after");
                let result = sanitize(&input);
                prop_assert!(
                    !result.contains('\x1b'),
                    "Output contains ESC for sequence {:?}", seq
                );
                prop_assert!(
                    result.starts_with("before"),
                    "Output doesn't start with 'before' for {:?}: got {:?}", seq, result
                );
            }
        }
    }
}