1use std::fmt;
2use std::str::FromStr;
3
/// Source languages the preprocessor can be configured for.
///
/// Values are usually obtained by parsing a name with [`FromStr`]; parsing is
/// ASCII case-insensitive and never fails. Names that are not recognised map to
/// [`Language::Unknown`], which is also the [`Default`] value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Language {
    Rust,
    Python,
    JavaScript,
    TypeScript,
    Go,
    Ruby,
    C,
    Cpp,
    CSharp,
    Java,
    Kotlin,
    /// Also selected by `bash`, `sh`, `zsh`, `ksh` and `fish`.
    Shell,
    Swift,
    Scala,
    Sql,
    /// Also selected by `html`, `xhtml`, `svg`, `xsl` and `xslt`.
    Xml,
    Php,
    /// Also selected by `yml`.
    Yaml,
    Toml,
    /// Also selected by `jsonc` and `json5`.
    Json,
    /// Fallback for every name that is not recognised.
    #[default]
    Unknown,
}

impl FromStr for Language {
    type Err = std::convert::Infallible;

    /// Maps a language name or alias to its [`Language`].
    ///
    /// The comparison ignores ASCII case. This never returns `Err`: unrecognised
    /// names (including the empty string) yield [`Language::Unknown`].
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Every accepted spelling, grouped by the language it selects.
        const NAMES: &[(&[&str], Language)] = &[
            (&["rust"], Language::Rust),
            (&["python"], Language::Python),
            (&["javascript"], Language::JavaScript),
            (&["typescript"], Language::TypeScript),
            (&["go"], Language::Go),
            (&["ruby"], Language::Ruby),
            (&["c"], Language::C),
            (&["cpp"], Language::Cpp),
            (&["csharp"], Language::CSharp),
            (&["java"], Language::Java),
            (&["kotlin"], Language::Kotlin),
            (
                &["shell", "bash", "sh", "zsh", "ksh", "fish"],
                Language::Shell,
            ),
            (&["swift"], Language::Swift),
            (&["scala"], Language::Scala),
            (&["sql"], Language::Sql),
            (
                &["xml", "html", "xhtml", "svg", "xsl", "xslt"],
                Language::Xml,
            ),
            (&["php"], Language::Php),
            (&["yaml", "yml"], Language::Yaml),
            (&["toml"], Language::Toml),
            (&["json", "jsonc", "json5"], Language::Json),
        ];

        let language = NAMES
            .iter()
            .find(|(names, _)| names.iter().any(|name| s.eq_ignore_ascii_case(name)))
            .map_or(Language::Unknown, |&(_, language)| language);

        Ok(language)
    }
}
66
67impl Language {
68 pub fn comment_syntax(self) -> CommentSyntax {
70 match self {
71 Language::Python | Language::Ruby | Language::Shell => CommentSyntax::Hash,
72 Language::Rust | Language::Swift | Language::Scala => CommentSyntax::CStyleNested,
74 Language::Sql => CommentSyntax::Sql,
76 Language::Xml => CommentSyntax::Xml,
78 Language::Php => CommentSyntax::Php,
80 Language::Yaml | Language::Toml => CommentSyntax::Hash,
82 Language::Json => CommentSyntax::CStyle,
84 _ => CommentSyntax::CStyle,
85 }
86 }
87
88 pub fn string_syntax(self) -> StringSyntax {
90 match self {
91 Language::Rust => StringSyntax::Rust,
92 Language::Python => StringSyntax::Python,
93 Language::JavaScript | Language::TypeScript | Language::Ruby => {
95 StringSyntax::JavaScript
96 }
97 Language::Go => StringSyntax::Go,
98 Language::Shell => StringSyntax::Shell,
99 Language::Swift | Language::Scala => StringSyntax::SwiftScala,
101 Language::Sql => StringSyntax::Sql,
103 Language::Xml => StringSyntax::Xml,
105 Language::Php => StringSyntax::Php,
107 Language::Yaml | Language::Toml | Language::Json => StringSyntax::CStyle,
109 _ => StringSyntax::CStyle,
110 }
111 }
112}
113
/// Families of comment notation recognised by the line scanner.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommentSyntax {
    /// `//` line comments and non-nesting `/* ... */` block comments.
    CStyle,
    /// Like [`CommentSyntax::CStyle`], but `/* ... */` block comments may nest.
    CStyleNested,
    /// `#` line comments only.
    Hash,
    /// `--` line comments and `/* ... */` block comments.
    Sql,
    /// `<!-- ... -->` comments.
    Xml,
    /// `#` and `//` line comments plus `/* ... */` block comments.
    Php,
}
130
/// Families of string-literal notation recognised by the line scanner.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringSyntax {
    /// `"..."` strings and `'...'` character literals, both with backslash escapes.
    CStyle,
    /// `"..."`, byte strings `b"..."`, raw strings `r#"..."#` and `'...'` char literals.
    Rust,
    /// `'...'`, `"..."` and the triple-quoted `'''...'''` / `"""..."""` forms.
    Python,
    /// `'...'`, `"..."` and backtick template literals.
    JavaScript,
    /// `"..."` strings and backtick raw strings.
    Go,
    /// `"..."`, literal `'...'` (no escapes) and ANSI-C `$'...'` strings.
    Shell,
    /// `"..."` strings and triple-quoted `"""..."""` strings.
    SwiftScala,
    /// `'...'` strings; a double quote does not open a string.
    Sql,
    /// `"..."` and `'...'` quoted values.
    Xml,
    /// `"..."` and `'...'` strings.
    Php,
}
155
/// Selects which parts of a line the preprocessor blanks out.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PreprocessOptions {
    /// Replace comment text with spaces.
    pub mask_comments: bool,
    /// Replace string and character literal text with spaces.
    pub mask_strings: bool,
}

impl PreprocessOptions {
    /// Masks nothing; lines pass through unchanged.
    pub fn none() -> Self {
        Self {
            mask_comments: false,
            mask_strings: false,
        }
    }

    /// Masks comments and leaves string literals visible.
    pub fn comments_only() -> Self {
        Self {
            mask_comments: true,
            ..Self::none()
        }
    }

    /// Masks string literals and leaves comments visible.
    pub fn strings_only() -> Self {
        Self {
            mask_strings: true,
            ..Self::none()
        }
    }

    /// Masks both comments and string literals.
    pub fn comments_and_strings() -> Self {
        Self {
            mask_comments: true,
            ..Self::strings_only()
        }
    }

    /// Whether string boundaries have to be followed while scanning.
    ///
    /// This is true as soon as anything is masked: even when only comments are
    /// masked, strings must be tracked so that comment markers inside a string
    /// are not treated as comments.
    fn track_strings(self) -> bool {
        let Self {
            mask_comments,
            mask_strings,
        } = self;
        mask_comments || mask_strings
    }
}
201
/// Scanner state of the preprocessor.
///
/// The state is kept between calls so that constructs spanning several lines
/// (block comments, multi-line strings) continue on the next line.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Mode {
    /// Ordinary code, outside any comment or literal.
    Normal,
    /// Inside a comment that runs to the end of the current line.
    LineComment,
    /// Inside a `/* ... */` comment; `depth` counts the open nesting levels.
    BlockComment {
        depth: u32,
    },
    /// Inside a quoted string closed by `quote`; `escaped` is set right after a backslash.
    NormalString {
        escaped: bool,
        quote: u8,
    },
    /// Inside a raw string closed by `"` followed by `hashes` `#` characters
    /// (or by a backtick when scanning Go).
    RawString {
        hashes: usize,
    },
    /// Inside a `'...'` character literal.
    Char {
        escaped: bool,
    },
    /// Inside a backtick template literal.
    TemplateLiteral {
        escaped: bool,
    },
    /// Inside a triple-quoted string closed by three `quote` bytes.
    TripleQuotedString {
        escaped: bool,
        quote: u8,
    },
    /// Inside a shell `'...'` string, where backslash is not an escape.
    ShellLiteralString,
    /// Inside a shell `$'...'` string, where backslash escapes apply.
    ShellAnsiCString {
        escaped: bool,
    },
    /// Inside an XML `<!-- ... -->` comment.
    XmlComment,
}

impl fmt::Debug for Mode {
    /// Writes a compact `Variant(field=value, ...)` form; `quote` is printed as
    /// its numeric byte value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            // Field-less states are just their name.
            Mode::Normal => f.write_str("Normal"),
            Mode::LineComment => f.write_str("LineComment"),
            Mode::ShellLiteralString => f.write_str("ShellLiteralString"),
            Mode::XmlComment => f.write_str("XmlComment"),

            // States with a single field.
            Mode::BlockComment { depth } => write!(f, "BlockComment(depth={depth})"),
            Mode::RawString { hashes } => write!(f, "RawString(hashes={hashes})"),
            Mode::Char { escaped } => write!(f, "Char(escaped={escaped})"),
            Mode::TemplateLiteral { escaped } => {
                write!(f, "TemplateLiteral(escaped={escaped})")
            }
            Mode::ShellAnsiCString { escaped } => {
                write!(f, "ShellAnsiCString(escaped={escaped})")
            }

            // States that also remember their closing quote byte.
            Mode::NormalString { escaped, quote } => {
                write!(f, "NormalString(escaped={escaped}, quote={quote})")
            }
            Mode::TripleQuotedString { escaped, quote } => {
                write!(f, "TripleQuotedString(escaped={escaped}, quote={quote})")
            }
        }
    }
}
259
/// Stateful, line-by-line scanner that blanks out comments and/or string
/// literals according to its options and language.
#[derive(Debug, Clone)]
pub struct Preprocessor {
    /// What to mask.
    opts: PreprocessOptions,
    /// Current scanner state; carried from one line to the next.
    mode: Mode,
    /// Language whose comment and string rules are applied.
    lang: Language,
}
270
271impl Preprocessor {
272 pub fn new(opts: PreprocessOptions) -> Self {
273 Self {
274 opts,
275 mode: Mode::Normal,
276 lang: Language::Unknown,
277 }
278 }
279
280 pub fn with_language(opts: PreprocessOptions, lang: Language) -> Self {
282 Self {
283 opts,
284 mode: Mode::Normal,
285 lang,
286 }
287 }
288
289 pub fn set_language(&mut self, lang: Language) {
291 self.lang = lang;
292 self.reset();
293 }
294
295 pub fn reset(&mut self) {
296 self.mode = Mode::Normal;
297 }
298
299 #[cfg_attr(mutants, mutants::skip)]
303 #[allow(clippy::collapsible_if)]
304 pub fn sanitize_line(&mut self, line: &str) -> String {
305 let mut out: Vec<u8> = line.as_bytes().to_vec();
306 let bytes = line.as_bytes();
307 let len = bytes.len();
308
309 let comment_syntax = self.lang.comment_syntax();
310 let string_syntax = self.lang.string_syntax();
311
312 let mut i = 0;
313
314 while i < len {
315 match self.mode {
316 Mode::Normal => {
317 if self.opts.track_strings() {
319 if string_syntax == StringSyntax::Rust {
321 if let Some((_, end_quote_i, hashes)) =
322 detect_raw_string_start(bytes, i)
323 {
324 if self.opts.mask_strings {
325 mask_range(&mut out, i, end_quote_i + 1);
326 }
327 self.mode = Mode::RawString { hashes };
328 i = end_quote_i + 1;
329 continue;
330 }
331
332 if bytes[i] == b'b' && i + 1 < len && bytes[i + 1] == b'"' {
334 if self.opts.mask_strings {
335 mask_range(&mut out, i, i + 2);
336 }
337 self.mode = Mode::NormalString {
338 escaped: false,
339 quote: b'"',
340 };
341 i += 2;
342 continue;
343 }
344 }
345
346 if string_syntax == StringSyntax::Python
349 || string_syntax == StringSyntax::SwiftScala
350 {
351 if let Some((quote, end_i)) = detect_triple_quote_start(bytes, i) {
352 if string_syntax == StringSyntax::SwiftScala && quote != b'"' {
354 } else {
356 if self.opts.mask_strings {
357 mask_range(&mut out, i, end_i);
358 }
359 self.mode = Mode::TripleQuotedString {
360 escaped: false,
361 quote,
362 };
363 i = end_i;
364 continue;
365 }
366 }
367 }
368
369 if string_syntax == StringSyntax::JavaScript && bytes[i] == b'`' {
371 if self.opts.mask_strings {
372 out[i] = b' ';
373 }
374 self.mode = Mode::TemplateLiteral { escaped: false };
375 i += 1;
376 continue;
377 }
378
379 if string_syntax == StringSyntax::Go && bytes[i] == b'`' {
381 if self.opts.mask_strings {
382 out[i] = b' ';
383 }
384 self.mode = Mode::RawString { hashes: 0 };
386 i += 1;
387 continue;
388 }
389
390 if string_syntax == StringSyntax::Shell
392 && bytes[i] == b'$'
393 && i + 1 < len
394 && bytes[i + 1] == b'\''
395 {
396 if self.opts.mask_strings {
397 mask_range(&mut out, i, i + 2);
398 }
399 self.mode = Mode::ShellAnsiCString { escaped: false };
400 i += 2;
401 continue;
402 }
403
404 if string_syntax == StringSyntax::Shell && bytes[i] == b'\'' {
406 if self.opts.mask_strings {
407 out[i] = b' ';
408 }
409 self.mode = Mode::ShellLiteralString;
410 i += 1;
411 continue;
412 }
413
414 if string_syntax == StringSyntax::Sql && bytes[i] == b'\'' {
416 if self.opts.mask_strings {
417 out[i] = b' ';
418 }
419 self.mode = Mode::NormalString {
420 escaped: false,
421 quote: b'\'',
422 };
423 i += 1;
424 continue;
425 }
426
427 if string_syntax == StringSyntax::Xml
429 && (bytes[i] == b'"' || bytes[i] == b'\'')
430 {
431 let quote = bytes[i];
432 if self.opts.mask_strings {
433 out[i] = b' ';
434 }
435 self.mode = Mode::NormalString {
437 escaped: false,
438 quote,
439 };
440 i += 1;
441 continue;
442 }
443
444 if string_syntax == StringSyntax::Php
446 && (bytes[i] == b'"' || bytes[i] == b'\'')
447 {
448 let quote = bytes[i];
449 if self.opts.mask_strings {
450 out[i] = b' ';
451 }
452 self.mode = Mode::NormalString {
453 escaped: false,
454 quote,
455 };
456 i += 1;
457 continue;
458 }
459
460 if bytes[i] == b'"' && string_syntax != StringSyntax::Sql {
463 if self.opts.mask_strings {
464 out[i] = b' ';
465 }
466 self.mode = Mode::NormalString {
467 escaped: false,
468 quote: b'"',
469 };
470 i += 1;
471 continue;
472 }
473
474 if (string_syntax == StringSyntax::Python
476 || string_syntax == StringSyntax::JavaScript
477 || string_syntax == StringSyntax::CStyle)
478 && bytes[i] == b'\''
479 {
480 if string_syntax == StringSyntax::CStyle {
482 if self.opts.mask_strings {
483 out[i] = b' ';
484 }
485 self.mode = Mode::Char { escaped: false };
486 i += 1;
487 continue;
488 }
489 if self.opts.mask_strings {
491 out[i] = b' ';
492 }
493 self.mode = Mode::NormalString {
494 escaped: false,
495 quote: b'\'',
496 };
497 i += 1;
498 continue;
499 }
500
501 if string_syntax == StringSyntax::Rust && bytes[i] == b'\'' {
503 if self.opts.mask_strings {
504 out[i] = b' ';
505 }
506 self.mode = Mode::Char { escaped: false };
507 i += 1;
508 continue;
509 }
510 }
511
512 if self.opts.mask_comments {
514 if comment_syntax == CommentSyntax::Hash && bytes[i] == b'#' {
516 mask_range(&mut out, i, len);
517 self.mode = Mode::LineComment;
518 break;
519 }
520
521 if comment_syntax == CommentSyntax::Php {
523 if bytes[i] == b'#' {
524 mask_range(&mut out, i, len);
525 self.mode = Mode::LineComment;
526 break;
527 }
528 if bytes[i] == b'/' && i + 1 < len {
529 let n = bytes[i + 1];
530 if n == b'/' {
531 mask_range(&mut out, i, len);
532 self.mode = Mode::LineComment;
533 break;
534 }
535 if n == b'*' {
536 mask_range(&mut out, i, i + 2);
537 self.mode = Mode::BlockComment { depth: 1 };
538 i += 2;
539 continue;
540 }
541 }
542 }
543
544 if comment_syntax == CommentSyntax::Sql {
546 if bytes[i] == b'-' && i + 1 < len && bytes[i + 1] == b'-' {
548 mask_range(&mut out, i, len);
549 self.mode = Mode::LineComment;
550 break;
551 }
552 if bytes[i] == b'/' && i + 1 < len && bytes[i + 1] == b'*' {
554 mask_range(&mut out, i, i + 2);
555 self.mode = Mode::BlockComment { depth: 1 };
556 i += 2;
557 continue;
558 }
559 }
560
561 if comment_syntax == CommentSyntax::Xml
563 && bytes[i] == b'<'
564 && i + 3 < len
565 && bytes[i + 1] == b'!'
566 && bytes[i + 2] == b'-'
567 && bytes[i + 3] == b'-'
568 {
569 mask_range(&mut out, i, i + 4);
570 self.mode = Mode::XmlComment;
571 i += 4;
572 continue;
573 }
574
575 if (comment_syntax == CommentSyntax::CStyle
577 || comment_syntax == CommentSyntax::CStyleNested)
578 && bytes[i] == b'/'
579 && i + 1 < len
580 {
581 let n = bytes[i + 1];
582 if n == b'/' {
583 mask_range(&mut out, i, len);
585 self.mode = Mode::LineComment;
586 break;
587 }
588 if n == b'*' {
589 mask_range(&mut out, i, i + 2);
591 self.mode = Mode::BlockComment { depth: 1 };
592 i += 2;
593 continue;
594 }
595 }
596 }
597
598 i += 1;
599 }
600
601 Mode::LineComment => {
602 self.mode = Mode::Normal;
604 break;
605 }
606
607 Mode::BlockComment { depth } => {
608 if self.opts.mask_comments {
610 out[i] = b' ';
611 }
612
613 let supports_nesting = comment_syntax == CommentSyntax::CStyleNested;
615 if supports_nesting && bytes[i] == b'/' && i + 1 < len && bytes[i + 1] == b'*' {
616 if self.opts.mask_comments {
617 out[i + 1] = b' ';
618 }
619 self.mode = Mode::BlockComment { depth: depth + 1 };
620 i += 2;
621 continue;
622 }
623
624 if bytes[i] == b'*' && i + 1 < len && bytes[i + 1] == b'/' {
625 if self.opts.mask_comments {
626 out[i + 1] = b' ';
627 }
628 if depth == 1 {
629 self.mode = Mode::Normal;
630 } else {
631 self.mode = Mode::BlockComment { depth: depth - 1 };
632 }
633 i += 2;
634 continue;
635 }
636
637 i += 1;
638 }
639
640 Mode::NormalString { escaped, quote } => {
641 if self.opts.mask_strings {
642 out[i] = b' ';
643 }
644
645 if escaped {
646 self.mode = Mode::NormalString {
647 escaped: false,
648 quote,
649 };
650 i += 1;
651 continue;
652 }
653
654 if bytes[i] == b'\\' {
655 self.mode = Mode::NormalString {
656 escaped: true,
657 quote,
658 };
659 i += 1;
660 continue;
661 }
662
663 if bytes[i] == quote {
664 self.mode = Mode::Normal;
666 i += 1;
667 continue;
668 }
669
670 i += 1;
671 }
672
673 Mode::Char { escaped } => {
674 if self.opts.mask_strings {
675 out[i] = b' ';
676 }
677
678 if escaped {
679 self.mode = Mode::Char { escaped: false };
680 i += 1;
681 continue;
682 }
683
684 if bytes[i] == b'\\' {
685 self.mode = Mode::Char { escaped: true };
686 i += 1;
687 continue;
688 }
689
690 if bytes[i] == b'\'' {
691 self.mode = Mode::Normal;
692 i += 1;
693 continue;
694 }
695
696 i += 1;
697 }
698
699 Mode::RawString { hashes } => {
700 if self.opts.mask_strings {
701 out[i] = b' ';
702 }
703
704 if hashes == 0 && string_syntax == StringSyntax::Go {
706 if bytes[i] == b'`' {
707 self.mode = Mode::Normal;
708 i += 1;
709 continue;
710 }
711 i += 1;
712 continue;
713 }
714
715 if bytes[i] == b'"' {
717 let mut ok = true;
718 for j in 0..hashes {
719 if i + 1 + j >= len || bytes[i + 1 + j] != b'#' {
720 ok = false;
721 break;
722 }
723 }
724
725 if ok {
726 if self.opts.mask_strings {
727 mask_range(&mut out, i, (i + 1 + hashes).min(len));
728 }
729 self.mode = Mode::Normal;
730 i = (i + 1 + hashes).min(len);
731 continue;
732 }
733 }
734
735 i += 1;
736 }
737
738 Mode::TemplateLiteral { escaped } => {
739 if self.opts.mask_strings {
740 out[i] = b' ';
741 }
742
743 if escaped {
744 self.mode = Mode::TemplateLiteral { escaped: false };
745 i += 1;
746 continue;
747 }
748
749 if bytes[i] == b'\\' {
750 self.mode = Mode::TemplateLiteral { escaped: true };
751 i += 1;
752 continue;
753 }
754
755 if bytes[i] == b'`' {
756 self.mode = Mode::Normal;
758 i += 1;
759 continue;
760 }
761
762 i += 1;
763 }
764
765 Mode::TripleQuotedString { escaped, quote } => {
766 if self.opts.mask_strings {
767 out[i] = b' ';
768 }
769
770 if escaped {
771 self.mode = Mode::TripleQuotedString {
772 escaped: false,
773 quote,
774 };
775 i += 1;
776 continue;
777 }
778
779 if bytes[i] == b'\\' {
780 self.mode = Mode::TripleQuotedString {
781 escaped: true,
782 quote,
783 };
784 i += 1;
785 continue;
786 }
787
788 if bytes[i] == quote
790 && i + 2 < len
791 && bytes[i + 1] == quote
792 && bytes[i + 2] == quote
793 {
794 if self.opts.mask_strings {
795 mask_range(&mut out, i, i + 3);
796 }
797 self.mode = Mode::Normal;
798 i += 3;
799 continue;
800 }
801
802 i += 1;
803 }
804
805 Mode::ShellLiteralString => {
806 if self.opts.mask_strings {
809 out[i] = b' ';
810 }
811
812 if bytes[i] == b'\'' {
813 self.mode = Mode::Normal;
815 i += 1;
816 continue;
817 }
818
819 i += 1;
820 }
821
822 Mode::ShellAnsiCString { escaped } => {
823 if self.opts.mask_strings {
825 out[i] = b' ';
826 }
827
828 if escaped {
829 self.mode = Mode::ShellAnsiCString { escaped: false };
830 i += 1;
831 continue;
832 }
833
834 if bytes[i] == b'\\' {
835 self.mode = Mode::ShellAnsiCString { escaped: true };
836 i += 1;
837 continue;
838 }
839
840 if bytes[i] == b'\'' {
841 self.mode = Mode::Normal;
843 i += 1;
844 continue;
845 }
846
847 i += 1;
848 }
849
850 Mode::XmlComment => {
851 if self.opts.mask_comments {
854 out[i] = b' ';
855 }
856
857 if bytes[i] == b'-'
859 && i + 2 < len
860 && bytes[i + 1] == b'-'
861 && bytes[i + 2] == b'>'
862 {
863 if self.opts.mask_comments {
864 out[i + 1] = b' ';
865 out[i + 2] = b' ';
866 }
867 self.mode = Mode::Normal;
868 i += 3;
869 continue;
870 }
871
872 i += 1;
873 }
874 }
875 }
876
877 if matches!(self.mode, Mode::LineComment) {
879 self.mode = Mode::Normal;
880 }
881
882 String::from_utf8_lossy(&out).into_owned()
883 }
884}
885
/// Overwrites `out[start..end]` with ASCII spaces.
///
/// Both bounds are clamped to the buffer, and a range whose `start` lies at or
/// beyond its `end` is treated as empty, so the call never panics.
fn mask_range(out: &mut [u8], start: usize, end: usize) {
    let end = end.min(out.len());
    // Clamp `start` as well: slicing with start > end would panic.
    let start = start.min(end);
    out[start..end].fill(b' ');
}
892
/// Checks whether a triple-quote delimiter (`"""` or `'''`) starts at `bytes[i]`.
///
/// Returns the quote byte and the index just past the delimiter (`i + 3`), or
/// `None` when fewer than three bytes remain or they are not three identical
/// quote characters.
fn detect_triple_quote_start(bytes: &[u8], i: usize) -> Option<(u8, usize)> {
    match bytes.get(i..)? {
        [quote @ (b'"' | b'\''), second, third, ..] if second == quote && third == quote => {
            Some((*quote, i + 3))
        }
        _ => None,
    }
}
909
/// Checks whether a Rust raw-string opener (`r"`, `r#"`, `br"`, `br##"`, ...)
/// starts at `bytes[i]`.
///
/// On success returns `(start, quote_index, hashes)`: the index of the prefix
/// (always `i`), the index of the opening `"`, and the number of `#` characters
/// between the prefix and that quote. Returns `None` for anything else,
/// including plain `b"` byte strings.
fn detect_raw_string_start(bytes: &[u8], i: usize) -> Option<(usize, usize, usize)> {
    let tail = bytes.get(i..)?;

    // Length of the `r` / `br` prefix.
    let prefix_len = match tail {
        [b'r', ..] => 1,
        [b'b', b'r', ..] => 2,
        _ => return None,
    };

    let hashes = tail[prefix_len..]
        .iter()
        .take_while(|&&byte| byte == b'#')
        .count();
    let quote_i = i + prefix_len + hashes;

    // The hashes must be followed immediately by the opening quote.
    (bytes.get(quote_i) == Some(&b'"')).then_some((i, quote_i, hashes))
}
940
941#[cfg(test)]
942mod tests {
943 use super::*;
944
945 #[test]
948 fn preprocess_options_track_strings_reflects_masks() {
949 assert!(!PreprocessOptions::none().track_strings());
950 assert!(PreprocessOptions::comments_only().track_strings());
951 assert!(PreprocessOptions::strings_only().track_strings());
952 assert!(PreprocessOptions::comments_and_strings().track_strings());
953 }
954
955 #[test]
956 fn mode_debug_format_includes_variant() {
957 assert_eq!(format!("{:?}", Mode::Normal), "Normal");
958 assert_eq!(format!("{:?}", Mode::LineComment), "LineComment");
959 assert_eq!(
960 format!("{:?}", Mode::BlockComment { depth: 2 }),
961 "BlockComment(depth=2)"
962 );
963 assert_eq!(
964 format!(
965 "{:?}",
966 Mode::NormalString {
967 escaped: true,
968 quote: b'\"'
969 }
970 ),
971 "NormalString(escaped=true, quote=34)"
972 );
973 }
974
975 #[test]
976 fn detect_triple_quote_start_detects_quotes() {
977 assert_eq!(detect_triple_quote_start(b"\"\"\"rest", 0), Some((b'"', 3)));
978 assert_eq!(detect_triple_quote_start(b"'''abc", 0), Some((b'\'', 3)));
979 assert_eq!(detect_triple_quote_start(b"x\"\"y", 1), None);
980 assert_eq!(detect_triple_quote_start(b"\"x\"", 0), None);
981 assert_eq!(detect_triple_quote_start(b"''", 0), None);
982 assert_eq!(detect_triple_quote_start(b"x'''y", 0), None);
983 }
984
985 #[test]
986 fn detect_raw_string_start_detects_rust_raw_strings() {
987 assert_eq!(detect_raw_string_start(b"r\"rest", 0), Some((0, 1, 0)));
988 assert_eq!(detect_raw_string_start(b"br\"rest", 0), Some((0, 2, 0)));
989 assert_eq!(detect_raw_string_start(b"r#\"rest", 0), Some((0, 2, 1)));
990 assert_eq!(detect_raw_string_start(b"br##\"rest", 0), Some((0, 4, 2)));
991 assert_eq!(detect_raw_string_start(b"b\"\"rest", 0), None);
992 assert_eq!(detect_raw_string_start(b"b\"rest", 0), None);
993 assert_eq!(detect_raw_string_start(b"x\"rest", 0), None);
994 assert_eq!(detect_raw_string_start(b"r###", 0), None);
995 }
996
997 #[test]
998 fn language_from_str_known_languages() {
999 assert_eq!("rust".parse::<Language>().unwrap(), Language::Rust);
1000 assert_eq!("python".parse::<Language>().unwrap(), Language::Python);
1001 assert_eq!(
1002 "javascript".parse::<Language>().unwrap(),
1003 Language::JavaScript
1004 );
1005 assert_eq!(
1006 "typescript".parse::<Language>().unwrap(),
1007 Language::TypeScript
1008 );
1009 assert_eq!("go".parse::<Language>().unwrap(), Language::Go);
1010 assert_eq!("ruby".parse::<Language>().unwrap(), Language::Ruby);
1011 assert_eq!("c".parse::<Language>().unwrap(), Language::C);
1012 assert_eq!("cpp".parse::<Language>().unwrap(), Language::Cpp);
1013 assert_eq!("csharp".parse::<Language>().unwrap(), Language::CSharp);
1014 assert_eq!("java".parse::<Language>().unwrap(), Language::Java);
1015 assert_eq!("kotlin".parse::<Language>().unwrap(), Language::Kotlin);
1016 assert_eq!("yaml".parse::<Language>().unwrap(), Language::Yaml);
1017 assert_eq!("toml".parse::<Language>().unwrap(), Language::Toml);
1018 assert_eq!("json".parse::<Language>().unwrap(), Language::Json);
1019 }
1020
1021 #[test]
1022 fn language_from_str_case_insensitive() {
1023 assert_eq!("RUST".parse::<Language>().unwrap(), Language::Rust);
1024 assert_eq!("Python".parse::<Language>().unwrap(), Language::Python);
1025 assert_eq!(
1026 "JavaScript".parse::<Language>().unwrap(),
1027 Language::JavaScript
1028 );
1029 assert_eq!(
1030 "TypeScript".parse::<Language>().unwrap(),
1031 Language::TypeScript
1032 );
1033 assert_eq!("GO".parse::<Language>().unwrap(), Language::Go);
1034 assert_eq!("RUBY".parse::<Language>().unwrap(), Language::Ruby);
1035 assert_eq!("C".parse::<Language>().unwrap(), Language::C);
1036 assert_eq!("CPP".parse::<Language>().unwrap(), Language::Cpp);
1037 assert_eq!("CSharp".parse::<Language>().unwrap(), Language::CSharp);
1038 assert_eq!("JAVA".parse::<Language>().unwrap(), Language::Java);
1039 assert_eq!("KOTLIN".parse::<Language>().unwrap(), Language::Kotlin);
1040 assert_eq!("YAML".parse::<Language>().unwrap(), Language::Yaml);
1041 assert_eq!("TOML".parse::<Language>().unwrap(), Language::Toml);
1042 assert_eq!("JSON".parse::<Language>().unwrap(), Language::Json);
1043 }
1044
1045 #[test]
1046 fn language_from_str_unknown() {
1047 assert_eq!("unknown".parse::<Language>().unwrap(), Language::Unknown);
1048 assert_eq!("".parse::<Language>().unwrap(), Language::Unknown);
1049 assert_eq!("fortran".parse::<Language>().unwrap(), Language::Unknown);
1050 assert_eq!("cobol".parse::<Language>().unwrap(), Language::Unknown);
1051 }
1052
1053 #[test]
1054 fn language_default_is_unknown() {
1055 assert_eq!(Language::default(), Language::Unknown);
1056 }
1057
1058 #[test]
1061 fn comment_syntax_hash_languages() {
1062 assert_eq!(Language::Python.comment_syntax(), CommentSyntax::Hash);
1063 assert_eq!(Language::Ruby.comment_syntax(), CommentSyntax::Hash);
1064 assert_eq!(Language::Yaml.comment_syntax(), CommentSyntax::Hash);
1065 assert_eq!(Language::Toml.comment_syntax(), CommentSyntax::Hash);
1066 }
1067
1068 #[test]
1069 fn comment_syntax_cstyle_nested_languages() {
1070 assert_eq!(Language::Rust.comment_syntax(), CommentSyntax::CStyleNested);
1071 }
1072
1073 #[test]
1074 fn comment_syntax_cstyle_languages() {
1075 assert_eq!(Language::JavaScript.comment_syntax(), CommentSyntax::CStyle);
1076 assert_eq!(Language::TypeScript.comment_syntax(), CommentSyntax::CStyle);
1077 assert_eq!(Language::Go.comment_syntax(), CommentSyntax::CStyle);
1078 assert_eq!(Language::C.comment_syntax(), CommentSyntax::CStyle);
1079 assert_eq!(Language::Cpp.comment_syntax(), CommentSyntax::CStyle);
1080 assert_eq!(Language::CSharp.comment_syntax(), CommentSyntax::CStyle);
1081 assert_eq!(Language::Java.comment_syntax(), CommentSyntax::CStyle);
1082 assert_eq!(Language::Kotlin.comment_syntax(), CommentSyntax::CStyle);
1083 assert_eq!(Language::Json.comment_syntax(), CommentSyntax::CStyle);
1084 assert_eq!(Language::Unknown.comment_syntax(), CommentSyntax::CStyle);
1085 }
1086
1087 #[test]
1090 fn string_syntax_rust() {
1091 assert_eq!(Language::Rust.string_syntax(), StringSyntax::Rust);
1092 }
1093
1094 #[test]
1095 fn string_syntax_python() {
1096 assert_eq!(Language::Python.string_syntax(), StringSyntax::Python);
1097 }
1098
1099 #[test]
1100 fn string_syntax_javascript() {
1101 assert_eq!(
1102 Language::JavaScript.string_syntax(),
1103 StringSyntax::JavaScript
1104 );
1105 assert_eq!(
1106 Language::TypeScript.string_syntax(),
1107 StringSyntax::JavaScript
1108 );
1109 }
1110
1111 #[test]
1112 fn string_syntax_go() {
1113 assert_eq!(Language::Go.string_syntax(), StringSyntax::Go);
1114 }
1115
1116 #[test]
1117 fn string_syntax_cstyle_languages() {
1118 assert_eq!(Language::C.string_syntax(), StringSyntax::CStyle);
1119 assert_eq!(Language::Cpp.string_syntax(), StringSyntax::CStyle);
1120 assert_eq!(Language::CSharp.string_syntax(), StringSyntax::CStyle);
1121 assert_eq!(Language::Java.string_syntax(), StringSyntax::CStyle);
1122 assert_eq!(Language::Kotlin.string_syntax(), StringSyntax::CStyle);
1123 assert_eq!(Language::Unknown.string_syntax(), StringSyntax::CStyle);
1124 }
1125
1126 #[test]
1127 fn cstyle_masks_double_quoted_strings() {
1128 let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::C);
1129 let s = p.sanitize_line("printf(\"hello\");");
1130 assert!(s.contains("printf("));
1131 assert!(!s.contains("hello"));
1132 }
1133
1134 #[test]
1135 fn cstyle_masks_char_literals() {
1136 let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::C);
1137 let line = r#"char c = 'x'; char nl = '\n';"#;
1138 let s = p.sanitize_line(line);
1139 assert!(!s.contains("'x'"));
1140 assert!(!s.contains("'\\n'"));
1141 assert_eq!(s.len(), line.len());
1142 }
1143
1144 #[test]
1145 fn rust_masks_char_literals() {
1146 let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Rust);
1147 let line = r#"let c = 'z'; let escaped = '\'';"#;
1148 let s = p.sanitize_line(line);
1149 assert!(!s.contains("'z'"));
1150 assert!(!s.contains("'\\''"));
1151 assert_eq!(s.len(), line.len());
1152 }
1153
1154 #[test]
1155 fn string_syntax_ruby() {
1156 assert_eq!(Language::Ruby.string_syntax(), StringSyntax::JavaScript);
1158 }
1159
1160 #[test]
1163 fn preprocessor_with_language() {
1164 let p = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Python);
1165 assert_eq!(p.lang, Language::Python);
1166 }
1167
1168 #[test]
1169 fn preprocessor_set_language() {
1170 let mut p = Preprocessor::new(PreprocessOptions::comments_only());
1171 assert_eq!(p.lang, Language::Unknown);
1172 p.set_language(Language::Python);
1173 assert_eq!(p.lang, Language::Python);
1174 }
1175
1176 #[test]
1179 fn masks_line_comments_when_enabled() {
1180 let mut p = Preprocessor::new(PreprocessOptions::comments_only());
1181 let s = p.sanitize_line("let x = 1; // .unwrap() should be ignored");
1182 assert!(s.contains("let x = 1;"));
1183 assert!(!s.contains("unwrap"));
1184 }
1185
1186 #[test]
1187 fn does_not_mask_line_comments_when_disabled() {
1188 let mut p = Preprocessor::new(PreprocessOptions::none());
1189 let s = p.sanitize_line("// .unwrap() should be visible");
1190 assert!(s.contains("unwrap"));
1191 }
1192
1193 #[test]
1194 fn masks_strings_when_enabled() {
1195 let mut p = Preprocessor::new(PreprocessOptions::strings_only());
1196 let s = p.sanitize_line("let s = \".unwrap()\";");
1197 assert!(!s.contains("unwrap"));
1198 assert!(s.contains("let s ="));
1199 }
1200
1201 #[test]
1202 fn does_not_start_comment_inside_string() {
1203 let mut p = Preprocessor::new(PreprocessOptions::comments_only());
1204 let s = p.sanitize_line("let s = \"// not a comment\"; // real comment");
1205 assert!(s.contains("// not a comment"));
1206 assert!(!s.contains("real comment"));
1207 }
1208
1209 #[test]
1210 fn masks_raw_string() {
1211 let mut p =
1212 Preprocessor::with_language(PreprocessOptions::comments_and_strings(), Language::Rust);
1213 let s = p.sanitize_line("let s = r#\".unwrap()\"#;");
1214 assert!(!s.contains("unwrap"));
1215 }
1216
1217 #[test]
1220 fn python_masks_hash_comments() {
1221 let mut p =
1222 Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Python);
1223 let s = p.sanitize_line("x = 1 # this is a comment with print()");
1224 assert!(s.contains("x = 1"));
1225 assert!(!s.contains("print"));
1226 assert!(!s.contains("comment"));
1227 }
1228
1229 #[test]
1230 fn python_does_not_mask_hash_in_string() {
1231 let mut p =
1232 Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Python);
1233 let s = p.sanitize_line("x = \"# not a comment\" # real comment");
1234 assert!(s.contains("# not a comment"));
1235 assert!(!s.contains("real comment"));
1236 }
1237
1238 #[test]
1239 fn python_masks_single_quoted_strings() {
1240 let mut p =
1241 Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Python);
1242 let s = p.sanitize_line("x = 'print() inside string'");
1243 assert!(s.contains("x ="));
1244 assert!(!s.contains("print"));
1245 }
1246
1247 #[test]
1248 fn python_masks_double_quoted_strings() {
1249 let mut p =
1250 Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Python);
1251 let s = p.sanitize_line("x = \"print() inside string\"");
1252 assert!(s.contains("x ="));
1253 assert!(!s.contains("print"));
1254 }
1255
1256 #[test]
1257 fn python_masks_triple_double_quoted_strings() {
1258 let mut p =
1259 Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Python);
1260 let s = p.sanitize_line("x = \"\"\"print() inside triple string\"\"\"");
1261 assert!(s.contains("x ="));
1262 assert!(!s.contains("print"));
1263 }
1264
1265 #[test]
1266 fn python_masks_triple_single_quoted_strings() {
1267 let mut p =
1268 Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Python);
1269 let s = p.sanitize_line("x = '''print() inside triple string'''");
1270 assert!(s.contains("x ="));
1271 assert!(!s.contains("print"));
1272 }
1273
1274 #[test]
1275 fn python_triple_quoted_string_multiline() {
1276 let mut p =
1277 Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Python);
1278 let s1 = p.sanitize_line("x = \"\"\"start of");
1280 assert!(s1.contains("x ="));
1281 assert!(!s1.contains("start"));
1282
1283 let s2 = p.sanitize_line("print() in middle");
1285 assert!(!s2.contains("print"));
1286
1287 let s3 = p.sanitize_line("end of string\"\"\" + y");
1289 assert!(!s3.contains("end of string"));
1290 assert!(s3.contains("+ y"));
1291 }
1292
1293 #[test]
1296 fn javascript_masks_line_comments() {
1297 let mut p =
1298 Preprocessor::with_language(PreprocessOptions::comments_only(), Language::JavaScript);
1299 let s = p.sanitize_line("let x = 1; // console.log here");
1300 assert!(s.contains("let x = 1;"));
1301 assert!(!s.contains("console"));
1302 }
1303
1304 #[test]
1305 fn javascript_masks_block_comments() {
1306 let mut p =
1307 Preprocessor::with_language(PreprocessOptions::comments_only(), Language::JavaScript);
1308 let s = p.sanitize_line("let x = /* console.log */ 1;");
1309 assert!(s.contains("let x ="));
1310 assert!(s.contains("1;"));
1311 assert!(!s.contains("console"));
1312 }
1313
1314 #[test]
1315 fn javascript_masks_single_quoted_strings() {
1316 let mut p =
1317 Preprocessor::with_language(PreprocessOptions::strings_only(), Language::JavaScript);
1318 let s = p.sanitize_line("let x = 'console.log inside';");
1319 assert!(s.contains("let x ="));
1320 assert!(!s.contains("console"));
1321 }
1322
1323 #[test]
1324 fn javascript_masks_double_quoted_strings() {
1325 let mut p =
1326 Preprocessor::with_language(PreprocessOptions::strings_only(), Language::JavaScript);
1327 let s = p.sanitize_line("let x = \"console.log inside\";");
1328 assert!(s.contains("let x ="));
1329 assert!(!s.contains("console"));
1330 }
1331
1332 #[test]
1333 fn javascript_masks_template_literals() {
1334 let mut p =
1335 Preprocessor::with_language(PreprocessOptions::strings_only(), Language::JavaScript);
1336 let s = p.sanitize_line("let x = `console.log inside template`;");
1337 assert!(s.contains("let x ="));
1338 assert!(!s.contains("console"));
1339 }
1340
/// A template literal opened on one line stays masked on following lines until its closing
/// backtick; code after the backtick is visible again.
#[test]
fn javascript_template_literal_multiline() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::strings_only(), Language::JavaScript);

    // Opening line: code before the backtick survives, the literal text does not.
    let opening = pre.sanitize_line("let x = `start of");
    assert!(opening.contains("let x ="));
    assert!(!opening.contains("start"));

    // Interior line: still inside the literal.
    let interior = pre.sanitize_line("console.log in middle");
    assert!(!interior.contains("console"));

    // Closing line: literal tail masked, trailing code kept.
    let closing = pre.sanitize_line("end of template` + y;");
    assert!(!closing.contains("end of template"));
    assert!(closing.contains("+ y;"));
}
1359
/// Backtick template literal contents are masked for TypeScript in strings-only mode.
#[test]
fn typescript_masks_template_literals() {
    let line = "let x = `console.log inside template`;";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::strings_only(), Language::TypeScript);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("let x ="));
    assert!(!masked.contains("console"));
}
1368
/// A trailing `//` comment is masked for Go; the statement before it is kept.
#[test]
fn go_masks_line_comments() {
    let line = "x := 1 // fmt.Println here";
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Go);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("x := 1"));
    assert!(!masked.contains("fmt"));
}
1378
/// An inline `/* ... */` comment is masked for Go; the code on both sides is kept.
#[test]
fn go_masks_block_comments() {
    let line = "x := /* fmt.Println */ 1";
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Go);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("x :="));
    assert!(masked.contains("1"));
    assert!(!masked.contains("fmt"));
}
1387
/// Double-quoted Go string contents are masked in strings-only mode.
#[test]
fn go_masks_double_quoted_strings() {
    let line = "x := \"fmt.Println inside\"";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Go);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("x :="));
    assert!(!masked.contains("fmt"));
}
1395
/// Backtick-delimited Go raw string contents are masked in strings-only mode.
#[test]
fn go_masks_backtick_raw_strings() {
    let line = "x := `fmt.Println inside raw string`";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Go);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("x :="));
    assert!(!masked.contains("fmt"));
}
1403
/// A Go raw string opened on one line stays masked on following lines until its closing
/// backtick; code after the backtick is visible again.
#[test]
fn go_backtick_raw_string_multiline() {
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Go);

    let opening = pre.sanitize_line("x := `start of");
    assert!(opening.contains("x :="));
    assert!(!opening.contains("start"));

    let interior = pre.sanitize_line("fmt.Println in middle");
    assert!(!interior.contains("fmt"));

    let closing = pre.sanitize_line("end of raw` + y");
    assert!(!closing.contains("end of raw"));
    assert!(closing.contains("+ y"));
}
1421
/// A trailing `#` comment is masked for Ruby; the statement before it is kept.
#[test]
fn ruby_masks_hash_comments() {
    let line = "x = 1 # this is a comment with puts";
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Ruby);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("x = 1"));
    assert!(!masked.contains("puts"));
    assert!(!masked.contains("comment"));
}
1432
/// A `#` inside a Ruby string literal does not start a comment; only the real trailing comment
/// is masked.
#[test]
fn ruby_does_not_mask_hash_in_string() {
    let line = "x = \"# not a comment\" # real comment";
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Ruby);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("# not a comment"));
    assert!(!masked.contains("real comment"));
}
1440
/// Single-quoted Ruby string contents are masked in strings-only mode.
#[test]
fn ruby_masks_single_quoted_strings() {
    let line = "puts 'hello world'";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Ruby);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("puts"));
    assert!(!masked.contains("hello"));
    assert!(!masked.contains("world"));
}
1451
/// Double-quoted Ruby string contents are masked in strings-only mode.
#[test]
fn ruby_masks_double_quoted_strings() {
    let line = "puts \"hello world\"";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Ruby);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("puts"));
    assert!(!masked.contains("hello"));
    assert!(!masked.contains("world"));
}
1460
/// `Language::Unknown` treats `//` as a line comment.
#[test]
fn unknown_language_uses_cstyle_comments() {
    let line = "x = 1; // this is a comment";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Unknown);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("x = 1;"));
    assert!(!masked.contains("comment"));
}
1471
/// `Language::Unknown` treats `/* ... */` as a block comment.
#[test]
fn unknown_language_uses_cstyle_block_comments() {
    let line = "x = /* comment */ 1;";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Unknown);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("x ="));
    assert!(masked.contains("1;"));
    assert!(!masked.contains("comment"));
}
1481
/// `Language::Unknown` does not treat `#` as a comment marker, so the text after it is kept.
#[test]
fn unknown_language_does_not_mask_hash_as_comment() {
    let line = "x = 1 # this is NOT a comment";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Unknown);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("# this is NOT a comment"));
}
1490
/// Masking a Python `#` comment leaves the line's byte length unchanged.
#[test]
fn preserves_line_length_python_hash_comment() {
    let original = "x = 1 # comment";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Python);
    let masked = pre.sanitize_line(original);
    assert_eq!(masked.len(), original.len());
}
1501
/// Masking a JavaScript template literal leaves the line's byte length unchanged.
#[test]
fn preserves_line_length_javascript_template_literal() {
    let original = "let x = `template`;";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::strings_only(), Language::JavaScript);
    let masked = pre.sanitize_line(original);
    assert_eq!(masked.len(), original.len());
}
1510
/// Masking a Go raw string leaves the line's byte length unchanged.
#[test]
fn preserves_line_length_go_raw_string() {
    let original = "x := `raw string`";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Go);
    let masked = pre.sanitize_line(original);
    assert_eq!(masked.len(), original.len());
}
1518
/// Masking a Python triple-quoted string leaves the line's byte length unchanged.
#[test]
fn preserves_line_length_python_triple_quoted() {
    let original = "x = \"\"\"triple\"\"\"";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Python);
    let masked = pre.sanitize_line(original);
    assert_eq!(masked.len(), original.len());
}
1527
/// A C-style block comment spanning three JavaScript lines is masked on every line, with the
/// code before `/*` and after `*/` kept.
#[test]
fn multiline_block_comment_cstyle() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::JavaScript);

    // Comment opens after real code.
    let opening = pre.sanitize_line("let x = 1; /* start of comment");
    assert!(opening.contains("let x = 1;"));
    assert!(!opening.contains("start of comment"));

    // Whole line is comment interior.
    let interior = pre.sanitize_line("console.log('hidden') in middle");
    assert!(!interior.contains("console"));
    assert!(!interior.contains("hidden"));

    // Comment closes, then code resumes.
    let closing = pre.sanitize_line("end of comment */ let y = 2;");
    assert!(!closing.contains("end of comment"));
    assert!(closing.contains("let y = 2;"));
}
1550
/// Rust block comments nest: closing the inner `*/` leaves the outer comment open, and only
/// the second `*/` returns to code.
#[test]
fn multiline_block_comment_rust_nested() {
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Rust);

    // Two comment levels are opened on one line.
    let opened = pre.sanitize_line("let x = 1; /* outer /* inner");
    assert!(opened.contains("let x = 1;"));
    assert!(!opened.contains("outer"));
    assert!(!opened.contains("inner"));

    let interior = pre.sanitize_line("still in comment");
    assert!(!interior.contains("still"));

    // The inner level closes; the rest of the line is still inside the outer level.
    let inner_closed = pre.sanitize_line("inner closed */ still outer");
    assert!(!inner_closed.contains("inner closed"));
    assert!(!inner_closed.contains("still outer"));

    // The outer level closes; trailing code is visible again.
    let outer_closed = pre.sanitize_line("outer closed */ let y = 2;");
    assert!(!outer_closed.contains("outer closed"));
    assert!(outer_closed.contains("let y = 2;"));
}
1575
/// A block comment spanning three Go lines is masked on every line, with surrounding code kept.
#[test]
fn multiline_block_comment_go() {
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Go);

    let opening = pre.sanitize_line("x := 1 /* start");
    assert!(opening.contains("x := 1"));
    assert!(!opening.contains("start"));

    let interior = pre.sanitize_line("fmt.Println hidden");
    assert!(!interior.contains("fmt"));
    assert!(!interior.contains("hidden"));

    let closing = pre.sanitize_line("end */ y := 2");
    assert!(!closing.contains("end"));
    assert!(closing.contains("y := 2"));
}
1595
/// A Javadoc-shaped block comment spanning three Java lines is masked, and code after the
/// closing `*/` is kept.
#[test]
fn multiline_block_comment_java() {
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Java);

    let opening = pre.sanitize_line("int x = 1; /* javadoc style");
    assert!(opening.contains("int x = 1;"));
    assert!(!opening.contains("javadoc"));

    // Continuation line with the conventional leading ` * `.
    let interior = pre.sanitize_line(" * System.out.println hidden");
    assert!(!interior.contains("System"));
    assert!(!interior.contains("hidden"));

    let closing = pre.sanitize_line(" */ int y = 2;");
    assert!(closing.contains("int y = 2;"));
}
1614
/// Each line of a multi-line block comment keeps its byte length after masking.
#[test]
fn multiline_block_comment_preserves_line_length() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::JavaScript);

    // The lines are fed in order so the comment state carries from one to the next.
    let lines = ["let x = 1; /* start", "middle of comment", "end */ let y = 2;"];
    for line in lines {
        let masked = pre.sanitize_line(line);
        assert_eq!(masked.len(), line.len());
    }
}
1632
/// A JavaScript string whose line ends in a backslash continues onto the next line, so the
/// continuation text is masked as well.
#[test]
fn multiline_string_with_escaped_newline() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::strings_only(), Language::JavaScript);

    // First line ends with `\` while still inside the string.
    let first = pre.sanitize_line("let x = \"start\\");
    assert!(first.contains("let x ="));
    assert!(!first.contains("start"));

    let continued = pre.sanitize_line("console.log hidden\"");
    assert!(!continued.contains("console"));
}
1652
/// A Rust `r#"..."#` raw string spanning three lines is masked until its `"#` terminator.
#[test]
fn multiline_rust_raw_string() {
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Rust);

    let opening = pre.sanitize_line("let x = r#\"start of raw");
    assert!(opening.contains("let x ="));
    assert!(!opening.contains("start"));

    let interior = pre.sanitize_line("unwrap() hidden in raw string");
    assert!(!interior.contains("unwrap"));

    let closing = pre.sanitize_line("end of raw\"# + y;");
    assert!(!closing.contains("end of raw"));
    assert!(closing.contains("+ y;"));
}
1671
/// A Rust `r##"..."##` raw string is only closed by a terminator with the matching hash count;
/// a `"#` in the middle does not end it.
#[test]
fn multiline_rust_raw_string_with_hashes() {
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Rust);

    let opening = pre.sanitize_line("let x = r##\"start");
    assert!(opening.contains("let x ="));
    assert!(!opening.contains("start"));

    // `"#` has one hash too few, so the string must stay open.
    let decoy = pre.sanitize_line("fake end\"# still inside");
    assert!(!decoy.contains("fake"));
    assert!(!decoy.contains("still inside"));

    let closing = pre.sanitize_line("real end\"## + y;");
    assert!(!closing.contains("real end"));
    assert!(closing.contains("+ y;"));
}
1691
/// Single `"` and `'` characters inside a Python triple-quoted string do not end it; only the
/// closing `"""` does.
#[test]
fn multiline_python_triple_quoted_with_embedded_quotes() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Python);

    let opening = pre.sanitize_line("x = \"\"\"start with \"embedded\" quote");
    assert!(opening.contains("x ="));
    assert!(!opening.contains("start"));
    assert!(!opening.contains("embedded"));

    let interior = pre.sanitize_line("more \"quotes\" and 'single' too");
    assert!(!interior.contains("quotes"));
    assert!(!interior.contains("single"));

    let closing = pre.sanitize_line("end\"\"\" + y");
    assert!(!closing.contains("end"));
    assert!(closing.contains("+ y"));
}
1713
/// A multi-line template literal containing a `${...}` expression is masked until the closing
/// backtick.
#[test]
fn multiline_javascript_template_literal_with_expressions() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::strings_only(), Language::JavaScript);

    let opening = pre.sanitize_line("let x = `start ${expr}");
    assert!(opening.contains("let x ="));
    assert!(!opening.contains("start"));

    let interior = pre.sanitize_line("console.log in template");
    assert!(!interior.contains("console"));

    let closing = pre.sanitize_line("end` + y;");
    assert!(!closing.contains("end"));
    assert!(closing.contains("+ y;"));
}
1733
/// `reset()` drops an open block comment, so the next line is treated as ordinary code.
#[test]
fn reset_clears_block_comment_state() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::JavaScript);

    let opening = pre.sanitize_line("let x = 1; /* start comment");
    assert!(!opening.contains("start comment"));

    let interior = pre.sanitize_line("still in comment");
    assert!(!interior.contains("still"));

    pre.reset();

    // No `*/` was ever seen, yet this line must come through untouched.
    let after_reset = pre.sanitize_line("not in comment anymore");
    assert!(after_reset.contains("not in comment anymore"));
}
1756
/// `reset()` drops an open Python triple-quoted string, so the next line is treated as
/// ordinary code.
#[test]
fn reset_clears_string_state() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Python);

    let opening = pre.sanitize_line("x = \"\"\"start of string");
    assert!(!opening.contains("start"));

    let interior = pre.sanitize_line("still in string");
    assert!(!interior.contains("still"));

    pre.reset();

    let after_reset = pre.sanitize_line("not in string anymore");
    assert!(after_reset.contains("not in string anymore"));
}
1777
/// `reset()` drops an open JavaScript template literal, so the next line is treated as
/// ordinary code.
#[test]
fn reset_clears_template_literal_state() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::strings_only(), Language::JavaScript);

    let opening = pre.sanitize_line("let x = `start of template");
    assert!(!opening.contains("start"));

    let interior = pre.sanitize_line("still in template");
    assert!(!interior.contains("still"));

    pre.reset();

    let after_reset = pre.sanitize_line("not in template anymore");
    assert!(after_reset.contains("not in template anymore"));
}
1798
/// `reset()` drops an open Go raw string, so the next line is treated as ordinary code.
#[test]
fn reset_clears_raw_string_state() {
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Go);

    let opening = pre.sanitize_line("x := `start of raw");
    assert!(!opening.contains("start"));

    let interior = pre.sanitize_line("still in raw");
    assert!(!interior.contains("still"));

    pre.reset();

    let after_reset = pre.sanitize_line("not in raw anymore");
    assert!(after_reset.contains("not in raw anymore"));
}
1818
/// Switching language with `set_language` drops an open block comment from the previous
/// language.
#[test]
fn set_language_resets_state() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::JavaScript);

    let opening = pre.sanitize_line("let x = 1; /* start comment");
    assert!(!opening.contains("start comment"));

    let interior = pre.sanitize_line("still in comment");
    assert!(!interior.contains("still"));

    pre.set_language(Language::Python);

    // The JavaScript comment was never closed, but it must no longer apply.
    let after_switch = pre.sanitize_line("not in comment anymore");
    assert!(after_switch.contains("not in comment anymore"));
}
1839
/// After `set_language(Python)`, `#` starts a comment and `/* ... */` no longer does, and the
/// JavaScript comment left open earlier is forgotten.
#[test]
fn set_language_changes_syntax_and_resets() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::JavaScript);

    let js_open = pre.sanitize_line("let x = 1; /* start");
    assert!(!js_open.contains("start"));

    pre.set_language(Language::Python);

    // Python comment syntax is now in effect.
    let py_hash = pre.sanitize_line("x = 1 # python comment");
    assert!(py_hash.contains("x = 1"));
    assert!(!py_hash.contains("python comment"));

    // C-style block comment markers are plain text for Python.
    let py_cstyle = pre.sanitize_line("x = 1 /* not a comment */");
    assert!(py_cstyle.contains("/* not a comment */"));
}
1861
/// Simulates two files processed by one preprocessor: an unclosed comment at the end of the
/// first file must not leak into the second once `reset()` is called.
#[test]
fn state_reset_between_files_simulation() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_and_strings(), Language::Rust);

    // First file ends inside a block comment.
    let first_header = pre.sanitize_line("// File 1");
    assert!(!first_header.contains("File 1"));

    let first_unclosed = pre.sanitize_line("let x = 1; /* unclosed comment");
    assert!(!first_unclosed.contains("unclosed"));

    pre.reset();

    // Second file starts from a clean state.
    let second_header = pre.sanitize_line("// File 2");
    assert!(!second_header.contains("File 2"));

    let second_code = pre.sanitize_line("let y = 2; // normal code");
    assert!(second_code.contains("let y = 2;"));
    assert!(!second_code.contains("normal code"));
}
1885
/// An unclosed Python triple-quoted string does not leak into JavaScript lines processed after
/// `set_language`.
#[test]
fn state_reset_between_files_with_language_change() {
    let mut pre = Preprocessor::with_language(
        PreprocessOptions::comments_and_strings(),
        Language::Python,
    );

    let python_unclosed = pre.sanitize_line("x = \"\"\"unclosed");
    assert!(!python_unclosed.contains("unclosed"));

    pre.set_language(Language::JavaScript);

    // JavaScript string and comment rules apply from a clean state.
    let js_template = pre.sanitize_line("let x = `template`;");
    assert!(js_template.contains("let x ="));
    assert!(!js_template.contains("template"));

    let js_comment = pre.sanitize_line("let y = 2; // comment");
    assert!(js_comment.contains("let y = 2;"));
    assert!(!js_comment.contains("comment"));
}
1910
/// Nesting depth for Rust block comments is carried across lines: each `*/` closes exactly one
/// level.
#[test]
fn nested_rust_block_comment_state_tracking() {
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Rust);

    // Depth goes 0 -> 2 on this line.
    let depth_two = pre.sanitize_line("/* level 1 /* level 2");
    assert!(!depth_two.contains("level 1"));
    assert!(!depth_two.contains("level 2"));

    // One level closes; the remainder is still inside the outer comment.
    let depth_one = pre.sanitize_line("close level 2 */ still level 1");
    assert!(!depth_one.contains("close level 2"));
    assert!(!depth_one.contains("still level 1"));

    // The last level closes; trailing code is visible.
    let depth_zero = pre.sanitize_line("close level 1 */ visible code");
    assert!(!depth_zero.contains("close level 1"));
    assert!(depth_zero.contains("visible code"));
}
1930
/// Every shell alias (`shell`, `bash`, `sh`, `zsh`, `ksh`, `fish`) parses to `Language::Shell`.
#[test]
fn shell_language_from_str() {
    let parse = |alias: &str| -> Language { alias.parse().unwrap() };

    assert_eq!(parse("shell"), Language::Shell);
    assert_eq!(parse("bash"), Language::Shell);
    assert_eq!(parse("sh"), Language::Shell);
    assert_eq!(parse("zsh"), Language::Shell);
    assert_eq!(parse("ksh"), Language::Shell);
    assert_eq!(parse("fish"), Language::Shell);
}
1942
/// Shell uses hash-style comments.
#[test]
fn shell_comment_syntax() {
    let syntax = Language::Shell.comment_syntax();
    assert_eq!(syntax, CommentSyntax::Hash);
}
1947
/// Shell maps to its own `StringSyntax::Shell` string rules.
#[test]
fn shell_string_syntax() {
    let syntax = Language::Shell.string_syntax();
    assert_eq!(syntax, StringSyntax::Shell);
}
1952
/// A trailing `#` comment is masked for shell; the command before it is kept.
#[test]
fn shell_masks_hash_comments() {
    let line = "echo hello # this is a comment";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Shell);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("echo hello"));
    assert!(!masked.contains("this is a comment"));
}
1961
/// A `#` inside a double-quoted shell string does not start a comment; only the real trailing
/// comment is masked.
#[test]
fn shell_does_not_mask_hash_in_string() {
    let line = "echo \"# not a comment\" # real comment";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Shell);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("# not a comment"));
    assert!(!masked.contains("real comment"));
}
1970
/// A single-quoted shell string containing a literal `\n` sequence is masked in full,
/// backslash included.
#[test]
fn shell_single_quoted_string_no_escapes() {
    let line = "echo 'hello\\nworld'";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Shell);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("echo"));
    assert!(!masked.contains("hello"));
    assert!(!masked.contains("world"));
    assert!(!masked.contains("\\n"));
}
1982
/// A single-quoted shell string ends at the next `'`; text after it is ordinary code.
#[test]
fn shell_single_quoted_cannot_contain_single_quote() {
    let line = "echo 'hello' world";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Shell);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("echo"));
    assert!(!masked.contains("hello"));
    assert!(masked.contains("world"));
}
1993
/// Double-quoted shell string contents are masked in strings-only mode.
#[test]
fn shell_double_quoted_strings() {
    let line = "echo \"hello world\"";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Shell);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("echo"));
    assert!(!masked.contains("hello"));
    assert!(!masked.contains("world"));
}
2002
/// Backslash-escaped quotes inside a double-quoted shell string do not end it.
#[test]
fn shell_double_quoted_with_escapes() {
    let line = "echo \"say \\\"hello\\\"\"";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Shell);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("echo"));
    assert!(!masked.contains("say"));
    assert!(!masked.contains("hello"));
}
2012
/// The contents of an ANSI-C quoted `$'...'` shell string are masked.
#[test]
fn shell_ansi_c_quoting() {
    let line = "echo $'hello\\nworld'";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Shell);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("echo"));
    assert!(!masked.contains("hello"));
    assert!(!masked.contains("world"));
}
2022
/// A `\t` escape inside a `$'...'` shell string does not interrupt masking.
#[test]
fn shell_ansi_c_quoting_with_escapes() {
    let line = "echo $'tab\\there'";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Shell);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("echo"));
    assert!(!masked.contains("tab"));
    assert!(!masked.contains("here"));
}
2032
/// An escaped `\'` inside a `$'...'` shell string does not end it, so the text on both sides
/// of the escape is masked.
#[test]
fn shell_ansi_c_escaped_single_quote() {
    let line = "echo $'it\\'s ok'";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Shell);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("echo"));
    assert!(!masked.contains("it"));
    assert!(!masked.contains("ok"));
}
2042
/// Masking both a string and a comment on a shell line leaves its byte length unchanged.
#[test]
fn shell_preserves_line_length() {
    let original = "echo 'hello' # comment";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_and_strings(), Language::Shell);
    let masked = pre.sanitize_line(original);
    assert_eq!(masked.len(), original.len());
}
2051
/// A double-quoted shell string continued past a trailing backslash closes on the next line,
/// after which code is visible again.
#[test]
fn shell_multiline_double_quoted_string() {
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Shell);

    let first = pre.sanitize_line("echo \"start\\");
    assert!(first.contains("echo"));
    assert!(!first.contains("start"));

    // Only the text after the closing quote is checked here.
    let second = pre.sanitize_line("middle\" end");
    assert!(second.contains("end"));
}
2066
/// With both strings and comments masked, a quoted `#include` is hidden as string content and
/// the real trailing comment is hidden as a comment; the unquoted words are kept.
#[test]
fn shell_hash_not_comment_in_string() {
    let line = "grep '#include' file.c # search for includes";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_and_strings(), Language::Shell);
    let masked = pre.sanitize_line(line);
    assert!(!masked.contains("#include"));
    assert!(!masked.contains("search"));
    assert!(masked.contains("grep"));
    assert!(masked.contains("file.c"));
}
2077
/// Single-quoted, double-quoted and ANSI-C quoted strings plus a trailing comment on one shell
/// line are all masked; the command name is kept.
#[test]
fn shell_complex_mixed_quotes() {
    let line = "echo 'single' \"double\" $'ansi' # comment";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_and_strings(), Language::Shell);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("echo"));
    assert!(!masked.contains("single"));
    assert!(!masked.contains("double"));
    assert!(!masked.contains("ansi"));
    assert!(!masked.contains("comment"));
}
2090
/// `"swift"` parses to `Language::Swift` regardless of ASCII case.
#[test]
fn swift_language_from_str() {
    let parse = |name: &str| -> Language { name.parse().unwrap() };

    assert_eq!(parse("swift"), Language::Swift);
    assert_eq!(parse("Swift"), Language::Swift);
    assert_eq!(parse("SWIFT"), Language::Swift);
}
2099
/// Swift uses C-style comments with nesting.
#[test]
fn swift_comment_syntax() {
    let syntax = Language::Swift.comment_syntax();
    assert_eq!(syntax, CommentSyntax::CStyleNested);
}
2107
/// Swift maps to the shared `StringSyntax::SwiftScala` string rules.
#[test]
fn swift_string_syntax() {
    let syntax = Language::Swift.string_syntax();
    assert_eq!(syntax, StringSyntax::SwiftScala);
}
2112
/// A trailing `//` comment is masked for Swift; the statement before it is kept.
#[test]
fn swift_masks_line_comments() {
    let line = "let x = 1 // print() here";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Swift);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("let x = 1"));
    assert!(!masked.contains("print"));
}
2121
/// An inline `/* ... */` comment is masked for Swift; the code on both sides is kept.
#[test]
fn swift_masks_block_comments() {
    let line = "let x = /* print() */ 1";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Swift);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("let x ="));
    assert!(masked.contains("1"));
    assert!(!masked.contains("print"));
}
2131
/// Swift block comments nest: closing the inner `*/` leaves the outer comment open, and only
/// the second `*/` returns to code.
#[test]
fn swift_nested_block_comments() {
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Swift);

    // Two comment levels are opened on one line.
    let opened = pre.sanitize_line("let x = 1 /* outer /* inner");
    assert!(opened.contains("let x = 1"));
    assert!(!opened.contains("outer"));
    assert!(!opened.contains("inner"));

    let interior = pre.sanitize_line("still inside");
    assert!(!interior.contains("still"));

    // The inner level closes; the rest of the line is still commented out.
    let inner_closed = pre.sanitize_line("close inner */ still outer");
    assert!(!inner_closed.contains("close inner"));
    assert!(!inner_closed.contains("still outer"));

    // The outer level closes; trailing code is visible again.
    let outer_closed = pre.sanitize_line("close outer */ let y = 2");
    assert!(!outer_closed.contains("close outer"));
    assert!(outer_closed.contains("let y = 2"));
}
2156
/// Double-quoted Swift string contents are masked in strings-only mode.
#[test]
fn swift_masks_double_quoted_strings() {
    let line = "let x = \"print() inside\"";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Swift);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("let x ="));
    assert!(!masked.contains("print"));
}
2164
/// The contents of a Swift `"""..."""` string on a single line are masked.
#[test]
fn swift_masks_triple_quoted_strings() {
    let line = "let x = \"\"\"print() inside\"\"\"";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Swift);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("let x ="));
    assert!(!masked.contains("print"));
}
2172
/// A Swift `"""` string spanning three lines is masked until its closing `"""`; code after it
/// is visible again.
#[test]
fn swift_triple_quoted_string_multiline() {
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Swift);

    let opening = pre.sanitize_line("let x = \"\"\"start of");
    assert!(opening.contains("let x ="));
    assert!(!opening.contains("start"));

    let interior = pre.sanitize_line("print() in middle");
    assert!(!interior.contains("print"));

    let closing = pre.sanitize_line("end of string\"\"\" + y");
    assert!(!closing.contains("end of string"));
    assert!(closing.contains("+ y"));
}
2190
/// Masking both a string and a comment on a Swift line leaves its byte length unchanged.
#[test]
fn swift_preserves_line_length() {
    let original = "let x = \"hello\" // comment";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_and_strings(), Language::Swift);
    let masked = pre.sanitize_line(original);
    assert_eq!(masked.len(), original.len());
}
2199
/// `"scala"` parses to `Language::Scala` regardless of ASCII case.
#[test]
fn scala_language_from_str() {
    let parse = |name: &str| -> Language { name.parse().unwrap() };

    assert_eq!(parse("scala"), Language::Scala);
    assert_eq!(parse("Scala"), Language::Scala);
    assert_eq!(parse("SCALA"), Language::Scala);
}
2208
/// Scala uses C-style comments with nesting.
#[test]
fn scala_comment_syntax() {
    let syntax = Language::Scala.comment_syntax();
    assert_eq!(syntax, CommentSyntax::CStyleNested);
}
2216
/// Scala maps to the shared `StringSyntax::SwiftScala` string rules.
#[test]
fn scala_string_syntax() {
    let syntax = Language::Scala.string_syntax();
    assert_eq!(syntax, StringSyntax::SwiftScala);
}
2221
/// A trailing `//` comment is masked for Scala; the statement before it is kept.
#[test]
fn scala_masks_line_comments() {
    let line = "val x = 1 // println() here";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Scala);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("val x = 1"));
    assert!(!masked.contains("println"));
}
2230
/// An inline `/* ... */` comment is masked for Scala; the code on both sides is kept.
#[test]
fn scala_masks_block_comments() {
    let line = "val x = /* println() */ 1";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Scala);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("val x ="));
    assert!(masked.contains("1"));
    assert!(!masked.contains("println"));
}
2240
/// Scala block comments nest: closing the inner `*/` leaves the outer comment open, and only
/// the second `*/` returns to code.
///
/// The assertions on "still outer" and on the leading "outer" of the last line are the ones
/// that distinguish a nesting scanner from a flat one. A flat scanner would return to code at
/// the first `*/` and leave both visible.
#[test]
fn scala_nested_block_comments() {
    let mut p =
        Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Scala);

    // Two comment levels are opened on one line; both must be masked.
    let s1 = p.sanitize_line("val x = 1 /* outer /* inner");
    assert!(s1.contains("val x = 1"));
    assert!(!s1.contains("outer"));
    assert!(!s1.contains("inner"));

    let s2 = p.sanitize_line("still inside");
    assert!(!s2.contains("still"));

    // The inner level closes; the rest of the line is still inside the outer comment.
    let s3 = p.sanitize_line("inner */ still outer");
    assert!(!s3.contains("inner"));
    assert!(!s3.contains("still outer"));

    // The outer level closes; only the trailing code may be visible.
    let s4 = p.sanitize_line("outer */ val y = 2");
    assert!(!s4.contains("outer"));
    assert!(s4.contains("val y = 2"));
}
2259
/// Double-quoted Scala string contents are masked in strings-only mode.
#[test]
fn scala_masks_double_quoted_strings() {
    let line = "val x = \"println() inside\"";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Scala);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("val x ="));
    assert!(!masked.contains("println"));
}
2267
/// The contents of a Scala `"""..."""` string on a single line are masked.
#[test]
fn scala_masks_triple_quoted_strings() {
    let line = "val x = \"\"\"println() inside\"\"\"";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Scala);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("val x ="));
    assert!(!masked.contains("println"));
}
2275
/// Three single quotes are not a string delimiter for Scala: the line comes back unchanged and
/// the preprocessor ends in `Mode::Normal`.
#[test]
fn scala_triple_single_quotes_do_not_start_triple_string() {
    let input = "val x = '''println() inside'''";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Scala);
    let output = pre.sanitize_line(input);
    assert_eq!(output, input);
    assert!(pre.mode == Mode::Normal);
}
2284
/// A Scala `"""` string spanning three lines is masked until its closing `"""`; code after it
/// is visible again.
#[test]
fn scala_triple_quoted_string_multiline() {
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Scala);

    let opening = pre.sanitize_line("val x = \"\"\"start of");
    assert!(opening.contains("val x ="));
    assert!(!opening.contains("start"));

    let interior = pre.sanitize_line("println() in middle");
    assert!(!interior.contains("println"));

    let closing = pre.sanitize_line("end of string\"\"\" + y");
    assert!(!closing.contains("end of string"));
    assert!(closing.contains("+ y"));
}
2299
/// Masking both a string and a comment on a Scala line leaves its byte length unchanged.
#[test]
fn scala_preserves_line_length() {
    let original = "val x = \"hello\" // comment";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_and_strings(), Language::Scala);
    let masked = pre.sanitize_line(original);
    assert_eq!(masked.len(), original.len());
}
2308
/// `"sql"` parses to `Language::Sql` regardless of ASCII case.
#[test]
fn sql_language_from_str() {
    let parse = |name: &str| -> Language { name.parse().unwrap() };

    assert_eq!(parse("sql"), Language::Sql);
    assert_eq!(parse("SQL"), Language::Sql);
    assert_eq!(parse("Sql"), Language::Sql);
}
2317
/// SQL has its own `CommentSyntax::Sql` comment rules.
#[test]
fn sql_comment_syntax() {
    let syntax = Language::Sql.comment_syntax();
    assert_eq!(syntax, CommentSyntax::Sql);
}
2322
/// SQL has its own `StringSyntax::Sql` string rules.
#[test]
fn sql_string_syntax() {
    let syntax = Language::Sql.string_syntax();
    assert_eq!(syntax, StringSyntax::Sql);
}
2327
/// A trailing `--` comment is masked for SQL; the query before it is kept.
#[test]
fn sql_masks_double_dash_comments() {
    let line = "SELECT * FROM users -- secret query";
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Sql);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("SELECT * FROM users"));
    assert!(!masked.contains("secret"));
}
2335
/// An inline `/* ... */` comment is masked for SQL; the query text on both sides is kept.
#[test]
fn sql_masks_block_comments() {
    let line = "SELECT /* hidden */ * FROM users";
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Sql);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("SELECT"));
    assert!(masked.contains("* FROM users"));
    assert!(!masked.contains("hidden"));
}
2344
/// A block comment spanning three SQL lines is masked on every line, with surrounding query
/// text kept.
#[test]
fn sql_multiline_block_comment() {
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Sql);

    let opening = pre.sanitize_line("SELECT * /* start comment");
    assert!(opening.contains("SELECT *"));
    assert!(!opening.contains("start"));

    let interior = pre.sanitize_line("hidden query");
    assert!(!interior.contains("hidden"));

    let closing = pre.sanitize_line("end comment */ FROM users");
    assert!(!closing.contains("end comment"));
    assert!(closing.contains("FROM users"));
}
2359
/// Single-quoted SQL string contents are masked in strings-only mode.
#[test]
fn sql_masks_single_quoted_strings() {
    let line = "SELECT * FROM users WHERE name = 'secret_password'";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Sql);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("SELECT * FROM users WHERE name ="));
    assert!(!masked.contains("secret_password"));
}
2367
/// Double-quoted text in SQL is not masked as a string, so the quoted name stays visible.
#[test]
fn sql_does_not_mask_double_quoted_as_string() {
    let line = "SELECT \"column\" FROM users";
    let mut pre = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Sql);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("SELECT"));
    assert!(masked.contains("column"));
}
2378
/// A lone `-` (subtraction) does not start a SQL comment; the line is kept in full.
#[test]
fn sql_single_dash_not_comment() {
    let line = "SELECT a - b FROM table";
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Sql);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("SELECT a - b FROM table"));
}
2386
/// Masking both a string and a comment on a SQL line leaves its byte length unchanged.
#[test]
fn sql_preserves_line_length() {
    let original = "SELECT 'hello' -- comment";
    let mut pre =
        Preprocessor::with_language(PreprocessOptions::comments_and_strings(), Language::Sql);
    let masked = pre.sanitize_line(original);
    assert_eq!(masked.len(), original.len());
}
2395
/// A `--` inside a single-quoted SQL string does not start a comment; only the real trailing
/// comment is masked.
///
/// Despite the test's name, the input exercises the `--` marker rather than `#`.
#[test]
fn sql_does_not_mask_hash_in_string() {
    let line = "SELECT * WHERE name = '-- not a comment' -- real comment";
    let mut pre = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Sql);
    let masked = pre.sanitize_line(line);
    assert!(masked.contains("-- not a comment"));
    assert!(!masked.contains("real comment"));
}
2403
/// Every XML-family alias (`xml`, `html`, `xhtml`, `svg`, `xsl`, `xslt`) parses to
/// `Language::Xml`.
#[test]
fn xml_language_from_str() {
    let parse = |alias: &str| -> Language { alias.parse().unwrap() };

    assert_eq!(parse("xml"), Language::Xml);
    assert_eq!(parse("html"), Language::Xml);
    assert_eq!(parse("xhtml"), Language::Xml);
    assert_eq!(parse("svg"), Language::Xml);
    assert_eq!(parse("xsl"), Language::Xml);
    assert_eq!(parse("xslt"), Language::Xml);
}
2415
/// `Language::Xml` reports `CommentSyntax::Xml` as its comment syntax.
#[test]
fn xml_comment_syntax() {
    let syntax = Language::Xml.comment_syntax();
    assert_eq!(syntax, CommentSyntax::Xml);
}
2420
/// `Language::Xml` reports `StringSyntax::Xml` as its string syntax.
#[test]
fn xml_string_syntax() {
    let syntax = Language::Xml.string_syntax();
    assert_eq!(syntax, StringSyntax::Xml);
}
2425
/// An inline `<!-- ... -->` comment is removed while the tags on either side of it
/// are kept.
#[test]
fn xml_masks_comments() {
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Xml);
    let masked = sanitizer.sanitize_line("<div><!-- secret comment --></div>");
    for tag in ["<div>", "</div>"] {
        assert!(masked.contains(tag), "expected {tag:?} to be kept");
    }
    assert!(!masked.contains("secret"));
}
2434
/// An XML comment opened on one line keeps hiding text on following lines until
/// `-->` is seen; markup outside the comment is kept.
#[test]
fn xml_multiline_comment() {
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Xml);

    // Line 1: the comment opens after a tag.
    let opening = sanitizer.sanitize_line("<div><!-- start comment");
    assert!(opening.contains("<div>"));
    assert!(!opening.contains("start"));

    // Line 2: entirely inside the comment.
    let interior = sanitizer.sanitize_line("hidden content");
    assert!(!interior.contains("hidden"));

    // Line 3: the comment closes, then real markup follows.
    let closing = sanitizer.sanitize_line("end comment --></div>");
    assert!(!closing.contains("end comment"));
    assert!(closing.contains("</div>"));
}
2449
/// Double-quoted attribute values are removed under `strings_only`; the attribute
/// names and `=` signs are kept.
#[test]
fn xml_masks_double_quoted_attributes() {
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Xml);
    let masked = sanitizer.sanitize_line("<input type=\"password\" value=\"secret\">");
    for kept in ["<input type=", "value="] {
        assert!(masked.contains(kept), "expected {kept:?} to be kept");
    }
    for hidden in ["password", "secret"] {
        assert!(!masked.contains(hidden), "expected {hidden:?} to be masked");
    }
}
2459
/// Single-quoted attribute values are removed under `strings_only`; the attribute
/// names and `=` signs are kept.
#[test]
fn xml_masks_single_quoted_attributes() {
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Xml);
    let masked = sanitizer.sanitize_line("<input type='password' value='secret'>");
    for kept in ["<input type=", "value="] {
        assert!(masked.contains(kept), "expected {kept:?} to be kept");
    }
    for hidden in ["password", "secret"] {
        assert!(!masked.contains(hidden), "expected {hidden:?} to be masked");
    }
}
2469
/// A tag mixing one double-quoted and one single-quoted attribute has both values
/// removed, with the attribute names kept.
#[test]
fn xml_mixed_quotes() {
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Xml);
    let masked = sanitizer.sanitize_line("<div class=\"myclass\" id='myid'>");
    for kept in ["<div class=", "id="] {
        assert!(masked.contains(kept), "expected {kept:?} to be kept");
    }
    for hidden in ["myclass", "myid"] {
        assert!(!masked.contains(hidden), "expected {hidden:?} to be masked");
    }
}
2479
/// Sanitizing an XML line containing both an attribute string and a comment yields
/// output with the same byte length as the input.
#[test]
fn xml_preserves_line_length() {
    let line = "<div class=\"test\"><!-- comment --></div>";
    let options = PreprocessOptions::comments_and_strings();
    let mut sanitizer = Preprocessor::with_language(options, Language::Xml);
    assert_eq!(sanitizer.sanitize_line(line).len(), line.len());
}
2488
/// Comment delimiters that appear inside an attribute value are kept verbatim,
/// while the genuine comment after the tag is removed.
#[test]
fn xml_comment_delimiter_not_in_string() {
    let markup = "<div data-comment=\"<!-- not a comment -->\"><!-- real --></div>";
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Xml);
    let masked = sanitizer.sanitize_line(markup);
    assert!(masked.contains("<!-- not a comment -->"));
    assert!(!masked.contains("real"));
}
2500
/// `"php"` parses to `Language::Php` in lower, upper, and mixed case.
#[test]
fn php_language_from_str() {
    for spelling in ["php", "PHP", "Php"] {
        let parsed: Language = spelling.parse().unwrap();
        assert_eq!(parsed, Language::Php, "spelling {spelling:?}");
    }
}
2509
/// `Language::Php` reports `CommentSyntax::Php` as its comment syntax.
#[test]
fn php_comment_syntax() {
    let syntax = Language::Php.comment_syntax();
    assert_eq!(syntax, CommentSyntax::Php);
}
2514
/// `Language::Php` reports `StringSyntax::Php` as its string syntax.
#[test]
fn php_string_syntax() {
    let syntax = Language::Php.string_syntax();
    assert_eq!(syntax, StringSyntax::Php);
}
2519
/// A trailing `//` comment in PHP is removed; the statement before it is kept.
#[test]
fn php_masks_double_slash_comments() {
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    let masked = sanitizer.sanitize_line("$x = 1; // echo secret");
    assert!(masked.contains("$x = 1;"));
    for hidden in ["echo", "secret"] {
        assert!(!masked.contains(hidden), "expected {hidden:?} to be masked");
    }
}
2528
/// A trailing `#` comment in PHP is removed; the statement before it is kept.
#[test]
fn php_masks_hash_comments() {
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    let masked = sanitizer.sanitize_line("$x = 1; # echo secret");
    assert!(masked.contains("$x = 1;"));
    for hidden in ["echo", "secret"] {
        assert!(!masked.contains(hidden), "expected {hidden:?} to be masked");
    }
}
2537
/// An inline `/* ... */` comment in PHP is removed; the code on both sides is kept.
#[test]
fn php_masks_block_comments() {
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    let masked = sanitizer.sanitize_line("$x = /* echo secret */ 1;");
    for kept in ["$x =", "1;"] {
        assert!(masked.contains(kept), "expected {kept:?} to be kept");
    }
    for hidden in ["echo", "secret"] {
        assert!(!masked.contains(hidden), "expected {hidden:?} to be masked");
    }
}
2547
/// A PHP block comment opened on one line keeps hiding text on following lines
/// until `*/` is seen; code outside the comment is kept.
#[test]
fn php_multiline_block_comment() {
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);

    // Line 1: code, then the comment opens.
    let opening = sanitizer.sanitize_line("$x = 1; /* start comment");
    assert!(opening.contains("$x = 1;"));
    assert!(!opening.contains("start"));

    // Line 2: entirely inside the comment.
    let interior = sanitizer.sanitize_line("hidden code");
    assert!(!interior.contains("hidden"));

    // Line 3: the comment closes, then code resumes.
    let closing = sanitizer.sanitize_line("end comment */ $y = 2;");
    assert!(!closing.contains("end comment"));
    assert!(closing.contains("$y = 2;"));
}
2562
/// The body of a double-quoted PHP string is removed under `strings_only`; the
/// assignment before it is kept.
#[test]
fn php_masks_double_quoted_strings() {
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    let masked = sanitizer.sanitize_line("$x = \"echo secret\";");
    assert!(masked.contains("$x ="));
    for hidden in ["echo", "secret"] {
        assert!(!masked.contains(hidden), "expected {hidden:?} to be masked");
    }
}
2571
/// The body of a single-quoted PHP string is removed under `strings_only`; the
/// assignment before it is kept.
#[test]
fn php_masks_single_quoted_strings() {
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    let masked = sanitizer.sanitize_line("$x = 'echo secret';");
    assert!(masked.contains("$x ="));
    for hidden in ["echo", "secret"] {
        assert!(!masked.contains(hidden), "expected {hidden:?} to be masked");
    }
}
2580
/// Backslash-escaped quotes inside a double-quoted PHP string do not end the
/// string early: text on both sides of the escapes is removed.
#[test]
fn php_string_with_escapes() {
    // The PHP source line is: $x = "say \"hello\"";
    let source = r#"$x = "say \"hello\"";"#;
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    let masked = sanitizer.sanitize_line(source);
    assert!(masked.contains("$x ="));
    for hidden in ["say", "hello"] {
        assert!(!masked.contains(hidden), "expected {hidden:?} to be masked");
    }
}
2589
/// A `#` inside a double-quoted PHP string is kept, while the real `#` comment
/// after the statement is removed.
#[test]
fn php_hash_not_comment_in_string() {
    let source = "$x = \"# not a comment\"; # real comment";
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    let masked = sanitizer.sanitize_line(source);
    assert!(masked.contains("# not a comment"));
    assert!(!masked.contains("real comment"));
}
2597
/// A `//` inside a double-quoted PHP string is kept, while the real `//` comment
/// after the statement is removed.
#[test]
fn php_slash_not_comment_in_string() {
    let source = r#"$x = "// not a comment"; // real comment"#;
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    let masked = sanitizer.sanitize_line(source);
    assert!(masked.contains("// not a comment"));
    assert!(!masked.contains("real comment"));
}
2605
/// Sanitizing a PHP line containing both a string and a `//` comment yields output
/// with the same byte length as the input.
#[test]
fn php_preserves_line_length() {
    let line = "$x = 'hello'; // comment";
    let options = PreprocessOptions::comments_and_strings();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    assert_eq!(sanitizer.sanitize_line(line).len(), line.len());
}
2614
/// With both comments and strings sanitized, a PHP line keeps only its code: the
/// `echo` keyword and the `.` operator are kept, while both string bodies and the
/// trailing comment (including its embedded `#`) are removed.
#[test]
fn php_mixed_comments_and_strings() {
    let source = "echo 'single' . \"double\"; // comment # more";
    let options = PreprocessOptions::comments_and_strings();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    let masked = sanitizer.sanitize_line(source);
    for kept in ["echo", "."] {
        assert!(masked.contains(kept), "expected {kept:?} to be kept");
    }
    for hidden in ["single", "double", "comment", "more"] {
        assert!(!masked.contains(hidden), "expected {hidden:?} to be masked");
    }
}
2627
/// A trailing `#` comment on a YAML line is removed; the key/value pair is kept.
#[test]
fn yaml_hash_comment_ignored() {
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Yaml);
    let masked = sanitizer.sanitize_line("key: value # secret");
    assert!(masked.contains("key: value"));
    assert!(!masked.contains("secret"));
}
2637
/// A trailing `#` comment on a TOML line is removed; the assignment, including its
/// quoted value, is kept under `comments_only`.
#[test]
fn toml_hash_comment_ignored() {
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Toml);
    let masked = sanitizer.sanitize_line("name = \"app\" # local");
    assert!(masked.contains("name = \"app\""));
    assert!(!masked.contains("local"));
}
2645
/// A trailing `//` comment on a `Language::Json` line is removed; the key/value
/// text before it is kept.
#[test]
fn jsonc_double_slash_comment_ignored() {
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Json);
    let masked = sanitizer.sanitize_line("{\"key\": \"value\" // trailing note");
    assert!(masked.contains("{\"key\": \"value\""));
    assert!(!masked.contains("trailing note"));
}
2653
/// The `Debug` rendering of a sample of `Mode` variants is never an empty string.
#[test]
fn mode_debug_formats_variants() {
    let variants = [
        Mode::RawString { hashes: 2 },
        Mode::Char { escaped: false },
        Mode::TemplateLiteral { escaped: true },
        Mode::TripleQuotedString {
            escaped: false,
            quote: b'"',
        },
        Mode::ShellLiteralString,
        Mode::ShellAnsiCString { escaped: false },
        Mode::XmlComment,
    ];

    for rendered in variants.iter().map(|mode| format!("{mode:?}")) {
        assert!(!rendered.is_empty());
    }
}
2674
/// Under `strings_only`, the bodies of a Rust raw string and a byte string are both
/// removed while the surrounding `let` bindings are kept.
#[test]
fn rust_raw_and_byte_strings_masked() {
    let source = "let a = r#\"raw\"#; let b = b\"byte\";";
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Rust);
    let masked = sanitizer.sanitize_line(source);
    for kept in ["let a =", "let b ="] {
        assert!(masked.contains(kept), "expected {kept:?} to be kept");
    }
    for hidden in ["raw", "byte"] {
        assert!(!masked.contains(hidden), "expected {hidden:?} to be masked");
    }
}
2685
/// A Python triple-quoted string containing a double backslash has its body
/// removed, and the assignment before it is kept.
#[test]
fn python_triple_quoted_string_handles_escapes() {
    let source = "x = \"\"\"a\\\\b\"\"\"";
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Python);
    let masked = sanitizer.sanitize_line(source);
    assert!(masked.contains("x ="));
    assert!(!masked.contains("a\\\\b"));
}
2695
/// A shell `$'...'` string containing a double backslash has its body removed:
/// neither `a` nor `n` remains in the output, while `echo` is kept.
#[test]
fn shell_ansi_c_string_masks_and_escapes() {
    let source = "echo $'a\\\\n'";
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Shell);
    let masked = sanitizer.sanitize_line(source);
    assert!(masked.contains("echo"));
    // `echo` itself contains neither letter, so any hit must come from the string.
    assert!(!masked.contains('a'));
    assert!(!masked.contains('n'));
}
2705
/// A block comment between `<?php` and the code is removed; the `echo` statement
/// after it is kept.
#[test]
fn php_block_comments_masked() {
    let source = "<?php /* block */ echo $x; ?>";
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    let masked = sanitizer.sanitize_line(source);
    assert!(masked.contains("echo"));
    assert!(!masked.contains("block"));
}
2714
/// A `//` line comment does not carry over to the next line: the following line of
/// code comes through intact.
#[test]
fn line_comment_resets_mode() {
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Rust);

    let comment_line = sanitizer.sanitize_line("// comment");
    assert!(!comment_line.contains("comment"));

    let code_line = sanitizer.sanitize_line("let x = 1;");
    assert!(code_line.contains("let x = 1;"));
}
2723
/// In Rust, a nested block comment is removed as a whole: text before, inside, and
/// after the inner comment is gone, and code after the outer `*/` is kept.
#[test]
fn nested_block_comment_masks_nested() {
    let source = "/* outer /* inner */ tail */ let x = 1;";
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Rust);
    let masked = sanitizer.sanitize_line(source);
    assert!(masked.contains("let x = 1;"));
    for hidden in ["outer", "inner", "tail"] {
        assert!(!masked.contains(hidden), "expected {hidden:?} to be masked");
    }
}
2734
/// An XML comment that opens and closes on one line is removed, and the tag that
/// follows it is kept.
#[test]
fn xml_comment_masks_and_closes() {
    let source = "<!-- hi --> <tag>";
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Xml);
    let masked = sanitizer.sanitize_line(source);
    assert!(masked.contains("<tag>"));
    assert!(!masked.contains("hi"));
}
2743
/// The body of a Rust `b"..."` byte string is removed under `strings_only`.
#[test]
fn rust_byte_string_masks_when_strings_only() {
    let source = "let b = b\"bytes\";";
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Rust);
    let masked = sanitizer.sanitize_line(source);
    assert!(!masked.contains("bytes"));
}
2750
/// When a Rust raw string spans two lines, the closing `"#` on the second line is
/// itself removed from the sanitized output.
#[test]
fn rust_raw_string_masks_end_delimiter() {
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Rust);
    // First line only opens the raw string; its output is not inspected.
    let _ = sanitizer.sanitize_line("let s = r#\"raw");
    let closing = sanitizer.sanitize_line("\"#;");
    assert!(!closing.contains("\"#"));
}
2758
/// A line consisting solely of a shell `$'...'` string sanitizes to nothing but
/// whitespace, so the `$` prefix and quotes are removed along with the body.
#[test]
fn shell_ansi_c_string_masks_prefix_and_body() {
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Shell);
    let masked = sanitizer.sanitize_line("$'a\\n'");
    assert!(masked.chars().all(char::is_whitespace));
}
2765
/// A line consisting solely of a single-quoted shell string sanitizes to nothing
/// but whitespace.
#[test]
fn shell_literal_string_masks_body() {
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Shell);
    let masked = sanitizer.sanitize_line("'abc'");
    assert!(masked.chars().all(char::is_whitespace));
}
2772
/// The body of a double-quoted Swift string is removed under `strings_only`.
#[test]
fn swift_double_quoted_masks_when_strings_only() {
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Swift);
    let masked = sanitizer.sanitize_line("let s = \"hello\";");
    assert!(!masked.contains("hello"));
}
2779
/// A PHP line that is nothing but a block comment has the comment text removed.
#[test]
fn php_block_comment_masks_opening() {
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    let masked = sanitizer.sanitize_line("/* php block */");
    assert!(!masked.contains("php"));
}
2786
/// Starting from a manually forced `Mode::LineComment`, sanitizing a single line
/// leaves the preprocessor back in `Mode::Normal`.
#[test]
fn line_comment_mode_branch_executes() {
    let mut p = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Rust);
    // Force the state directly so the line-comment branch runs from the first byte.
    p.mode = Mode::LineComment;
    let _ = p.sanitize_line("still comment");
    // `assert_eq!` reports the actual mode on failure, unlike `assert!(a == b)`.
    assert_eq!(p.mode, Mode::Normal);
}
2794
/// When a Python `'''` string spans two lines, the closing `'''` on the second line
/// is itself removed from the sanitized output.
#[test]
fn triple_quoted_string_masks_closing() {
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Python);
    // First line only opens the string; its output is not inspected.
    let _ = sanitizer.sanitize_line("s = '''hello");
    let closing = sanitizer.sanitize_line("world'''");
    assert!(!closing.contains("'''"));
}
2803
/// Under `comments_only`, the bodies of Rust raw and byte strings are left in the
/// output.
#[test]
fn rust_raw_and_byte_strings_preserved_when_strings_not_masked() {
    let source = "let a = r#\"raw\"#; let b = b\"byte\";";
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Rust);
    let output = sanitizer.sanitize_line(source);
    for kept in ["raw", "byte"] {
        assert!(output.contains(kept), "expected {kept:?} to be kept");
    }
}
2812
/// Under `comments_only`, the body of a Python triple-quoted string is left in the
/// output.
#[test]
fn python_triple_quoted_string_preserved_when_strings_not_masked() {
    let source = "x = \"\"\"triple\"\"\"";
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Python);
    let output = sanitizer.sanitize_line(source);
    assert!(output.contains("triple"));
}
2821
/// Under `comments_only`, the bodies of both a `$'...'` string and a plain
/// single-quoted shell string are left in the output.
#[test]
fn shell_strings_preserved_when_strings_not_masked() {
    let source = "echo $'a\\n' 'b'";
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Shell);
    let output = sanitizer.sanitize_line(source);
    assert!(output.contains('a'));
    assert!(output.contains('b'));
}
2831
/// A JavaScript template literal containing an escaped backtick is sanitized
/// without changing the byte length of the line.
#[test]
fn template_literal_escape_branches_execute() {
    // Characters in order: backtick, backslash, backtick, `x`, backtick.
    let line = String::from("`\\`x`");
    let options = PreprocessOptions::strings_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::JavaScript);
    let masked = sanitizer.sanitize_line(&line);
    assert_eq!(masked.len(), line.len());
}
2840
/// A single `/` that is not followed by `/` or `*` does not start a PHP comment,
/// so the line comes back unchanged.
#[test]
fn php_slash_not_comment_in_mask_comments_mode() {
    let source = "/x";
    let options = PreprocessOptions::comments_only();
    let mut sanitizer = Preprocessor::with_language(options, Language::Php);
    assert_eq!(sanitizer.sanitize_line(source), source);
}
2848
/// With `strings_only` options the preprocessor still tracks Rust block-comment
/// nesting: from depth 1, `/*` raises the depth to 2 and `*/` returns to
/// `Mode::Normal`.
#[test]
fn block_comment_mode_without_masking_handles_nested_and_close() {
    let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Rust);

    // Nested opener: the state is forced to depth 1 first, then `/*` is fed in.
    p.mode = Mode::BlockComment { depth: 1 };
    let _ = p.sanitize_line("/*");
    // `assert_eq!` reports the actual mode on failure, unlike `assert!(a == b)`.
    assert_eq!(p.mode, Mode::BlockComment { depth: 2 });

    // Closer: reset to depth 1, then `*/` should leave the comment entirely.
    p.mode = Mode::BlockComment { depth: 1 };
    let _ = p.sanitize_line("*/");
    assert_eq!(p.mode, Mode::Normal);
}
2860
/// With `strings_only` options the preprocessor still recognizes the end of an XML
/// comment: from a forced `Mode::XmlComment`, `-->` returns to `Mode::Normal`.
#[test]
fn xml_comment_mode_without_masking_handles_close() {
    let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Xml);
    // Force the state directly so the close-delimiter branch is the first thing hit.
    p.mode = Mode::XmlComment;
    let _ = p.sanitize_line("-->");
    // `assert_eq!` reports the actual mode on failure, unlike `assert!(a == b)`.
    assert_eq!(p.mode, Mode::Normal);
}
2868}