1use crate::models::Language;
22
/// Per-language heuristics for deciding whether a position on a single source
/// line falls inside a comment or inside a string literal.
///
/// Implementations only ever see one line at a time, so constructs that span
/// several lines cannot be tracked across line boundaries.
pub trait LineFilter {
    /// Returns `true` if `pattern_pos` lies inside a comment on `line`.
    ///
    /// `pattern_pos` is a byte offset into `line`: the implementations in this
    /// file compare it against offsets produced by `str::find` and
    /// `char_indices`.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool;

    /// Returns `true` if `pattern_pos` lies inside a string literal on `line`.
    ///
    /// `pattern_pos` is a byte offset into `line`, as for
    /// [`LineFilter::is_in_comment`].
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool;
}
45
46pub fn get_filter(lang: Language) -> Option<Box<dyn LineFilter>> {
48 match lang {
49 Language::Rust => Some(Box::new(RustLineFilter)),
50 Language::C => Some(Box::new(CLineFilter)),
51 Language::Cpp => Some(Box::new(CppLineFilter)),
52 Language::Go => Some(Box::new(GoLineFilter)),
53 Language::Java => Some(Box::new(JavaLineFilter)),
54 Language::JavaScript => Some(Box::new(JavaScriptLineFilter)),
55 Language::TypeScript => Some(Box::new(TypeScriptLineFilter)),
56 Language::Python => Some(Box::new(PythonLineFilter)),
57 Language::Ruby => Some(Box::new(RubyLineFilter)),
58 Language::PHP => Some(Box::new(PHPLineFilter)),
59 Language::CSharp => Some(Box::new(CSharpLineFilter)),
60 Language::Kotlin => Some(Box::new(KotlinLineFilter)),
61 Language::Zig => Some(Box::new(ZigLineFilter)),
62 Language::Vue => Some(Box::new(VueLineFilter)),
63 Language::Svelte => Some(Box::new(SvelteLineFilter)),
64 Language::Swift | Language::Unknown => None,
65 }
66}
67
68struct RustLineFilter;
73
74impl LineFilter for RustLineFilter {
75 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
76 if let Some(comment_start) = line.find("//") {
78 if comment_start <= pattern_pos {
79 return true;
80 }
81 }
82
83 if let Some(ml_start) = line.find("/*") {
87 if ml_start <= pattern_pos {
88 if let Some(ml_end) = line[ml_start..].find("*/") {
90 let ml_end_pos = ml_start + ml_end + 2;
91 if pattern_pos >= ml_end_pos {
92 return false;
94 }
95 }
96 return true;
98 }
99 }
100
101 false
102 }
103
104 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
105 if let Some(raw_start) = line.find("r#") {
109 if raw_start <= pattern_pos {
110 let hash_count = line[raw_start + 1..].chars().take_while(|&c| c == '#').count();
112 let closing = format!("\"{}#", "#".repeat(hash_count));
113
114 if let Some(raw_end) = line[raw_start..].find(&closing) {
115 let raw_end_pos = raw_start + raw_end + closing.len();
116 if pattern_pos < raw_end_pos {
117 return true;
118 }
119 }
120 }
121 }
122
123 if let Some(raw_start) = line.find("r\"") {
125 if raw_start <= pattern_pos {
126 if let Some(raw_end) = line[raw_start + 2..].find('"') {
127 let raw_end_pos = raw_start + 2 + raw_end + 1;
128 if pattern_pos < raw_end_pos {
129 return true;
130 }
131 }
132 }
133 }
134
135 let mut in_string = false;
137 let mut escaped = false;
138
139 for (i, ch) in line.char_indices() {
140 if i >= pattern_pos {
141 return in_string;
142 }
143
144 if escaped {
145 escaped = false;
146 continue;
147 }
148
149 match ch {
150 '\\' if in_string => escaped = true,
151 '"' => in_string = !in_string,
152 _ => {}
153 }
154 }
155
156 false
157 }
158}
159
160struct CLineFilter;
165
166impl LineFilter for CLineFilter {
167 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
168 if let Some(comment_start) = line.find("//") {
170 if comment_start <= pattern_pos {
171 return true;
172 }
173 }
174
175 if let Some(ml_start) = line.find("/*") {
177 if ml_start <= pattern_pos {
178 if let Some(ml_end) = line[ml_start..].find("*/") {
179 let ml_end_pos = ml_start + ml_end + 2;
180 if pattern_pos >= ml_end_pos {
181 return false;
182 }
183 }
184 return true;
185 }
186 }
187
188 false
189 }
190
191 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
192 let mut in_string = false;
194 let mut escaped = false;
195
196 for (i, ch) in line.char_indices() {
197 if i >= pattern_pos {
198 return in_string;
199 }
200
201 if escaped {
202 escaped = false;
203 continue;
204 }
205
206 match ch {
207 '\\' if in_string => escaped = true,
208 '"' => in_string = !in_string,
209 _ => {}
210 }
211 }
212
213 false
214 }
215}
216
217struct CppLineFilter;
222
223impl LineFilter for CppLineFilter {
224 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
225 CLineFilter.is_in_comment(line, pattern_pos)
226 }
227
228 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
229 CLineFilter.is_in_string(line, pattern_pos)
230 }
231}
232
233struct GoLineFilter;
238
239impl LineFilter for GoLineFilter {
240 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
241 if let Some(comment_start) = line.find("//") {
243 if comment_start <= pattern_pos {
244 return true;
245 }
246 }
247
248 if let Some(ml_start) = line.find("/*") {
249 if ml_start <= pattern_pos {
250 if let Some(ml_end) = line[ml_start..].find("*/") {
251 let ml_end_pos = ml_start + ml_end + 2;
252 if pattern_pos >= ml_end_pos {
253 return false;
254 }
255 }
256 return true;
257 }
258 }
259
260 false
261 }
262
263 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
264 let mut in_raw_string = false;
268 for (i, ch) in line.char_indices() {
269 if i >= pattern_pos {
270 return in_raw_string;
271 }
272 if ch == '`' {
273 in_raw_string = !in_raw_string;
274 }
275 }
276
277 let mut in_string = false;
279 let mut escaped = false;
280
281 for (i, ch) in line.char_indices() {
282 if i >= pattern_pos {
283 return in_string;
284 }
285
286 if escaped {
287 escaped = false;
288 continue;
289 }
290
291 match ch {
292 '\\' if in_string => escaped = true,
293 '"' => in_string = !in_string,
294 _ => {}
295 }
296 }
297
298 false
299 }
300}
301
302struct JavaLineFilter;
307
308impl LineFilter for JavaLineFilter {
309 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
310 if let Some(comment_start) = line.find("//") {
312 if comment_start <= pattern_pos {
313 return true;
314 }
315 }
316
317 if let Some(ml_start) = line.find("/*") {
318 if ml_start <= pattern_pos {
319 if let Some(ml_end) = line[ml_start..].find("*/") {
320 let ml_end_pos = ml_start + ml_end + 2;
321 if pattern_pos >= ml_end_pos {
322 return false;
323 }
324 }
325 return true;
326 }
327 }
328
329 false
330 }
331
332 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
333 let mut in_string = false;
335 let mut escaped = false;
336
337 for (i, ch) in line.char_indices() {
338 if i >= pattern_pos {
339 return in_string;
340 }
341
342 if escaped {
343 escaped = false;
344 continue;
345 }
346
347 match ch {
348 '\\' if in_string => escaped = true,
349 '"' => in_string = !in_string,
350 _ => {}
351 }
352 }
353
354 false
355 }
356}
357
358struct JavaScriptLineFilter;
363
364impl LineFilter for JavaScriptLineFilter {
365 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
366 if let Some(comment_start) = line.find("//") {
368 if comment_start <= pattern_pos {
369 return true;
370 }
371 }
372
373 if let Some(ml_start) = line.find("/*") {
374 if ml_start <= pattern_pos {
375 if let Some(ml_end) = line[ml_start..].find("*/") {
376 let ml_end_pos = ml_start + ml_end + 2;
377 if pattern_pos >= ml_end_pos {
378 return false;
379 }
380 }
381 return true;
382 }
383 }
384
385 false
386 }
387
388 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
389 let mut in_double_quote = false;
391 let mut in_single_quote = false;
392 let mut in_backtick = false;
393 let mut escaped = false;
394
395 for (i, ch) in line.char_indices() {
396 if i >= pattern_pos {
397 return in_double_quote || in_single_quote || in_backtick;
398 }
399
400 if escaped {
401 escaped = false;
402 continue;
403 }
404
405 match ch {
406 '\\' if (in_double_quote || in_single_quote || in_backtick) => escaped = true,
407 '"' if !in_single_quote && !in_backtick => in_double_quote = !in_double_quote,
408 '\'' if !in_double_quote && !in_backtick => in_single_quote = !in_single_quote,
409 '`' if !in_double_quote && !in_single_quote => in_backtick = !in_backtick,
410 _ => {}
411 }
412 }
413
414 false
415 }
416}
417
418struct TypeScriptLineFilter;
423
424impl LineFilter for TypeScriptLineFilter {
425 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
426 JavaScriptLineFilter.is_in_comment(line, pattern_pos)
427 }
428
429 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
430 JavaScriptLineFilter.is_in_string(line, pattern_pos)
431 }
432}
433
434struct PythonLineFilter;
439
440impl LineFilter for PythonLineFilter {
441 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
442 if let Some(comment_start) = line.find('#') {
444 if comment_start <= pattern_pos {
446 return true;
449 }
450 }
451
452 false
453 }
454
455 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
456 if let Some(triple_double) = line.find("\"\"\"") {
460 if triple_double <= pattern_pos {
461 if let Some(close) = line[triple_double + 3..].find("\"\"\"") {
463 let close_pos = triple_double + 3 + close + 3;
464 if pattern_pos < close_pos {
465 return true;
466 }
467 }
468 }
469 }
470
471 if let Some(triple_single) = line.find("'''") {
472 if triple_single <= pattern_pos {
473 if let Some(close) = line[triple_single + 3..].find("'''") {
474 let close_pos = triple_single + 3 + close + 3;
475 if pattern_pos < close_pos {
476 return true;
477 }
478 }
479 }
480 }
481
482 let mut in_double_quote = false;
484 let mut in_single_quote = false;
485 let mut escaped = false;
486
487 for (i, ch) in line.char_indices() {
488 if i >= pattern_pos {
489 return in_double_quote || in_single_quote;
490 }
491
492 if escaped {
493 escaped = false;
494 continue;
495 }
496
497 match ch {
498 '\\' if (in_double_quote || in_single_quote) => escaped = true,
499 '"' if !in_single_quote => in_double_quote = !in_double_quote,
500 '\'' if !in_double_quote => in_single_quote = !in_single_quote,
501 _ => {}
502 }
503 }
504
505 false
506 }
507}
508
509struct RubyLineFilter;
514
515impl LineFilter for RubyLineFilter {
516 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
517 if let Some(comment_start) = line.find('#') {
520 if comment_start <= pattern_pos {
521 return true;
522 }
523 }
524
525 false
526 }
527
528 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
529 let mut in_double_quote = false;
532 let mut in_single_quote = false;
533 let mut escaped = false;
534
535 for (i, ch) in line.char_indices() {
536 if i >= pattern_pos {
537 return in_double_quote || in_single_quote;
538 }
539
540 if escaped {
541 escaped = false;
542 continue;
543 }
544
545 match ch {
546 '\\' if (in_double_quote || in_single_quote) => escaped = true,
547 '"' if !in_single_quote => in_double_quote = !in_double_quote,
548 '\'' if !in_double_quote => in_single_quote = !in_single_quote,
549 _ => {}
550 }
551 }
552
553 false
554 }
555}
556
557struct PHPLineFilter;
562
563impl LineFilter for PHPLineFilter {
564 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
565 if let Some(comment_start) = line.find("//") {
569 if comment_start <= pattern_pos {
570 return true;
571 }
572 }
573
574 if let Some(comment_start) = line.find('#') {
576 if comment_start <= pattern_pos {
577 return true;
578 }
579 }
580
581 if let Some(ml_start) = line.find("/*") {
583 if ml_start <= pattern_pos {
584 if let Some(ml_end) = line[ml_start..].find("*/") {
585 let ml_end_pos = ml_start + ml_end + 2;
586 if pattern_pos >= ml_end_pos {
587 return false;
588 }
589 }
590 return true;
591 }
592 }
593
594 false
595 }
596
597 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
598 let mut in_double_quote = false;
600 let mut in_single_quote = false;
601 let mut escaped = false;
602
603 for (i, ch) in line.char_indices() {
604 if i >= pattern_pos {
605 return in_double_quote || in_single_quote;
606 }
607
608 if escaped {
609 escaped = false;
610 continue;
611 }
612
613 match ch {
614 '\\' if (in_double_quote || in_single_quote) => escaped = true,
615 '"' if !in_single_quote => in_double_quote = !in_double_quote,
616 '\'' if !in_double_quote => in_single_quote = !in_single_quote,
617 _ => {}
618 }
619 }
620
621 false
622 }
623}
624
625struct CSharpLineFilter;
630
631impl LineFilter for CSharpLineFilter {
632 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
633 if let Some(comment_start) = line.find("//") {
635 if comment_start <= pattern_pos {
636 return true;
637 }
638 }
639
640 if let Some(ml_start) = line.find("/*") {
641 if ml_start <= pattern_pos {
642 if let Some(ml_end) = line[ml_start..].find("*/") {
643 let ml_end_pos = ml_start + ml_end + 2;
644 if pattern_pos >= ml_end_pos {
645 return false;
646 }
647 }
648 return true;
649 }
650 }
651
652 false
653 }
654
655 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
656 if let Some(verbatim_start) = line.find("@\"") {
660 if verbatim_start <= pattern_pos {
661 let mut pos = verbatim_start + 2;
663 let chars: Vec<char> = line.chars().collect();
664
665 while pos < chars.len() {
666 if chars[pos] == '"' {
667 if pos + 1 < chars.len() && chars[pos + 1] == '"' {
669 pos += 2;
670 continue;
671 }
672 if pattern_pos <= pos {
674 return true;
675 }
676 break;
677 }
678 pos += 1;
679 }
680 }
681 }
682
683 let mut in_string = false;
685 let mut escaped = false;
686
687 for (i, ch) in line.char_indices() {
688 if i >= pattern_pos {
689 return in_string;
690 }
691
692 if escaped {
693 escaped = false;
694 continue;
695 }
696
697 match ch {
698 '\\' if in_string => escaped = true,
699 '"' => in_string = !in_string,
700 _ => {}
701 }
702 }
703
704 false
705 }
706}
707
708struct KotlinLineFilter;
713
714impl LineFilter for KotlinLineFilter {
715 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
716 if let Some(comment_start) = line.find("//") {
718 if comment_start <= pattern_pos {
719 return true;
720 }
721 }
722
723 if let Some(ml_start) = line.find("/*") {
724 if ml_start <= pattern_pos {
725 if let Some(ml_end) = line[ml_start..].find("*/") {
726 let ml_end_pos = ml_start + ml_end + 2;
727 if pattern_pos >= ml_end_pos {
728 return false;
729 }
730 }
731 return true;
732 }
733 }
734
735 false
736 }
737
738 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
739 if let Some(triple_start) = line.find("\"\"\"") {
743 if triple_start <= pattern_pos {
744 if let Some(close) = line[triple_start + 3..].find("\"\"\"") {
745 let close_pos = triple_start + 3 + close + 3;
746 if pattern_pos < close_pos {
747 return true;
748 }
749 }
750 }
751 }
752
753 let mut in_string = false;
755 let mut escaped = false;
756
757 for (i, ch) in line.char_indices() {
758 if i >= pattern_pos {
759 return in_string;
760 }
761
762 if escaped {
763 escaped = false;
764 continue;
765 }
766
767 match ch {
768 '\\' if in_string => escaped = true,
769 '"' => in_string = !in_string,
770 _ => {}
771 }
772 }
773
774 false
775 }
776}
777
778struct ZigLineFilter;
783
784impl LineFilter for ZigLineFilter {
785 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
786 if let Some(comment_start) = line.find("//") {
788 if comment_start <= pattern_pos {
789 return true;
790 }
791 }
792
793 false
794 }
795
796 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
797 let mut in_string = false;
800 let mut escaped = false;
801
802 for (i, ch) in line.char_indices() {
803 if i >= pattern_pos {
804 return in_string;
805 }
806
807 if escaped {
808 escaped = false;
809 continue;
810 }
811
812 match ch {
813 '\\' if in_string => escaped = true,
814 '"' => in_string = !in_string,
815 _ => {}
816 }
817 }
818
819 false
820 }
821}
822
823struct VueLineFilter;
828
829impl LineFilter for VueLineFilter {
830 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
831 JavaScriptLineFilter.is_in_comment(line, pattern_pos)
834 }
835
836 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
837 JavaScriptLineFilter.is_in_string(line, pattern_pos)
838 }
839}
840
841struct SvelteLineFilter;
846
847impl LineFilter for SvelteLineFilter {
848 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
849 JavaScriptLineFilter.is_in_comment(line, pattern_pos)
850 }
851
852 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
853 JavaScriptLineFilter.is_in_string(line, pattern_pos)
854 }
855}
856
#[cfg(test)]
mod tests {
    use super::*;

    /// Line with code followed by a trailing `//` comment, shared by the Rust
    /// and JavaScript comment tests.
    const TRAILING_SLASH_COMMENT: &str = "let x = 5; // extract_symbols here";

    /// Line holding a plain double-quoted string, shared by the Rust and
    /// JavaScript string tests.
    const DOUBLE_QUOTED: &str = r#"let s = "extract_symbols";"#;

    // ---- Rust ----

    #[test]
    fn test_rust_single_line_comment() {
        let in_comment = |pos: usize| RustLineFilter.is_in_comment(TRAILING_SLASH_COMMENT, pos);
        assert!(in_comment(15));
        assert!(!in_comment(4));
    }

    #[test]
    fn test_rust_multiline_comment() {
        let line = "let x = /* extract_symbols */ 5;";
        let in_comment = |pos: usize| RustLineFilter.is_in_comment(line, pos);
        assert!(in_comment(11));
        assert!(!in_comment(30));
    }

    #[test]
    fn test_rust_string_literal() {
        let in_string = |pos: usize| RustLineFilter.is_in_string(DOUBLE_QUOTED, pos);
        assert!(in_string(9));
        assert!(!in_string(27));
    }

    #[test]
    fn test_rust_raw_string() {
        let line = r#"let s = r"extract_symbols";"#;
        assert!(RustLineFilter.is_in_string(line, 10));
    }

    #[test]
    fn test_rust_raw_string_with_hashes() {
        let line = r###"let s = r#"extract_symbols"#;"###;
        assert!(RustLineFilter.is_in_string(line, 11));
    }

    #[test]
    fn test_rust_escaped_quote() {
        let line = r#"let s = "before \" extract_symbols after";"#;
        assert!(RustLineFilter.is_in_string(line, 15));
    }

    // ---- JavaScript ----

    #[test]
    fn test_js_single_line_comment() {
        let in_comment =
            |pos: usize| JavaScriptLineFilter.is_in_comment(TRAILING_SLASH_COMMENT, pos);
        assert!(in_comment(15));
        assert!(!in_comment(4));
    }

    #[test]
    fn test_js_string_double_quote() {
        let in_string = |pos: usize| JavaScriptLineFilter.is_in_string(DOUBLE_QUOTED, pos);
        assert!(in_string(9));
        assert!(!in_string(27));
    }

    #[test]
    fn test_js_string_single_quote() {
        let line = "let s = 'extract_symbols';";
        assert!(JavaScriptLineFilter.is_in_string(line, 9));
    }

    #[test]
    fn test_js_template_literal() {
        let line = "let s = `extract_symbols`;";
        assert!(JavaScriptLineFilter.is_in_string(line, 9));
    }

    // ---- Python ----

    #[test]
    fn test_python_comment() {
        let line = "x = 5 # extract_symbols here";
        let in_comment = |pos: usize| PythonLineFilter.is_in_comment(line, pos);
        assert!(in_comment(9));
        assert!(!in_comment(0));
    }

    #[test]
    fn test_python_string() {
        let line = r#"s = "extract_symbols""#;
        assert!(PythonLineFilter.is_in_string(line, 5));
    }

    #[test]
    fn test_python_triple_quote() {
        let line = r#"s = """extract_symbols""""#;
        assert!(PythonLineFilter.is_in_string(line, 7));
    }

    // ---- Go ----

    #[test]
    fn test_go_raw_string() {
        let line = "s := `extract_symbols`";
        assert!(GoLineFilter.is_in_string(line, 6));
    }

    // ---- C# ----

    #[test]
    fn test_csharp_verbatim_string() {
        let line = r#"string s = @"extract_symbols";"#;
        assert!(CSharpLineFilter.is_in_string(line, 13));
    }

    #[test]
    fn test_csharp_verbatim_escaped_quote() {
        let line = r#"string s = @"before "" extract_symbols after";"#;
        assert!(CSharpLineFilter.is_in_string(line, 19));
    }
}