1use crate::models::Language;
22
/// Per-language test of whether a position on a single source line falls
/// inside a comment or inside a string literal.
pub trait LineFilter {
    /// Returns `true` when the byte offset `pattern_pos` within `line` lies
    /// inside a comment.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool;

    /// Returns `true` when the byte offset `pattern_pos` within `line` lies
    /// inside a string literal.
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool;
}
45
46pub fn get_filter(lang: Language) -> Option<Box<dyn LineFilter>> {
48 match lang {
49 Language::Rust => Some(Box::new(RustLineFilter)),
50 Language::C => Some(Box::new(CLineFilter)),
51 Language::Cpp => Some(Box::new(CppLineFilter)),
52 Language::Go => Some(Box::new(GoLineFilter)),
53 Language::Java => Some(Box::new(JavaLineFilter)),
54 Language::JavaScript => Some(Box::new(JavaScriptLineFilter)),
55 Language::TypeScript => Some(Box::new(TypeScriptLineFilter)),
56 Language::Python => Some(Box::new(PythonLineFilter)),
57 Language::Ruby => Some(Box::new(RubyLineFilter)),
58 Language::PHP => Some(Box::new(PHPLineFilter)),
59 Language::CSharp => Some(Box::new(CSharpLineFilter)),
60 Language::Kotlin => Some(Box::new(KotlinLineFilter)),
61 Language::Zig => Some(Box::new(ZigLineFilter)),
62 Language::Vue => Some(Box::new(VueLineFilter)),
63 Language::Svelte => Some(Box::new(SvelteLineFilter)),
64 Language::Swift | Language::Unknown => None,
65 }
66}
67
68struct RustLineFilter;
73
74impl LineFilter for RustLineFilter {
75 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
76 if let Some(comment_start) = line.find("//") {
78 if comment_start <= pattern_pos {
79 return true;
80 }
81 }
82
83 if let Some(ml_start) = line.find("/*") {
87 if ml_start <= pattern_pos {
88 if let Some(ml_end) = line[ml_start..].find("*/") {
90 let ml_end_pos = ml_start + ml_end + 2;
91 if pattern_pos >= ml_end_pos {
92 return false;
94 }
95 }
96 return true;
98 }
99 }
100
101 false
102 }
103
104 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
105 if let Some(raw_start) = line.find("r#") {
109 if raw_start <= pattern_pos {
110 let hash_count = line[raw_start + 1..]
112 .chars()
113 .take_while(|&c| c == '#')
114 .count();
115 let closing = format!("\"{}#", "#".repeat(hash_count));
116
117 if let Some(raw_end) = line[raw_start..].find(&closing) {
118 let raw_end_pos = raw_start + raw_end + closing.len();
119 if pattern_pos < raw_end_pos {
120 return true;
121 }
122 }
123 }
124 }
125
126 if let Some(raw_start) = line.find("r\"") {
128 if raw_start <= pattern_pos {
129 if let Some(raw_end) = line[raw_start + 2..].find('"') {
130 let raw_end_pos = raw_start + 2 + raw_end + 1;
131 if pattern_pos < raw_end_pos {
132 return true;
133 }
134 }
135 }
136 }
137
138 let mut in_string = false;
140 let mut escaped = false;
141
142 for (i, ch) in line.char_indices() {
143 if i >= pattern_pos {
144 return in_string;
145 }
146
147 if escaped {
148 escaped = false;
149 continue;
150 }
151
152 match ch {
153 '\\' if in_string => escaped = true,
154 '"' => in_string = !in_string,
155 _ => {}
156 }
157 }
158
159 false
160 }
161}
162
163struct CLineFilter;
168
169impl LineFilter for CLineFilter {
170 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
171 if let Some(comment_start) = line.find("//") {
173 if comment_start <= pattern_pos {
174 return true;
175 }
176 }
177
178 if let Some(ml_start) = line.find("/*") {
180 if ml_start <= pattern_pos {
181 if let Some(ml_end) = line[ml_start..].find("*/") {
182 let ml_end_pos = ml_start + ml_end + 2;
183 if pattern_pos >= ml_end_pos {
184 return false;
185 }
186 }
187 return true;
188 }
189 }
190
191 false
192 }
193
194 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
195 let mut in_string = false;
197 let mut escaped = false;
198
199 for (i, ch) in line.char_indices() {
200 if i >= pattern_pos {
201 return in_string;
202 }
203
204 if escaped {
205 escaped = false;
206 continue;
207 }
208
209 match ch {
210 '\\' if in_string => escaped = true,
211 '"' => in_string = !in_string,
212 _ => {}
213 }
214 }
215
216 false
217 }
218}
219
220struct CppLineFilter;
225
226impl LineFilter for CppLineFilter {
227 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
228 CLineFilter.is_in_comment(line, pattern_pos)
229 }
230
231 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
232 CLineFilter.is_in_string(line, pattern_pos)
233 }
234}
235
236struct GoLineFilter;
241
242impl LineFilter for GoLineFilter {
243 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
244 if let Some(comment_start) = line.find("//") {
246 if comment_start <= pattern_pos {
247 return true;
248 }
249 }
250
251 if let Some(ml_start) = line.find("/*") {
252 if ml_start <= pattern_pos {
253 if let Some(ml_end) = line[ml_start..].find("*/") {
254 let ml_end_pos = ml_start + ml_end + 2;
255 if pattern_pos >= ml_end_pos {
256 return false;
257 }
258 }
259 return true;
260 }
261 }
262
263 false
264 }
265
266 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
267 let mut in_raw_string = false;
271 for (i, ch) in line.char_indices() {
272 if i >= pattern_pos {
273 return in_raw_string;
274 }
275 if ch == '`' {
276 in_raw_string = !in_raw_string;
277 }
278 }
279
280 let mut in_string = false;
282 let mut escaped = false;
283
284 for (i, ch) in line.char_indices() {
285 if i >= pattern_pos {
286 return in_string;
287 }
288
289 if escaped {
290 escaped = false;
291 continue;
292 }
293
294 match ch {
295 '\\' if in_string => escaped = true,
296 '"' => in_string = !in_string,
297 _ => {}
298 }
299 }
300
301 false
302 }
303}
304
305struct JavaLineFilter;
310
311impl LineFilter for JavaLineFilter {
312 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
313 if let Some(comment_start) = line.find("//") {
315 if comment_start <= pattern_pos {
316 return true;
317 }
318 }
319
320 if let Some(ml_start) = line.find("/*") {
321 if ml_start <= pattern_pos {
322 if let Some(ml_end) = line[ml_start..].find("*/") {
323 let ml_end_pos = ml_start + ml_end + 2;
324 if pattern_pos >= ml_end_pos {
325 return false;
326 }
327 }
328 return true;
329 }
330 }
331
332 false
333 }
334
335 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
336 let mut in_string = false;
338 let mut escaped = false;
339
340 for (i, ch) in line.char_indices() {
341 if i >= pattern_pos {
342 return in_string;
343 }
344
345 if escaped {
346 escaped = false;
347 continue;
348 }
349
350 match ch {
351 '\\' if in_string => escaped = true,
352 '"' => in_string = !in_string,
353 _ => {}
354 }
355 }
356
357 false
358 }
359}
360
361struct JavaScriptLineFilter;
366
367impl LineFilter for JavaScriptLineFilter {
368 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
369 if let Some(comment_start) = line.find("//") {
371 if comment_start <= pattern_pos {
372 return true;
373 }
374 }
375
376 if let Some(ml_start) = line.find("/*") {
377 if ml_start <= pattern_pos {
378 if let Some(ml_end) = line[ml_start..].find("*/") {
379 let ml_end_pos = ml_start + ml_end + 2;
380 if pattern_pos >= ml_end_pos {
381 return false;
382 }
383 }
384 return true;
385 }
386 }
387
388 false
389 }
390
391 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
392 let mut in_double_quote = false;
394 let mut in_single_quote = false;
395 let mut in_backtick = false;
396 let mut escaped = false;
397
398 for (i, ch) in line.char_indices() {
399 if i >= pattern_pos {
400 return in_double_quote || in_single_quote || in_backtick;
401 }
402
403 if escaped {
404 escaped = false;
405 continue;
406 }
407
408 match ch {
409 '\\' if (in_double_quote || in_single_quote || in_backtick) => escaped = true,
410 '"' if !in_single_quote && !in_backtick => in_double_quote = !in_double_quote,
411 '\'' if !in_double_quote && !in_backtick => in_single_quote = !in_single_quote,
412 '`' if !in_double_quote && !in_single_quote => in_backtick = !in_backtick,
413 _ => {}
414 }
415 }
416
417 false
418 }
419}
420
421struct TypeScriptLineFilter;
426
427impl LineFilter for TypeScriptLineFilter {
428 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
429 JavaScriptLineFilter.is_in_comment(line, pattern_pos)
430 }
431
432 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
433 JavaScriptLineFilter.is_in_string(line, pattern_pos)
434 }
435}
436
437struct PythonLineFilter;
442
443impl LineFilter for PythonLineFilter {
444 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
445 if let Some(comment_start) = line.find('#') {
447 if comment_start <= pattern_pos {
449 return true;
452 }
453 }
454
455 false
456 }
457
458 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
459 if let Some(triple_double) = line.find("\"\"\"") {
463 if triple_double <= pattern_pos {
464 if let Some(close) = line[triple_double + 3..].find("\"\"\"") {
466 let close_pos = triple_double + 3 + close + 3;
467 if pattern_pos < close_pos {
468 return true;
469 }
470 }
471 }
472 }
473
474 if let Some(triple_single) = line.find("'''") {
475 if triple_single <= pattern_pos {
476 if let Some(close) = line[triple_single + 3..].find("'''") {
477 let close_pos = triple_single + 3 + close + 3;
478 if pattern_pos < close_pos {
479 return true;
480 }
481 }
482 }
483 }
484
485 let mut in_double_quote = false;
487 let mut in_single_quote = false;
488 let mut escaped = false;
489
490 for (i, ch) in line.char_indices() {
491 if i >= pattern_pos {
492 return in_double_quote || in_single_quote;
493 }
494
495 if escaped {
496 escaped = false;
497 continue;
498 }
499
500 match ch {
501 '\\' if (in_double_quote || in_single_quote) => escaped = true,
502 '"' if !in_single_quote => in_double_quote = !in_double_quote,
503 '\'' if !in_double_quote => in_single_quote = !in_single_quote,
504 _ => {}
505 }
506 }
507
508 false
509 }
510}
511
512struct RubyLineFilter;
517
518impl LineFilter for RubyLineFilter {
519 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
520 if let Some(comment_start) = line.find('#') {
523 if comment_start <= pattern_pos {
524 return true;
525 }
526 }
527
528 false
529 }
530
531 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
532 let mut in_double_quote = false;
535 let mut in_single_quote = false;
536 let mut escaped = false;
537
538 for (i, ch) in line.char_indices() {
539 if i >= pattern_pos {
540 return in_double_quote || in_single_quote;
541 }
542
543 if escaped {
544 escaped = false;
545 continue;
546 }
547
548 match ch {
549 '\\' if (in_double_quote || in_single_quote) => escaped = true,
550 '"' if !in_single_quote => in_double_quote = !in_double_quote,
551 '\'' if !in_double_quote => in_single_quote = !in_single_quote,
552 _ => {}
553 }
554 }
555
556 false
557 }
558}
559
560struct PHPLineFilter;
565
566impl LineFilter for PHPLineFilter {
567 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
568 if let Some(comment_start) = line.find("//") {
571 if comment_start <= pattern_pos {
572 return true;
573 }
574 }
575
576 if let Some(comment_start) = line.find('#') {
578 if comment_start <= pattern_pos {
579 return true;
580 }
581 }
582
583 if let Some(ml_start) = line.find("/*") {
585 if ml_start <= pattern_pos {
586 if let Some(ml_end) = line[ml_start..].find("*/") {
587 let ml_end_pos = ml_start + ml_end + 2;
588 if pattern_pos >= ml_end_pos {
589 return false;
590 }
591 }
592 return true;
593 }
594 }
595
596 false
597 }
598
599 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
600 let mut in_double_quote = false;
602 let mut in_single_quote = false;
603 let mut escaped = false;
604
605 for (i, ch) in line.char_indices() {
606 if i >= pattern_pos {
607 return in_double_quote || in_single_quote;
608 }
609
610 if escaped {
611 escaped = false;
612 continue;
613 }
614
615 match ch {
616 '\\' if (in_double_quote || in_single_quote) => escaped = true,
617 '"' if !in_single_quote => in_double_quote = !in_double_quote,
618 '\'' if !in_double_quote => in_single_quote = !in_single_quote,
619 _ => {}
620 }
621 }
622
623 false
624 }
625}
626
627struct CSharpLineFilter;
632
633impl LineFilter for CSharpLineFilter {
634 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
635 if let Some(comment_start) = line.find("//") {
637 if comment_start <= pattern_pos {
638 return true;
639 }
640 }
641
642 if let Some(ml_start) = line.find("/*") {
643 if ml_start <= pattern_pos {
644 if let Some(ml_end) = line[ml_start..].find("*/") {
645 let ml_end_pos = ml_start + ml_end + 2;
646 if pattern_pos >= ml_end_pos {
647 return false;
648 }
649 }
650 return true;
651 }
652 }
653
654 false
655 }
656
657 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
658 if let Some(verbatim_start) = line.find("@\"") {
662 if verbatim_start <= pattern_pos {
663 let mut pos = verbatim_start + 2;
665 let chars: Vec<char> = line.chars().collect();
666
667 while pos < chars.len() {
668 if chars[pos] == '"' {
669 if pos + 1 < chars.len() && chars[pos + 1] == '"' {
671 pos += 2;
672 continue;
673 }
674 if pattern_pos <= pos {
676 return true;
677 }
678 break;
679 }
680 pos += 1;
681 }
682 }
683 }
684
685 let mut in_string = false;
687 let mut escaped = false;
688
689 for (i, ch) in line.char_indices() {
690 if i >= pattern_pos {
691 return in_string;
692 }
693
694 if escaped {
695 escaped = false;
696 continue;
697 }
698
699 match ch {
700 '\\' if in_string => escaped = true,
701 '"' => in_string = !in_string,
702 _ => {}
703 }
704 }
705
706 false
707 }
708}
709
710struct KotlinLineFilter;
715
716impl LineFilter for KotlinLineFilter {
717 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
718 if let Some(comment_start) = line.find("//") {
720 if comment_start <= pattern_pos {
721 return true;
722 }
723 }
724
725 if let Some(ml_start) = line.find("/*") {
726 if ml_start <= pattern_pos {
727 if let Some(ml_end) = line[ml_start..].find("*/") {
728 let ml_end_pos = ml_start + ml_end + 2;
729 if pattern_pos >= ml_end_pos {
730 return false;
731 }
732 }
733 return true;
734 }
735 }
736
737 false
738 }
739
740 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
741 if let Some(triple_start) = line.find("\"\"\"") {
745 if triple_start <= pattern_pos {
746 if let Some(close) = line[triple_start + 3..].find("\"\"\"") {
747 let close_pos = triple_start + 3 + close + 3;
748 if pattern_pos < close_pos {
749 return true;
750 }
751 }
752 }
753 }
754
755 let mut in_string = false;
757 let mut escaped = false;
758
759 for (i, ch) in line.char_indices() {
760 if i >= pattern_pos {
761 return in_string;
762 }
763
764 if escaped {
765 escaped = false;
766 continue;
767 }
768
769 match ch {
770 '\\' if in_string => escaped = true,
771 '"' => in_string = !in_string,
772 _ => {}
773 }
774 }
775
776 false
777 }
778}
779
780struct ZigLineFilter;
785
786impl LineFilter for ZigLineFilter {
787 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
788 if let Some(comment_start) = line.find("//") {
790 if comment_start <= pattern_pos {
791 return true;
792 }
793 }
794
795 false
796 }
797
798 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
799 let mut in_string = false;
802 let mut escaped = false;
803
804 for (i, ch) in line.char_indices() {
805 if i >= pattern_pos {
806 return in_string;
807 }
808
809 if escaped {
810 escaped = false;
811 continue;
812 }
813
814 match ch {
815 '\\' if in_string => escaped = true,
816 '"' => in_string = !in_string,
817 _ => {}
818 }
819 }
820
821 false
822 }
823}
824
825struct VueLineFilter;
830
831impl LineFilter for VueLineFilter {
832 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
833 JavaScriptLineFilter.is_in_comment(line, pattern_pos)
836 }
837
838 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
839 JavaScriptLineFilter.is_in_string(line, pattern_pos)
840 }
841}
842
843struct SvelteLineFilter;
848
849impl LineFilter for SvelteLineFilter {
850 fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
851 JavaScriptLineFilter.is_in_comment(line, pattern_pos)
852 }
853
854 fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
855 JavaScriptLineFilter.is_in_string(line, pattern_pos)
856 }
857}
858
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Rust ----

    #[test]
    fn test_rust_single_line_comment() {
        let line = "let x = 5; // extract_symbols here";
        assert!(RustLineFilter.is_in_comment(line, 15));
        assert!(!RustLineFilter.is_in_comment(line, 4));
    }

    #[test]
    fn test_rust_multiline_comment() {
        let line = "let x = /* extract_symbols */ 5;";
        assert!(RustLineFilter.is_in_comment(line, 11));
        assert!(!RustLineFilter.is_in_comment(line, 30));
    }

    #[test]
    fn test_rust_string_literal() {
        let line = r#"let s = "extract_symbols";"#;
        assert!(RustLineFilter.is_in_string(line, 9));
        // Offset past the end of the line.
        assert!(!RustLineFilter.is_in_string(line, 27));
    }

    #[test]
    fn test_rust_raw_string() {
        let line = r#"let s = r"extract_symbols";"#;
        assert!(RustLineFilter.is_in_string(line, 10));
    }

    #[test]
    fn test_rust_raw_string_with_hashes() {
        let line = r###"let s = r#"extract_symbols"#;"###;
        assert!(RustLineFilter.is_in_string(line, 11));
    }

    #[test]
    fn test_rust_escaped_quote() {
        let line = r#"let s = "before \" extract_symbols after";"#;
        assert!(RustLineFilter.is_in_string(line, 15));
    }

    // ---- JavaScript ----

    #[test]
    fn test_js_single_line_comment() {
        let line = "let x = 5; // extract_symbols here";
        assert!(JavaScriptLineFilter.is_in_comment(line, 15));
        assert!(!JavaScriptLineFilter.is_in_comment(line, 4));
    }

    #[test]
    fn test_js_string_double_quote() {
        let line = r#"let s = "extract_symbols";"#;
        assert!(JavaScriptLineFilter.is_in_string(line, 9));
        // Offset past the end of the line.
        assert!(!JavaScriptLineFilter.is_in_string(line, 27));
    }

    #[test]
    fn test_js_string_single_quote() {
        let line = "let s = 'extract_symbols';";
        assert!(JavaScriptLineFilter.is_in_string(line, 9));
    }

    #[test]
    fn test_js_template_literal() {
        let line = "let s = `extract_symbols`;";
        assert!(JavaScriptLineFilter.is_in_string(line, 9));
    }

    // ---- Python ----

    #[test]
    fn test_python_comment() {
        let line = "x = 5 # extract_symbols here";
        assert!(PythonLineFilter.is_in_comment(line, 9));
        assert!(!PythonLineFilter.is_in_comment(line, 0));
    }

    #[test]
    fn test_python_string() {
        let line = r#"s = "extract_symbols""#;
        assert!(PythonLineFilter.is_in_string(line, 5));
    }

    #[test]
    fn test_python_triple_quote() {
        let line = r#"s = """extract_symbols""""#;
        assert!(PythonLineFilter.is_in_string(line, 7));
    }

    // ---- Go ----

    #[test]
    fn test_go_raw_string() {
        let line = "s := `extract_symbols`";
        assert!(GoLineFilter.is_in_string(line, 6));
    }

    // ---- C# ----

    #[test]
    fn test_csharp_verbatim_string() {
        let line = r#"string s = @"extract_symbols";"#;
        assert!(CSharpLineFilter.is_in_string(line, 13));
    }

    #[test]
    fn test_csharp_verbatim_escaped_quote() {
        let line = r#"string s = @"before "" extract_symbols after";"#;
        assert!(CSharpLineFilter.is_in_string(line, 19));
    }
}