reflex/
line_filter.rs

1//! Line-based pre-filtering to detect comments and string literals
2//!
3//! This module provides language-specific filters that analyze lines of code
4//! to determine if a pattern match occurs inside a comment or string literal.
5//! This enables us to skip files where ALL matches are in non-code contexts,
6//! avoiding expensive tree-sitter parsing when possible.
7//!
8//! # Performance Impact
9//!
10//! Pre-filtering can reduce tree-sitter parsing workload by 2-5x:
11//! - Pattern "mb_" in Linux kernel: 2,500 files → ~500 files to parse
12//! - Expected speedup: 38s → ~1-2s for symbol queries
13//!
14//! # Design Philosophy
15//!
16//! - **Conservative**: Only skip files when 100% certain ALL matches are in comments/strings
17//! - **Language-specific**: Each language has its own comment/string syntax rules
18//! - **Line-based**: Fast heuristic analysis without full parsing
19//! - **No false negatives**: Never skip files with valid code matches
20
21use crate::models::Language;
22
/// Language-specific classifier for a single position within a single line.
///
/// Both methods receive only the text of one line and a byte offset into it;
/// the trait passes no state from one line to the next.
pub trait LineFilter {
    /// Check if a position in a line is inside a comment
    ///
    /// # Arguments
    /// * `line` - The full line of text
    /// * `pattern_pos` - Byte position where the pattern starts (0-indexed)
    ///
    /// # Returns
    /// `true` if the pattern is definitely inside a comment, `false` otherwise
    /// (including whenever the implementation cannot tell).
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool;

    /// Check if a position in a line is inside a string literal
    ///
    /// # Arguments
    /// * `line` - The full line of text
    /// * `pattern_pos` - Byte position where the pattern starts (0-indexed)
    ///
    /// # Returns
    /// `true` if the pattern is definitely inside a string literal, `false` otherwise
    /// (including whenever the implementation cannot tell).
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool;
}
45
46/// Get a LineFilter for a specific language
47pub fn get_filter(lang: Language) -> Option<Box<dyn LineFilter>> {
48    match lang {
49        Language::Rust => Some(Box::new(RustLineFilter)),
50        Language::C => Some(Box::new(CLineFilter)),
51        Language::Cpp => Some(Box::new(CppLineFilter)),
52        Language::Go => Some(Box::new(GoLineFilter)),
53        Language::Java => Some(Box::new(JavaLineFilter)),
54        Language::JavaScript => Some(Box::new(JavaScriptLineFilter)),
55        Language::TypeScript => Some(Box::new(TypeScriptLineFilter)),
56        Language::Python => Some(Box::new(PythonLineFilter)),
57        Language::Ruby => Some(Box::new(RubyLineFilter)),
58        Language::PHP => Some(Box::new(PHPLineFilter)),
59        Language::CSharp => Some(Box::new(CSharpLineFilter)),
60        Language::Kotlin => Some(Box::new(KotlinLineFilter)),
61        Language::Zig => Some(Box::new(ZigLineFilter)),
62        Language::Vue => Some(Box::new(VueLineFilter)),
63        Language::Svelte => Some(Box::new(SvelteLineFilter)),
64        Language::Swift | Language::Unknown => None,
65    }
66}
67
68// ============================================================================
69// Rust Line Filter
70// ============================================================================
71
72struct RustLineFilter;
73
74impl LineFilter for RustLineFilter {
75    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
76        // Check for single-line comment: // before pattern
77        if let Some(comment_start) = line.find("//") {
78            if comment_start <= pattern_pos {
79                return true;
80            }
81        }
82
83        // Check for multi-line comment start: /* before pattern (unclosed on this line)
84        // Note: We can't reliably detect multi-line comment continuations without state,
85        // so we conservatively return false for those cases
86        if let Some(ml_start) = line.find("/*") {
87            if ml_start <= pattern_pos {
88                // Check if comment is closed before pattern
89                if let Some(ml_end) = line[ml_start..].find("*/") {
90                    let ml_end_pos = ml_start + ml_end + 2;
91                    if pattern_pos >= ml_end_pos {
92                        // Pattern is after comment closure
93                        return false;
94                    }
95                }
96                // Comment not closed, or pattern is inside
97                return true;
98            }
99        }
100
101        false
102    }
103
104    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
105        // Rust has multiple string types: "...", r"...", r#"..."#, r##"..."##, etc.
106
107        // Check for raw strings first (they don't have escape sequences)
108        if let Some(raw_start) = line.find("r#") {
109            if raw_start <= pattern_pos {
110                // Count the number of # symbols
111                let hash_count = line[raw_start + 1..].chars().take_while(|&c| c == '#').count();
112                let closing = format!("\"{}#", "#".repeat(hash_count));
113
114                if let Some(raw_end) = line[raw_start..].find(&closing) {
115                    let raw_end_pos = raw_start + raw_end + closing.len();
116                    if pattern_pos < raw_end_pos {
117                        return true;
118                    }
119                }
120            }
121        }
122
123        // Check for simple raw string r"..."
124        if let Some(raw_start) = line.find("r\"") {
125            if raw_start <= pattern_pos {
126                if let Some(raw_end) = line[raw_start + 2..].find('"') {
127                    let raw_end_pos = raw_start + 2 + raw_end + 1;
128                    if pattern_pos < raw_end_pos {
129                        return true;
130                    }
131                }
132            }
133        }
134
135        // Check for regular strings "..." with escape handling
136        let mut in_string = false;
137        let mut escaped = false;
138
139        for (i, ch) in line.char_indices() {
140            if i >= pattern_pos {
141                return in_string;
142            }
143
144            if escaped {
145                escaped = false;
146                continue;
147            }
148
149            match ch {
150                '\\' if in_string => escaped = true,
151                '"' => in_string = !in_string,
152                _ => {}
153            }
154        }
155
156        false
157    }
158}
159
160// ============================================================================
161// C Line Filter
162// ============================================================================
163
164struct CLineFilter;
165
166impl LineFilter for CLineFilter {
167    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
168        // Check for single-line comment: // before pattern
169        if let Some(comment_start) = line.find("//") {
170            if comment_start <= pattern_pos {
171                return true;
172            }
173        }
174
175        // Check for multi-line comment: /* ... */
176        if let Some(ml_start) = line.find("/*") {
177            if ml_start <= pattern_pos {
178                if let Some(ml_end) = line[ml_start..].find("*/") {
179                    let ml_end_pos = ml_start + ml_end + 2;
180                    if pattern_pos >= ml_end_pos {
181                        return false;
182                    }
183                }
184                return true;
185            }
186        }
187
188        false
189    }
190
191    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
192        // C strings: "..." with escape sequences
193        let mut in_string = false;
194        let mut escaped = false;
195
196        for (i, ch) in line.char_indices() {
197            if i >= pattern_pos {
198                return in_string;
199            }
200
201            if escaped {
202                escaped = false;
203                continue;
204            }
205
206            match ch {
207                '\\' if in_string => escaped = true,
208                '"' => in_string = !in_string,
209                _ => {}
210            }
211        }
212
213        false
214    }
215}
216
217// ============================================================================
218// C++ Line Filter (same as C)
219// ============================================================================
220
221struct CppLineFilter;
222
223impl LineFilter for CppLineFilter {
224    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
225        CLineFilter.is_in_comment(line, pattern_pos)
226    }
227
228    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
229        CLineFilter.is_in_string(line, pattern_pos)
230    }
231}
232
233// ============================================================================
234// Go Line Filter
235// ============================================================================
236
237struct GoLineFilter;
238
239impl LineFilter for GoLineFilter {
240    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
241        // Go comments: // and /* */
242        if let Some(comment_start) = line.find("//") {
243            if comment_start <= pattern_pos {
244                return true;
245            }
246        }
247
248        if let Some(ml_start) = line.find("/*") {
249            if ml_start <= pattern_pos {
250                if let Some(ml_end) = line[ml_start..].find("*/") {
251                    let ml_end_pos = ml_start + ml_end + 2;
252                    if pattern_pos >= ml_end_pos {
253                        return false;
254                    }
255                }
256                return true;
257            }
258        }
259
260        false
261    }
262
263    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
264        // Go strings: "...", `...` (raw strings with backticks)
265
266        // Check for raw string literals first (backticks)
267        let mut in_raw_string = false;
268        for (i, ch) in line.char_indices() {
269            if i >= pattern_pos {
270                return in_raw_string;
271            }
272            if ch == '`' {
273                in_raw_string = !in_raw_string;
274            }
275        }
276
277        // Check for regular strings
278        let mut in_string = false;
279        let mut escaped = false;
280
281        for (i, ch) in line.char_indices() {
282            if i >= pattern_pos {
283                return in_string;
284            }
285
286            if escaped {
287                escaped = false;
288                continue;
289            }
290
291            match ch {
292                '\\' if in_string => escaped = true,
293                '"' => in_string = !in_string,
294                _ => {}
295            }
296        }
297
298        false
299    }
300}
301
302// ============================================================================
303// Java Line Filter
304// ============================================================================
305
306struct JavaLineFilter;
307
308impl LineFilter for JavaLineFilter {
309    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
310        // Java comments: //, /* */, /** */ (Javadoc)
311        if let Some(comment_start) = line.find("//") {
312            if comment_start <= pattern_pos {
313                return true;
314            }
315        }
316
317        if let Some(ml_start) = line.find("/*") {
318            if ml_start <= pattern_pos {
319                if let Some(ml_end) = line[ml_start..].find("*/") {
320                    let ml_end_pos = ml_start + ml_end + 2;
321                    if pattern_pos >= ml_end_pos {
322                        return false;
323                    }
324                }
325                return true;
326            }
327        }
328
329        false
330    }
331
332    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
333        // Java strings: "..." with escape sequences
334        let mut in_string = false;
335        let mut escaped = false;
336
337        for (i, ch) in line.char_indices() {
338            if i >= pattern_pos {
339                return in_string;
340            }
341
342            if escaped {
343                escaped = false;
344                continue;
345            }
346
347            match ch {
348                '\\' if in_string => escaped = true,
349                '"' => in_string = !in_string,
350                _ => {}
351            }
352        }
353
354        false
355    }
356}
357
358// ============================================================================
359// JavaScript Line Filter
360// ============================================================================
361
362struct JavaScriptLineFilter;
363
364impl LineFilter for JavaScriptLineFilter {
365    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
366        // JavaScript comments: //, /* */
367        if let Some(comment_start) = line.find("//") {
368            if comment_start <= pattern_pos {
369                return true;
370            }
371        }
372
373        if let Some(ml_start) = line.find("/*") {
374            if ml_start <= pattern_pos {
375                if let Some(ml_end) = line[ml_start..].find("*/") {
376                    let ml_end_pos = ml_start + ml_end + 2;
377                    if pattern_pos >= ml_end_pos {
378                        return false;
379                    }
380                }
381                return true;
382            }
383        }
384
385        false
386    }
387
388    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
389        // JavaScript strings: "...", '...', `...` (template literals)
390        let mut in_double_quote = false;
391        let mut in_single_quote = false;
392        let mut in_backtick = false;
393        let mut escaped = false;
394
395        for (i, ch) in line.char_indices() {
396            if i >= pattern_pos {
397                return in_double_quote || in_single_quote || in_backtick;
398            }
399
400            if escaped {
401                escaped = false;
402                continue;
403            }
404
405            match ch {
406                '\\' if (in_double_quote || in_single_quote || in_backtick) => escaped = true,
407                '"' if !in_single_quote && !in_backtick => in_double_quote = !in_double_quote,
408                '\'' if !in_double_quote && !in_backtick => in_single_quote = !in_single_quote,
409                '`' if !in_double_quote && !in_single_quote => in_backtick = !in_backtick,
410                _ => {}
411            }
412        }
413
414        false
415    }
416}
417
418// ============================================================================
419// TypeScript Line Filter (same as JavaScript)
420// ============================================================================
421
422struct TypeScriptLineFilter;
423
424impl LineFilter for TypeScriptLineFilter {
425    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
426        JavaScriptLineFilter.is_in_comment(line, pattern_pos)
427    }
428
429    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
430        JavaScriptLineFilter.is_in_string(line, pattern_pos)
431    }
432}
433
434// ============================================================================
435// Python Line Filter
436// ============================================================================
437
438struct PythonLineFilter;
439
440impl LineFilter for PythonLineFilter {
441    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
442        // Python comments: # (single line only)
443        if let Some(comment_start) = line.find('#') {
444            // Make sure # is not inside a string
445            if comment_start <= pattern_pos {
446                // Conservative: assume it's a comment
447                // (We could check if # itself is in a string, but that's complex)
448                return true;
449            }
450        }
451
452        false
453    }
454
455    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
456        // Python strings: "...", '...', """...""", '''...''', f"...", r"...", etc.
457
458        // Check for triple-quoted strings first
459        if let Some(triple_double) = line.find("\"\"\"") {
460            if triple_double <= pattern_pos {
461                // Look for closing triple quote
462                if let Some(close) = line[triple_double + 3..].find("\"\"\"") {
463                    let close_pos = triple_double + 3 + close + 3;
464                    if pattern_pos < close_pos {
465                        return true;
466                    }
467                }
468            }
469        }
470
471        if let Some(triple_single) = line.find("'''") {
472            if triple_single <= pattern_pos {
473                if let Some(close) = line[triple_single + 3..].find("'''") {
474                    let close_pos = triple_single + 3 + close + 3;
475                    if pattern_pos < close_pos {
476                        return true;
477                    }
478                }
479            }
480        }
481
482        // Check for single-line strings (with prefix support: f"...", r"...", b"...", etc.)
483        let mut in_double_quote = false;
484        let mut in_single_quote = false;
485        let mut escaped = false;
486
487        for (i, ch) in line.char_indices() {
488            if i >= pattern_pos {
489                return in_double_quote || in_single_quote;
490            }
491
492            if escaped {
493                escaped = false;
494                continue;
495            }
496
497            match ch {
498                '\\' if (in_double_quote || in_single_quote) => escaped = true,
499                '"' if !in_single_quote => in_double_quote = !in_double_quote,
500                '\'' if !in_double_quote => in_single_quote = !in_single_quote,
501                _ => {}
502            }
503        }
504
505        false
506    }
507}
508
509// ============================================================================
510// Ruby Line Filter
511// ============================================================================
512
513struct RubyLineFilter;
514
515impl LineFilter for RubyLineFilter {
516    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
517        // Ruby comments: # (single line)
518        // Note: Ruby also has =begin...=end multi-line comments, but those are entire-line only
519        if let Some(comment_start) = line.find('#') {
520            if comment_start <= pattern_pos {
521                return true;
522            }
523        }
524
525        false
526    }
527
528    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
529        // Ruby strings: "...", '...', %q{...}, %Q{...}, etc.
530        // For simplicity, we'll handle the common cases: "..." and '...'
531        let mut in_double_quote = false;
532        let mut in_single_quote = false;
533        let mut escaped = false;
534
535        for (i, ch) in line.char_indices() {
536            if i >= pattern_pos {
537                return in_double_quote || in_single_quote;
538            }
539
540            if escaped {
541                escaped = false;
542                continue;
543            }
544
545            match ch {
546                '\\' if (in_double_quote || in_single_quote) => escaped = true,
547                '"' if !in_single_quote => in_double_quote = !in_double_quote,
548                '\'' if !in_double_quote => in_single_quote = !in_single_quote,
549                _ => {}
550            }
551        }
552
553        false
554    }
555}
556
557// ============================================================================
558// PHP Line Filter
559// ============================================================================
560
561struct PHPLineFilter;
562
563impl LineFilter for PHPLineFilter {
564    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
565        // PHP comments: //, #, /* */
566
567        // Check for // comment
568        if let Some(comment_start) = line.find("//") {
569            if comment_start <= pattern_pos {
570                return true;
571            }
572        }
573
574        // Check for # comment
575        if let Some(comment_start) = line.find('#') {
576            if comment_start <= pattern_pos {
577                return true;
578            }
579        }
580
581        // Check for /* */ comment
582        if let Some(ml_start) = line.find("/*") {
583            if ml_start <= pattern_pos {
584                if let Some(ml_end) = line[ml_start..].find("*/") {
585                    let ml_end_pos = ml_start + ml_end + 2;
586                    if pattern_pos >= ml_end_pos {
587                        return false;
588                    }
589                }
590                return true;
591            }
592        }
593
594        false
595    }
596
597    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
598        // PHP strings: "...", '...', with escape sequences
599        let mut in_double_quote = false;
600        let mut in_single_quote = false;
601        let mut escaped = false;
602
603        for (i, ch) in line.char_indices() {
604            if i >= pattern_pos {
605                return in_double_quote || in_single_quote;
606            }
607
608            if escaped {
609                escaped = false;
610                continue;
611            }
612
613            match ch {
614                '\\' if (in_double_quote || in_single_quote) => escaped = true,
615                '"' if !in_single_quote => in_double_quote = !in_double_quote,
616                '\'' if !in_double_quote => in_single_quote = !in_single_quote,
617                _ => {}
618            }
619        }
620
621        false
622    }
623}
624
625// ============================================================================
626// C# Line Filter
627// ============================================================================
628
629struct CSharpLineFilter;
630
631impl LineFilter for CSharpLineFilter {
632    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
633        // C# comments: //, /* */, /// (XML doc comments)
634        if let Some(comment_start) = line.find("//") {
635            if comment_start <= pattern_pos {
636                return true;
637            }
638        }
639
640        if let Some(ml_start) = line.find("/*") {
641            if ml_start <= pattern_pos {
642                if let Some(ml_end) = line[ml_start..].find("*/") {
643                    let ml_end_pos = ml_start + ml_end + 2;
644                    if pattern_pos >= ml_end_pos {
645                        return false;
646                    }
647                }
648                return true;
649            }
650        }
651
652        false
653    }
654
655    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
656        // C# strings: "...", @"..." (verbatim strings)
657
658        // Check for verbatim strings @"..."
659        if let Some(verbatim_start) = line.find("@\"") {
660            if verbatim_start <= pattern_pos {
661                // In verbatim strings, "" escapes to single "
662                let mut pos = verbatim_start + 2;
663                let chars: Vec<char> = line.chars().collect();
664
665                while pos < chars.len() {
666                    if chars[pos] == '"' {
667                        // Check if it's escaped (double quote)
668                        if pos + 1 < chars.len() && chars[pos + 1] == '"' {
669                            pos += 2;
670                            continue;
671                        }
672                        // End of verbatim string
673                        if pattern_pos <= pos {
674                            return true;
675                        }
676                        break;
677                    }
678                    pos += 1;
679                }
680            }
681        }
682
683        // Check for regular strings "..."
684        let mut in_string = false;
685        let mut escaped = false;
686
687        for (i, ch) in line.char_indices() {
688            if i >= pattern_pos {
689                return in_string;
690            }
691
692            if escaped {
693                escaped = false;
694                continue;
695            }
696
697            match ch {
698                '\\' if in_string => escaped = true,
699                '"' => in_string = !in_string,
700                _ => {}
701            }
702        }
703
704        false
705    }
706}
707
708// ============================================================================
709// Kotlin Line Filter
710// ============================================================================
711
712struct KotlinLineFilter;
713
714impl LineFilter for KotlinLineFilter {
715    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
716        // Kotlin comments: //, /* */
717        if let Some(comment_start) = line.find("//") {
718            if comment_start <= pattern_pos {
719                return true;
720            }
721        }
722
723        if let Some(ml_start) = line.find("/*") {
724            if ml_start <= pattern_pos {
725                if let Some(ml_end) = line[ml_start..].find("*/") {
726                    let ml_end_pos = ml_start + ml_end + 2;
727                    if pattern_pos >= ml_end_pos {
728                        return false;
729                    }
730                }
731                return true;
732            }
733        }
734
735        false
736    }
737
738    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
739        // Kotlin strings: "...", """...""" (raw strings)
740
741        // Check for triple-quoted strings first
742        if let Some(triple_start) = line.find("\"\"\"") {
743            if triple_start <= pattern_pos {
744                if let Some(close) = line[triple_start + 3..].find("\"\"\"") {
745                    let close_pos = triple_start + 3 + close + 3;
746                    if pattern_pos < close_pos {
747                        return true;
748                    }
749                }
750            }
751        }
752
753        // Check for regular strings "..."
754        let mut in_string = false;
755        let mut escaped = false;
756
757        for (i, ch) in line.char_indices() {
758            if i >= pattern_pos {
759                return in_string;
760            }
761
762            if escaped {
763                escaped = false;
764                continue;
765            }
766
767            match ch {
768                '\\' if in_string => escaped = true,
769                '"' => in_string = !in_string,
770                _ => {}
771            }
772        }
773
774        false
775    }
776}
777
778// ============================================================================
779// Zig Line Filter
780// ============================================================================
781
782struct ZigLineFilter;
783
784impl LineFilter for ZigLineFilter {
785    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
786        // Zig comments: // and /// (doc comments)
787        if let Some(comment_start) = line.find("//") {
788            if comment_start <= pattern_pos {
789                return true;
790            }
791        }
792
793        false
794    }
795
796    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
797        // Zig strings: "..." and \\ (multiline string literals)
798        // For simplicity, we'll handle regular strings here
799        let mut in_string = false;
800        let mut escaped = false;
801
802        for (i, ch) in line.char_indices() {
803            if i >= pattern_pos {
804                return in_string;
805            }
806
807            if escaped {
808                escaped = false;
809                continue;
810            }
811
812            match ch {
813                '\\' if in_string => escaped = true,
814                '"' => in_string = !in_string,
815                _ => {}
816            }
817        }
818
819        false
820    }
821}
822
823// ============================================================================
824// Vue Line Filter (use JavaScript/TypeScript for <script> sections)
825// ============================================================================
826
827struct VueLineFilter;
828
829impl LineFilter for VueLineFilter {
830    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
831        // Vue uses JS/TS in <script> sections, HTML comments in <template>
832        // For simplicity, use JavaScript-style comments
833        JavaScriptLineFilter.is_in_comment(line, pattern_pos)
834    }
835
836    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
837        JavaScriptLineFilter.is_in_string(line, pattern_pos)
838    }
839}
840
841// ============================================================================
842// Svelte Line Filter (use JavaScript/TypeScript)
843// ============================================================================
844
845struct SvelteLineFilter;
846
847impl LineFilter for SvelteLineFilter {
848    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
849        JavaScriptLineFilter.is_in_comment(line, pattern_pos)
850    }
851
852    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
853        JavaScriptLineFilter.is_in_string(line, pattern_pos)
854    }
855}
856
#[cfg(test)]
mod tests {
    use super::*;

    // All offsets below are byte positions into the sample line.

    // ------------------------------------------------------------------
    // Rust
    // ------------------------------------------------------------------

    #[test]
    fn test_rust_single_line_comment() {
        let src = "let x = 5; // extract_symbols here";
        // Offset 15 is within the trailing `//` comment; offset 4 is the `x`.
        assert!(RustLineFilter.is_in_comment(src, 15));
        assert!(!RustLineFilter.is_in_comment(src, 4));
    }

    #[test]
    fn test_rust_multiline_comment() {
        let src = "let x = /* extract_symbols */ 5;";
        // Offset 11 is within `/* ... */`; offset 30 is the `5` after it.
        assert!(RustLineFilter.is_in_comment(src, 11));
        assert!(!RustLineFilter.is_in_comment(src, 30));
    }

    #[test]
    fn test_rust_string_literal() {
        let src = r#"let s = "extract_symbols";"#;
        // Offset 9 is the first byte inside the quotes; 27 is past the line end.
        assert!(RustLineFilter.is_in_string(src, 9));
        assert!(!RustLineFilter.is_in_string(src, 27));
    }

    #[test]
    fn test_rust_raw_string() {
        let src = r#"let s = r"extract_symbols";"#;
        assert!(RustLineFilter.is_in_string(src, 10));
    }

    #[test]
    fn test_rust_raw_string_with_hashes() {
        let src = r###"let s = r#"extract_symbols"#;"###;
        assert!(RustLineFilter.is_in_string(src, 11));
    }

    #[test]
    fn test_rust_escaped_quote() {
        let src = r#"let s = "before \" extract_symbols after";"#;
        // The escaped quote must not end the string early.
        assert!(RustLineFilter.is_in_string(src, 15));
    }

    // ------------------------------------------------------------------
    // JavaScript
    // ------------------------------------------------------------------

    #[test]
    fn test_js_single_line_comment() {
        let src = "let x = 5; // extract_symbols here";
        assert!(JavaScriptLineFilter.is_in_comment(src, 15));
        assert!(!JavaScriptLineFilter.is_in_comment(src, 4));
    }

    #[test]
    fn test_js_string_double_quote() {
        let src = r#"let s = "extract_symbols";"#;
        assert!(JavaScriptLineFilter.is_in_string(src, 9));
        assert!(!JavaScriptLineFilter.is_in_string(src, 27));
    }

    #[test]
    fn test_js_string_single_quote() {
        let src = "let s = 'extract_symbols';";
        assert!(JavaScriptLineFilter.is_in_string(src, 9));
    }

    #[test]
    fn test_js_template_literal() {
        let src = "let s = `extract_symbols`;";
        assert!(JavaScriptLineFilter.is_in_string(src, 9));
    }

    // ------------------------------------------------------------------
    // Python
    // ------------------------------------------------------------------

    #[test]
    fn test_python_comment() {
        let src = "x = 5  # extract_symbols here";
        assert!(PythonLineFilter.is_in_comment(src, 9));
        assert!(!PythonLineFilter.is_in_comment(src, 0));
    }

    #[test]
    fn test_python_string() {
        let src = r#"s = "extract_symbols""#;
        assert!(PythonLineFilter.is_in_string(src, 5));
    }

    #[test]
    fn test_python_triple_quote() {
        let src = r#"s = """extract_symbols""""#;
        assert!(PythonLineFilter.is_in_string(src, 7));
    }

    // ------------------------------------------------------------------
    // Go
    // ------------------------------------------------------------------

    #[test]
    fn test_go_raw_string() {
        let src = "s := `extract_symbols`";
        assert!(GoLineFilter.is_in_string(src, 6));
    }

    // ------------------------------------------------------------------
    // C#
    // ------------------------------------------------------------------

    #[test]
    fn test_csharp_verbatim_string() {
        let src = r#"string s = @"extract_symbols";"#;
        assert!(CSharpLineFilter.is_in_string(src, 13));
    }

    #[test]
    fn test_csharp_verbatim_escaped_quote() {
        let src = r#"string s = @"before "" extract_symbols after";"#;
        // A doubled quote inside a verbatim string does not end it.
        assert!(CSharpLineFilter.is_in_string(src, 19));
    }
}