Skip to main content

reflex/
line_filter.rs

//! Line-based pre-filtering to detect comments and string literals
//!
//! This module provides language-specific filters that analyze lines of code
//! to determine if a pattern match occurs inside a comment or string literal.
//! This enables us to skip files where ALL matches are in non-code contexts,
//! avoiding expensive tree-sitter parsing when possible.
//!
//! # Performance Impact
//!
//! Pre-filtering can reduce tree-sitter parsing workload by 2-5x:
//! - Pattern "mb_" in Linux kernel: 2,500 files → ~500 files to parse
//! - Expected speedup: 38s → ~1-2s for symbol queries
//!
//! # Design Philosophy
//!
//! - **Conservative**: Only skip files when 100% certain ALL matches are in comments/strings
//! - **Language-specific**: Each language has its own comment/string syntax rules
//! - **Line-based**: Fast heuristic analysis without full parsing
//! - **No false negatives**: Never skip files with valid code matches
20
21use crate::models::Language;
22
/// Per-language classifier for a byte offset within a single line of source text.
pub trait LineFilter {
    /// Report whether the match beginning at `pattern_pos` lies inside a comment.
    ///
    /// # Arguments
    /// * `line` - Complete text of the line being inspected
    /// * `pattern_pos` - 0-indexed byte offset at which the matched pattern begins
    ///
    /// # Returns
    /// `true` only when the match is definitely inside a comment; `false` in every other case
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool;

    /// Report whether the match beginning at `pattern_pos` lies inside a string literal.
    ///
    /// # Arguments
    /// * `line` - Complete text of the line being inspected
    /// * `pattern_pos` - 0-indexed byte offset at which the matched pattern begins
    ///
    /// # Returns
    /// `true` only when the match is definitely inside a string literal; `false` in every other case
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool;
}
45
46/// Get a LineFilter for a specific language
47pub fn get_filter(lang: Language) -> Option<Box<dyn LineFilter>> {
48    match lang {
49        Language::Rust => Some(Box::new(RustLineFilter)),
50        Language::C => Some(Box::new(CLineFilter)),
51        Language::Cpp => Some(Box::new(CppLineFilter)),
52        Language::Go => Some(Box::new(GoLineFilter)),
53        Language::Java => Some(Box::new(JavaLineFilter)),
54        Language::JavaScript => Some(Box::new(JavaScriptLineFilter)),
55        Language::TypeScript => Some(Box::new(TypeScriptLineFilter)),
56        Language::Python => Some(Box::new(PythonLineFilter)),
57        Language::Ruby => Some(Box::new(RubyLineFilter)),
58        Language::PHP => Some(Box::new(PHPLineFilter)),
59        Language::CSharp => Some(Box::new(CSharpLineFilter)),
60        Language::Kotlin => Some(Box::new(KotlinLineFilter)),
61        Language::Zig => Some(Box::new(ZigLineFilter)),
62        Language::Vue => Some(Box::new(VueLineFilter)),
63        Language::Svelte => Some(Box::new(SvelteLineFilter)),
64        Language::Swift | Language::Unknown => None,
65    }
66}
67
68// ============================================================================
69// Rust Line Filter
70// ============================================================================
71
72struct RustLineFilter;
73
74impl LineFilter for RustLineFilter {
75    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
76        // Check for single-line comment: // before pattern
77        if let Some(comment_start) = line.find("//") {
78            if comment_start <= pattern_pos {
79                return true;
80            }
81        }
82
83        // Check for multi-line comment start: /* before pattern (unclosed on this line)
84        // Note: We can't reliably detect multi-line comment continuations without state,
85        // so we conservatively return false for those cases
86        if let Some(ml_start) = line.find("/*") {
87            if ml_start <= pattern_pos {
88                // Check if comment is closed before pattern
89                if let Some(ml_end) = line[ml_start..].find("*/") {
90                    let ml_end_pos = ml_start + ml_end + 2;
91                    if pattern_pos >= ml_end_pos {
92                        // Pattern is after comment closure
93                        return false;
94                    }
95                }
96                // Comment not closed, or pattern is inside
97                return true;
98            }
99        }
100
101        false
102    }
103
104    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
105        // Rust has multiple string types: "...", r"...", r#"..."#, r##"..."##, etc.
106
107        // Check for raw strings first (they don't have escape sequences)
108        if let Some(raw_start) = line.find("r#") {
109            if raw_start <= pattern_pos {
110                // Count the number of # symbols
111                let hash_count = line[raw_start + 1..]
112                    .chars()
113                    .take_while(|&c| c == '#')
114                    .count();
115                let closing = format!("\"{}#", "#".repeat(hash_count));
116
117                if let Some(raw_end) = line[raw_start..].find(&closing) {
118                    let raw_end_pos = raw_start + raw_end + closing.len();
119                    if pattern_pos < raw_end_pos {
120                        return true;
121                    }
122                }
123            }
124        }
125
126        // Check for simple raw string r"..."
127        if let Some(raw_start) = line.find("r\"") {
128            if raw_start <= pattern_pos {
129                if let Some(raw_end) = line[raw_start + 2..].find('"') {
130                    let raw_end_pos = raw_start + 2 + raw_end + 1;
131                    if pattern_pos < raw_end_pos {
132                        return true;
133                    }
134                }
135            }
136        }
137
138        // Check for regular strings "..." with escape handling
139        let mut in_string = false;
140        let mut escaped = false;
141
142        for (i, ch) in line.char_indices() {
143            if i >= pattern_pos {
144                return in_string;
145            }
146
147            if escaped {
148                escaped = false;
149                continue;
150            }
151
152            match ch {
153                '\\' if in_string => escaped = true,
154                '"' => in_string = !in_string,
155                _ => {}
156            }
157        }
158
159        false
160    }
161}
162
163// ============================================================================
164// C Line Filter
165// ============================================================================
166
167struct CLineFilter;
168
169impl LineFilter for CLineFilter {
170    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
171        // Check for single-line comment: // before pattern
172        if let Some(comment_start) = line.find("//") {
173            if comment_start <= pattern_pos {
174                return true;
175            }
176        }
177
178        // Check for multi-line comment: /* ... */
179        if let Some(ml_start) = line.find("/*") {
180            if ml_start <= pattern_pos {
181                if let Some(ml_end) = line[ml_start..].find("*/") {
182                    let ml_end_pos = ml_start + ml_end + 2;
183                    if pattern_pos >= ml_end_pos {
184                        return false;
185                    }
186                }
187                return true;
188            }
189        }
190
191        false
192    }
193
194    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
195        // C strings: "..." with escape sequences
196        let mut in_string = false;
197        let mut escaped = false;
198
199        for (i, ch) in line.char_indices() {
200            if i >= pattern_pos {
201                return in_string;
202            }
203
204            if escaped {
205                escaped = false;
206                continue;
207            }
208
209            match ch {
210                '\\' if in_string => escaped = true,
211                '"' => in_string = !in_string,
212                _ => {}
213            }
214        }
215
216        false
217    }
218}
219
220// ============================================================================
221// C++ Line Filter (same as C)
222// ============================================================================
223
224struct CppLineFilter;
225
226impl LineFilter for CppLineFilter {
227    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
228        CLineFilter.is_in_comment(line, pattern_pos)
229    }
230
231    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
232        CLineFilter.is_in_string(line, pattern_pos)
233    }
234}
235
236// ============================================================================
237// Go Line Filter
238// ============================================================================
239
240struct GoLineFilter;
241
242impl LineFilter for GoLineFilter {
243    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
244        // Go comments: // and /* */
245        if let Some(comment_start) = line.find("//") {
246            if comment_start <= pattern_pos {
247                return true;
248            }
249        }
250
251        if let Some(ml_start) = line.find("/*") {
252            if ml_start <= pattern_pos {
253                if let Some(ml_end) = line[ml_start..].find("*/") {
254                    let ml_end_pos = ml_start + ml_end + 2;
255                    if pattern_pos >= ml_end_pos {
256                        return false;
257                    }
258                }
259                return true;
260            }
261        }
262
263        false
264    }
265
266    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
267        // Go strings: "...", `...` (raw strings with backticks)
268
269        // Check for raw string literals first (backticks)
270        let mut in_raw_string = false;
271        for (i, ch) in line.char_indices() {
272            if i >= pattern_pos {
273                return in_raw_string;
274            }
275            if ch == '`' {
276                in_raw_string = !in_raw_string;
277            }
278        }
279
280        // Check for regular strings
281        let mut in_string = false;
282        let mut escaped = false;
283
284        for (i, ch) in line.char_indices() {
285            if i >= pattern_pos {
286                return in_string;
287            }
288
289            if escaped {
290                escaped = false;
291                continue;
292            }
293
294            match ch {
295                '\\' if in_string => escaped = true,
296                '"' => in_string = !in_string,
297                _ => {}
298            }
299        }
300
301        false
302    }
303}
304
305// ============================================================================
306// Java Line Filter
307// ============================================================================
308
309struct JavaLineFilter;
310
311impl LineFilter for JavaLineFilter {
312    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
313        // Java comments: //, /* */, /** */ (Javadoc)
314        if let Some(comment_start) = line.find("//") {
315            if comment_start <= pattern_pos {
316                return true;
317            }
318        }
319
320        if let Some(ml_start) = line.find("/*") {
321            if ml_start <= pattern_pos {
322                if let Some(ml_end) = line[ml_start..].find("*/") {
323                    let ml_end_pos = ml_start + ml_end + 2;
324                    if pattern_pos >= ml_end_pos {
325                        return false;
326                    }
327                }
328                return true;
329            }
330        }
331
332        false
333    }
334
335    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
336        // Java strings: "..." with escape sequences
337        let mut in_string = false;
338        let mut escaped = false;
339
340        for (i, ch) in line.char_indices() {
341            if i >= pattern_pos {
342                return in_string;
343            }
344
345            if escaped {
346                escaped = false;
347                continue;
348            }
349
350            match ch {
351                '\\' if in_string => escaped = true,
352                '"' => in_string = !in_string,
353                _ => {}
354            }
355        }
356
357        false
358    }
359}
360
361// ============================================================================
362// JavaScript Line Filter
363// ============================================================================
364
365struct JavaScriptLineFilter;
366
367impl LineFilter for JavaScriptLineFilter {
368    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
369        // JavaScript comments: //, /* */
370        if let Some(comment_start) = line.find("//") {
371            if comment_start <= pattern_pos {
372                return true;
373            }
374        }
375
376        if let Some(ml_start) = line.find("/*") {
377            if ml_start <= pattern_pos {
378                if let Some(ml_end) = line[ml_start..].find("*/") {
379                    let ml_end_pos = ml_start + ml_end + 2;
380                    if pattern_pos >= ml_end_pos {
381                        return false;
382                    }
383                }
384                return true;
385            }
386        }
387
388        false
389    }
390
391    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
392        // JavaScript strings: "...", '...', `...` (template literals)
393        let mut in_double_quote = false;
394        let mut in_single_quote = false;
395        let mut in_backtick = false;
396        let mut escaped = false;
397
398        for (i, ch) in line.char_indices() {
399            if i >= pattern_pos {
400                return in_double_quote || in_single_quote || in_backtick;
401            }
402
403            if escaped {
404                escaped = false;
405                continue;
406            }
407
408            match ch {
409                '\\' if (in_double_quote || in_single_quote || in_backtick) => escaped = true,
410                '"' if !in_single_quote && !in_backtick => in_double_quote = !in_double_quote,
411                '\'' if !in_double_quote && !in_backtick => in_single_quote = !in_single_quote,
412                '`' if !in_double_quote && !in_single_quote => in_backtick = !in_backtick,
413                _ => {}
414            }
415        }
416
417        false
418    }
419}
420
421// ============================================================================
422// TypeScript Line Filter (same as JavaScript)
423// ============================================================================
424
425struct TypeScriptLineFilter;
426
427impl LineFilter for TypeScriptLineFilter {
428    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
429        JavaScriptLineFilter.is_in_comment(line, pattern_pos)
430    }
431
432    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
433        JavaScriptLineFilter.is_in_string(line, pattern_pos)
434    }
435}
436
437// ============================================================================
438// Python Line Filter
439// ============================================================================
440
441struct PythonLineFilter;
442
443impl LineFilter for PythonLineFilter {
444    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
445        // Python comments: # (single line only)
446        if let Some(comment_start) = line.find('#') {
447            // Make sure # is not inside a string
448            if comment_start <= pattern_pos {
449                // Conservative: assume it's a comment
450                // (We could check if # itself is in a string, but that's complex)
451                return true;
452            }
453        }
454
455        false
456    }
457
458    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
459        // Python strings: "...", '...', """...""", '''...''', f"...", r"...", etc.
460
461        // Check for triple-quoted strings first
462        if let Some(triple_double) = line.find("\"\"\"") {
463            if triple_double <= pattern_pos {
464                // Look for closing triple quote
465                if let Some(close) = line[triple_double + 3..].find("\"\"\"") {
466                    let close_pos = triple_double + 3 + close + 3;
467                    if pattern_pos < close_pos {
468                        return true;
469                    }
470                }
471            }
472        }
473
474        if let Some(triple_single) = line.find("'''") {
475            if triple_single <= pattern_pos {
476                if let Some(close) = line[triple_single + 3..].find("'''") {
477                    let close_pos = triple_single + 3 + close + 3;
478                    if pattern_pos < close_pos {
479                        return true;
480                    }
481                }
482            }
483        }
484
485        // Check for single-line strings (with prefix support: f"...", r"...", b"...", etc.)
486        let mut in_double_quote = false;
487        let mut in_single_quote = false;
488        let mut escaped = false;
489
490        for (i, ch) in line.char_indices() {
491            if i >= pattern_pos {
492                return in_double_quote || in_single_quote;
493            }
494
495            if escaped {
496                escaped = false;
497                continue;
498            }
499
500            match ch {
501                '\\' if (in_double_quote || in_single_quote) => escaped = true,
502                '"' if !in_single_quote => in_double_quote = !in_double_quote,
503                '\'' if !in_double_quote => in_single_quote = !in_single_quote,
504                _ => {}
505            }
506        }
507
508        false
509    }
510}
511
512// ============================================================================
513// Ruby Line Filter
514// ============================================================================
515
516struct RubyLineFilter;
517
518impl LineFilter for RubyLineFilter {
519    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
520        // Ruby comments: # (single line)
521        // Note: Ruby also has =begin...=end multi-line comments, but those are entire-line only
522        if let Some(comment_start) = line.find('#') {
523            if comment_start <= pattern_pos {
524                return true;
525            }
526        }
527
528        false
529    }
530
531    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
532        // Ruby strings: "...", '...', %q{...}, %Q{...}, etc.
533        // For simplicity, we'll handle the common cases: "..." and '...'
534        let mut in_double_quote = false;
535        let mut in_single_quote = false;
536        let mut escaped = false;
537
538        for (i, ch) in line.char_indices() {
539            if i >= pattern_pos {
540                return in_double_quote || in_single_quote;
541            }
542
543            if escaped {
544                escaped = false;
545                continue;
546            }
547
548            match ch {
549                '\\' if (in_double_quote || in_single_quote) => escaped = true,
550                '"' if !in_single_quote => in_double_quote = !in_double_quote,
551                '\'' if !in_double_quote => in_single_quote = !in_single_quote,
552                _ => {}
553            }
554        }
555
556        false
557    }
558}
559
560// ============================================================================
561// PHP Line Filter
562// ============================================================================
563
564struct PHPLineFilter;
565
566impl LineFilter for PHPLineFilter {
567    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
568        // PHP comments: //, #, /* */
569        // Check for // comment
570        if let Some(comment_start) = line.find("//") {
571            if comment_start <= pattern_pos {
572                return true;
573            }
574        }
575
576        // Check for # comment
577        if let Some(comment_start) = line.find('#') {
578            if comment_start <= pattern_pos {
579                return true;
580            }
581        }
582
583        // Check for /* */ comment
584        if let Some(ml_start) = line.find("/*") {
585            if ml_start <= pattern_pos {
586                if let Some(ml_end) = line[ml_start..].find("*/") {
587                    let ml_end_pos = ml_start + ml_end + 2;
588                    if pattern_pos >= ml_end_pos {
589                        return false;
590                    }
591                }
592                return true;
593            }
594        }
595
596        false
597    }
598
599    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
600        // PHP strings: "...", '...', with escape sequences
601        let mut in_double_quote = false;
602        let mut in_single_quote = false;
603        let mut escaped = false;
604
605        for (i, ch) in line.char_indices() {
606            if i >= pattern_pos {
607                return in_double_quote || in_single_quote;
608            }
609
610            if escaped {
611                escaped = false;
612                continue;
613            }
614
615            match ch {
616                '\\' if (in_double_quote || in_single_quote) => escaped = true,
617                '"' if !in_single_quote => in_double_quote = !in_double_quote,
618                '\'' if !in_double_quote => in_single_quote = !in_single_quote,
619                _ => {}
620            }
621        }
622
623        false
624    }
625}
626
627// ============================================================================
628// C# Line Filter
629// ============================================================================
630
631struct CSharpLineFilter;
632
633impl LineFilter for CSharpLineFilter {
634    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
635        // C# comments: //, /* */, /// (XML doc comments)
636        if let Some(comment_start) = line.find("//") {
637            if comment_start <= pattern_pos {
638                return true;
639            }
640        }
641
642        if let Some(ml_start) = line.find("/*") {
643            if ml_start <= pattern_pos {
644                if let Some(ml_end) = line[ml_start..].find("*/") {
645                    let ml_end_pos = ml_start + ml_end + 2;
646                    if pattern_pos >= ml_end_pos {
647                        return false;
648                    }
649                }
650                return true;
651            }
652        }
653
654        false
655    }
656
657    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
658        // C# strings: "...", @"..." (verbatim strings)
659
660        // Check for verbatim strings @"..."
661        if let Some(verbatim_start) = line.find("@\"") {
662            if verbatim_start <= pattern_pos {
663                // In verbatim strings, "" escapes to single "
664                let mut pos = verbatim_start + 2;
665                let chars: Vec<char> = line.chars().collect();
666
667                while pos < chars.len() {
668                    if chars[pos] == '"' {
669                        // Check if it's escaped (double quote)
670                        if pos + 1 < chars.len() && chars[pos + 1] == '"' {
671                            pos += 2;
672                            continue;
673                        }
674                        // End of verbatim string
675                        if pattern_pos <= pos {
676                            return true;
677                        }
678                        break;
679                    }
680                    pos += 1;
681                }
682            }
683        }
684
685        // Check for regular strings "..."
686        let mut in_string = false;
687        let mut escaped = false;
688
689        for (i, ch) in line.char_indices() {
690            if i >= pattern_pos {
691                return in_string;
692            }
693
694            if escaped {
695                escaped = false;
696                continue;
697            }
698
699            match ch {
700                '\\' if in_string => escaped = true,
701                '"' => in_string = !in_string,
702                _ => {}
703            }
704        }
705
706        false
707    }
708}
709
710// ============================================================================
711// Kotlin Line Filter
712// ============================================================================
713
714struct KotlinLineFilter;
715
716impl LineFilter for KotlinLineFilter {
717    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
718        // Kotlin comments: //, /* */
719        if let Some(comment_start) = line.find("//") {
720            if comment_start <= pattern_pos {
721                return true;
722            }
723        }
724
725        if let Some(ml_start) = line.find("/*") {
726            if ml_start <= pattern_pos {
727                if let Some(ml_end) = line[ml_start..].find("*/") {
728                    let ml_end_pos = ml_start + ml_end + 2;
729                    if pattern_pos >= ml_end_pos {
730                        return false;
731                    }
732                }
733                return true;
734            }
735        }
736
737        false
738    }
739
740    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
741        // Kotlin strings: "...", """...""" (raw strings)
742
743        // Check for triple-quoted strings first
744        if let Some(triple_start) = line.find("\"\"\"") {
745            if triple_start <= pattern_pos {
746                if let Some(close) = line[triple_start + 3..].find("\"\"\"") {
747                    let close_pos = triple_start + 3 + close + 3;
748                    if pattern_pos < close_pos {
749                        return true;
750                    }
751                }
752            }
753        }
754
755        // Check for regular strings "..."
756        let mut in_string = false;
757        let mut escaped = false;
758
759        for (i, ch) in line.char_indices() {
760            if i >= pattern_pos {
761                return in_string;
762            }
763
764            if escaped {
765                escaped = false;
766                continue;
767            }
768
769            match ch {
770                '\\' if in_string => escaped = true,
771                '"' => in_string = !in_string,
772                _ => {}
773            }
774        }
775
776        false
777    }
778}
779
780// ============================================================================
781// Zig Line Filter
782// ============================================================================
783
784struct ZigLineFilter;
785
786impl LineFilter for ZigLineFilter {
787    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
788        // Zig comments: // and /// (doc comments)
789        if let Some(comment_start) = line.find("//") {
790            if comment_start <= pattern_pos {
791                return true;
792            }
793        }
794
795        false
796    }
797
798    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
799        // Zig strings: "..." and \\ (multiline string literals)
800        // For simplicity, we'll handle regular strings here
801        let mut in_string = false;
802        let mut escaped = false;
803
804        for (i, ch) in line.char_indices() {
805            if i >= pattern_pos {
806                return in_string;
807            }
808
809            if escaped {
810                escaped = false;
811                continue;
812            }
813
814            match ch {
815                '\\' if in_string => escaped = true,
816                '"' => in_string = !in_string,
817                _ => {}
818            }
819        }
820
821        false
822    }
823}
824
825// ============================================================================
826// Vue Line Filter (use JavaScript/TypeScript for <script> sections)
827// ============================================================================
828
829struct VueLineFilter;
830
831impl LineFilter for VueLineFilter {
832    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
833        // Vue uses JS/TS in <script> sections, HTML comments in <template>
834        // For simplicity, use JavaScript-style comments
835        JavaScriptLineFilter.is_in_comment(line, pattern_pos)
836    }
837
838    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
839        JavaScriptLineFilter.is_in_string(line, pattern_pos)
840    }
841}
842
843// ============================================================================
844// Svelte Line Filter (use JavaScript/TypeScript)
845// ============================================================================
846
847struct SvelteLineFilter;
848
849impl LineFilter for SvelteLineFilter {
850    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
851        JavaScriptLineFilter.is_in_comment(line, pattern_pos)
852    }
853
854    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
855        JavaScriptLineFilter.is_in_string(line, pattern_pos)
856    }
857}
858
#[cfg(test)]
mod tests {
    use super::*;

    /// Byte offset of the first occurrence of `needle` in `line`.
    ///
    /// Offsets are computed rather than hand-counted so every assertion really
    /// targets the token it names. Panics if `needle` is absent, which would
    /// mean the test itself is wrong.
    fn offset_of(line: &str, needle: &str) -> usize {
        match line.find(needle) {
            Some(offset) => offset,
            None => panic!("test line {:?} does not contain {:?}", line, needle),
        }
    }

    // ========================================================================
    // Rust Tests
    // ========================================================================

    #[test]
    fn test_rust_single_line_comment() {
        let filter = RustLineFilter;
        let line = "let x = 5; // extract_symbols here";
        assert!(filter.is_in_comment(line, offset_of(line, "extract_symbols")));
        assert!(!filter.is_in_comment(line, offset_of(line, "x")));
    }

    #[test]
    fn test_rust_multiline_comment() {
        let filter = RustLineFilter;
        let line = "let x = /* extract_symbols */ 5;";
        assert!(filter.is_in_comment(line, offset_of(line, "extract_symbols")));
        assert!(!filter.is_in_comment(line, offset_of(line, "5;")));
    }

    #[test]
    fn test_rust_string_literal() {
        let filter = RustLineFilter;
        let line = r#"let s = "extract_symbols";"#;
        assert!(filter.is_in_string(line, offset_of(line, "extract_symbols")));
        // The `;` directly after the closing quote is code again.
        assert!(!filter.is_in_string(line, offset_of(line, ";")));
    }

    #[test]
    fn test_rust_raw_string() {
        let filter = RustLineFilter;
        let line = r#"let s = r"extract_symbols";"#;
        assert!(filter.is_in_string(line, offset_of(line, "extract_symbols")));
    }

    #[test]
    fn test_rust_raw_string_with_hashes() {
        let filter = RustLineFilter;
        let line = r###"let s = r#"extract_symbols"#;"###;
        assert!(filter.is_in_string(line, offset_of(line, "extract_symbols")));
    }

    #[test]
    fn test_rust_escaped_quote() {
        let filter = RustLineFilter;
        let line = r#"let s = "before \" extract_symbols after";"#;
        // The escaped quote must not end the string before the match.
        assert!(filter.is_in_string(line, offset_of(line, "extract_symbols")));
    }

    // ========================================================================
    // JavaScript Tests
    // ========================================================================

    #[test]
    fn test_js_single_line_comment() {
        let filter = JavaScriptLineFilter;
        let line = "let x = 5; // extract_symbols here";
        assert!(filter.is_in_comment(line, offset_of(line, "extract_symbols")));
        assert!(!filter.is_in_comment(line, offset_of(line, "x")));
    }

    #[test]
    fn test_js_string_double_quote() {
        let filter = JavaScriptLineFilter;
        let line = r#"let s = "extract_symbols";"#;
        assert!(filter.is_in_string(line, offset_of(line, "extract_symbols")));
        assert!(!filter.is_in_string(line, offset_of(line, ";")));
    }

    #[test]
    fn test_js_string_single_quote() {
        let filter = JavaScriptLineFilter;
        let line = "let s = 'extract_symbols';";
        assert!(filter.is_in_string(line, offset_of(line, "extract_symbols")));
    }

    #[test]
    fn test_js_template_literal() {
        let filter = JavaScriptLineFilter;
        let line = "let s = `extract_symbols`;";
        assert!(filter.is_in_string(line, offset_of(line, "extract_symbols")));
    }

    // ========================================================================
    // Python Tests
    // ========================================================================

    #[test]
    fn test_python_comment() {
        let filter = PythonLineFilter;
        let line = "x = 5  # extract_symbols here";
        assert!(filter.is_in_comment(line, offset_of(line, "extract_symbols")));
        assert!(!filter.is_in_comment(line, offset_of(line, "x")));
    }

    #[test]
    fn test_python_string() {
        let filter = PythonLineFilter;
        let line = r#"s = "extract_symbols""#;
        assert!(filter.is_in_string(line, offset_of(line, "extract_symbols")));
    }

    #[test]
    fn test_python_triple_quote() {
        let filter = PythonLineFilter;
        let line = r#"s = """extract_symbols""""#;
        assert!(filter.is_in_string(line, offset_of(line, "extract_symbols")));
    }

    // ========================================================================
    // Go Tests
    // ========================================================================

    #[test]
    fn test_go_raw_string() {
        let filter = GoLineFilter;
        let line = "s := `extract_symbols`";
        assert!(filter.is_in_string(line, offset_of(line, "extract_symbols")));
    }

    // ========================================================================
    // C# Tests
    // ========================================================================

    #[test]
    fn test_csharp_verbatim_string() {
        let filter = CSharpLineFilter;
        let line = r#"string s = @"extract_symbols";"#;
        assert!(filter.is_in_string(line, offset_of(line, "extract_symbols")));
    }

    #[test]
    fn test_csharp_verbatim_escaped_quote() {
        let filter = CSharpLineFilter;
        let line = r#"string s = @"before "" extract_symbols after";"#;
        // The doubled quote is an escape and must not end the verbatim string.
        assert!(filter.is_in_string(line, offset_of(line, "extract_symbols")));
    }
}