llm_utl/
filter.rs

//! Code filtering and preprocessing module.
//!
//! Provides glob-based file selection and functionality to strip tests,
//! comments, and documentation from source code before generating prompts.
5
6use globset::{Glob, GlobSet, GlobSetBuilder};
7use std::path::Path;
8
/// Glob-pattern settings that control which files and directories are scanned.
///
/// Start from [`FileFilterConfig::new`] and chain the setter methods; each
/// setter consumes the configuration and returns the updated value.
#[derive(Debug, Clone, Default)]
pub struct FileFilterConfig {
    /// Glob patterns of files to skip.
    exclude_files: Vec<String>,
    /// Glob patterns of the only files to keep; an empty list means no allow-list.
    exclude_all_files_except: Vec<String>,
    /// Glob patterns of directories to skip.
    exclude_directories: Vec<String>,
}

impl FileFilterConfig {
    /// Creates a configuration with all pattern lists empty.
    pub fn new() -> Self {
        Self {
            exclude_files: Vec::new(),
            exclude_all_files_except: Vec::new(),
            exclude_directories: Vec::new(),
        }
    }

    /// Sets the file deny-list.
    ///
    /// The given patterns replace any file deny-list set earlier.
    pub fn exclude_files(self, paths: Vec<String>) -> Self {
        Self {
            exclude_files: paths,
            ..self
        }
    }

    /// Sets the directory deny-list.
    ///
    /// The given patterns replace any directory deny-list set earlier.
    pub fn exclude_directories(self, paths: Vec<String>) -> Self {
        Self {
            exclude_directories: paths,
            ..self
        }
    }

    /// Sets the file allow-list.
    ///
    /// The given patterns replace any allow-list set earlier.
    pub fn allow_only(self, paths: Vec<String>) -> Self {
        Self {
            exclude_all_files_except: paths,
            ..self
        }
    }
}
42
43#[derive(Debug, Clone)]
44pub(crate) struct FileFilter {
45    exclude_files: GlobSet,
46    include_files: Option<GlobSet>,
47    exclude_directories: GlobSet,
48}
49
50impl FileFilter {
51    /// Создает новый фильтр с заданной конфигурацией.
52    pub(crate) fn new(config: FileFilterConfig) -> Self {
53        let exclude_files = Self::build_globset(&config.exclude_files).unwrap();
54        let exclude_directories = Self::build_globset(&config.exclude_directories).unwrap();
55
56        let include_files = if config.exclude_all_files_except.is_empty() {
57            None
58        } else {
59            Some(Self::build_globset(&config.exclude_all_files_except).unwrap())
60        };
61
62        Self {
63            exclude_files,
64            include_files,
65            exclude_directories,
66        }
67    }
68
69    fn build_globset(patterns: &[String]) -> Result<GlobSet, crate::error::Error> {
70        let mut builder = GlobSetBuilder::new();
71
72        for pattern in patterns {
73            let glob = Glob::new(pattern).map_err(|e| {
74                crate::error::Error::config(format!("Invalid glob pattern '{}': {}", pattern, e))
75            })?;
76            builder.add(glob);
77        }
78
79        builder.build().map_err(|e| {
80            crate::error::Error::config(format!("Failed to build glob set: {}", e))
81        })
82    }
83
84    pub(crate) fn should_process(&self, path: &Path) -> bool {
85        use tracing::trace;
86
87        trace!("Checking if should process: {:?}", path);
88
89        // Check include patterns first (whitelist mode)
90        if let Some(ref include) = self.include_files {
91            let matches = include.is_match(path);
92            trace!("Include pattern match for {:?}: {}", path, matches);
93            if !matches {
94                return false;
95            }
96        }
97
98        // Check if file itself is excluded
99        if self.exclude_files.is_match(path) {
100            trace!("Excluded by file pattern: {:?}", path);
101            return false;
102        }
103
104        // Check if any parent directory is excluded
105        // FIXED: Use a more efficient approach with early termination
106        let path_str = path.to_string_lossy();
107        if self.exclude_directories.is_match(&*path_str) {
108            trace!("Excluded by directory pattern: {:?}", path);
109            return false;
110        }
111
112        // Check ancestors more carefully
113        let mut checked = 0;
114        const MAX_CHECK: usize = 50; // Reasonable limit for directory depth
115
116        for ancestor in path.ancestors().skip(1).take(MAX_CHECK) {
117            checked += 1;
118
119            // Stop at filesystem root
120            if ancestor.parent().is_none() {
121                break;
122            }
123
124            if self.exclude_directories.is_match(ancestor) {
125                trace!("Excluded by ancestor directory pattern: {:?} (ancestor: {:?})", path, ancestor);
126                return false;
127            }
128        }
129
130        trace!("Will process: {:?} (checked {} ancestors)", path, checked);
131        true
132    }
133}
/// Switches that select which parts of a source file are stripped.
#[derive(Debug, Clone)]
pub struct FilterConfig {
    /// Remove test code (e.g., #[test], #[cfg(test)])
    pub remove_tests: bool,

    /// Remove documentation comments (///, /** */)
    pub remove_doc_comments: bool,

    /// Remove regular comments (//, /* */)
    pub remove_comments: bool,

    /// Remove blank lines after filtering
    pub remove_blank_lines: bool,

    /// Preserve copyright/license headers
    pub preserve_headers: bool,

    /// Remove debug print statements (println!, dbg!, etc.)
    pub remove_debug_prints: bool,
}

impl Default for FilterConfig {
    /// Baseline: strip tests and blank lines, keep comments, docs, headers
    /// and debug prints.
    fn default() -> Self {
        Self {
            // Things that are removed by default.
            remove_tests: true,
            remove_blank_lines: true,
            // Things that are kept by default.
            remove_doc_comments: false,
            remove_comments: false,
            remove_debug_prints: false,
            preserve_headers: true,
        }
    }
}

impl FilterConfig {
    /// Creates a configuration that removes everything except code.
    ///
    /// Every removal switch is on and header preservation is off.
    #[must_use]
    pub fn minimal() -> Self {
        Self {
            remove_doc_comments: true,
            remove_comments: true,
            preserve_headers: false,
            remove_debug_prints: true,
            ..Self::default()
        }
    }

    /// Creates a configuration that keeps documentation.
    ///
    /// Identical to the default except that regular comments are removed.
    #[must_use]
    pub fn preserve_docs() -> Self {
        Self {
            remove_comments: true,
            ..Self::default()
        }
    }

    /// Creates a configuration for production-ready code.
    ///
    /// Identical to the default except that debug prints are removed.
    #[must_use]
    pub fn production() -> Self {
        Self {
            remove_debug_prints: true,
            ..Self::default()
        }
    }
}
209
210/// Main code filter that dispatches to language-specific filters.
211#[derive(Debug, Clone)]
212pub struct CodeFilter {
213    config: FilterConfig,
214}
215
216impl CodeFilter {
217    /// Creates a new code filter with the given configuration.
218    #[must_use]
219    pub const fn new(config: FilterConfig) -> Self {
220        Self { config }
221    }
222
223    /// Filters code content based on file extension and configuration.
224    ///
225    /// Returns filtered content or original if no filtering applies.
226    #[must_use]
227    pub fn filter(&self, content: &str, path: &Path) -> String {
228        let extension = path
229            .extension()
230            .and_then(|e| e.to_str())
231            .unwrap_or("");
232
233        match extension {
234            "rs" => RustFilter::new(&self.config).filter(content),
235            "py" => PythonFilter::new(&self.config).filter(content),
236            "js" | "ts" | "jsx" | "tsx" => JavaScriptFilter::new(&self.config).filter(content),
237            "go" => GoFilter::new(&self.config).filter(content),
238            "java" | "kt" => JavaFilter::new(&self.config).filter(content),
239            "c" | "cpp" | "cc" | "h" | "hpp" => CFilter::new(&self.config).filter(content),
240            _ => content.to_string(),
241        }
242    }
243}
244
245/// Trait for language-specific code filters.
246trait LanguageFilter {
247    /// Returns the filter configuration.
248    #[allow(dead_code)]
249    fn config(&self) -> &FilterConfig;
250
251    /// Filters the content according to language rules.
252    fn filter(&self, content: &str) -> String;
253
254    /// Checks if a line is a comment.
255    fn is_comment_line(&self, line: &str) -> bool;
256
257    /// Checks if a line is a doc comment.
258    fn is_doc_comment(&self, line: &str) -> bool;
259
260    /// Removes comments from a line while preserving strings.
261    /// Removes comments from a line while preserving strings.
262    fn strip_line_comment(&self, line: &str, _comment_start: &str) -> String {
263        let mut in_string = false;
264        let mut escape_next = false;
265        let chars: Vec<char> = line.chars().collect();
266
267        for i in 0..chars.len() {
268            if escape_next {
269                escape_next = false;
270                continue;
271            }
272
273            match chars[i] {
274                '\\' if in_string => {
275                    escape_next = true;
276                }
277                '"' => {
278                    in_string = !in_string;
279                }
280                '/' if !in_string && i + 1 < chars.len() && chars[i + 1] == '/' => {
281                    // Found comment outside of string
282                    return line[..i].trim_end().to_string();
283                }
284                _ => {}
285            }
286        }
287
288        line.to_string()
289    }
290}
291
292/// Rust-specific code filter.
293struct RustFilter<'a> {
294    config: &'a FilterConfig,
295}
296
297impl<'a> RustFilter<'a> {
298    const fn new(config: &'a FilterConfig) -> Self {
299        Self { config }
300    }
301
302    /// Checks if we're entering a test module or function.
303    fn is_test_start(&self, line: &str) -> bool {
304        let trimmed = line.trim();
305        trimmed.starts_with("#[test]")
306            || trimmed.starts_with("#[cfg(test)]")
307            || trimmed.starts_with("#[tokio::test]")
308            || trimmed.starts_with("#[async_test]")
309    }
310
311    /// Checks if a line contains test-related attributes.
312    fn has_test_attribute(&self, line: &str) -> bool {
313        let trimmed = line.trim();
314        trimmed.contains("#[test")
315            || trimmed.contains("#[cfg(test")
316            || trimmed.contains("#[should_panic")
317            || trimmed.contains("#[ignore")
318    }
319
320    /// Checks if a line contains a debug print macro.
321    fn is_debug_print(&self, line: &str) -> bool {
322        let trimmed = line.trim();
323        trimmed.starts_with("println!")
324            || trimmed.starts_with("eprintln!")
325            || trimmed.starts_with("dbg!")
326            || trimmed.starts_with("print!")
327            || trimmed.starts_with("eprint!")
328            || trimmed.contains("println!(")
329            || trimmed.contains("eprintln!(")
330            || trimmed.contains("dbg!(")
331    }
332
333    /// Removes debug print statements from a line.
334    /// Returns (processed_line, is_multiline_print)
335    fn strip_debug_prints(&self, line: &str) -> (String, bool) {
336        if !self.config.remove_debug_prints {
337            return (line.to_string(), false);
338        }
339
340        let trimmed = line.trim();
341
342        // Check if line starts with a debug print macro
343        if self.is_debug_print(trimmed) {
344            // Count parentheses to see if it's a complete statement
345            let open_count = line.matches('(').count();
346            let close_count = line.matches(')').count();
347
348            if open_count > close_count {
349                // Multi-line print, need to skip subsequent lines
350                return (String::new(), true);
351            } else {
352                // Single line print, skip it
353                return (String::new(), false);
354            }
355        }
356
357        (line.to_string(), false)
358    }
359}
360
361impl<'a> LanguageFilter for RustFilter<'a> {
362    fn config(&self) -> &FilterConfig {
363        self.config
364    }
365
366    fn is_comment_line(&self, line: &str) -> bool {
367        let trimmed = line.trim();
368        trimmed.starts_with("//") && !trimmed.starts_with("///")
369    }
370
371    fn is_doc_comment(&self, line: &str) -> bool {
372        let trimmed = line.trim();
373        trimmed.starts_with("///") || trimmed.starts_with("//!")
374    }
375
376    fn filter(&self, content: &str) -> String {
377        let lines: Vec<&str> = content.lines().collect();
378        let mut result = Vec::new();
379        let mut in_test_block = false;
380        let mut in_block_comment = false;
381        let mut in_doc_comment = false;
382        let mut in_multiline_print = false;
383        let mut brace_depth = 0;
384        let mut test_block_depth = 0;
385
386        for line in lines {
387            let trimmed = line.trim();
388
389            // Handle multi-line print statements
390            if in_multiline_print {
391                let close_count = line.matches(')').count();
392                let open_count = line.matches('(').count();
393
394                if close_count > open_count {
395                    in_multiline_print = false;
396                }
397                continue;
398            }
399
400            // Handle block comments
401            if trimmed.starts_with("/*") {
402                in_block_comment = true;
403                in_doc_comment = trimmed.starts_with("/**") || trimmed.starts_with("/*!");
404            }
405
406            if in_block_comment {
407                if trimmed.ends_with("*/") {
408                    in_block_comment = false;
409                    in_doc_comment = false;
410                }
411
412                let should_skip = if in_doc_comment {
413                    self.config.remove_doc_comments
414                } else {
415                    self.config.remove_comments
416                };
417
418                if !should_skip {
419                    result.push(line.to_string());
420                }
421                continue;
422            }
423
424            // Skip doc comments
425            if self.config.remove_doc_comments && self.is_doc_comment(line) {
426                continue;
427            }
428
429            // Skip regular comments
430            if self.config.remove_comments && self.is_comment_line(line) {
431                continue;
432            }
433
434            // Handle test blocks
435            if self.config.remove_tests {
436                if self.is_test_start(line) || self.has_test_attribute(line) {
437                    in_test_block = true;
438                    test_block_depth = 0;
439                    continue;
440                }
441
442                if in_test_block {
443                    // Track braces to find end of test block
444                    for ch in trimmed.chars() {
445                        match ch {
446                            '{' => brace_depth += 1,
447                            '}' => {
448                                brace_depth -= 1;
449                                if brace_depth <= test_block_depth {
450                                    in_test_block = false;
451                                }
452                            }
453                            _ => {}
454                        }
455                    }
456                    continue;
457                }
458            }
459
460            // Remove debug prints
461            let (processed_line, is_multiline) = self.strip_debug_prints(line);
462            if is_multiline {
463                in_multiline_print = true;
464                continue;
465            }
466
467            // Remove inline comments if configured
468            let mut final_line = processed_line;
469            if self.config.remove_comments && !final_line.is_empty() {
470                final_line = self.strip_line_comment(&final_line, "//");
471            }
472
473            // Skip blank lines if configured
474            if self.config.remove_blank_lines && final_line.trim().is_empty() {
475                continue;
476            }
477
478            result.push(final_line);
479        }
480
481        result.join("\n")
482    }
483}
484
485/// Python-specific code filter.
486struct PythonFilter<'a> {
487    config: &'a FilterConfig,
488}
489
490impl<'a> PythonFilter<'a> {
491    const fn new(config: &'a FilterConfig) -> Self {
492        Self { config }
493    }
494
495    fn is_test_function(&self, line: &str) -> bool {
496        let trimmed = line.trim();
497        (trimmed.starts_with("def test_") || trimmed.starts_with("async def test_"))
498            && trimmed.contains('(')
499    }
500
501    fn is_test_decorator(&self, line: &str) -> bool {
502        let trimmed = line.trim();
503        trimmed.starts_with("@pytest")
504            || trimmed.starts_with("@unittest")
505            || trimmed == "@test"
506    }
507}
508
509impl<'a> LanguageFilter for PythonFilter<'a> {
510    fn config(&self) -> &FilterConfig {
511        self.config
512    }
513
514    fn is_comment_line(&self, line: &str) -> bool {
515        line.trim().starts_with('#')
516    }
517
518    fn is_doc_comment(&self, line: &str) -> bool {
519        let trimmed = line.trim();
520        trimmed.starts_with("\"\"\"") || trimmed.starts_with("'''")
521    }
522
523    fn filter(&self, content: &str) -> String {
524        let lines: Vec<&str> = content.lines().collect();
525        let mut result = Vec::new();
526        let mut in_docstring = false;
527        let mut in_test_function = false;
528        let _indent_level = 0;
529        let mut test_indent = 0;
530
531        for line in lines {
532            let trimmed = line.trim();
533
534            // Handle docstrings
535            if trimmed.starts_with("\"\"\"") || trimmed.starts_with("'''") {
536                in_docstring = !in_docstring;
537                if self.config.remove_doc_comments {
538                    continue;
539                }
540            }
541
542            if in_docstring {
543                if self.config.remove_doc_comments {
544                    continue;
545                }
546                result.push(line.to_string());
547                continue;
548            }
549
550            // Skip comments
551            if self.config.remove_comments && self.is_comment_line(line) {
552                continue;
553            }
554
555            // Handle test functions
556            if self.config.remove_tests {
557                let current_indent = line.len() - line.trim_start().len();
558
559                if self.is_test_decorator(line) {
560                    in_test_function = true;
561                    test_indent = current_indent;
562                    continue;
563                }
564
565                if self.is_test_function(line) {
566                    in_test_function = true;
567                    test_indent = current_indent;
568                    continue;
569                }
570
571                if in_test_function {
572                    if !trimmed.is_empty() && current_indent <= test_indent {
573                        in_test_function = false;
574                    } else {
575                        continue;
576                    }
577                }
578            }
579
580            // Skip blank lines if configured
581            if self.config.remove_blank_lines && trimmed.is_empty() {
582                continue;
583            }
584
585            result.push(line.to_string());
586        }
587
588        result.join("\n")
589    }
590}
591
592/// JavaScript/TypeScript code filter.
593struct JavaScriptFilter<'a> {
594    config: &'a FilterConfig,
595}
596
597impl<'a> JavaScriptFilter<'a> {
598    const fn new(config: &'a FilterConfig) -> Self {
599        Self { config }
600    }
601
602    #[allow(dead_code)]
603    fn is_test_block(&self, line: &str) -> bool {
604        let trimmed = line.trim();
605        trimmed.starts_with("describe(")
606            || trimmed.starts_with("it(")
607            || trimmed.starts_with("test(")
608            || trimmed.starts_with("expect(")
609    }
610}
611
612impl<'a> LanguageFilter for JavaScriptFilter<'a> {
613    fn config(&self) -> &FilterConfig {
614        self.config
615    }
616
617    fn is_comment_line(&self, line: &str) -> bool {
618        line.trim().starts_with("//")
619    }
620
621    fn is_doc_comment(&self, line: &str) -> bool {
622        let trimmed = line.trim();
623        trimmed.starts_with("/**") || trimmed.starts_with("///")
624    }
625
626    fn filter(&self, content: &str) -> String {
627        let lines: Vec<&str> = content.lines().collect();
628        let mut result = Vec::new();
629        let mut in_block_comment = false;
630        let mut in_doc_comment = false;
631
632        for line in lines {
633            let trimmed = line.trim();
634
635            // Handle block comments
636            if trimmed.starts_with("/*") {
637                in_block_comment = true;
638                in_doc_comment = trimmed.starts_with("/**");
639            }
640
641            if in_block_comment {
642                if trimmed.ends_with("*/") {
643                    in_block_comment = false;
644                    in_doc_comment = false;
645                }
646
647                let should_skip = if in_doc_comment {
648                    self.config.remove_doc_comments
649                } else {
650                    self.config.remove_comments
651                };
652
653                if !should_skip {
654                    result.push(line.to_string());
655                }
656                continue;
657            }
658
659            // Skip comments
660            if self.config.remove_comments && self.is_comment_line(line) {
661                continue;
662            }
663
664            if self.config.remove_doc_comments && self.is_doc_comment(line) {
665                continue;
666            }
667
668            // Skip blank lines if configured
669            if self.config.remove_blank_lines && trimmed.is_empty() {
670                continue;
671            }
672
673            // Remove inline comments
674            let mut processed_line = line.to_string();
675            if self.config.remove_comments {
676                processed_line = self.strip_line_comment(&processed_line, "//");
677            }
678
679            result.push(processed_line);
680        }
681
682        result.join("\n")
683    }
684}
685
686/// Go-specific code filter.
687struct GoFilter<'a> {
688    config: &'a FilterConfig,
689}
690
691impl<'a> GoFilter<'a> {
692    const fn new(config: &'a FilterConfig) -> Self {
693        Self { config }
694    }
695}
696
697impl<'a> LanguageFilter for GoFilter<'a> {
698    fn config(&self) -> &FilterConfig {
699        self.config
700    }
701
702    fn is_comment_line(&self, line: &str) -> bool {
703        line.trim().starts_with("//")
704    }
705
706    fn is_doc_comment(&self, _line: &str) -> bool {
707        false // Go doesn't have special doc comments
708    }
709
710    fn filter(&self, content: &str) -> String {
711        JavaScriptFilter::new(self.config).filter(content)
712    }
713}
714
715/// Java/Kotlin code filter.
716struct JavaFilter<'a> {
717    config: &'a FilterConfig,
718}
719
720impl<'a> JavaFilter<'a> {
721    const fn new(config: &'a FilterConfig) -> Self {
722        Self { config }
723    }
724
725    fn is_test_annotation(&self, line: &str) -> bool {
726        let trimmed = line.trim();
727        trimmed.starts_with("@Test")
728            || trimmed.starts_with("@org.junit")
729            || trimmed.starts_with("@BeforeEach")
730            || trimmed.starts_with("@AfterEach")
731    }
732}
733
734impl<'a> LanguageFilter for JavaFilter<'a> {
735    fn config(&self) -> &FilterConfig {
736        self.config
737    }
738
739    fn is_comment_line(&self, line: &str) -> bool {
740        line.trim().starts_with("//")
741    }
742
743    fn is_doc_comment(&self, line: &str) -> bool {
744        let trimmed = line.trim();
745        trimmed.starts_with("/**")
746    }
747
748    fn filter(&self, content: &str) -> String {
749        let lines: Vec<&str> = content.lines().collect();
750        let mut result = Vec::new();
751        let mut in_block_comment = false;
752        let mut in_doc_comment = false;
753        let mut skip_next_method = false;
754
755        for line in lines {
756            let trimmed = line.trim();
757
758            // Check for test annotations
759            if self.config.remove_tests && self.is_test_annotation(line) {
760                skip_next_method = true;
761                continue;
762            }
763
764            // Handle block comments
765            if trimmed.starts_with("/*") {
766                in_block_comment = true;
767                in_doc_comment = trimmed.starts_with("/**");
768            }
769
770            if in_block_comment {
771                if trimmed.ends_with("*/") {
772                    in_block_comment = false;
773                    in_doc_comment = false;
774                }
775
776                let should_skip = if in_doc_comment {
777                    self.config.remove_doc_comments
778                } else {
779                    self.config.remove_comments
780                };
781
782                if !should_skip {
783                    result.push(line.to_string());
784                }
785                continue;
786            }
787
788            // Skip test methods
789            if skip_next_method {
790                if trimmed.contains('{') {
791                    // Found method start, now skip until closing brace
792                    let brace_count = trimmed.matches('{').count() as i32
793                        - trimmed.matches('}').count() as i32;
794
795                    if brace_count == 0 {
796                        skip_next_method = false;
797                    }
798                }
799                continue;
800            }
801
802            // Skip comments
803            if self.config.remove_comments && self.is_comment_line(line) {
804                continue;
805            }
806
807            // Skip blank lines
808            if self.config.remove_blank_lines && trimmed.is_empty() {
809                continue;
810            }
811
812            result.push(line.to_string());
813        }
814
815        result.join("\n")
816    }
817}
818
819/// C/C++ code filter.
820struct CFilter<'a> {
821    config: &'a FilterConfig,
822}
823
824impl<'a> CFilter<'a> {
825    const fn new(config: &'a FilterConfig) -> Self {
826        Self { config }
827    }
828}
829
830impl<'a> LanguageFilter for CFilter<'a> {
831    fn config(&self) -> &FilterConfig {
832        self.config
833    }
834
835    fn is_comment_line(&self, line: &str) -> bool {
836        line.trim().starts_with("//")
837    }
838
839    fn is_doc_comment(&self, line: &str) -> bool {
840        let trimmed = line.trim();
841        trimmed.starts_with("///") || trimmed.starts_with("/**")
842    }
843
844    fn filter(&self, content: &str) -> String {
845        JavaScriptFilter::new(self.config).filter(content)
846    }
847}
848
#[cfg(test)]
mod tests {
    use super::*;

    /// Filters `code` with `config`, treating it as the contents of `file_name`.
    fn apply(config: FilterConfig, file_name: &str, code: &str) -> String {
        CodeFilter::new(config).filter(code, Path::new(file_name))
    }

    /// Default configuration with debug-print removal switched on.
    fn strip_prints() -> FilterConfig {
        FilterConfig {
            remove_debug_prints: true,
            ..Default::default()
        }
    }

    /// A `#[test]` function is dropped while the surrounding code stays.
    #[test]
    fn test_rust_filter_removes_tests() {
        let code = r#"
fn production_code() {}

#[test]
fn test_something() {
    assert_eq!(1, 1);
}

fn more_production() {}
"#;

        let filtered = apply(FilterConfig::default(), "test.rs", code);

        for removed in ["#[test]", "test_something"].iter() {
            assert!(!filtered.contains(removed));
        }
        for kept in ["production_code", "more_production"].iter() {
            assert!(filtered.contains(kept));
        }
    }

    /// Whole-line comments disappear when comment removal is enabled.
    #[test]
    fn test_rust_filter_removes_comments() {
        let config = FilterConfig {
            remove_comments: true,
            ..FilterConfig::default()
        };

        let code = r#"
// This is a comment
fn code() {} // inline comment
"#;

        let filtered = apply(config, "test.rs", code);
        assert!(!filtered.contains("This is a comment"));
        assert!(filtered.contains("fn code()"));
    }

    /// Python functions named `test_*` are dropped with their bodies.
    #[test]
    fn test_python_filter_removes_tests() {
        let code = r#"
def production_function():
    pass

def test_something():
    assert True

def another_production():
    pass
"#;

        let filtered = apply(FilterConfig::default(), "test.py", code);
        assert!(!filtered.contains("test_something"));
        assert!(filtered.contains("production_function"));
        assert!(filtered.contains("another_production"));
    }

    /// `//` inside a string literal is not mistaken for a comment.
    #[test]
    fn test_filter_preserves_strings_with_comment_markers() {
        let config = FilterConfig {
            remove_doc_comments: true,
            remove_comments: true,
            ..Default::default()
        };

        let code = r#"let url = "https://example.com"; // real comment"#;
        let filtered = apply(config, "test.rs", code);

        assert!(filtered.contains("https://"));
        assert!(!filtered.contains("real comment"));
    }

    /// A single-line `println!` is removed.
    #[test]
    fn test_remove_println() {
        let code = r#"
fn main() {
    let x = 5;
    println!("x = {}", x);
    let y = 10;
}
"#;

        let filtered = apply(strip_prints(), "test.rs", code);
        assert!(!filtered.contains("println!"));
        assert!(filtered.contains("let x = 5"));
        assert!(filtered.contains("let y = 10"));
    }

    /// A `println!` spread over several lines is removed as a whole.
    #[test]
    fn test_remove_multiline_println() {
        let code = r#"
fn main() {
    let x = 5;
    println!(
        "x = {}",
        x
    );
    let y = 10;
}
"#;

        let filtered = apply(strip_prints(), "test.rs", code);
        assert!(!filtered.contains("println!"));
        assert!(filtered.contains("let x = 5"));
        assert!(filtered.contains("let y = 10"));
    }

    /// A `dbg!` statement is removed.
    #[test]
    fn test_remove_dbg() {
        let code = r#"
fn main() {
    let x = 5;
    dbg!(x);
    let y = 10;
}
"#;

        let filtered = apply(strip_prints(), "test.rs", code);
        assert!(!filtered.contains("dbg!"));
        assert!(filtered.contains("let x = 5"));
    }
}