1use globset::{Glob, GlobSet, GlobSetBuilder};
7use std::path::Path;
8
/// Builder-style description of which files and directories to leave out.
///
/// Each list holds glob pattern strings; `FileFilter::new` compiles them into
/// glob sets before any path is checked.
#[derive(Debug, Clone, Default)]
pub struct FileFilterConfig {
    /// Patterns for individual files to skip.
    exclude_files: Vec<String>,
    /// Allow-list: when non-empty, only files matching one of these patterns
    /// are processed.
    exclude_all_files_except: Vec<String>,
    /// Patterns for directories to skip.
    exclude_directories: Vec<String>,
}

impl FileFilterConfig {
    /// Creates a configuration in which every pattern list is empty.
    pub fn new() -> Self {
        Self {
            exclude_files: Vec::new(),
            exclude_all_files_except: Vec::new(),
            exclude_directories: Vec::new(),
        }
    }

    /// Replaces the file exclusion patterns with `paths`.
    pub fn exclude_files(self, paths: Vec<String>) -> Self {
        Self {
            exclude_files: paths,
            ..self
        }
    }

    /// Replaces the directory exclusion patterns with `paths`.
    pub fn exclude_directories(self, paths: Vec<String>) -> Self {
        Self {
            exclude_directories: paths,
            ..self
        }
    }

    /// Replaces the allow-list with `paths`.
    pub fn allow_only(self, paths: Vec<String>) -> Self {
        Self {
            exclude_all_files_except: paths,
            ..self
        }
    }
}
42
43#[derive(Debug, Clone)]
44pub(crate) struct FileFilter {
45 exclude_files: GlobSet,
46 include_files: Option<GlobSet>,
47 exclude_directories: GlobSet,
48}
49
50impl FileFilter {
51 pub(crate) fn new(config: FileFilterConfig) -> Self {
53 let exclude_files = Self::build_globset(&config.exclude_files).unwrap();
54 let exclude_directories = Self::build_globset(&config.exclude_directories).unwrap();
55
56 let include_files = if config.exclude_all_files_except.is_empty() {
57 None
58 } else {
59 Some(Self::build_globset(&config.exclude_all_files_except).unwrap())
60 };
61
62 Self {
63 exclude_files,
64 include_files,
65 exclude_directories,
66 }
67 }
68
69 fn build_globset(patterns: &[String]) -> Result<GlobSet, crate::error::Error> {
70 let mut builder = GlobSetBuilder::new();
71
72 for pattern in patterns {
73 let glob = Glob::new(pattern).map_err(|e| {
74 crate::error::Error::config(format!("Invalid glob pattern '{}': {}", pattern, e))
75 })?;
76 builder.add(glob);
77 }
78
79 builder.build().map_err(|e| {
80 crate::error::Error::config(format!("Failed to build glob set: {}", e))
81 })
82 }
83
84 pub(crate) fn should_process(&self, path: &Path) -> bool {
85 use tracing::trace;
86
87 trace!("Checking if should process: {:?}", path);
88
89 if let Some(ref include) = self.include_files {
91 let matches = include.is_match(path);
92 trace!("Include pattern match for {:?}: {}", path, matches);
93 if !matches {
94 return false;
95 }
96 }
97
98 if self.exclude_files.is_match(path) {
100 trace!("Excluded by file pattern: {:?}", path);
101 return false;
102 }
103
104 let path_str = path.to_string_lossy();
107 if self.exclude_directories.is_match(&*path_str) {
108 trace!("Excluded by directory pattern: {:?}", path);
109 return false;
110 }
111
112 let mut checked = 0;
114 const MAX_CHECK: usize = 50; for ancestor in path.ancestors().skip(1).take(MAX_CHECK) {
117 checked += 1;
118
119 if ancestor.parent().is_none() {
121 break;
122 }
123
124 if self.exclude_directories.is_match(ancestor) {
125 trace!("Excluded by ancestor directory pattern: {:?} (ancestor: {:?})", path, ancestor);
126 return false;
127 }
128 }
129
130 trace!("Will process: {:?} (checked {} ancestors)", path, checked);
131 true
132 }
133}
/// Switches selecting what the per-language code filters strip from source text.
#[derive(Debug, Clone)]
pub struct FilterConfig {
    /// Drop test code.
    pub remove_tests: bool,

    /// Drop documentation comments.
    pub remove_doc_comments: bool,

    /// Drop ordinary (non-documentation) comments.
    pub remove_comments: bool,

    /// Drop lines that are empty or whitespace-only.
    pub remove_blank_lines: bool,

    /// Header-preservation flag.
    // NOTE(review): no filter visible in this file reads this flag — confirm
    // where it is consumed.
    pub preserve_headers: bool,

    /// Drop debug print statements.
    pub remove_debug_prints: bool,
}

impl Default for FilterConfig {
    /// Removes tests and blank lines, preserves headers, keeps everything else.
    fn default() -> Self {
        Self {
            remove_tests: true,
            remove_blank_lines: true,
            preserve_headers: true,
            remove_doc_comments: false,
            remove_comments: false,
            remove_debug_prints: false,
        }
    }
}

impl FilterConfig {
    /// Most aggressive preset: every removal switch on, headers not preserved.
    #[must_use]
    pub fn minimal() -> Self {
        Self {
            remove_doc_comments: true,
            remove_comments: true,
            remove_debug_prints: true,
            preserve_headers: false,
            ..Self::default()
        }
    }

    /// The default settings with ordinary comment removal switched on.
    #[must_use]
    pub fn preserve_docs() -> Self {
        Self {
            remove_comments: true,
            ..Self::default()
        }
    }

    /// The default settings with debug print removal switched on.
    #[must_use]
    pub fn production() -> Self {
        Self {
            remove_debug_prints: true,
            ..Self::default()
        }
    }
}
209
210#[derive(Debug, Clone)]
212pub struct CodeFilter {
213 config: FilterConfig,
214}
215
216impl CodeFilter {
217 #[must_use]
219 pub const fn new(config: FilterConfig) -> Self {
220 Self { config }
221 }
222
223 #[must_use]
227 pub fn filter(&self, content: &str, path: &Path) -> String {
228 let extension = path
229 .extension()
230 .and_then(|e| e.to_str())
231 .unwrap_or("");
232
233 match extension {
234 "rs" => RustFilter::new(&self.config).filter(content),
235 "py" => PythonFilter::new(&self.config).filter(content),
236 "js" | "ts" | "jsx" | "tsx" => JavaScriptFilter::new(&self.config).filter(content),
237 "go" => GoFilter::new(&self.config).filter(content),
238 "java" | "kt" => JavaFilter::new(&self.config).filter(content),
239 "c" | "cpp" | "cc" | "h" | "hpp" => CFilter::new(&self.config).filter(content),
240 _ => content.to_string(),
241 }
242 }
243}
244
245trait LanguageFilter {
247 #[allow(dead_code)]
249 fn config(&self) -> &FilterConfig;
250
251 fn filter(&self, content: &str) -> String;
253
254 fn is_comment_line(&self, line: &str) -> bool;
256
257 fn is_doc_comment(&self, line: &str) -> bool;
259
260 fn strip_line_comment(&self, line: &str, _comment_start: &str) -> String {
263 let mut in_string = false;
264 let mut escape_next = false;
265 let chars: Vec<char> = line.chars().collect();
266
267 for i in 0..chars.len() {
268 if escape_next {
269 escape_next = false;
270 continue;
271 }
272
273 match chars[i] {
274 '\\' if in_string => {
275 escape_next = true;
276 }
277 '"' => {
278 in_string = !in_string;
279 }
280 '/' if !in_string && i + 1 < chars.len() && chars[i + 1] == '/' => {
281 return line[..i].trim_end().to_string();
283 }
284 _ => {}
285 }
286 }
287
288 line.to_string()
289 }
290}
291
292struct RustFilter<'a> {
294 config: &'a FilterConfig,
295}
296
297impl<'a> RustFilter<'a> {
298 const fn new(config: &'a FilterConfig) -> Self {
299 Self { config }
300 }
301
302 fn is_test_start(&self, line: &str) -> bool {
304 let trimmed = line.trim();
305 trimmed.starts_with("#[test]")
306 || trimmed.starts_with("#[cfg(test)]")
307 || trimmed.starts_with("#[tokio::test]")
308 || trimmed.starts_with("#[async_test]")
309 }
310
311 fn has_test_attribute(&self, line: &str) -> bool {
313 let trimmed = line.trim();
314 trimmed.contains("#[test")
315 || trimmed.contains("#[cfg(test")
316 || trimmed.contains("#[should_panic")
317 || trimmed.contains("#[ignore")
318 }
319
320 fn is_debug_print(&self, line: &str) -> bool {
322 let trimmed = line.trim();
323 trimmed.starts_with("println!")
324 || trimmed.starts_with("eprintln!")
325 || trimmed.starts_with("dbg!")
326 || trimmed.starts_with("print!")
327 || trimmed.starts_with("eprint!")
328 || trimmed.contains("println!(")
329 || trimmed.contains("eprintln!(")
330 || trimmed.contains("dbg!(")
331 }
332
333 fn strip_debug_prints(&self, line: &str) -> (String, bool) {
336 if !self.config.remove_debug_prints {
337 return (line.to_string(), false);
338 }
339
340 let trimmed = line.trim();
341
342 if self.is_debug_print(trimmed) {
344 let open_count = line.matches('(').count();
346 let close_count = line.matches(')').count();
347
348 if open_count > close_count {
349 return (String::new(), true);
351 } else {
352 return (String::new(), false);
354 }
355 }
356
357 (line.to_string(), false)
358 }
359}
360
361impl<'a> LanguageFilter for RustFilter<'a> {
362 fn config(&self) -> &FilterConfig {
363 self.config
364 }
365
366 fn is_comment_line(&self, line: &str) -> bool {
367 let trimmed = line.trim();
368 trimmed.starts_with("//") && !trimmed.starts_with("///")
369 }
370
371 fn is_doc_comment(&self, line: &str) -> bool {
372 let trimmed = line.trim();
373 trimmed.starts_with("///") || trimmed.starts_with("//!")
374 }
375
376 fn filter(&self, content: &str) -> String {
377 let lines: Vec<&str> = content.lines().collect();
378 let mut result = Vec::new();
379 let mut in_test_block = false;
380 let mut in_block_comment = false;
381 let mut in_doc_comment = false;
382 let mut in_multiline_print = false;
383 let mut brace_depth = 0;
384 let mut test_block_depth = 0;
385
386 for line in lines {
387 let trimmed = line.trim();
388
389 if in_multiline_print {
391 let close_count = line.matches(')').count();
392 let open_count = line.matches('(').count();
393
394 if close_count > open_count {
395 in_multiline_print = false;
396 }
397 continue;
398 }
399
400 if trimmed.starts_with("/*") {
402 in_block_comment = true;
403 in_doc_comment = trimmed.starts_with("/**") || trimmed.starts_with("/*!");
404 }
405
406 if in_block_comment {
407 if trimmed.ends_with("*/") {
408 in_block_comment = false;
409 in_doc_comment = false;
410 }
411
412 let should_skip = if in_doc_comment {
413 self.config.remove_doc_comments
414 } else {
415 self.config.remove_comments
416 };
417
418 if !should_skip {
419 result.push(line.to_string());
420 }
421 continue;
422 }
423
424 if self.config.remove_doc_comments && self.is_doc_comment(line) {
426 continue;
427 }
428
429 if self.config.remove_comments && self.is_comment_line(line) {
431 continue;
432 }
433
434 if self.config.remove_tests {
436 if self.is_test_start(line) || self.has_test_attribute(line) {
437 in_test_block = true;
438 test_block_depth = 0;
439 continue;
440 }
441
442 if in_test_block {
443 for ch in trimmed.chars() {
445 match ch {
446 '{' => brace_depth += 1,
447 '}' => {
448 brace_depth -= 1;
449 if brace_depth <= test_block_depth {
450 in_test_block = false;
451 }
452 }
453 _ => {}
454 }
455 }
456 continue;
457 }
458 }
459
460 let (processed_line, is_multiline) = self.strip_debug_prints(line);
462 if is_multiline {
463 in_multiline_print = true;
464 continue;
465 }
466
467 let mut final_line = processed_line;
469 if self.config.remove_comments && !final_line.is_empty() {
470 final_line = self.strip_line_comment(&final_line, "//");
471 }
472
473 if self.config.remove_blank_lines && final_line.trim().is_empty() {
475 continue;
476 }
477
478 result.push(final_line);
479 }
480
481 result.join("\n")
482 }
483}
484
485struct PythonFilter<'a> {
487 config: &'a FilterConfig,
488}
489
490impl<'a> PythonFilter<'a> {
491 const fn new(config: &'a FilterConfig) -> Self {
492 Self { config }
493 }
494
495 fn is_test_function(&self, line: &str) -> bool {
496 let trimmed = line.trim();
497 (trimmed.starts_with("def test_") || trimmed.starts_with("async def test_"))
498 && trimmed.contains('(')
499 }
500
501 fn is_test_decorator(&self, line: &str) -> bool {
502 let trimmed = line.trim();
503 trimmed.starts_with("@pytest")
504 || trimmed.starts_with("@unittest")
505 || trimmed == "@test"
506 }
507}
508
509impl<'a> LanguageFilter for PythonFilter<'a> {
510 fn config(&self) -> &FilterConfig {
511 self.config
512 }
513
514 fn is_comment_line(&self, line: &str) -> bool {
515 line.trim().starts_with('#')
516 }
517
518 fn is_doc_comment(&self, line: &str) -> bool {
519 let trimmed = line.trim();
520 trimmed.starts_with("\"\"\"") || trimmed.starts_with("'''")
521 }
522
523 fn filter(&self, content: &str) -> String {
524 let lines: Vec<&str> = content.lines().collect();
525 let mut result = Vec::new();
526 let mut in_docstring = false;
527 let mut in_test_function = false;
528 let _indent_level = 0;
529 let mut test_indent = 0;
530
531 for line in lines {
532 let trimmed = line.trim();
533
534 if trimmed.starts_with("\"\"\"") || trimmed.starts_with("'''") {
536 in_docstring = !in_docstring;
537 if self.config.remove_doc_comments {
538 continue;
539 }
540 }
541
542 if in_docstring {
543 if self.config.remove_doc_comments {
544 continue;
545 }
546 result.push(line.to_string());
547 continue;
548 }
549
550 if self.config.remove_comments && self.is_comment_line(line) {
552 continue;
553 }
554
555 if self.config.remove_tests {
557 let current_indent = line.len() - line.trim_start().len();
558
559 if self.is_test_decorator(line) {
560 in_test_function = true;
561 test_indent = current_indent;
562 continue;
563 }
564
565 if self.is_test_function(line) {
566 in_test_function = true;
567 test_indent = current_indent;
568 continue;
569 }
570
571 if in_test_function {
572 if !trimmed.is_empty() && current_indent <= test_indent {
573 in_test_function = false;
574 } else {
575 continue;
576 }
577 }
578 }
579
580 if self.config.remove_blank_lines && trimmed.is_empty() {
582 continue;
583 }
584
585 result.push(line.to_string());
586 }
587
588 result.join("\n")
589 }
590}
591
592struct JavaScriptFilter<'a> {
594 config: &'a FilterConfig,
595}
596
597impl<'a> JavaScriptFilter<'a> {
598 const fn new(config: &'a FilterConfig) -> Self {
599 Self { config }
600 }
601
602 #[allow(dead_code)]
603 fn is_test_block(&self, line: &str) -> bool {
604 let trimmed = line.trim();
605 trimmed.starts_with("describe(")
606 || trimmed.starts_with("it(")
607 || trimmed.starts_with("test(")
608 || trimmed.starts_with("expect(")
609 }
610}
611
612impl<'a> LanguageFilter for JavaScriptFilter<'a> {
613 fn config(&self) -> &FilterConfig {
614 self.config
615 }
616
617 fn is_comment_line(&self, line: &str) -> bool {
618 line.trim().starts_with("//")
619 }
620
621 fn is_doc_comment(&self, line: &str) -> bool {
622 let trimmed = line.trim();
623 trimmed.starts_with("/**") || trimmed.starts_with("///")
624 }
625
626 fn filter(&self, content: &str) -> String {
627 let lines: Vec<&str> = content.lines().collect();
628 let mut result = Vec::new();
629 let mut in_block_comment = false;
630 let mut in_doc_comment = false;
631
632 for line in lines {
633 let trimmed = line.trim();
634
635 if trimmed.starts_with("/*") {
637 in_block_comment = true;
638 in_doc_comment = trimmed.starts_with("/**");
639 }
640
641 if in_block_comment {
642 if trimmed.ends_with("*/") {
643 in_block_comment = false;
644 in_doc_comment = false;
645 }
646
647 let should_skip = if in_doc_comment {
648 self.config.remove_doc_comments
649 } else {
650 self.config.remove_comments
651 };
652
653 if !should_skip {
654 result.push(line.to_string());
655 }
656 continue;
657 }
658
659 if self.config.remove_comments && self.is_comment_line(line) {
661 continue;
662 }
663
664 if self.config.remove_doc_comments && self.is_doc_comment(line) {
665 continue;
666 }
667
668 if self.config.remove_blank_lines && trimmed.is_empty() {
670 continue;
671 }
672
673 let mut processed_line = line.to_string();
675 if self.config.remove_comments {
676 processed_line = self.strip_line_comment(&processed_line, "//");
677 }
678
679 result.push(processed_line);
680 }
681
682 result.join("\n")
683 }
684}
685
686struct GoFilter<'a> {
688 config: &'a FilterConfig,
689}
690
691impl<'a> GoFilter<'a> {
692 const fn new(config: &'a FilterConfig) -> Self {
693 Self { config }
694 }
695}
696
697impl<'a> LanguageFilter for GoFilter<'a> {
698 fn config(&self) -> &FilterConfig {
699 self.config
700 }
701
702 fn is_comment_line(&self, line: &str) -> bool {
703 line.trim().starts_with("//")
704 }
705
706 fn is_doc_comment(&self, _line: &str) -> bool {
707 false }
709
710 fn filter(&self, content: &str) -> String {
711 JavaScriptFilter::new(self.config).filter(content)
712 }
713}
714
715struct JavaFilter<'a> {
717 config: &'a FilterConfig,
718}
719
720impl<'a> JavaFilter<'a> {
721 const fn new(config: &'a FilterConfig) -> Self {
722 Self { config }
723 }
724
725 fn is_test_annotation(&self, line: &str) -> bool {
726 let trimmed = line.trim();
727 trimmed.starts_with("@Test")
728 || trimmed.starts_with("@org.junit")
729 || trimmed.starts_with("@BeforeEach")
730 || trimmed.starts_with("@AfterEach")
731 }
732}
733
734impl<'a> LanguageFilter for JavaFilter<'a> {
735 fn config(&self) -> &FilterConfig {
736 self.config
737 }
738
739 fn is_comment_line(&self, line: &str) -> bool {
740 line.trim().starts_with("//")
741 }
742
743 fn is_doc_comment(&self, line: &str) -> bool {
744 let trimmed = line.trim();
745 trimmed.starts_with("/**")
746 }
747
748 fn filter(&self, content: &str) -> String {
749 let lines: Vec<&str> = content.lines().collect();
750 let mut result = Vec::new();
751 let mut in_block_comment = false;
752 let mut in_doc_comment = false;
753 let mut skip_next_method = false;
754
755 for line in lines {
756 let trimmed = line.trim();
757
758 if self.config.remove_tests && self.is_test_annotation(line) {
760 skip_next_method = true;
761 continue;
762 }
763
764 if trimmed.starts_with("/*") {
766 in_block_comment = true;
767 in_doc_comment = trimmed.starts_with("/**");
768 }
769
770 if in_block_comment {
771 if trimmed.ends_with("*/") {
772 in_block_comment = false;
773 in_doc_comment = false;
774 }
775
776 let should_skip = if in_doc_comment {
777 self.config.remove_doc_comments
778 } else {
779 self.config.remove_comments
780 };
781
782 if !should_skip {
783 result.push(line.to_string());
784 }
785 continue;
786 }
787
788 if skip_next_method {
790 if trimmed.contains('{') {
791 let brace_count = trimmed.matches('{').count() as i32
793 - trimmed.matches('}').count() as i32;
794
795 if brace_count == 0 {
796 skip_next_method = false;
797 }
798 }
799 continue;
800 }
801
802 if self.config.remove_comments && self.is_comment_line(line) {
804 continue;
805 }
806
807 if self.config.remove_blank_lines && trimmed.is_empty() {
809 continue;
810 }
811
812 result.push(line.to_string());
813 }
814
815 result.join("\n")
816 }
817}
818
819struct CFilter<'a> {
821 config: &'a FilterConfig,
822}
823
824impl<'a> CFilter<'a> {
825 const fn new(config: &'a FilterConfig) -> Self {
826 Self { config }
827 }
828}
829
830impl<'a> LanguageFilter for CFilter<'a> {
831 fn config(&self) -> &FilterConfig {
832 self.config
833 }
834
835 fn is_comment_line(&self, line: &str) -> bool {
836 line.trim().starts_with("//")
837 }
838
839 fn is_doc_comment(&self, line: &str) -> bool {
840 let trimmed = line.trim();
841 trimmed.starts_with("///") || trimmed.starts_with("/**")
842 }
843
844 fn filter(&self, content: &str) -> String {
845 JavaScriptFilter::new(self.config).filter(content)
846 }
847}
848
// Unit tests exercising `CodeFilter` end to end; the language filter is chosen
// from the extension of the path passed to `filter`.
#[cfg(test)]
mod tests {
    use super::*;

    // With the default configuration a `#[test]` function is dropped from Rust
    // source while the surrounding functions are kept.
    #[test]
    fn test_rust_filter_removes_tests() {
        let config = FilterConfig::default();
        let filter = CodeFilter::new(config);

        let code = r#"
fn production_code() {}

#[test]
fn test_something() {
 assert_eq!(1, 1);
}

fn more_production() {}
"#;

        let filtered = filter.filter(code, Path::new("test.rs"));
        assert!(!filtered.contains("#[test]"));
        assert!(!filtered.contains("test_something"));
        assert!(filtered.contains("production_code"));
        assert!(filtered.contains("more_production"));
    }

    // With `remove_comments` enabled a whole-line `//` comment disappears and
    // the code on the following line is still present.
    #[test]
    fn test_rust_filter_removes_comments() {
        let mut config = FilterConfig::default();
        config.remove_comments = true;

        let filter = CodeFilter::new(config);

        let code = r#"
// This is a comment
fn code() {} // inline comment
"#;

        let filtered = filter.filter(code, Path::new("test.rs"));
        assert!(!filtered.contains("This is a comment"));
        assert!(filtered.contains("fn code()"));
    }

    // With the default configuration a `test_`-prefixed Python function is
    // dropped while the other functions are kept.
    #[test]
    fn test_python_filter_removes_tests() {
        let config = FilterConfig::default();
        let filter = CodeFilter::new(config);

        let code = r#"
def production_function():
 pass

def test_something():
 assert True

def another_production():
 pass
"#;

        let filtered = filter.filter(code, Path::new("test.py"));
        assert!(!filtered.contains("test_something"));
        assert!(filtered.contains("production_function"));
        assert!(filtered.contains("another_production"));
    }

    // A `//` inside a string literal (the URL) must survive comment removal,
    // while the real trailing comment is stripped.
    #[test]
    fn test_filter_preserves_strings_with_comment_markers() {
        let config = FilterConfig {
            remove_doc_comments: true,
            remove_comments: true,
            ..Default::default()
        };
        let filter = CodeFilter::new(config);

        let code = r#"let url = "https://example.com"; // real comment"#;
        let filtered = filter.filter(code, Path::new("test.rs"));

        assert!(filtered.contains("https://"));
        assert!(!filtered.contains("real comment"));
    }

    // A single-line `println!` is removed; the statements around it stay.
    #[test]
    fn test_remove_println() {
        let config = FilterConfig {
            remove_debug_prints: true,
            ..Default::default()
        };
        let filter = CodeFilter::new(config);

        let code = r#"
fn main() {
 let x = 5;
 println!("x = {}", x);
 let y = 10;
}
"#;

        let filtered = filter.filter(code, Path::new("test.rs"));
        assert!(!filtered.contains("println!"));
        assert!(filtered.contains("let x = 5"));
        assert!(filtered.contains("let y = 10"));
    }

    // A `println!` spread over several lines is removed together with its
    // argument lines; the statement after the closing `);` is kept.
    #[test]
    fn test_remove_multiline_println() {
        let config = FilterConfig {
            remove_debug_prints: true,
            ..Default::default()
        };
        let filter = CodeFilter::new(config);

        let code = r#"
fn main() {
 let x = 5;
 println!(
 "x = {}",
 x
 );
 let y = 10;
}
"#;

        let filtered = filter.filter(code, Path::new("test.rs"));
        assert!(!filtered.contains("println!"));
        assert!(filtered.contains("let x = 5"));
        assert!(filtered.contains("let y = 10"));
    }

    // A `dbg!` statement is removed when debug print removal is enabled.
    #[test]
    fn test_remove_dbg() {
        let config = FilterConfig {
            remove_debug_prints: true,
            ..Default::default()
        };
        let filter = CodeFilter::new(config);

        let code = r#"
fn main() {
 let x = 5;
 dbg!(x);
 let y = 10;
}
"#;

        let filtered = filter.filter(code, Path::new("test.rs"));
        assert!(!filtered.contains("dbg!"));
        assert!(filtered.contains("let x = 5"));
    }
}