1use std::path::{Path, PathBuf};
8
9use argus_core::ReviewConfig;
10
11use crate::parser::FileDiff;
12
13pub struct DiffFilter {
25 skip_patterns: Vec<glob::Pattern>,
26 skip_extensions: Vec<String>,
27 max_file_size_lines: usize,
28}
29
30impl DiffFilter {
31 pub fn default_filter() -> Self {
42 Self {
43 skip_patterns: Vec::new(),
44 skip_extensions: Vec::new(),
45 max_file_size_lines: 1000,
46 }
47 }
48
49 pub fn from_config(config: &ReviewConfig) -> Self {
62 let mut skip_patterns = Vec::new();
63 for pat in &config.skip_patterns {
64 if let Ok(p) = glob::Pattern::new(pat) {
65 skip_patterns.push(p);
66 }
67 }
68
69 Self {
70 skip_patterns,
71 skip_extensions: config.skip_extensions.clone(),
72 max_file_size_lines: 1000,
73 }
74 }
75
76 pub fn should_skip(&self, path: &str) -> bool {
88 self.check_skip(Path::new(path), "", 0).is_some()
89 }
90
91 pub fn filter(&self, diffs: Vec<FileDiff>) -> FilterResult {
112 let mut kept = Vec::new();
113 let mut skipped = Vec::new();
114
115 for diff in diffs {
116 let path = &diff.new_path;
117 let path_str = path.to_string_lossy();
118
119 let content = Self::collect_hunk_content(&diff);
120 let changed_lines = Self::count_changed_lines(&diff);
121
122 if let Some(reason) = self.check_skip(path, &content, changed_lines) {
123 skipped.push(SkippedFile {
124 path: path.clone(),
125 reason,
126 });
127 } else {
128 let mut matched = false;
130 for pat in &self.skip_patterns {
131 if pat.matches(&path_str) {
132 skipped.push(SkippedFile {
133 path: path.clone(),
134 reason: SkipReason::PatternMatch(pat.to_string()),
135 });
136 matched = true;
137 break;
138 }
139 }
140 if !matched {
141 kept.push(diff);
142 }
143 }
144 }
145
146 FilterResult { kept, skipped }
147 }
148
149 fn check_skip(&self, path: &Path, content: &str, changed_lines: usize) -> Option<SkipReason> {
150 let path_str = path.to_string_lossy();
151 let file_name = path
152 .file_name()
153 .map(|f| f.to_string_lossy().to_string())
154 .unwrap_or_default();
155
156 if is_lock_file(&file_name) {
158 return Some(SkipReason::LockFile);
159 }
160
161 if is_vendored(&path_str) {
163 return Some(SkipReason::VendoredCode);
164 }
165
166 if is_minified(&file_name, content) {
168 return Some(SkipReason::MinifiedFile);
169 }
170
171 if is_generated_by_name(&file_name) {
173 return Some(SkipReason::GeneratedFile);
174 }
175
176 if is_generated_by_content(content) {
178 return Some(SkipReason::GeneratedFile);
179 }
180
181 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
183 for skip_ext in &self.skip_extensions {
184 if ext == skip_ext {
185 return Some(SkipReason::PatternMatch(format!("*.{skip_ext}")));
186 }
187 }
188 }
189
190 if changed_lines > self.max_file_size_lines {
192 return Some(SkipReason::TooLarge);
193 }
194
195 None
196 }
197
198 fn collect_hunk_content(diff: &FileDiff) -> String {
199 let mut content = String::new();
200 for hunk in &diff.hunks {
201 content.push_str(&hunk.content);
202 }
203 content
204 }
205
206 fn count_changed_lines(diff: &FileDiff) -> usize {
207 let mut count = 0;
208 for hunk in &diff.hunks {
209 for line in hunk.content.lines() {
210 if line.starts_with('+') || line.starts_with('-') {
211 count += 1;
212 }
213 }
214 }
215 count
216 }
217}
218
219pub struct FilterResult {
233 pub kept: Vec<FileDiff>,
235 pub skipped: Vec<SkippedFile>,
237}
238
239#[derive(Debug, Clone)]
254pub struct SkippedFile {
255 pub path: PathBuf,
257 pub reason: SkipReason,
259}
260
/// Why a file was excluded from review.
///
/// Implements `PartialEq`/`Eq` so reasons can be compared directly
/// (for example with `assert_eq!`) instead of only via pattern matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SkipReason {
    /// The file name is one of the known dependency lock files.
    LockFile,
    /// The file name or its leading content marks it as generated code.
    GeneratedFile,
    /// The path contains a vendored-dependency directory.
    VendoredCode,
    /// The file is minified (by name or by an overly long line).
    MinifiedFile,
    /// The file is binary. None of the checks in this file emit this variant.
    BinaryFile,
    /// The diff has more changed lines than the configured budget.
    TooLarge,
    /// A configured extension or glob pattern matched; carries the pattern text.
    PatternMatch(String),
}
288
289impl std::fmt::Display for SkipReason {
290 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
291 match self {
292 SkipReason::LockFile => write!(f, "lock file"),
293 SkipReason::GeneratedFile => write!(f, "generated file"),
294 SkipReason::VendoredCode => write!(f, "vendored code"),
295 SkipReason::MinifiedFile => write!(f, "minified file"),
296 SkipReason::BinaryFile => write!(f, "binary file"),
297 SkipReason::TooLarge => write!(f, "too large"),
298 SkipReason::PatternMatch(pat) => write!(f, "pattern: {pat}"),
299 }
300 }
301}
302
/// Exact file names treated as dependency lock files (matched case-sensitively
/// against the final path component).
const LOCK_FILES: &[&str] = &[
    // JavaScript: npm, Yarn
    "package-lock.json",
    "yarn.lock",
    // Rust
    "Cargo.lock",
    // JavaScript: pnpm
    "pnpm-lock.yaml",
    // Python
    "poetry.lock",
    // Ruby
    "Gemfile.lock",
    // PHP
    "composer.lock",
    // Go
    "go.sum",
];
313
314fn is_lock_file(file_name: &str) -> bool {
315 LOCK_FILES.contains(&file_name)
316}
317
/// Returns `true` when any `/`-separated component of `path` is exactly
/// `vendor`, `third_party` or `node_modules`.
fn is_vendored(path: &str) -> bool {
    path.split('/')
        .any(|segment| matches!(segment, "vendor" | "third_party" | "node_modules"))
}
327
/// Returns `true` when the file looks minified.
///
/// A file counts as minified when its name ends with `.min.js` or `.min.css`,
/// or when any line of `content` is longer than 500 characters. Length is
/// measured in characters, not bytes, so moderately long lines of non-ASCII
/// text are not mistaken for minified output.
fn is_minified(file_name: &str, content: &str) -> bool {
    const MAX_LINE_CHARS: usize = 500;

    if file_name.ends_with(".min.js") || file_name.ends_with(".min.css") {
        return true;
    }

    // The byte length is an upper bound on the character count, so it serves
    // as a cheap pre-check before the linear character count.
    content
        .lines()
        .any(|line| line.len() > MAX_LINE_CHARS && line.chars().count() > MAX_LINE_CHARS)
}
340
/// Returns `true` when `file_name` follows a generated-code naming scheme:
/// it contains `.generated.`, or ends with `.g.dart`, `.pb.go` or `.pb.rs`.
fn is_generated_by_name(file_name: &str) -> bool {
    const GENERATED_SUFFIXES: [&str; 3] = [".g.dart", ".pb.go", ".pb.rs"];

    file_name.contains(".generated.")
        || GENERATED_SUFFIXES
            .iter()
            .any(|suffix| file_name.ends_with(suffix))
}
353
/// Returns `true` when one of the first five lines of `content` contains a
/// generated-code marker (`// Code generated` or `# AUTO-GENERATED`).
///
/// A single leading diff marker (`+`, `-` or a space) is removed from each
/// line before the markers are searched for.
fn is_generated_by_content(content: &str) -> bool {
    content.lines().take(5).any(|line| {
        let text = line
            .strip_prefix(|marker: char| matches!(marker, '+' | '-' | ' '))
            .unwrap_or(line);
        text.contains("// Code generated") || text.contains("# AUTO-GENERATED")
    })
}
377
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_unified_diff;

    /// Wraps `content` in a single-hunk unified diff for `path` and parses it.
    fn make_diff(path: &str, content: &str) -> Vec<FileDiff> {
        let header = format!("diff --git a/{path} b/{path}\n--- a/{path}\n+++ b/{path}\n");
        let raw = format!("{header}@@ -1,1 +1,2 @@\n{content}\n");
        parse_unified_diff(&raw).unwrap()
    }

    /// Runs `filter` over a one-file diff built by `make_diff`.
    fn run(filter: &DiffFilter, path: &str, content: &str) -> FilterResult {
        filter.filter(make_diff(path, content))
    }

    #[test]
    fn lock_files_skipped() {
        let filter = DiffFilter::default_filter();
        for name in LOCK_FILES.iter() {
            let outcome = run(&filter, name, "+new line");
            assert!(outcome.kept.is_empty(), "expected {name} to be skipped");
            assert_eq!(outcome.skipped.len(), 1);
            assert!(matches!(outcome.skipped[0].reason, SkipReason::LockFile));
        }
    }

    #[test]
    fn generated_files_skipped_by_name() {
        let filter = DiffFilter::default_filter();
        let generated_names = [
            "api.generated.ts",
            "model.g.dart",
            "proto.pb.go",
            "msg.pb.rs",
        ];

        for name in generated_names.iter() {
            let outcome = run(&filter, name, "+new line");
            assert!(outcome.kept.is_empty(), "expected {name} to be skipped");
            assert!(matches!(
                outcome.skipped[0].reason,
                SkipReason::GeneratedFile
            ));
        }
    }

    #[test]
    fn generated_files_skipped_by_header() {
        let filter = DiffFilter::default_filter();
        let outcome = run(&filter, "gen.go", "+// Code generated by protoc. DO NOT EDIT.");
        assert!(outcome.kept.is_empty());
        assert!(matches!(
            outcome.skipped[0].reason,
            SkipReason::GeneratedFile
        ));
    }

    #[test]
    fn minified_files_skipped() {
        let filter = DiffFilter::default_filter();

        // Detected by file name.
        let by_name = run(&filter, "app.min.js", "+var x=1;");
        assert!(by_name.kept.is_empty());
        assert!(matches!(by_name.skipped[0].reason, SkipReason::MinifiedFile));

        // Detected by an overly long line.
        let long_line = format!("+{}", "x".repeat(501));
        let by_length = run(&filter, "bundle.js", &long_line);
        assert!(by_length.kept.is_empty());
        assert!(matches!(
            by_length.skipped[0].reason,
            SkipReason::MinifiedFile
        ));
    }

    #[test]
    fn vendored_code_skipped() {
        let filter = DiffFilter::default_filter();
        let vendored_paths = [
            "vendor/lib.go",
            "third_party/dep.rs",
            "node_modules/pkg/index.js",
        ];

        for path in vendored_paths.iter() {
            let outcome = run(&filter, path, "+line");
            assert!(outcome.kept.is_empty(), "expected {path} to be skipped");
            assert!(matches!(outcome.skipped[0].reason, SkipReason::VendoredCode));
        }
    }

    #[test]
    fn normal_source_files_kept() {
        let filter = DiffFilter::default_filter();
        let outcome = run(&filter, "src/main.rs", "+let x = 1;");
        assert_eq!(outcome.kept.len(), 1);
        assert!(outcome.skipped.is_empty());
    }

    #[test]
    fn custom_patterns_from_config() {
        let config = ReviewConfig {
            skip_patterns: vec!["*.test.ts".into(), "fixtures/**".into()],
            ..ReviewConfig::default()
        };
        let filter = DiffFilter::from_config(&config);

        let matching = run(&filter, "auth.test.ts", "+test line");
        assert!(matching.kept.is_empty());
        assert!(matches!(
            matching.skipped[0].reason,
            SkipReason::PatternMatch(_)
        ));

        let non_matching = run(&filter, "src/auth.ts", "+real code");
        assert_eq!(non_matching.kept.len(), 1);
    }

    #[test]
    fn custom_extensions_from_config() {
        let config = ReviewConfig {
            skip_extensions: vec!["snap".into()],
            ..ReviewConfig::default()
        };
        let filter = DiffFilter::from_config(&config);

        let outcome = run(&filter, "component.test.snap", "+snapshot content");
        assert!(outcome.kept.is_empty());
    }

    #[test]
    fn empty_diff_returns_empty_result() {
        let outcome = DiffFilter::default_filter().filter(Vec::new());
        assert!(outcome.kept.is_empty());
        assert!(outcome.skipped.is_empty());
    }

    #[test]
    fn too_large_files_skipped() {
        let filter = DiffFilter::default_filter();

        // 1002 added lines exceed the default budget of 1000 changed lines.
        let added: String = (0..1002).map(|i| format!("+line {i}\n")).collect();
        let raw = format!(
            "diff --git a/big.rs b/big.rs\n--- a/big.rs\n+++ b/big.rs\n@@ -1,1 +1,1003 @@\n{added}"
        );

        let outcome = filter.filter(parse_unified_diff(&raw).unwrap());
        assert!(outcome.kept.is_empty());
        assert!(matches!(outcome.skipped[0].reason, SkipReason::TooLarge));
    }
}