Skip to main content

batuta/bug_hunter/
config.rs

//! Bug Hunter Configuration
//!
//! Handles loading and parsing of `.pmat/bug-hunter.toml` configuration files.
4
5use serde::{Deserialize, Serialize};
6use std::path::Path;
7
8/// Bug Hunter configuration loaded from `.pmat/bug-hunter.toml`.
9#[derive(Debug, Clone, Default, Serialize, Deserialize)]
10pub struct BugHunterConfig {
11    /// Allowlist entries for intentional patterns
12    #[serde(default)]
13    pub allow: Vec<AllowEntry>,
14
15    /// Custom pattern definitions
16    #[serde(default)]
17    pub patterns: Vec<CustomPattern>,
18
19    /// Trend tracking settings
20    #[serde(default)]
21    pub trend: TrendConfig,
22}
23
24/// An allowlist entry marking a pattern as intentional.
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct AllowEntry {
27    /// File glob pattern (e.g., "src/optim/*.rs")
28    pub file: String,
29
30    /// Pattern to allow (e.g., "unimplemented")
31    pub pattern: String,
32
33    /// Reason for allowing (documentation)
34    #[serde(default)]
35    pub reason: String,
36
37    /// Optional: only allow in specific line ranges
38    #[serde(default)]
39    pub lines: Option<LineRange>,
40}
41
42/// Line range for scoped allowlist entries.
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct LineRange {
45    pub start: usize,
46    pub end: usize,
47}
48
49/// A custom pattern definition.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct CustomPattern {
52    /// The pattern to match (regex or literal)
53    pub pattern: String,
54
55    /// Category for the finding
56    #[serde(default = "default_category")]
57    pub category: String,
58
59    /// Severity level
60    #[serde(default = "default_severity")]
61    pub severity: String,
62
63    /// Suspiciousness score (0.0-1.0)
64    #[serde(default = "default_suspiciousness")]
65    pub suspiciousness: f64,
66
67    /// Optional description
68    #[serde(default)]
69    pub description: String,
70
71    /// File glob to limit scope (optional)
72    #[serde(default)]
73    pub file_glob: Option<String>,
74
75    /// Language filter (optional: "rust", "python", "typescript", "go")
76    #[serde(default)]
77    pub language: Option<String>,
78}
79
/// Serde fallback for `CustomPattern::category`.
fn default_category() -> String {
    String::from("Custom")
}
83
/// Serde fallback for `CustomPattern::severity`.
fn default_severity() -> String {
    String::from("Medium")
}
87
/// Serde fallback for `CustomPattern::suspiciousness`: the midpoint of the
/// documented 0.0-1.0 scale.
fn default_suspiciousness() -> f64 {
    const MIDPOINT: f64 = 0.5;
    MIDPOINT
}
91
92/// Trend tracking configuration.
93#[derive(Debug, Clone, Serialize, Deserialize)]
94pub struct TrendConfig {
95    /// Enable automatic trend snapshots
96    #[serde(default)]
97    pub enabled: bool,
98
99    /// Snapshot interval in days
100    #[serde(default = "default_interval")]
101    pub interval_days: u32,
102
103    /// Maximum snapshots to retain
104    #[serde(default = "default_max_snapshots")]
105    pub max_snapshots: usize,
106}
107
/// Serde fallback for `TrendConfig::interval_days`, in days.
fn default_interval() -> u32 {
    const DAYS: u32 = 7;
    DAYS
}
111
/// Serde fallback for `TrendConfig::max_snapshots`.
fn default_max_snapshots() -> usize {
    const SNAPSHOTS: usize = 52;
    SNAPSHOTS
}
115
116impl Default for TrendConfig {
117    fn default() -> Self {
118        Self {
119            enabled: false,
120            interval_days: default_interval(),
121            max_snapshots: default_max_snapshots(),
122        }
123    }
124}
125
126impl BugHunterConfig {
127    /// Load configuration from a project path.
128    ///
129    /// Looks for `.pmat/bug-hunter.toml` in the project root.
130    pub fn load(project_path: &Path) -> Self {
131        let config_path = project_path.join(".pmat").join("bug-hunter.toml");
132        if config_path.exists() {
133            match std::fs::read_to_string(&config_path) {
134                Ok(content) => match toml::from_str(&content) {
135                    Ok(config) => return config,
136                    Err(e) => {
137                        eprintln!("Warning: Failed to parse {}: {}", config_path.display(), e);
138                    }
139                },
140                Err(e) => {
141                    eprintln!("Warning: Failed to read {}: {}", config_path.display(), e);
142                }
143            }
144        }
145        Self::default()
146    }
147
148    /// Check if a finding should be allowed (skipped).
149    pub fn is_allowed(&self, file_path: &Path, pattern: &str, line: usize) -> bool {
150        let file_str = file_path.to_string_lossy();
151
152        for entry in &self.allow {
153            // Check pattern match
154            if !entry.pattern.eq_ignore_ascii_case(pattern) && entry.pattern != "*" {
155                continue;
156            }
157
158            // Check file glob
159            if !glob_match(&entry.file, &file_str) {
160                continue;
161            }
162
163            // Check line range if specified
164            if let Some(ref range) = entry.lines {
165                if line < range.start || line > range.end {
166                    continue;
167                }
168            }
169
170            return true;
171        }
172
173        false
174    }
175}
176
177/// Simple glob matching (supports * and **).
178fn glob_match(pattern: &str, path: &str) -> bool {
179    if pattern == "*" || pattern == "**" {
180        return true;
181    }
182
183    // Convert glob to regex-like matching
184    let pattern_parts: Vec<&str> = pattern.split('/').collect();
185    let path_parts: Vec<&str> = path.split('/').collect();
186
187    glob_match_parts(&pattern_parts, &path_parts)
188}
189
190fn glob_match_parts(pattern: &[&str], path: &[&str]) -> bool {
191    let Some((&p, pattern_rest)) = pattern.split_first() else {
192        return path.is_empty();
193    };
194
195    if p == "**" {
196        return glob_match_doublestar(pattern_rest, path);
197    }
198
199    let Some((&path_first, path_rest)) = path.split_first() else {
200        return false;
201    };
202
203    segment_matches(p, path_first) && glob_match_parts(pattern_rest, path_rest)
204}
205
206/// Handle ** glob pattern: matches zero or more path segments
207fn glob_match_doublestar(pattern_rest: &[&str], path: &[&str]) -> bool {
208    for i in 0..=path.len() {
209        if glob_match_parts(pattern_rest, path.get(i..).unwrap_or(&[])) {
210            return true;
211        }
212    }
213    false
214}
215
/// Matches a single path segment against a single pattern segment.
///
/// - `*` matches any segment, including the empty one.
/// - A pattern without `*` must equal the segment exactly.
/// - A pattern with exactly one `*` matches when the segment starts with the
///   text before the `*` and ends with the text after it, with the two
///   parts not overlapping.
/// - A pattern with more than one `*` is not expanded; it falls back to an
///   exact string comparison.
fn segment_matches(pattern: &str, segment: &str) -> bool {
    if pattern == "*" {
        return true;
    }

    match pattern.split_once('*') {
        // No wildcard at all: literal comparison.
        None => pattern == segment,
        // Exactly one wildcard: prefix + anything + suffix.
        Some((prefix, suffix)) if !suffix.contains('*') => {
            // Without the length guard the prefix and suffix could share
            // characters, e.g. "ab*ba" would wrongly accept "aba".
            segment.len() >= prefix.len() + suffix.len()
                && segment.starts_with(prefix)
                && segment.ends_with(suffix)
        }
        // Fallback to exact match for complex (multi-wildcard) patterns.
        Some(_) => pattern == segment,
    }
}
235
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates an empty, process-unique project directory for a filesystem
    /// test and returns its root.
    ///
    /// The process id is part of the name so concurrent test runs on the same
    /// machine do not share fixtures. Anything left behind by an aborted
    /// earlier run is removed first.
    fn scratch_project(tag: &str) -> std::path::PathBuf {
        let root = std::env::temp_dir().join(format!(
            "batuta_test_config_{}_{}",
            tag,
            std::process::id()
        ));
        let _ = std::fs::remove_dir_all(&root);
        std::fs::create_dir_all(&root).expect("create scratch dir failed");
        root
    }

    #[test]
    fn test_glob_match_simple() {
        assert!(glob_match("src/*.rs", "src/main.rs"));
        assert!(glob_match("src/*.rs", "src/lib.rs"));
        assert!(!glob_match("src/*.rs", "src/foo/bar.rs"));
    }

    #[test]
    fn test_glob_match_double_star() {
        assert!(glob_match("src/**/*.rs", "src/main.rs"));
        assert!(glob_match("src/**/*.rs", "src/foo/bar.rs"));
        assert!(glob_match("src/**/*.rs", "src/foo/bar/baz.rs"));
        assert!(!glob_match("src/**/*.rs", "test/main.rs"));
    }

    #[test]
    fn test_glob_match_star() {
        assert!(glob_match("*", "anything"));
        assert!(glob_match("**", "any/path/here"));
    }

    #[test]
    fn test_is_allowed() {
        let config = BugHunterConfig {
            allow: vec![AllowEntry {
                file: "src/optim/*.rs".to_string(),
                pattern: "unimplemented".to_string(),
                reason: "Batch optimizers don't support step()".to_string(),
                lines: None,
            }],
            ..Default::default()
        };

        assert!(config.is_allowed(Path::new("src/optim/admm.rs"), "unimplemented", 100));
        assert!(!config.is_allowed(Path::new("src/main.rs"), "unimplemented", 100));
        assert!(!config.is_allowed(Path::new("src/optim/admm.rs"), "placeholder", 100));
    }

    #[test]
    fn test_is_allowed_with_line_range() {
        let config = BugHunterConfig {
            allow: vec![AllowEntry {
                file: "src/foo.rs".to_string(),
                pattern: "TODO".to_string(),
                reason: "Known issue".to_string(),
                lines: Some(LineRange { start: 10, end: 20 }),
            }],
            ..Default::default()
        };

        assert!(config.is_allowed(Path::new("src/foo.rs"), "TODO", 15));
        assert!(!config.is_allowed(Path::new("src/foo.rs"), "TODO", 5));
        assert!(!config.is_allowed(Path::new("src/foo.rs"), "TODO", 25));
    }

    #[test]
    fn test_parse_config() {
        let toml = r#"
[[allow]]
file = "src/optim/*.rs"
pattern = "unimplemented"
reason = "Batch optimizers"

[[patterns]]
pattern = "PERF-TODO"
category = "PerformanceDebt"
severity = "High"
suspiciousness = 0.8
"#;

        let config: BugHunterConfig = toml::from_str(toml).expect("toml parse failed");
        assert_eq!(config.allow.len(), 1);
        assert_eq!(config.patterns.len(), 1);
        assert_eq!(config.patterns[0].pattern, "PERF-TODO");
        // Compared with a tolerance, like the other float assertions here.
        assert!((config.patterns[0].suspiciousness - 0.8).abs() < f64::EPSILON);
    }

    // ================================================================
    // Additional coverage tests
    // ================================================================

    #[test]
    fn test_load_nonexistent_path() {
        // load() with a path that has no .pmat/bug-hunter.toml should return default
        let config = BugHunterConfig::load(Path::new("/absolutely/nonexistent/path"));
        assert!(config.allow.is_empty());
        assert!(config.patterns.is_empty());
        assert!(!config.trend.enabled);
    }

    #[test]
    fn test_load_valid_toml() {
        use std::fs;
        let tmp = scratch_project("load_valid");
        let pmat_dir = tmp.join(".pmat");
        fs::create_dir_all(&pmat_dir).expect("create .pmat dir failed");

        let toml_content = r#"
[[allow]]
file = "src/**/*.rs"
pattern = "todo"
reason = "Known issues"

[trend]
enabled = true
interval_days = 14
max_snapshots = 100
"#;
        fs::write(pmat_dir.join("bug-hunter.toml"), toml_content).expect("fs write failed");

        let config = BugHunterConfig::load(&tmp);
        assert_eq!(config.allow.len(), 1);
        assert_eq!(config.allow[0].pattern, "todo");
        assert!(config.trend.enabled);
        assert_eq!(config.trend.interval_days, 14);
        assert_eq!(config.trend.max_snapshots, 100);

        let _ = fs::remove_dir_all(&tmp);
    }

    #[test]
    fn test_load_invalid_toml() {
        use std::fs;
        let tmp = scratch_project("load_invalid");
        let pmat_dir = tmp.join(".pmat");
        fs::create_dir_all(&pmat_dir).expect("create .pmat dir failed");

        // Write invalid TOML content
        fs::write(pmat_dir.join("bug-hunter.toml"), "{{invalid toml!!!").expect("fs write failed");

        // Should print warning and return default
        let config = BugHunterConfig::load(&tmp);
        assert!(config.allow.is_empty());
        assert!(config.patterns.is_empty());

        let _ = fs::remove_dir_all(&tmp);
    }

    #[test]
    fn test_load_unreadable_file() {
        // The config path exists, but as a directory rather than a file
        use std::fs;
        let tmp = scratch_project("load_unreadable");
        let toml_as_dir = tmp.join(".pmat").join("bug-hunter.toml");
        // Create as directory, not file
        fs::create_dir_all(&toml_as_dir).expect("create fixture dir failed");

        // exists() returns true for directories, but read_to_string will fail
        let config = BugHunterConfig::load(&tmp);
        assert!(config.allow.is_empty());

        let _ = fs::remove_dir_all(&tmp);
    }

    #[test]
    fn test_default_config() {
        let config = BugHunterConfig::default();
        assert!(config.allow.is_empty());
        assert!(config.patterns.is_empty());
        assert!(!config.trend.enabled);
        assert_eq!(config.trend.interval_days, 7);
        assert_eq!(config.trend.max_snapshots, 52);
    }

    #[test]
    fn test_trend_config_default() {
        let trend = TrendConfig::default();
        assert!(!trend.enabled);
        assert_eq!(trend.interval_days, 7);
        assert_eq!(trend.max_snapshots, 52);
    }

    #[test]
    fn test_custom_pattern_defaults() {
        let toml = r#"
[[patterns]]
pattern = "FIXME"
"#;

        let config: BugHunterConfig = toml::from_str(toml).expect("toml parse failed");
        let p = &config.patterns[0];
        assert_eq!(p.pattern, "FIXME");
        assert_eq!(p.category, "Custom");
        assert_eq!(p.severity, "Medium");
        assert!((p.suspiciousness - 0.5).abs() < f64::EPSILON);
        assert_eq!(p.description, "");
        assert!(p.file_glob.is_none());
        assert!(p.language.is_none());
    }

    #[test]
    fn test_custom_pattern_full_fields() {
        let toml = r#"
[[patterns]]
pattern = "HACK"
category = "TechDebt"
severity = "Critical"
suspiciousness = 0.9
description = "Hack workaround"
file_glob = "src/**/*.rs"
language = "rust"
"#;

        let config: BugHunterConfig = toml::from_str(toml).expect("toml parse failed");
        let p = &config.patterns[0];
        assert_eq!(p.pattern, "HACK");
        assert_eq!(p.category, "TechDebt");
        assert_eq!(p.severity, "Critical");
        assert!((p.suspiciousness - 0.9).abs() < f64::EPSILON);
        assert_eq!(p.description, "Hack workaround");
        assert_eq!(p.file_glob.as_deref(), Some("src/**/*.rs"));
        assert_eq!(p.language.as_deref(), Some("rust"));
    }

    #[test]
    fn test_is_allowed_wildcard_pattern() {
        let config = BugHunterConfig {
            allow: vec![AllowEntry {
                file: "**".to_string(),
                pattern: "*".to_string(),
                reason: "Allow everything".to_string(),
                lines: None,
            }],
            ..Default::default()
        };

        // Wildcard pattern "*" should match any pattern
        assert!(config.is_allowed(Path::new("src/anything.rs"), "any_pattern", 1));
        assert!(config.is_allowed(Path::new("tests/foo.rs"), "different", 999));
    }

    #[test]
    fn test_is_allowed_case_insensitive_pattern() {
        let config = BugHunterConfig {
            allow: vec![AllowEntry {
                file: "src/*.rs".to_string(),
                pattern: "TODO".to_string(),
                reason: "Known".to_string(),
                lines: None,
            }],
            ..Default::default()
        };

        // eq_ignore_ascii_case should match
        assert!(config.is_allowed(Path::new("src/main.rs"), "todo", 1));
        assert!(config.is_allowed(Path::new("src/main.rs"), "Todo", 1));
        assert!(config.is_allowed(Path::new("src/main.rs"), "TODO", 1));
    }

    #[test]
    fn test_is_allowed_no_entries() {
        let config = BugHunterConfig::default();
        assert!(!config.is_allowed(Path::new("src/main.rs"), "TODO", 1));
    }

    #[test]
    fn test_is_allowed_multiple_entries() {
        let config = BugHunterConfig {
            allow: vec![
                AllowEntry {
                    file: "src/a.rs".to_string(),
                    pattern: "TODO".to_string(),
                    reason: "".to_string(),
                    lines: None,
                },
                AllowEntry {
                    file: "src/b.rs".to_string(),
                    pattern: "FIXME".to_string(),
                    reason: "".to_string(),
                    lines: None,
                },
            ],
            ..Default::default()
        };

        assert!(config.is_allowed(Path::new("src/a.rs"), "TODO", 1));
        assert!(!config.is_allowed(Path::new("src/a.rs"), "FIXME", 1));
        assert!(config.is_allowed(Path::new("src/b.rs"), "FIXME", 1));
        assert!(!config.is_allowed(Path::new("src/b.rs"), "TODO", 1));
    }

    #[test]
    fn test_glob_match_exact_segment() {
        // No wildcards - exact match
        assert!(glob_match("src/main.rs", "src/main.rs"));
        assert!(!glob_match("src/main.rs", "src/lib.rs"));
    }

    #[test]
    fn test_glob_match_empty_pattern() {
        // Empty pattern should match empty path
        assert!(glob_match("", ""));
        // Empty pattern should not match non-empty path
        assert!(!glob_match("", "src/main.rs"));
    }

    #[test]
    fn test_glob_match_double_star_at_end() {
        // ** at end matches anything remaining
        assert!(glob_match("src/**", "src/foo.rs"));
        assert!(glob_match("src/**", "src/foo/bar.rs"));
        assert!(glob_match("src/**", "src/foo/bar/baz.rs"));
    }

    #[test]
    fn test_glob_match_double_star_at_beginning() {
        assert!(glob_match("**/main.rs", "src/main.rs"));
        assert!(glob_match("**/main.rs", "deep/nested/main.rs"));
        assert!(glob_match("**/main.rs", "main.rs")); // zero segments before
    }

    #[test]
    fn test_glob_match_star_segment_prefix_suffix() {
        // Pattern "*.rs" matches any segment ending in .rs
        assert!(glob_match("*.rs", "main.rs"));
        assert!(glob_match("*.rs", "lib.rs"));
        assert!(!glob_match("*.rs", "main.py"));
    }

    #[test]
    fn test_glob_match_deeper_paths() {
        assert!(glob_match("a/b/c", "a/b/c"));
        assert!(!glob_match("a/b/c", "a/b/d"));
        assert!(!glob_match("a/b/c", "a/b"));
        assert!(!glob_match("a/b", "a/b/c"));
    }

    #[test]
    fn test_segment_matches_no_wildcard() {
        assert!(segment_matches("main.rs", "main.rs"));
        assert!(!segment_matches("main.rs", "lib.rs"));
    }

    #[test]
    fn test_segment_matches_star() {
        assert!(segment_matches("*", "anything"));
        assert!(segment_matches("*", ""));
    }

    #[test]
    fn test_segment_matches_prefix_suffix() {
        assert!(segment_matches("test_*.rs", "test_main.rs"));
        assert!(segment_matches("test_*.rs", "test_.rs")); // empty middle
        assert!(!segment_matches("test_*.rs", "main.rs"));
    }

    #[test]
    fn test_segment_matches_complex_pattern() {
        // Multiple wildcards fall back to exact match
        assert!(segment_matches("a*b*c", "a*b*c")); // exact match of the pattern string
        assert!(!segment_matches("a*b*c", "aXbYc")); // won't match - fallback is exact
    }

    #[test]
    fn test_glob_match_double_star_zero_segments() {
        // ** matches zero segments
        assert!(glob_match("**/src/*.rs", "src/main.rs"));
        // ** matches one segment
        assert!(glob_match("**/src/*.rs", "foo/src/main.rs"));
        // ** matches multiple segments
        assert!(glob_match("**/src/*.rs", "a/b/src/main.rs"));
    }

    #[test]
    fn test_allow_entry_line_range_boundaries() {
        let config = BugHunterConfig {
            allow: vec![AllowEntry {
                file: "src/foo.rs".to_string(),
                pattern: "TODO".to_string(),
                reason: "".to_string(),
                lines: Some(LineRange { start: 10, end: 20 }),
            }],
            ..Default::default()
        };

        // Exactly at boundaries
        assert!(config.is_allowed(Path::new("src/foo.rs"), "TODO", 10)); // start boundary
        assert!(config.is_allowed(Path::new("src/foo.rs"), "TODO", 20)); // end boundary
        assert!(!config.is_allowed(Path::new("src/foo.rs"), "TODO", 9)); // just before
        assert!(!config.is_allowed(Path::new("src/foo.rs"), "TODO", 21)); // just after
    }

    #[test]
    fn test_parse_config_with_line_range() {
        let toml = r#"
[[allow]]
file = "src/main.rs"
pattern = "HACK"
reason = "Temporary workaround"

[allow.lines]
start = 50
end = 75
"#;

        let config: BugHunterConfig = toml::from_str(toml).expect("toml parse failed");
        assert_eq!(config.allow.len(), 1);
        let entry = &config.allow[0];
        assert!(entry.lines.is_some());
        let range = entry.lines.as_ref().expect("unexpected failure");
        assert_eq!(range.start, 50);
        assert_eq!(range.end, 75);
    }

    #[test]
    fn test_parse_config_with_trend() {
        let toml = r#"
[trend]
enabled = true
interval_days = 30
max_snapshots = 24
"#;

        let config: BugHunterConfig = toml::from_str(toml).expect("toml parse failed");
        assert!(config.trend.enabled);
        assert_eq!(config.trend.interval_days, 30);
        assert_eq!(config.trend.max_snapshots, 24);
    }

    #[test]
    fn test_parse_config_empty_toml() {
        let config: BugHunterConfig = toml::from_str("").expect("toml parse failed");
        assert!(config.allow.is_empty());
        assert!(config.patterns.is_empty());
        assert!(!config.trend.enabled);
        assert_eq!(config.trend.interval_days, 7);
        assert_eq!(config.trend.max_snapshots, 52);
    }

    #[test]
    fn test_glob_match_parts_empty_pattern_empty_path() {
        // Both empty -> true
        assert!(glob_match_parts(&[], &[]));
    }

    #[test]
    fn test_glob_match_parts_pattern_longer_than_path() {
        // Pattern has segments but path is empty
        assert!(!glob_match_parts(&["src", "main.rs"], &[]));
    }

    #[test]
    fn test_glob_match_doublestar_only() {
        // Just ** matches anything
        assert!(glob_match_doublestar(&[], &[]));
        assert!(glob_match_doublestar(&[], &["a", "b", "c"]));
    }

    #[test]
    fn test_glob_match_doublestar_with_rest() {
        // ** followed by pattern
        assert!(glob_match_doublestar(&["*.rs"], &["main.rs"]));
        assert!(glob_match_doublestar(&["*.rs"], &["src", "main.rs"]));
        assert!(!glob_match_doublestar(&["*.rs"], &["main.py"]));
    }
}