Skip to main content

batuta/bug_hunter/
diff.rs

1//! Bug Hunter Diff Mode
2//!
3//! Compare findings against a baseline to show only new issues.
4
5use super::types::{Finding, HuntResult};
6use serde::{Deserialize, Serialize};
7use std::collections::HashSet;
8use std::path::Path;
9use std::process::Command;
10
/// Baseline storage for diff comparisons.
///
/// A snapshot of previously seen findings, reduced to their fingerprints.
/// It is serialized to / deserialized from JSON by `Baseline::save` and
/// `Baseline::load`, and queried with `Baseline::is_new`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Baseline {
    /// Git commit hash when baseline was created.
    /// `Baseline::from_findings` stores an empty string when the hash
    /// cannot be determined.
    pub commit: String,
    /// Timestamp when baseline was created, in whole seconds since the Unix
    /// epoch (`Baseline::from_findings` stores 0 if the clock reads earlier
    /// than the epoch).
    pub timestamp: u64,
    /// Finding fingerprints. As produced by `Baseline::from_findings` these
    /// are plain `file_name:line:title` strings (not hashes); being a set,
    /// findings with identical fingerprints are stored once.
    pub fingerprints: HashSet<String>,
}
21
22impl Baseline {
23    /// Create a new baseline from findings.
24    pub fn from_findings(findings: &[Finding]) -> Self {
25        let fingerprints = findings.iter().map(fingerprint).collect();
26
27        let commit = get_current_commit().unwrap_or_default();
28        let timestamp = std::time::SystemTime::now()
29            .duration_since(std::time::UNIX_EPOCH)
30            .map(|d| d.as_secs())
31            .unwrap_or(0);
32
33        Self { commit, timestamp, fingerprints }
34    }
35
36    /// Load baseline from disk.
37    pub fn load(project_path: &Path) -> Option<Self> {
38        let baseline_path = project_path.join(".pmat").join("bug-hunter-baseline.json");
39        if baseline_path.exists() {
40            let content = std::fs::read_to_string(&baseline_path).ok()?;
41            serde_json::from_str(&content).ok()
42        } else {
43            None
44        }
45    }
46
47    /// Save baseline to disk.
48    pub fn save(&self, project_path: &Path) -> Result<(), String> {
49        let pmat_dir = project_path.join(".pmat");
50        std::fs::create_dir_all(&pmat_dir)
51            .map_err(|e| format!("Failed to create .pmat directory: {}", e))?;
52
53        let baseline_path = pmat_dir.join("bug-hunter-baseline.json");
54        let content = serde_json::to_string_pretty(self)
55            .map_err(|e| format!("Failed to serialize baseline: {}", e))?;
56
57        std::fs::write(&baseline_path, content)
58            .map_err(|e| format!("Failed to write baseline: {}", e))?;
59
60        Ok(())
61    }
62
63    /// Check if a finding is new (not in baseline).
64    pub fn is_new(&self, finding: &Finding) -> bool {
65        !self.fingerprints.contains(&fingerprint(finding))
66    }
67}
68
69/// Generate a fingerprint for a finding.
70fn fingerprint(finding: &Finding) -> String {
71    // Use file path (relative), line number, and title for fingerprinting
72    // This allows findings to persist across minor code movements
73    let file_name = finding.file.file_name().map(|s| s.to_string_lossy()).unwrap_or_default();
74    format!("{}:{}:{}", file_name, finding.line, finding.title)
75}
76
/// Get current git commit hash.
///
/// Runs `git rev-parse HEAD` in the process's current working directory and
/// returns its trimmed standard output. Yields `None` when `git` cannot be
/// started or exits with a non-zero status.
fn get_current_commit() -> Option<String> {
    let rev_parse = Command::new("git").args(["rev-parse", "HEAD"]).output().ok()?;
    if !rev_parse.status.success() {
        return None;
    }

    let hash = String::from_utf8_lossy(&rev_parse.stdout);
    Some(hash.trim().to_string())
}
87
/// Get files changed since a commit or time period.
///
/// Runs `git` inside `project_path` and returns the paths it reports, each
/// exactly once and sorted lexicographically so the result is deterministic.
///
/// * `base` - revision to diff against (`git diff --name-only <base> --`).
///   Takes precedence over `since` when both are given.
/// * `since` - time expression forwarded as `git log --since=<since>`; only
///   used when `base` is `None`.
///
/// Returns an empty vector when neither `base` nor `since` is given, when
/// `git` cannot be started (for example because `project_path` does not
/// exist), or when `git` exits with a non-zero status.
pub fn get_changed_files(
    project_path: &Path,
    base: Option<&str>,
    since: Option<&str>,
) -> Vec<String> {
    let mut git = Command::new("git");
    git.current_dir(project_path);

    match (base, since) {
        (Some(base), _) => {
            // The trailing `--` tells git that `base` is a revision, never a
            // pathspec, so a ref that shares its name with a file is not
            // rejected as ambiguous or silently treated as a path filter.
            git.args(["diff", "--name-only", base, "--"]);
        }
        (None, Some(since)) => {
            git.args(["log", "--name-only", "--pretty=format:"])
                .arg(format!("--since={}", since));
        }
        (None, None) => return Vec::new(),
    }

    let out = match git.output() {
        Ok(out) if out.status.success() => out,
        _ => return Vec::new(),
    };

    let mut files: Vec<String> = String::from_utf8_lossy(&out.stdout)
        .lines()
        .filter(|line| !line.is_empty())
        .map(str::to_string)
        .collect();

    // `git log --name-only` lists a file once per commit touching it; sorting
    // then deduplicating yields unique paths in a stable order.
    files.sort_unstable();
    files.dedup();
    files
}
118
119/// Filter findings to only include new ones.
120pub fn filter_new_findings(result: &HuntResult, baseline: &Baseline) -> Vec<Finding> {
121    result.findings.iter().filter(|f| baseline.is_new(f)).cloned().collect()
122}
123
124/// Filter findings to only those in changed files.
125pub fn filter_changed_files(findings: &[Finding], changed_files: &[String]) -> Vec<Finding> {
126    findings
127        .iter()
128        .filter(|f| {
129            let file_path = f.file.to_string_lossy();
130            changed_files.iter().any(|cf| file_path.ends_with(cf))
131        })
132        .cloned()
133        .collect()
134}
135
/// Diff result showing new and resolved findings.
///
/// Built by `DiffResult::compute` from the current hunt result and a stored
/// baseline.
#[derive(Debug, Clone)]
pub struct DiffResult {
    /// Newly introduced findings (current findings whose fingerprint is not
    /// in the baseline)
    pub new_findings: Vec<Finding>,
    /// Number of baseline fingerprints that no longer match any finding in
    /// the current run
    pub resolved_count: usize,
    /// Total findings in current run
    pub total_current: usize,
    /// Number of distinct fingerprints stored in the baseline
    pub total_baseline: usize,
    /// Base commit/time used for comparison (the `base_ref` label passed to
    /// `DiffResult::compute`, stored verbatim)
    pub base_reference: String,
}
150
151impl DiffResult {
152    /// Create a diff result from current findings and baseline.
153    pub fn compute(current: &HuntResult, baseline: &Baseline, base_ref: &str) -> Self {
154        let new_findings = filter_new_findings(current, baseline);
155        let current_fps: HashSet<String> = current.findings.iter().map(fingerprint).collect();
156
157        let resolved_count =
158            baseline.fingerprints.iter().filter(|fp| !current_fps.contains(*fp)).count();
159
160        Self {
161            new_findings,
162            resolved_count,
163            total_current: current.findings.len(),
164            total_baseline: baseline.fingerprints.len(),
165            base_reference: base_ref.to_string(),
166        }
167    }
168}
169
/// Unit tests for baseline persistence, fingerprinting, git-based change
/// detection and diff computation.
///
/// Tests that invoke `git` are written to pass whether or not the test
/// process runs inside a git repository.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bug_hunter::types::{DefectCategory, FindingSeverity};
    use std::path::PathBuf;

    /// Build a medium-severity logic-error finding at `file:line` with `title`.
    fn make_finding(file: &str, line: usize, title: &str) -> Finding {
        Finding::new("TEST-001".to_string(), PathBuf::from(file), line, title.to_string())
            .with_severity(FindingSeverity::Medium)
            .with_category(DefectCategory::LogicErrors)
            .with_suspiciousness(0.5)
    }

    #[test]
    fn test_fingerprint() {
        let f = make_finding("src/foo.rs", 42, "Pattern: TODO");
        let fp = fingerprint(&f);
        assert!(fp.contains("foo.rs"));
        assert!(fp.contains("42"));
        assert!(fp.contains("TODO"));
    }

    #[test]
    fn test_baseline_is_new() {
        let findings = vec![
            make_finding("src/foo.rs", 10, "Pattern: TODO"),
            make_finding("src/bar.rs", 20, "Pattern: FIXME"),
        ];
        let baseline = Baseline::from_findings(&findings);

        // Same finding is not new
        let same = make_finding("src/foo.rs", 10, "Pattern: TODO");
        assert!(!baseline.is_new(&same));

        // Different finding is new
        let new = make_finding("src/baz.rs", 30, "Pattern: HACK");
        assert!(baseline.is_new(&new));

        // Same file, different line is new
        let new_line = make_finding("src/foo.rs", 15, "Pattern: TODO");
        assert!(baseline.is_new(&new_line));
    }

    #[test]
    fn test_diff_result() {
        let baseline_findings = vec![
            make_finding("src/foo.rs", 10, "Pattern: TODO"),
            make_finding("src/bar.rs", 20, "Pattern: FIXME"),
        ];
        let baseline = Baseline::from_findings(&baseline_findings);

        let current = HuntResult {
            findings: vec![
                make_finding("src/foo.rs", 10, "Pattern: TODO"), // existing
                make_finding("src/baz.rs", 30, "Pattern: HACK"), // new
            ],
            ..Default::default()
        };

        let diff = DiffResult::compute(&current, &baseline, "main");

        assert_eq!(diff.new_findings.len(), 1);
        assert_eq!(diff.resolved_count, 1); // bar.rs:20 was resolved
        assert_eq!(diff.total_current, 2);
        assert_eq!(diff.total_baseline, 2);
    }

    // =========================================================================
    // Coverage gap: get_changed_files
    // =========================================================================

    #[test]
    fn test_get_changed_files_with_base() {
        // Use the actual project's git repo — compare HEAD~1 to HEAD
        let files = get_changed_files(std::path::Path::new("."), Some("HEAD~1"), None);
        // The result may legitimately be empty (HEAD is the initial commit, or
        // git / a repository is unavailable), so assert invariants that hold
        // for every outcome: no blank entries and no duplicates.
        assert!(files.iter().all(|f| !f.is_empty()));
        let unique: HashSet<&String> = files.iter().collect();
        assert_eq!(unique.len(), files.len());
    }

    #[test]
    fn test_get_changed_files_with_since() {
        let files = get_changed_files(std::path::Path::new("."), None, Some("1 day ago"));
        // Should not panic, and blank separator lines from `git log` must be dropped
        assert!(files.iter().all(|f| !f.is_empty()));
    }

    #[test]
    fn test_get_changed_files_neither() {
        // No base, no since → empty
        let files = get_changed_files(std::path::Path::new("."), None, None);
        assert!(files.is_empty());
    }

    #[test]
    fn test_get_changed_files_invalid_path() {
        let files =
            get_changed_files(std::path::Path::new("/nonexistent/repo"), Some("HEAD~1"), None);
        assert!(files.is_empty());
    }

    // =========================================================================
    // Coverage gap: filter_new_findings
    // =========================================================================

    #[test]
    fn test_filter_new_findings_all_new() {
        let baseline = Baseline::from_findings(&[]);
        let current = HuntResult {
            findings: vec![
                make_finding("src/a.rs", 1, "Pattern: TODO"),
                make_finding("src/b.rs", 2, "Pattern: FIXME"),
            ],
            ..Default::default()
        };
        let new = filter_new_findings(&current, &baseline);
        assert_eq!(new.len(), 2);
    }

    #[test]
    fn test_filter_new_findings_none_new() {
        let findings = vec![make_finding("src/a.rs", 1, "Pattern: TODO")];
        let baseline = Baseline::from_findings(&findings);
        let current = HuntResult { findings: findings.clone(), ..Default::default() };
        let new = filter_new_findings(&current, &baseline);
        assert!(new.is_empty());
    }

    // =========================================================================
    // Coverage gap: filter_changed_files
    // =========================================================================

    #[test]
    fn test_filter_changed_files_match() {
        let findings = vec![
            make_finding("src/foo.rs", 1, "Pattern: TODO"),
            make_finding("src/bar.rs", 2, "Pattern: FIXME"),
            make_finding("src/baz.rs", 3, "Pattern: HACK"),
        ];
        let changed = vec!["src/foo.rs".to_string(), "src/baz.rs".to_string()];
        let filtered = filter_changed_files(&findings, &changed);
        assert_eq!(filtered.len(), 2);
        assert!(filtered.iter().all(|f| {
            let p = f.file.to_string_lossy();
            p.ends_with("foo.rs") || p.ends_with("baz.rs")
        }));
    }

    #[test]
    fn test_filter_changed_files_no_match() {
        let findings = vec![make_finding("src/foo.rs", 1, "Pattern: TODO")];
        let changed = vec!["src/bar.rs".to_string()];
        let filtered = filter_changed_files(&findings, &changed);
        assert!(filtered.is_empty());
    }

    #[test]
    fn test_filter_changed_files_empty_changed() {
        let findings = vec![make_finding("src/foo.rs", 1, "Pattern: TODO")];
        let filtered = filter_changed_files(&findings, &[]);
        assert!(filtered.is_empty());
    }

    // =========================================================================
    // Coverage gap: Baseline::save()
    // =========================================================================

    #[test]
    fn test_baseline_save_and_load() {
        let dir = std::env::temp_dir().join(format!(
            "batuta_diff_test_{}",
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .expect("unexpected failure")
                .as_nanos()
        ));
        std::fs::create_dir_all(&dir).expect("mkdir failed");

        let findings = vec![
            make_finding("src/foo.rs", 10, "Pattern: TODO"),
            make_finding("src/bar.rs", 20, "Pattern: FIXME"),
        ];
        let baseline = Baseline::from_findings(&findings);

        // Save
        let result = baseline.save(&dir);
        assert!(result.is_ok(), "save failed: {:?}", result.err());

        // Load
        let loaded = Baseline::load(&dir);
        assert!(loaded.is_some(), "load returned None");
        let loaded = loaded.expect("unexpected failure");
        assert_eq!(loaded.fingerprints.len(), 2);
        assert_eq!(loaded.fingerprints, baseline.fingerprints);

        // Cleanup
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_baseline_save_creates_pmat_dir() {
        let dir = std::env::temp_dir().join(format!(
            "batuta_diff_pmat_{}",
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .expect("unexpected failure")
                .as_nanos()
        ));
        // Don't create dir; save should create .pmat subdirectory
        let baseline = Baseline::from_findings(&[]);
        let result = baseline.save(&dir);
        assert!(result.is_ok());
        assert!(dir.join(".pmat").join("bug-hunter-baseline.json").exists());

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_baseline_load_nonexistent() {
        let path = PathBuf::from("/nonexistent/path/that/does/not/exist");
        let loaded = Baseline::load(&path);
        assert!(loaded.is_none());
    }

    // =========================================================================
    // Coverage gap: get_changed_files error paths
    // =========================================================================

    #[test]
    fn test_get_changed_files_invalid_base_ref() {
        // Invalid base ref in a valid git repo should return empty
        let files = get_changed_files(
            std::path::Path::new("."),
            Some("INVALID_REF_THAT_DOES_NOT_EXIST_12345"),
            None,
        );
        assert!(files.is_empty());
    }

    #[test]
    fn test_get_changed_files_since_in_invalid_repo() {
        let files =
            get_changed_files(std::path::Path::new("/nonexistent/repo"), None, Some("1 week ago"));
        assert!(files.is_empty());
    }

    // =========================================================================
    // Coverage gap: fingerprint edge cases
    // =========================================================================

    #[test]
    fn test_fingerprint_with_directory_path() {
        // Finding with a directory path (no file_name)
        let f = make_finding("/", 1, "Pattern: TODO");
        let fp = fingerprint(&f);
        // Should not panic, just use empty or "/"
        assert!(fp.contains('1'));
        assert!(fp.contains("TODO"));
    }

    #[test]
    fn test_fingerprint_stability() {
        // Same input should always produce same fingerprint
        let f1 = make_finding("src/main.rs", 42, "Pattern: unwrap");
        let f2 = make_finding("src/main.rs", 42, "Pattern: unwrap");
        assert_eq!(fingerprint(&f1), fingerprint(&f2));
    }

    #[test]
    fn test_fingerprint_different_files_same_line_title() {
        let f1 = make_finding("src/a.rs", 10, "Pattern: TODO");
        let f2 = make_finding("src/b.rs", 10, "Pattern: TODO");
        assert_ne!(fingerprint(&f1), fingerprint(&f2));
    }

    // =========================================================================
    // Coverage gap: DiffResult edge cases
    // =========================================================================

    #[test]
    fn test_diff_result_all_resolved() {
        let baseline_findings = vec![
            make_finding("src/a.rs", 1, "Pattern: TODO"),
            make_finding("src/b.rs", 2, "Pattern: FIXME"),
        ];
        let baseline = Baseline::from_findings(&baseline_findings);

        let current = HuntResult {
            findings: vec![], // All findings resolved
            ..Default::default()
        };

        let diff = DiffResult::compute(&current, &baseline, "HEAD~5");

        assert_eq!(diff.new_findings.len(), 0);
        assert_eq!(diff.resolved_count, 2);
        assert_eq!(diff.total_current, 0);
        assert_eq!(diff.total_baseline, 2);
        assert_eq!(diff.base_reference, "HEAD~5");
    }

    #[test]
    fn test_diff_result_empty_baseline() {
        let baseline = Baseline::from_findings(&[]);

        let current = HuntResult {
            findings: vec![make_finding("src/a.rs", 1, "Pattern: TODO")],
            ..Default::default()
        };

        let diff = DiffResult::compute(&current, &baseline, "initial");

        assert_eq!(diff.new_findings.len(), 1);
        assert_eq!(diff.resolved_count, 0);
        assert_eq!(diff.total_current, 1);
        assert_eq!(diff.total_baseline, 0);
    }

    // =========================================================================
    // Coverage gap: Baseline::from_findings timestamp/commit
    // =========================================================================

    #[test]
    fn test_baseline_from_findings_has_timestamp() {
        let baseline = Baseline::from_findings(&[]);
        // Timestamp should be non-zero (we're past epoch)
        assert!(baseline.timestamp > 0);
    }

    #[test]
    fn test_baseline_from_findings_has_commit() {
        let baseline = Baseline::from_findings(&[]);
        // When HEAD resolves, commit is a SHA. Outside a git repo (e.g. a
        // clean-room container) or in a repository that has no commits yet,
        // get_current_commit() returns None → empty string via the fallback in
        // from_findings. Gate on HEAD itself rather than on the mere presence
        // of a git directory so the freshly initialised repository case is
        // handled too.
        let head_resolves = std::process::Command::new("git")
            .args(["rev-parse", "--verify", "HEAD"])
            .output()
            .map(|o| o.status.success())
            .unwrap_or(false);
        if head_resolves {
            assert!(!baseline.commit.is_empty(), "commit should be set in a git repo");
        } else {
            assert!(baseline.commit.is_empty(), "commit should be empty outside a git repo");
        }
    }
}