Skip to main content

batuta/bug_hunter/
blame.rs

//! Git Blame Integration
//!
//! Provides git blame information for findings to help identify who introduced
//! bugs and when, enabling better triage and assignment.
5
6use std::collections::HashMap;
7use std::path::Path;
8use std::process::Command;
9
/// Authorship details reported by `git blame` for a single line.
///
/// Every field is a plain string. A field is left empty when the matching
/// piece of information was not present in (or could not be parsed from) the
/// blame output.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct BlameInfo {
    /// Author name
    pub author: String,
    /// Abbreviated commit hash
    pub commit: String,
    /// Author date of the commit (YYYY-MM-DD format)
    pub date: String,
}
20
21/// Cache for blame lookups to avoid repeated git calls.
22#[derive(Debug, Default)]
23pub struct BlameCache {
24    /// Cache: (file, line) -> BlameInfo
25    cache: HashMap<(String, usize), BlameInfo>,
26}
27
28impl BlameCache {
29    /// Create a new empty blame cache.
30    pub fn new() -> Self {
31        Self { cache: HashMap::new() }
32    }
33
34    /// Get blame info for a specific file and line, using cache.
35    pub fn get_blame(
36        &mut self,
37        project_path: &Path,
38        file: &Path,
39        line: usize,
40    ) -> Option<BlameInfo> {
41        let file_str = file.to_string_lossy().to_string();
42        let key = (file_str.clone(), line);
43
44        if let Some(cached) = self.cache.get(&key) {
45            return Some(cached.clone());
46        }
47
48        // Not in cache, fetch from git
49        let blame = get_blame_for_line(project_path, file, line)?;
50        self.cache.insert(key, blame.clone());
51        Some(blame)
52    }
53
54    /// Prefetch blame info for multiple lines in a file (batch optimization).
55    pub fn prefetch_file(&mut self, project_path: &Path, file: &Path, lines: &[usize]) {
56        if lines.is_empty() {
57            return;
58        }
59
60        let file_str = file.to_string_lossy().to_string();
61
62        // Check which lines we don't have cached
63        let uncached: Vec<usize> = lines
64            .iter()
65            .filter(|&&l| !self.cache.contains_key(&(file_str.clone(), l)))
66            .copied()
67            .collect();
68
69        if uncached.is_empty() {
70            return;
71        }
72
73        // Batch fetch all blame info for the file
74        if let Some(all_blames) = get_blame_for_file(project_path, file) {
75            for (line, blame) in all_blames {
76                self.cache.insert((file_str.clone(), line), blame);
77            }
78        }
79    }
80}
81
82/// Get blame info for a specific line using git blame.
83fn get_blame_for_line(project_path: &Path, file: &Path, line: usize) -> Option<BlameInfo> {
84    let output = Command::new("git")
85        .current_dir(project_path)
86        .args([
87            "blame",
88            "-L",
89            &format!("{},{}", line, line),
90            "--porcelain",
91            &file.to_string_lossy(),
92        ])
93        .output()
94        .ok()?;
95
96    if !output.status.success() {
97        return None;
98    }
99
100    parse_porcelain_blame(&String::from_utf8_lossy(&output.stdout))
101}
102
103/// Get blame info for an entire file (more efficient for multiple lines).
104fn get_blame_for_file(project_path: &Path, file: &Path) -> Option<HashMap<usize, BlameInfo>> {
105    let output = Command::new("git")
106        .current_dir(project_path)
107        .args(["blame", "--porcelain", &file.to_string_lossy()])
108        .output()
109        .ok()?;
110
111    if !output.status.success() {
112        return None;
113    }
114
115    Some(parse_porcelain_blame_full(&String::from_utf8_lossy(&output.stdout)))
116}
117
118/// Parse porcelain blame output for a single line.
119fn parse_porcelain_blame(output: &str) -> Option<BlameInfo> {
120    let mut author = String::new();
121    let mut commit = String::new();
122    let mut date = String::new();
123
124    for line in output.lines() {
125        if line.starts_with("author ") {
126            author = line.strip_prefix("author ").unwrap_or("").to_string();
127        } else if line.starts_with("author-time ") {
128            // Convert Unix timestamp to YYYY-MM-DD
129            if let Ok(timestamp) = line.strip_prefix("author-time ").unwrap_or("0").parse::<i64>() {
130                date = format_timestamp(timestamp);
131            }
132        } else if commit.is_empty() && line.len() >= 40 {
133            // Commit lines start with 40-char hex hash: <hash> <orig_line> <final_line> [count]
134            let first_40: String = line.chars().take(40).collect();
135            if first_40.chars().all(|c| c.is_ascii_hexdigit()) {
136                commit = line[..7.min(line.len())].to_string();
137            }
138        }
139    }
140
141    if author.is_empty() && commit.is_empty() {
142        return None;
143    }
144
145    Some(BlameInfo { author, commit, date })
146}
147
/// Check if a line is a porcelain blame commit header, i.e. whether it begins
/// with 40 ASCII hex digits.
fn is_commit_header(line: &str) -> bool {
    // Any non-ASCII byte fails the hex test, so checking bytes is equivalent
    // to checking the first 40 characters.
    matches!(
        line.as_bytes().get(..40),
        Some(prefix) if prefix.iter().all(u8::is_ascii_hexdigit)
    )
}
153
154/// Insert a blame entry if we have accumulated valid state.
155fn flush_blame_entry(
156    results: &mut HashMap<usize, BlameInfo>,
157    line_num: usize,
158    author: &str,
159    commit: &str,
160    date: &str,
161) {
162    if line_num > 0 && !commit.is_empty() {
163        results.insert(
164            line_num,
165            BlameInfo {
166                author: author.to_string(),
167                commit: commit.to_string(),
168                date: date.to_string(),
169            },
170        );
171    }
172}
173
174fn parse_porcelain_blame_full(output: &str) -> HashMap<usize, BlameInfo> {
175    let mut results = HashMap::new();
176    let mut current_line = 0usize;
177    let mut current_author = String::new();
178    let mut current_commit = String::new();
179    let mut current_date = String::new();
180
181    for line in output.lines() {
182        if is_commit_header(line) {
183            flush_blame_entry(
184                &mut results,
185                current_line,
186                &current_author,
187                &current_commit,
188                &current_date,
189            );
190
191            let parts: Vec<&str> = line.split_whitespace().collect();
192            if parts.len() >= 3 {
193                current_commit = parts[0][..7.min(parts[0].len())].to_string();
194                if let Ok(line_num) = parts[2].parse::<usize>() {
195                    current_line = line_num;
196                }
197            }
198            current_author.clear();
199            current_date.clear();
200        } else if let Some(author) = line.strip_prefix("author ") {
201            current_author = author.to_string();
202        } else if let Some(ts_str) = line.strip_prefix("author-time ") {
203            if let Ok(timestamp) = ts_str.parse::<i64>() {
204                current_date = format_timestamp(timestamp);
205            }
206        }
207    }
208
209    flush_blame_entry(&mut results, current_line, &current_author, &current_commit, &current_date);
210    results
211}
212
/// Format Unix timestamp as YYYY-MM-DD.
///
/// The timestamp is interpreted as seconds since 1970-01-01 with no time-zone
/// offset applied, and converted using Gregorian leap-year rules.
///
/// Negative timestamps yield an empty string. Years beyond 9999 are printed
/// with as many digits as they need.
fn format_timestamp(timestamp: i64) -> String {
    const SECS_PER_DAY: i64 = 86_400;
    // The Gregorian calendar repeats exactly every 400 years.
    const DAYS_PER_400_YEARS: i64 = 146_097;

    if timestamp < 0 {
        return String::new();
    }

    let days_since_epoch = timestamp / SECS_PER_DAY;

    // Skip whole 400-year cycles up front. This keeps every intermediate value
    // in range for any `i64` timestamp and bounds the year loop below to at
    // most 399 iterations.
    let cycles = days_since_epoch / DAYS_PER_400_YEARS;
    let mut remaining_days = days_since_epoch % DAYS_PER_400_YEARS;

    // Leap years repeat with the 400-year cycle, so walking forward from 1970
    // gives the right year lengths for the reduced day count.
    let mut year = 1970i32;
    loop {
        let days_in_year = if is_leap_year(year) { 366 } else { 365 };
        if remaining_days < days_in_year {
            break;
        }
        remaining_days -= days_in_year;
        year += 1;
    }

    let days_in_months: [i64; 12] = if is_leap_year(year) {
        [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    } else {
        [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    };

    let mut month = 1;
    for &days in &days_in_months {
        if remaining_days < days {
            break;
        }
        remaining_days -= days;
        month += 1;
    }

    let day = remaining_days + 1;
    let full_year = i64::from(year) + 400 * cycles;
    format!("{:04}-{:02}-{:02}", full_year, month, day)
}

/// Returns `true` when `year` is a leap year under the Gregorian rules:
/// divisible by 4 but not by 100, or divisible by 400.
fn is_leap_year(year: i32) -> bool {
    (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0)
}
258
#[cfg(test)]
mod tests {
    use super::*;

    // ----------------------------------------------------------------
    // Core behaviour
    // ----------------------------------------------------------------

    /// A complete single-line porcelain record yields all three fields.
    #[test]
    fn test_parse_porcelain_blame_basic() {
        let output = concat!(
            "abc1234567890123456789012345678901234567 1 1 1\n",
            "author John Doe\n",
            "author-mail <john@example.com>\n",
            "author-time 1704067200\n",
            "author-tz +0000\n",
            "committer John Doe\n",
            "committer-mail <john@example.com>\n",
            "committer-time 1704067200\n",
            "committer-tz +0000\n",
            "summary Initial commit\n",
            "filename src/main.rs\n",
            "\tfn main() {}\n",
        );

        let BlameInfo { author, commit, date } =
            parse_porcelain_blame(output).expect("unexpected failure");
        assert_eq!(author, "John Doe");
        assert_eq!(commit, "abc1234");
        assert_eq!(date, "2024-01-01");
    }

    /// Empty output carries no author and no hash, so nothing is returned.
    #[test]
    fn test_parse_porcelain_blame_empty() {
        assert!(parse_porcelain_blame("").is_none());
    }

    /// Midnight timestamps map onto the expected calendar dates.
    #[test]
    fn test_format_timestamp() {
        let cases = [
            (1704067200, "2024-01-01"), // 2024-01-01 00:00:00 UTC
            (961027200, "2000-06-15"),
            (0, "1970-01-01"),
        ];
        for (secs, expected) in cases {
            assert_eq!(format_timestamp(secs), expected);
        }
    }

    /// One representative year for each branch of the leap-year rule.
    #[test]
    fn test_is_leap_year() {
        // Divisible by 400; divisible by 4 but not by 100.
        for year in [2000, 2024] {
            assert!(is_leap_year(year));
        }
        // Divisible by 100 but not by 400; not divisible by 4.
        for year in [1900, 2023] {
            assert!(!is_leap_year(year));
        }
    }

    /// A freshly created cache holds no entries.
    #[test]
    fn test_blame_cache_new() {
        assert!(BlameCache::new().cache.is_empty());
    }

    /// Two records from different commits produce two map entries.
    #[test]
    fn test_parse_porcelain_blame_full() {
        let output = concat!(
            "abc1234567890123456789012345678901234567 1 1 1\n",
            "author Alice\n",
            "author-time 1704067200\n",
            "summary Line 1\n",
            "filename test.rs\n",
            "\tline 1 content\n",
            "def5678901234567890123456789012345678901 2 2 1\n",
            "author Bob\n",
            "author-time 1704153600\n",
            "summary Line 2\n",
            "filename test.rs\n",
            "\tline 2 content\n",
        );

        let by_line = parse_porcelain_blame_full(output);
        assert_eq!(by_line.len(), 2);
        assert_eq!(by_line.get(&1).expect("key not found").author, "Alice");
        assert_eq!(by_line.get(&2).expect("key not found").author, "Bob");
    }

    // ================================================================
    // Additional coverage tests
    // ================================================================

    /// Timestamps before the epoch are rendered as an empty string.
    #[test]
    fn test_format_timestamp_negative() {
        for secs in [-1, -100000] {
            assert_eq!(format_timestamp(secs), "");
        }
    }

    /// Feb 29, 2000 00:00:00 UTC = 951782400.
    #[test]
    fn test_format_timestamp_leap_year_feb_29() {
        assert_eq!(format_timestamp(951782400), "2000-02-29");
    }

    /// Dec 31, 1999 00:00:00 UTC = 946598400.
    #[test]
    fn test_format_timestamp_dec_31() {
        assert_eq!(format_timestamp(946598400), "1999-12-31");
    }

    /// Feb 28, 2023 00:00:00 UTC = 1677542400.
    #[test]
    fn test_format_timestamp_end_of_non_leap_feb() {
        assert_eq!(format_timestamp(1677542400), "2023-02-28");
    }

    /// Dates spread over the year exercise the month-walking loop.
    #[test]
    fn test_format_timestamp_various_months() {
        let cases = [
            (1710460800, "2024-03-15"),
            (1719792000, "2024-07-01"),
            (1732924800, "2024-11-30"),
        ];
        for (secs, expected) in cases {
            assert_eq!(format_timestamp(secs), expected);
        }
    }

    /// Wider sweep over century and non-century years.
    #[test]
    fn test_is_leap_year_edge_cases() {
        // Non-century leap years
        for year in [2004, 2008, 2012, 2016, 2020] {
            assert!(is_leap_year(year));
        }

        // Century non-leap years
        for year in [1700, 1800, 2100, 2200] {
            assert!(!is_leap_year(year));
        }

        // Century leap years (divisible by 400)
        for year in [1600, 2400] {
            assert!(is_leap_year(year));
        }
    }

    /// Author and author-time without any hash line: a result is still
    /// produced because the author is non-empty, with an empty commit.
    #[test]
    fn test_parse_porcelain_blame_no_hash_line() {
        let output = concat!(
            "author TestAuthor\n",
            "author-time 1704067200\n",
            "short line\n",
            "filename test.rs\n",
        );
        let result = parse_porcelain_blame(output);
        assert!(result.is_some(), "Should return Some when author is present");
        let blame = result.expect("operation failed");
        assert_eq!(blame.author, "TestAuthor");
        assert_eq!(blame.commit, "", "No hash line means empty commit");
        assert_eq!(blame.date, "2024-01-01");
    }

    /// Neither a hash line nor an author: nothing to report.
    #[test]
    fn test_parse_porcelain_blame_no_hash_no_author() {
        assert!(parse_porcelain_blame("short line\nsome other data\n").is_none());
    }

    /// An author after an unrelated line is enough for a result.
    #[test]
    fn test_parse_porcelain_blame_no_hash_with_author() {
        let result = parse_porcelain_blame("short line\nauthor Nobody\n");
        assert!(result.is_some());
        let blame = result.expect("operation failed");
        assert_eq!(blame.author, "Nobody");
        assert_eq!(blame.commit, ""); // No hash found
    }

    /// A lone author line gives a result with an empty commit.
    #[test]
    fn test_parse_porcelain_blame_only_author() {
        let result = parse_porcelain_blame("author SomeAuthor\n");
        assert!(result.is_some());
        let blame = result.expect("operation failed");
        assert_eq!(blame.author, "SomeAuthor");
        assert_eq!(blame.commit, "");
    }

    /// Forty non-hex characters must not be mistaken for a commit hash.
    #[test]
    fn test_parse_porcelain_blame_non_hex_40char_line() {
        let output = "ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ 1 1 1\nauthor Test\n";
        let result = parse_porcelain_blame(output);
        // The author alone makes the result Some; the commit stays empty.
        assert!(result.is_some());
        let blame = result.expect("operation failed");
        assert_eq!(blame.commit, "");
    }

    /// An unparsable author-time leaves the date empty.
    #[test]
    fn test_parse_porcelain_blame_invalid_timestamp() {
        let output = concat!(
            "abc1234567890123456789012345678901234567 1 1 1\n",
            "author Test\n",
            "author-time notanumber\n",
        );
        let result = parse_porcelain_blame(output);
        assert!(result.is_some());
        let blame = result.expect("operation failed");
        assert_eq!(blame.author, "Test");
        assert_eq!(blame.date, "");
    }

    /// Empty output gives an empty map.
    #[test]
    fn test_parse_porcelain_blame_full_empty_input() {
        assert!(parse_porcelain_blame_full("").is_empty());
    }

    /// The map key is the final line number (third header field).
    #[test]
    fn test_parse_porcelain_blame_full_single_entry() {
        let output = concat!(
            "abc1234567890123456789012345678901234567 1 5 1\n",
            "author Alice\n",
            "author-time 1704067200\n",
        );
        let by_line = parse_porcelain_blame_full(output);
        assert_eq!(by_line.len(), 1);
        assert_eq!(by_line.get(&5).expect("key not found").author, "Alice");
        assert_eq!(by_line.get(&5).expect("key not found").commit, "abc1234");
    }

    /// Three consecutive records, each from its own commit.
    #[test]
    fn test_parse_porcelain_blame_full_three_entries() {
        let output = "aaa1234567890123456789012345678901234567 1 1 1\n\
                      author A\n\
                      author-time 1000000000\n\
                      filename f.rs\n\
                      \tline 1\n\
                      bbb1234567890123456789012345678901234567 2 2 1\n\
                      author B\n\
                      author-time 1100000000\n\
                      filename f.rs\n\
                      \tline 2\n\
                      ccc1234567890123456789012345678901234567 3 3 1\n\
                      author C\n\
                      author-time 1200000000\n\
                      filename f.rs\n\
                      \tline 3\n";
        let by_line = parse_porcelain_blame_full(output);
        assert_eq!(by_line.len(), 3);
        assert_eq!(by_line.get(&1).expect("key not found").author, "A");
        assert_eq!(by_line.get(&2).expect("key not found").author, "B");
        assert_eq!(by_line.get(&3).expect("key not found").author, "C");
    }

    /// An unparsable author-time still yields an entry, with an empty date.
    #[test]
    fn test_parse_porcelain_blame_full_invalid_timestamp() {
        let output = concat!(
            "abc1234567890123456789012345678901234567 1 3 1\n",
            "author Test\n",
            "author-time invalid\n",
        );
        let by_line = parse_porcelain_blame_full(output);
        assert_eq!(by_line.len(), 1);
        let blame = by_line.get(&3).expect("key not found");
        assert_eq!(blame.author, "Test");
        assert_eq!(blame.date, "");
    }

    /// A header with fewer than three fields has no line number, so no entry
    /// is recorded.
    #[test]
    fn test_parse_porcelain_blame_full_commit_line_less_than_3_parts() {
        let by_line = parse_porcelain_blame_full("abc1234567890123456789012345678901234567 1\n");
        assert!(by_line.is_empty());
    }

    /// Entries placed straight into the map are retrievable under the same key.
    /// (`get_blame` would shell out to git on a miss, so the map is used directly.)
    #[test]
    fn test_blame_cache_direct_insert_and_get() {
        let mut cache = BlameCache::new();
        cache.cache.insert(
            ("src/main.rs".to_string(), 10),
            BlameInfo {
                author: "Test".to_string(),
                commit: "abc1234".to_string(),
                date: "2024-01-01".to_string(),
            },
        );

        let cached = cache.cache.get(&("src/main.rs".to_string(), 10));
        assert!(cached.is_some());
        assert_eq!(cached.expect("unexpected failure").author, "Test");
    }

    /// The default value has all fields empty.
    #[test]
    fn test_blame_info_default() {
        let BlameInfo { author, commit, date } = BlameInfo::default();
        assert_eq!(author, "");
        assert_eq!(commit, "");
        assert_eq!(date, "");
    }

    /// Cloning copies every field.
    #[test]
    fn test_blame_info_clone() {
        let original = BlameInfo {
            author: "Author".to_string(),
            commit: "abc1234".to_string(),
            date: "2024-01-01".to_string(),
        };
        let copy = original.clone();
        assert_eq!(original.author, copy.author);
        assert_eq!(original.commit, copy.commit);
        assert_eq!(original.date, copy.date);
    }

    /// An empty line list returns early without touching the cache.
    #[test]
    fn test_prefetch_file_empty_lines() {
        let mut cache = BlameCache::new();
        cache.prefetch_file(Path::new("/nonexistent/project"), Path::new("src/main.rs"), &[]);
        assert!(cache.cache.is_empty());
    }

    /// When every requested line is already cached, nothing is fetched.
    #[test]
    fn test_prefetch_file_all_cached() {
        let mut cache = BlameCache::new();
        let file_str = "src/main.rs".to_string();

        // Pre-populate cache for lines 1,2,3
        cache.cache.extend((1..=3).map(|line| {
            let info = BlameInfo {
                author: format!("Author{}", line),
                commit: "abc1234".to_string(),
                date: "2024-01-01".to_string(),
            };
            ((file_str.clone(), line), info)
        }));

        cache.prefetch_file(
            Path::new("/nonexistent/project"),
            Path::new("src/main.rs"),
            &[1, 2, 3],
        );
        // Still exactly the three pre-populated entries.
        assert_eq!(cache.cache.len(), 3);
    }

    /// A lookup in a directory that does not exist fails and returns `None`.
    #[test]
    fn test_get_blame_for_nonexistent_path() {
        let mut cache = BlameCache::new();
        let result = cache.get_blame(
            Path::new("/absolutely/nonexistent/project/path"),
            Path::new("nonexistent_file.rs"),
            1,
        );
        assert!(result.is_none());
    }

    /// A pre-populated entry is served from the cache.
    #[test]
    fn test_get_blame_cache_hit() {
        let mut cache = BlameCache::new();
        cache.cache.insert(
            ("src/test.rs".to_string(), 42),
            BlameInfo {
                author: "CachedAuthor".to_string(),
                commit: "ccc1234".to_string(),
                date: "2024-06-15".to_string(),
            },
        );

        let result = cache.get_blame(Path::new("/some/path"), Path::new("src/test.rs"), 42);
        assert!(result.is_some());
        let blame = result.expect("operation failed");
        assert_eq!(blame.author, "CachedAuthor");
    }

    /// Uncached lines trigger a fetch; when that fetch fails the cache stays empty.
    #[test]
    fn test_prefetch_file_with_uncached_lines_nonexistent_project() {
        let mut cache = BlameCache::new();
        cache.prefetch_file(
            Path::new("/nonexistent/project/path"),
            Path::new("src/main.rs"),
            &[1, 2, 3],
        );
        assert!(cache.cache.is_empty());
    }
}