Skip to main content

tokmd_context_git/
lib.rs

1//! Lightweight git scoring for context ranking.
2
3use std::collections::BTreeMap;
4#[cfg(feature = "git")]
5use std::path::Path;
6
7#[cfg(feature = "git")]
8use tokmd_path::normalize_rel_path as normalize_path;
9#[cfg(feature = "git")]
10use tokmd_types::{FileKind, FileRow};
11
/// Git-derived scores for file ranking.
///
/// Both maps are keyed by file path. Because they are `BTreeMap`s, iteration
/// always visits paths in sorted order. `Default` yields two empty maps.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GitScores {
    /// Per-file hotspot scores: path → (lines × commits)
    pub hotspots: BTreeMap<String, usize>,
    /// Per-file commit counts: path → commits
    pub commit_counts: BTreeMap<String, usize>,
}
19
/// Computes git-derived ranking scores for the files described by `rows`.
///
/// The repository containing `root` is located with `tokmd_git::repo_root`
/// and its history is read with `tokmd_git::collect_history`, to which
/// `max_commits` and `max_commit_files` are passed through. Only
/// `FileKind::Parent` rows are scored. Row paths and history paths are both
/// normalized before they are matched, and a file that appears in no
/// collected commit is absent from both result maps.
///
/// Returns `None` when no repository root is found for `root` or when
/// collecting the history fails.
#[cfg(feature = "git")]
pub fn compute_git_scores(
    root: &Path,
    rows: &[FileRow],
    max_commits: usize,
    max_commit_files: usize,
) -> Option<GitScores> {
    let repo_root = tokmd_git::repo_root(root)?;
    let history =
        tokmd_git::collect_history(&repo_root, Some(max_commits), Some(max_commit_files)).ok()?;

    // Line counts for parent rows only, keyed by normalized path.
    let file_lines: BTreeMap<String, usize> = rows
        .iter()
        .filter(|row| row.kind == FileKind::Parent)
        .map(|row| (normalize_path(&row.path), row.lines))
        .collect();

    // Count how many collected commits mention each known file.
    let mut commit_counts: BTreeMap<String, usize> = BTreeMap::new();
    for commit in &history {
        for file in &commit.files {
            let key = normalize_path(file);
            if file_lines.contains_key(&key) {
                *commit_counts.entry(key).or_default() += 1;
            }
        }
    }

    // Hotspot score: lines × commits. Saturate rather than overflow so an
    // extreme input can neither panic (debug) nor wrap around (release).
    let hotspots: BTreeMap<String, usize> = commit_counts
        .iter()
        .filter_map(|(path, &count)| {
            let lines = *file_lines.get(path)?;
            Some((path.clone(), lines.saturating_mul(count)))
        })
        .collect();

    Some(GitScores {
        hotspots,
        commit_counts,
    })
}
63
64#[cfg(not(feature = "git"))]
65pub fn compute_git_scores(
66    _root: &std::path::Path,
67    _rows: &[tokmd_types::FileRow],
68    _max_commits: usize,
69    _max_commit_files: usize,
70) -> Option<GitScores> {
71    None
72}
73
#[cfg(test)]
mod tests_no_feature {
    use super::*;

    // A `GitScores` value can be assembled directly from its public fields.
    #[test]
    fn git_scores_can_be_constructed() {
        let (hotspots, commit_counts) = (BTreeMap::new(), BTreeMap::new());
        let scores = GitScores {
            hotspots,
            commit_counts,
        };
        assert!(scores.hotspots.is_empty());
        assert!(scores.commit_counts.is_empty());
    }

    // Keys come back in sorted order regardless of insertion order.
    #[test]
    fn git_scores_btreemap_is_sorted() {
        let scores = GitScores {
            hotspots: BTreeMap::from([
                ("z/file.rs".to_string(), 100),
                ("a/file.rs".to_string(), 50),
            ]),
            commit_counts: BTreeMap::new(),
        };
        let keys: Vec<&str> = scores.hotspots.keys().map(String::as_str).collect();
        assert_eq!(keys, ["a/file.rs", "z/file.rs"]);
    }
}
102
#[cfg(all(test, feature = "git"))]
mod tests {
    use super::*;
    use std::path::Path;
    use std::process::Command;
    use tokmd_types::{FileKind, FileRow};

    /// Builds a `FileRow` of the given kind whose size fields derive from `lines`.
    fn make_row_of_kind(path: &str, lines: usize, kind: FileKind) -> FileRow {
        FileRow {
            path: path.to_string(),
            module: "(root)".to_string(),
            lang: "Rust".to_string(),
            kind,
            code: lines,
            comments: 0,
            blanks: 0,
            lines,
            bytes: lines * 10,
            tokens: lines * 5,
        }
    }

    /// Builds a `FileKind::Parent` row.
    fn make_row(path: &str, lines: usize) -> FileRow {
        make_row_of_kind(path, lines, FileKind::Parent)
    }

    /// Builds a `FileKind::Child` row, which scoring is expected to ignore.
    fn make_child_row(path: &str, lines: usize) -> FileRow {
        make_row_of_kind(path, lines, FileKind::Child)
    }

    /// Runs `git <args>` inside `root`.
    ///
    /// Returns `None` only when the process cannot be spawned; the exit
    /// status of git itself is not inspected.
    fn git(root: &Path, args: &[&str]) -> Option<()> {
        Command::new("git")
            .args(args)
            .current_dir(root)
            .output()
            .ok()
            .map(|_| ())
    }

    /// Writes `contents` to `name` under `root`, stages everything and commits.
    fn commit_file(root: &Path, name: &str, contents: &str, message: &str) -> Option<()> {
        std::fs::write(root.join(name), contents).ok()?;
        git(root, &["add", "."])?;
        git(root, &["commit", "-m", message])
    }

    /// Creates a temporary repository with three commits:
    /// `main.rs` is committed twice, `lib.rs` once.
    fn create_test_repo() -> Option<tempfile::TempDir> {
        let dir = tempfile::tempdir().ok()?;
        let root = dir.path();

        // git init + config
        git(root, &["init"])?;
        git(root, &["config", "user.email", "test@test.com"])?;
        git(root, &["config", "user.name", "Test"])?;

        // main.rs: 2 commits (3 lines initially, then 4)
        commit_file(root, "main.rs", "1\n2\n3", "c1")?;
        commit_file(root, "main.rs", "1\n2\n3\n4", "c2")?;

        // lib.rs: 1 commit (5 lines)
        commit_file(root, "lib.rs", "1\n2\n3\n4\n5", "c3")?;

        Some(dir)
    }

    /// Scores `rows` against a fresh test repository.
    ///
    /// Returns `None` when git is unavailable or scoring yields nothing, in
    /// which case the calling test skips itself.
    fn scores_for(rows: &[FileRow]) -> Option<GitScores> {
        let repo = create_test_repo()?;
        compute_git_scores(repo.path(), rows, 100, 100)
    }

    #[test]
    fn test_compute_git_scores_commit_counts() {
        let rows = vec![make_row("main.rs", 4), make_row("lib.rs", 5)];
        let Some(scores) = scores_for(&rows) else {
            return; // git unavailable in this environment
        };

        // main.rs has 2 commits, lib.rs has 1 commit
        assert_eq!(scores.commit_counts.get("main.rs"), Some(&2));
        assert_eq!(scores.commit_counts.get("lib.rs"), Some(&1));
    }

    #[test]
    fn test_compute_git_scores_hotspots() {
        let rows = vec![make_row("main.rs", 4), make_row("lib.rs", 5)];
        let Some(scores) = scores_for(&rows) else {
            return;
        };

        // hotspot = lines * commits
        // main.rs: 4 lines * 2 commits = 8
        // lib.rs: 5 lines * 1 commit = 5
        assert_eq!(scores.hotspots.get("main.rs"), Some(&8));
        assert_eq!(scores.hotspots.get("lib.rs"), Some(&5));
    }

    #[test]
    fn test_compute_git_scores_filters_children() {
        // Only include child rows - should be filtered out
        let rows = vec![make_child_row("main.rs", 4)];

        // compute_git_scores may return None in some environments
        let Some(scores) = scores_for(&rows) else {
            return;
        };

        // Child rows should be filtered, so commit_counts should be empty
        assert!(scores.commit_counts.is_empty());
    }

    #[test]
    fn test_compute_git_scores_non_repo_returns_none() {
        let dir = tempfile::tempdir().unwrap();
        let rows = vec![];
        // Not a git repo, should return None
        assert!(compute_git_scores(dir.path(), &rows, 100, 100).is_none());
    }

    #[test]
    fn test_normalize_path_backslash() {
        assert_eq!(normalize_path("foo\\bar\\baz.rs"), "foo/bar/baz.rs");
    }

    #[test]
    fn test_normalize_path_dot_slash() {
        assert_eq!(normalize_path("./src/main.rs"), "src/main.rs");
    }

    // ==================== Mutant killer tests ====================

    #[test]
    fn test_compute_git_scores_returns_some() {
        // Kills "compute_git_scores -> None" mutant
        let Some(repo) = create_test_repo() else {
            return;
        };
        let rows = vec![make_row("main.rs", 4)];
        let result = compute_git_scores(repo.path(), &rows, 100, 100);
        assert!(
            result.is_some(),
            "compute_git_scores should return Some for valid git repo"
        );
    }

    #[test]
    fn test_compute_git_scores_not_default() {
        // Kills "compute_git_scores -> Some(Default::default())" mutant
        let rows = vec![make_row("main.rs", 4), make_row("lib.rs", 5)];
        let Some(scores) = scores_for(&rows) else {
            return;
        };

        // Scores should not be empty (kills Default::default() mutant)
        assert!(
            !scores.commit_counts.is_empty(),
            "commit_counts should not be empty"
        );
        assert!(!scores.hotspots.is_empty(), "hotspots should not be empty");
    }

    #[test]
    fn test_commit_count_increment() {
        // Kills "+= -> -=" mutant on commit counting
        // main.rs has 2 commits, so count must be > 0 (not negative from subtraction)
        let rows = vec![make_row("main.rs", 4)];
        let Some(scores) = scores_for(&rows) else {
            return;
        };

        let count = scores.commit_counts.get("main.rs").copied().unwrap_or(0);
        assert!(count > 0, "commit count must be positive, got {count}");
        assert_eq!(count, 2, "main.rs should have exactly 2 commits");
    }

    #[test]
    fn test_hotspot_multiplication() {
        // Kills "lines * commits -> lines + commits" or "lines / commits" mutants
        // main.rs: 4 lines, 2 commits
        // If multiplication: 4 * 2 = 8
        // If addition: 4 + 2 = 6
        // If division: 4 / 2 = 2
        let rows = vec![make_row("main.rs", 4)];
        let Some(scores) = scores_for(&rows) else {
            return;
        };

        let hotspot = scores.hotspots.get("main.rs").copied().unwrap_or(0);
        assert_eq!(
            hotspot, 8,
            "hotspot should be lines * commits = 4 * 2 = 8, got {hotspot}"
        );
    }

    #[test]
    fn test_normalize_path_not_empty() {
        // Kills "normalize_path -> empty string" mutant
        for input in ["foo/bar", "test.rs", "./src/lib.rs"] {
            assert!(!normalize_path(input).is_empty());
        }
    }

    #[test]
    fn test_normalize_path_not_xyzzy() {
        // Kills "normalize_path -> xyzzy" mutant
        for input in ["foo/bar", "test.rs", "./src/lib.rs"] {
            assert_ne!(normalize_path(input), "xyzzy");
        }
    }

    #[test]
    fn test_filter_only_parent_files() {
        // Kills "== FileKind::Parent -> != FileKind::Parent" mutant
        // Mix of parent and child rows
        let rows = vec![
            make_row("main.rs", 4),      // Parent
            make_child_row("lib.rs", 5), // Child - should be filtered
        ];
        let Some(scores) = scores_for(&rows) else {
            return;
        };

        // Only main.rs (Parent) should appear
        assert!(scores.commit_counts.contains_key("main.rs"));
        assert!(
            !scores.commit_counts.contains_key("lib.rs"),
            "Child file lib.rs should be filtered out"
        );
    }

    #[test]
    fn test_path_matching_with_normalization() {
        // Tests that path normalization works for matching git paths to FileRow paths.
        // The row path carries a leading "./" while git reports plain "main.rs",
        // so the lookup can only succeed if the row path is normalized first.
        let rows = vec![make_row("./main.rs", 4)];
        let Some(scores) = scores_for(&rows) else {
            return;
        };

        // Should find the file despite the differing path spelling
        assert!(
            scores.commit_counts.contains_key("main.rs"),
            "Should match file after normalization"
        );
    }
}
424
#[cfg(test)]
mod tests_no_git {
    use super::*;

    /// Shorthand for building a score map from `(path, value)` pairs.
    fn score_map<const N: usize>(entries: [(&str, usize); N]) -> BTreeMap<String, usize> {
        let mut map = BTreeMap::new();
        for (path, value) in entries {
            map.insert(path.to_string(), value);
        }
        map
    }

    // Two empty maps make an empty `GitScores`.
    #[test]
    fn test_git_scores_struct_default() {
        let scores = GitScores {
            hotspots: score_map([]),
            commit_counts: score_map([]),
        };
        assert!(scores.hotspots.is_empty());
        assert!(scores.commit_counts.is_empty());
    }

    // Values stored in either map can be read back by path.
    #[test]
    fn test_git_scores_struct_with_data() {
        let scores = GitScores {
            hotspots: score_map([("src/main.rs", 100), ("src/lib.rs", 50)]),
            commit_counts: score_map([("src/main.rs", 10), ("src/lib.rs", 5)]),
        };

        assert_eq!(scores.hotspots.len(), 2);
        assert_eq!(scores.commit_counts.get("src/main.rs"), Some(&10));
        assert_eq!(scores.hotspots.get("src/lib.rs"), Some(&50));
    }

    // Keys iterate in sorted order even when inserted out of order.
    #[test]
    fn test_git_scores_btreemap_ordering() {
        let scores = GitScores {
            hotspots: score_map([("z.rs", 1), ("a.rs", 2), ("m.rs", 3)]),
            commit_counts: BTreeMap::new(),
        };

        let keys: Vec<&str> = scores.hotspots.keys().map(String::as_str).collect();
        assert_eq!(keys, ["a.rs", "m.rs", "z.rs"]);
    }
}
473}