Skip to main content

tokmd_context_git/
lib.rs

1//! Lightweight git scoring for context ranking.
2
3use std::collections::BTreeMap;
4#[cfg(feature = "git")]
5use std::path::Path;
6
7#[cfg(feature = "git")]
8use tokmd_path::normalize_rel_path as normalize_path;
9#[cfg(feature = "git")]
10use tokmd_types::{FileKind, FileRow};
11
/// Git-derived scores for file ranking.
///
/// Both maps are keyed by file path. `BTreeMap` keeps iteration order
/// sorted by path, so anything derived from these maps is deterministic.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GitScores {
    /// Per-file hotspot scores: path → (lines × commits)
    pub hotspots: BTreeMap<String, usize>,
    /// Per-file commit counts: path → commits
    pub commit_counts: BTreeMap<String, usize>,
}
19
/// Computes per-file commit counts and hotspot scores from git history.
///
/// Only rows whose kind is `FileKind::Parent` are considered. Row paths and
/// the paths reported by git are both passed through `normalize_path` before
/// they are compared, and the normalized path is the key of both result maps.
///
/// `max_commits` and `max_commit_files` are forwarded unchanged to
/// `tokmd_git::collect_history` as its optional limits.
///
/// Returns `None` when `tokmd_git::repo_root` finds no repository for `root`
/// or when collecting the history fails. Files from `rows` that never appear
/// in the collected history are absent from both maps.
#[cfg(feature = "git")]
pub fn compute_git_scores(
    root: &Path,
    rows: &[FileRow],
    max_commits: usize,
    max_commit_files: usize,
) -> Option<GitScores> {
    let repo_root = tokmd_git::repo_root(root)?;
    let commits =
        tokmd_git::collect_history(&repo_root, Some(max_commits), Some(max_commit_files)).ok()?;

    // Normalized path → line count, restricted to parent rows.
    let file_lines: BTreeMap<String, usize> = rows
        .iter()
        .filter(|r| r.kind == FileKind::Parent)
        .map(|r| (normalize_path(&r.path), r.lines))
        .collect();

    // Tally how many collected commits touched each known file; paths that
    // are not in `file_lines` are ignored.
    let mut commit_counts: BTreeMap<String, usize> = BTreeMap::new();
    for commit in &commits {
        for file in &commit.files {
            let key = normalize_path(file);
            if file_lines.contains_key(&key) {
                *commit_counts.entry(key).or_insert(0) += 1;
            }
        }
    }

    // Hotspot score: lines × commits. The product saturates instead of
    // overflowing, so an extreme pair of values yields the maximum score
    // rather than a debug-build panic or a wrapped (tiny) score in release.
    let hotspots: BTreeMap<String, usize> = commit_counts
        .iter()
        .filter_map(|(path, &count)| {
            let lines = *file_lines.get(path)?;
            Some((path.clone(), lines.saturating_mul(count)))
        })
        .collect();

    Some(GitScores {
        hotspots,
        commit_counts,
    })
}
63
64#[cfg(not(feature = "git"))]
65pub fn compute_git_scores(
66    _root: &std::path::Path,
67    _rows: &[tokmd_types::FileRow],
68    _max_commits: usize,
69    _max_commit_files: usize,
70) -> Option<GitScores> {
71    None
72}
73
#[cfg(all(test, feature = "git"))]
mod tests {
    use super::*;
    use std::path::Path;
    use std::process::Command;
    use tokmd_types::{FileKind, FileRow};

    /// Builds a `FileRow` of the given kind whose size fields are all derived
    /// from `lines` (bytes = lines × 10, tokens = lines × 5).
    fn make_row_with_kind(path: &str, lines: usize, kind: FileKind) -> FileRow {
        FileRow {
            path: path.to_string(),
            module: "(root)".to_string(),
            lang: "Rust".to_string(),
            kind,
            code: lines,
            comments: 0,
            blanks: 0,
            lines,
            bytes: lines * 10,
            tokens: lines * 5,
        }
    }

    /// Builds a parent `FileRow`; see `make_row_with_kind` for the field values.
    fn make_row(path: &str, lines: usize) -> FileRow {
        make_row_with_kind(path, lines, FileKind::Parent)
    }

    /// Runs `git <args>` inside `root`.
    ///
    /// Returns `None` when git cannot be spawned or exits with a non-zero
    /// status, so a half-built fixture is never handed to a test.
    fn git(root: &Path, args: &[&str]) -> Option<()> {
        let output = Command::new("git")
            .args(args)
            .current_dir(root)
            .output()
            .ok()?;
        if output.status.success() {
            Some(())
        } else {
            None
        }
    }

    /// Stages everything under `root` and records a commit with `message`.
    fn commit_all(root: &Path, message: &str) -> Option<()> {
        git(root, &["add", "."])?;
        git(root, &["commit", "-m", message])
    }

    /// Creates a throwaway repository with three commits:
    /// `main.rs` is touched by two of them and `lib.rs` by one.
    ///
    /// Returns `None` (tests then skip) if git is unavailable or any git
    /// command fails.
    fn create_test_repo() -> Option<tempfile::TempDir> {
        let dir = tempfile::tempdir().ok()?;
        let root = dir.path();

        // git init + config
        git(root, &["init"])?;
        git(root, &["config", "user.email", "test@test.com"])?;
        git(root, &["config", "user.name", "Test"])?;
        // A global "sign every commit" setting would make the commits below
        // fail on machines without a usable signing key.
        git(root, &["config", "commit.gpgsign", "false"])?;

        // main.rs: 2 commits (3 lines initially, then 4)
        std::fs::write(root.join("main.rs"), "1\n2\n3").ok()?;
        commit_all(root, "c1")?;

        std::fs::write(root.join("main.rs"), "1\n2\n3\n4").ok()?;
        commit_all(root, "c2")?;

        // lib.rs: 1 commit (5 lines)
        std::fs::write(root.join("lib.rs"), "1\n2\n3\n4\n5").ok()?;
        commit_all(root, "c3")?;

        Some(dir)
    }

    #[test]
    fn test_compute_git_scores_commit_counts() {
        let repo = match create_test_repo() {
            Some(r) => r,
            None => return, // Skip if git unavailable
        };
        let rows = vec![make_row("main.rs", 4), make_row("lib.rs", 5)];
        let scores = compute_git_scores(repo.path(), &rows, 100, 100).unwrap();

        // main.rs has 2 commits, lib.rs has 1 commit
        assert_eq!(scores.commit_counts.get("main.rs"), Some(&2));
        assert_eq!(scores.commit_counts.get("lib.rs"), Some(&1));
    }

    #[test]
    fn test_compute_git_scores_hotspots() {
        let repo = match create_test_repo() {
            Some(r) => r,
            None => return,
        };
        let rows = vec![make_row("main.rs", 4), make_row("lib.rs", 5)];
        let scores = compute_git_scores(repo.path(), &rows, 100, 100).unwrap();

        // hotspot = lines * commits
        // main.rs: 4 lines * 2 commits = 8
        // lib.rs: 5 lines * 1 commit = 5
        assert_eq!(scores.hotspots.get("main.rs"), Some(&8));
        assert_eq!(scores.hotspots.get("lib.rs"), Some(&5));
    }

    #[test]
    fn test_compute_git_scores_filters_children() {
        let repo = match create_test_repo() {
            Some(r) => r,
            None => return,
        };
        // Only include child rows - should be filtered out
        let rows = vec![make_row_with_kind("main.rs", 4, FileKind::Child)];
        let scores = compute_git_scores(repo.path(), &rows, 100, 100).unwrap();

        // Child rows should be filtered, so commit_counts should be empty
        assert!(scores.commit_counts.is_empty());
    }

    #[test]
    fn test_compute_git_scores_non_repo_returns_none() {
        let dir = tempfile::tempdir().unwrap();
        let rows: Vec<FileRow> = Vec::new();
        // Not a git repo, should return None
        assert!(compute_git_scores(dir.path(), &rows, 100, 100).is_none());
    }

    #[test]
    fn test_normalize_path_backslash() {
        assert_eq!(normalize_path("foo\\bar\\baz.rs"), "foo/bar/baz.rs");
    }

    #[test]
    fn test_normalize_path_dot_slash() {
        assert_eq!(normalize_path("./src/main.rs"), "src/main.rs");
    }

    // ==================== Mutant killer tests ====================

    #[test]
    fn test_compute_git_scores_returns_some() {
        // Kills "compute_git_scores -> None" mutant
        let repo = match create_test_repo() {
            Some(r) => r,
            None => return,
        };
        let rows = vec![make_row("main.rs", 4)];
        let result = compute_git_scores(repo.path(), &rows, 100, 100);
        assert!(
            result.is_some(),
            "compute_git_scores should return Some for valid git repo"
        );
    }

    #[test]
    fn test_compute_git_scores_not_default() {
        // Kills "compute_git_scores -> Some(Default::default())" mutant
        let repo = match create_test_repo() {
            Some(r) => r,
            None => return,
        };
        let rows = vec![make_row("main.rs", 4), make_row("lib.rs", 5)];
        let scores = compute_git_scores(repo.path(), &rows, 100, 100).unwrap();

        // Scores should not be empty (kills Default::default() mutant)
        assert!(
            !scores.commit_counts.is_empty(),
            "commit_counts should not be empty"
        );
        assert!(!scores.hotspots.is_empty(), "hotspots should not be empty");
    }

    #[test]
    fn test_commit_count_increment() {
        // Kills "+= -> -=" mutant on commit counting
        // main.rs has 2 commits, so count must be > 0 (not negative from subtraction)
        let repo = match create_test_repo() {
            Some(r) => r,
            None => return,
        };
        let rows = vec![make_row("main.rs", 4)];
        let scores = compute_git_scores(repo.path(), &rows, 100, 100).unwrap();

        let count = scores.commit_counts.get("main.rs").copied().unwrap_or(0);
        assert!(count > 0, "commit count must be positive, got {count}");
        assert_eq!(count, 2, "main.rs should have exactly 2 commits");
    }

    #[test]
    fn test_hotspot_multiplication() {
        // Kills "lines * commits -> lines + commits" or "lines / commits" mutants
        let repo = match create_test_repo() {
            Some(r) => r,
            None => return,
        };
        // main.rs: 4 lines, 2 commits
        // If multiplication: 4 * 2 = 8
        // If addition: 4 + 2 = 6
        // If division: 4 / 2 = 2
        let rows = vec![make_row("main.rs", 4)];
        let scores = compute_git_scores(repo.path(), &rows, 100, 100).unwrap();

        let hotspot = scores.hotspots.get("main.rs").copied().unwrap_or(0);
        assert_eq!(
            hotspot, 8,
            "hotspot should be lines * commits = 4 * 2 = 8, got {hotspot}"
        );
    }

    #[test]
    fn test_normalize_path_not_empty() {
        // Kills "normalize_path -> empty string" mutant
        assert!(!normalize_path("foo/bar").is_empty());
        assert!(!normalize_path("test.rs").is_empty());
        assert!(!normalize_path("./src/lib.rs").is_empty());
    }

    #[test]
    fn test_normalize_path_not_xyzzy() {
        // Kills "normalize_path -> xyzzy" mutant
        assert_ne!(normalize_path("foo/bar"), "xyzzy");
        assert_ne!(normalize_path("test.rs"), "xyzzy");
        assert_ne!(normalize_path("./src/lib.rs"), "xyzzy");
    }

    #[test]
    fn test_filter_only_parent_files() {
        // Kills "== FileKind::Parent -> != FileKind::Parent" mutant
        let repo = match create_test_repo() {
            Some(r) => r,
            None => return,
        };
        // Mix of parent and child rows
        let rows = vec![
            make_row("main.rs", 4),                            // Parent
            make_row_with_kind("lib.rs", 5, FileKind::Child), // Child - should be filtered
        ];
        let scores = compute_git_scores(repo.path(), &rows, 100, 100).unwrap();

        // Only main.rs (Parent) should appear
        assert!(scores.commit_counts.contains_key("main.rs"));
        assert!(
            !scores.commit_counts.contains_key("lib.rs"),
            "Child file lib.rs should be filtered out"
        );
    }

    #[test]
    fn test_path_matching_with_normalization() {
        // Tests that path normalization works for matching git paths to FileRow paths
        let repo = match create_test_repo() {
            Some(r) => r,
            None => return,
        };
        // The row path carries a leading "./", which git never reports; the
        // match can only succeed if the row path is normalized first.
        let rows = vec![make_row("./main.rs", 4)];
        let scores = compute_git_scores(repo.path(), &rows, 100, 100).unwrap();

        // Result keys are the normalized form, without the "./" prefix
        assert!(
            scores.commit_counts.contains_key("main.rs"),
            "Should match file after normalization"
        );
    }
}