Skip to main content

tokmd_git/
lib.rs

1//! # tokmd-git
2//!
3//! **Tier 2 (Utilities)**
4//!
5//! Streaming git log adapter for tokmd analysis. Collects commit history
6//! without loading the entire history into memory.
7//!
8//! ## What belongs here
9//! * Git history collection
10//! * Commit parsing (timestamp, author, affected files)
11//! * Streaming interface
12//!
13//! ## What does NOT belong here
14//! * Analysis computation (use tokmd-analysis)
15//! * Git history modification
16//! * Complex git operations (use git2 crate directly if needed)
17
18use std::io::{BufRead, BufReader};
19use std::path::{Path, PathBuf};
20use std::process::{Command, Stdio};
21
22use anyhow::{Context, Result};
23
/// Build the base `git` [`Command`] used by every helper in this crate.
///
/// The returned command has `GIT_DIR` and `GIT_WORK_TREE` removed from its
/// environment, so values inherited from the parent process cannot take
/// precedence over the repository selected with an explicit `-C <path>`.
fn git_cmd() -> Command {
    let mut git = Command::new("git");
    for inherited in ["GIT_DIR", "GIT_WORK_TREE"] {
        git.env_remove(inherited);
    }
    git
}
34
/// One commit as reported by `git log`, reduced to the fields tokmd needs.
///
/// Values are produced by [`collect_history`], which asks git for the
/// `%ct|%ae` pretty format followed by the `--name-only` file list.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitCommit {
    /// Value of git's `%ct` placeholder (committer date as a UNIX timestamp).
    /// [`collect_history`] stores `0` when the value cannot be parsed.
    pub timestamp: i64,
    /// Value of git's `%ae` placeholder (author e-mail address); empty when
    /// the header line carried no `|` separator.
    pub author: String,
    /// Paths touched by the commit, as printed by `git log --name-only`.
    /// May be shorter than the real list when a per-commit file limit applies.
    pub files: Vec<String>,
}
41
/// Which git range operator joins the base and head revisions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum GitRangeMode {
    /// Two-dot range `A..B`: commits reachable from `B` but not from `A`.
    #[default]
    TwoDot,
    /// Three-dot range `A...B`: symmetric difference, resolved by git
    /// relative to the merge base of `A` and `B`.
    ThreeDot,
}

impl GitRangeMode {
    /// Render `base` and `head` joined by this mode's separator, producing
    /// the range argument handed to git (for example `main..HEAD`).
    pub fn format(&self, base: &str, head: &str) -> String {
        let separator = match self {
            Self::TwoDot => "..",
            Self::ThreeDot => "...",
        };
        format!("{base}{separator}{head}")
    }
}
61
62pub fn git_available() -> bool {
63    git_cmd()
64        .arg("--version")
65        .stdout(Stdio::null())
66        .stderr(Stdio::null())
67        .status()
68        .map(|s| s.success())
69        .unwrap_or(false)
70}
71
72pub fn repo_root(path: &Path) -> Option<PathBuf> {
73    let output = git_cmd()
74        .arg("-C")
75        .arg(path)
76        .arg("rev-parse")
77        .arg("--show-toplevel")
78        .output()
79        .ok()?;
80    if !output.status.success() {
81        return None;
82    }
83    let root = String::from_utf8_lossy(&output.stdout).trim().to_string();
84    if root.is_empty() {
85        None
86    } else {
87        Some(PathBuf::from(root))
88    }
89}
90
91pub fn collect_history(
92    repo_root: &Path,
93    max_commits: Option<usize>,
94    max_commit_files: Option<usize>,
95) -> Result<Vec<GitCommit>> {
96    let mut child = git_cmd()
97        .arg("-C")
98        .arg(repo_root)
99        .arg("log")
100        .arg("--name-only")
101        .arg("--pretty=format:%ct|%ae")
102        .stdout(Stdio::piped())
103        .stderr(Stdio::null())
104        .spawn()
105        .context("Failed to spawn git log")?;
106
107    let stdout = child.stdout.take().context("Missing git log stdout")?;
108    let reader = BufReader::new(stdout);
109
110    let mut commits: Vec<GitCommit> = Vec::new();
111    let mut current: Option<GitCommit> = None;
112
113    for line in reader.lines() {
114        let line = line?;
115        if line.trim().is_empty() {
116            if let Some(commit) = current.take() {
117                commits.push(commit);
118                if max_commits.is_some_and(|limit| commits.len() >= limit) {
119                    break;
120                }
121            }
122            continue;
123        }
124
125        if current.is_none() {
126            let mut parts = line.splitn(2, '|');
127            let ts = parts.next().unwrap_or("0").parse::<i64>().unwrap_or(0);
128            let author = parts.next().unwrap_or("").to_string();
129            current = Some(GitCommit {
130                timestamp: ts,
131                author,
132                files: Vec::new(),
133            });
134            continue;
135        }
136
137        if let Some(commit) = current.as_mut()
138            && max_commit_files
139                .map(|limit| commit.files.len() < limit)
140                .unwrap_or(true)
141        {
142            commit.files.push(line.trim().to_string());
143        }
144    }
145
146    if let Some(commit) = current.take() {
147        commits.push(commit);
148    }
149
150    let status = child.wait()?;
151    if !status.success() {
152        return Err(anyhow::anyhow!("git log failed"));
153    }
154
155    Ok(commits)
156}
157
158/// Get the set of added line numbers per file between two refs.
159pub fn get_added_lines(
160    repo_root: &Path,
161    base: &str,
162    head: &str,
163    range_mode: GitRangeMode,
164) -> Result<std::collections::BTreeMap<PathBuf, std::collections::BTreeSet<usize>>> {
165    let range = range_mode.format(base, head);
166    let output = git_cmd()
167        .arg("-C")
168        .arg(repo_root)
169        .args(["diff", "--unified=0", &range])
170        .output()
171        .context("Failed to run git diff")?;
172
173    if !output.status.success() {
174        let stderr = String::from_utf8_lossy(&output.stderr);
175        return Err(anyhow::anyhow!("git diff failed: {}", stderr.trim()));
176    }
177
178    let stdout = String::from_utf8_lossy(&output.stdout);
179    let mut result: std::collections::BTreeMap<PathBuf, std::collections::BTreeSet<usize>> =
180        std::collections::BTreeMap::new();
181    let mut current_file: Option<PathBuf> = None;
182
183    for line in stdout.lines() {
184        if let Some(file_path) = line.strip_prefix("+++ b/") {
185            current_file = Some(PathBuf::from(file_path));
186            continue;
187        }
188
189        if line.starts_with("@@") {
190            let Some(file) = current_file.as_ref() else {
191                continue;
192            };
193
194            // Hunk header: @@ -a,b +c,d @@
195            // We care about +c,d
196            let parts: Vec<&str> = line.split_whitespace().collect();
197            if parts.len() < 3 {
198                continue;
199            }
200
201            let new_range = parts[2]; // +c,d
202            let range_str = new_range.strip_prefix('+').unwrap_or(new_range);
203            let range_parts: Vec<&str> = range_str.split(',').collect();
204
205            let start: usize = range_parts[0].parse().unwrap_or(0);
206            let count: usize = if range_parts.len() > 1 {
207                range_parts[1].parse().unwrap_or(1)
208            } else {
209                1
210            };
211
212            if count > 0 && start > 0 {
213                let set = result.entry(file.clone()).or_default();
214                for i in 0..count {
215                    set.insert(start + i);
216                }
217            }
218        }
219    }
220
221    Ok(result)
222}
223
224/// Check whether a git revision resolves to a valid commit.
225pub fn rev_exists(repo_root: &Path, rev: &str) -> bool {
226    git_cmd()
227        .arg("-C")
228        .arg(repo_root)
229        .args(["rev-parse", "--verify", "--quiet"])
230        .arg(format!("{rev}^{{commit}}"))
231        .stdout(Stdio::null())
232        .stderr(Stdio::null())
233        .status()
234        .map(|s| s.success())
235        .unwrap_or(false)
236}
237
238/// Resolve a base ref with a fallback chain for CI environments.
239///
240/// Fallback order:
241/// 1. `requested` itself (fast path)
242/// 2. `TOKMD_GIT_BASE_REF` env var
243/// 3. `origin/{GITHUB_BASE_REF}` (GitHub Actions)
244/// 4. `origin/HEAD` (remote default branch)
245/// 5. `origin/main`, `main`, `origin/master`, `master`
246///
247/// Returns `None` if nothing resolves.
248pub fn resolve_base_ref(repo_root: &Path, requested: &str) -> Option<String> {
249    // Fast path: the requested ref exists
250    if rev_exists(repo_root, requested) {
251        return Some(requested.to_string());
252    }
253
254    // Only use fallback resolution for the CLI default (`main`).
255    // Explicitly requested bases should fail fast if missing.
256    if requested != "main" {
257        return None;
258    }
259
260    // TOKMD_GIT_BASE_REF env override
261    if let Ok(env_ref) = std::env::var("TOKMD_GIT_BASE_REF")
262        && !env_ref.is_empty()
263        && rev_exists(repo_root, &env_ref)
264    {
265        return Some(env_ref);
266    }
267
268    // GitHub Actions: origin/$GITHUB_BASE_REF
269    if let Ok(gh_base) = std::env::var("GITHUB_BASE_REF")
270        && !gh_base.is_empty()
271    {
272        let candidate = format!("origin/{gh_base}");
273        if rev_exists(repo_root, &candidate) {
274            return Some(candidate);
275        }
276    }
277
278    // Remote default branch
279    static FALLBACKS: &[&str] = &[
280        "origin/HEAD",
281        "origin/main",
282        "main",
283        "origin/master",
284        "master",
285    ];
286
287    for candidate in FALLBACKS {
288        if rev_exists(repo_root, candidate) {
289            return Some((*candidate).to_string());
290        }
291    }
292
293    None
294}
295
#[cfg(test)]
mod tests {
    use super::*;

    /// Isolated git command already pointed at `dir` through `-C`.
    fn test_git(dir: &Path) -> Command {
        let mut cmd = git_cmd();
        cmd.arg("-C").arg(dir);
        cmd
    }

    /// Run `git <args>` inside `dir`; panics only if git cannot be spawned.
    fn run_git(dir: &Path, args: &[&str]) {
        test_git(dir).args(args).output().unwrap();
    }

    /// Set a throwaway identity and record one commit that adds `f.txt`.
    fn commit_sample_file(dir: &Path) {
        run_git(dir, &["config", "user.email", "test@test.com"]);
        run_git(dir, &["config", "user.name", "Test"]);
        std::fs::write(dir.join("f.txt"), "hello").unwrap();
        run_git(dir, &["add", "."]);
        run_git(dir, &["commit", "-m", "init"]);
    }

    #[test]
    fn git_range_two_dot_format() {
        let rendered = GitRangeMode::TwoDot.format("main", "HEAD");
        assert_eq!(rendered, "main..HEAD");
    }

    #[test]
    fn git_range_three_dot_format() {
        let rendered = GitRangeMode::ThreeDot.format("main", "HEAD");
        assert_eq!(rendered, "main...HEAD");
    }

    #[test]
    fn git_range_default_is_two_dot() {
        assert_eq!(GitRangeMode::default(), GitRangeMode::TwoDot);
    }

    #[test]
    fn rev_exists_finds_head_in_repo() {
        if !git_available() {
            return;
        }
        let dir = tempfile::tempdir().unwrap();
        let repo = dir.path();

        // HEAD only resolves once the repository has at least one commit.
        run_git(repo, &["init"]);
        commit_sample_file(repo);

        assert!(rev_exists(repo, "HEAD"));
        assert!(!rev_exists(repo, "nonexistent-branch-abc123"));
    }

    #[test]
    fn resolve_base_ref_returns_requested_when_valid() {
        if !git_available() {
            return;
        }
        let dir = tempfile::tempdir().unwrap();
        let repo = dir.path();

        run_git(repo, &["init", "-b", "main"]);
        commit_sample_file(repo);

        assert_eq!(resolve_base_ref(repo, "main"), Some("main".to_string()));
    }

    #[test]
    fn resolve_base_ref_returns_none_when_nothing_resolves() {
        if !git_available() {
            return;
        }
        let dir = tempfile::tempdir().unwrap();
        let repo = dir.path();

        // Fresh repository on "trunk": no commits and no remotes, so no ref
        // resolves to a commit. A missing ref other than "main" also never
        // triggers the fallback chain.
        run_git(repo, &["init", "-b", "trunk"]);

        assert_eq!(resolve_base_ref(repo, "nonexistent"), None);
    }
}