Skip to main content

the_code_graph_cli/adapters/
git.rs

1use std::path::{Path, PathBuf};
2use std::process::Command;
3
4use domain::error::{CodeGraphError, Result};
5use domain::model::DiffHunk;
6use domain::ports::GitProvider;
7
/// [`GitProvider`] implementation that shells out to the `git` executable.
///
/// Every command is executed with `work_dir` as its current directory.
#[derive(Debug, Clone)]
pub struct ShellGitProvider {
    /// Directory in which `git` is invoked.
    work_dir: PathBuf,
}
11
12impl ShellGitProvider {
13    pub fn new(work_dir: PathBuf) -> Self {
14        Self { work_dir }
15    }
16
17    fn run_git(&self, args: &[&str]) -> Result<String> {
18        let output = Command::new("git")
19            .args(args)
20            .current_dir(&self.work_dir)
21            .output()
22            .map_err(|e| CodeGraphError::Git(format!("failed to run git: {e}")))?;
23
24        if !output.status.success() {
25            return Err(CodeGraphError::Git(
26                String::from_utf8_lossy(&output.stderr).trim().to_string(),
27            ));
28        }
29
30        Ok(String::from_utf8_lossy(&output.stdout)
31            .trim_end()
32            .to_string())
33    }
34}
35
36fn parse_hunk_range(s: &str) -> Result<(usize, usize)> {
37    if let Some((start_s, count_s)) = s.split_once(',') {
38        let start = start_s
39            .parse::<usize>()
40            .map_err(|e| CodeGraphError::Git(format!("bad hunk start '{start_s}': {e}")))?;
41        let count = count_s
42            .parse::<usize>()
43            .map_err(|e| CodeGraphError::Git(format!("bad hunk count '{count_s}': {e}")))?;
44        Ok((start, count))
45    } else {
46        let start = s
47            .parse::<usize>()
48            .map_err(|e| CodeGraphError::Git(format!("bad hunk start '{s}': {e}")))?;
49        Ok((start, 1))
50    }
51}
52
53fn parse_diff_output(output: &str) -> Result<Vec<DiffHunk>> {
54    let mut hunks = Vec::new();
55    let mut current_file: Option<PathBuf> = None;
56
57    for line in output.lines() {
58        if let Some(rest) = line.strip_prefix("diff --git ") {
59            // Extract the b/Y path from "a/X b/Y"
60            if let Some(b_part) = rest.split(" b/").last() {
61                current_file = Some(PathBuf::from(b_part));
62            }
63        } else if let Some(to_path) = line.strip_prefix("rename to ") {
64            current_file = Some(PathBuf::from(to_path));
65        } else if line.starts_with("@@ ") {
66            // Parse @@ -old_start[,old_count] +new_start[,new_count] @@
67            let inner = line
68                .strip_prefix("@@ ")
69                .and_then(|s| s.split_once(" @@"))
70                .map(|(ranges, _)| ranges);
71
72            let ranges = match inner {
73                Some(r) => r,
74                None => {
75                    return Err(CodeGraphError::Git(format!(
76                        "malformed hunk header: {line}"
77                    )));
78                }
79            };
80
81            let parts: Vec<&str> = ranges.split_whitespace().collect();
82            if parts.len() != 2 {
83                return Err(CodeGraphError::Git(format!(
84                    "expected 2 range specs, got {}: {line}",
85                    parts.len()
86                )));
87            }
88
89            let old_range = parts[0]
90                .strip_prefix('-')
91                .ok_or_else(|| CodeGraphError::Git(format!("missing '-' prefix: {line}")))?;
92            let new_range = parts[1]
93                .strip_prefix('+')
94                .ok_or_else(|| CodeGraphError::Git(format!("missing '+' prefix: {line}")))?;
95
96            let (old_start, old_count) = parse_hunk_range(old_range)?;
97            let (new_start, new_count) = parse_hunk_range(new_range)?;
98
99            let file = current_file.clone().ok_or_else(|| {
100                CodeGraphError::Git("hunk header before any diff --git line".into())
101            })?;
102
103            hunks.push(DiffHunk {
104                file,
105                old_start,
106                old_count,
107                new_start,
108                new_count,
109            });
110        }
111    }
112
113    Ok(hunks)
114}
115
116/// Reject git ref strings that start with `-` to prevent argument injection.
117fn validate_git_ref(refspec: &str) -> Result<()> {
118    if refspec.starts_with('-') {
119        return Err(CodeGraphError::Git(format!(
120            "invalid git ref: '{refspec}' (must not start with '-')"
121        )));
122    }
123    Ok(())
124}
125
/// File extensions (without the dot) accepted by [`has_supported_extension`].
const SUPPORTED_EXTENSIONS: &[&str] = &["ts", "tsx", "js", "jsx", "rs", "py", "go"];

/// Returns `true` when `path` ends in one of the [`SUPPORTED_EXTENSIONS`].
///
/// The comparison is exact (case-sensitive); paths without an extension or
/// with a non-UTF-8 extension are rejected.
fn has_supported_extension(path: &Path) -> bool {
    path.extension()
        .and_then(|e| e.to_str())
        .is_some_and(|e| SUPPORTED_EXTENSIONS.contains(&e))
}

/// Decodes a path field that git wrapped in double quotes.
///
/// Git quotes a path as a C string literal when it contains whitespace or
/// other special bytes. A field that is not enclosed in quotes is returned
/// unchanged. Inside quotes, `\ooo` (three octal digits) yields one raw byte,
/// the usual single-letter escapes are translated, and any other escaped
/// character (notably `\"` and `\\`) stands for itself.
fn unquote_status_path(field: &str) -> String {
    let Some(inner) = field
        .strip_prefix('"')
        .and_then(|s| s.strip_suffix('"'))
    else {
        return field.to_string();
    };

    let bytes = inner.as_bytes();
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] != b'\\' {
            decoded.push(bytes[i]);
            i += 1;
            continue;
        }

        // `\ooo`: only accepted when all three digits are octal and fit a byte.
        let octal = bytes
            .get(i + 1..i + 4)
            .filter(|digits| digits.iter().all(|d| (b'0'..=b'7').contains(d)))
            .and_then(|digits| {
                digits
                    .iter()
                    .try_fold(0u8, |acc, d| acc.checked_mul(8)?.checked_add(d - b'0'))
            });
        if let Some(value) = octal {
            decoded.push(value);
            i += 4;
            continue;
        }

        match bytes.get(i + 1) {
            Some(b'n') => decoded.push(b'\n'),
            Some(b't') => decoded.push(b'\t'),
            Some(b'r') => decoded.push(b'\r'),
            Some(b'a') => decoded.push(0x07),
            Some(b'b') => decoded.push(0x08),
            Some(b'f') => decoded.push(0x0c),
            Some(b'v') => decoded.push(0x0b),
            Some(&other) => decoded.push(other),
            None => {
                // Lone trailing backslash: keep it literally.
                decoded.push(b'\\');
                i += 1;
                continue;
            }
        }
        i += 2;
    }

    String::from_utf8_lossy(&decoded).into_owned()
}

/// Returns the destination part of an `old -> new` status field.
///
/// When the destination is quoted, the separator is the last ` -> ` that is
/// directly followed by an opening quote, so a ` -> ` inside the quoted name
/// is not mistaken for it. Falls back to the whole field if no separator is
/// found.
fn rename_target(rest: &str) -> &str {
    if rest.ends_with('"') {
        if let Some(at) = rest.rfind(" -> \"") {
            // Skip the four ASCII bytes of " -> " and keep the opening quote.
            return &rest[at + 4..];
        }
    }
    rest.rsplit_once(" -> ").map_or(rest, |(_, target)| target)
}

/// Extracts the supported source files from `git status --porcelain` output.
///
/// Each line has the form `XY <path>`, where `XY` is the two-character status
/// code. For renames and copies (`R` or `C` in either column) the line reads
/// `XY <old> -> <new>` and only the new path is kept. Quoted paths are
/// decoded, and paths without a supported extension are dropped. Lines that
/// are too short or do not split on character boundaries are ignored.
fn parse_git_status(output: &str) -> Vec<PathBuf> {
    output
        .lines()
        .filter(|line| line.len() >= 4) // "XY <path>" minimum
        .filter_map(|line| {
            // Checked slicing: a malformed line must not cause a panic.
            let status = line.get(..2)?;
            let rest = line.get(3..)?; // skip "XY "

            let has_origin = status.chars().any(|c| matches!(c, 'R' | 'C'));
            let field = if has_origin { rename_target(rest) } else { rest };

            let path = PathBuf::from(unquote_status_path(field.trim()));
            has_supported_extension(&path).then_some(path)
        })
        .collect()
}
158
159impl GitProvider for ShellGitProvider {
160    fn current_head(&self) -> Result<String> {
161        self.run_git(&["rev-parse", "HEAD"])
162    }
163
164    fn changed_files(&self, from: &str, to: &str) -> Result<Vec<PathBuf>> {
165        validate_git_ref(from)?;
166        validate_git_ref(to)?;
167        let output = self.run_git(&["diff", "--name-only", from, to])?;
168        Ok(output
169            .lines()
170            .filter(|l| !l.is_empty())
171            .map(PathBuf::from)
172            .collect())
173    }
174
175    fn diff_hunks(&self, from: &str, to: Option<&str>) -> Result<Vec<DiffHunk>> {
176        validate_git_ref(from)?;
177        if let Some(r) = to {
178            validate_git_ref(r)?;
179        }
180        let output = match to {
181            None => self.run_git(&["diff", "--unified=0", from])?,
182            Some(r) => self.run_git(&["diff", "--unified=0", from, r])?,
183        };
184        parse_diff_output(&output)
185    }
186
187    fn modified_files(&self) -> Result<Vec<PathBuf>> {
188        let output = self.run_git(&["status", "--porcelain"])?;
189        Ok(parse_git_status(&output))
190    }
191}
192
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `(old_start, old_count, new_start, new_count)` of `hunk`.
    fn ranges(hunk: &DiffHunk) -> (usize, usize, usize, usize) {
        (
            hunk.old_start,
            hunk.old_count,
            hunk.new_start,
            hunk.new_count,
        )
    }

    // --- ShellGitProvider tests (run inside the enclosing git repository) ---

    #[test]
    fn current_head_returns_hex_object_id() {
        let provider = ShellGitProvider::new(PathBuf::from("."));
        let head = provider.current_head().unwrap();
        // 40 hex chars for SHA-1 repositories, 64 for SHA-256 ones.
        assert!(
            matches!(head.len(), 40 | 64),
            "HEAD should be 40 or 64 hex chars, got: {head}"
        );
        assert!(
            head.chars().all(|c| c.is_ascii_hexdigit()),
            "HEAD should be hex: {head}"
        );
    }

    #[test]
    fn changed_files_returns_paths() {
        // Compare HEAD with itself: there can be no differences.
        let provider = ShellGitProvider::new(PathBuf::from("."));
        let files = provider.changed_files("HEAD", "HEAD").unwrap();
        assert!(files.is_empty(), "no changes between HEAD and HEAD");
    }

    // --- validate_git_ref tests ---

    #[test]
    fn validate_git_ref_rejects_leading_dash() {
        assert!(validate_git_ref("--output=/tmp/x").is_err());
        assert!(validate_git_ref("-p").is_err());
    }

    #[test]
    fn validate_git_ref_accepts_ordinary_refs() {
        assert!(validate_git_ref("HEAD~1").is_ok());
        assert!(validate_git_ref("feature/x-y").is_ok());
    }

    // --- parse_hunk_range tests ---

    #[test]
    fn parse_hunk_range_defaults_count_to_one() {
        assert_eq!(parse_hunk_range("7").unwrap(), (7, 1));
        assert_eq!(parse_hunk_range("3,0").unwrap(), (3, 0));
    }

    #[test]
    fn parse_hunk_range_rejects_non_numeric() {
        assert!(parse_hunk_range("x").is_err());
        assert!(parse_hunk_range("1,y").is_err());
        assert!(parse_hunk_range("").is_err());
    }

    // --- parse_diff_output tests ---

    #[test]
    fn parse_single_hunk_add() {
        let input = "\
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,5 @@ some context";
        let hunks = parse_diff_output(input).unwrap();
        assert_eq!(hunks.len(), 1);
        assert_eq!(hunks[0].file, PathBuf::from("src/lib.rs"));
        assert_eq!(ranges(&hunks[0]), (0, 0, 1, 5));
    }

    #[test]
    fn parse_modify_hunk() {
        let input = "\
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -10,3 +10,5 @@ fn main()";
        let hunks = parse_diff_output(input).unwrap();
        assert_eq!(hunks.len(), 1);
        assert_eq!(hunks[0].file, PathBuf::from("src/main.rs"));
        assert_eq!(ranges(&hunks[0]), (10, 3, 10, 5));
    }

    #[test]
    fn parse_delete_hunk() {
        let input = "\
diff --git a/src/old.rs b/src/old.rs
--- a/src/old.rs
+++ b/src/old.rs
@@ -5,3 +4,0 @@ fn removed()";
        let hunks = parse_diff_output(input).unwrap();
        assert_eq!(hunks.len(), 1);
        assert_eq!(ranges(&hunks[0]), (5, 3, 4, 0));
    }

    #[test]
    fn parse_single_line_hunk() {
        let input = "\
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5 +5 @@ fn single()";
        let hunks = parse_diff_output(input).unwrap();
        assert_eq!(hunks.len(), 1);
        assert_eq!(ranges(&hunks[0]), (5, 1, 5, 1));
    }

    #[test]
    fn parse_multi_file_diff() {
        let input = "\
diff --git a/src/a.rs b/src/a.rs
--- a/src/a.rs
+++ b/src/a.rs
@@ -1,2 +1,4 @@ fn a()
@@ -20,1 +22,3 @@ fn b()
diff --git a/src/b.rs b/src/b.rs
--- a/src/b.rs
+++ b/src/b.rs
@@ -5,3 +5,3 @@ fn c()
@@ -30,0 +30,10 @@ fn d()";
        let hunks = parse_diff_output(input).unwrap();
        assert_eq!(hunks.len(), 4);

        let files: Vec<&PathBuf> = hunks.iter().map(|h| &h.file).collect();
        let a = PathBuf::from("src/a.rs");
        let b = PathBuf::from("src/b.rs");
        assert_eq!(files, vec![&a, &a, &b, &b]);

        // First file, second hunk
        assert_eq!(ranges(&hunks[1]), (20, 1, 22, 3));
        // Second file, second hunk
        assert_eq!(hunks[3].new_start, 30);
        assert_eq!(hunks[3].new_count, 10);
    }

    #[test]
    fn parse_rename() {
        let input = "\
diff --git a/old.rs b/new.rs
similarity index 90%
rename from old.rs
rename to new.rs
--- a/old.rs
+++ b/new.rs
@@ -1,2 +1,3 @@ fn renamed()";
        let hunks = parse_diff_output(input).unwrap();
        assert_eq!(hunks.len(), 1);
        assert_eq!(hunks[0].file, PathBuf::from("new.rs"));
        assert_eq!(ranges(&hunks[0]), (1, 2, 1, 3));
    }

    #[test]
    fn parse_empty_output() {
        let hunks = parse_diff_output("").unwrap();
        assert!(hunks.is_empty());
    }

    #[test]
    fn parse_rejects_hunk_without_file() {
        assert!(parse_diff_output("@@ -1 +1 @@").is_err());
    }

    #[test]
    fn parse_rejects_malformed_hunk_headers() {
        // Missing closing "@@"
        assert!(parse_diff_output("diff --git a/a.rs b/a.rs\n@@ -1 +1").is_err());
        // Only one range spec
        assert!(parse_diff_output("diff --git a/a.rs b/a.rs\n@@ -1 @@").is_err());
        // Missing '-' sign on the old range
        assert!(parse_diff_output("diff --git a/a.rs b/a.rs\n@@ 1 +1 @@").is_err());
        // Non-numeric range
        assert!(parse_diff_output("diff --git a/a.rs b/a.rs\n@@ -x +1 @@").is_err());
    }

    // --- parse_git_status tests ---

    #[test]
    fn parse_git_status_modified_file() {
        let output = " M src/main.rs\n";
        let files = parse_git_status(output);
        assert_eq!(files, vec![PathBuf::from("src/main.rs")]);
    }

    #[test]
    fn parse_git_status_untracked_file() {
        let output = "?? new_file.ts\n";
        let files = parse_git_status(output);
        assert_eq!(files, vec![PathBuf::from("new_file.ts")]);
    }

    #[test]
    fn parse_git_status_deleted_file() {
        let output = " D deleted.rs\n";
        let files = parse_git_status(output);
        assert_eq!(files, vec![PathBuf::from("deleted.rs")]);
    }

    #[test]
    fn parse_git_status_both_modified() {
        let output = "MM both.ts\n";
        let files = parse_git_status(output);
        assert_eq!(files, vec![PathBuf::from("both.ts")]);
    }

    #[test]
    fn parse_git_status_rename_uses_new_name() {
        let output = "R  old.rs -> new.rs\n";
        let files = parse_git_status(output);
        assert_eq!(files, vec![PathBuf::from("new.rs")]);
    }

    #[test]
    fn parse_git_status_multi_line_mixed() {
        let output = " M src/main.rs\n?? new_file.ts\nA  added.py\n D deleted.go\n";
        let files = parse_git_status(output);
        assert_eq!(files.len(), 4);
        for expected in ["src/main.rs", "new_file.ts", "added.py", "deleted.go"] {
            assert!(
                files.contains(&PathBuf::from(expected)),
                "missing {expected}"
            );
        }
    }

    #[test]
    fn parse_git_status_empty_output() {
        let files = parse_git_status("");
        assert!(files.is_empty());
    }

    #[test]
    fn parse_git_status_skips_short_lines() {
        let files = parse_git_status("M\n??\n");
        assert!(files.is_empty());
    }

    #[test]
    fn parse_git_status_filters_unsupported_extensions() {
        let output = " M readme.md\n M config.json\n M src/main.rs\n";
        let files = parse_git_status(output);
        assert_eq!(files, vec![PathBuf::from("src/main.rs")]);
    }
}