Skip to main content

difflore_core/infra/
git.rs

1use std::collections::HashMap;
2use std::fs;
3use std::time::{SystemTime, UNIX_EPOCH};
4
5use crate::errors::CoreError;
6use crate::models::{
7    DiffContentRecord, DiffHunkRecord, GitBranchRecord, GitBranchesInput, GitCheckoutPRInput,
8    GitCommitInput, GitCreatePRInput, GitDiffInput, GitFileStatusRecord, GitPRResult, GitPushInput,
9    GitStatusInput, GitStatusRecord,
10};
11
12fn run_git(project_path: &str, args: &[&str]) -> crate::Result<String> {
13    let output = std::process::Command::new("git")
14        .args(args)
15        .current_dir(project_path)
16        .output()?;
17    if !output.status.success() {
18        let stderr = String::from_utf8_lossy(&output.stderr);
19        return Err(CoreError::Internal(format!("git error: {stderr}")));
20    }
21    Ok(String::from_utf8_lossy(&output.stdout).to_string())
22}
23
24fn run_git_args(project_path: &str, args: &[String]) -> crate::Result<String> {
25    let output = std::process::Command::new("git")
26        .args(args.iter().map(String::as_str))
27        .current_dir(project_path)
28        .output()?;
29    if !output.status.success() {
30        let stderr = String::from_utf8_lossy(&output.stderr);
31        return Err(CoreError::Internal(format!("git error: {stderr}")));
32    }
33    Ok(String::from_utf8_lossy(&output.stdout).to_string())
34}
35
/// Extracts the `(ahead, behind)` commit counts from a `git status --porcelain -b`
/// branch header such as `## main...origin/main [ahead 1, behind 2]`.
///
/// Only the text after the first `[` is inspected. Git ref names cannot
/// contain `[`, so that bracket always opens the tracking section; searching
/// the whole line would let a branch like `left-behind` shadow the real
/// `behind N` marker. A missing section or an unparsable number yields `0`.
fn parse_ahead_behind(line: &str) -> (i32, i32) {
    let Some((_, tracking)) = line.split_once('[') else {
        return (0, 0);
    };
    // Reads the decimal number that directly follows `label`, defaulting to 0.
    let count_after = |label: &str| -> i32 {
        tracking.find(label).map_or(0, |idx| {
            tracking[idx + label.len()..]
                .chars()
                .take_while(char::is_ascii_digit)
                .collect::<String>()
                .parse()
                .unwrap_or(0)
        })
    };
    (count_after("ahead "), count_after("behind "))
}
55
/// Pulls the local branch name out of a porcelain branch header (`## ...`).
///
/// Everything from the upstream separator `...` onward and any bracketed
/// tracking section is discarded. Returns `None` when the line is not a
/// branch header or no name remains after trimming.
fn parse_branch_from_status_line(line: &str) -> Option<String> {
    let payload = line.strip_prefix("## ").filter(|p| !p.is_empty())?;
    let local = match payload.split_once("...") {
        Some((head, _)) => head,
        None => payload,
    };
    let name = match local.split_once('[') {
        Some((head, _)) => head,
        None => local,
    }
    .trim();
    (!name.is_empty()).then(|| name.to_owned())
}
72
73fn merge_numstat(project_path: &str) -> crate::Result<HashMap<String, (i32, i32)>> {
74    let mut m: HashMap<String, (i32, i32)> = HashMap::new();
75    for out in [
76        run_git(project_path, &["diff", "--numstat"])?,
77        run_git(project_path, &["diff", "--cached", "--numstat"])?,
78    ] {
79        for line in out.lines() {
80            if line.is_empty() {
81                continue;
82            }
83            let mut parts = line.splitn(3, '\t');
84            let add = parts.next();
85            let del = parts.next();
86            let path = parts.next();
87            if let (Some(a), Some(d), Some(p)) = (add, del, path) {
88                let adds = if a == "-" { 0 } else { a.parse().unwrap_or(0) };
89                let dels = if d == "-" { 0 } else { d.parse().unwrap_or(0) };
90                let e = m.entry(p.to_owned()).or_insert((0, 0));
91                e.0 += adds;
92                e.1 += dels;
93            }
94        }
95    }
96    Ok(m)
97}
98
99fn parse_git_diff(output: &str) -> Vec<DiffContentRecord> {
100    if output.trim().is_empty() {
101        return vec![];
102    }
103    let sections: Vec<String> = output
104        .split("\ndiff --git ")
105        .enumerate()
106        .map(|(i, s)| {
107            if i == 0 {
108                s.to_owned()
109            } else {
110                format!("diff --git {s}")
111            }
112        })
113        .filter(|s| !s.trim().is_empty())
114        .collect();
115
116    if sections.len() == 1 && !sections[0].starts_with("diff --git ") {
117        return vec![];
118    }
119
120    let mut files = Vec::new();
121    for section in sections {
122        let first_line = section.lines().next().unwrap_or("");
123        let file_path = parse_b_path_from_diff_git(first_line).unwrap_or_default();
124        if file_path.is_empty() {
125            continue;
126        }
127        let hunks = parse_hunks(&section);
128        if hunks.is_empty() && is_binary_diff_section(&section) {
129            continue;
130        }
131        files.push(DiffContentRecord { file_path, hunks });
132    }
133    files
134}
135
136fn parse_b_path_from_diff_git(first_line: &str) -> Option<String> {
137    let rest = first_line.strip_prefix("diff --git ")?;
138    let (_, rest) = parse_diff_path_token(rest)?;
139    let (b_path, _) = parse_diff_path_token(rest.trim_start())?;
140    b_path.strip_prefix("b/").map(ToOwned::to_owned)
141}
142
143fn parse_diff_path_token(input: &str) -> Option<(String, &str)> {
144    let input = input.trim_start();
145    if let Some(rest) = input.strip_prefix('"') {
146        return parse_quoted_diff_path(rest);
147    }
148    let split = input.find(char::is_whitespace).unwrap_or(input.len());
149    if split == 0 {
150        return None;
151    }
152    Some((input[..split].to_owned(), &input[split..]))
153}
154
/// Decodes a C-style quoted path as git prints it in diff headers.
///
/// `input` is the text immediately after the opening `"`. On success returns
/// the decoded path together with whatever follows the closing `"`; returns
/// `None` when the closing quote is missing or the input ends mid-escape.
///
/// Octal escapes (`\NNN`, up to three digits) denote raw bytes, so a
/// non-ASCII path arrives as one escape per UTF-8 byte (`é` is `\303\251`).
/// The path is therefore assembled as bytes and decoded once at the end;
/// converting every escape to its own `char` would yield mojibake. Byte
/// sequences that are not valid UTF-8 decode to U+FFFD.
fn parse_quoted_diff_path(input: &str) -> Option<(String, &str)> {
    let mut bytes: Vec<u8> = Vec::with_capacity(input.len());
    let mut chars = input.char_indices().peekable();
    while let Some((idx, ch)) = chars.next() {
        match ch {
            '"' => {
                let rest = &input[idx + ch.len_utf8()..];
                return Some((String::from_utf8_lossy(&bytes).into_owned(), rest));
            }
            '\\' => {
                let (_, escaped) = chars.next()?;
                match escaped {
                    'a' => bytes.push(0x07),
                    'b' => bytes.push(0x08),
                    'f' => bytes.push(0x0c),
                    'n' => bytes.push(b'\n'),
                    'r' => bytes.push(b'\r'),
                    't' => bytes.push(b'\t'),
                    'v' => bytes.push(0x0b),
                    '0'..='7' => {
                        let mut value = escaped.to_digit(8)?;
                        for _ in 0..2 {
                            let Some((_, next)) = chars.peek().copied() else {
                                break;
                            };
                            let Some(digit) = next.to_digit(8) else {
                                break;
                            };
                            value = value * 8 + digit;
                            let _ = chars.next();
                        }
                        match u8::try_from(value) {
                            Ok(byte) => bytes.push(byte),
                            // Above \377 is not a byte; git never emits it.
                            Err(_) => bytes.extend_from_slice("\u{FFFD}".as_bytes()),
                        }
                    }
                    // `\\`, `\"` and any unknown escape stand for the character itself.
                    other => bytes.extend_from_slice(other.encode_utf8(&mut [0u8; 4]).as_bytes()),
                }
            }
            other => bytes.extend_from_slice(other.encode_utf8(&mut [0u8; 4]).as_bytes()),
        }
    }
    None
}
193
/// Reports whether a per-file diff section carries one of git's binary
/// markers: a `Binary files ... differ` line or a `GIT binary patch` line.
fn is_binary_diff_section(section: &str) -> bool {
    for row in section.lines() {
        if row == "GIT binary patch" || row.starts_with("Binary files ") {
            return true;
        }
    }
    false
}
199
200fn parse_hunks(section: &str) -> Vec<DiffHunkRecord> {
201    let mut hunks = Vec::new();
202    let mut in_hunk = false;
203    let mut header = String::new();
204    let mut body = String::new();
205    for line in section.lines() {
206        if line.starts_with("@@") {
207            if in_hunk {
208                hunks.push(DiffHunkRecord {
209                    header: std::mem::take(&mut header),
210                    body: std::mem::take(&mut body),
211                });
212            }
213            line.clone_into(&mut header);
214            in_hunk = true;
215        } else if in_hunk {
216            body.push_str(line);
217            body.push('\n');
218        }
219    }
220    if in_hunk {
221        hunks.push(DiffHunkRecord { header, body });
222    }
223    hunks
224}
225
226pub async fn status(input: GitStatusInput) -> crate::Result<GitStatusRecord> {
227    let out = run_git(&input.project_path, &["status", "--porcelain", "-b"])?;
228    let mut branch: Option<String> = None;
229    let mut ahead = 0i32;
230    let mut behind = 0i32;
231    let mut files: Vec<GitFileStatusRecord> = Vec::new();
232    let stats = merge_numstat(&input.project_path)?;
233
234    for line in out.lines() {
235        if line.starts_with("## ") {
236            branch = parse_branch_from_status_line(line);
237            (ahead, behind) = parse_ahead_behind(line);
238            continue;
239        }
240        if line.len() < 3 {
241            continue;
242        }
243        let status = line[..2].to_string();
244        let path = line[3..].trim().to_owned();
245        if path.is_empty() {
246            continue;
247        }
248        let (adds, dels) = stats.get(&path).copied().unwrap_or((0, 0));
249        files.push(GitFileStatusRecord {
250            path,
251            status,
252            additions: adds,
253            deletions: dels,
254        });
255    }
256
257    Ok(GitStatusRecord {
258        branch,
259        ahead,
260        behind,
261        files,
262    })
263}
264
265pub async fn branches(input: GitBranchesInput) -> crate::Result<Vec<GitBranchRecord>> {
266    let out = run_git(&input.project_path, &["branch", "-a"])?;
267    let mut rows = Vec::new();
268    for line in out.lines() {
269        if line.trim().is_empty() {
270            continue;
271        }
272        let current = line.starts_with('*');
273        let name = line.trim_start_matches('*').trim().to_owned();
274        if name.is_empty() || name.contains(" -> ") {
275            continue;
276        }
277        let remote = if name.starts_with("remotes/") {
278            name.strip_prefix("remotes/").map(|s| {
279                s.split_once('/')
280                    .map_or_else(|| s.to_owned(), |(r, _)| r.to_owned())
281            })
282        } else {
283            None
284        };
285        rows.push(GitBranchRecord {
286            name,
287            current,
288            remote,
289        });
290    }
291    Ok(rows)
292}
293
294/// CLI2-2: reject a revision/ref that git could misparse as an OPTION (argument
295/// injection). Values are already passed as argv (so there is no SHELL
296/// injection), but git still parses a leading-`-` arg as a flag, so a cloud- or
297/// PR-supplied ref like `--upload-pack=…` could smuggle a dangerous git flag.
298/// Legitimate git revisions never begin with `-` and never contain control
299/// characters, so refusing those is safe and closes the vector.
300pub fn reject_option_like_revision(value: &str, what: &str) -> crate::Result<()> {
301    if value.starts_with('-') {
302        return Err(CoreError::Validation(format!(
303            "refusing to pass {what} '{value}' to git: a leading '-' would be parsed as an option (possible argument injection)"
304        )));
305    }
306    if value.chars().any(char::is_control) {
307        return Err(CoreError::Validation(format!(
308            "refusing to pass {what} to git: contains control characters"
309        )));
310    }
311    Ok(())
312}
313
314pub async fn diff(input: GitDiffInput) -> crate::Result<Vec<DiffContentRecord>> {
315    let mut args: Vec<String> = vec!["diff".into(), "--no-color".into()];
316    if input.staged.unwrap_or(false) {
317        args.push("--cached".into());
318    }
319    // CLI2-2: validate the user/cloud-supplied revisions before handing them to
320    // git so an option-looking ref can't be parsed as a flag.
321    if let Some(ref a) = input.ref1 {
322        reject_option_like_revision(a, "diff revision")?;
323        args.push(a.clone());
324    }
325    if let Some(ref b) = input.ref2 {
326        reject_option_like_revision(b, "diff revision")?;
327        args.push(b.clone());
328    }
329    let output = run_git_args(&input.project_path, &args)?;
330    Ok(parse_git_diff(&output))
331}
332
333/// NOTE: git:changed events are driven by frontend mutation invalidation
334/// (useGitCommit / useGitPush onSettled), not emitted from the backend.
335pub async fn commit(input: GitCommitInput) -> crate::Result<()> {
336    match &input.files {
337        Some(files) if !files.is_empty() => {
338            let mut args = vec!["add", "--"];
339            let file_refs: Vec<&str> = files.iter().map(String::as_str).collect();
340            args.extend(file_refs);
341            run_git(&input.project_path, &args)?;
342        }
343        _ => {
344            return Err(CoreError::Validation(
345                "No files specified for commit. Please select files to stage explicitly.".into(),
346            ));
347        }
348    }
349
350    let now_nanos = SystemTime::now()
351        .duration_since(UNIX_EPOCH)
352        .map_or(0, |d| d.as_nanos());
353    let message_file = std::env::temp_dir().join(format!(
354        "difflore-git-commit-message-{}-{now_nanos}.txt",
355        std::process::id()
356    ));
357    fs::write(&message_file, &input.message).map_err(|e| {
358        CoreError::Internal(format!("failed to write temporary commit message: {e}"))
359    })?;
360
361    let commit_args = vec![
362        "commit".to_owned(),
363        "-F".to_owned(),
364        message_file.to_string_lossy().to_string(),
365    ];
366    let commit_result = run_git_args(&input.project_path, &commit_args);
367    let _ = fs::remove_file(&message_file);
368    commit_result?;
369    Ok(())
370}
371
372/// NOTE: git:changed events are driven by frontend mutation invalidation
373/// (useGitPush onSettled), not emitted from the backend.
374pub async fn push(input: GitPushInput) -> crate::Result<()> {
375    run_git(&input.project_path, &["push"])?;
376    Ok(())
377}
378
379pub async fn create_pr(input: GitCreatePRInput) -> crate::Result<GitPRResult> {
380    which::which("gh")
381        .map_err(|_| CoreError::Internal("GitHub CLI (gh) is not installed".into()))?;
382
383    let mut args = vec![
384        "pr".to_owned(),
385        "create".to_owned(),
386        "--title".to_owned(),
387        input.title,
388    ];
389    if let Some(body) = input.body {
390        args.push("--body".to_owned());
391        args.push(body);
392    }
393    if let Some(base) = input.base {
394        args.push("--base".to_owned());
395        args.push(base);
396    }
397
398    let output = std::process::Command::new("gh")
399        .args(&args)
400        .current_dir(&input.project_path)
401        .output()?;
402
403    if !output.status.success() {
404        let stderr = String::from_utf8_lossy(&output.stderr);
405        return Err(CoreError::Internal(format!("gh error: {stderr}")));
406    }
407
408    let stdout = String::from_utf8_lossy(&output.stdout).trim().to_owned();
409    let url = if stdout.starts_with("http") {
410        Some(stdout)
411    } else {
412        stdout
413            .lines()
414            .find(|l| l.starts_with("http"))
415            .map(ToOwned::to_owned)
416    };
417
418    Ok(GitPRResult { url })
419}
420
421pub async fn checkout_pr(input: GitCheckoutPRInput) -> crate::Result<()> {
422    which::which("gh")
423        .map_err(|_| CoreError::Internal("GitHub CLI (gh) is not installed".into()))?;
424
425    let pr_number = input
426        .pr_number
427        .ok_or_else(|| CoreError::Internal("pr_number is required".into()))?;
428
429    let output = std::process::Command::new("gh")
430        .args(["pr", "checkout", &pr_number.to_string()])
431        .current_dir(&input.project_path)
432        .output()?;
433
434    if !output.status.success() {
435        let stderr = String::from_utf8_lossy(&output.stderr);
436        return Err(CoreError::Internal(format!("gh error: {stderr}")));
437    }
438
439    Ok(())
440}
441
/// Turns a GitHub remote URL into a lower-cased `owner/repo`.
///
/// Recognized forms (surrounding whitespace, trailing `/` and a trailing
/// `.git` are tolerated):
///   `https://github.com/owner/repo(.git)?`
///   `git@github.com:owner/repo(.git)?`
///   `ssh://git@github.com/owner/repo(.git)?`
///
/// Returns `None` for any other host or scheme, and when the path is not
/// exactly two non-empty segments.
pub fn parse_github_remote_url(url: &str) -> Option<String> {
    const PREFIXES: [&str; 3] = [
        "https://github.com/",
        "git@github.com:",
        "ssh://git@github.com/",
    ];

    let cleaned = url.trim().trim_end_matches('/');
    let path = PREFIXES
        .iter()
        .find_map(|prefix| cleaned.strip_prefix(*prefix))?;
    let path = path.strip_suffix(".git").unwrap_or(path);

    let mut segments = path.split('/');
    match (segments.next(), segments.next(), segments.next()) {
        // GitHub treats owner/repo names case-insensitively, while our local
        // rule index compares repo scopes as strings. Lower-casing here lets
        // forks with mixed-case upstream remotes still recall rules imported
        // from lower-case cloud `source_repo` values.
        (Some(owner), Some(repo), None) if !owner.is_empty() && !repo.is_empty() => {
            Some(format!("{owner}/{repo}").to_ascii_lowercase())
        }
        _ => None,
    }
}
469
470/// Normalize a GitHub `owner/repo` string or supported GitHub remote URL.
471///
472/// Runtime memory scoping accepts explicit MCP `repo_full_name` values as
473/// well as local git remote URLs. Keeping both paths on one normalizer avoids
474/// accidental global recall when an agent passes `repo_full_name` but the MCP
475/// server's cwd is not the edited repository.
476pub fn normalize_github_repo_full_name(value: &str) -> Option<String> {
477    if let Some(repo) = parse_github_remote_url(value) {
478        return Some(repo);
479    }
480    let value = value.trim().trim_end_matches('/').trim_end_matches(".git");
481    let mut parts = value.split('/');
482    let owner = parts.next()?.trim();
483    let repo = parts.next()?.trim();
484    if owner.is_empty() || repo.is_empty() || parts.next().is_some() {
485        return None;
486    }
487    Some(format!("{owner}/{repo}").to_ascii_lowercase())
488}
489
490/// Best-effort `owner/repo` detection from local Git remotes.
491///
492/// Returns remotes in priority order, currently `origin` first (the repo
493/// users can safely push/write outcomes to), then `upstream` as legacy
494/// provenance metadata. Runtime rule recall uses the primary repo only;
495/// upstream is not a cross-project widening signal.
496pub fn detect_github_repo_full_names(project_path: &str) -> Vec<String> {
497    let mut repos = Vec::new();
498    for remote in ["origin", "upstream"] {
499        let Ok(url) = run_git(project_path, &["remote", "get-url", remote]) else {
500            continue;
501        };
502        let Some(repo) = parse_github_remote_url(&url) else {
503            continue;
504        };
505        if !repos.iter().any(|existing| existing == &repo) {
506            repos.push(repo);
507        }
508    }
509    repos
510}
511
512/// Best-effort primary `owner/repo` detection from `git remote get-url origin`.
513///
514/// Returns `None` when not inside a git repo, when the `origin` remote is
515/// missing, or when the remote URL doesn't parse as a GitHub URL. Accepts
516/// both HTTPS and SSH forms:
517///   `https://github.com/owner/repo(.git)?`
518///   `git@github.com:owner/repo(.git)?`
519///
520/// Used by `run_review` to scope past-verdict recall to THIS repo's rules
521/// (slogan: "understands your repo better"). Non-fatal — callers return no repo-scoped
522/// recall when detection fails.
523pub fn detect_github_repo_full_name(project_path: &str) -> Option<String> {
524    detect_github_repo_full_names(project_path)
525        .into_iter()
526        .next()
527}
528
#[cfg(test)]
mod detect_tests {
    use super::*;

    /// Each supported URL form maps to its lower-cased `owner/repo`.
    #[test]
    fn parses_supported_github_remote_urls() {
        let cases = [
            ("https://github.com/vitejs/vite.git", "vitejs/vite"),
            ("git@github.com:tokio-rs/tokio.git", "tokio-rs/tokio"),
            ("ssh://git@github.com/gin-gonic/gin", "gin-gonic/gin"),
            ("https://github.com/TanStack/router.git", "tanstack/router"),
        ];
        for (url, expected) in cases {
            assert_eq!(parse_github_remote_url(url).as_deref(), Some(expected));
        }
    }

    #[test]
    fn reject_option_like_revision_blocks_argument_injection() {
        // CLI2-2: option-looking revisions are refused (a real ref never starts
        // with '-' and never carries control characters), so a cloud-/PR-supplied
        // ref can't smuggle a git flag like `--upload-pack=…`.
        let refused = [
            "--upload-pack=evil",
            "-foo",
            "--output=/tmp/x",
            "ref\nwith-newline",
        ];
        for value in refused {
            assert!(reject_option_like_revision(value, "ref").is_err());
        }

        // Legitimate refs / SHAs / rev-expressions pass. The guard is also used
        // for non-revision positional args (e.g. a clone URL).
        let accepted = [
            ("HEAD", "ref"),
            ("main", "ref"),
            ("origin/feature-x", "ref"),
            ("HEAD~3", "ref"),
            ("https://github.com/owner/repo.git", "url"),
            ("9ef0a85b2e2e4e2fbbbc02dd3bd0a57d12345678", "sha"),
        ];
        for (value, what) in accepted {
            assert!(reject_option_like_revision(value, what).is_ok());
        }
    }

    /// Foreign hosts and paths that are not exactly `owner/repo` are refused.
    #[test]
    fn rejects_non_github_or_incomplete_remote_urls() {
        let rejected = [
            "https://gitlab.com/a/b.git",
            "https://github.com/owner",
            "git@github.com:owner/.git",
        ];
        for url in rejected {
            assert_eq!(parse_github_remote_url(url), None);
        }
    }

    /// Bare names and GitHub URLs normalize alike; anything else is `None`.
    #[test]
    fn normalizes_explicit_github_repo_full_names() {
        let cases = [
            ("TanStack/router", Some("tanstack/router")),
            (
                "https://github.com/FastAPI/FastAPI.git",
                Some("fastapi/fastapi"),
            ),
            ("owner", None),
            ("https://gitlab.com/a/b", None),
        ];
        for (value, expected) in cases {
            assert_eq!(normalize_github_repo_full_name(value).as_deref(), expected);
        }
    }

    /// A path that does not exist cannot yield a repo.
    #[test]
    fn unknown_host_returns_none() {
        let detected = detect_github_repo_full_name("/nonexistent-path-for-sure");
        assert_eq!(detected, None);
    }

    /// Quoted header paths keep embedded spaces and escaped quotes.
    #[test]
    fn parses_quoted_diff_git_paths() {
        let cases = [
            (
                "diff --git \"a/src/file with spaces.rs\" \"b/src/file with spaces.rs\"",
                "src/file with spaces.rs",
            ),
            (
                "diff --git \"a/src/quoted\\\"name.rs\" \"b/src/quoted\\\"name.rs\"",
                "src/quoted\"name.rs",
            ),
        ];
        for (header, expected) in cases {
            assert_eq!(
                parse_b_path_from_diff_git(header).as_deref(),
                Some(expected)
            );
        }
    }

    /// A binary file with no hunks is left out of the parsed diff.
    #[test]
    fn parse_git_diff_skips_binary_diff_without_hunks() {
        let diff = "diff --git a/logo.png b/logo.png\nindex 111..222 100644\nBinary files a/logo.png and b/logo.png differ\n";

        let parsed = parse_git_diff(diff);
        assert!(parsed.is_empty());
    }
}