Skip to main content

sem_core/git/
bridge.rs

1use std::env;
2use std::fs;
3use std::path::{Component, Path, PathBuf};
4use std::process::Command;
5use std::sync::{Mutex, OnceLock};
6
7use git2::{Blame, Delta, Diff, DiffFindOptions, DiffOptions, ErrorCode, Oid, Repository};
8use thiserror::Error;
9
10use super::types::{CommitInfo, DiffScope, FileChange, FileCommitInfo, FileStatus};
11use super::types::BlameLineInfo;
12
13#[derive(Error, Debug)]
14pub enum GitError {
15    #[error("not a git repository")]
16    NotARepo,
17    #[error("git error: {0}")]
18    Git2(#[from] git2::Error),
19    #[error("io error: {0}")]
20    Io(#[from] std::io::Error),
21}
22
23pub struct GitBridge {
24    repo: Repository,
25    repo_root: PathBuf,
26    cwd: PathBuf,
27}
28
29impl GitBridge {
30    pub fn open(path: &Path) -> Result<Self, GitError> {
31        let cwd = normalize_open_path(path)?;
32        let repo = match Repository::discover(path) {
33            Ok(repo) => repo,
34            Err(error) if should_retry_with_command_line_safe_directory(&error, path) => {
35                let _guard = owner_validation_lock()
36                    .lock()
37                    .unwrap_or_else(|poisoned| poisoned.into_inner());
38                let _owner_validation = OwnerValidationDisabled::new()?;
39                let repo = Repository::discover(path);
40                repo.map_err(map_git_error)?
41            }
42            Err(error) => return Err(map_git_error(error)),
43        };
44        let repo_root = repo.workdir().ok_or(GitError::NotARepo)?;
45        let repo_root = fs::canonicalize(repo_root)?;
46        Ok(Self {
47            repo,
48            repo_root,
49            cwd,
50        })
51    }
52
53    pub fn repo_root(&self) -> &Path {
54        &self.repo_root
55    }
56
57    /// Return the URL of the "origin" remote, if one exists.
58    pub fn get_remote_url(&self) -> Option<String> {
59        self.repo
60            .find_remote("origin")
61            .ok()
62            .and_then(|r| r.url().map(String::from))
63    }
64
65    /// Resolve a refspec to its full commit SHA, if valid.
66    pub fn resolve_ref_sha(&self, refspec: &str) -> Option<String> {
67        self.repo
68            .revparse_single(refspec)
69            .ok()
70            .and_then(|obj| obj.peel_to_commit().ok())
71            .map(|c| c.id().to_string())
72    }
73
74    pub fn blame_file(&self, file_path: &Path) -> Result<Blame<'_>, GitError> {
75        Ok(self.repo.blame_file(file_path, None)?)
76    }
77
78    pub fn blame_file_porcelain(&self, file_path: &Path) -> Result<Vec<BlameLineInfo>, GitError> {
79        let output = Command::new("git")
80            .arg("-C")
81            .arg(&self.repo_root)
82            .arg("blame")
83            .arg("--line-porcelain")
84            .arg("--")
85            .arg(file_path)
86            .output()?;
87
88        if !output.status.success() {
89            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
90            return Err(git_command_error(if stderr.is_empty() {
91                format!("git blame exited with {}", output.status)
92            } else {
93                stderr
94            }));
95        }
96
97        let parsed = parse_blame_porcelain(&String::from_utf8_lossy(&output.stdout));
98        if parsed.is_empty() && !output.stdout.is_empty() {
99            return Err(git_command_error(
100                "failed to parse git blame porcelain output".to_string(),
101            ));
102        }
103
104        Ok(parsed)
105    }
106
107    pub fn commit_summary(&self, oid: Oid) -> Option<String> {
108        self.repo
109            .find_commit(oid)
110            .ok()
111            .and_then(|commit| commit.summary().map(String::from))
112    }
113
114    pub fn get_head_sha(&self) -> Result<String, GitError> {
115        let head = self.repo.head()?;
116        let oid = head.target().ok_or_else(|| {
117            git2::Error::from_str("HEAD has no target")
118        })?;
119        Ok(oid.to_string())
120    }
121
122    /// Combined detect scope + get files in one call (fast path).
123    /// Shows all changes from HEAD to the current working state by default.
124    /// Use `--staged` for staged changes only.
125    pub fn detect_and_get_files(&self, pathspecs: &[String]) -> Result<(DiffScope, Vec<FileChange>), GitError> {
126        // Show the full current working state, including staged changes.
127        let mut working_files = self.get_working_diff_files(pathspecs)?;
128        if !working_files.is_empty() {
129            self.populate_contents(&mut working_files, &DiffScope::Working)?;
130            return Ok((DiffScope::Working, working_files));
131        }
132
133        // Clean worktree = no changes
134        Ok((DiffScope::Working, Vec::new()))
135    }
136
137    /// Get changed files for a specific scope
138    pub fn get_changed_files(&self, scope: &DiffScope, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
139        let mut files = match scope {
140            DiffScope::Working => {
141                self.get_working_diff_files(pathspecs)?
142            }
143            DiffScope::Staged => self.get_staged_diff_files(pathspecs)?,
144            DiffScope::Commit { sha } => self.get_commit_diff_files(sha, pathspecs)?,
145            DiffScope::Range { from, to } => self.get_range_diff_files(from, to, pathspecs)?,
146            DiffScope::RefToWorking { refspec } => self.get_ref_to_working_diff_files(refspec, pathspecs)?,
147        };
148
149        // Filter .sem/ files
150        files.retain(|f| !f.file_path.starts_with(".sem/"));
151
152        self.populate_contents(&mut files, scope)?;
153        Ok(files)
154    }
155
156    pub fn get_staged_files_with_base_ref(
157        &self,
158        base: &str,
159        pathspecs: &[String],
160    ) -> Result<Vec<FileChange>, GitError> {
161        let mut files = self.get_staged_diff_files_with_base(base, pathspecs)?;
162        files.retain(|f| !f.file_path.starts_with(".sem/"));
163
164        let base_tree = self.resolve_tree(base)?;
165        for file in files.iter_mut() {
166            if file.status != FileStatus::Deleted {
167                file.after_content = self.read_index_file(&file.file_path);
168            }
169            if file.status != FileStatus::Added {
170                let path = file
171                    .old_file_path
172                    .as_deref()
173                    .unwrap_or(&file.file_path);
174                file.before_content = self.read_blob_from_tree(&base_tree, path);
175            }
176        }
177
178        Ok(files)
179    }
180
181    /// Resolve the merge base between two refs
182    pub fn resolve_merge_base(&self, ref1: &str, ref2: &str) -> Result<String, GitError> {
183        let obj1 = self.repo.revparse_single(ref1)?;
184        let obj2 = self.repo.revparse_single(ref2)?;
185        let oid = self.repo.merge_base(obj1.id(), obj2.id())?;
186        Ok(oid.to_string())
187    }
188
189    /// Check if a string resolves to a valid git revision
190    pub fn is_valid_rev(&self, refspec: &str) -> bool {
191        self.repo.revparse_single(refspec).is_ok()
192    }
193
194    fn make_diff_opts(&self, pathspecs: &[String]) -> Result<DiffOptions, GitError> {
195        let mut opts = DiffOptions::new();
196        for spec in self.normalize_pathspecs(pathspecs)? {
197            opts.pathspec(spec.as_str());
198        }
199        Ok(opts)
200    }
201
202    fn normalize_pathspecs(&self, pathspecs: &[String]) -> Result<Vec<String>, GitError> {
203        pathspecs
204            .iter()
205            .map(|spec| self.normalize_pathspec(spec))
206            .collect()
207    }
208
209    fn normalize_pathspec(&self, spec: &str) -> Result<String, GitError> {
210        if spec.is_empty() || spec.starts_with(':') {
211            return Ok(spec.to_string());
212        }
213
214        let spec_path = Path::new(spec);
215        let absolute = if spec_path.is_absolute() {
216            normalize_absolute_pathspec(spec_path)
217        } else {
218            normalize_lexical(&self.cwd.join(spec_path))
219        };
220
221        let repo_root = normalize_lexical(&self.repo_root);
222        let relative =
223            absolute
224                .strip_prefix(&repo_root)
225                .map_err(|_| pathspec_outside_repo_error(spec, &self.repo_root))?;
226
227        if relative.as_os_str().is_empty() {
228            Ok(".".to_string())
229        } else {
230            relative
231                .to_str()
232                .map(|path| path.replace('\\', "/"))
233                .ok_or_else(|| non_utf8_pathspec_error(spec))
234        }
235    }
236
237    fn get_staged_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
238        let head_tree = match self.repo.head() {
239            Ok(head) => {
240                let commit = head.peel_to_commit()?;
241                Some(commit.tree()?)
242            }
243            Err(_) => None, // No commits yet
244        };
245
246        self.get_index_diff_files(head_tree.as_ref(), pathspecs)
247    }
248
249    fn get_staged_diff_files_with_base(
250        &self,
251        base: &str,
252        pathspecs: &[String],
253    ) -> Result<Vec<FileChange>, GitError> {
254        let base_tree = self.resolve_tree(base)?;
255        self.get_index_diff_files(Some(&base_tree), pathspecs)
256    }
257
258    fn get_index_diff_files(
259        &self,
260        base_tree: Option<&git2::Tree<'_>>,
261        pathspecs: &[String],
262    ) -> Result<Vec<FileChange>, GitError> {
263        let mut opts = self.make_diff_opts(pathspecs)?;
264        let mut diff = self.repo.diff_tree_to_index(
265            base_tree,
266            Some(&self.repo.index()?),
267            Some(&mut opts),
268        )?;
269        Self::detect_renames(&mut diff)?;
270
271        Ok(self.diff_to_file_changes(&diff))
272    }
273
274    fn get_working_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
275        let mut opts = self.make_diff_opts(pathspecs)?;
276        opts.include_untracked(false);
277
278        let head_tree = self.resolve_tree("HEAD").ok();
279        let mut diff = match head_tree.as_ref() {
280            Some(head_tree) => self
281                .repo
282                .diff_tree_to_workdir_with_index(Some(head_tree), Some(&mut opts))?,
283            None => self.repo.diff_index_to_workdir(None, Some(&mut opts))?,
284        };
285        Self::detect_renames(&mut diff)?;
286        self.apply_index_rename_map(
287            self.diff_to_file_changes(&diff),
288            head_tree.as_ref(),
289            pathspecs,
290        )
291    }
292
293    fn apply_index_rename_map(
294        &self,
295        mut files: Vec<FileChange>,
296        base_tree: Option<&git2::Tree<'_>>,
297        pathspecs: &[String],
298    ) -> Result<Vec<FileChange>, GitError> {
299        let Some(base_tree) = base_tree else {
300            return Ok(files);
301        };
302
303        let index_renames: Vec<FileChange> = self
304            .get_index_diff_files(Some(base_tree), pathspecs)?
305            .into_iter()
306            .filter(|file| file.status == FileStatus::Renamed)
307            .collect();
308
309        for rename in index_renames {
310            let Some(old_path) = rename.old_file_path.clone() else {
311                continue;
312            };
313            let target_pos = files
314                .iter()
315                .position(|file| {
316                    matches!(file.status, FileStatus::Added | FileStatus::Renamed)
317                        && file.file_path == rename.file_path
318                });
319            let deleted_pos = files
320                .iter()
321                .position(|file| {
322                    file.status == FileStatus::Deleted && file.file_path == old_path
323                });
324
325            if let (Some(target_pos), Some(deleted_pos)) = (target_pos, deleted_pos) {
326                if files[target_pos].status == FileStatus::Renamed
327                    && files[target_pos].old_file_path.as_deref() == Some(old_path.as_str())
328                {
329                    continue;
330                }
331
332                let target_file = files[target_pos].clone();
333                let deleted_file = files[deleted_pos].clone();
334                let displaced_deleted_path =
335                    if target_file.status == FileStatus::Renamed {
336                        target_file
337                            .old_file_path
338                            .as_ref()
339                            .filter(|path| *path != &old_path)
340                            .cloned()
341                    } else {
342                        None
343                    };
344
345                files = files
346                    .into_iter()
347                    .enumerate()
348                    .filter_map(|(idx, file)| {
349                        if idx == target_pos || idx == deleted_pos {
350                            None
351                        } else {
352                            Some(file)
353                        }
354                    })
355                    .collect();
356                let before_content = deleted_file
357                    .before_content
358                    .or_else(|| self.read_blob_from_tree(base_tree, &old_path));
359                let after_content = target_file
360                    .after_content
361                    .or_else(|| self.read_working_file(&target_file.file_path));
362                files.push(FileChange {
363                    file_path: target_file.file_path,
364                    status: FileStatus::Renamed,
365                    old_file_path: Some(old_path),
366                    before_content,
367                    after_content,
368                });
369                if let Some(file_path) = displaced_deleted_path {
370                    let before_content = self.read_blob_from_tree(base_tree, &file_path);
371                    files.push(FileChange {
372                        file_path,
373                        status: FileStatus::Deleted,
374                        old_file_path: None,
375                        before_content,
376                        after_content: None,
377                    });
378                }
379            }
380        }
381
382        Ok(files)
383    }
384
385    fn get_commit_diff_files(&self, sha: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
386        let obj = self.repo.revparse_single(sha)?;
387        let commit = obj.peel_to_commit()?;
388        let tree = commit.tree()?;
389
390        let parent_tree = if commit.parent_count() > 0 {
391            Some(commit.parent(0)?.tree()?)
392        } else {
393            None
394        };
395
396        let mut opts = self.make_diff_opts(pathspecs)?;
397        let mut diff = self.repo.diff_tree_to_tree(
398            parent_tree.as_ref(),
399            Some(&tree),
400            Some(&mut opts),
401        )?;
402        Self::detect_renames(&mut diff)?;
403
404        Ok(self.diff_to_file_changes(&diff))
405    }
406
407    fn get_range_diff_files(&self, from: &str, to: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
408        let from_obj = self.repo.revparse_single(from)?;
409        let to_obj = self.repo.revparse_single(to)?;
410
411        let from_tree = from_obj.peel_to_commit()?.tree()?;
412        let to_tree = to_obj.peel_to_commit()?.tree()?;
413
414        let mut opts = self.make_diff_opts(pathspecs)?;
415        let mut diff = self.repo.diff_tree_to_tree(
416            Some(&from_tree),
417            Some(&to_tree),
418            Some(&mut opts),
419        )?;
420        Self::detect_renames(&mut diff)?;
421
422        Ok(self.diff_to_file_changes(&diff))
423    }
424
425    fn get_ref_to_working_diff_files(&self, refspec: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
426        let tree = self.resolve_tree(refspec)?;
427        let mut opts = self.make_diff_opts(pathspecs)?;
428        let mut diff = self.repo.diff_tree_to_workdir_with_index(
429            Some(&tree),
430            Some(&mut opts),
431        )?;
432        Self::detect_renames(&mut diff)?;
433        self.apply_index_rename_map(self.diff_to_file_changes(&diff), Some(&tree), pathspecs)
434    }
435
436    fn detect_renames(diff: &mut Diff) -> Result<(), GitError> {
437        let mut opts = DiffFindOptions::new();
438        opts.renames(true);
439        diff.find_similar(Some(&mut opts))?;
440        Ok(())
441    }
442
443    fn diff_to_file_changes(&self, diff: &Diff) -> Vec<FileChange> {
444        let mut files = Vec::new();
445
446        for delta in diff.deltas() {
447            let (status, file_path, old_file_path) = match delta.status() {
448                Delta::Added => {
449                    let path = delta
450                        .new_file()
451                        .path()
452                        .and_then(|p| p.to_str())
453                        .unwrap_or("")
454                        .to_string();
455                    (FileStatus::Added, path, None)
456                }
457                Delta::Deleted => {
458                    let path = delta
459                        .old_file()
460                        .path()
461                        .and_then(|p| p.to_str())
462                        .unwrap_or("")
463                        .to_string();
464                    (FileStatus::Deleted, path, None)
465                }
466                Delta::Modified => {
467                    let path = delta
468                        .new_file()
469                        .path()
470                        .and_then(|p| p.to_str())
471                        .unwrap_or("")
472                        .to_string();
473                    (FileStatus::Modified, path, None)
474                }
475                Delta::Renamed => {
476                    let new_path = delta
477                        .new_file()
478                        .path()
479                        .and_then(|p| p.to_str())
480                        .unwrap_or("")
481                        .to_string();
482                    let old_path = delta
483                        .old_file()
484                        .path()
485                        .and_then(|p| p.to_str())
486                        .unwrap_or("")
487                        .to_string();
488                    (FileStatus::Renamed, new_path, Some(old_path))
489                }
490                _ => continue,
491            };
492
493            if !file_path.starts_with(".sem/") {
494                files.push(FileChange {
495                    file_path,
496                    status,
497                    old_file_path,
498                    before_content: None,
499                    after_content: None,
500                });
501            }
502        }
503
504        files
505    }
506
507    fn bytes_look_binary(bytes: &[u8], complete: bool) -> bool {
508        if bytes.iter().any(|byte| *byte == 0) {
509            return true;
510        }
511
512        match std::str::from_utf8(bytes) {
513            Ok(_) => false,
514            Err(error) => complete || error.error_len().is_some(),
515        }
516    }
517
518    fn populate_contents(
519        &self,
520        files: &mut [FileChange],
521        scope: &DiffScope,
522    ) -> Result<(), GitError> {
523        match scope {
524            DiffScope::Working => {
525                // Resolve HEAD tree once for all before_content reads
526                let head_tree = self.resolve_tree("HEAD").ok();
527                for file in files.iter_mut() {
528                    if file.status != FileStatus::Deleted {
529                        file.after_content = self.read_working_file(&file.file_path);
530                    }
531                    if file.status != FileStatus::Added {
532                        let path = file
533                            .old_file_path
534                            .as_deref()
535                            .unwrap_or(&file.file_path);
536                        file.before_content = head_tree
537                            .as_ref()
538                            .and_then(|t| self.read_blob_from_tree(t, path));
539                    }
540                }
541            }
542            DiffScope::Staged => {
543                let head_tree = self.resolve_tree("HEAD").ok();
544                for file in files.iter_mut() {
545                    if file.status != FileStatus::Deleted {
546                        file.after_content = self
547                            .read_index_file(&file.file_path)
548                            .or_else(|| self.read_working_file(&file.file_path));
549                    }
550                    if file.status != FileStatus::Added {
551                        let path = file
552                            .old_file_path
553                            .as_deref()
554                            .unwrap_or(&file.file_path);
555                        file.before_content = head_tree
556                            .as_ref()
557                            .and_then(|t| self.read_blob_from_tree(t, path));
558                    }
559                }
560            }
561            DiffScope::Commit { sha } => {
562                // Resolve both trees once instead of per-file
563                let after_tree = self.resolve_tree(sha)?;
564                let before_tree = self.resolve_tree(&format!("{sha}~1")).ok();
565                for file in files.iter_mut() {
566                    if file.status != FileStatus::Deleted {
567                        file.after_content =
568                            self.read_blob_from_tree(&after_tree, &file.file_path);
569                    }
570                    if file.status != FileStatus::Added {
571                        let path = file
572                            .old_file_path
573                            .as_deref()
574                            .unwrap_or(&file.file_path);
575                        file.before_content = before_tree
576                            .as_ref()
577                            .and_then(|t| self.read_blob_from_tree(t, path));
578                    }
579                }
580            }
581            DiffScope::Range { from, to } => {
582                let after_tree = self.resolve_tree(to)?;
583                let before_tree = self.resolve_tree(from)?;
584                for file in files.iter_mut() {
585                    if file.status != FileStatus::Deleted {
586                        file.after_content =
587                            self.read_blob_from_tree(&after_tree, &file.file_path);
588                    }
589                    if file.status != FileStatus::Added {
590                        let path = file
591                            .old_file_path
592                            .as_deref()
593                            .unwrap_or(&file.file_path);
594                        file.before_content =
595                            self.read_blob_from_tree(&before_tree, path);
596                    }
597                }
598            }
599            DiffScope::RefToWorking { refspec } => {
600                let before_tree = self.resolve_tree(refspec)?;
601                for file in files.iter_mut() {
602                    if file.status != FileStatus::Deleted {
603                        file.after_content = self.read_working_file(&file.file_path);
604                    }
605                    if file.status != FileStatus::Added {
606                        let path = file
607                            .old_file_path
608                            .as_deref()
609                            .unwrap_or(&file.file_path);
610                        file.before_content =
611                            self.read_blob_from_tree(&before_tree, path);
612                    }
613                }
614            }
615        }
616        Ok(())
617    }
618
619    fn resolve_tree(&self, refspec: &str) -> Result<git2::Tree<'_>, GitError> {
620        let obj = self.repo.revparse_single(refspec)?;
621        let commit = obj.peel_to_commit()?;
622        Ok(commit.tree()?)
623    }
624
625    fn normalize_line_endings(s: String) -> String {
626        if s.contains('\r') {
627            s.replace("\r\n", "\n").replace('\r', "\n")
628        } else {
629            s
630        }
631    }
632
633    fn read_blob_from_tree(&self, tree: &git2::Tree, file_path: &str) -> Option<String> {
634        let entry = tree.get_path(Path::new(file_path)).ok()?;
635        let blob = self.repo.find_blob(entry.id()).ok()?;
636        let bytes = blob.content();
637        if blob.is_binary() || Self::bytes_look_binary(bytes, true) {
638            return None;
639        }
640        std::str::from_utf8(bytes)
641            .ok()
642            .map(|s| Self::normalize_line_endings(s.to_string()))
643    }
644
645    fn read_working_file(&self, file_path: &str) -> Option<String> {
646        let full_path = self.repo_root.join(file_path);
647        let bytes = fs::read(full_path).ok()?;
648        if Self::bytes_look_binary(&bytes, true) {
649            return None;
650        }
651        String::from_utf8(bytes)
652            .ok()
653            .map(Self::normalize_line_endings)
654    }
655
656    fn read_index_file(&self, file_path: &str) -> Option<String> {
657        let index = self.repo.index().ok()?;
658        let entry = index.get_path(Path::new(file_path), 0)?;
659        let blob = self.repo.find_blob(entry.id).ok()?;
660        let bytes = blob.content();
661        if blob.is_binary() || Self::bytes_look_binary(bytes, true) {
662            return None;
663        }
664        std::str::from_utf8(bytes)
665            .ok()
666            .map(|s| Self::normalize_line_endings(s.to_string()))
667    }
668
669
670    /// Read file content at a specific git ref (commit SHA, branch, tag, etc.)
671    pub fn read_file_at_ref(&self, refspec: &str, file_path: &str) -> Result<Option<String>, GitError> {
672        let tree = self.resolve_tree(refspec)?;
673        Ok(self.read_blob_from_tree(&tree, file_path))
674    }
675
676    /// Get commits that modified a specific file, walking history from HEAD.
677    /// Returns commits in reverse chronological order (newest first).
678    pub fn get_file_commits(&self, file_path: &str, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
679        let mut revwalk = self.repo.revwalk()?;
680        revwalk.push_head()?;
681        revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::TIME)?;
682
683        let mut commits = Vec::new();
684        let path = Path::new(file_path);
685
686        for oid_result in revwalk {
687            let oid = oid_result?;
688            let commit = self.repo.find_commit(oid)?;
689            let tree = commit.tree()?;
690
691            // Check if this file exists in this commit's tree
692            let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
693
694            // Compare with parent to see if the file changed
695            let file_in_parent = if commit.parent_count() > 0 {
696                commit.parent(0)
697                    .ok()
698                    .and_then(|p| p.tree().ok())
699                    .and_then(|t| t.get_path(path).ok().map(|e| e.id()))
700            } else {
701                None // No parent = initial commit, file was added
702            };
703
704            // Include if file changed between parent and this commit
705            let changed = match (file_in_commit, file_in_parent) {
706                (Some(cur), Some(prev)) => cur != prev,  // content changed
707                (Some(_), None) => true,                   // file added
708                (None, Some(_)) => true,                   // file deleted
709                (None, None) => false,                     // file not present in either
710            };
711
712            if changed {
713                let sha = oid.to_string();
714                commits.push(CommitInfo {
715                    short_sha: sha[..7.min(sha.len())].to_string(),
716                    sha,
717                    author: commit.author().name().unwrap_or("unknown").to_string(),
718                    date: commit.time().seconds().to_string(),
719                    message: commit.message().unwrap_or("").to_string(),
720                });
721
722                if limit != 0 && commits.len() >= limit {
723                    break;
724                }
725            }
726        }
727
728        Ok(commits)
729    }
730
731    /// Get commits that modified a specific file, following renames across history.
732    /// Like `git log --follow`: when the tracked path disappears between commits,
733    /// compute a diff with rename detection to find the old filename and continue.
734    /// Returns commits in reverse chronological order (newest first).
735    pub fn get_file_commits_follow_renames(
736        &self,
737        file_path: &str,
738        limit: usize,
739    ) -> Result<Vec<FileCommitInfo>, GitError> {
740        match self.get_file_commits_follow_renames_cli(file_path, limit) {
741            Ok(commits) if !commits.is_empty() => return Ok(commits),
742            Ok(_) => {}
743            Err(GitError::Io(error)) if error.kind() == std::io::ErrorKind::NotFound => {}
744            Err(error) => return Err(error),
745        }
746
747        let mut revwalk = self.repo.revwalk()?;
748        revwalk.push_head()?;
749        revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::TIME)?;
750
751        let mut results = Vec::new();
752        let mut tracked_path = file_path.to_string();
753
754        for oid_result in revwalk {
755            let oid = oid_result?;
756            let commit = self.repo.find_commit(oid)?;
757            let tree = commit.tree()?;
758
759            let path = Path::new(&tracked_path);
760            let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
761
762            let (parent_tree_opt, file_in_parent) = if commit.parent_count() > 0 {
763                let parent = commit.parent(0)?;
764                let ptree = parent.tree()?;
765                let fip = ptree.get_path(path).ok().map(|e| e.id());
766                (Some(ptree), fip)
767            } else {
768                (None, None)
769            };
770
771            let changed = match (file_in_commit, file_in_parent) {
772                (Some(cur), Some(prev)) => cur != prev,
773                (Some(_), None) => true,
774                (None, Some(_)) => true,
775                (None, None) => false,
776            };
777
778            if changed {
779                let sha_str = oid.to_string();
780                results.push(FileCommitInfo {
781                    commit: CommitInfo {
782                        short_sha: sha_str[..7.min(sha_str.len())].to_string(),
783                        sha: sha_str,
784                        author: commit.author().name().unwrap_or("unknown").to_string(),
785                        date: commit.time().seconds().to_string(),
786                        message: commit.message().unwrap_or("").to_string(),
787                    },
788                    file_path: tracked_path.clone(),
789                });
790
791                if limit != 0 && results.len() >= limit {
792                    break;
793                }
794            }
795
796            // When walking backward, the rename commit still contains the new
797            // path. Detect that parent-side old path before the next iteration.
798            let should_check_rename =
799                parent_tree_opt.is_some() && (file_in_parent.is_none() || file_in_commit.is_none());
800            if should_check_rename {
801                let mut diff = self.repo.diff_tree_to_tree(
802                    parent_tree_opt.as_ref(),
803                    Some(&tree),
804                    None,
805                )?;
806                let mut find_opts = DiffFindOptions::new();
807                find_opts.renames(true);
808                diff.find_similar(Some(&mut find_opts))?;
809
810                let mut found_rename = false;
811                for delta in diff.deltas() {
812                    if delta.status() == Delta::Renamed {
813                        let new_path = delta
814                            .new_file()
815                            .path()
816                            .and_then(|p| p.to_str())
817                            .unwrap_or("");
818                        if new_path == tracked_path {
819                            // The tracked file was renamed FROM old_path
820                            let old_path = delta
821                                .old_file()
822                                .path()
823                                .and_then(|p| p.to_str())
824                                .unwrap_or("")
825                                .to_string();
826                            if !old_path.is_empty() {
827                                tracked_path = old_path;
828                                found_rename = true;
829                                break;
830                            }
831                        }
832                    }
833                }
834
835                if !found_rename && file_in_commit.is_none() {
836                    // File truly deleted, stop tracking
837                    break;
838                }
839            }
840        }
841
842        Ok(results)
843    }
844
845    fn get_file_commits_follow_renames_cli(
846        &self,
847        file_path: &str,
848        limit: usize,
849    ) -> Result<Vec<FileCommitInfo>, GitError> {
850        let mut command = Command::new("git");
851        command
852            .arg("-C")
853            .arg(&self.repo_root)
854            .arg("log")
855            .arg("--follow")
856            .arg("--format=\x1e%H\x1f%an\x1f%at\x1f%s")
857            .arg("--name-status");
858        if limit != 0 {
859            command.arg("-n").arg(limit.to_string());
860        }
861        command.arg("--").arg(file_path);
862
863        let output = command.output()?;
864        if !output.status.success() {
865            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
866            return Err(git_command_error(if stderr.is_empty() {
867                format!("git log exited with {}", output.status)
868            } else {
869                stderr
870            }));
871        }
872
873        let stdout = String::from_utf8_lossy(&output.stdout);
874        let mut tracked_path = file_path.to_string();
875        let mut commits = Vec::new();
876
877        for record in stdout.split('\x1e') {
878            let record = record.trim_start_matches('\n');
879            if record.trim().is_empty() {
880                continue;
881            }
882
883            let mut lines = record.lines();
884            let Some(meta) = lines.next() else {
885                continue;
886            };
887            let mut parts = meta.splitn(4, '\x1f');
888            let Some(sha) = parts.next() else {
889                continue;
890            };
891            let Some(author) = parts.next() else {
892                continue;
893            };
894            let Some(date) = parts.next() else {
895                continue;
896            };
897            let message = parts.next().unwrap_or_default();
898
899            let commit_path = tracked_path.clone();
900            let mut previous_path = None;
901            for line in lines {
902                let fields: Vec<&str> = line.split('\t').collect();
903                if fields.len() >= 3 && fields[0].starts_with('R') && fields[2] == tracked_path {
904                    previous_path = Some(fields[1].to_string());
905                }
906            }
907
908            commits.push(FileCommitInfo {
909                commit: CommitInfo {
910                    short_sha: sha[..7.min(sha.len())].to_string(),
911                    sha: sha.to_string(),
912                    author: author.to_string(),
913                    date: date.to_string(),
914                    message: message.to_string(),
915                },
916                file_path: commit_path,
917            });
918
919            if let Some(previous_path) = previous_path {
920                tracked_path = previous_path;
921            }
922        }
923
924        Ok(commits)
925    }
926
927    /// Get all file paths changed in a single commit (vs its parent).
928    /// Returns file paths from the new side of each delta.
929    pub fn get_commit_changed_files(&self, sha: &str) -> Result<Vec<String>, GitError> {
930        let obj = self.repo.revparse_single(sha)?;
931        let commit = obj.peel_to_commit()?;
932        let tree = commit.tree()?;
933        let parent_tree = if commit.parent_count() > 0 {
934            Some(commit.parent(0)?.tree()?)
935        } else {
936            None
937        };
938        let diff = self.repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None)?;
939        let mut paths = Vec::new();
940        for delta in diff.deltas() {
941            if let Some(p) = delta.new_file().path().and_then(|p| p.to_str()) {
942                paths.push(p.to_string());
943            }
944            // Also include old path for deletions/renames
945            if let Some(p) = delta.old_file().path().and_then(|p| p.to_str()) {
946                if !paths.contains(&p.to_string()) {
947                    paths.push(p.to_string());
948                }
949            }
950        }
951        Ok(paths)
952    }
953
954    pub fn get_log(&self, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
955        let mut revwalk = self.repo.revwalk()?;
956        revwalk.push_head()?;
957
958        let mut commits = Vec::new();
959        for (i, oid_result) in revwalk.enumerate() {
960            if limit != 0 && i >= limit {
961                break;
962            }
963            let oid = oid_result?;
964            let commit = self.repo.find_commit(oid)?;
965            let sha = oid.to_string();
966            commits.push(CommitInfo {
967                short_sha: sha[..7.min(sha.len())].to_string(),
968                sha,
969                author: commit.author().name().unwrap_or("unknown").to_string(),
970                date: commit.time().seconds().to_string(),
971                message: commit.message().unwrap_or("").to_string(),
972            });
973        }
974
975        Ok(commits)
976    }
977}
978
979fn parse_blame_porcelain(output: &str) -> Vec<BlameLineInfo> {
980    let lines: Vec<&str> = output.lines().collect();
981    let mut parsed = Vec::new();
982    let mut index = 0;
983
984    while index < lines.len() {
985        let Some((raw_sha, line_number)) = parse_blame_header(lines[index]) else {
986            index += 1;
987            continue;
988        };
989        index += 1;
990
991        let mut author = String::new();
992        let mut author_time = None;
993        let mut summary = String::new();
994
995        while index < lines.len() {
996            let line = lines[index];
997            index += 1;
998
999            if line.starts_with('\t') {
1000                break;
1001            } else if let Some(value) = line.strip_prefix("author ") {
1002                author = value.to_string();
1003            } else if let Some(value) = line.strip_prefix("author-time ") {
1004                author_time = value.parse::<i64>().ok();
1005            } else if let Some(value) = line.strip_prefix("summary ") {
1006                summary = value.to_string();
1007            }
1008        }
1009
1010        let sha = raw_sha.trim_start_matches('^');
1011        let commit_sha = if sha.chars().all(|c| c == '0') {
1012            None
1013        } else {
1014            Some(sha.to_string())
1015        };
1016
1017        if author.is_empty() {
1018            author = if commit_sha.is_none() {
1019                "Not Committed Yet".to_string()
1020            } else {
1021                "unknown".to_string()
1022            };
1023        }
1024
1025        parsed.push(BlameLineInfo {
1026            line_number,
1027            commit_sha,
1028            author,
1029            author_time,
1030            summary,
1031        });
1032    }
1033
1034    parsed.sort_by_key(|line| line.line_number);
1035    parsed
1036}
1037
1038fn parse_blame_header(line: &str) -> Option<(&str, usize)> {
1039    let mut parts = line.split_whitespace();
1040    let sha = parts.next()?;
1041    if !is_blame_oid(sha) {
1042        return None;
1043    }
1044    parts.next()?;
1045    let final_line = parts.next()?.parse().ok()?;
1046    Some((sha, final_line))
1047}
1048
/// Returns `true` when `value`, after removing at most one leading `^`, is
/// exactly 40 ASCII hexadecimal digits.
fn is_blame_oid(value: &str) -> bool {
    let digits = match value.strip_prefix('^') {
        Some(rest) => rest,
        None => value,
    };
    if digits.len() != 40 {
        return false;
    }
    digits.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1053
1054fn git_command_error(message: String) -> GitError {
1055    GitError::Git2(git2::Error::from_str(&message))
1056}
1057
1058fn map_git_error(error: git2::Error) -> GitError {
1059    if error.code() == ErrorCode::NotFound {
1060        GitError::NotARepo
1061    } else {
1062        GitError::Git2(error)
1063    }
1064}
1065
1066fn should_retry_with_command_line_safe_directory(error: &git2::Error, path: &Path) -> bool {
1067    let safe_directories = command_line_safe_directories();
1068    should_retry_with_safe_directory(error, path, &safe_directories)
1069}
1070
1071fn should_retry_with_safe_directory(error: &git2::Error, path: &Path, safe_directories: &[String]) -> bool {
1072    error.code() == ErrorCode::Owner
1073        && nearest_git_root(path).is_some_and(|repo_root| {
1074            safe_directories.iter().any(|safe_directory| {
1075                safe_directory == "*"
1076                    || paths_match(&repo_root, Path::new(safe_directory))
1077            })
1078        })
1079}
1080
/// Collect the `safe.directory` values passed through the environment as
/// `GIT_CONFIG_COUNT` / `GIT_CONFIG_KEY_<n>` / `GIT_CONFIG_VALUE_<n>`.
///
/// A missing or unparsable count is treated as zero. Slots whose key is
/// missing, is not `safe.directory` (compared case-insensitively), or whose
/// value is missing are skipped.
fn command_line_safe_directories() -> Vec<String> {
    let declared = match env::var("GIT_CONFIG_COUNT") {
        Ok(raw) => raw.parse::<usize>().unwrap_or(0),
        Err(_) => 0,
    };

    let mut directories = Vec::new();
    for slot in 0..declared {
        let Ok(key) = env::var(format!("GIT_CONFIG_KEY_{slot}")) else {
            continue;
        };
        if !key.eq_ignore_ascii_case("safe.directory") {
            continue;
        }
        if let Ok(value) = env::var(format!("GIT_CONFIG_VALUE_{slot}")) {
            directories.push(value);
        }
    }
    directories
}
1098
/// Find the closest directory at or above `path` that contains a `.git` entry.
///
/// The input is first resolved to an absolute path: canonicalized when it
/// exists, otherwise joined onto the current directory. Without that step a
/// relative input such as `.` can never be walked above the working directory,
/// so a repository root located higher up would be missed. When `path` is a
/// file the search starts at its parent directory.
///
/// Returns the canonicalized root (or the uncanonicalized directory when
/// canonicalization fails), and `None` when no ancestor contains `.git`.
fn nearest_git_root(path: &Path) -> Option<PathBuf> {
    let resolved = fs::canonicalize(path)
        .or_else(|_| env::current_dir().map(|cwd| cwd.join(path)))
        .unwrap_or_else(|_| path.to_path_buf());

    let start = if resolved.is_file() {
        resolved.parent()?
    } else {
        resolved.as_path()
    };

    start
        .ancestors()
        .find(|dir| dir.join(".git").exists())
        .map(|dir| fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf()))
}
1114
/// Compare two paths after canonicalizing each one (falling back to the path
/// as given when canonicalization fails). On Windows the comparison ignores
/// ASCII case; elsewhere the resolved paths must be equal.
fn paths_match(left: &Path, right: &Path) -> bool {
    let resolve = |path: &Path| fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
    let (lhs, rhs) = (resolve(left), resolve(right));

    if !cfg!(windows) {
        return lhs == rhs;
    }
    lhs.to_string_lossy()
        .eq_ignore_ascii_case(&rhs.to_string_lossy())
}
1126
/// Process-wide mutex, created on first use, that callers hold while
/// libgit2's owner validation is switched off.
fn owner_validation_lock() -> &'static Mutex<()> {
    static OWNER_VALIDATION: OnceLock<Mutex<()>> = OnceLock::new();
    OWNER_VALIDATION.get_or_init(Mutex::default)
}
1131
1132struct OwnerValidationDisabled;
1133
1134impl OwnerValidationDisabled {
1135    fn new() -> Result<Self, GitError> {
1136        // libgit2 stores this as a process-global option; callers hold owner_validation_lock.
1137        unsafe { git2::opts::set_verify_owner_validation(false)? };
1138        Ok(Self)
1139    }
1140}
1141
1142impl Drop for OwnerValidationDisabled {
1143    fn drop(&mut self) {
1144        // Restore the default before the owner-validation lock is released.
1145        unsafe {
1146            let _ = git2::opts::set_verify_owner_validation(true);
1147        }
1148    }
1149}
1150
1151fn normalize_open_path(path: &Path) -> Result<PathBuf, GitError> {
1152    let canonical = match fs::canonicalize(path) {
1153        Ok(canonical) => canonical,
1154        Err(_) if path.is_absolute() => normalize_lexical(path),
1155        Err(_) => normalize_lexical(&env::current_dir()?.join(path)),
1156    };
1157
1158    Ok(if canonical.is_file() {
1159        canonical
1160            .parent()
1161            .map(Path::to_path_buf)
1162            .unwrap_or(canonical)
1163    } else {
1164        canonical
1165    })
1166}
1167
1168fn normalize_absolute_pathspec(path: &Path) -> PathBuf {
1169    let path = normalize_lexical(path);
1170    let Some(leaf) = path.file_name() else {
1171        return fs::canonicalize(&path).unwrap_or(path);
1172    };
1173    let mut trailing_components = vec![leaf.to_os_string()];
1174
1175    let Some(parent) = path.parent() else {
1176        return path;
1177    };
1178
1179    for ancestor in parent.ancestors() {
1180        if ancestor.exists() {
1181            let mut normalized =
1182                fs::canonicalize(ancestor).unwrap_or_else(|_| normalize_lexical(ancestor));
1183            for component in trailing_components.iter().rev() {
1184                normalized.push(component);
1185            }
1186            return normalized;
1187        }
1188
1189        let Some(name) = ancestor.file_name() else {
1190            return path;
1191        };
1192        trailing_components.push(name.to_os_string());
1193    }
1194
1195    path
1196}
1197
1198fn pathspec_outside_repo_error(pathspec: &str, repo_root: &Path) -> GitError {
1199    GitError::Git2(git2::Error::from_str(&format!(
1200        "pathspec '{pathspec}' is outside repository '{}'",
1201        repo_root.display()
1202    )))
1203}
1204
1205fn non_utf8_pathspec_error(pathspec: &str) -> GitError {
1206    GitError::Git2(git2::Error::from_str(&format!(
1207        "pathspec '{pathspec}' is not valid UTF-8 after normalization"
1208    )))
1209}
1210
/// Normalize `path` purely lexically, without touching the filesystem.
///
/// `.` components are dropped and each `..` removes the preceding normal
/// component. A `..` that has nothing to cancel is kept for relative paths
/// (so `../../b` stays `../../b`) and discarded directly under a root (so
/// `/..` stays `/`). Symlinks are not resolved.
fn normalize_lexical(path: &Path) -> PathBuf {
    let mut normalized = PathBuf::new();

    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match normalized.components().next_back() {
                // Only a real directory name can be cancelled by `..`.
                Some(Component::Normal(_)) => {
                    normalized.pop();
                }
                // There is nothing above the root.
                Some(Component::RootDir) => {}
                // Empty so far, or ending in `..` / a bare prefix: keep the
                // `..` instead of popping an earlier `..` away.
                _ => normalized.push(".."),
            },
            Component::Normal(part) => normalized.push(part),
            Component::Prefix(prefix) => normalized.push(prefix.as_os_str()),
            Component::RootDir => normalized.push(component.as_os_str()),
        }
    }

    normalized
}
1230
1231#[cfg(test)]
1232mod tests {
1233    use super::*;
1234    use crate::model::change::ChangeType;
1235    use crate::parser::differ::{collect_binary_file_changes, compute_semantic_diff};
1236    use crate::parser::plugins::create_default_registry;
1237    use git2::{ErrorClass, Oid, Repository, Signature};
1238    use tempfile::TempDir;
1239
1240    fn commit_file(repo: &Repository, file_path: &str, contents: &str, message: &str) -> Oid {
1241        fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
1242
1243        let mut index = repo.index().unwrap();
1244        index.add_path(Path::new(file_path)).unwrap();
1245        index.write().unwrap();
1246
1247        let tree_id = index.write_tree().unwrap();
1248        let tree = repo.find_tree(tree_id).unwrap();
1249        let sig = Signature::now("Test User", "test@example.com").unwrap();
1250
1251        match repo.head() {
1252            Ok(head) => {
1253                let parent = repo.find_commit(head.target().unwrap()).unwrap();
1254                repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
1255                    .unwrap()
1256            }
1257            Err(_) => repo
1258                .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
1259                .unwrap(),
1260        }
1261    }
1262
1263    fn commit_binary_file(
1264        repo: &Repository,
1265        file_path: &str,
1266        contents: &[u8],
1267        message: &str,
1268    ) -> Oid {
1269        fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
1270
1271        let mut index = repo.index().unwrap();
1272        index.add_path(Path::new(file_path)).unwrap();
1273        index.write().unwrap();
1274
1275        let tree_id = index.write_tree().unwrap();
1276        let tree = repo.find_tree(tree_id).unwrap();
1277        let sig = Signature::now("Test User", "test@example.com").unwrap();
1278
1279        match repo.head() {
1280            Ok(head) => {
1281                let parent = repo.find_commit(head.target().unwrap()).unwrap();
1282                repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
1283                    .unwrap()
1284            }
1285            Err(_) => repo
1286                .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
1287                .unwrap(),
1288        }
1289    }
1290
1291    #[test]
1292    fn porcelain_blame_reports_uncommitted_lines() {
1293        let temp = TempDir::new().unwrap();
1294        let repo = Repository::init(temp.path()).unwrap();
1295
1296        commit_file(&repo, "a.py", "def foo():\n    return 1\n", "init");
1297        fs::write(temp.path().join("a.py"), "def foo():\n    return 2\n").unwrap();
1298
1299        let bridge = GitBridge::open(temp.path()).unwrap();
1300        let blame = bridge.blame_file_porcelain(Path::new("a.py")).unwrap();
1301
1302        assert!(blame[0].commit_sha.is_some());
1303        assert_eq!(blame[1].commit_sha, None);
1304        assert_eq!(blame[1].author, "Not Committed Yet");
1305    }
1306
1307    #[test]
1308    fn clean_worktree_does_not_fall_back_to_head_commit() {
1309        let temp = TempDir::new().unwrap();
1310        let repo = Repository::init(temp.path()).unwrap();
1311
1312        commit_file(&repo, "sample.ts", "export function a() {\n  return 1;\n}\n", "init");
1313        commit_file(
1314            &repo,
1315            "sample.ts",
1316            "export function a() {\n  return 2;\n}\n",
1317            "change a",
1318        );
1319
1320        let bridge = GitBridge::open(temp.path()).unwrap();
1321        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1322
1323        assert!(matches!(scope, DiffScope::Working));
1324        assert!(files.is_empty());
1325    }
1326
1327    #[test]
1328    fn owner_error_retries_for_command_line_safe_directory() {
1329        let temp = TempDir::new().unwrap();
1330        Repository::init(temp.path()).unwrap();
1331
1332        let owner_error = git2::Error::new(
1333            ErrorCode::Owner,
1334            ErrorClass::Config,
1335            "owner mismatch",
1336        );
1337        let safe_directories = [temp.path().to_string_lossy().to_string()];
1338
1339        assert!(should_retry_with_safe_directory(
1340            &owner_error,
1341            temp.path(),
1342            &safe_directories,
1343        ));
1344
1345        let other_directories = [temp.path().join("other").to_string_lossy().to_string()];
1346        assert!(!should_retry_with_safe_directory(
1347            &owner_error,
1348            temp.path(),
1349            &other_directories,
1350        ));
1351
1352        let not_found_error = git2::Error::new(
1353            ErrorCode::NotFound,
1354            ErrorClass::Repository,
1355            "not found",
1356        );
1357        assert!(!should_retry_with_safe_directory(
1358            &not_found_error,
1359            temp.path(),
1360            &["*".to_string()],
1361        ));
1362    }
1363
1364    #[test]
1365    fn explicit_commit_scope_still_reads_head_commit_diff() {
1366        let temp = TempDir::new().unwrap();
1367        let repo = Repository::init(temp.path()).unwrap();
1368
1369        commit_file(&repo, "sample.ts", "export function a() {\n  return 1;\n}\n", "init");
1370        let head_oid = commit_file(
1371            &repo,
1372            "sample.ts",
1373            "export function a() {\n  return 2;\n}\n",
1374            "change a",
1375        );
1376
1377        let bridge = GitBridge::open(temp.path()).unwrap();
1378        let files = bridge
1379            .get_changed_files(&DiffScope::Commit {
1380                sha: head_oid.to_string(),
1381            }, &[])
1382            .unwrap();
1383
1384        assert_eq!(files.len(), 1);
1385        assert_eq!(files[0].file_path, "sample.ts");
1386        assert_eq!(files[0].status, FileStatus::Modified);
1387    }
1388
1389    #[test]
1390    fn pathspecs_are_normalized_from_open_directory() {
1391        let temp = TempDir::new().unwrap();
1392        let repo = Repository::init(temp.path()).unwrap();
1393        fs::create_dir_all(temp.path().join("pkg")).unwrap();
1394
1395        commit_file(&repo, "pkg/a.py", "def foo():\n    return 1\n", "init");
1396        fs::write(temp.path().join("pkg/a.py"), "def foo():\n    return 2\n").unwrap();
1397
1398        let bridge = GitBridge::open(&temp.path().join("pkg")).unwrap();
1399        let relative_files = bridge
1400            .get_changed_files(&DiffScope::Working, &["a.py".to_string()])
1401            .unwrap();
1402
1403        assert_eq!(relative_files.len(), 1);
1404        assert_eq!(relative_files[0].file_path, "pkg/a.py");
1405
1406        let absolute_path = temp.path().join("pkg/a.py").to_string_lossy().to_string();
1407        let absolute_files = bridge
1408            .get_changed_files(&DiffScope::Working, &[absolute_path])
1409            .unwrap();
1410
1411        assert_eq!(absolute_files.len(), 1);
1412        assert_eq!(absolute_files[0].file_path, "pkg/a.py");
1413    }
1414
1415    #[test]
1416    fn absolute_deleted_pathspecs_are_normalized_from_existing_parent() {
1417        let temp = TempDir::new().unwrap();
1418        let repo = Repository::init(temp.path()).unwrap();
1419        fs::create_dir_all(temp.path().join("pkg")).unwrap();
1420
1421        commit_file(
1422            &repo,
1423            "pkg/deleted.py",
1424            "def foo():\n    return 1\n",
1425            "init",
1426        );
1427        let absolute_path = temp
1428            .path()
1429            .join("pkg/deleted.py")
1430            .to_string_lossy()
1431            .to_string();
1432        fs::remove_file(temp.path().join("pkg/deleted.py")).unwrap();
1433
1434        let bridge = GitBridge::open(&temp.path().join("pkg")).unwrap();
1435        let files = bridge
1436            .get_changed_files(&DiffScope::Working, &[absolute_path])
1437            .unwrap();
1438
1439        assert_eq!(files.len(), 1);
1440        assert_eq!(files[0].file_path, "pkg/deleted.py");
1441        assert_eq!(files[0].status, FileStatus::Deleted);
1442    }
1443
1444    #[test]
1445    fn absolute_missing_pathspecs_preserve_trailing_component_order() {
1446        let temp = TempDir::new().unwrap();
1447        let existing_parent = temp.path().join("existing");
1448        fs::create_dir(&existing_parent).unwrap();
1449
1450        let pathspec = existing_parent.join("missing").join("leaf.py");
1451        let normalized = normalize_absolute_pathspec(&pathspec);
1452
1453        let mut expected = fs::canonicalize(&existing_parent).unwrap();
1454        expected.push("missing");
1455        expected.push("leaf.py");
1456        assert_eq!(normalized, expected);
1457    }
1458
1459    #[test]
1460    fn absolute_pathspecs_outside_repo_are_rejected() {
1461        let repo_dir = TempDir::new().unwrap();
1462        let outside_dir = TempDir::new().unwrap();
1463        let repo = Repository::init(repo_dir.path()).unwrap();
1464
1465        commit_file(&repo, "sample.py", "def foo():\n    return 1\n", "init");
1466        fs::write(
1467            repo_dir.path().join("sample.py"),
1468            "def foo():\n    return 2\n",
1469        )
1470        .unwrap();
1471        let outside_path = outside_dir.path().join("outside.py");
1472        fs::write(&outside_path, "def outside():\n    return 1\n").unwrap();
1473
1474        let bridge = GitBridge::open(repo_dir.path()).unwrap();
1475        let err = bridge
1476            .get_changed_files(
1477                &DiffScope::Working,
1478                &[outside_path.to_string_lossy().to_string()],
1479            )
1480            .unwrap_err();
1481
1482        let message = err.to_string();
1483        assert!(message.contains("pathspec"));
1484        assert!(message.contains("is outside repository"));
1485    }
1486
1487    #[test]
1488    fn working_binary_modification_is_reported_as_binary_change() {
1489        let temp = TempDir::new().unwrap();
1490        let repo = Repository::init(temp.path()).unwrap();
1491
1492        commit_binary_file(&repo, "pic.png", b"\0png-v1\0", "init");
1493        fs::write(temp.path().join("pic.png"), b"\0png-v2\0extra").unwrap();
1494
1495        let bridge = GitBridge::open(temp.path()).unwrap();
1496        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1497
1498        assert_eq!(files.len(), 1);
1499        assert_eq!(files[0].file_path, "pic.png");
1500        assert_eq!(files[0].status, FileStatus::Modified);
1501        assert!(files[0].before_content.is_none());
1502        assert!(files[0].after_content.is_none());
1503
1504        let binary_changes = collect_binary_file_changes(&files);
1505        let registry = create_default_registry();
1506        let result = compute_semantic_diff(&files, &registry, None, None);
1507
1508        assert!(result.changes.is_empty());
1509        assert_eq!(result.file_count, 0);
1510        assert_eq!(binary_changes.len(), 1);
1511        assert_eq!(binary_changes[0].file_path, "pic.png");
1512        assert_eq!(binary_changes[0].status, FileStatus::Modified);
1513    }
1514
1515    #[test]
1516    fn staged_binary_add_and_delete_are_reported_as_binary_changes() {
1517        let temp = TempDir::new().unwrap();
1518        let repo = Repository::init(temp.path()).unwrap();
1519
1520        fs::write(temp.path().join("added.png"), b"\0added-binary\0").unwrap();
1521        let mut index = repo.index().unwrap();
1522        index.add_path(Path::new("added.png")).unwrap();
1523        index.write().unwrap();
1524
1525        let bridge = GitBridge::open(temp.path()).unwrap();
1526        let added_files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1527        assert_eq!(added_files.len(), 1);
1528        assert_eq!(added_files[0].file_path, "added.png");
1529        assert_eq!(added_files[0].status, FileStatus::Added);
1530        assert!(added_files[0].before_content.is_none());
1531        assert!(added_files[0].after_content.is_none());
1532        let added_binary_changes = collect_binary_file_changes(&added_files);
1533        assert_eq!(added_binary_changes.len(), 1);
1534        assert_eq!(added_binary_changes[0].file_path, "added.png");
1535
1536        let temp = TempDir::new().unwrap();
1537        let repo = Repository::init(temp.path()).unwrap();
1538        commit_binary_file(&repo, "deleted.png", b"\0deleted-binary\0", "init");
1539        fs::remove_file(temp.path().join("deleted.png")).unwrap();
1540        let mut index = repo.index().unwrap();
1541        index.remove_path(Path::new("deleted.png")).unwrap();
1542        index.write().unwrap();
1543
1544        let bridge = GitBridge::open(temp.path()).unwrap();
1545        let deleted_files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1546        assert_eq!(deleted_files.len(), 1);
1547        assert_eq!(deleted_files[0].file_path, "deleted.png");
1548        assert_eq!(deleted_files[0].status, FileStatus::Deleted);
1549        assert!(deleted_files[0].before_content.is_none());
1550        assert!(deleted_files[0].after_content.is_none());
1551        let deleted_binary_changes = collect_binary_file_changes(&deleted_files);
1552        assert_eq!(deleted_binary_changes.len(), 1);
1553        assert_eq!(deleted_binary_changes[0].file_path, "deleted.png");
1554    }
1555
1556    #[test]
1557    fn partial_utf8_boundary_is_not_treated_as_binary() {
1558        assert!(!GitBridge::bytes_look_binary(&[0xe2, 0x82], false));
1559        assert!(GitBridge::bytes_look_binary(&[0xe2, 0x82], true));
1560    }
1561
1562    #[test]
1563    fn staged_file_rename_is_reported_as_single_rename_with_old_contents() {
1564        let temp = TempDir::new().unwrap();
1565        let repo = Repository::init(temp.path()).unwrap();
1566
1567        let contents = "export function foo() {\n  return 1;\n}\n";
1568        commit_file(&repo, "old.ts", contents, "init");
1569
1570        fs::rename(temp.path().join("old.ts"), temp.path().join("new.ts")).unwrap();
1571        let mut index = repo.index().unwrap();
1572        index.remove_path(Path::new("old.ts")).unwrap();
1573        index.add_path(Path::new("new.ts")).unwrap();
1574        index.write().unwrap();
1575
1576        let bridge = GitBridge::open(temp.path()).unwrap();
1577        let files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1578
1579        assert_eq!(files.len(), 1);
1580        assert_eq!(files[0].status, FileStatus::Renamed);
1581        assert_eq!(files[0].file_path, "new.ts");
1582        assert_eq!(files[0].old_file_path.as_deref(), Some("old.ts"));
1583        assert_eq!(files[0].before_content.as_deref(), Some(contents));
1584        assert_eq!(files[0].after_content.as_deref(), Some(contents));
1585    }
1586
1587    #[test]
1588    fn staged_file_rename_with_edit_reports_single_moved_entity() {
1589        let temp = TempDir::new().unwrap();
1590        let repo = Repository::init(temp.path()).unwrap();
1591
1592        let before = "\
1593// shared header 01
1594// shared header 02
1595// shared header 03
1596// shared header 04
1597// shared header 05
1598// shared header 06
1599// shared header 07
1600// shared header 08
1601// shared header 09
1602// shared header 10
1603export function foo() {
1604  return alpha + beta + gamma;
1605}
1606";
1607        let after = before.replace(
1608            "return alpha + beta + gamma;",
1609            "return one + two + three;",
1610        );
1611
1612        commit_file(&repo, "old.ts", before, "init");
1613        fs::rename(temp.path().join("old.ts"), temp.path().join("new.ts")).unwrap();
1614        fs::write(temp.path().join("new.ts"), &after).unwrap();
1615
1616        let mut index = repo.index().unwrap();
1617        index.remove_path(Path::new("old.ts")).unwrap();
1618        index.add_path(Path::new("new.ts")).unwrap();
1619        index.write().unwrap();
1620
1621        let bridge = GitBridge::open(temp.path()).unwrap();
1622        let files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1623        assert_eq!(files.len(), 1);
1624        assert_eq!(files[0].status, FileStatus::Renamed);
1625
1626        let registry = create_default_registry();
1627        let result = compute_semantic_diff(&files, &registry, None, None);
1628
1629        assert_eq!(result.added_count, 0);
1630        assert_eq!(result.deleted_count, 0);
1631        // `foo` is a compound Moved change whose body also changed, so it counts toward
1632        // both moved_count and modified_count.
1633        assert_eq!(result.modified_count, 1);
1634        assert_eq!(result.moved_count, 1);
1635        assert_eq!(result.changes.len(), 1);
1636        assert_eq!(result.changes[0].change_type, ChangeType::Moved);
1637        assert_eq!(result.changes[0].entity_name, "foo");
1638        assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.ts"));
1639        assert_eq!(result.changes[0].structural_change, Some(true));
1640    }
1641
1642    #[test]
1643    fn working_diff_preserves_staged_rename_with_unstaged_edit() {
1644        let temp = TempDir::new().unwrap();
1645        let repo = Repository::init(temp.path()).unwrap();
1646
1647        let before = "\
1648export function foo(x: number) {
1649  return x + 1;
1650}
1651
1652export function bar(y: number) {
1653  return y * 2;
1654}
1655";
1656        let after = "\
1657export function foo(x: number) {
1658  return x + 42;
1659}
1660
1661export function bar(y: number) {
1662  return y * 99;
1663}
1664";
1665
1666        commit_file(&repo, "a.ts", before, "init");
1667
1668        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1669        let mut index = repo.index().unwrap();
1670        index.remove_path(Path::new("a.ts")).unwrap();
1671        index.add_path(Path::new("b.ts")).unwrap();
1672        index.write().unwrap();
1673
1674        fs::write(temp.path().join("b.ts"), after).unwrap();
1675
1676        let bridge = GitBridge::open(temp.path()).unwrap();
1677        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1678
1679        assert!(matches!(scope, DiffScope::Working));
1680        assert_eq!(files.len(), 1);
1681        assert_eq!(files[0].status, FileStatus::Renamed);
1682        assert_eq!(files[0].file_path, "b.ts");
1683        assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1684        assert_eq!(files[0].before_content.as_deref(), Some(before));
1685        assert_eq!(files[0].after_content.as_deref(), Some(after));
1686
1687        let registry = create_default_registry();
1688        let result = compute_semantic_diff(&files, &registry, None, None);
1689
1690        assert_eq!(result.added_count, 0);
1691        assert_eq!(result.deleted_count, 0);
1692        assert_eq!(result.modified_count, 2);
1693        assert_eq!(result.moved_count, 2);
1694        assert_eq!(result.changes.len(), 2);
1695        assert!(result
1696            .changes
1697            .iter()
1698            .all(|change| change.change_type == ChangeType::Moved));
1699        assert!(result
1700            .changes
1701            .iter()
1702            .all(|change| change.old_file_path.as_deref() == Some("a.ts")));
1703        assert!(result
1704            .changes
1705            .iter()
1706            .all(|change| change.structural_change == Some(true)));
1707    }
1708
1709    #[test]
1710    fn working_diff_uses_staged_rename_map_after_large_unstaged_rewrite() {
1711        let temp = TempDir::new().unwrap();
1712        let repo = Repository::init(temp.path()).unwrap();
1713
1714        let before_noise = (0..200)
1715            .map(|i| format!("// old filler {i} alpha beta gamma"))
1716            .collect::<Vec<_>>()
1717            .join("\n");
1718        let after_noise = (0..200)
1719            .map(|i| format!("// new filler {i} delta epsilon zeta"))
1720            .collect::<Vec<_>>()
1721            .join("\n");
1722        let before = format!(
1723            "{before_noise}\nexport function foo(x: number) {{\n  return x + 1;\n}}\n"
1724        );
1725        let after = format!(
1726            "{after_noise}\nexport function foo(x: number) {{\n  return x + 42;\n}}\n"
1727        );
1728
1729        commit_file(&repo, "a.ts", &before, "init");
1730
1731        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1732        let mut index = repo.index().unwrap();
1733        index.remove_path(Path::new("a.ts")).unwrap();
1734        index.add_path(Path::new("b.ts")).unwrap();
1735        index.write().unwrap();
1736
1737        fs::write(temp.path().join("b.ts"), &after).unwrap();
1738
1739        let bridge = GitBridge::open(temp.path()).unwrap();
1740        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1741
1742        assert!(matches!(scope, DiffScope::Working));
1743        assert_eq!(files.len(), 1);
1744        assert_eq!(files[0].status, FileStatus::Renamed);
1745        assert_eq!(files[0].file_path, "b.ts");
1746        assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1747        assert_eq!(files[0].before_content.as_deref(), Some(before.as_str()));
1748        assert_eq!(files[0].after_content.as_deref(), Some(after.as_str()));
1749
1750        let registry = create_default_registry();
1751        let result = compute_semantic_diff(&files, &registry, None, None);
1752
1753        assert_eq!(result.added_count, 0);
1754        assert_eq!(result.deleted_count, 0);
1755        // Two changes: the rewritten comment block is a Modified orphan, and `foo` is a
1756        // compound Moved change whose body also changed, so it counts toward both
1757        // moved_count and modified_count.
1758        assert_eq!(result.modified_count, 2);
1759        assert_eq!(result.moved_count, 1);
1760        assert!(result
1761            .changes
1762            .iter()
1763            .any(|change| change.change_type == ChangeType::Moved && change.entity_name == "foo"));
1764    }
1765
1766    #[test]
1767    fn explicit_ref_to_working_uses_index_rename_map_after_large_unstaged_rewrite() {
1768        let temp = TempDir::new().unwrap();
1769        let repo = Repository::init(temp.path()).unwrap();
1770
1771        let before_noise = (0..200)
1772            .map(|i| format!("// old filler {i} alpha beta gamma"))
1773            .collect::<Vec<_>>()
1774            .join("\n");
1775        let after_noise = (0..200)
1776            .map(|i| format!("// new filler {i} delta epsilon zeta"))
1777            .collect::<Vec<_>>()
1778            .join("\n");
1779        let before = format!(
1780            "{before_noise}\nexport function foo(x: number) {{\n  return x + 1;\n}}\n"
1781        );
1782        let after = format!(
1783            "{after_noise}\nexport function foo(x: number) {{\n  return x + 42;\n}}\n"
1784        );
1785
1786        commit_file(&repo, "a.ts", &before, "init");
1787
1788        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1789        let mut index = repo.index().unwrap();
1790        index.remove_path(Path::new("a.ts")).unwrap();
1791        index.add_path(Path::new("b.ts")).unwrap();
1792        index.write().unwrap();
1793
1794        fs::write(temp.path().join("b.ts"), &after).unwrap();
1795
1796        let bridge = GitBridge::open(temp.path()).unwrap();
1797        let files = bridge
1798            .get_changed_files(
1799                &DiffScope::RefToWorking {
1800                    refspec: "HEAD".to_string(),
1801                },
1802                &[],
1803            )
1804            .unwrap();
1805
1806        assert_eq!(files.len(), 1);
1807        assert_eq!(files[0].status, FileStatus::Renamed);
1808        assert_eq!(files[0].file_path, "b.ts");
1809        assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1810        assert_eq!(files[0].before_content.as_deref(), Some(before.as_str()));
1811        assert_eq!(files[0].after_content.as_deref(), Some(after.as_str()));
1812
1813        let registry = create_default_registry();
1814        let result = compute_semantic_diff(&files, &registry, None, None);
1815
1816        assert_eq!(result.added_count, 0);
1817        assert_eq!(result.deleted_count, 0);
1818        // Two changes: the rewritten comment block is a Modified orphan, and `foo` is a
1819        // compound Moved change whose body also changed, so it counts toward both
1820        // moved_count and modified_count.
1821        assert_eq!(result.modified_count, 2);
1822        assert_eq!(result.moved_count, 1);
1823        assert!(result
1824            .changes
1825            .iter()
1826            .any(|change| change.change_type == ChangeType::Moved && change.entity_name == "foo"));
1827    }
1828
1829    #[test]
1830    fn staged_rename_map_overrides_wrong_worktree_rename_pairing() {
1831        let temp = TempDir::new().unwrap();
1832        let repo = Repository::init(temp.path()).unwrap();
1833
1834        let a_before = "export function foo(x: number) {\n  return x + 1;\n}\n";
1835        let c_before = "export function foo(x: number) {\n  return x + 42;\n}\n";
1836
1837        commit_file(&repo, "a.ts", a_before, "init a");
1838        commit_file(&repo, "c.ts", c_before, "init c");
1839
1840        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1841        let mut index = repo.index().unwrap();
1842        index.remove_path(Path::new("a.ts")).unwrap();
1843        index.add_path(Path::new("b.ts")).unwrap();
1844        index.write().unwrap();
1845
1846        fs::remove_file(temp.path().join("c.ts")).unwrap();
1847        fs::write(temp.path().join("b.ts"), c_before).unwrap();
1848
1849        let bridge = GitBridge::open(temp.path()).unwrap();
1850        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1851
1852        assert!(matches!(scope, DiffScope::Working));
1853        let renamed = files
1854            .iter()
1855            .find(|file| {
1856                file.status == FileStatus::Renamed
1857                    && file.file_path == "b.ts"
1858                    && file.old_file_path.as_deref() == Some("a.ts")
1859            })
1860            .unwrap();
1861        assert_eq!(renamed.before_content.as_deref(), Some(a_before));
1862        assert_eq!(renamed.after_content.as_deref(), Some(c_before));
1863
1864        let deleted = files
1865            .iter()
1866            .find(|file| file.status == FileStatus::Deleted && file.file_path == "c.ts")
1867            .unwrap();
1868        assert_eq!(deleted.before_content.as_deref(), Some(c_before));
1869        assert_eq!(deleted.after_content.as_deref(), None);
1870        assert!(!files.iter().any(|file| {
1871            file.status == FileStatus::Renamed
1872                && file.file_path == "b.ts"
1873                && file.old_file_path.as_deref() == Some("c.ts")
1874        }));
1875    }
1876
1877    #[test]
1878    fn staged_diff_with_base_ref_compares_index_to_that_ref() {
1879        let temp = TempDir::new().unwrap();
1880        let repo = Repository::init(temp.path()).unwrap();
1881
1882        let v1 = "def foo():\n    return 1\n";
1883        let v2 = "def foo():\n    return 2\n";
1884        let v3 = "def foo():\n    return 3\n";
1885        let v4 = "def foo():\n    return 4\n";
1886
1887        commit_file(&repo, "a.py", v1, "init");
1888        commit_file(&repo, "a.py", v2, "second");
1889        fs::write(temp.path().join("a.py"), v3).unwrap();
1890
1891        let mut index = repo.index().unwrap();
1892        index.add_path(Path::new("a.py")).unwrap();
1893        index.write().unwrap();
1894
1895        fs::write(temp.path().join("a.py"), v4).unwrap();
1896
1897        let bridge = GitBridge::open(temp.path()).unwrap();
1898        let files = bridge
1899            .get_staged_files_with_base_ref("HEAD~1", &[])
1900            .unwrap();
1901
1902        assert_eq!(files.len(), 1);
1903        assert_eq!(files[0].status, FileStatus::Modified);
1904        assert_eq!(files[0].file_path, "a.py");
1905        assert_eq!(files[0].before_content.as_deref(), Some(v1));
1906        assert_eq!(files[0].after_content.as_deref(), Some(v3));
1907
1908        let registry = create_default_registry();
1909        let result = compute_semantic_diff(&files, &registry, None, None);
1910
1911        assert_eq!(result.modified_count, 1);
1912        assert_eq!(result.changes.len(), 1);
1913        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
1914        assert_eq!(result.changes[0].entity_name, "foo");
1915    }
1916
1917    #[test]
1918    fn crlf_only_difference_in_working_file_is_invisible() {
1919        let temp = TempDir::new().unwrap();
1920        let repo = Repository::init(temp.path()).unwrap();
1921
1922        commit_file(&repo, "sample.rs", "fn a() {}\n", "init");
1923        fs::write(temp.path().join("sample.rs"), "fn a() {}\r\n").unwrap();
1924
1925        let bridge = GitBridge::open(temp.path()).unwrap();
1926        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1927
1928        assert_eq!(files.len(), 1, "expected git to detect the CRLF change as modified");
1929
1930        let before = files[0].before_content.as_deref().unwrap();
1931        let after = files[0].after_content.as_deref().unwrap();
1932
1933        assert_eq!(before, after, "CRLF-only difference should be invisible after normalization");
1934    }
1935
1936    #[test]
1937    fn crlf_stored_in_blob_is_normalized_on_read() {
1938        let temp = TempDir::new().unwrap();
1939        let repo = Repository::init(temp.path()).unwrap();
1940
1941        repo.config().unwrap().set_str("core.autocrlf", "false").unwrap();
1942        commit_file(&repo, "sample.rs", "fn a() {}\r\n", "init");
1943        fs::write(temp.path().join("sample.rs"), "fn a() {}\r\nfn b() {}\r\n").unwrap();
1944
1945        let bridge = GitBridge::open(temp.path()).unwrap();
1946        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1947
1948        assert_eq!(files.len(), 1, "expected git to detect the modification");
1949
1950        let before = files[0].before_content.as_deref().unwrap();
1951        assert!(!before.contains('\r'), "before_content read from CRLF blob should be normalized to LF");
1952    }
1953}