Skip to main content

sem_core/git/
bridge.rs

1use std::env;
2use std::fs;
3use std::path::{Component, Path, PathBuf};
4use std::process::Command;
5use std::sync::{Mutex, OnceLock};
6
7use git2::{Blame, Delta, Diff, DiffFindOptions, DiffOptions, ErrorCode, Oid, Repository};
8use thiserror::Error;
9
10use super::types::{CommitInfo, DiffScope, FileChange, FileCommitInfo, FileStatus};
11use super::types::BlameLineInfo;
12
/// Errors produced by the git bridge.
#[derive(Error, Debug)]
pub enum GitError {
    /// No usable repository; `GitBridge::open` returns this when the
    /// discovered repository has no working directory.
    #[error("not a git repository")]
    NotARepo,
    /// Failure reported by the `git2` library (converted automatically by `?`).
    #[error("git error: {0}")]
    Git2(#[from] git2::Error),
    /// Underlying I/O failure, e.g. while spawning the `git` executable or
    /// canonicalizing a path (converted automatically by `?`).
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
}
22
/// Handle on a single git repository, used to compute diffs, blame and
/// file history.
pub struct GitBridge {
    /// Open `git2` repository handle.
    repo: Repository,
    /// Canonicalized working-directory root of the repository.
    repo_root: PathBuf,
    /// Normalized form of the path passed to `open`; relative pathspecs are
    /// resolved against it.
    cwd: PathBuf,
}
28
29impl GitBridge {
30    pub fn open(path: &Path) -> Result<Self, GitError> {
31        let cwd = normalize_open_path(path)?;
32        let repo = match Repository::discover(path) {
33            Ok(repo) => repo,
34            Err(error) if should_retry_with_command_line_safe_directory(&error, path) => {
35                let _guard = owner_validation_lock()
36                    .lock()
37                    .unwrap_or_else(|poisoned| poisoned.into_inner());
38                let _owner_validation = OwnerValidationDisabled::new()?;
39                let repo = Repository::discover(path);
40                repo.map_err(map_git_error)?
41            }
42            Err(error) => return Err(map_git_error(error)),
43        };
44        let repo_root = repo.workdir().ok_or(GitError::NotARepo)?;
45        let repo_root = fs::canonicalize(repo_root)?;
46        Ok(Self {
47            repo,
48            repo_root,
49            cwd,
50        })
51    }
52
53    pub fn repo_root(&self) -> &Path {
54        &self.repo_root
55    }
56
57    pub fn blame_file(&self, file_path: &Path) -> Result<Blame<'_>, GitError> {
58        Ok(self.repo.blame_file(file_path, None)?)
59    }
60
61    pub fn blame_file_porcelain(&self, file_path: &Path) -> Result<Vec<BlameLineInfo>, GitError> {
62        let output = Command::new("git")
63            .arg("-C")
64            .arg(&self.repo_root)
65            .arg("blame")
66            .arg("--line-porcelain")
67            .arg("--")
68            .arg(file_path)
69            .output()?;
70
71        if !output.status.success() {
72            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
73            return Err(git_command_error(if stderr.is_empty() {
74                format!("git blame exited with {}", output.status)
75            } else {
76                stderr
77            }));
78        }
79
80        let parsed = parse_blame_porcelain(&String::from_utf8_lossy(&output.stdout));
81        if parsed.is_empty() && !output.stdout.is_empty() {
82            return Err(git_command_error(
83                "failed to parse git blame porcelain output".to_string(),
84            ));
85        }
86
87        Ok(parsed)
88    }
89
90    pub fn commit_summary(&self, oid: Oid) -> Option<String> {
91        self.repo
92            .find_commit(oid)
93            .ok()
94            .and_then(|commit| commit.summary().map(String::from))
95    }
96
97    pub fn get_head_sha(&self) -> Result<String, GitError> {
98        let head = self.repo.head()?;
99        let oid = head.target().ok_or_else(|| {
100            git2::Error::from_str("HEAD has no target")
101        })?;
102        Ok(oid.to_string())
103    }
104
105    /// Combined detect scope + get files in one call (fast path).
106    /// Shows all changes from HEAD to the current working state by default.
107    /// Use `--staged` for staged changes only.
108    pub fn detect_and_get_files(&self, pathspecs: &[String]) -> Result<(DiffScope, Vec<FileChange>), GitError> {
109        // Show the full current working state, including staged changes.
110        let mut working_files = self.get_working_diff_files(pathspecs)?;
111        if !working_files.is_empty() {
112            self.populate_contents(&mut working_files, &DiffScope::Working)?;
113            return Ok((DiffScope::Working, working_files));
114        }
115
116        // Clean worktree = no changes
117        Ok((DiffScope::Working, Vec::new()))
118    }
119
120    /// Get changed files for a specific scope
121    pub fn get_changed_files(&self, scope: &DiffScope, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
122        let mut files = match scope {
123            DiffScope::Working => {
124                self.get_working_diff_files(pathspecs)?
125            }
126            DiffScope::Staged => self.get_staged_diff_files(pathspecs)?,
127            DiffScope::Commit { sha } => self.get_commit_diff_files(sha, pathspecs)?,
128            DiffScope::Range { from, to } => self.get_range_diff_files(from, to, pathspecs)?,
129            DiffScope::RefToWorking { refspec } => self.get_ref_to_working_diff_files(refspec, pathspecs)?,
130        };
131
132        // Filter .sem/ files
133        files.retain(|f| !f.file_path.starts_with(".sem/"));
134
135        self.populate_contents(&mut files, scope)?;
136        Ok(files)
137    }
138
139    pub fn get_staged_files_with_base_ref(
140        &self,
141        base: &str,
142        pathspecs: &[String],
143    ) -> Result<Vec<FileChange>, GitError> {
144        let mut files = self.get_staged_diff_files_with_base(base, pathspecs)?;
145        files.retain(|f| !f.file_path.starts_with(".sem/"));
146
147        let base_tree = self.resolve_tree(base)?;
148        for file in files.iter_mut() {
149            if file.status != FileStatus::Deleted {
150                file.after_content = self.read_index_file(&file.file_path);
151            }
152            if file.status != FileStatus::Added {
153                let path = file
154                    .old_file_path
155                    .as_deref()
156                    .unwrap_or(&file.file_path);
157                file.before_content = self.read_blob_from_tree(&base_tree, path);
158            }
159        }
160
161        Ok(files)
162    }
163
164    /// Resolve the merge base between two refs
165    pub fn resolve_merge_base(&self, ref1: &str, ref2: &str) -> Result<String, GitError> {
166        let obj1 = self.repo.revparse_single(ref1)?;
167        let obj2 = self.repo.revparse_single(ref2)?;
168        let oid = self.repo.merge_base(obj1.id(), obj2.id())?;
169        Ok(oid.to_string())
170    }
171
172    /// Check if a string resolves to a valid git revision
173    pub fn is_valid_rev(&self, refspec: &str) -> bool {
174        self.repo.revparse_single(refspec).is_ok()
175    }
176
177    fn make_diff_opts(&self, pathspecs: &[String]) -> Result<DiffOptions, GitError> {
178        let mut opts = DiffOptions::new();
179        for spec in self.normalize_pathspecs(pathspecs)? {
180            opts.pathspec(spec.as_str());
181        }
182        Ok(opts)
183    }
184
185    fn normalize_pathspecs(&self, pathspecs: &[String]) -> Result<Vec<String>, GitError> {
186        pathspecs
187            .iter()
188            .map(|spec| self.normalize_pathspec(spec))
189            .collect()
190    }
191
192    fn normalize_pathspec(&self, spec: &str) -> Result<String, GitError> {
193        if spec.is_empty() || spec.starts_with(':') {
194            return Ok(spec.to_string());
195        }
196
197        let spec_path = Path::new(spec);
198        let absolute = if spec_path.is_absolute() {
199            normalize_absolute_pathspec(spec_path)
200        } else {
201            normalize_lexical(&self.cwd.join(spec_path))
202        };
203
204        let repo_root = normalize_lexical(&self.repo_root);
205        let relative =
206            absolute
207                .strip_prefix(&repo_root)
208                .map_err(|_| pathspec_outside_repo_error(spec, &self.repo_root))?;
209
210        if relative.as_os_str().is_empty() {
211            Ok(".".to_string())
212        } else {
213            relative
214                .to_str()
215                .map(|path| path.replace('\\', "/"))
216                .ok_or_else(|| non_utf8_pathspec_error(spec))
217        }
218    }
219
220    fn get_staged_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
221        let head_tree = match self.repo.head() {
222            Ok(head) => {
223                let commit = head.peel_to_commit()?;
224                Some(commit.tree()?)
225            }
226            Err(_) => None, // No commits yet
227        };
228
229        self.get_index_diff_files(head_tree.as_ref(), pathspecs)
230    }
231
232    fn get_staged_diff_files_with_base(
233        &self,
234        base: &str,
235        pathspecs: &[String],
236    ) -> Result<Vec<FileChange>, GitError> {
237        let base_tree = self.resolve_tree(base)?;
238        self.get_index_diff_files(Some(&base_tree), pathspecs)
239    }
240
241    fn get_index_diff_files(
242        &self,
243        base_tree: Option<&git2::Tree<'_>>,
244        pathspecs: &[String],
245    ) -> Result<Vec<FileChange>, GitError> {
246        let mut opts = self.make_diff_opts(pathspecs)?;
247        let mut diff = self.repo.diff_tree_to_index(
248            base_tree,
249            Some(&self.repo.index()?),
250            Some(&mut opts),
251        )?;
252        Self::detect_renames(&mut diff)?;
253
254        Ok(self.diff_to_file_changes(&diff))
255    }
256
257    fn get_working_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
258        let mut opts = self.make_diff_opts(pathspecs)?;
259        opts.include_untracked(false);
260
261        let head_tree = self.resolve_tree("HEAD").ok();
262        let mut diff = match head_tree.as_ref() {
263            Some(head_tree) => self
264                .repo
265                .diff_tree_to_workdir_with_index(Some(head_tree), Some(&mut opts))?,
266            None => self.repo.diff_index_to_workdir(None, Some(&mut opts))?,
267        };
268        Self::detect_renames(&mut diff)?;
269        self.apply_index_rename_map(
270            self.diff_to_file_changes(&diff),
271            head_tree.as_ref(),
272            pathspecs,
273        )
274    }
275
276    fn apply_index_rename_map(
277        &self,
278        mut files: Vec<FileChange>,
279        base_tree: Option<&git2::Tree<'_>>,
280        pathspecs: &[String],
281    ) -> Result<Vec<FileChange>, GitError> {
282        let Some(base_tree) = base_tree else {
283            return Ok(files);
284        };
285
286        let index_renames: Vec<FileChange> = self
287            .get_index_diff_files(Some(base_tree), pathspecs)?
288            .into_iter()
289            .filter(|file| file.status == FileStatus::Renamed)
290            .collect();
291
292        for rename in index_renames {
293            let Some(old_path) = rename.old_file_path.clone() else {
294                continue;
295            };
296            let target_pos = files
297                .iter()
298                .position(|file| {
299                    matches!(file.status, FileStatus::Added | FileStatus::Renamed)
300                        && file.file_path == rename.file_path
301                });
302            let deleted_pos = files
303                .iter()
304                .position(|file| {
305                    file.status == FileStatus::Deleted && file.file_path == old_path
306                });
307
308            if let (Some(target_pos), Some(deleted_pos)) = (target_pos, deleted_pos) {
309                if files[target_pos].status == FileStatus::Renamed
310                    && files[target_pos].old_file_path.as_deref() == Some(old_path.as_str())
311                {
312                    continue;
313                }
314
315                let target_file = files[target_pos].clone();
316                let deleted_file = files[deleted_pos].clone();
317                let displaced_deleted_path =
318                    if target_file.status == FileStatus::Renamed {
319                        target_file
320                            .old_file_path
321                            .as_ref()
322                            .filter(|path| *path != &old_path)
323                            .cloned()
324                    } else {
325                        None
326                    };
327
328                files = files
329                    .into_iter()
330                    .enumerate()
331                    .filter_map(|(idx, file)| {
332                        if idx == target_pos || idx == deleted_pos {
333                            None
334                        } else {
335                            Some(file)
336                        }
337                    })
338                    .collect();
339                let before_content = deleted_file
340                    .before_content
341                    .or_else(|| self.read_blob_from_tree(base_tree, &old_path));
342                let after_content = target_file
343                    .after_content
344                    .or_else(|| self.read_working_file(&target_file.file_path));
345                files.push(FileChange {
346                    file_path: target_file.file_path,
347                    status: FileStatus::Renamed,
348                    old_file_path: Some(old_path),
349                    before_content,
350                    after_content,
351                });
352                if let Some(file_path) = displaced_deleted_path {
353                    let before_content = self.read_blob_from_tree(base_tree, &file_path);
354                    files.push(FileChange {
355                        file_path,
356                        status: FileStatus::Deleted,
357                        old_file_path: None,
358                        before_content,
359                        after_content: None,
360                    });
361                }
362            }
363        }
364
365        Ok(files)
366    }
367
368    fn get_commit_diff_files(&self, sha: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
369        let obj = self.repo.revparse_single(sha)?;
370        let commit = obj.peel_to_commit()?;
371        let tree = commit.tree()?;
372
373        let parent_tree = if commit.parent_count() > 0 {
374            Some(commit.parent(0)?.tree()?)
375        } else {
376            None
377        };
378
379        let mut opts = self.make_diff_opts(pathspecs)?;
380        let mut diff = self.repo.diff_tree_to_tree(
381            parent_tree.as_ref(),
382            Some(&tree),
383            Some(&mut opts),
384        )?;
385        Self::detect_renames(&mut diff)?;
386
387        Ok(self.diff_to_file_changes(&diff))
388    }
389
390    fn get_range_diff_files(&self, from: &str, to: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
391        let from_obj = self.repo.revparse_single(from)?;
392        let to_obj = self.repo.revparse_single(to)?;
393
394        let from_tree = from_obj.peel_to_commit()?.tree()?;
395        let to_tree = to_obj.peel_to_commit()?.tree()?;
396
397        let mut opts = self.make_diff_opts(pathspecs)?;
398        let mut diff = self.repo.diff_tree_to_tree(
399            Some(&from_tree),
400            Some(&to_tree),
401            Some(&mut opts),
402        )?;
403        Self::detect_renames(&mut diff)?;
404
405        Ok(self.diff_to_file_changes(&diff))
406    }
407
408    fn get_ref_to_working_diff_files(&self, refspec: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
409        let tree = self.resolve_tree(refspec)?;
410        let mut opts = self.make_diff_opts(pathspecs)?;
411        let mut diff = self.repo.diff_tree_to_workdir_with_index(
412            Some(&tree),
413            Some(&mut opts),
414        )?;
415        Self::detect_renames(&mut diff)?;
416        self.apply_index_rename_map(self.diff_to_file_changes(&diff), Some(&tree), pathspecs)
417    }
418
419    fn detect_renames(diff: &mut Diff) -> Result<(), GitError> {
420        let mut opts = DiffFindOptions::new();
421        opts.renames(true);
422        diff.find_similar(Some(&mut opts))?;
423        Ok(())
424    }
425
426    fn diff_to_file_changes(&self, diff: &Diff) -> Vec<FileChange> {
427        let mut files = Vec::new();
428
429        for delta in diff.deltas() {
430            let (status, file_path, old_file_path) = match delta.status() {
431                Delta::Added => {
432                    let path = delta
433                        .new_file()
434                        .path()
435                        .and_then(|p| p.to_str())
436                        .unwrap_or("")
437                        .to_string();
438                    (FileStatus::Added, path, None)
439                }
440                Delta::Deleted => {
441                    let path = delta
442                        .old_file()
443                        .path()
444                        .and_then(|p| p.to_str())
445                        .unwrap_or("")
446                        .to_string();
447                    (FileStatus::Deleted, path, None)
448                }
449                Delta::Modified => {
450                    let path = delta
451                        .new_file()
452                        .path()
453                        .and_then(|p| p.to_str())
454                        .unwrap_or("")
455                        .to_string();
456                    (FileStatus::Modified, path, None)
457                }
458                Delta::Renamed => {
459                    let new_path = delta
460                        .new_file()
461                        .path()
462                        .and_then(|p| p.to_str())
463                        .unwrap_or("")
464                        .to_string();
465                    let old_path = delta
466                        .old_file()
467                        .path()
468                        .and_then(|p| p.to_str())
469                        .unwrap_or("")
470                        .to_string();
471                    (FileStatus::Renamed, new_path, Some(old_path))
472                }
473                _ => continue,
474            };
475
476            if !file_path.starts_with(".sem/") {
477                files.push(FileChange {
478                    file_path,
479                    status,
480                    old_file_path,
481                    before_content: None,
482                    after_content: None,
483                });
484            }
485        }
486
487        files
488    }
489
490    fn bytes_look_binary(bytes: &[u8], complete: bool) -> bool {
491        if bytes.iter().any(|byte| *byte == 0) {
492            return true;
493        }
494
495        match std::str::from_utf8(bytes) {
496            Ok(_) => false,
497            Err(error) => complete || error.error_len().is_some(),
498        }
499    }
500
501    fn populate_contents(
502        &self,
503        files: &mut [FileChange],
504        scope: &DiffScope,
505    ) -> Result<(), GitError> {
506        match scope {
507            DiffScope::Working => {
508                // Resolve HEAD tree once for all before_content reads
509                let head_tree = self.resolve_tree("HEAD").ok();
510                for file in files.iter_mut() {
511                    if file.status != FileStatus::Deleted {
512                        file.after_content = self.read_working_file(&file.file_path);
513                    }
514                    if file.status != FileStatus::Added {
515                        let path = file
516                            .old_file_path
517                            .as_deref()
518                            .unwrap_or(&file.file_path);
519                        file.before_content = head_tree
520                            .as_ref()
521                            .and_then(|t| self.read_blob_from_tree(t, path));
522                    }
523                }
524            }
525            DiffScope::Staged => {
526                let head_tree = self.resolve_tree("HEAD").ok();
527                for file in files.iter_mut() {
528                    if file.status != FileStatus::Deleted {
529                        file.after_content = self
530                            .read_index_file(&file.file_path)
531                            .or_else(|| self.read_working_file(&file.file_path));
532                    }
533                    if file.status != FileStatus::Added {
534                        let path = file
535                            .old_file_path
536                            .as_deref()
537                            .unwrap_or(&file.file_path);
538                        file.before_content = head_tree
539                            .as_ref()
540                            .and_then(|t| self.read_blob_from_tree(t, path));
541                    }
542                }
543            }
544            DiffScope::Commit { sha } => {
545                // Resolve both trees once instead of per-file
546                let after_tree = self.resolve_tree(sha)?;
547                let before_tree = self.resolve_tree(&format!("{sha}~1")).ok();
548                for file in files.iter_mut() {
549                    if file.status != FileStatus::Deleted {
550                        file.after_content =
551                            self.read_blob_from_tree(&after_tree, &file.file_path);
552                    }
553                    if file.status != FileStatus::Added {
554                        let path = file
555                            .old_file_path
556                            .as_deref()
557                            .unwrap_or(&file.file_path);
558                        file.before_content = before_tree
559                            .as_ref()
560                            .and_then(|t| self.read_blob_from_tree(t, path));
561                    }
562                }
563            }
564            DiffScope::Range { from, to } => {
565                let after_tree = self.resolve_tree(to)?;
566                let before_tree = self.resolve_tree(from)?;
567                for file in files.iter_mut() {
568                    if file.status != FileStatus::Deleted {
569                        file.after_content =
570                            self.read_blob_from_tree(&after_tree, &file.file_path);
571                    }
572                    if file.status != FileStatus::Added {
573                        let path = file
574                            .old_file_path
575                            .as_deref()
576                            .unwrap_or(&file.file_path);
577                        file.before_content =
578                            self.read_blob_from_tree(&before_tree, path);
579                    }
580                }
581            }
582            DiffScope::RefToWorking { refspec } => {
583                let before_tree = self.resolve_tree(refspec)?;
584                for file in files.iter_mut() {
585                    if file.status != FileStatus::Deleted {
586                        file.after_content = self.read_working_file(&file.file_path);
587                    }
588                    if file.status != FileStatus::Added {
589                        let path = file
590                            .old_file_path
591                            .as_deref()
592                            .unwrap_or(&file.file_path);
593                        file.before_content =
594                            self.read_blob_from_tree(&before_tree, path);
595                    }
596                }
597            }
598        }
599        Ok(())
600    }
601
602    fn resolve_tree(&self, refspec: &str) -> Result<git2::Tree<'_>, GitError> {
603        let obj = self.repo.revparse_single(refspec)?;
604        let commit = obj.peel_to_commit()?;
605        Ok(commit.tree()?)
606    }
607
608    fn normalize_line_endings(s: String) -> String {
609        if s.contains('\r') {
610            s.replace("\r\n", "\n").replace('\r', "\n")
611        } else {
612            s
613        }
614    }
615
616    fn read_blob_from_tree(&self, tree: &git2::Tree, file_path: &str) -> Option<String> {
617        let entry = tree.get_path(Path::new(file_path)).ok()?;
618        let blob = self.repo.find_blob(entry.id()).ok()?;
619        let bytes = blob.content();
620        if blob.is_binary() || Self::bytes_look_binary(bytes, true) {
621            return None;
622        }
623        std::str::from_utf8(bytes)
624            .ok()
625            .map(|s| Self::normalize_line_endings(s.to_string()))
626    }
627
628    fn read_working_file(&self, file_path: &str) -> Option<String> {
629        let full_path = self.repo_root.join(file_path);
630        let bytes = fs::read(full_path).ok()?;
631        if Self::bytes_look_binary(&bytes, true) {
632            return None;
633        }
634        String::from_utf8(bytes)
635            .ok()
636            .map(Self::normalize_line_endings)
637    }
638
639    fn read_index_file(&self, file_path: &str) -> Option<String> {
640        let index = self.repo.index().ok()?;
641        let entry = index.get_path(Path::new(file_path), 0)?;
642        let blob = self.repo.find_blob(entry.id).ok()?;
643        let bytes = blob.content();
644        if blob.is_binary() || Self::bytes_look_binary(bytes, true) {
645            return None;
646        }
647        std::str::from_utf8(bytes)
648            .ok()
649            .map(|s| Self::normalize_line_endings(s.to_string()))
650    }
651
652
653    /// Read file content at a specific git ref (commit SHA, branch, tag, etc.)
654    pub fn read_file_at_ref(&self, refspec: &str, file_path: &str) -> Result<Option<String>, GitError> {
655        let tree = self.resolve_tree(refspec)?;
656        Ok(self.read_blob_from_tree(&tree, file_path))
657    }
658
659    /// Get commits that modified a specific file, walking history from HEAD.
660    /// Returns commits in reverse chronological order (newest first).
661    pub fn get_file_commits(&self, file_path: &str, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
662        let mut revwalk = self.repo.revwalk()?;
663        revwalk.push_head()?;
664        revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::TIME)?;
665
666        let mut commits = Vec::new();
667        let path = Path::new(file_path);
668
669        for oid_result in revwalk {
670            let oid = oid_result?;
671            let commit = self.repo.find_commit(oid)?;
672            let tree = commit.tree()?;
673
674            // Check if this file exists in this commit's tree
675            let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
676
677            // Compare with parent to see if the file changed
678            let file_in_parent = if commit.parent_count() > 0 {
679                commit.parent(0)
680                    .ok()
681                    .and_then(|p| p.tree().ok())
682                    .and_then(|t| t.get_path(path).ok().map(|e| e.id()))
683            } else {
684                None // No parent = initial commit, file was added
685            };
686
687            // Include if file changed between parent and this commit
688            let changed = match (file_in_commit, file_in_parent) {
689                (Some(cur), Some(prev)) => cur != prev,  // content changed
690                (Some(_), None) => true,                   // file added
691                (None, Some(_)) => true,                   // file deleted
692                (None, None) => false,                     // file not present in either
693            };
694
695            if changed {
696                let sha = oid.to_string();
697                commits.push(CommitInfo {
698                    short_sha: sha[..7.min(sha.len())].to_string(),
699                    sha,
700                    author: commit.author().name().unwrap_or("unknown").to_string(),
701                    date: commit.time().seconds().to_string(),
702                    message: commit.message().unwrap_or("").to_string(),
703                });
704
705                if limit != 0 && commits.len() >= limit {
706                    break;
707                }
708            }
709        }
710
711        Ok(commits)
712    }
713
714    /// Get commits that modified a specific file, following renames across history.
715    /// Like `git log --follow`: when the tracked path disappears between commits,
716    /// compute a diff with rename detection to find the old filename and continue.
717    /// Returns commits in reverse chronological order (newest first).
718    pub fn get_file_commits_follow_renames(
719        &self,
720        file_path: &str,
721        limit: usize,
722    ) -> Result<Vec<FileCommitInfo>, GitError> {
723        match self.get_file_commits_follow_renames_cli(file_path, limit) {
724            Ok(commits) if !commits.is_empty() => return Ok(commits),
725            Ok(_) => {}
726            Err(GitError::Io(error)) if error.kind() == std::io::ErrorKind::NotFound => {}
727            Err(error) => return Err(error),
728        }
729
730        let mut revwalk = self.repo.revwalk()?;
731        revwalk.push_head()?;
732        revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::TIME)?;
733
734        let mut results = Vec::new();
735        let mut tracked_path = file_path.to_string();
736
737        for oid_result in revwalk {
738            let oid = oid_result?;
739            let commit = self.repo.find_commit(oid)?;
740            let tree = commit.tree()?;
741
742            let path = Path::new(&tracked_path);
743            let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
744
745            let (parent_tree_opt, file_in_parent) = if commit.parent_count() > 0 {
746                let parent = commit.parent(0)?;
747                let ptree = parent.tree()?;
748                let fip = ptree.get_path(path).ok().map(|e| e.id());
749                (Some(ptree), fip)
750            } else {
751                (None, None)
752            };
753
754            let changed = match (file_in_commit, file_in_parent) {
755                (Some(cur), Some(prev)) => cur != prev,
756                (Some(_), None) => true,
757                (None, Some(_)) => true,
758                (None, None) => false,
759            };
760
761            if changed {
762                let sha_str = oid.to_string();
763                results.push(FileCommitInfo {
764                    commit: CommitInfo {
765                        short_sha: sha_str[..7.min(sha_str.len())].to_string(),
766                        sha: sha_str,
767                        author: commit.author().name().unwrap_or("unknown").to_string(),
768                        date: commit.time().seconds().to_string(),
769                        message: commit.message().unwrap_or("").to_string(),
770                    },
771                    file_path: tracked_path.clone(),
772                });
773
774                if limit != 0 && results.len() >= limit {
775                    break;
776                }
777            }
778
779            // When walking backward, the rename commit still contains the new
780            // path. Detect that parent-side old path before the next iteration.
781            let should_check_rename =
782                parent_tree_opt.is_some() && (file_in_parent.is_none() || file_in_commit.is_none());
783            if should_check_rename {
784                let mut diff = self.repo.diff_tree_to_tree(
785                    parent_tree_opt.as_ref(),
786                    Some(&tree),
787                    None,
788                )?;
789                let mut find_opts = DiffFindOptions::new();
790                find_opts.renames(true);
791                diff.find_similar(Some(&mut find_opts))?;
792
793                let mut found_rename = false;
794                for delta in diff.deltas() {
795                    if delta.status() == Delta::Renamed {
796                        let new_path = delta
797                            .new_file()
798                            .path()
799                            .and_then(|p| p.to_str())
800                            .unwrap_or("");
801                        if new_path == tracked_path {
802                            // The tracked file was renamed FROM old_path
803                            let old_path = delta
804                                .old_file()
805                                .path()
806                                .and_then(|p| p.to_str())
807                                .unwrap_or("")
808                                .to_string();
809                            if !old_path.is_empty() {
810                                tracked_path = old_path;
811                                found_rename = true;
812                                break;
813                            }
814                        }
815                    }
816                }
817
818                if !found_rename && file_in_commit.is_none() {
819                    // File truly deleted, stop tracking
820                    break;
821                }
822            }
823        }
824
825        Ok(results)
826    }
827
828    fn get_file_commits_follow_renames_cli(
829        &self,
830        file_path: &str,
831        limit: usize,
832    ) -> Result<Vec<FileCommitInfo>, GitError> {
833        let mut command = Command::new("git");
834        command
835            .arg("-C")
836            .arg(&self.repo_root)
837            .arg("log")
838            .arg("--follow")
839            .arg("--format=\x1e%H\x1f%an\x1f%at\x1f%s")
840            .arg("--name-status");
841        if limit != 0 {
842            command.arg("-n").arg(limit.to_string());
843        }
844        command.arg("--").arg(file_path);
845
846        let output = command.output()?;
847        if !output.status.success() {
848            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
849            return Err(git_command_error(if stderr.is_empty() {
850                format!("git log exited with {}", output.status)
851            } else {
852                stderr
853            }));
854        }
855
856        let stdout = String::from_utf8_lossy(&output.stdout);
857        let mut tracked_path = file_path.to_string();
858        let mut commits = Vec::new();
859
860        for record in stdout.split('\x1e') {
861            let record = record.trim_start_matches('\n');
862            if record.trim().is_empty() {
863                continue;
864            }
865
866            let mut lines = record.lines();
867            let Some(meta) = lines.next() else {
868                continue;
869            };
870            let mut parts = meta.splitn(4, '\x1f');
871            let Some(sha) = parts.next() else {
872                continue;
873            };
874            let Some(author) = parts.next() else {
875                continue;
876            };
877            let Some(date) = parts.next() else {
878                continue;
879            };
880            let message = parts.next().unwrap_or_default();
881
882            let commit_path = tracked_path.clone();
883            let mut previous_path = None;
884            for line in lines {
885                let fields: Vec<&str> = line.split('\t').collect();
886                if fields.len() >= 3 && fields[0].starts_with('R') && fields[2] == tracked_path {
887                    previous_path = Some(fields[1].to_string());
888                }
889            }
890
891            commits.push(FileCommitInfo {
892                commit: CommitInfo {
893                    short_sha: sha[..7.min(sha.len())].to_string(),
894                    sha: sha.to_string(),
895                    author: author.to_string(),
896                    date: date.to_string(),
897                    message: message.to_string(),
898                },
899                file_path: commit_path,
900            });
901
902            if let Some(previous_path) = previous_path {
903                tracked_path = previous_path;
904            }
905        }
906
907        Ok(commits)
908    }
909
910    /// Get all file paths changed in a single commit (vs its parent).
911    /// Returns file paths from the new side of each delta.
912    pub fn get_commit_changed_files(&self, sha: &str) -> Result<Vec<String>, GitError> {
913        let obj = self.repo.revparse_single(sha)?;
914        let commit = obj.peel_to_commit()?;
915        let tree = commit.tree()?;
916        let parent_tree = if commit.parent_count() > 0 {
917            Some(commit.parent(0)?.tree()?)
918        } else {
919            None
920        };
921        let diff = self.repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None)?;
922        let mut paths = Vec::new();
923        for delta in diff.deltas() {
924            if let Some(p) = delta.new_file().path().and_then(|p| p.to_str()) {
925                paths.push(p.to_string());
926            }
927            // Also include old path for deletions/renames
928            if let Some(p) = delta.old_file().path().and_then(|p| p.to_str()) {
929                if !paths.contains(&p.to_string()) {
930                    paths.push(p.to_string());
931                }
932            }
933        }
934        Ok(paths)
935    }
936
937    pub fn get_log(&self, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
938        let mut revwalk = self.repo.revwalk()?;
939        revwalk.push_head()?;
940
941        let mut commits = Vec::new();
942        for (i, oid_result) in revwalk.enumerate() {
943            if limit != 0 && i >= limit {
944                break;
945            }
946            let oid = oid_result?;
947            let commit = self.repo.find_commit(oid)?;
948            let sha = oid.to_string();
949            commits.push(CommitInfo {
950                short_sha: sha[..7.min(sha.len())].to_string(),
951                sha,
952                author: commit.author().name().unwrap_or("unknown").to_string(),
953                date: commit.time().seconds().to_string(),
954                message: commit.message().unwrap_or("").to_string(),
955            });
956        }
957
958        Ok(commits)
959    }
960}
961
962fn parse_blame_porcelain(output: &str) -> Vec<BlameLineInfo> {
963    let lines: Vec<&str> = output.lines().collect();
964    let mut parsed = Vec::new();
965    let mut index = 0;
966
967    while index < lines.len() {
968        let Some((raw_sha, line_number)) = parse_blame_header(lines[index]) else {
969            index += 1;
970            continue;
971        };
972        index += 1;
973
974        let mut author = String::new();
975        let mut author_time = None;
976        let mut summary = String::new();
977
978        while index < lines.len() {
979            let line = lines[index];
980            index += 1;
981
982            if line.starts_with('\t') {
983                break;
984            } else if let Some(value) = line.strip_prefix("author ") {
985                author = value.to_string();
986            } else if let Some(value) = line.strip_prefix("author-time ") {
987                author_time = value.parse::<i64>().ok();
988            } else if let Some(value) = line.strip_prefix("summary ") {
989                summary = value.to_string();
990            }
991        }
992
993        let sha = raw_sha.trim_start_matches('^');
994        let commit_sha = if sha.chars().all(|c| c == '0') {
995            None
996        } else {
997            Some(sha.to_string())
998        };
999
1000        if author.is_empty() {
1001            author = if commit_sha.is_none() {
1002                "Not Committed Yet".to_string()
1003            } else {
1004                "unknown".to_string()
1005            };
1006        }
1007
1008        parsed.push(BlameLineInfo {
1009            line_number,
1010            commit_sha,
1011            author,
1012            author_time,
1013            summary,
1014        });
1015    }
1016
1017    parsed.sort_by_key(|line| line.line_number);
1018    parsed
1019}
1020
1021fn parse_blame_header(line: &str) -> Option<(&str, usize)> {
1022    let mut parts = line.split_whitespace();
1023    let sha = parts.next()?;
1024    if !is_blame_oid(sha) {
1025        return None;
1026    }
1027    parts.next()?;
1028    let final_line = parts.next()?.parse().ok()?;
1029    Some((sha, final_line))
1030}
1031
/// Returns `true` when `value` is a full hexadecimal object id, optionally
/// prefixed with `^`.
///
/// Both 40-digit (SHA-1) and 64-digit (SHA-256) ids are accepted so that
/// output from SHA-256 repositories is recognised as well.
fn is_blame_oid(value: &str) -> bool {
    let digits = value.strip_prefix('^').unwrap_or(value);
    matches!(digits.len(), 40 | 64) && digits.bytes().all(|b| b.is_ascii_hexdigit())
}
1036
1037fn git_command_error(message: String) -> GitError {
1038    GitError::Git2(git2::Error::from_str(&message))
1039}
1040
1041fn map_git_error(error: git2::Error) -> GitError {
1042    if error.code() == ErrorCode::NotFound {
1043        GitError::NotARepo
1044    } else {
1045        GitError::Git2(error)
1046    }
1047}
1048
1049fn should_retry_with_command_line_safe_directory(error: &git2::Error, path: &Path) -> bool {
1050    let safe_directories = command_line_safe_directories();
1051    should_retry_with_safe_directory(error, path, &safe_directories)
1052}
1053
1054fn should_retry_with_safe_directory(error: &git2::Error, path: &Path, safe_directories: &[String]) -> bool {
1055    error.code() == ErrorCode::Owner
1056        && nearest_git_root(path).is_some_and(|repo_root| {
1057            safe_directories.iter().any(|safe_directory| {
1058                safe_directory == "*"
1059                    || paths_match(&repo_root, Path::new(safe_directory))
1060            })
1061        })
1062}
1063
/// Collects the `safe.directory` values passed through the
/// `GIT_CONFIG_COUNT` / `GIT_CONFIG_KEY_<n>` / `GIT_CONFIG_VALUE_<n>`
/// environment variables, in index order.
///
/// A missing or unparsable count yields an empty list. Indices whose key or
/// value variable is missing are skipped. Keys are compared case-insensitively.
/// As in git itself, an empty value resets the list: entries that precede it
/// are discarded and the empty value is not reported.
fn command_line_safe_directories() -> Vec<String> {
    let count = env::var("GIT_CONFIG_COUNT")
        .ok()
        .and_then(|value| value.parse::<usize>().ok())
        .unwrap_or_default();

    let mut directories = Vec::new();
    for index in 0..count {
        let Ok(key) = env::var(format!("GIT_CONFIG_KEY_{index}")) else {
            continue;
        };
        if !key.eq_ignore_ascii_case("safe.directory") {
            continue;
        }
        let Ok(value) = env::var(format!("GIT_CONFIG_VALUE_{index}")) else {
            continue;
        };

        if value.is_empty() {
            // `safe.directory=` clears everything configured before it.
            directories.clear();
        } else {
            directories.push(value);
        }
    }
    directories
}
1081
/// Finds the closest directory at or above `path` that contains a `.git`
/// entry and returns it canonicalized (or as found when canonicalization
/// fails). When `path` is a file the search starts at its parent directory.
///
/// `path` is resolved to an absolute path first. Walking the parents of a
/// relative path such as `.` or `src` stops at the empty path, so without this
/// step a repository root above the current directory would never be found.
fn nearest_git_root(path: &Path) -> Option<PathBuf> {
    // Prefer the real path; fall back to joining onto the current directory,
    // and finally to the path as given.
    let absolute = fs::canonicalize(path)
        .or_else(|_| env::current_dir().map(|cwd| cwd.join(path)))
        .unwrap_or_else(|_| path.to_path_buf());

    let start: &Path = if absolute.is_file() {
        absolute.parent()?
    } else {
        &absolute
    };

    start
        .ancestors()
        .find(|dir| dir.join(".git").exists())
        .map(|dir| fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf()))
}
1097
/// Compares two paths after canonicalizing each one; a path that cannot be
/// canonicalized is compared as given. On Windows the comparison ignores ASCII
/// case, elsewhere it is exact.
fn paths_match(left: &Path, right: &Path) -> bool {
    let resolve = |p: &Path| fs::canonicalize(p).unwrap_or_else(|_| p.to_path_buf());
    let (lhs, rhs) = (resolve(left), resolve(right));

    if !cfg!(windows) {
        return lhs == rhs;
    }
    lhs.to_string_lossy()
        .eq_ignore_ascii_case(&rhs.to_string_lossy())
}
1109
/// Returns the single mutex shared by the whole process, created on first
/// use, that callers hold while owner validation is switched off.
fn owner_validation_lock() -> &'static Mutex<()> {
    static OWNER_VALIDATION: OnceLock<Mutex<()>> = OnceLock::new();
    OWNER_VALIDATION.get_or_init(Mutex::default)
}
1114
/// Guard that switches libgit2's owner validation off when created and back
/// on when dropped.
///
/// The setting is process-global, so the guard is meant to live only while the
/// mutex from `owner_validation_lock` is held.
struct OwnerValidationDisabled;

impl OwnerValidationDisabled {
    /// Disables owner validation and returns the guard that re-enables it.
    ///
    /// # Errors
    ///
    /// Returns the libgit2 error (converted into `GitError`) when the option
    /// cannot be changed; no guard is created in that case.
    fn new() -> Result<Self, GitError> {
        // libgit2 stores this as a process-global option; callers hold owner_validation_lock.
        // SAFETY: relies on the caller holding `owner_validation_lock`, so no
        // other guard toggles the option at the same time.
        unsafe { git2::opts::set_verify_owner_validation(false)? };
        Ok(Self)
    }
}

impl Drop for OwnerValidationDisabled {
    /// Turns owner validation back on. A failure is ignored because `drop`
    /// has no way to report it.
    fn drop(&mut self) {
        // Restore the default before the owner-validation lock is released.
        // SAFETY: same invariant as in `new` — the guard is dropped while the
        // lock is still held (it is declared after the lock guard in `open`).
        unsafe {
            let _ = git2::opts::set_verify_owner_validation(true);
        }
    }
}
1133
1134fn normalize_open_path(path: &Path) -> Result<PathBuf, GitError> {
1135    let canonical = match fs::canonicalize(path) {
1136        Ok(canonical) => canonical,
1137        Err(_) if path.is_absolute() => normalize_lexical(path),
1138        Err(_) => normalize_lexical(&env::current_dir()?.join(path)),
1139    };
1140
1141    Ok(if canonical.is_file() {
1142        canonical
1143            .parent()
1144            .map(Path::to_path_buf)
1145            .unwrap_or(canonical)
1146    } else {
1147        canonical
1148    })
1149}
1150
1151fn normalize_absolute_pathspec(path: &Path) -> PathBuf {
1152    let path = normalize_lexical(path);
1153    let Some(leaf) = path.file_name() else {
1154        return fs::canonicalize(&path).unwrap_or(path);
1155    };
1156    let mut trailing_components = vec![leaf.to_os_string()];
1157
1158    let Some(parent) = path.parent() else {
1159        return path;
1160    };
1161
1162    for ancestor in parent.ancestors() {
1163        if ancestor.exists() {
1164            let mut normalized =
1165                fs::canonicalize(ancestor).unwrap_or_else(|_| normalize_lexical(ancestor));
1166            for component in trailing_components.iter().rev() {
1167                normalized.push(component);
1168            }
1169            return normalized;
1170        }
1171
1172        let Some(name) = ancestor.file_name() else {
1173            return path;
1174        };
1175        trailing_components.push(name.to_os_string());
1176    }
1177
1178    path
1179}
1180
1181fn pathspec_outside_repo_error(pathspec: &str, repo_root: &Path) -> GitError {
1182    GitError::Git2(git2::Error::from_str(&format!(
1183        "pathspec '{pathspec}' is outside repository '{}'",
1184        repo_root.display()
1185    )))
1186}
1187
1188fn non_utf8_pathspec_error(pathspec: &str) -> GitError {
1189    GitError::Git2(git2::Error::from_str(&format!(
1190        "pathspec '{pathspec}' is not valid UTF-8 after normalization"
1191    )))
1192}
1193
/// Normalizes `path` purely lexically, without touching the filesystem.
///
/// `.` components are dropped and each `..` removes the preceding normal
/// component. A `..` directly below the root is dropped. A `..` with nothing
/// left to remove is kept, so a relative path such as `../../a` keeps every
/// leading `..`.
fn normalize_lexical(path: &Path) -> PathBuf {
    let mut normalized = PathBuf::new();

    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // `PathBuf::pop` also succeeds on a trailing `..`, which would
                // cancel two parent steps against each other. Only a normal
                // component may be removed.
                let last = normalized.components().next_back();
                let removable = matches!(last, Some(Component::Normal(_)));
                let at_root = matches!(last, Some(Component::RootDir));
                if removable {
                    normalized.pop();
                } else if !at_root {
                    normalized.push("..");
                }
            }
            Component::Normal(part) => normalized.push(part),
            Component::Prefix(prefix) => normalized.push(prefix.as_os_str()),
            Component::RootDir => normalized.push(component.as_os_str()),
        }
    }

    normalized
}
1213
1214#[cfg(test)]
1215mod tests {
1216    use super::*;
1217    use crate::model::change::ChangeType;
1218    use crate::parser::differ::{collect_binary_file_changes, compute_semantic_diff};
1219    use crate::parser::plugins::create_default_registry;
1220    use git2::{ErrorClass, Oid, Repository, Signature};
1221    use tempfile::TempDir;
1222
1223    fn commit_file(repo: &Repository, file_path: &str, contents: &str, message: &str) -> Oid {
1224        fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
1225
1226        let mut index = repo.index().unwrap();
1227        index.add_path(Path::new(file_path)).unwrap();
1228        index.write().unwrap();
1229
1230        let tree_id = index.write_tree().unwrap();
1231        let tree = repo.find_tree(tree_id).unwrap();
1232        let sig = Signature::now("Test User", "test@example.com").unwrap();
1233
1234        match repo.head() {
1235            Ok(head) => {
1236                let parent = repo.find_commit(head.target().unwrap()).unwrap();
1237                repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
1238                    .unwrap()
1239            }
1240            Err(_) => repo
1241                .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
1242                .unwrap(),
1243        }
1244    }
1245
1246    fn commit_binary_file(
1247        repo: &Repository,
1248        file_path: &str,
1249        contents: &[u8],
1250        message: &str,
1251    ) -> Oid {
1252        fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
1253
1254        let mut index = repo.index().unwrap();
1255        index.add_path(Path::new(file_path)).unwrap();
1256        index.write().unwrap();
1257
1258        let tree_id = index.write_tree().unwrap();
1259        let tree = repo.find_tree(tree_id).unwrap();
1260        let sig = Signature::now("Test User", "test@example.com").unwrap();
1261
1262        match repo.head() {
1263            Ok(head) => {
1264                let parent = repo.find_commit(head.target().unwrap()).unwrap();
1265                repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
1266                    .unwrap()
1267            }
1268            Err(_) => repo
1269                .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
1270                .unwrap(),
1271        }
1272    }
1273
1274    #[test]
1275    fn porcelain_blame_reports_uncommitted_lines() {
1276        let temp = TempDir::new().unwrap();
1277        let repo = Repository::init(temp.path()).unwrap();
1278
1279        commit_file(&repo, "a.py", "def foo():\n    return 1\n", "init");
1280        fs::write(temp.path().join("a.py"), "def foo():\n    return 2\n").unwrap();
1281
1282        let bridge = GitBridge::open(temp.path()).unwrap();
1283        let blame = bridge.blame_file_porcelain(Path::new("a.py")).unwrap();
1284
1285        assert!(blame[0].commit_sha.is_some());
1286        assert_eq!(blame[1].commit_sha, None);
1287        assert_eq!(blame[1].author, "Not Committed Yet");
1288    }
1289
1290    #[test]
1291    fn clean_worktree_does_not_fall_back_to_head_commit() {
1292        let temp = TempDir::new().unwrap();
1293        let repo = Repository::init(temp.path()).unwrap();
1294
1295        commit_file(&repo, "sample.ts", "export function a() {\n  return 1;\n}\n", "init");
1296        commit_file(
1297            &repo,
1298            "sample.ts",
1299            "export function a() {\n  return 2;\n}\n",
1300            "change a",
1301        );
1302
1303        let bridge = GitBridge::open(temp.path()).unwrap();
1304        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1305
1306        assert!(matches!(scope, DiffScope::Working));
1307        assert!(files.is_empty());
1308    }
1309
1310    #[test]
1311    fn owner_error_retries_for_command_line_safe_directory() {
1312        let temp = TempDir::new().unwrap();
1313        Repository::init(temp.path()).unwrap();
1314
1315        let owner_error = git2::Error::new(
1316            ErrorCode::Owner,
1317            ErrorClass::Config,
1318            "owner mismatch",
1319        );
1320        let safe_directories = [temp.path().to_string_lossy().to_string()];
1321
1322        assert!(should_retry_with_safe_directory(
1323            &owner_error,
1324            temp.path(),
1325            &safe_directories,
1326        ));
1327
1328        let other_directories = [temp.path().join("other").to_string_lossy().to_string()];
1329        assert!(!should_retry_with_safe_directory(
1330            &owner_error,
1331            temp.path(),
1332            &other_directories,
1333        ));
1334
1335        let not_found_error = git2::Error::new(
1336            ErrorCode::NotFound,
1337            ErrorClass::Repository,
1338            "not found",
1339        );
1340        assert!(!should_retry_with_safe_directory(
1341            &not_found_error,
1342            temp.path(),
1343            &["*".to_string()],
1344        ));
1345    }
1346
1347    #[test]
1348    fn explicit_commit_scope_still_reads_head_commit_diff() {
1349        let temp = TempDir::new().unwrap();
1350        let repo = Repository::init(temp.path()).unwrap();
1351
1352        commit_file(&repo, "sample.ts", "export function a() {\n  return 1;\n}\n", "init");
1353        let head_oid = commit_file(
1354            &repo,
1355            "sample.ts",
1356            "export function a() {\n  return 2;\n}\n",
1357            "change a",
1358        );
1359
1360        let bridge = GitBridge::open(temp.path()).unwrap();
1361        let files = bridge
1362            .get_changed_files(&DiffScope::Commit {
1363                sha: head_oid.to_string(),
1364            }, &[])
1365            .unwrap();
1366
1367        assert_eq!(files.len(), 1);
1368        assert_eq!(files[0].file_path, "sample.ts");
1369        assert_eq!(files[0].status, FileStatus::Modified);
1370    }
1371
1372    #[test]
1373    fn pathspecs_are_normalized_from_open_directory() {
1374        let temp = TempDir::new().unwrap();
1375        let repo = Repository::init(temp.path()).unwrap();
1376        fs::create_dir_all(temp.path().join("pkg")).unwrap();
1377
1378        commit_file(&repo, "pkg/a.py", "def foo():\n    return 1\n", "init");
1379        fs::write(temp.path().join("pkg/a.py"), "def foo():\n    return 2\n").unwrap();
1380
1381        let bridge = GitBridge::open(&temp.path().join("pkg")).unwrap();
1382        let relative_files = bridge
1383            .get_changed_files(&DiffScope::Working, &["a.py".to_string()])
1384            .unwrap();
1385
1386        assert_eq!(relative_files.len(), 1);
1387        assert_eq!(relative_files[0].file_path, "pkg/a.py");
1388
1389        let absolute_path = temp.path().join("pkg/a.py").to_string_lossy().to_string();
1390        let absolute_files = bridge
1391            .get_changed_files(&DiffScope::Working, &[absolute_path])
1392            .unwrap();
1393
1394        assert_eq!(absolute_files.len(), 1);
1395        assert_eq!(absolute_files[0].file_path, "pkg/a.py");
1396    }
1397
1398    #[test]
1399    fn absolute_deleted_pathspecs_are_normalized_from_existing_parent() {
1400        let temp = TempDir::new().unwrap();
1401        let repo = Repository::init(temp.path()).unwrap();
1402        fs::create_dir_all(temp.path().join("pkg")).unwrap();
1403
1404        commit_file(
1405            &repo,
1406            "pkg/deleted.py",
1407            "def foo():\n    return 1\n",
1408            "init",
1409        );
1410        let absolute_path = temp
1411            .path()
1412            .join("pkg/deleted.py")
1413            .to_string_lossy()
1414            .to_string();
1415        fs::remove_file(temp.path().join("pkg/deleted.py")).unwrap();
1416
1417        let bridge = GitBridge::open(&temp.path().join("pkg")).unwrap();
1418        let files = bridge
1419            .get_changed_files(&DiffScope::Working, &[absolute_path])
1420            .unwrap();
1421
1422        assert_eq!(files.len(), 1);
1423        assert_eq!(files[0].file_path, "pkg/deleted.py");
1424        assert_eq!(files[0].status, FileStatus::Deleted);
1425    }
1426
1427    #[test]
1428    fn absolute_missing_pathspecs_preserve_trailing_component_order() {
1429        let temp = TempDir::new().unwrap();
1430        let existing_parent = temp.path().join("existing");
1431        fs::create_dir(&existing_parent).unwrap();
1432
1433        let pathspec = existing_parent.join("missing").join("leaf.py");
1434        let normalized = normalize_absolute_pathspec(&pathspec);
1435
1436        let mut expected = fs::canonicalize(&existing_parent).unwrap();
1437        expected.push("missing");
1438        expected.push("leaf.py");
1439        assert_eq!(normalized, expected);
1440    }
1441
1442    #[test]
1443    fn absolute_pathspecs_outside_repo_are_rejected() {
1444        let repo_dir = TempDir::new().unwrap();
1445        let outside_dir = TempDir::new().unwrap();
1446        let repo = Repository::init(repo_dir.path()).unwrap();
1447
1448        commit_file(&repo, "sample.py", "def foo():\n    return 1\n", "init");
1449        fs::write(
1450            repo_dir.path().join("sample.py"),
1451            "def foo():\n    return 2\n",
1452        )
1453        .unwrap();
1454        let outside_path = outside_dir.path().join("outside.py");
1455        fs::write(&outside_path, "def outside():\n    return 1\n").unwrap();
1456
1457        let bridge = GitBridge::open(repo_dir.path()).unwrap();
1458        let err = bridge
1459            .get_changed_files(
1460                &DiffScope::Working,
1461                &[outside_path.to_string_lossy().to_string()],
1462            )
1463            .unwrap_err();
1464
1465        let message = err.to_string();
1466        assert!(message.contains("pathspec"));
1467        assert!(message.contains("is outside repository"));
1468    }
1469
1470    #[test]
1471    fn working_binary_modification_is_reported_as_binary_change() {
1472        let temp = TempDir::new().unwrap();
1473        let repo = Repository::init(temp.path()).unwrap();
1474
1475        commit_binary_file(&repo, "pic.png", b"\0png-v1\0", "init");
1476        fs::write(temp.path().join("pic.png"), b"\0png-v2\0extra").unwrap();
1477
1478        let bridge = GitBridge::open(temp.path()).unwrap();
1479        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1480
1481        assert_eq!(files.len(), 1);
1482        assert_eq!(files[0].file_path, "pic.png");
1483        assert_eq!(files[0].status, FileStatus::Modified);
1484        assert!(files[0].before_content.is_none());
1485        assert!(files[0].after_content.is_none());
1486
1487        let binary_changes = collect_binary_file_changes(&files);
1488        let registry = create_default_registry();
1489        let result = compute_semantic_diff(&files, &registry, None, None);
1490
1491        assert!(result.changes.is_empty());
1492        assert_eq!(result.file_count, 0);
1493        assert_eq!(binary_changes.len(), 1);
1494        assert_eq!(binary_changes[0].file_path, "pic.png");
1495        assert_eq!(binary_changes[0].status, FileStatus::Modified);
1496    }
1497
1498    #[test]
1499    fn staged_binary_add_and_delete_are_reported_as_binary_changes() {
1500        let temp = TempDir::new().unwrap();
1501        let repo = Repository::init(temp.path()).unwrap();
1502
1503        fs::write(temp.path().join("added.png"), b"\0added-binary\0").unwrap();
1504        let mut index = repo.index().unwrap();
1505        index.add_path(Path::new("added.png")).unwrap();
1506        index.write().unwrap();
1507
1508        let bridge = GitBridge::open(temp.path()).unwrap();
1509        let added_files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1510        assert_eq!(added_files.len(), 1);
1511        assert_eq!(added_files[0].file_path, "added.png");
1512        assert_eq!(added_files[0].status, FileStatus::Added);
1513        assert!(added_files[0].before_content.is_none());
1514        assert!(added_files[0].after_content.is_none());
1515        let added_binary_changes = collect_binary_file_changes(&added_files);
1516        assert_eq!(added_binary_changes.len(), 1);
1517        assert_eq!(added_binary_changes[0].file_path, "added.png");
1518
1519        let temp = TempDir::new().unwrap();
1520        let repo = Repository::init(temp.path()).unwrap();
1521        commit_binary_file(&repo, "deleted.png", b"\0deleted-binary\0", "init");
1522        fs::remove_file(temp.path().join("deleted.png")).unwrap();
1523        let mut index = repo.index().unwrap();
1524        index.remove_path(Path::new("deleted.png")).unwrap();
1525        index.write().unwrap();
1526
1527        let bridge = GitBridge::open(temp.path()).unwrap();
1528        let deleted_files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1529        assert_eq!(deleted_files.len(), 1);
1530        assert_eq!(deleted_files[0].file_path, "deleted.png");
1531        assert_eq!(deleted_files[0].status, FileStatus::Deleted);
1532        assert!(deleted_files[0].before_content.is_none());
1533        assert!(deleted_files[0].after_content.is_none());
1534        let deleted_binary_changes = collect_binary_file_changes(&deleted_files);
1535        assert_eq!(deleted_binary_changes.len(), 1);
1536        assert_eq!(deleted_binary_changes[0].file_path, "deleted.png");
1537    }
1538
1539    #[test]
1540    fn partial_utf8_boundary_is_not_treated_as_binary() {
1541        assert!(!GitBridge::bytes_look_binary(&[0xe2, 0x82], false));
1542        assert!(GitBridge::bytes_look_binary(&[0xe2, 0x82], true));
1543    }
1544
1545    #[test]
1546    fn staged_file_rename_is_reported_as_single_rename_with_old_contents() {
1547        let temp = TempDir::new().unwrap();
1548        let repo = Repository::init(temp.path()).unwrap();
1549
1550        let contents = "export function foo() {\n  return 1;\n}\n";
1551        commit_file(&repo, "old.ts", contents, "init");
1552
1553        fs::rename(temp.path().join("old.ts"), temp.path().join("new.ts")).unwrap();
1554        let mut index = repo.index().unwrap();
1555        index.remove_path(Path::new("old.ts")).unwrap();
1556        index.add_path(Path::new("new.ts")).unwrap();
1557        index.write().unwrap();
1558
1559        let bridge = GitBridge::open(temp.path()).unwrap();
1560        let files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1561
1562        assert_eq!(files.len(), 1);
1563        assert_eq!(files[0].status, FileStatus::Renamed);
1564        assert_eq!(files[0].file_path, "new.ts");
1565        assert_eq!(files[0].old_file_path.as_deref(), Some("old.ts"));
1566        assert_eq!(files[0].before_content.as_deref(), Some(contents));
1567        assert_eq!(files[0].after_content.as_deref(), Some(contents));
1568    }
1569
1570    #[test]
1571    fn staged_file_rename_with_edit_reports_single_moved_entity() {
1572        let temp = TempDir::new().unwrap();
1573        let repo = Repository::init(temp.path()).unwrap();
1574
1575        let before = "\
1576// shared header 01
1577// shared header 02
1578// shared header 03
1579// shared header 04
1580// shared header 05
1581// shared header 06
1582// shared header 07
1583// shared header 08
1584// shared header 09
1585// shared header 10
1586export function foo() {
1587  return alpha + beta + gamma;
1588}
1589";
1590        let after = before.replace(
1591            "return alpha + beta + gamma;",
1592            "return one + two + three;",
1593        );
1594
1595        commit_file(&repo, "old.ts", before, "init");
1596        fs::rename(temp.path().join("old.ts"), temp.path().join("new.ts")).unwrap();
1597        fs::write(temp.path().join("new.ts"), &after).unwrap();
1598
1599        let mut index = repo.index().unwrap();
1600        index.remove_path(Path::new("old.ts")).unwrap();
1601        index.add_path(Path::new("new.ts")).unwrap();
1602        index.write().unwrap();
1603
1604        let bridge = GitBridge::open(temp.path()).unwrap();
1605        let files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1606        assert_eq!(files.len(), 1);
1607        assert_eq!(files[0].status, FileStatus::Renamed);
1608
1609        let registry = create_default_registry();
1610        let result = compute_semantic_diff(&files, &registry, None, None);
1611
1612        assert_eq!(result.added_count, 0);
1613        assert_eq!(result.deleted_count, 0);
1614        // `foo` is a compound Moved change whose body also changed, so it counts toward
1615        // both moved_count and modified_count.
1616        assert_eq!(result.modified_count, 1);
1617        assert_eq!(result.moved_count, 1);
1618        assert_eq!(result.changes.len(), 1);
1619        assert_eq!(result.changes[0].change_type, ChangeType::Moved);
1620        assert_eq!(result.changes[0].entity_name, "foo");
1621        assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.ts"));
1622        assert_eq!(result.changes[0].structural_change, Some(true));
1623    }
1624
1625    #[test]
1626    fn working_diff_preserves_staged_rename_with_unstaged_edit() {
1627        let temp = TempDir::new().unwrap();
1628        let repo = Repository::init(temp.path()).unwrap();
1629
1630        let before = "\
1631export function foo(x: number) {
1632  return x + 1;
1633}
1634
1635export function bar(y: number) {
1636  return y * 2;
1637}
1638";
1639        let after = "\
1640export function foo(x: number) {
1641  return x + 42;
1642}
1643
1644export function bar(y: number) {
1645  return y * 99;
1646}
1647";
1648
1649        commit_file(&repo, "a.ts", before, "init");
1650
1651        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1652        let mut index = repo.index().unwrap();
1653        index.remove_path(Path::new("a.ts")).unwrap();
1654        index.add_path(Path::new("b.ts")).unwrap();
1655        index.write().unwrap();
1656
1657        fs::write(temp.path().join("b.ts"), after).unwrap();
1658
1659        let bridge = GitBridge::open(temp.path()).unwrap();
1660        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1661
1662        assert!(matches!(scope, DiffScope::Working));
1663        assert_eq!(files.len(), 1);
1664        assert_eq!(files[0].status, FileStatus::Renamed);
1665        assert_eq!(files[0].file_path, "b.ts");
1666        assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1667        assert_eq!(files[0].before_content.as_deref(), Some(before));
1668        assert_eq!(files[0].after_content.as_deref(), Some(after));
1669
1670        let registry = create_default_registry();
1671        let result = compute_semantic_diff(&files, &registry, None, None);
1672
1673        assert_eq!(result.added_count, 0);
1674        assert_eq!(result.deleted_count, 0);
1675        assert_eq!(result.modified_count, 2);
1676        assert_eq!(result.moved_count, 2);
1677        assert_eq!(result.changes.len(), 2);
1678        assert!(result
1679            .changes
1680            .iter()
1681            .all(|change| change.change_type == ChangeType::Moved));
1682        assert!(result
1683            .changes
1684            .iter()
1685            .all(|change| change.old_file_path.as_deref() == Some("a.ts")));
1686        assert!(result
1687            .changes
1688            .iter()
1689            .all(|change| change.structural_change == Some(true)));
1690    }
1691
1692    #[test]
1693    fn working_diff_uses_staged_rename_map_after_large_unstaged_rewrite() {
1694        let temp = TempDir::new().unwrap();
1695        let repo = Repository::init(temp.path()).unwrap();
1696
1697        let before_noise = (0..200)
1698            .map(|i| format!("// old filler {i} alpha beta gamma"))
1699            .collect::<Vec<_>>()
1700            .join("\n");
1701        let after_noise = (0..200)
1702            .map(|i| format!("// new filler {i} delta epsilon zeta"))
1703            .collect::<Vec<_>>()
1704            .join("\n");
1705        let before = format!(
1706            "{before_noise}\nexport function foo(x: number) {{\n  return x + 1;\n}}\n"
1707        );
1708        let after = format!(
1709            "{after_noise}\nexport function foo(x: number) {{\n  return x + 42;\n}}\n"
1710        );
1711
1712        commit_file(&repo, "a.ts", &before, "init");
1713
1714        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1715        let mut index = repo.index().unwrap();
1716        index.remove_path(Path::new("a.ts")).unwrap();
1717        index.add_path(Path::new("b.ts")).unwrap();
1718        index.write().unwrap();
1719
1720        fs::write(temp.path().join("b.ts"), &after).unwrap();
1721
1722        let bridge = GitBridge::open(temp.path()).unwrap();
1723        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1724
1725        assert!(matches!(scope, DiffScope::Working));
1726        assert_eq!(files.len(), 1);
1727        assert_eq!(files[0].status, FileStatus::Renamed);
1728        assert_eq!(files[0].file_path, "b.ts");
1729        assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1730        assert_eq!(files[0].before_content.as_deref(), Some(before.as_str()));
1731        assert_eq!(files[0].after_content.as_deref(), Some(after.as_str()));
1732
1733        let registry = create_default_registry();
1734        let result = compute_semantic_diff(&files, &registry, None, None);
1735
1736        assert_eq!(result.added_count, 0);
1737        assert_eq!(result.deleted_count, 0);
1738        // Two changes: the rewritten comment block is a Modified orphan, and `foo` is a
1739        // compound Moved change whose body also changed, so it counts toward both
1740        // moved_count and modified_count.
1741        assert_eq!(result.modified_count, 2);
1742        assert_eq!(result.moved_count, 1);
1743        assert!(result
1744            .changes
1745            .iter()
1746            .any(|change| change.change_type == ChangeType::Moved && change.entity_name == "foo"));
1747    }
1748
1749    #[test]
1750    fn explicit_ref_to_working_uses_index_rename_map_after_large_unstaged_rewrite() {
1751        let temp = TempDir::new().unwrap();
1752        let repo = Repository::init(temp.path()).unwrap();
1753
1754        let before_noise = (0..200)
1755            .map(|i| format!("// old filler {i} alpha beta gamma"))
1756            .collect::<Vec<_>>()
1757            .join("\n");
1758        let after_noise = (0..200)
1759            .map(|i| format!("// new filler {i} delta epsilon zeta"))
1760            .collect::<Vec<_>>()
1761            .join("\n");
1762        let before = format!(
1763            "{before_noise}\nexport function foo(x: number) {{\n  return x + 1;\n}}\n"
1764        );
1765        let after = format!(
1766            "{after_noise}\nexport function foo(x: number) {{\n  return x + 42;\n}}\n"
1767        );
1768
1769        commit_file(&repo, "a.ts", &before, "init");
1770
1771        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1772        let mut index = repo.index().unwrap();
1773        index.remove_path(Path::new("a.ts")).unwrap();
1774        index.add_path(Path::new("b.ts")).unwrap();
1775        index.write().unwrap();
1776
1777        fs::write(temp.path().join("b.ts"), &after).unwrap();
1778
1779        let bridge = GitBridge::open(temp.path()).unwrap();
1780        let files = bridge
1781            .get_changed_files(
1782                &DiffScope::RefToWorking {
1783                    refspec: "HEAD".to_string(),
1784                },
1785                &[],
1786            )
1787            .unwrap();
1788
1789        assert_eq!(files.len(), 1);
1790        assert_eq!(files[0].status, FileStatus::Renamed);
1791        assert_eq!(files[0].file_path, "b.ts");
1792        assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1793        assert_eq!(files[0].before_content.as_deref(), Some(before.as_str()));
1794        assert_eq!(files[0].after_content.as_deref(), Some(after.as_str()));
1795
1796        let registry = create_default_registry();
1797        let result = compute_semantic_diff(&files, &registry, None, None);
1798
1799        assert_eq!(result.added_count, 0);
1800        assert_eq!(result.deleted_count, 0);
1801        // Two changes: the rewritten comment block is a Modified orphan, and `foo` is a
1802        // compound Moved change whose body also changed, so it counts toward both
1803        // moved_count and modified_count.
1804        assert_eq!(result.modified_count, 2);
1805        assert_eq!(result.moved_count, 1);
1806        assert!(result
1807            .changes
1808            .iter()
1809            .any(|change| change.change_type == ChangeType::Moved && change.entity_name == "foo"));
1810    }
1811
1812    #[test]
1813    fn staged_rename_map_overrides_wrong_worktree_rename_pairing() {
1814        let temp = TempDir::new().unwrap();
1815        let repo = Repository::init(temp.path()).unwrap();
1816
1817        let a_before = "export function foo(x: number) {\n  return x + 1;\n}\n";
1818        let c_before = "export function foo(x: number) {\n  return x + 42;\n}\n";
1819
1820        commit_file(&repo, "a.ts", a_before, "init a");
1821        commit_file(&repo, "c.ts", c_before, "init c");
1822
1823        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1824        let mut index = repo.index().unwrap();
1825        index.remove_path(Path::new("a.ts")).unwrap();
1826        index.add_path(Path::new("b.ts")).unwrap();
1827        index.write().unwrap();
1828
1829        fs::remove_file(temp.path().join("c.ts")).unwrap();
1830        fs::write(temp.path().join("b.ts"), c_before).unwrap();
1831
1832        let bridge = GitBridge::open(temp.path()).unwrap();
1833        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1834
1835        assert!(matches!(scope, DiffScope::Working));
1836        let renamed = files
1837            .iter()
1838            .find(|file| {
1839                file.status == FileStatus::Renamed
1840                    && file.file_path == "b.ts"
1841                    && file.old_file_path.as_deref() == Some("a.ts")
1842            })
1843            .unwrap();
1844        assert_eq!(renamed.before_content.as_deref(), Some(a_before));
1845        assert_eq!(renamed.after_content.as_deref(), Some(c_before));
1846
1847        let deleted = files
1848            .iter()
1849            .find(|file| file.status == FileStatus::Deleted && file.file_path == "c.ts")
1850            .unwrap();
1851        assert_eq!(deleted.before_content.as_deref(), Some(c_before));
1852        assert_eq!(deleted.after_content.as_deref(), None);
1853        assert!(!files.iter().any(|file| {
1854            file.status == FileStatus::Renamed
1855                && file.file_path == "b.ts"
1856                && file.old_file_path.as_deref() == Some("c.ts")
1857        }));
1858    }
1859
1860    #[test]
1861    fn staged_diff_with_base_ref_compares_index_to_that_ref() {
1862        let temp = TempDir::new().unwrap();
1863        let repo = Repository::init(temp.path()).unwrap();
1864
1865        let v1 = "def foo():\n    return 1\n";
1866        let v2 = "def foo():\n    return 2\n";
1867        let v3 = "def foo():\n    return 3\n";
1868        let v4 = "def foo():\n    return 4\n";
1869
1870        commit_file(&repo, "a.py", v1, "init");
1871        commit_file(&repo, "a.py", v2, "second");
1872        fs::write(temp.path().join("a.py"), v3).unwrap();
1873
1874        let mut index = repo.index().unwrap();
1875        index.add_path(Path::new("a.py")).unwrap();
1876        index.write().unwrap();
1877
1878        fs::write(temp.path().join("a.py"), v4).unwrap();
1879
1880        let bridge = GitBridge::open(temp.path()).unwrap();
1881        let files = bridge
1882            .get_staged_files_with_base_ref("HEAD~1", &[])
1883            .unwrap();
1884
1885        assert_eq!(files.len(), 1);
1886        assert_eq!(files[0].status, FileStatus::Modified);
1887        assert_eq!(files[0].file_path, "a.py");
1888        assert_eq!(files[0].before_content.as_deref(), Some(v1));
1889        assert_eq!(files[0].after_content.as_deref(), Some(v3));
1890
1891        let registry = create_default_registry();
1892        let result = compute_semantic_diff(&files, &registry, None, None);
1893
1894        assert_eq!(result.modified_count, 1);
1895        assert_eq!(result.changes.len(), 1);
1896        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
1897        assert_eq!(result.changes[0].entity_name, "foo");
1898    }
1899
1900    #[test]
1901    fn crlf_only_difference_in_working_file_is_invisible() {
1902        let temp = TempDir::new().unwrap();
1903        let repo = Repository::init(temp.path()).unwrap();
1904
1905        commit_file(&repo, "sample.rs", "fn a() {}\n", "init");
1906        fs::write(temp.path().join("sample.rs"), "fn a() {}\r\n").unwrap();
1907
1908        let bridge = GitBridge::open(temp.path()).unwrap();
1909        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1910
1911        assert_eq!(files.len(), 1, "expected git to detect the CRLF change as modified");
1912
1913        let before = files[0].before_content.as_deref().unwrap();
1914        let after = files[0].after_content.as_deref().unwrap();
1915
1916        assert_eq!(before, after, "CRLF-only difference should be invisible after normalization");
1917    }
1918
1919    #[test]
1920    fn crlf_stored_in_blob_is_normalized_on_read() {
1921        let temp = TempDir::new().unwrap();
1922        let repo = Repository::init(temp.path()).unwrap();
1923
1924        repo.config().unwrap().set_str("core.autocrlf", "false").unwrap();
1925        commit_file(&repo, "sample.rs", "fn a() {}\r\n", "init");
1926        fs::write(temp.path().join("sample.rs"), "fn a() {}\r\nfn b() {}\r\n").unwrap();
1927
1928        let bridge = GitBridge::open(temp.path()).unwrap();
1929        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1930
1931        assert_eq!(files.len(), 1, "expected git to detect the modification");
1932
1933        let before = files[0].before_content.as_deref().unwrap();
1934        assert!(!before.contains('\r'), "before_content read from CRLF blob should be normalized to LF");
1935    }
1936}