Skip to main content

sem_core/git/
bridge.rs

1use std::env;
2use std::fs;
3use std::path::{Component, Path, PathBuf};
4use std::process::Command;
5use std::sync::{Mutex, OnceLock};
6
7use git2::{Blame, Delta, Diff, DiffFindOptions, DiffOptions, ErrorCode, Oid, Repository};
8use thiserror::Error;
9
10use super::types::{CommitInfo, DiffScope, FileChange, FileCommitInfo, FileStatus};
11use super::types::BlameLineInfo;
12
13#[derive(Error, Debug)]
14pub enum GitError {
15    #[error("not a git repository")]
16    NotARepo,
17    #[error("git error: {0}")]
18    Git2(#[from] git2::Error),
19    #[error("io error: {0}")]
20    Io(#[from] std::io::Error),
21}
22
23pub struct GitBridge {
24    repo: Repository,
25    repo_root: PathBuf,
26    cwd: PathBuf,
27}
28
29impl GitBridge {
30    pub fn open(path: &Path) -> Result<Self, GitError> {
31        let cwd = normalize_open_path(path)?;
32        let repo = match Repository::discover(path) {
33            Ok(repo) => repo,
34            Err(error) if should_retry_with_command_line_safe_directory(&error, path) => {
35                let _guard = owner_validation_lock()
36                    .lock()
37                    .unwrap_or_else(|poisoned| poisoned.into_inner());
38                let _owner_validation = OwnerValidationDisabled::new()?;
39                let repo = Repository::discover(path);
40                repo.map_err(map_git_error)?
41            }
42            Err(error) => return Err(map_git_error(error)),
43        };
44        let repo_root = repo.workdir().ok_or(GitError::NotARepo)?;
45        let repo_root = fs::canonicalize(repo_root)?;
46        Ok(Self {
47            repo,
48            repo_root,
49            cwd,
50        })
51    }
52
53    pub fn repo_root(&self) -> &Path {
54        &self.repo_root
55    }
56
57    pub fn blame_file(&self, file_path: &Path) -> Result<Blame<'_>, GitError> {
58        Ok(self.repo.blame_file(file_path, None)?)
59    }
60
61    pub fn blame_file_porcelain(&self, file_path: &Path) -> Result<Vec<BlameLineInfo>, GitError> {
62        let output = Command::new("git")
63            .arg("-C")
64            .arg(&self.repo_root)
65            .arg("blame")
66            .arg("--line-porcelain")
67            .arg("--")
68            .arg(file_path)
69            .output()?;
70
71        if !output.status.success() {
72            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
73            return Err(git_command_error(if stderr.is_empty() {
74                format!("git blame exited with {}", output.status)
75            } else {
76                stderr
77            }));
78        }
79
80        let parsed = parse_blame_porcelain(&String::from_utf8_lossy(&output.stdout));
81        if parsed.is_empty() && !output.stdout.is_empty() {
82            return Err(git_command_error(
83                "failed to parse git blame porcelain output".to_string(),
84            ));
85        }
86
87        Ok(parsed)
88    }
89
90    pub fn commit_summary(&self, oid: Oid) -> Option<String> {
91        self.repo
92            .find_commit(oid)
93            .ok()
94            .and_then(|commit| commit.summary().map(String::from))
95    }
96
97    pub fn get_head_sha(&self) -> Result<String, GitError> {
98        let head = self.repo.head()?;
99        let oid = head.target().ok_or_else(|| {
100            git2::Error::from_str("HEAD has no target")
101        })?;
102        Ok(oid.to_string())
103    }
104
105    /// Combined detect scope + get files in one call (fast path).
106    /// Shows all changes from HEAD to the current working state by default.
107    /// Use `--staged` for staged changes only.
108    pub fn detect_and_get_files(&self, pathspecs: &[String]) -> Result<(DiffScope, Vec<FileChange>), GitError> {
109        // Show the full current working state, including staged changes.
110        let mut working_files = self.get_working_diff_files(pathspecs)?;
111        if !working_files.is_empty() {
112            self.populate_contents(&mut working_files, &DiffScope::Working)?;
113            return Ok((DiffScope::Working, working_files));
114        }
115
116        // Clean worktree = no changes
117        Ok((DiffScope::Working, Vec::new()))
118    }
119
120    /// Get changed files for a specific scope
121    pub fn get_changed_files(&self, scope: &DiffScope, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
122        let mut files = match scope {
123            DiffScope::Working => {
124                self.get_working_diff_files(pathspecs)?
125            }
126            DiffScope::Staged => self.get_staged_diff_files(pathspecs)?,
127            DiffScope::Commit { sha } => self.get_commit_diff_files(sha, pathspecs)?,
128            DiffScope::Range { from, to } => self.get_range_diff_files(from, to, pathspecs)?,
129            DiffScope::RefToWorking { refspec } => self.get_ref_to_working_diff_files(refspec, pathspecs)?,
130        };
131
132        // Filter .sem/ files
133        files.retain(|f| !f.file_path.starts_with(".sem/"));
134
135        self.populate_contents(&mut files, scope)?;
136        Ok(files)
137    }
138
139    pub fn get_staged_files_with_base_ref(
140        &self,
141        base: &str,
142        pathspecs: &[String],
143    ) -> Result<Vec<FileChange>, GitError> {
144        let mut files = self.get_staged_diff_files_with_base(base, pathspecs)?;
145        files.retain(|f| !f.file_path.starts_with(".sem/"));
146
147        let base_tree = self.resolve_tree(base)?;
148        for file in files.iter_mut() {
149            if file.status != FileStatus::Deleted {
150                file.after_content = self.read_index_file(&file.file_path);
151            }
152            if file.status != FileStatus::Added {
153                let path = file
154                    .old_file_path
155                    .as_deref()
156                    .unwrap_or(&file.file_path);
157                file.before_content = self.read_blob_from_tree(&base_tree, path);
158            }
159        }
160
161        Ok(files)
162    }
163
164    /// Resolve the merge base between two refs
165    pub fn resolve_merge_base(&self, ref1: &str, ref2: &str) -> Result<String, GitError> {
166        let obj1 = self.repo.revparse_single(ref1)?;
167        let obj2 = self.repo.revparse_single(ref2)?;
168        let oid = self.repo.merge_base(obj1.id(), obj2.id())?;
169        Ok(oid.to_string())
170    }
171
172    /// Check if a string resolves to a valid git revision
173    pub fn is_valid_rev(&self, refspec: &str) -> bool {
174        self.repo.revparse_single(refspec).is_ok()
175    }
176
177    fn make_diff_opts(&self, pathspecs: &[String]) -> Result<DiffOptions, GitError> {
178        let mut opts = DiffOptions::new();
179        for spec in self.normalize_pathspecs(pathspecs)? {
180            opts.pathspec(spec.as_str());
181        }
182        Ok(opts)
183    }
184
185    fn normalize_pathspecs(&self, pathspecs: &[String]) -> Result<Vec<String>, GitError> {
186        pathspecs
187            .iter()
188            .map(|spec| self.normalize_pathspec(spec))
189            .collect()
190    }
191
192    fn normalize_pathspec(&self, spec: &str) -> Result<String, GitError> {
193        if spec.is_empty() || spec.starts_with(':') {
194            return Ok(spec.to_string());
195        }
196
197        let spec_path = Path::new(spec);
198        let absolute = if spec_path.is_absolute() {
199            normalize_absolute_pathspec(spec_path)
200        } else {
201            normalize_lexical(&self.cwd.join(spec_path))
202        };
203
204        let repo_root = normalize_lexical(&self.repo_root);
205        let relative =
206            absolute
207                .strip_prefix(&repo_root)
208                .map_err(|_| pathspec_outside_repo_error(spec, &self.repo_root))?;
209
210        if relative.as_os_str().is_empty() {
211            Ok(".".to_string())
212        } else {
213            relative
214                .to_str()
215                .map(|path| path.replace('\\', "/"))
216                .ok_or_else(|| non_utf8_pathspec_error(spec))
217        }
218    }
219
220    fn get_staged_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
221        let head_tree = match self.repo.head() {
222            Ok(head) => {
223                let commit = head.peel_to_commit()?;
224                Some(commit.tree()?)
225            }
226            Err(_) => None, // No commits yet
227        };
228
229        self.get_index_diff_files(head_tree.as_ref(), pathspecs)
230    }
231
232    fn get_staged_diff_files_with_base(
233        &self,
234        base: &str,
235        pathspecs: &[String],
236    ) -> Result<Vec<FileChange>, GitError> {
237        let base_tree = self.resolve_tree(base)?;
238        self.get_index_diff_files(Some(&base_tree), pathspecs)
239    }
240
241    fn get_index_diff_files(
242        &self,
243        base_tree: Option<&git2::Tree<'_>>,
244        pathspecs: &[String],
245    ) -> Result<Vec<FileChange>, GitError> {
246        let mut opts = self.make_diff_opts(pathspecs)?;
247        let mut diff = self.repo.diff_tree_to_index(
248            base_tree,
249            Some(&self.repo.index()?),
250            Some(&mut opts),
251        )?;
252        Self::detect_renames(&mut diff)?;
253
254        Ok(self.diff_to_file_changes(&diff))
255    }
256
257    fn get_working_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
258        let mut opts = self.make_diff_opts(pathspecs)?;
259        opts.include_untracked(false);
260
261        let head_tree = self.resolve_tree("HEAD").ok();
262        let mut diff = match head_tree.as_ref() {
263            Some(head_tree) => self
264                .repo
265                .diff_tree_to_workdir_with_index(Some(head_tree), Some(&mut opts))?,
266            None => self.repo.diff_index_to_workdir(None, Some(&mut opts))?,
267        };
268        Self::detect_renames(&mut diff)?;
269        self.apply_index_rename_map(
270            self.diff_to_file_changes(&diff),
271            head_tree.as_ref(),
272            pathspecs,
273        )
274    }
275
276    fn apply_index_rename_map(
277        &self,
278        mut files: Vec<FileChange>,
279        base_tree: Option<&git2::Tree<'_>>,
280        pathspecs: &[String],
281    ) -> Result<Vec<FileChange>, GitError> {
282        let Some(base_tree) = base_tree else {
283            return Ok(files);
284        };
285
286        let index_renames: Vec<FileChange> = self
287            .get_index_diff_files(Some(base_tree), pathspecs)?
288            .into_iter()
289            .filter(|file| file.status == FileStatus::Renamed)
290            .collect();
291
292        for rename in index_renames {
293            let Some(old_path) = rename.old_file_path.clone() else {
294                continue;
295            };
296            let target_pos = files
297                .iter()
298                .position(|file| {
299                    matches!(file.status, FileStatus::Added | FileStatus::Renamed)
300                        && file.file_path == rename.file_path
301                });
302            let deleted_pos = files
303                .iter()
304                .position(|file| {
305                    file.status == FileStatus::Deleted && file.file_path == old_path
306                });
307
308            if let (Some(target_pos), Some(deleted_pos)) = (target_pos, deleted_pos) {
309                if files[target_pos].status == FileStatus::Renamed
310                    && files[target_pos].old_file_path.as_deref() == Some(old_path.as_str())
311                {
312                    continue;
313                }
314
315                let target_file = files[target_pos].clone();
316                let deleted_file = files[deleted_pos].clone();
317                let displaced_deleted_path =
318                    if target_file.status == FileStatus::Renamed {
319                        target_file
320                            .old_file_path
321                            .as_ref()
322                            .filter(|path| *path != &old_path)
323                            .cloned()
324                    } else {
325                        None
326                    };
327
328                files = files
329                    .into_iter()
330                    .enumerate()
331                    .filter_map(|(idx, file)| {
332                        if idx == target_pos || idx == deleted_pos {
333                            None
334                        } else {
335                            Some(file)
336                        }
337                    })
338                    .collect();
339                let before_content = deleted_file
340                    .before_content
341                    .or_else(|| self.read_blob_from_tree(base_tree, &old_path));
342                let after_content = target_file
343                    .after_content
344                    .or_else(|| self.read_working_file(&target_file.file_path));
345                files.push(FileChange {
346                    file_path: target_file.file_path,
347                    status: FileStatus::Renamed,
348                    old_file_path: Some(old_path),
349                    before_content,
350                    after_content,
351                });
352                if let Some(file_path) = displaced_deleted_path {
353                    let before_content = self.read_blob_from_tree(base_tree, &file_path);
354                    files.push(FileChange {
355                        file_path,
356                        status: FileStatus::Deleted,
357                        old_file_path: None,
358                        before_content,
359                        after_content: None,
360                    });
361                }
362            }
363        }
364
365        Ok(files)
366    }
367
368    fn get_commit_diff_files(&self, sha: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
369        let obj = self.repo.revparse_single(sha)?;
370        let commit = obj.peel_to_commit()?;
371        let tree = commit.tree()?;
372
373        let parent_tree = if commit.parent_count() > 0 {
374            Some(commit.parent(0)?.tree()?)
375        } else {
376            None
377        };
378
379        let mut opts = self.make_diff_opts(pathspecs)?;
380        let mut diff = self.repo.diff_tree_to_tree(
381            parent_tree.as_ref(),
382            Some(&tree),
383            Some(&mut opts),
384        )?;
385        Self::detect_renames(&mut diff)?;
386
387        Ok(self.diff_to_file_changes(&diff))
388    }
389
390    fn get_range_diff_files(&self, from: &str, to: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
391        let from_obj = self.repo.revparse_single(from)?;
392        let to_obj = self.repo.revparse_single(to)?;
393
394        let from_tree = from_obj.peel_to_commit()?.tree()?;
395        let to_tree = to_obj.peel_to_commit()?.tree()?;
396
397        let mut opts = self.make_diff_opts(pathspecs)?;
398        let mut diff = self.repo.diff_tree_to_tree(
399            Some(&from_tree),
400            Some(&to_tree),
401            Some(&mut opts),
402        )?;
403        Self::detect_renames(&mut diff)?;
404
405        Ok(self.diff_to_file_changes(&diff))
406    }
407
408    fn get_ref_to_working_diff_files(&self, refspec: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
409        let tree = self.resolve_tree(refspec)?;
410        let mut opts = self.make_diff_opts(pathspecs)?;
411        let mut diff = self.repo.diff_tree_to_workdir_with_index(
412            Some(&tree),
413            Some(&mut opts),
414        )?;
415        Self::detect_renames(&mut diff)?;
416        self.apply_index_rename_map(self.diff_to_file_changes(&diff), Some(&tree), pathspecs)
417    }
418
419    fn detect_renames(diff: &mut Diff) -> Result<(), GitError> {
420        let mut opts = DiffFindOptions::new();
421        opts.renames(true);
422        diff.find_similar(Some(&mut opts))?;
423        Ok(())
424    }
425
426    fn diff_to_file_changes(&self, diff: &Diff) -> Vec<FileChange> {
427        let mut files = Vec::new();
428
429        for delta in diff.deltas() {
430            let (status, file_path, old_file_path) = match delta.status() {
431                Delta::Added => {
432                    let path = delta
433                        .new_file()
434                        .path()
435                        .and_then(|p| p.to_str())
436                        .unwrap_or("")
437                        .to_string();
438                    (FileStatus::Added, path, None)
439                }
440                Delta::Deleted => {
441                    let path = delta
442                        .old_file()
443                        .path()
444                        .and_then(|p| p.to_str())
445                        .unwrap_or("")
446                        .to_string();
447                    (FileStatus::Deleted, path, None)
448                }
449                Delta::Modified => {
450                    let path = delta
451                        .new_file()
452                        .path()
453                        .and_then(|p| p.to_str())
454                        .unwrap_or("")
455                        .to_string();
456                    (FileStatus::Modified, path, None)
457                }
458                Delta::Renamed => {
459                    let new_path = delta
460                        .new_file()
461                        .path()
462                        .and_then(|p| p.to_str())
463                        .unwrap_or("")
464                        .to_string();
465                    let old_path = delta
466                        .old_file()
467                        .path()
468                        .and_then(|p| p.to_str())
469                        .unwrap_or("")
470                        .to_string();
471                    (FileStatus::Renamed, new_path, Some(old_path))
472                }
473                _ => continue,
474            };
475
476            if !file_path.starts_with(".sem/") {
477                files.push(FileChange {
478                    file_path,
479                    status,
480                    old_file_path,
481                    before_content: None,
482                    after_content: None,
483                });
484            }
485        }
486
487        files
488    }
489
490    fn populate_contents(
491        &self,
492        files: &mut [FileChange],
493        scope: &DiffScope,
494    ) -> Result<(), GitError> {
495        match scope {
496            DiffScope::Working => {
497                // Resolve HEAD tree once for all before_content reads
498                let head_tree = self.resolve_tree("HEAD").ok();
499                for file in files.iter_mut() {
500                    if file.status != FileStatus::Deleted {
501                        file.after_content = self.read_working_file(&file.file_path);
502                    }
503                    if file.status != FileStatus::Added {
504                        let path = file
505                            .old_file_path
506                            .as_deref()
507                            .unwrap_or(&file.file_path);
508                        file.before_content = head_tree
509                            .as_ref()
510                            .and_then(|t| self.read_blob_from_tree(t, path));
511                    }
512                }
513            }
514            DiffScope::Staged => {
515                let head_tree = self.resolve_tree("HEAD").ok();
516                for file in files.iter_mut() {
517                    if file.status != FileStatus::Deleted {
518                        file.after_content = self
519                            .read_index_file(&file.file_path)
520                            .or_else(|| self.read_working_file(&file.file_path));
521                    }
522                    if file.status != FileStatus::Added {
523                        let path = file
524                            .old_file_path
525                            .as_deref()
526                            .unwrap_or(&file.file_path);
527                        file.before_content = head_tree
528                            .as_ref()
529                            .and_then(|t| self.read_blob_from_tree(t, path));
530                    }
531                }
532            }
533            DiffScope::Commit { sha } => {
534                // Resolve both trees once instead of per-file
535                let after_tree = self.resolve_tree(sha)?;
536                let before_tree = self.resolve_tree(&format!("{sha}~1")).ok();
537                for file in files.iter_mut() {
538                    if file.status != FileStatus::Deleted {
539                        file.after_content =
540                            self.read_blob_from_tree(&after_tree, &file.file_path);
541                    }
542                    if file.status != FileStatus::Added {
543                        let path = file
544                            .old_file_path
545                            .as_deref()
546                            .unwrap_or(&file.file_path);
547                        file.before_content = before_tree
548                            .as_ref()
549                            .and_then(|t| self.read_blob_from_tree(t, path));
550                    }
551                }
552            }
553            DiffScope::Range { from, to } => {
554                let after_tree = self.resolve_tree(to)?;
555                let before_tree = self.resolve_tree(from)?;
556                for file in files.iter_mut() {
557                    if file.status != FileStatus::Deleted {
558                        file.after_content =
559                            self.read_blob_from_tree(&after_tree, &file.file_path);
560                    }
561                    if file.status != FileStatus::Added {
562                        let path = file
563                            .old_file_path
564                            .as_deref()
565                            .unwrap_or(&file.file_path);
566                        file.before_content =
567                            self.read_blob_from_tree(&before_tree, path);
568                    }
569                }
570            }
571            DiffScope::RefToWorking { refspec } => {
572                let before_tree = self.resolve_tree(refspec)?;
573                for file in files.iter_mut() {
574                    if file.status != FileStatus::Deleted {
575                        file.after_content = self.read_working_file(&file.file_path);
576                    }
577                    if file.status != FileStatus::Added {
578                        let path = file
579                            .old_file_path
580                            .as_deref()
581                            .unwrap_or(&file.file_path);
582                        file.before_content =
583                            self.read_blob_from_tree(&before_tree, path);
584                    }
585                }
586            }
587        }
588        Ok(())
589    }
590
591    fn resolve_tree(&self, refspec: &str) -> Result<git2::Tree<'_>, GitError> {
592        let obj = self.repo.revparse_single(refspec)?;
593        let commit = obj.peel_to_commit()?;
594        Ok(commit.tree()?)
595    }
596
597    fn normalize_line_endings(s: String) -> String {
598        if s.contains('\r') {
599            s.replace("\r\n", "\n").replace('\r', "\n")
600        } else {
601            s
602        }
603    }
604
605    fn read_blob_from_tree(&self, tree: &git2::Tree, file_path: &str) -> Option<String> {
606        let entry = tree.get_path(Path::new(file_path)).ok()?;
607        let blob = self.repo.find_blob(entry.id()).ok()?;
608        std::str::from_utf8(blob.content())
609            .ok()
610            .map(|s| Self::normalize_line_endings(s.to_string()))
611    }
612
613    fn read_working_file(&self, file_path: &str) -> Option<String> {
614        let full_path = self.repo_root.join(file_path);
615        fs::read_to_string(full_path)
616            .ok()
617            .map(Self::normalize_line_endings)
618    }
619
620    fn read_index_file(&self, file_path: &str) -> Option<String> {
621        let index = self.repo.index().ok()?;
622        let entry = index.get_path(Path::new(file_path), 0)?;
623        let blob = self.repo.find_blob(entry.id).ok()?;
624        std::str::from_utf8(blob.content())
625            .ok()
626            .map(|s| Self::normalize_line_endings(s.to_string()))
627    }
628
629
630    /// Read file content at a specific git ref (commit SHA, branch, tag, etc.)
631    pub fn read_file_at_ref(&self, refspec: &str, file_path: &str) -> Result<Option<String>, GitError> {
632        let tree = self.resolve_tree(refspec)?;
633        Ok(self.read_blob_from_tree(&tree, file_path))
634    }
635
636    /// Get commits that modified a specific file, walking history from HEAD.
637    /// Returns commits in reverse chronological order (newest first).
638    pub fn get_file_commits(&self, file_path: &str, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
639        let mut revwalk = self.repo.revwalk()?;
640        revwalk.push_head()?;
641        revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::TIME)?;
642
643        let mut commits = Vec::new();
644        let path = Path::new(file_path);
645
646        for oid_result in revwalk {
647            let oid = oid_result?;
648            let commit = self.repo.find_commit(oid)?;
649            let tree = commit.tree()?;
650
651            // Check if this file exists in this commit's tree
652            let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
653
654            // Compare with parent to see if the file changed
655            let file_in_parent = if commit.parent_count() > 0 {
656                commit.parent(0)
657                    .ok()
658                    .and_then(|p| p.tree().ok())
659                    .and_then(|t| t.get_path(path).ok().map(|e| e.id()))
660            } else {
661                None // No parent = initial commit, file was added
662            };
663
664            // Include if file changed between parent and this commit
665            let changed = match (file_in_commit, file_in_parent) {
666                (Some(cur), Some(prev)) => cur != prev,  // content changed
667                (Some(_), None) => true,                   // file added
668                (None, Some(_)) => true,                   // file deleted
669                (None, None) => false,                     // file not present in either
670            };
671
672            if changed {
673                let sha = oid.to_string();
674                commits.push(CommitInfo {
675                    short_sha: sha[..7.min(sha.len())].to_string(),
676                    sha,
677                    author: commit.author().name().unwrap_or("unknown").to_string(),
678                    date: commit.time().seconds().to_string(),
679                    message: commit.message().unwrap_or("").to_string(),
680                });
681
682                if limit != 0 && commits.len() >= limit {
683                    break;
684                }
685            }
686        }
687
688        Ok(commits)
689    }
690
691    /// Get commits that modified a specific file, following renames across history.
692    /// Like `git log --follow`: when the tracked path disappears between commits,
693    /// compute a diff with rename detection to find the old filename and continue.
694    /// Returns commits in reverse chronological order (newest first).
695    pub fn get_file_commits_follow_renames(
696        &self,
697        file_path: &str,
698        limit: usize,
699    ) -> Result<Vec<FileCommitInfo>, GitError> {
700        match self.get_file_commits_follow_renames_cli(file_path, limit) {
701            Ok(commits) if !commits.is_empty() => return Ok(commits),
702            Ok(_) => {}
703            Err(GitError::Io(error)) if error.kind() == std::io::ErrorKind::NotFound => {}
704            Err(error) => return Err(error),
705        }
706
707        let mut revwalk = self.repo.revwalk()?;
708        revwalk.push_head()?;
709        revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::TIME)?;
710
711        let mut results = Vec::new();
712        let mut tracked_path = file_path.to_string();
713
714        for oid_result in revwalk {
715            let oid = oid_result?;
716            let commit = self.repo.find_commit(oid)?;
717            let tree = commit.tree()?;
718
719            let path = Path::new(&tracked_path);
720            let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
721
722            let (parent_tree_opt, file_in_parent) = if commit.parent_count() > 0 {
723                let parent = commit.parent(0)?;
724                let ptree = parent.tree()?;
725                let fip = ptree.get_path(path).ok().map(|e| e.id());
726                (Some(ptree), fip)
727            } else {
728                (None, None)
729            };
730
731            let changed = match (file_in_commit, file_in_parent) {
732                (Some(cur), Some(prev)) => cur != prev,
733                (Some(_), None) => true,
734                (None, Some(_)) => true,
735                (None, None) => false,
736            };
737
738            if changed {
739                let sha_str = oid.to_string();
740                results.push(FileCommitInfo {
741                    commit: CommitInfo {
742                        short_sha: sha_str[..7.min(sha_str.len())].to_string(),
743                        sha: sha_str,
744                        author: commit.author().name().unwrap_or("unknown").to_string(),
745                        date: commit.time().seconds().to_string(),
746                        message: commit.message().unwrap_or("").to_string(),
747                    },
748                    file_path: tracked_path.clone(),
749                });
750
751                if limit != 0 && results.len() >= limit {
752                    break;
753                }
754            }
755
756            // When walking backward, the rename commit still contains the new
757            // path. Detect that parent-side old path before the next iteration.
758            let should_check_rename =
759                parent_tree_opt.is_some() && (file_in_parent.is_none() || file_in_commit.is_none());
760            if should_check_rename {
761                let mut diff = self.repo.diff_tree_to_tree(
762                    parent_tree_opt.as_ref(),
763                    Some(&tree),
764                    None,
765                )?;
766                let mut find_opts = DiffFindOptions::new();
767                find_opts.renames(true);
768                diff.find_similar(Some(&mut find_opts))?;
769
770                let mut found_rename = false;
771                for delta in diff.deltas() {
772                    if delta.status() == Delta::Renamed {
773                        let new_path = delta
774                            .new_file()
775                            .path()
776                            .and_then(|p| p.to_str())
777                            .unwrap_or("");
778                        if new_path == tracked_path {
779                            // The tracked file was renamed FROM old_path
780                            let old_path = delta
781                                .old_file()
782                                .path()
783                                .and_then(|p| p.to_str())
784                                .unwrap_or("")
785                                .to_string();
786                            if !old_path.is_empty() {
787                                tracked_path = old_path;
788                                found_rename = true;
789                                break;
790                            }
791                        }
792                    }
793                }
794
795                if !found_rename && file_in_commit.is_none() {
796                    // File truly deleted, stop tracking
797                    break;
798                }
799            }
800        }
801
802        Ok(results)
803    }
804
805    fn get_file_commits_follow_renames_cli(
806        &self,
807        file_path: &str,
808        limit: usize,
809    ) -> Result<Vec<FileCommitInfo>, GitError> {
810        let mut command = Command::new("git");
811        command
812            .arg("-C")
813            .arg(&self.repo_root)
814            .arg("log")
815            .arg("--follow")
816            .arg("--format=\x1e%H\x1f%an\x1f%at\x1f%s")
817            .arg("--name-status");
818        if limit != 0 {
819            command.arg("-n").arg(limit.to_string());
820        }
821        command.arg("--").arg(file_path);
822
823        let output = command.output()?;
824        if !output.status.success() {
825            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
826            return Err(git_command_error(if stderr.is_empty() {
827                format!("git log exited with {}", output.status)
828            } else {
829                stderr
830            }));
831        }
832
833        let stdout = String::from_utf8_lossy(&output.stdout);
834        let mut tracked_path = file_path.to_string();
835        let mut commits = Vec::new();
836
837        for record in stdout.split('\x1e') {
838            let record = record.trim_start_matches('\n');
839            if record.trim().is_empty() {
840                continue;
841            }
842
843            let mut lines = record.lines();
844            let Some(meta) = lines.next() else {
845                continue;
846            };
847            let mut parts = meta.splitn(4, '\x1f');
848            let Some(sha) = parts.next() else {
849                continue;
850            };
851            let Some(author) = parts.next() else {
852                continue;
853            };
854            let Some(date) = parts.next() else {
855                continue;
856            };
857            let message = parts.next().unwrap_or_default();
858
859            let commit_path = tracked_path.clone();
860            let mut previous_path = None;
861            for line in lines {
862                let fields: Vec<&str> = line.split('\t').collect();
863                if fields.len() >= 3 && fields[0].starts_with('R') && fields[2] == tracked_path {
864                    previous_path = Some(fields[1].to_string());
865                }
866            }
867
868            commits.push(FileCommitInfo {
869                commit: CommitInfo {
870                    short_sha: sha[..7.min(sha.len())].to_string(),
871                    sha: sha.to_string(),
872                    author: author.to_string(),
873                    date: date.to_string(),
874                    message: message.to_string(),
875                },
876                file_path: commit_path,
877            });
878
879            if let Some(previous_path) = previous_path {
880                tracked_path = previous_path;
881            }
882        }
883
884        Ok(commits)
885    }
886
887    /// Get all file paths changed in a single commit (vs its parent).
888    /// Returns file paths from the new side of each delta.
889    pub fn get_commit_changed_files(&self, sha: &str) -> Result<Vec<String>, GitError> {
890        let obj = self.repo.revparse_single(sha)?;
891        let commit = obj.peel_to_commit()?;
892        let tree = commit.tree()?;
893        let parent_tree = if commit.parent_count() > 0 {
894            Some(commit.parent(0)?.tree()?)
895        } else {
896            None
897        };
898        let diff = self.repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None)?;
899        let mut paths = Vec::new();
900        for delta in diff.deltas() {
901            if let Some(p) = delta.new_file().path().and_then(|p| p.to_str()) {
902                paths.push(p.to_string());
903            }
904            // Also include old path for deletions/renames
905            if let Some(p) = delta.old_file().path().and_then(|p| p.to_str()) {
906                if !paths.contains(&p.to_string()) {
907                    paths.push(p.to_string());
908                }
909            }
910        }
911        Ok(paths)
912    }
913
914    pub fn get_log(&self, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
915        let mut revwalk = self.repo.revwalk()?;
916        revwalk.push_head()?;
917
918        let mut commits = Vec::new();
919        for (i, oid_result) in revwalk.enumerate() {
920            if limit != 0 && i >= limit {
921                break;
922            }
923            let oid = oid_result?;
924            let commit = self.repo.find_commit(oid)?;
925            let sha = oid.to_string();
926            commits.push(CommitInfo {
927                short_sha: sha[..7.min(sha.len())].to_string(),
928                sha,
929                author: commit.author().name().unwrap_or("unknown").to_string(),
930                date: commit.time().seconds().to_string(),
931                message: commit.message().unwrap_or("").to_string(),
932            });
933        }
934
935        Ok(commits)
936    }
937}
938
939fn parse_blame_porcelain(output: &str) -> Vec<BlameLineInfo> {
940    let lines: Vec<&str> = output.lines().collect();
941    let mut parsed = Vec::new();
942    let mut index = 0;
943
944    while index < lines.len() {
945        let Some((raw_sha, line_number)) = parse_blame_header(lines[index]) else {
946            index += 1;
947            continue;
948        };
949        index += 1;
950
951        let mut author = String::new();
952        let mut author_time = None;
953        let mut summary = String::new();
954
955        while index < lines.len() {
956            let line = lines[index];
957            index += 1;
958
959            if line.starts_with('\t') {
960                break;
961            } else if let Some(value) = line.strip_prefix("author ") {
962                author = value.to_string();
963            } else if let Some(value) = line.strip_prefix("author-time ") {
964                author_time = value.parse::<i64>().ok();
965            } else if let Some(value) = line.strip_prefix("summary ") {
966                summary = value.to_string();
967            }
968        }
969
970        let sha = raw_sha.trim_start_matches('^');
971        let commit_sha = if sha.chars().all(|c| c == '0') {
972            None
973        } else {
974            Some(sha.to_string())
975        };
976
977        if author.is_empty() {
978            author = if commit_sha.is_none() {
979                "Not Committed Yet".to_string()
980            } else {
981                "unknown".to_string()
982            };
983        }
984
985        parsed.push(BlameLineInfo {
986            line_number,
987            commit_sha,
988            author,
989            author_time,
990            summary,
991        });
992    }
993
994    parsed.sort_by_key(|line| line.line_number);
995    parsed
996}
997
998fn parse_blame_header(line: &str) -> Option<(&str, usize)> {
999    let mut parts = line.split_whitespace();
1000    let sha = parts.next()?;
1001    if !is_blame_oid(sha) {
1002        return None;
1003    }
1004    parts.next()?;
1005    let final_line = parts.next()?.parse().ok()?;
1006    Some((sha, final_line))
1007}
1008
/// Reports whether `value` is a 40-character hexadecimal object id, optionally prefixed
/// with a single `^`.
fn is_blame_oid(value: &str) -> bool {
    let digits = value.strip_prefix('^').unwrap_or(value);
    digits.len() == 40 && digits.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1013
1014fn git_command_error(message: String) -> GitError {
1015    GitError::Git2(git2::Error::from_str(&message))
1016}
1017
1018fn map_git_error(error: git2::Error) -> GitError {
1019    if error.code() == ErrorCode::NotFound {
1020        GitError::NotARepo
1021    } else {
1022        GitError::Git2(error)
1023    }
1024}
1025
1026fn should_retry_with_command_line_safe_directory(error: &git2::Error, path: &Path) -> bool {
1027    let safe_directories = command_line_safe_directories();
1028    should_retry_with_safe_directory(error, path, &safe_directories)
1029}
1030
1031fn should_retry_with_safe_directory(error: &git2::Error, path: &Path, safe_directories: &[String]) -> bool {
1032    error.code() == ErrorCode::Owner
1033        && nearest_git_root(path).is_some_and(|repo_root| {
1034            safe_directories.iter().any(|safe_directory| {
1035                safe_directory == "*"
1036                    || paths_match(&repo_root, Path::new(safe_directory))
1037            })
1038        })
1039}
1040
/// Collects the `safe.directory` values passed through the `GIT_CONFIG_COUNT`,
/// `GIT_CONFIG_KEY_<n>` and `GIT_CONFIG_VALUE_<n>` environment variables.
///
/// A missing or unparsable count is treated as zero. Keys are compared case-insensitively,
/// and entries whose key or value variable is unset are skipped.
fn command_line_safe_directories() -> Vec<String> {
    let entry_count: usize = match env::var("GIT_CONFIG_COUNT") {
        Ok(raw) => raw.parse().unwrap_or(0),
        Err(_) => 0,
    };

    let mut directories = Vec::new();
    for slot in 0..entry_count {
        let Ok(key) = env::var(format!("GIT_CONFIG_KEY_{slot}")) else {
            continue;
        };
        if !key.eq_ignore_ascii_case("safe.directory") {
            continue;
        }
        if let Ok(value) = env::var(format!("GIT_CONFIG_VALUE_{slot}")) {
            directories.push(value);
        }
    }
    directories
}
1058
/// Finds the closest directory at or above `path` that contains a `.git` entry.
///
/// `path` is resolved to an absolute path first, so that a relative input such as `.` is
/// searched all the way up to the filesystem root rather than only up to the current
/// directory. When `path` is a file, the search starts at its parent directory. The returned
/// root is canonicalized when possible.
///
/// Returns `None` when no ancestor contains `.git`.
fn nearest_git_root(path: &Path) -> Option<PathBuf> {
    // Prefer the canonical form; fall back to a cwd-anchored path when `path` does not exist.
    let absolute = fs::canonicalize(path).unwrap_or_else(|_| {
        if path.is_absolute() {
            path.to_path_buf()
        } else {
            env::current_dir()
                .map(|cwd| cwd.join(path))
                .unwrap_or_else(|_| path.to_path_buf())
        }
    });

    let start = if absolute.is_file() {
        absolute.parent()?
    } else {
        absolute.as_path()
    };

    start
        .ancestors()
        .find(|dir| dir.join(".git").exists())
        .map(|dir| fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf()))
}
1074
/// Compares two paths after canonicalizing each one, falling back to the path as given
/// when canonicalization fails.
///
/// On Windows the comparison ignores ASCII case; elsewhere the paths must be equal.
fn paths_match(left: &Path, right: &Path) -> bool {
    let resolve = |path: &Path| fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
    let (lhs, rhs) = (resolve(left), resolve(right));

    if !cfg!(windows) {
        return lhs == rhs;
    }
    lhs.to_string_lossy()
        .eq_ignore_ascii_case(&rhs.to_string_lossy())
}
1086
/// Returns the process-wide mutex used to serialize changes to libgit2's owner-validation
/// option. The mutex is created on first use.
fn owner_validation_lock() -> &'static Mutex<()> {
    static OWNER_VALIDATION_GUARD: OnceLock<Mutex<()>> = OnceLock::new();
    OWNER_VALIDATION_GUARD.get_or_init(Mutex::default)
}
1091
1092struct OwnerValidationDisabled;
1093
1094impl OwnerValidationDisabled {
1095    fn new() -> Result<Self, GitError> {
1096        // libgit2 stores this as a process-global option; callers hold owner_validation_lock.
1097        unsafe { git2::opts::set_verify_owner_validation(false)? };
1098        Ok(Self)
1099    }
1100}
1101
1102impl Drop for OwnerValidationDisabled {
1103    fn drop(&mut self) {
1104        // Restore the default before the owner-validation lock is released.
1105        unsafe {
1106            let _ = git2::opts::set_verify_owner_validation(true);
1107        }
1108    }
1109}
1110
1111fn normalize_open_path(path: &Path) -> Result<PathBuf, GitError> {
1112    let canonical = match fs::canonicalize(path) {
1113        Ok(canonical) => canonical,
1114        Err(_) if path.is_absolute() => normalize_lexical(path),
1115        Err(_) => normalize_lexical(&env::current_dir()?.join(path)),
1116    };
1117
1118    Ok(if canonical.is_file() {
1119        canonical
1120            .parent()
1121            .map(Path::to_path_buf)
1122            .unwrap_or(canonical)
1123    } else {
1124        canonical
1125    })
1126}
1127
1128fn normalize_absolute_pathspec(path: &Path) -> PathBuf {
1129    let path = normalize_lexical(path);
1130    let Some(leaf) = path.file_name() else {
1131        return fs::canonicalize(&path).unwrap_or(path);
1132    };
1133    let mut trailing_components = vec![leaf.to_os_string()];
1134
1135    let Some(parent) = path.parent() else {
1136        return path;
1137    };
1138
1139    for ancestor in parent.ancestors() {
1140        if ancestor.exists() {
1141            let mut normalized =
1142                fs::canonicalize(ancestor).unwrap_or_else(|_| normalize_lexical(ancestor));
1143            for component in trailing_components.iter().rev() {
1144                normalized.push(component);
1145            }
1146            return normalized;
1147        }
1148
1149        let Some(name) = ancestor.file_name() else {
1150            return path;
1151        };
1152        trailing_components.push(name.to_os_string());
1153    }
1154
1155    path
1156}
1157
1158fn pathspec_outside_repo_error(pathspec: &str, repo_root: &Path) -> GitError {
1159    GitError::Git2(git2::Error::from_str(&format!(
1160        "pathspec '{pathspec}' is outside repository '{}'",
1161        repo_root.display()
1162    )))
1163}
1164
1165fn non_utf8_pathspec_error(pathspec: &str) -> GitError {
1166    GitError::Git2(git2::Error::from_str(&format!(
1167        "pathspec '{pathspec}' is not valid UTF-8 after normalization"
1168    )))
1169}
1170
/// Normalizes `path` purely lexically, without touching the filesystem.
///
/// `.` components are dropped and each `..` removes the preceding normal component. A `..`
/// that has nothing to remove is kept for relative paths (so `../../a` stays `../../a`) and
/// discarded for rooted paths, which cannot go above their root.
fn normalize_lexical(path: &Path) -> PathBuf {
    let mut normalized = PathBuf::new();

    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // `PathBuf::pop` also succeeds on a trailing `..`, which would let two
                // consecutive `..` cancel each other, so only pop a normal component.
                let ends_with_normal = matches!(
                    normalized.components().next_back(),
                    Some(Component::Normal(_))
                );
                if ends_with_normal {
                    normalized.pop();
                } else if !normalized.has_root() {
                    normalized.push("..");
                }
            }
            Component::Normal(part) => normalized.push(part),
            Component::Prefix(prefix) => normalized.push(prefix.as_os_str()),
            Component::RootDir => normalized.push(component.as_os_str()),
        }
    }

    normalized
}
1190
1191#[cfg(test)]
1192mod tests {
1193    use super::*;
1194    use crate::model::change::ChangeType;
1195    use crate::parser::differ::compute_semantic_diff;
1196    use crate::parser::plugins::create_default_registry;
1197    use git2::{ErrorClass, Oid, Repository, Signature};
1198    use tempfile::TempDir;
1199
1200    fn commit_file(repo: &Repository, file_path: &str, contents: &str, message: &str) -> Oid {
1201        fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
1202
1203        let mut index = repo.index().unwrap();
1204        index.add_path(Path::new(file_path)).unwrap();
1205        index.write().unwrap();
1206
1207        let tree_id = index.write_tree().unwrap();
1208        let tree = repo.find_tree(tree_id).unwrap();
1209        let sig = Signature::now("Test User", "test@example.com").unwrap();
1210
1211        match repo.head() {
1212            Ok(head) => {
1213                let parent = repo.find_commit(head.target().unwrap()).unwrap();
1214                repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
1215                    .unwrap()
1216            }
1217            Err(_) => repo
1218                .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
1219                .unwrap(),
1220        }
1221    }
1222
/// A line edited in the worktree is blamed on no commit, while an untouched line keeps its
/// commit.
#[test]
fn porcelain_blame_reports_uncommitted_lines() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();
    let repo = Repository::init(root).unwrap();

    // Commit two lines, then rewrite only the second one without committing.
    commit_file(&repo, "a.py", "def foo():\n    return 1\n", "init");
    fs::write(root.join("a.py"), "def foo():\n    return 2\n").unwrap();

    let bridge = GitBridge::open(root).unwrap();
    let lines = bridge.blame_file_porcelain(Path::new("a.py")).unwrap();

    let (committed, pending) = (&lines[0], &lines[1]);
    assert!(committed.commit_sha.is_some());
    assert_eq!(pending.commit_sha, None);
    assert_eq!(pending.author, "Not Committed Yet");
}
1238
/// With nothing changed in the worktree, scope detection stays on `Working` and reports no
/// files.
#[test]
fn clean_worktree_does_not_fall_back_to_head_commit() {
    let workspace = TempDir::new().unwrap();
    let repo = Repository::init(workspace.path()).unwrap();

    let first_version = "export function a() {\n  return 1;\n}\n";
    let second_version = "export function a() {\n  return 2;\n}\n";
    commit_file(&repo, "sample.ts", first_version, "init");
    commit_file(&repo, "sample.ts", second_version, "change a");

    let bridge = GitBridge::open(workspace.path()).unwrap();
    let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();

    assert!(matches!(scope, DiffScope::Working));
    assert!(files.is_empty());
}
1258
/// The retry is allowed only for ownership errors on a repository listed as safe.
#[test]
fn owner_error_retries_for_command_line_safe_directory() {
    let workspace = TempDir::new().unwrap();
    let repo_path = workspace.path();
    Repository::init(repo_path).unwrap();

    let owner_mismatch =
        git2::Error::new(ErrorCode::Owner, ErrorClass::Config, "owner mismatch");

    // The repository itself is listed: retry.
    let listed = vec![repo_path.to_string_lossy().to_string()];
    assert!(should_retry_with_safe_directory(
        &owner_mismatch,
        repo_path,
        &listed,
    ));

    // Only an unrelated directory is listed: no retry.
    let unrelated = vec![repo_path.join("other").to_string_lossy().to_string()];
    assert!(!should_retry_with_safe_directory(
        &owner_mismatch,
        repo_path,
        &unrelated,
    ));

    // A different error never retries, even with the wildcard.
    let missing_repo =
        git2::Error::new(ErrorCode::NotFound, ErrorClass::Repository, "not found");
    let wildcard = vec!["*".to_string()];
    assert!(!should_retry_with_safe_directory(
        &missing_repo,
        repo_path,
        &wildcard,
    ));
}
1295
/// An explicitly requested commit scope reports that commit's changes even when the
/// worktree is clean.
#[test]
fn explicit_commit_scope_still_reads_head_commit_diff() {
    let workspace = TempDir::new().unwrap();
    let repo = Repository::init(workspace.path()).unwrap();

    let first_version = "export function a() {\n  return 1;\n}\n";
    let second_version = "export function a() {\n  return 2;\n}\n";
    commit_file(&repo, "sample.ts", first_version, "init");
    let head_oid = commit_file(&repo, "sample.ts", second_version, "change a");

    let bridge = GitBridge::open(workspace.path()).unwrap();
    let scope = DiffScope::Commit {
        sha: head_oid.to_string(),
    };
    let changed = bridge.get_changed_files(&scope, &[]).unwrap();

    assert_eq!(changed.len(), 1);
    let entry = &changed[0];
    assert_eq!(entry.file_path, "sample.ts");
    assert_eq!(entry.status, FileStatus::Modified);
}
1320
/// Relative pathspecs are resolved against the directory the bridge was opened in, and
/// absolute pathspecs are accepted too; both are reported relative to the repository root.
#[test]
fn pathspecs_are_normalized_from_open_directory() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();
    let repo = Repository::init(root).unwrap();
    let package_dir = root.join("pkg");
    fs::create_dir_all(&package_dir).unwrap();

    commit_file(&repo, "pkg/a.py", "def foo():\n    return 1\n", "init");
    fs::write(root.join("pkg/a.py"), "def foo():\n    return 2\n").unwrap();

    let bridge = GitBridge::open(&package_dir).unwrap();

    let by_relative = bridge
        .get_changed_files(&DiffScope::Working, &["a.py".to_string()])
        .unwrap();
    assert_eq!(by_relative.len(), 1);
    assert_eq!(by_relative[0].file_path, "pkg/a.py");

    let absolute_spec = root.join("pkg/a.py").to_string_lossy().to_string();
    let by_absolute = bridge
        .get_changed_files(&DiffScope::Working, &[absolute_spec])
        .unwrap();
    assert_eq!(by_absolute.len(), 1);
    assert_eq!(by_absolute[0].file_path, "pkg/a.py");
}
1346
/// An absolute pathspec naming a file that was deleted from the worktree still resolves to
/// its repository-relative path.
#[test]
fn absolute_deleted_pathspecs_are_normalized_from_existing_parent() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();
    let repo = Repository::init(root).unwrap();
    let package_dir = root.join("pkg");
    fs::create_dir_all(&package_dir).unwrap();

    commit_file(&repo, "pkg/deleted.py", "def foo():\n    return 1\n", "init");

    let victim = root.join("pkg/deleted.py");
    let absolute_spec = victim.to_string_lossy().to_string();
    fs::remove_file(&victim).unwrap();

    let bridge = GitBridge::open(&package_dir).unwrap();
    let changed = bridge
        .get_changed_files(&DiffScope::Working, &[absolute_spec])
        .unwrap();

    assert_eq!(changed.len(), 1);
    let entry = &changed[0];
    assert_eq!(entry.file_path, "pkg/deleted.py");
    assert_eq!(entry.status, FileStatus::Deleted);
}
1375
/// Missing trailing components are re-attached to the canonical existing ancestor in their
/// original order.
#[test]
fn absolute_missing_pathspecs_preserve_trailing_component_order() {
    let workspace = TempDir::new().unwrap();
    let existing_parent = workspace.path().join("existing");
    fs::create_dir(&existing_parent).unwrap();

    let requested = existing_parent.join("missing").join("leaf.py");
    let actual = normalize_absolute_pathspec(&requested);

    let expected = fs::canonicalize(&existing_parent)
        .unwrap()
        .join("missing")
        .join("leaf.py");
    assert_eq!(actual, expected);
}
1390
/// An absolute pathspec that points outside the repository is rejected with a descriptive
/// error.
#[test]
fn absolute_pathspecs_outside_repo_are_rejected() {
    let repo_dir = TempDir::new().unwrap();
    let outside_dir = TempDir::new().unwrap();
    let repo = Repository::init(repo_dir.path()).unwrap();

    commit_file(&repo, "sample.py", "def foo():\n    return 1\n", "init");
    let tracked = repo_dir.path().join("sample.py");
    fs::write(tracked, "def foo():\n    return 2\n").unwrap();

    let stray = outside_dir.path().join("outside.py");
    fs::write(&stray, "def outside():\n    return 1\n").unwrap();
    let stray_spec = stray.to_string_lossy().to_string();

    let bridge = GitBridge::open(repo_dir.path()).unwrap();
    let failure = bridge
        .get_changed_files(&DiffScope::Working, &[stray_spec])
        .unwrap_err();

    let rendered = failure.to_string();
    assert!(rendered.contains("pathspec"));
    assert!(rendered.contains("is outside repository"));
}
1418
/// A staged pure rename shows up as one `Renamed` entry carrying both paths and identical
/// before/after contents.
#[test]
fn staged_file_rename_is_reported_as_single_rename_with_old_contents() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();
    let repo = Repository::init(root).unwrap();

    let contents = "export function foo() {\n  return 1;\n}\n";
    commit_file(&repo, "old.ts", contents, "init");

    // Move the file on disk and stage both sides of the rename.
    fs::rename(root.join("old.ts"), root.join("new.ts")).unwrap();
    let mut index = repo.index().unwrap();
    index.remove_path(Path::new("old.ts")).unwrap();
    index.add_path(Path::new("new.ts")).unwrap();
    index.write().unwrap();

    let bridge = GitBridge::open(root).unwrap();
    let staged = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();

    assert_eq!(staged.len(), 1);
    let renamed = &staged[0];
    assert_eq!(renamed.status, FileStatus::Renamed);
    assert_eq!(renamed.file_path, "new.ts");
    assert_eq!(renamed.old_file_path.as_deref(), Some("old.ts"));
    assert_eq!(renamed.before_content.as_deref(), Some(contents));
    assert_eq!(renamed.after_content.as_deref(), Some(contents));
}
1443
/// A staged rename combined with an edit to the function body yields a single moved entity
/// in the semantic diff.
#[test]
fn staged_file_rename_with_edit_reports_single_moved_entity() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();
    let repo = Repository::init(root).unwrap();

    let before = concat!(
        "// shared header 01\n",
        "// shared header 02\n",
        "// shared header 03\n",
        "// shared header 04\n",
        "// shared header 05\n",
        "// shared header 06\n",
        "// shared header 07\n",
        "// shared header 08\n",
        "// shared header 09\n",
        "// shared header 10\n",
        "export function foo() {\n",
        "  return alpha + beta + gamma;\n",
        "}\n",
    );
    let after = before.replace(
        "return alpha + beta + gamma;",
        "return one + two + three;",
    );

    commit_file(&repo, "old.ts", before, "init");
    fs::rename(root.join("old.ts"), root.join("new.ts")).unwrap();
    fs::write(root.join("new.ts"), &after).unwrap();

    let mut index = repo.index().unwrap();
    index.remove_path(Path::new("old.ts")).unwrap();
    index.add_path(Path::new("new.ts")).unwrap();
    index.write().unwrap();

    let bridge = GitBridge::open(root).unwrap();
    let files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
    assert_eq!(files.len(), 1);
    assert_eq!(files[0].status, FileStatus::Renamed);

    let registry = create_default_registry();
    let result = compute_semantic_diff(&files, &registry, None, None);

    assert_eq!(result.added_count, 0);
    assert_eq!(result.deleted_count, 0);
    // `foo` is a compound Moved change whose body also changed, so it counts toward
    // both moved_count and modified_count.
    assert_eq!(result.modified_count, 1);
    assert_eq!(result.moved_count, 1);
    assert_eq!(result.changes.len(), 1);

    let moved = &result.changes[0];
    assert_eq!(moved.change_type, ChangeType::Moved);
    assert_eq!(moved.entity_name, "foo");
    assert_eq!(moved.old_file_path.as_deref(), Some("old.ts"));
    assert_eq!(moved.structural_change, Some(true));
}
1498
/// A rename that is staged and then edited without staging is still reported as one rename,
/// with the committed contents as "before" and the worktree contents as "after".
#[test]
fn working_diff_preserves_staged_rename_with_unstaged_edit() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();
    let repo = Repository::init(root).unwrap();

    let before = concat!(
        "export function foo(x: number) {\n",
        "  return x + 1;\n",
        "}\n",
        "\n",
        "export function bar(y: number) {\n",
        "  return y * 2;\n",
        "}\n",
    );
    let after = concat!(
        "export function foo(x: number) {\n",
        "  return x + 42;\n",
        "}\n",
        "\n",
        "export function bar(y: number) {\n",
        "  return y * 99;\n",
        "}\n",
    );

    commit_file(&repo, "a.ts", before, "init");

    // Stage the rename first ...
    fs::rename(root.join("a.ts"), root.join("b.ts")).unwrap();
    let mut index = repo.index().unwrap();
    index.remove_path(Path::new("a.ts")).unwrap();
    index.add_path(Path::new("b.ts")).unwrap();
    index.write().unwrap();

    // ... then edit the renamed file without staging the edit.
    fs::write(root.join("b.ts"), after).unwrap();

    let bridge = GitBridge::open(root).unwrap();
    let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();

    assert!(matches!(scope, DiffScope::Working));
    assert_eq!(files.len(), 1);
    let renamed = &files[0];
    assert_eq!(renamed.status, FileStatus::Renamed);
    assert_eq!(renamed.file_path, "b.ts");
    assert_eq!(renamed.old_file_path.as_deref(), Some("a.ts"));
    assert_eq!(renamed.before_content.as_deref(), Some(before));
    assert_eq!(renamed.after_content.as_deref(), Some(after));

    let registry = create_default_registry();
    let result = compute_semantic_diff(&files, &registry, None, None);

    assert_eq!(result.added_count, 0);
    assert_eq!(result.deleted_count, 0);
    assert_eq!(result.modified_count, 2);
    assert_eq!(result.moved_count, 2);
    assert_eq!(result.changes.len(), 2);
    for change in &result.changes {
        assert_eq!(change.change_type, ChangeType::Moved);
        assert_eq!(change.old_file_path.as_deref(), Some("a.ts"));
        assert_eq!(change.structural_change, Some(true));
    }
}
1565
/// Even when an unstaged rewrite replaces most of the file, the staged rename is still used
/// to pair the old and new paths in the working diff.
#[test]
fn working_diff_uses_staged_rename_map_after_large_unstaged_rewrite() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();
    let repo = Repository::init(root).unwrap();

    let mut old_filler = Vec::with_capacity(200);
    let mut new_filler = Vec::with_capacity(200);
    for i in 0..200 {
        old_filler.push(format!("// old filler {i} alpha beta gamma"));
        new_filler.push(format!("// new filler {i} delta epsilon zeta"));
    }
    let before_noise = old_filler.join("\n");
    let after_noise = new_filler.join("\n");
    let before = format!(
        "{before_noise}\nexport function foo(x: number) {{\n  return x + 1;\n}}\n"
    );
    let after = format!(
        "{after_noise}\nexport function foo(x: number) {{\n  return x + 42;\n}}\n"
    );

    commit_file(&repo, "a.ts", &before, "init");

    // Stage the rename, then rewrite the renamed file without staging the rewrite.
    fs::rename(root.join("a.ts"), root.join("b.ts")).unwrap();
    let mut index = repo.index().unwrap();
    index.remove_path(Path::new("a.ts")).unwrap();
    index.add_path(Path::new("b.ts")).unwrap();
    index.write().unwrap();
    fs::write(root.join("b.ts"), &after).unwrap();

    let bridge = GitBridge::open(root).unwrap();
    let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();

    assert!(matches!(scope, DiffScope::Working));
    assert_eq!(files.len(), 1);
    let renamed = &files[0];
    assert_eq!(renamed.status, FileStatus::Renamed);
    assert_eq!(renamed.file_path, "b.ts");
    assert_eq!(renamed.old_file_path.as_deref(), Some("a.ts"));
    assert_eq!(renamed.before_content.as_deref(), Some(before.as_str()));
    assert_eq!(renamed.after_content.as_deref(), Some(after.as_str()));

    let registry = create_default_registry();
    let result = compute_semantic_diff(&files, &registry, None, None);

    assert_eq!(result.added_count, 0);
    assert_eq!(result.deleted_count, 0);
    // Two changes: the rewritten comment block is a Modified orphan, and `foo` is a
    // compound Moved change whose body also changed, so it counts toward both
    // moved_count and modified_count.
    assert_eq!(result.modified_count, 2);
    assert_eq!(result.moved_count, 1);
    let foo_was_moved = result
        .changes
        .iter()
        .any(|change| change.change_type == ChangeType::Moved && change.entity_name == "foo");
    assert!(foo_was_moved);
}
1622
/// Diffing `HEAD` against the worktree explicitly also pairs the paths through the staged
/// rename, even after a large unstaged rewrite.
#[test]
fn explicit_ref_to_working_uses_index_rename_map_after_large_unstaged_rewrite() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();
    let repo = Repository::init(root).unwrap();

    let mut old_filler = Vec::with_capacity(200);
    let mut new_filler = Vec::with_capacity(200);
    for i in 0..200 {
        old_filler.push(format!("// old filler {i} alpha beta gamma"));
        new_filler.push(format!("// new filler {i} delta epsilon zeta"));
    }
    let before_noise = old_filler.join("\n");
    let after_noise = new_filler.join("\n");
    let before = format!(
        "{before_noise}\nexport function foo(x: number) {{\n  return x + 1;\n}}\n"
    );
    let after = format!(
        "{after_noise}\nexport function foo(x: number) {{\n  return x + 42;\n}}\n"
    );

    commit_file(&repo, "a.ts", &before, "init");

    // Stage the rename, then rewrite the renamed file without staging the rewrite.
    fs::rename(root.join("a.ts"), root.join("b.ts")).unwrap();
    let mut index = repo.index().unwrap();
    index.remove_path(Path::new("a.ts")).unwrap();
    index.add_path(Path::new("b.ts")).unwrap();
    index.write().unwrap();
    fs::write(root.join("b.ts"), &after).unwrap();

    let bridge = GitBridge::open(root).unwrap();
    let scope = DiffScope::RefToWorking {
        refspec: "HEAD".to_string(),
    };
    let files = bridge.get_changed_files(&scope, &[]).unwrap();

    assert_eq!(files.len(), 1);
    let renamed = &files[0];
    assert_eq!(renamed.status, FileStatus::Renamed);
    assert_eq!(renamed.file_path, "b.ts");
    assert_eq!(renamed.old_file_path.as_deref(), Some("a.ts"));
    assert_eq!(renamed.before_content.as_deref(), Some(before.as_str()));
    assert_eq!(renamed.after_content.as_deref(), Some(after.as_str()));

    let registry = create_default_registry();
    let result = compute_semantic_diff(&files, &registry, None, None);

    assert_eq!(result.added_count, 0);
    assert_eq!(result.deleted_count, 0);
    // Two changes: the rewritten comment block is a Modified orphan, and `foo` is a
    // compound Moved change whose body also changed, so it counts toward both
    // moved_count and modified_count.
    assert_eq!(result.modified_count, 2);
    assert_eq!(result.moved_count, 1);
    let foo_was_moved = result
        .changes
        .iter()
        .any(|change| change.change_type == ChangeType::Moved && change.entity_name == "foo");
    assert!(foo_was_moved);
}
1685
1686    #[test]
1687    fn staged_rename_map_overrides_wrong_worktree_rename_pairing() {
1688        let temp = TempDir::new().unwrap();
1689        let repo = Repository::init(temp.path()).unwrap();
1690
1691        let a_before = "export function foo(x: number) {\n  return x + 1;\n}\n";
1692        let c_before = "export function foo(x: number) {\n  return x + 42;\n}\n";
1693
1694        commit_file(&repo, "a.ts", a_before, "init a");
1695        commit_file(&repo, "c.ts", c_before, "init c");
1696
1697        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1698        let mut index = repo.index().unwrap();
1699        index.remove_path(Path::new("a.ts")).unwrap();
1700        index.add_path(Path::new("b.ts")).unwrap();
1701        index.write().unwrap();
1702
1703        fs::remove_file(temp.path().join("c.ts")).unwrap();
1704        fs::write(temp.path().join("b.ts"), c_before).unwrap();
1705
1706        let bridge = GitBridge::open(temp.path()).unwrap();
1707        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1708
1709        assert!(matches!(scope, DiffScope::Working));
1710        let renamed = files
1711            .iter()
1712            .find(|file| {
1713                file.status == FileStatus::Renamed
1714                    && file.file_path == "b.ts"
1715                    && file.old_file_path.as_deref() == Some("a.ts")
1716            })
1717            .unwrap();
1718        assert_eq!(renamed.before_content.as_deref(), Some(a_before));
1719        assert_eq!(renamed.after_content.as_deref(), Some(c_before));
1720
1721        let deleted = files
1722            .iter()
1723            .find(|file| file.status == FileStatus::Deleted && file.file_path == "c.ts")
1724            .unwrap();
1725        assert_eq!(deleted.before_content.as_deref(), Some(c_before));
1726        assert_eq!(deleted.after_content.as_deref(), None);
1727        assert!(!files.iter().any(|file| {
1728            file.status == FileStatus::Renamed
1729                && file.file_path == "b.ts"
1730                && file.old_file_path.as_deref() == Some("c.ts")
1731        }));
1732    }
1733
1734    #[test]
1735    fn staged_diff_with_base_ref_compares_index_to_that_ref() {
1736        let temp = TempDir::new().unwrap();
1737        let repo = Repository::init(temp.path()).unwrap();
1738
1739        let v1 = "def foo():\n    return 1\n";
1740        let v2 = "def foo():\n    return 2\n";
1741        let v3 = "def foo():\n    return 3\n";
1742        let v4 = "def foo():\n    return 4\n";
1743
1744        commit_file(&repo, "a.py", v1, "init");
1745        commit_file(&repo, "a.py", v2, "second");
1746        fs::write(temp.path().join("a.py"), v3).unwrap();
1747
1748        let mut index = repo.index().unwrap();
1749        index.add_path(Path::new("a.py")).unwrap();
1750        index.write().unwrap();
1751
1752        fs::write(temp.path().join("a.py"), v4).unwrap();
1753
1754        let bridge = GitBridge::open(temp.path()).unwrap();
1755        let files = bridge
1756            .get_staged_files_with_base_ref("HEAD~1", &[])
1757            .unwrap();
1758
1759        assert_eq!(files.len(), 1);
1760        assert_eq!(files[0].status, FileStatus::Modified);
1761        assert_eq!(files[0].file_path, "a.py");
1762        assert_eq!(files[0].before_content.as_deref(), Some(v1));
1763        assert_eq!(files[0].after_content.as_deref(), Some(v3));
1764
1765        let registry = create_default_registry();
1766        let result = compute_semantic_diff(&files, &registry, None, None);
1767
1768        assert_eq!(result.modified_count, 1);
1769        assert_eq!(result.changes.len(), 1);
1770        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
1771        assert_eq!(result.changes[0].entity_name, "foo");
1772    }
1773
1774    #[test]
1775    fn crlf_only_difference_in_working_file_is_invisible() {
1776        let temp = TempDir::new().unwrap();
1777        let repo = Repository::init(temp.path()).unwrap();
1778
1779        commit_file(&repo, "sample.rs", "fn a() {}\n", "init");
1780        fs::write(temp.path().join("sample.rs"), "fn a() {}\r\n").unwrap();
1781
1782        let bridge = GitBridge::open(temp.path()).unwrap();
1783        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1784
1785        assert_eq!(files.len(), 1, "expected git to detect the CRLF change as modified");
1786
1787        let before = files[0].before_content.as_deref().unwrap();
1788        let after = files[0].after_content.as_deref().unwrap();
1789
1790        assert_eq!(before, after, "CRLF-only difference should be invisible after normalization");
1791    }
1792
1793    #[test]
1794    fn crlf_stored_in_blob_is_normalized_on_read() {
1795        let temp = TempDir::new().unwrap();
1796        let repo = Repository::init(temp.path()).unwrap();
1797
1798        repo.config().unwrap().set_str("core.autocrlf", "false").unwrap();
1799        commit_file(&repo, "sample.rs", "fn a() {}\r\n", "init");
1800        fs::write(temp.path().join("sample.rs"), "fn a() {}\r\nfn b() {}\r\n").unwrap();
1801
1802        let bridge = GitBridge::open(temp.path()).unwrap();
1803        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1804
1805        assert_eq!(files.len(), 1, "expected git to detect the modification");
1806
1807        let before = files[0].before_content.as_deref().unwrap();
1808        assert!(!before.contains('\r'), "before_content read from CRLF blob should be normalized to LF");
1809    }
1810}