Skip to main content

sem_core/git/
bridge.rs

1use std::env;
2use std::fs;
3use std::path::{Component, Path, PathBuf};
4use std::process::Command;
5use std::sync::{Mutex, OnceLock};
6
7use git2::{Blame, Delta, Diff, DiffFindOptions, DiffOptions, ErrorCode, Oid, Repository};
8use thiserror::Error;
9
10use super::types::{CommitInfo, DiffScope, FileChange, FileCommitInfo, FileStatus};
11use super::types::BlameLineInfo;
12
13#[derive(Error, Debug)]
14pub enum GitError {
15    #[error("not a git repository")]
16    NotARepo,
17    #[error("git error: {0}")]
18    Git2(#[from] git2::Error),
19    #[error("io error: {0}")]
20    Io(#[from] std::io::Error),
21}
22
23pub struct GitBridge {
24    repo: Repository,
25    repo_root: PathBuf,
26    cwd: PathBuf,
27}
28
29impl GitBridge {
30    pub fn open(path: &Path) -> Result<Self, GitError> {
31        let cwd = normalize_open_path(path)?;
32        let repo = match Repository::discover(path) {
33            Ok(repo) => repo,
34            Err(error) if should_retry_with_command_line_safe_directory(&error, path) => {
35                let _guard = owner_validation_lock()
36                    .lock()
37                    .unwrap_or_else(|poisoned| poisoned.into_inner());
38                let _owner_validation = OwnerValidationDisabled::new()?;
39                let repo = Repository::discover(path);
40                repo.map_err(map_git_error)?
41            }
42            Err(error) => return Err(map_git_error(error)),
43        };
44        let repo_root = repo.workdir().ok_or(GitError::NotARepo)?;
45        let repo_root = fs::canonicalize(repo_root)?;
46        Ok(Self {
47            repo,
48            repo_root,
49            cwd,
50        })
51    }
52
53    pub fn repo_root(&self) -> &Path {
54        &self.repo_root
55    }
56
57    /// Return the URL of the "origin" remote, if one exists.
58    pub fn get_remote_url(&self) -> Option<String> {
59        self.repo
60            .find_remote("origin")
61            .ok()
62            .and_then(|r| r.url().map(String::from))
63    }
64
65    /// Resolve a refspec to its full commit SHA, if valid.
66    pub fn resolve_ref_sha(&self, refspec: &str) -> Option<String> {
67        self.repo
68            .revparse_single(refspec)
69            .ok()
70            .and_then(|obj| obj.peel_to_commit().ok())
71            .map(|c| c.id().to_string())
72    }
73
74    pub fn blame_file(&self, file_path: &Path) -> Result<Blame<'_>, GitError> {
75        Ok(self.repo.blame_file(file_path, None)?)
76    }
77
78    pub fn blame_file_porcelain(&self, file_path: &Path) -> Result<Vec<BlameLineInfo>, GitError> {
79        let output = Command::new("git")
80            .arg("-C")
81            .arg(&self.repo_root)
82            .arg("blame")
83            .arg("--line-porcelain")
84            .arg("--")
85            .arg(file_path)
86            .output()?;
87
88        if !output.status.success() {
89            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
90            return Err(git_command_error(if stderr.is_empty() {
91                format!("git blame exited with {}", output.status)
92            } else {
93                stderr
94            }));
95        }
96
97        let parsed = parse_blame_porcelain(&String::from_utf8_lossy(&output.stdout));
98        if parsed.is_empty() && !output.stdout.is_empty() {
99            return Err(git_command_error(
100                "failed to parse git blame porcelain output".to_string(),
101            ));
102        }
103
104        Ok(parsed)
105    }
106
107    pub fn commit_summary(&self, oid: Oid) -> Option<String> {
108        self.repo
109            .find_commit(oid)
110            .ok()
111            .and_then(|commit| commit.summary().map(String::from))
112    }
113
114    pub fn get_head_sha(&self) -> Result<String, GitError> {
115        let head = self.repo.head()?;
116        let oid = head.target().ok_or_else(|| {
117            git2::Error::from_str("HEAD has no target")
118        })?;
119        Ok(oid.to_string())
120    }
121
122    /// Combined detect scope + get files in one call (fast path).
123    /// Shows all changes from HEAD to the current working state by default.
124    /// Use `--staged` for staged changes only.
125    pub fn detect_and_get_files(&self, pathspecs: &[String]) -> Result<(DiffScope, Vec<FileChange>), GitError> {
126        // Show the full current working state, including staged changes.
127        let mut working_files = self.get_working_diff_files(pathspecs)?;
128        if !working_files.is_empty() {
129            self.populate_contents(&mut working_files, &DiffScope::Working)?;
130            return Ok((DiffScope::Working, working_files));
131        }
132
133        // Clean worktree = no changes
134        Ok((DiffScope::Working, Vec::new()))
135    }
136
137    /// Get changed files for a specific scope
138    pub fn get_changed_files(&self, scope: &DiffScope, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
139        let mut files = match scope {
140            DiffScope::Working => {
141                self.get_working_diff_files(pathspecs)?
142            }
143            DiffScope::Staged => self.get_staged_diff_files(pathspecs)?,
144            DiffScope::Commit { sha } => self.get_commit_diff_files(sha, pathspecs)?,
145            DiffScope::Range { from, to } => self.get_range_diff_files(from, to, pathspecs)?,
146            DiffScope::RefToWorking { refspec } => self.get_ref_to_working_diff_files(refspec, pathspecs)?,
147        };
148
149        // Filter .sem/ files
150        files.retain(|f| !f.file_path.starts_with(".sem/"));
151
152        self.populate_contents(&mut files, scope)?;
153        Ok(files)
154    }
155
156    /// True when this repo uses a sparse checkout. libgit2 cannot read a
157    /// sparse index (`unsupported mandatory extension: 'sdir'`), and even when
158    /// the index is readable, its workdir diff reports sparse-excluded files as
159    /// deleted. In both cases we route working/staged diffs through the git CLI,
160    /// which understands sparse checkouts correctly.
161    fn is_sparse_checkout(&self) -> bool {
162        self.repo
163            .config()
164            .and_then(|cfg| cfg.get_bool("core.sparseCheckout"))
165            .unwrap_or(false)
166    }
167
168    /// Get working-tree or staged changed files via the git CLI. Used for
169    /// sparse checkouts where libgit2's index/workdir diff is unusable.
170    /// Rename detection (-M) is on; contents are populated by the caller.
171    ///
172    /// `staged` selects `--cached` (HEAD vs index). Otherwise we diff against
173    /// HEAD (not the bare worktree-vs-index `git diff`) to match sem's Working
174    /// scope, which shows the full current state including staged changes.
175    fn changed_files_via_cli(
176        &self,
177        staged: bool,
178        pathspecs: &[String],
179    ) -> Result<Vec<FileChange>, GitError> {
180        let has_head = self.repo.head().is_ok();
181        let mut command = Command::new("git");
182        command
183            .arg("-C")
184            .arg(&self.repo_root)
185            .arg("diff")
186            .arg("--name-status")
187            .arg("-M")
188            .arg("-z");
189        if staged {
190            command.arg("--cached");
191        } else if has_head {
192            // Full working state since HEAD (includes staged), matching the
193            // libgit2 diff_tree_to_workdir_with_index path.
194            command.arg("HEAD");
195        }
196        if !pathspecs.is_empty() {
197            command.arg("--");
198            for spec in self.normalize_pathspecs(pathspecs)? {
199                command.arg(spec);
200            }
201        }
202
203        let output = command.output()?;
204        if !output.status.success() {
205            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
206            return Err(git_command_error(if stderr.is_empty() {
207                format!("git diff exited with {}", output.status)
208            } else {
209                stderr
210            }));
211        }
212
213        Ok(parse_name_status_z(&output.stdout))
214    }
215
216    pub fn get_staged_files_with_base_ref(
217        &self,
218        base: &str,
219        pathspecs: &[String],
220    ) -> Result<Vec<FileChange>, GitError> {
221        let mut files = self.get_staged_diff_files_with_base(base, pathspecs)?;
222        files.retain(|f| !f.file_path.starts_with(".sem/"));
223
224        let base_tree = self.resolve_tree(base)?;
225        for file in files.iter_mut() {
226            if file.status != FileStatus::Deleted {
227                file.after_content = self.read_index_file(&file.file_path);
228            }
229            if file.status != FileStatus::Added {
230                let path = file
231                    .old_file_path
232                    .as_deref()
233                    .unwrap_or(&file.file_path);
234                file.before_content = self.read_blob_from_tree(&base_tree, path);
235            }
236        }
237
238        Ok(files)
239    }
240
241    /// Resolve the merge base between two refs
242    pub fn resolve_merge_base(&self, ref1: &str, ref2: &str) -> Result<String, GitError> {
243        let obj1 = self.repo.revparse_single(ref1)?;
244        let obj2 = self.repo.revparse_single(ref2)?;
245        let oid = self.repo.merge_base(obj1.id(), obj2.id())?;
246        Ok(oid.to_string())
247    }
248
249    /// Check if a string resolves to a valid git revision
250    pub fn is_valid_rev(&self, refspec: &str) -> bool {
251        self.repo.revparse_single(refspec).is_ok()
252    }
253
254    fn make_diff_opts(&self, pathspecs: &[String]) -> Result<DiffOptions, GitError> {
255        let mut opts = DiffOptions::new();
256        for spec in self.normalize_pathspecs(pathspecs)? {
257            opts.pathspec(spec.as_str());
258        }
259        Ok(opts)
260    }
261
262    fn normalize_pathspecs(&self, pathspecs: &[String]) -> Result<Vec<String>, GitError> {
263        pathspecs
264            .iter()
265            .map(|spec| self.normalize_pathspec(spec))
266            .collect()
267    }
268
269    fn normalize_pathspec(&self, spec: &str) -> Result<String, GitError> {
270        if spec.is_empty() || spec.starts_with(':') {
271            return Ok(spec.to_string());
272        }
273
274        let spec_path = Path::new(spec);
275        let absolute = if spec_path.is_absolute() {
276            normalize_absolute_pathspec(spec_path)
277        } else {
278            normalize_lexical(&self.cwd.join(spec_path))
279        };
280
281        let repo_root = normalize_lexical(&self.repo_root);
282        let relative =
283            absolute
284                .strip_prefix(&repo_root)
285                .map_err(|_| pathspec_outside_repo_error(spec, &self.repo_root))?;
286
287        if relative.as_os_str().is_empty() {
288            Ok(".".to_string())
289        } else {
290            relative
291                .to_str()
292                .map(|path| path.replace('\\', "/"))
293                .ok_or_else(|| non_utf8_pathspec_error(spec))
294        }
295    }
296
297    fn get_staged_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
298        if self.is_sparse_checkout() {
299            return self.changed_files_via_cli(true, pathspecs);
300        }
301
302        let head_tree = match self.repo.head() {
303            Ok(head) => {
304                let commit = head.peel_to_commit()?;
305                Some(commit.tree()?)
306            }
307            Err(_) => None, // No commits yet
308        };
309
310        self.get_index_diff_files(head_tree.as_ref(), pathspecs)
311    }
312
313    fn get_staged_diff_files_with_base(
314        &self,
315        base: &str,
316        pathspecs: &[String],
317    ) -> Result<Vec<FileChange>, GitError> {
318        let base_tree = self.resolve_tree(base)?;
319        self.get_index_diff_files(Some(&base_tree), pathspecs)
320    }
321
322    fn get_index_diff_files(
323        &self,
324        base_tree: Option<&git2::Tree<'_>>,
325        pathspecs: &[String],
326    ) -> Result<Vec<FileChange>, GitError> {
327        let mut opts = self.make_diff_opts(pathspecs)?;
328        let mut diff = self.repo.diff_tree_to_index(
329            base_tree,
330            Some(&self.repo.index()?),
331            Some(&mut opts),
332        )?;
333        Self::detect_renames(&mut diff)?;
334
335        Ok(self.diff_to_file_changes(&diff))
336    }
337
338    fn get_working_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
339        if self.is_sparse_checkout() {
340            // Sparse index is unreadable by libgit2, and its workdir diff would
341            // mark sparse-excluded files as deleted. Ask git directly.
342            return self.changed_files_via_cli(false, pathspecs);
343        }
344
345        let mut opts = self.make_diff_opts(pathspecs)?;
346        opts.include_untracked(false);
347
348        let head_tree = self.resolve_tree("HEAD").ok();
349        let mut diff = match head_tree.as_ref() {
350            Some(head_tree) => self
351                .repo
352                .diff_tree_to_workdir_with_index(Some(head_tree), Some(&mut opts))?,
353            None => self.repo.diff_index_to_workdir(None, Some(&mut opts))?,
354        };
355        Self::detect_renames(&mut diff)?;
356        self.apply_index_rename_map(
357            self.diff_to_file_changes(&diff),
358            head_tree.as_ref(),
359            pathspecs,
360        )
361    }
362
363    fn apply_index_rename_map(
364        &self,
365        mut files: Vec<FileChange>,
366        base_tree: Option<&git2::Tree<'_>>,
367        pathspecs: &[String],
368    ) -> Result<Vec<FileChange>, GitError> {
369        let Some(base_tree) = base_tree else {
370            return Ok(files);
371        };
372
373        let index_renames: Vec<FileChange> = self
374            .get_index_diff_files(Some(base_tree), pathspecs)?
375            .into_iter()
376            .filter(|file| file.status == FileStatus::Renamed)
377            .collect();
378
379        for rename in index_renames {
380            let Some(old_path) = rename.old_file_path.clone() else {
381                continue;
382            };
383            let target_pos = files
384                .iter()
385                .position(|file| {
386                    matches!(file.status, FileStatus::Added | FileStatus::Renamed)
387                        && file.file_path == rename.file_path
388                });
389            let deleted_pos = files
390                .iter()
391                .position(|file| {
392                    file.status == FileStatus::Deleted && file.file_path == old_path
393                });
394
395            if let (Some(target_pos), Some(deleted_pos)) = (target_pos, deleted_pos) {
396                if files[target_pos].status == FileStatus::Renamed
397                    && files[target_pos].old_file_path.as_deref() == Some(old_path.as_str())
398                {
399                    continue;
400                }
401
402                let target_file = files[target_pos].clone();
403                let deleted_file = files[deleted_pos].clone();
404                let displaced_deleted_path =
405                    if target_file.status == FileStatus::Renamed {
406                        target_file
407                            .old_file_path
408                            .as_ref()
409                            .filter(|path| *path != &old_path)
410                            .cloned()
411                    } else {
412                        None
413                    };
414
415                files = files
416                    .into_iter()
417                    .enumerate()
418                    .filter_map(|(idx, file)| {
419                        if idx == target_pos || idx == deleted_pos {
420                            None
421                        } else {
422                            Some(file)
423                        }
424                    })
425                    .collect();
426                let before_content = deleted_file
427                    .before_content
428                    .or_else(|| self.read_blob_from_tree(base_tree, &old_path));
429                let after_content = target_file
430                    .after_content
431                    .or_else(|| self.read_working_file(&target_file.file_path));
432                files.push(FileChange {
433                    file_path: target_file.file_path,
434                    status: FileStatus::Renamed,
435                    old_file_path: Some(old_path),
436                    before_content,
437                    after_content,
438                });
439                if let Some(file_path) = displaced_deleted_path {
440                    let before_content = self.read_blob_from_tree(base_tree, &file_path);
441                    files.push(FileChange {
442                        file_path,
443                        status: FileStatus::Deleted,
444                        old_file_path: None,
445                        before_content,
446                        after_content: None,
447                    });
448                }
449            }
450        }
451
452        Ok(files)
453    }
454
455    fn get_commit_diff_files(&self, sha: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
456        let obj = self.repo.revparse_single(sha)?;
457        let commit = obj.peel_to_commit()?;
458        let tree = commit.tree()?;
459
460        let parent_tree = if commit.parent_count() > 0 {
461            Some(commit.parent(0)?.tree()?)
462        } else {
463            None
464        };
465
466        let mut opts = self.make_diff_opts(pathspecs)?;
467        let mut diff = self.repo.diff_tree_to_tree(
468            parent_tree.as_ref(),
469            Some(&tree),
470            Some(&mut opts),
471        )?;
472        Self::detect_renames(&mut diff)?;
473
474        Ok(self.diff_to_file_changes(&diff))
475    }
476
477    fn get_range_diff_files(&self, from: &str, to: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
478        let from_obj = self.repo.revparse_single(from)?;
479        let to_obj = self.repo.revparse_single(to)?;
480
481        let from_tree = from_obj.peel_to_commit()?.tree()?;
482        let to_tree = to_obj.peel_to_commit()?.tree()?;
483
484        let mut opts = self.make_diff_opts(pathspecs)?;
485        let mut diff = self.repo.diff_tree_to_tree(
486            Some(&from_tree),
487            Some(&to_tree),
488            Some(&mut opts),
489        )?;
490        Self::detect_renames(&mut diff)?;
491
492        Ok(self.diff_to_file_changes(&diff))
493    }
494
495    fn get_ref_to_working_diff_files(&self, refspec: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
496        let tree = self.resolve_tree(refspec)?;
497        let mut opts = self.make_diff_opts(pathspecs)?;
498        let mut diff = self.repo.diff_tree_to_workdir_with_index(
499            Some(&tree),
500            Some(&mut opts),
501        )?;
502        Self::detect_renames(&mut diff)?;
503        self.apply_index_rename_map(self.diff_to_file_changes(&diff), Some(&tree), pathspecs)
504    }
505
506    fn detect_renames(diff: &mut Diff) -> Result<(), GitError> {
507        let mut opts = DiffFindOptions::new();
508        opts.renames(true);
509        diff.find_similar(Some(&mut opts))?;
510        Ok(())
511    }
512
513    fn diff_to_file_changes(&self, diff: &Diff) -> Vec<FileChange> {
514        let mut files = Vec::new();
515
516        for delta in diff.deltas() {
517            let (status, file_path, old_file_path) = match delta.status() {
518                Delta::Added => {
519                    let path = delta
520                        .new_file()
521                        .path()
522                        .and_then(|p| p.to_str())
523                        .unwrap_or("")
524                        .to_string();
525                    (FileStatus::Added, path, None)
526                }
527                Delta::Deleted => {
528                    let path = delta
529                        .old_file()
530                        .path()
531                        .and_then(|p| p.to_str())
532                        .unwrap_or("")
533                        .to_string();
534                    (FileStatus::Deleted, path, None)
535                }
536                Delta::Modified => {
537                    let path = delta
538                        .new_file()
539                        .path()
540                        .and_then(|p| p.to_str())
541                        .unwrap_or("")
542                        .to_string();
543                    (FileStatus::Modified, path, None)
544                }
545                Delta::Renamed => {
546                    let new_path = delta
547                        .new_file()
548                        .path()
549                        .and_then(|p| p.to_str())
550                        .unwrap_or("")
551                        .to_string();
552                    let old_path = delta
553                        .old_file()
554                        .path()
555                        .and_then(|p| p.to_str())
556                        .unwrap_or("")
557                        .to_string();
558                    (FileStatus::Renamed, new_path, Some(old_path))
559                }
560                _ => continue,
561            };
562
563            if !file_path.starts_with(".sem/") {
564                files.push(FileChange {
565                    file_path,
566                    status,
567                    old_file_path,
568                    before_content: None,
569                    after_content: None,
570                });
571            }
572        }
573
574        files
575    }
576
/// Heuristic binary check for a byte buffer.
///
/// A NUL byte anywhere marks the data as binary. Otherwise the data is
/// binary when it is not valid UTF-8, with one exception: if `complete` is
/// false, a multi-byte sequence cut off at the very end of the buffer is
/// tolerated, since more bytes may follow.
fn bytes_look_binary(bytes: &[u8], complete: bool) -> bool {
    if bytes.contains(&0) {
        return true;
    }

    // `error_len()` is `None` only for a sequence truncated at the end.
    std::str::from_utf8(bytes).is_err_and(|error| complete || error.error_len().is_some())
}
587
588    fn populate_contents(
589        &self,
590        files: &mut [FileChange],
591        scope: &DiffScope,
592    ) -> Result<(), GitError> {
593        match scope {
594            DiffScope::Working => {
595                // Resolve HEAD tree once for all before_content reads
596                let head_tree = self.resolve_tree("HEAD").ok();
597                for file in files.iter_mut() {
598                    if file.status != FileStatus::Deleted {
599                        file.after_content = self.read_working_file(&file.file_path);
600                    }
601                    if file.status != FileStatus::Added {
602                        let path = file
603                            .old_file_path
604                            .as_deref()
605                            .unwrap_or(&file.file_path);
606                        file.before_content = head_tree
607                            .as_ref()
608                            .and_then(|t| self.read_blob_from_tree(t, path));
609                    }
610                }
611            }
612            DiffScope::Staged => {
613                let head_tree = self.resolve_tree("HEAD").ok();
614                for file in files.iter_mut() {
615                    if file.status != FileStatus::Deleted {
616                        file.after_content = self
617                            .read_index_file(&file.file_path)
618                            .or_else(|| self.read_working_file(&file.file_path));
619                    }
620                    if file.status != FileStatus::Added {
621                        let path = file
622                            .old_file_path
623                            .as_deref()
624                            .unwrap_or(&file.file_path);
625                        file.before_content = head_tree
626                            .as_ref()
627                            .and_then(|t| self.read_blob_from_tree(t, path));
628                    }
629                }
630            }
631            DiffScope::Commit { sha } => {
632                // Resolve both trees once instead of per-file
633                let after_tree = self.resolve_tree(sha)?;
634                let before_tree = self.resolve_tree(&format!("{sha}~1")).ok();
635                for file in files.iter_mut() {
636                    if file.status != FileStatus::Deleted {
637                        file.after_content =
638                            self.read_blob_from_tree(&after_tree, &file.file_path);
639                    }
640                    if file.status != FileStatus::Added {
641                        let path = file
642                            .old_file_path
643                            .as_deref()
644                            .unwrap_or(&file.file_path);
645                        file.before_content = before_tree
646                            .as_ref()
647                            .and_then(|t| self.read_blob_from_tree(t, path));
648                    }
649                }
650            }
651            DiffScope::Range { from, to } => {
652                let after_tree = self.resolve_tree(to)?;
653                let before_tree = self.resolve_tree(from)?;
654                for file in files.iter_mut() {
655                    if file.status != FileStatus::Deleted {
656                        file.after_content =
657                            self.read_blob_from_tree(&after_tree, &file.file_path);
658                    }
659                    if file.status != FileStatus::Added {
660                        let path = file
661                            .old_file_path
662                            .as_deref()
663                            .unwrap_or(&file.file_path);
664                        file.before_content =
665                            self.read_blob_from_tree(&before_tree, path);
666                    }
667                }
668            }
669            DiffScope::RefToWorking { refspec } => {
670                let before_tree = self.resolve_tree(refspec)?;
671                for file in files.iter_mut() {
672                    if file.status != FileStatus::Deleted {
673                        file.after_content = self.read_working_file(&file.file_path);
674                    }
675                    if file.status != FileStatus::Added {
676                        let path = file
677                            .old_file_path
678                            .as_deref()
679                            .unwrap_or(&file.file_path);
680                        file.before_content =
681                            self.read_blob_from_tree(&before_tree, path);
682                    }
683                }
684            }
685        }
686        Ok(())
687    }
688
689    fn resolve_tree(&self, refspec: &str) -> Result<git2::Tree<'_>, GitError> {
690        let obj = self.repo.revparse_single(refspec)?;
691        let commit = obj.peel_to_commit()?;
692        Ok(commit.tree()?)
693    }
694
/// Convert CRLF and lone CR line endings to LF.
///
/// Input without any `\r` is returned as-is, without reallocating.
fn normalize_line_endings(s: String) -> String {
    if !s.contains('\r') {
        return s;
    }

    let mut normalized = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch != '\r' {
            normalized.push(ch);
            continue;
        }
        // Treat "\r\n" as one line break by swallowing the '\n'.
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
        normalized.push('\n');
    }
    normalized
}
702
703    fn read_blob_from_tree(&self, tree: &git2::Tree, file_path: &str) -> Option<String> {
704        let entry = tree.get_path(Path::new(file_path)).ok()?;
705        let blob = self.repo.find_blob(entry.id()).ok()?;
706        let bytes = blob.content();
707        if blob.is_binary() || Self::bytes_look_binary(bytes, true) {
708            return None;
709        }
710        std::str::from_utf8(bytes)
711            .ok()
712            .map(|s| Self::normalize_line_endings(s.to_string()))
713    }
714
715    fn read_working_file(&self, file_path: &str) -> Option<String> {
716        let full_path = self.repo_root.join(file_path);
717        let bytes = fs::read(full_path).ok()?;
718        if Self::bytes_look_binary(&bytes, true) {
719            return None;
720        }
721        String::from_utf8(bytes)
722            .ok()
723            .map(Self::normalize_line_endings)
724    }
725
726    fn read_index_file(&self, file_path: &str) -> Option<String> {
727        // libgit2 cannot open a sparse index; fall back to the git CLI.
728        let Ok(index) = self.repo.index() else {
729            return self.read_index_file_cli(file_path);
730        };
731        let entry = index.get_path(Path::new(file_path), 0)?;
732        let blob = self.repo.find_blob(entry.id).ok()?;
733        let bytes = blob.content();
734        if blob.is_binary() || Self::bytes_look_binary(bytes, true) {
735            return None;
736        }
737        std::str::from_utf8(bytes)
738            .ok()
739            .map(|s| Self::normalize_line_endings(s.to_string()))
740    }
741
742    /// Read a file's staged (index) content via `git show :path`. Used when
743    /// libgit2 cannot open the index (sparse checkouts).
744    fn read_index_file_cli(&self, file_path: &str) -> Option<String> {
745        let output = Command::new("git")
746            .arg("-C")
747            .arg(&self.repo_root)
748            .arg("show")
749            .arg(format!(":{file_path}"))
750            .output()
751            .ok()?;
752        if !output.status.success() || Self::bytes_look_binary(&output.stdout, true) {
753            return None;
754        }
755        String::from_utf8(output.stdout)
756            .ok()
757            .map(Self::normalize_line_endings)
758    }
759
760
761    /// Read file content at a specific git ref (commit SHA, branch, tag, etc.)
762    pub fn read_file_at_ref(&self, refspec: &str, file_path: &str) -> Result<Option<String>, GitError> {
763        let tree = self.resolve_tree(refspec)?;
764        Ok(self.read_blob_from_tree(&tree, file_path))
765    }
766
767    /// Get commits that modified a specific file, walking history from HEAD.
768    /// Returns commits in reverse chronological order (newest first).
769    pub fn get_file_commits(&self, file_path: &str, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
770        let mut revwalk = self.repo.revwalk()?;
771        revwalk.push_head()?;
772        revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::TIME)?;
773
774        let mut commits = Vec::new();
775        let path = Path::new(file_path);
776
777        for oid_result in revwalk {
778            let oid = oid_result?;
779            let commit = self.repo.find_commit(oid)?;
780            let tree = commit.tree()?;
781
782            // Check if this file exists in this commit's tree
783            let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
784
785            // Compare with parent to see if the file changed
786            let file_in_parent = if commit.parent_count() > 0 {
787                commit.parent(0)
788                    .ok()
789                    .and_then(|p| p.tree().ok())
790                    .and_then(|t| t.get_path(path).ok().map(|e| e.id()))
791            } else {
792                None // No parent = initial commit, file was added
793            };
794
795            // Include if file changed between parent and this commit
796            let changed = match (file_in_commit, file_in_parent) {
797                (Some(cur), Some(prev)) => cur != prev,  // content changed
798                (Some(_), None) => true,                   // file added
799                (None, Some(_)) => true,                   // file deleted
800                (None, None) => false,                     // file not present in either
801            };
802
803            if changed {
804                let sha = oid.to_string();
805                commits.push(CommitInfo {
806                    short_sha: sha[..7.min(sha.len())].to_string(),
807                    sha,
808                    author: commit.author().name().unwrap_or("unknown").to_string(),
809                    date: commit.time().seconds().to_string(),
810                    message: commit.message().unwrap_or("").to_string(),
811                });
812
813                if limit != 0 && commits.len() >= limit {
814                    break;
815                }
816            }
817        }
818
819        Ok(commits)
820    }
821
822    /// Get commits that modified a specific file, following renames across history.
823    /// Like `git log --follow`: when the tracked path disappears between commits,
824    /// compute a diff with rename detection to find the old filename and continue.
825    /// Returns commits in reverse chronological order (newest first).
826    pub fn get_file_commits_follow_renames(
827        &self,
828        file_path: &str,
829        limit: usize,
830    ) -> Result<Vec<FileCommitInfo>, GitError> {
831        match self.get_file_commits_follow_renames_cli(file_path, limit) {
832            Ok(commits) if !commits.is_empty() => return Ok(commits),
833            Ok(_) => {}
834            Err(GitError::Io(error)) if error.kind() == std::io::ErrorKind::NotFound => {}
835            Err(error) => return Err(error),
836        }
837
838        let mut revwalk = self.repo.revwalk()?;
839        revwalk.push_head()?;
840        revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::TIME)?;
841
842        let mut results = Vec::new();
843        let mut tracked_path = file_path.to_string();
844
845        for oid_result in revwalk {
846            let oid = oid_result?;
847            let commit = self.repo.find_commit(oid)?;
848            let tree = commit.tree()?;
849
850            let path = Path::new(&tracked_path);
851            let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
852
853            let (parent_tree_opt, file_in_parent) = if commit.parent_count() > 0 {
854                let parent = commit.parent(0)?;
855                let ptree = parent.tree()?;
856                let fip = ptree.get_path(path).ok().map(|e| e.id());
857                (Some(ptree), fip)
858            } else {
859                (None, None)
860            };
861
862            let changed = match (file_in_commit, file_in_parent) {
863                (Some(cur), Some(prev)) => cur != prev,
864                (Some(_), None) => true,
865                (None, Some(_)) => true,
866                (None, None) => false,
867            };
868
869            if changed {
870                let sha_str = oid.to_string();
871                results.push(FileCommitInfo {
872                    commit: CommitInfo {
873                        short_sha: sha_str[..7.min(sha_str.len())].to_string(),
874                        sha: sha_str,
875                        author: commit.author().name().unwrap_or("unknown").to_string(),
876                        date: commit.time().seconds().to_string(),
877                        message: commit.message().unwrap_or("").to_string(),
878                    },
879                    file_path: tracked_path.clone(),
880                });
881
882                if limit != 0 && results.len() >= limit {
883                    break;
884                }
885            }
886
887            // When walking backward, the rename commit still contains the new
888            // path. Detect that parent-side old path before the next iteration.
889            let should_check_rename =
890                parent_tree_opt.is_some() && (file_in_parent.is_none() || file_in_commit.is_none());
891            if should_check_rename {
892                let mut diff = self.repo.diff_tree_to_tree(
893                    parent_tree_opt.as_ref(),
894                    Some(&tree),
895                    None,
896                )?;
897                let mut find_opts = DiffFindOptions::new();
898                find_opts.renames(true);
899                diff.find_similar(Some(&mut find_opts))?;
900
901                let mut found_rename = false;
902                for delta in diff.deltas() {
903                    if delta.status() == Delta::Renamed {
904                        let new_path = delta
905                            .new_file()
906                            .path()
907                            .and_then(|p| p.to_str())
908                            .unwrap_or("");
909                        if new_path == tracked_path {
910                            // The tracked file was renamed FROM old_path
911                            let old_path = delta
912                                .old_file()
913                                .path()
914                                .and_then(|p| p.to_str())
915                                .unwrap_or("")
916                                .to_string();
917                            if !old_path.is_empty() {
918                                tracked_path = old_path;
919                                found_rename = true;
920                                break;
921                            }
922                        }
923                    }
924                }
925
926                if !found_rename && file_in_commit.is_none() {
927                    // File truly deleted, stop tracking
928                    break;
929                }
930            }
931        }
932
933        Ok(results)
934    }
935
936    fn get_file_commits_follow_renames_cli(
937        &self,
938        file_path: &str,
939        limit: usize,
940    ) -> Result<Vec<FileCommitInfo>, GitError> {
941        let mut command = Command::new("git");
942        command
943            .arg("-C")
944            .arg(&self.repo_root)
945            .arg("log")
946            .arg("--follow")
947            .arg("--format=\x1e%H\x1f%an\x1f%at\x1f%s")
948            .arg("--name-status");
949        if limit != 0 {
950            command.arg("-n").arg(limit.to_string());
951        }
952        command.arg("--").arg(file_path);
953
954        let output = command.output()?;
955        if !output.status.success() {
956            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
957            return Err(git_command_error(if stderr.is_empty() {
958                format!("git log exited with {}", output.status)
959            } else {
960                stderr
961            }));
962        }
963
964        let stdout = String::from_utf8_lossy(&output.stdout);
965        let mut tracked_path = file_path.to_string();
966        let mut commits = Vec::new();
967
968        for record in stdout.split('\x1e') {
969            let record = record.trim_start_matches('\n');
970            if record.trim().is_empty() {
971                continue;
972            }
973
974            let mut lines = record.lines();
975            let Some(meta) = lines.next() else {
976                continue;
977            };
978            let mut parts = meta.splitn(4, '\x1f');
979            let Some(sha) = parts.next() else {
980                continue;
981            };
982            let Some(author) = parts.next() else {
983                continue;
984            };
985            let Some(date) = parts.next() else {
986                continue;
987            };
988            let message = parts.next().unwrap_or_default();
989
990            let commit_path = tracked_path.clone();
991            let mut previous_path = None;
992            for line in lines {
993                let fields: Vec<&str> = line.split('\t').collect();
994                if fields.len() >= 3 && fields[0].starts_with('R') && fields[2] == tracked_path {
995                    previous_path = Some(fields[1].to_string());
996                }
997            }
998
999            commits.push(FileCommitInfo {
1000                commit: CommitInfo {
1001                    short_sha: sha[..7.min(sha.len())].to_string(),
1002                    sha: sha.to_string(),
1003                    author: author.to_string(),
1004                    date: date.to_string(),
1005                    message: message.to_string(),
1006                },
1007                file_path: commit_path,
1008            });
1009
1010            if let Some(previous_path) = previous_path {
1011                tracked_path = previous_path;
1012            }
1013        }
1014
1015        Ok(commits)
1016    }
1017
1018    /// Get all file paths changed in a single commit (vs its parent).
1019    /// Returns file paths from the new side of each delta.
1020    pub fn get_commit_changed_files(&self, sha: &str) -> Result<Vec<String>, GitError> {
1021        let obj = self.repo.revparse_single(sha)?;
1022        let commit = obj.peel_to_commit()?;
1023        let tree = commit.tree()?;
1024        let parent_tree = if commit.parent_count() > 0 {
1025            Some(commit.parent(0)?.tree()?)
1026        } else {
1027            None
1028        };
1029        let diff = self.repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None)?;
1030        let mut paths = Vec::new();
1031        for delta in diff.deltas() {
1032            if let Some(p) = delta.new_file().path().and_then(|p| p.to_str()) {
1033                paths.push(p.to_string());
1034            }
1035            // Also include old path for deletions/renames
1036            if let Some(p) = delta.old_file().path().and_then(|p| p.to_str()) {
1037                if !paths.contains(&p.to_string()) {
1038                    paths.push(p.to_string());
1039                }
1040            }
1041        }
1042        Ok(paths)
1043    }
1044
1045    pub fn get_log(&self, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
1046        let mut revwalk = self.repo.revwalk()?;
1047        revwalk.push_head()?;
1048
1049        let mut commits = Vec::new();
1050        for (i, oid_result) in revwalk.enumerate() {
1051            if limit != 0 && i >= limit {
1052                break;
1053            }
1054            let oid = oid_result?;
1055            let commit = self.repo.find_commit(oid)?;
1056            let sha = oid.to_string();
1057            commits.push(CommitInfo {
1058                short_sha: sha[..7.min(sha.len())].to_string(),
1059                sha,
1060                author: commit.author().name().unwrap_or("unknown").to_string(),
1061                date: commit.time().seconds().to_string(),
1062                message: commit.message().unwrap_or("").to_string(),
1063            });
1064        }
1065
1066        Ok(commits)
1067    }
1068}
1069
1070/// Parse `git diff --name-status -M -z` output into FileChange entries.
1071/// Records are NUL-delimited; a rename/copy is a status token (R100/C75)
1072/// followed by old path then new path, others are status then one path.
1073fn parse_name_status_z(stdout: &[u8]) -> Vec<FileChange> {
1074    let text = String::from_utf8_lossy(stdout);
1075    let mut fields = text.split('\0').filter(|s| !s.is_empty());
1076    let mut files = Vec::new();
1077
1078    while let Some(status) = fields.next() {
1079        let code = status.chars().next().unwrap_or(' ');
1080        let (file_change, _) = match code {
1081            'R' | 'C' => {
1082                let Some(old_path) = fields.next() else { break };
1083                let Some(new_path) = fields.next() else { break };
1084                (
1085                    FileChange {
1086                        file_path: new_path.to_string(),
1087                        status: FileStatus::Renamed,
1088                        old_file_path: Some(old_path.to_string()),
1089                        before_content: None,
1090                        after_content: None,
1091                    },
1092                    (),
1093                )
1094            }
1095            'A' | 'D' | 'M' | 'T' => {
1096                let Some(path) = fields.next() else { break };
1097                let status = match code {
1098                    'A' => FileStatus::Added,
1099                    'D' => FileStatus::Deleted,
1100                    _ => FileStatus::Modified,
1101                };
1102                (
1103                    FileChange {
1104                        file_path: path.to_string(),
1105                        status,
1106                        old_file_path: None,
1107                        before_content: None,
1108                        after_content: None,
1109                    },
1110                    (),
1111                )
1112            }
1113            _ => continue,
1114        };
1115        if !file_change.file_path.starts_with(".sem/") {
1116            files.push(file_change);
1117        }
1118    }
1119
1120    files
1121}
1122
1123fn parse_blame_porcelain(output: &str) -> Vec<BlameLineInfo> {
1124    let lines: Vec<&str> = output.lines().collect();
1125    let mut parsed = Vec::new();
1126    let mut index = 0;
1127
1128    while index < lines.len() {
1129        let Some((raw_sha, line_number)) = parse_blame_header(lines[index]) else {
1130            index += 1;
1131            continue;
1132        };
1133        index += 1;
1134
1135        let mut author = String::new();
1136        let mut author_time = None;
1137        let mut summary = String::new();
1138
1139        while index < lines.len() {
1140            let line = lines[index];
1141            index += 1;
1142
1143            if line.starts_with('\t') {
1144                break;
1145            } else if let Some(value) = line.strip_prefix("author ") {
1146                author = value.to_string();
1147            } else if let Some(value) = line.strip_prefix("author-time ") {
1148                author_time = value.parse::<i64>().ok();
1149            } else if let Some(value) = line.strip_prefix("summary ") {
1150                summary = value.to_string();
1151            }
1152        }
1153
1154        let sha = raw_sha.trim_start_matches('^');
1155        let commit_sha = if sha.chars().all(|c| c == '0') {
1156            None
1157        } else {
1158            Some(sha.to_string())
1159        };
1160
1161        if author.is_empty() {
1162            author = if commit_sha.is_none() {
1163                "Not Committed Yet".to_string()
1164            } else {
1165                "unknown".to_string()
1166            };
1167        }
1168
1169        parsed.push(BlameLineInfo {
1170            line_number,
1171            commit_sha,
1172            author,
1173            author_time,
1174            summary,
1175        });
1176    }
1177
1178    parsed.sort_by_key(|line| line.line_number);
1179    parsed
1180}
1181
1182fn parse_blame_header(line: &str) -> Option<(&str, usize)> {
1183    let mut parts = line.split_whitespace();
1184    let sha = parts.next()?;
1185    if !is_blame_oid(sha) {
1186        return None;
1187    }
1188    parts.next()?;
1189    let final_line = parts.next()?.parse().ok()?;
1190    Some((sha, final_line))
1191}
1192
/// Whether `value` looks like a commit id in a blame header: exactly 40
/// ASCII hex digits, optionally preceded by a single `^`.
fn is_blame_oid(value: &str) -> bool {
    let digits = match value.strip_prefix('^') {
        Some(rest) => rest,
        None => value,
    };
    digits.len() == 40 && digits.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1197
1198fn git_command_error(message: String) -> GitError {
1199    GitError::Git2(git2::Error::from_str(&message))
1200}
1201
1202fn map_git_error(error: git2::Error) -> GitError {
1203    if error.code() == ErrorCode::NotFound {
1204        GitError::NotARepo
1205    } else {
1206        GitError::Git2(error)
1207    }
1208}
1209
1210fn should_retry_with_command_line_safe_directory(error: &git2::Error, path: &Path) -> bool {
1211    let safe_directories = command_line_safe_directories();
1212    should_retry_with_safe_directory(error, path, &safe_directories)
1213}
1214
1215fn should_retry_with_safe_directory(error: &git2::Error, path: &Path, safe_directories: &[String]) -> bool {
1216    error.code() == ErrorCode::Owner
1217        && nearest_git_root(path).is_some_and(|repo_root| {
1218            safe_directories.iter().any(|safe_directory| {
1219                safe_directory == "*"
1220                    || paths_match(&repo_root, Path::new(safe_directory))
1221            })
1222        })
1223}
1224
/// Collect every `safe.directory` value supplied through the
/// `GIT_CONFIG_COUNT` / `GIT_CONFIG_KEY_<n>` / `GIT_CONFIG_VALUE_<n>`
/// environment variables.
///
/// A missing or unparsable count is treated as zero. Keys are compared
/// case-insensitively; slots whose key or value variable is unset (or not
/// valid Unicode) are skipped.
fn command_line_safe_directories() -> Vec<String> {
    let slot_count: usize = env::var("GIT_CONFIG_COUNT")
        .ok()
        .and_then(|raw| raw.parse().ok())
        .unwrap_or(0);

    let mut directories = Vec::new();
    for slot in 0..slot_count {
        let Ok(key) = env::var(format!("GIT_CONFIG_KEY_{slot}")) else {
            continue;
        };
        if !key.eq_ignore_ascii_case("safe.directory") {
            continue;
        }
        if let Ok(value) = env::var(format!("GIT_CONFIG_VALUE_{slot}")) {
            directories.push(value);
        }
    }
    directories
}
1242
/// Find the closest directory at or above `path` that contains a `.git`
/// entry, returned in canonical form when it can be canonicalized.
///
/// `path` is resolved to an absolute, canonical path before walking upward.
/// A purely lexical walk cannot climb out of a relative path (`.` or
/// `file.rs` have no usable parent), so a repository root above the current
/// directory would otherwise be missed. When `path` cannot be canonicalized
/// (e.g. it does not exist) it is used as given.
///
/// If `path` is a file, the search starts at its parent directory. Returns
/// `None` when no ancestor contains `.git`.
fn nearest_git_root(path: &Path) -> Option<PathBuf> {
    let start = fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
    let start_dir = if start.is_file() {
        start.parent()?
    } else {
        start.as_path()
    };

    // `ancestors` yields `start_dir` itself first, then each parent in turn.
    start_dir
        .ancestors()
        .find(|dir| dir.join(".git").exists())
        .map(|dir| fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf()))
}
1258
/// Whether two paths name the same location.
///
/// Each side is canonicalized when possible and used as given otherwise. On
/// Windows the comparison ignores ASCII case; elsewhere it is exact.
fn paths_match(left: &Path, right: &Path) -> bool {
    let resolve = |candidate: &Path| {
        fs::canonicalize(candidate).unwrap_or_else(|_| candidate.to_path_buf())
    };
    let (lhs, rhs) = (resolve(left), resolve(right));

    if !cfg!(windows) {
        return lhs == rhs;
    }

    lhs.to_string_lossy()
        .eq_ignore_ascii_case(&rhs.to_string_lossy())
}
1270
/// Process-wide mutex, created on first use, that callers hold while
/// libgit2's owner validation is switched off.
fn owner_validation_lock() -> &'static Mutex<()> {
    static OWNER_VALIDATION_MUTEX: OnceLock<Mutex<()>> = OnceLock::new();
    OWNER_VALIDATION_MUTEX.get_or_init(Mutex::default)
}
1275
1276struct OwnerValidationDisabled;
1277
1278impl OwnerValidationDisabled {
1279    fn new() -> Result<Self, GitError> {
1280        // libgit2 stores this as a process-global option; callers hold owner_validation_lock.
1281        unsafe { git2::opts::set_verify_owner_validation(false)? };
1282        Ok(Self)
1283    }
1284}
1285
1286impl Drop for OwnerValidationDisabled {
1287    fn drop(&mut self) {
1288        // Restore the default before the owner-validation lock is released.
1289        unsafe {
1290            let _ = git2::opts::set_verify_owner_validation(true);
1291        }
1292    }
1293}
1294
1295fn normalize_open_path(path: &Path) -> Result<PathBuf, GitError> {
1296    let canonical = match fs::canonicalize(path) {
1297        Ok(canonical) => canonical,
1298        Err(_) if path.is_absolute() => normalize_lexical(path),
1299        Err(_) => normalize_lexical(&env::current_dir()?.join(path)),
1300    };
1301
1302    Ok(if canonical.is_file() {
1303        canonical
1304            .parent()
1305            .map(Path::to_path_buf)
1306            .unwrap_or(canonical)
1307    } else {
1308        canonical
1309    })
1310}
1311
1312fn normalize_absolute_pathspec(path: &Path) -> PathBuf {
1313    let path = normalize_lexical(path);
1314    let Some(leaf) = path.file_name() else {
1315        return fs::canonicalize(&path).unwrap_or(path);
1316    };
1317    let mut trailing_components = vec![leaf.to_os_string()];
1318
1319    let Some(parent) = path.parent() else {
1320        return path;
1321    };
1322
1323    for ancestor in parent.ancestors() {
1324        if ancestor.exists() {
1325            let mut normalized =
1326                fs::canonicalize(ancestor).unwrap_or_else(|_| normalize_lexical(ancestor));
1327            for component in trailing_components.iter().rev() {
1328                normalized.push(component);
1329            }
1330            return normalized;
1331        }
1332
1333        let Some(name) = ancestor.file_name() else {
1334            return path;
1335        };
1336        trailing_components.push(name.to_os_string());
1337    }
1338
1339    path
1340}
1341
1342fn pathspec_outside_repo_error(pathspec: &str, repo_root: &Path) -> GitError {
1343    GitError::Git2(git2::Error::from_str(&format!(
1344        "pathspec '{pathspec}' is outside repository '{}'",
1345        repo_root.display()
1346    )))
1347}
1348
1349fn non_utf8_pathspec_error(pathspec: &str) -> GitError {
1350    GitError::Git2(git2::Error::from_str(&format!(
1351        "pathspec '{pathspec}' is not valid UTF-8 after normalization"
1352    )))
1353}
1354
/// Normalize a path purely lexically, without touching the filesystem.
///
/// `.` components are dropped and `name/..` pairs are collapsed. A `..`
/// directly under the root is discarded, since there is nothing above the
/// root. A `..` that cannot be collapsed in a relative path is kept, so
/// `../../a` stays `../../a` instead of losing its leading `..` components.
///
/// Symlinks are not resolved, so the result may differ from the canonical
/// path when a collapsed component is a symlink.
fn normalize_lexical(path: &Path) -> PathBuf {
    let mut normalized = PathBuf::new();

    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match normalized.components().next_back() {
                // `name/..` cancels out.
                Some(Component::Normal(_)) => {
                    normalized.pop();
                }
                // Already at the root: `..` has no effect.
                Some(Component::RootDir) => {}
                // Empty so far, a bare drive prefix, or an earlier `..`
                // that must be preserved.
                _ => normalized.push(".."),
            },
            Component::Normal(part) => normalized.push(part),
            Component::Prefix(prefix) => normalized.push(prefix.as_os_str()),
            Component::RootDir => normalized.push(component.as_os_str()),
        }
    }

    normalized
}
1374
1375#[cfg(test)]
1376mod tests {
1377    use super::*;
1378    use crate::model::change::ChangeType;
1379    use crate::parser::differ::{collect_binary_file_changes, compute_semantic_diff};
1380    use crate::parser::plugins::create_default_registry;
1381    use git2::{ErrorClass, Oid, Repository, Signature};
1382    use tempfile::TempDir;
1383
1384    fn commit_file(repo: &Repository, file_path: &str, contents: &str, message: &str) -> Oid {
1385        fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
1386
1387        let mut index = repo.index().unwrap();
1388        index.add_path(Path::new(file_path)).unwrap();
1389        index.write().unwrap();
1390
1391        let tree_id = index.write_tree().unwrap();
1392        let tree = repo.find_tree(tree_id).unwrap();
1393        let sig = Signature::now("Test User", "test@example.com").unwrap();
1394
1395        match repo.head() {
1396            Ok(head) => {
1397                let parent = repo.find_commit(head.target().unwrap()).unwrap();
1398                repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
1399                    .unwrap()
1400            }
1401            Err(_) => repo
1402                .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
1403                .unwrap(),
1404        }
1405    }
1406
1407    fn commit_binary_file(
1408        repo: &Repository,
1409        file_path: &str,
1410        contents: &[u8],
1411        message: &str,
1412    ) -> Oid {
1413        fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
1414
1415        let mut index = repo.index().unwrap();
1416        index.add_path(Path::new(file_path)).unwrap();
1417        index.write().unwrap();
1418
1419        let tree_id = index.write_tree().unwrap();
1420        let tree = repo.find_tree(tree_id).unwrap();
1421        let sig = Signature::now("Test User", "test@example.com").unwrap();
1422
1423        match repo.head() {
1424            Ok(head) => {
1425                let parent = repo.find_commit(head.target().unwrap()).unwrap();
1426                repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
1427                    .unwrap()
1428            }
1429            Err(_) => repo
1430                .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
1431                .unwrap(),
1432        }
1433    }
1434
1435    #[test]
1436    fn porcelain_blame_reports_uncommitted_lines() {
1437        let temp = TempDir::new().unwrap();
1438        let repo = Repository::init(temp.path()).unwrap();
1439
1440        commit_file(&repo, "a.py", "def foo():\n    return 1\n", "init");
1441        fs::write(temp.path().join("a.py"), "def foo():\n    return 2\n").unwrap();
1442
1443        let bridge = GitBridge::open(temp.path()).unwrap();
1444        let blame = bridge.blame_file_porcelain(Path::new("a.py")).unwrap();
1445
1446        assert!(blame[0].commit_sha.is_some());
1447        assert_eq!(blame[1].commit_sha, None);
1448        assert_eq!(blame[1].author, "Not Committed Yet");
1449    }
1450
1451    #[test]
1452    fn sparse_checkout_does_not_report_excluded_files_as_deleted() {
1453        // Regression for #330: with a cone-mode sparse checkout, libgit2's
1454        // workdir diff sees sparse-excluded files as absent and reports them
1455        // deleted (and a true sparse index errors outright). We route through
1456        // the git CLI, which understands sparse checkouts.
1457        let temp = TempDir::new().unwrap();
1458        let repo = Repository::init(temp.path()).unwrap();
1459        fs::create_dir_all(temp.path().join("keep")).unwrap();
1460        fs::create_dir_all(temp.path().join("drop")).unwrap();
1461        commit_file(&repo, "keep/a.rs", "fn kept() { let x = 1; }\n", "init keep");
1462        commit_file(&repo, "drop/b.rs", "fn dropped() {}\n", "init drop");
1463
1464        let git = |args: &[&str]| {
1465            Command::new("git")
1466                .arg("-C")
1467                .arg(temp.path())
1468                .args(args)
1469                .output()
1470                .expect("git")
1471        };
1472        // Enable cone-mode sparse checkout restricted to keep/.
1473        if !git(&["sparse-checkout", "init", "--cone"]).status.success() {
1474            return; // git too old for sparse-checkout; skip
1475        }
1476        git(&["sparse-checkout", "set", "keep"]);
1477        // Modify a file inside the cone.
1478        fs::write(temp.path().join("keep/a.rs"), "fn kept() { let x = 2; }\n").unwrap();
1479
1480        let bridge = GitBridge::open(temp.path()).unwrap();
1481        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1482
1483        // Only the in-cone modification; the sparse-excluded drop/b.rs must
1484        // NOT appear as deleted.
1485        assert_eq!(files.len(), 1, "got: {files:?}");
1486        assert_eq!(files[0].file_path, "keep/a.rs");
1487        assert_eq!(files[0].status, FileStatus::Modified);
1488        assert!(!files.iter().any(|f| f.file_path == "drop/b.rs"));
1489    }
1490
1491    #[test]
1492    fn clean_worktree_does_not_fall_back_to_head_commit() {
1493        let temp = TempDir::new().unwrap();
1494        let repo = Repository::init(temp.path()).unwrap();
1495
1496        commit_file(&repo, "sample.ts", "export function a() {\n  return 1;\n}\n", "init");
1497        commit_file(
1498            &repo,
1499            "sample.ts",
1500            "export function a() {\n  return 2;\n}\n",
1501            "change a",
1502        );
1503
1504        let bridge = GitBridge::open(temp.path()).unwrap();
1505        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1506
1507        assert!(matches!(scope, DiffScope::Working));
1508        assert!(files.is_empty());
1509    }
1510
1511    #[test]
1512    fn owner_error_retries_for_command_line_safe_directory() {
1513        let temp = TempDir::new().unwrap();
1514        Repository::init(temp.path()).unwrap();
1515
1516        let owner_error = git2::Error::new(
1517            ErrorCode::Owner,
1518            ErrorClass::Config,
1519            "owner mismatch",
1520        );
1521        let safe_directories = [temp.path().to_string_lossy().to_string()];
1522
1523        assert!(should_retry_with_safe_directory(
1524            &owner_error,
1525            temp.path(),
1526            &safe_directories,
1527        ));
1528
1529        let other_directories = [temp.path().join("other").to_string_lossy().to_string()];
1530        assert!(!should_retry_with_safe_directory(
1531            &owner_error,
1532            temp.path(),
1533            &other_directories,
1534        ));
1535
1536        let not_found_error = git2::Error::new(
1537            ErrorCode::NotFound,
1538            ErrorClass::Repository,
1539            "not found",
1540        );
1541        assert!(!should_retry_with_safe_directory(
1542            &not_found_error,
1543            temp.path(),
1544            &["*".to_string()],
1545        ));
1546    }
1547
1548    #[test]
1549    fn explicit_commit_scope_still_reads_head_commit_diff() {
1550        let temp = TempDir::new().unwrap();
1551        let repo = Repository::init(temp.path()).unwrap();
1552
1553        commit_file(&repo, "sample.ts", "export function a() {\n  return 1;\n}\n", "init");
1554        let head_oid = commit_file(
1555            &repo,
1556            "sample.ts",
1557            "export function a() {\n  return 2;\n}\n",
1558            "change a",
1559        );
1560
1561        let bridge = GitBridge::open(temp.path()).unwrap();
1562        let files = bridge
1563            .get_changed_files(&DiffScope::Commit {
1564                sha: head_oid.to_string(),
1565            }, &[])
1566            .unwrap();
1567
1568        assert_eq!(files.len(), 1);
1569        assert_eq!(files[0].file_path, "sample.ts");
1570        assert_eq!(files[0].status, FileStatus::Modified);
1571    }
1572
1573    #[test]
1574    fn pathspecs_are_normalized_from_open_directory() {
1575        let temp = TempDir::new().unwrap();
1576        let repo = Repository::init(temp.path()).unwrap();
1577        fs::create_dir_all(temp.path().join("pkg")).unwrap();
1578
1579        commit_file(&repo, "pkg/a.py", "def foo():\n    return 1\n", "init");
1580        fs::write(temp.path().join("pkg/a.py"), "def foo():\n    return 2\n").unwrap();
1581
1582        let bridge = GitBridge::open(&temp.path().join("pkg")).unwrap();
1583        let relative_files = bridge
1584            .get_changed_files(&DiffScope::Working, &["a.py".to_string()])
1585            .unwrap();
1586
1587        assert_eq!(relative_files.len(), 1);
1588        assert_eq!(relative_files[0].file_path, "pkg/a.py");
1589
1590        let absolute_path = temp.path().join("pkg/a.py").to_string_lossy().to_string();
1591        let absolute_files = bridge
1592            .get_changed_files(&DiffScope::Working, &[absolute_path])
1593            .unwrap();
1594
1595        assert_eq!(absolute_files.len(), 1);
1596        assert_eq!(absolute_files[0].file_path, "pkg/a.py");
1597    }
1598
1599    #[test]
1600    fn absolute_deleted_pathspecs_are_normalized_from_existing_parent() {
1601        let temp = TempDir::new().unwrap();
1602        let repo = Repository::init(temp.path()).unwrap();
1603        fs::create_dir_all(temp.path().join("pkg")).unwrap();
1604
1605        commit_file(
1606            &repo,
1607            "pkg/deleted.py",
1608            "def foo():\n    return 1\n",
1609            "init",
1610        );
1611        let absolute_path = temp
1612            .path()
1613            .join("pkg/deleted.py")
1614            .to_string_lossy()
1615            .to_string();
1616        fs::remove_file(temp.path().join("pkg/deleted.py")).unwrap();
1617
1618        let bridge = GitBridge::open(&temp.path().join("pkg")).unwrap();
1619        let files = bridge
1620            .get_changed_files(&DiffScope::Working, &[absolute_path])
1621            .unwrap();
1622
1623        assert_eq!(files.len(), 1);
1624        assert_eq!(files[0].file_path, "pkg/deleted.py");
1625        assert_eq!(files[0].status, FileStatus::Deleted);
1626    }
1627
1628    #[test]
1629    fn absolute_missing_pathspecs_preserve_trailing_component_order() {
1630        let temp = TempDir::new().unwrap();
1631        let existing_parent = temp.path().join("existing");
1632        fs::create_dir(&existing_parent).unwrap();
1633
1634        let pathspec = existing_parent.join("missing").join("leaf.py");
1635        let normalized = normalize_absolute_pathspec(&pathspec);
1636
1637        let mut expected = fs::canonicalize(&existing_parent).unwrap();
1638        expected.push("missing");
1639        expected.push("leaf.py");
1640        assert_eq!(normalized, expected);
1641    }
1642
1643    #[test]
1644    fn absolute_pathspecs_outside_repo_are_rejected() {
1645        let repo_dir = TempDir::new().unwrap();
1646        let outside_dir = TempDir::new().unwrap();
1647        let repo = Repository::init(repo_dir.path()).unwrap();
1648
1649        commit_file(&repo, "sample.py", "def foo():\n    return 1\n", "init");
1650        fs::write(
1651            repo_dir.path().join("sample.py"),
1652            "def foo():\n    return 2\n",
1653        )
1654        .unwrap();
1655        let outside_path = outside_dir.path().join("outside.py");
1656        fs::write(&outside_path, "def outside():\n    return 1\n").unwrap();
1657
1658        let bridge = GitBridge::open(repo_dir.path()).unwrap();
1659        let err = bridge
1660            .get_changed_files(
1661                &DiffScope::Working,
1662                &[outside_path.to_string_lossy().to_string()],
1663            )
1664            .unwrap_err();
1665
1666        let message = err.to_string();
1667        assert!(message.contains("pathspec"));
1668        assert!(message.contains("is outside repository"));
1669    }
1670
1671    #[test]
1672    fn working_binary_modification_is_reported_as_binary_change() {
1673        let temp = TempDir::new().unwrap();
1674        let repo = Repository::init(temp.path()).unwrap();
1675
1676        commit_binary_file(&repo, "pic.png", b"\0png-v1\0", "init");
1677        fs::write(temp.path().join("pic.png"), b"\0png-v2\0extra").unwrap();
1678
1679        let bridge = GitBridge::open(temp.path()).unwrap();
1680        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1681
1682        assert_eq!(files.len(), 1);
1683        assert_eq!(files[0].file_path, "pic.png");
1684        assert_eq!(files[0].status, FileStatus::Modified);
1685        assert!(files[0].before_content.is_none());
1686        assert!(files[0].after_content.is_none());
1687
1688        let binary_changes = collect_binary_file_changes(&files);
1689        let registry = create_default_registry();
1690        let result = compute_semantic_diff(&files, &registry, None, None);
1691
1692        assert!(result.changes.is_empty());
1693        assert_eq!(result.file_count, 0);
1694        assert_eq!(binary_changes.len(), 1);
1695        assert_eq!(binary_changes[0].file_path, "pic.png");
1696        assert_eq!(binary_changes[0].status, FileStatus::Modified);
1697    }
1698
1699    #[test]
1700    fn staged_binary_add_and_delete_are_reported_as_binary_changes() {
1701        let temp = TempDir::new().unwrap();
1702        let repo = Repository::init(temp.path()).unwrap();
1703
1704        fs::write(temp.path().join("added.png"), b"\0added-binary\0").unwrap();
1705        let mut index = repo.index().unwrap();
1706        index.add_path(Path::new("added.png")).unwrap();
1707        index.write().unwrap();
1708
1709        let bridge = GitBridge::open(temp.path()).unwrap();
1710        let added_files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1711        assert_eq!(added_files.len(), 1);
1712        assert_eq!(added_files[0].file_path, "added.png");
1713        assert_eq!(added_files[0].status, FileStatus::Added);
1714        assert!(added_files[0].before_content.is_none());
1715        assert!(added_files[0].after_content.is_none());
1716        let added_binary_changes = collect_binary_file_changes(&added_files);
1717        assert_eq!(added_binary_changes.len(), 1);
1718        assert_eq!(added_binary_changes[0].file_path, "added.png");
1719
1720        let temp = TempDir::new().unwrap();
1721        let repo = Repository::init(temp.path()).unwrap();
1722        commit_binary_file(&repo, "deleted.png", b"\0deleted-binary\0", "init");
1723        fs::remove_file(temp.path().join("deleted.png")).unwrap();
1724        let mut index = repo.index().unwrap();
1725        index.remove_path(Path::new("deleted.png")).unwrap();
1726        index.write().unwrap();
1727
1728        let bridge = GitBridge::open(temp.path()).unwrap();
1729        let deleted_files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1730        assert_eq!(deleted_files.len(), 1);
1731        assert_eq!(deleted_files[0].file_path, "deleted.png");
1732        assert_eq!(deleted_files[0].status, FileStatus::Deleted);
1733        assert!(deleted_files[0].before_content.is_none());
1734        assert!(deleted_files[0].after_content.is_none());
1735        let deleted_binary_changes = collect_binary_file_changes(&deleted_files);
1736        assert_eq!(deleted_binary_changes.len(), 1);
1737        assert_eq!(deleted_binary_changes[0].file_path, "deleted.png");
1738    }
1739
1740    #[test]
1741    fn partial_utf8_boundary_is_not_treated_as_binary() {
1742        assert!(!GitBridge::bytes_look_binary(&[0xe2, 0x82], false));
1743        assert!(GitBridge::bytes_look_binary(&[0xe2, 0x82], true));
1744    }
1745
1746    #[test]
1747    fn staged_file_rename_is_reported_as_single_rename_with_old_contents() {
1748        let temp = TempDir::new().unwrap();
1749        let repo = Repository::init(temp.path()).unwrap();
1750
1751        let contents = "export function foo() {\n  return 1;\n}\n";
1752        commit_file(&repo, "old.ts", contents, "init");
1753
1754        fs::rename(temp.path().join("old.ts"), temp.path().join("new.ts")).unwrap();
1755        let mut index = repo.index().unwrap();
1756        index.remove_path(Path::new("old.ts")).unwrap();
1757        index.add_path(Path::new("new.ts")).unwrap();
1758        index.write().unwrap();
1759
1760        let bridge = GitBridge::open(temp.path()).unwrap();
1761        let files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1762
1763        assert_eq!(files.len(), 1);
1764        assert_eq!(files[0].status, FileStatus::Renamed);
1765        assert_eq!(files[0].file_path, "new.ts");
1766        assert_eq!(files[0].old_file_path.as_deref(), Some("old.ts"));
1767        assert_eq!(files[0].before_content.as_deref(), Some(contents));
1768        assert_eq!(files[0].after_content.as_deref(), Some(contents));
1769    }
1770
1771    #[test]
1772    fn staged_file_rename_with_edit_reports_single_moved_entity() {
1773        let temp = TempDir::new().unwrap();
1774        let repo = Repository::init(temp.path()).unwrap();
1775
1776        let before = "\
1777// shared header 01
1778// shared header 02
1779// shared header 03
1780// shared header 04
1781// shared header 05
1782// shared header 06
1783// shared header 07
1784// shared header 08
1785// shared header 09
1786// shared header 10
1787export function foo() {
1788  return alpha + beta + gamma;
1789}
1790";
1791        let after = before.replace(
1792            "return alpha + beta + gamma;",
1793            "return one + two + three;",
1794        );
1795
1796        commit_file(&repo, "old.ts", before, "init");
1797        fs::rename(temp.path().join("old.ts"), temp.path().join("new.ts")).unwrap();
1798        fs::write(temp.path().join("new.ts"), &after).unwrap();
1799
1800        let mut index = repo.index().unwrap();
1801        index.remove_path(Path::new("old.ts")).unwrap();
1802        index.add_path(Path::new("new.ts")).unwrap();
1803        index.write().unwrap();
1804
1805        let bridge = GitBridge::open(temp.path()).unwrap();
1806        let files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1807        assert_eq!(files.len(), 1);
1808        assert_eq!(files[0].status, FileStatus::Renamed);
1809
1810        let registry = create_default_registry();
1811        let result = compute_semantic_diff(&files, &registry, None, None);
1812
1813        assert_eq!(result.added_count, 0);
1814        assert_eq!(result.deleted_count, 0);
1815        // `foo` is a compound Moved change whose body also changed, so it counts toward
1816        // both moved_count and modified_count.
1817        assert_eq!(result.modified_count, 1);
1818        assert_eq!(result.moved_count, 1);
1819        assert_eq!(result.changes.len(), 1);
1820        assert_eq!(result.changes[0].change_type, ChangeType::Moved);
1821        assert_eq!(result.changes[0].entity_name, "foo");
1822        assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.ts"));
1823        assert_eq!(result.changes[0].structural_change, Some(true));
1824    }
1825
1826    #[test]
1827    fn working_diff_preserves_staged_rename_with_unstaged_edit() {
1828        let temp = TempDir::new().unwrap();
1829        let repo = Repository::init(temp.path()).unwrap();
1830
1831        let before = "\
1832export function foo(x: number) {
1833  return x + 1;
1834}
1835
1836export function bar(y: number) {
1837  return y * 2;
1838}
1839";
1840        let after = "\
1841export function foo(x: number) {
1842  return x + 42;
1843}
1844
1845export function bar(y: number) {
1846  return y * 99;
1847}
1848";
1849
1850        commit_file(&repo, "a.ts", before, "init");
1851
1852        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1853        let mut index = repo.index().unwrap();
1854        index.remove_path(Path::new("a.ts")).unwrap();
1855        index.add_path(Path::new("b.ts")).unwrap();
1856        index.write().unwrap();
1857
1858        fs::write(temp.path().join("b.ts"), after).unwrap();
1859
1860        let bridge = GitBridge::open(temp.path()).unwrap();
1861        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1862
1863        assert!(matches!(scope, DiffScope::Working));
1864        assert_eq!(files.len(), 1);
1865        assert_eq!(files[0].status, FileStatus::Renamed);
1866        assert_eq!(files[0].file_path, "b.ts");
1867        assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1868        assert_eq!(files[0].before_content.as_deref(), Some(before));
1869        assert_eq!(files[0].after_content.as_deref(), Some(after));
1870
1871        let registry = create_default_registry();
1872        let result = compute_semantic_diff(&files, &registry, None, None);
1873
1874        assert_eq!(result.added_count, 0);
1875        assert_eq!(result.deleted_count, 0);
1876        assert_eq!(result.modified_count, 2);
1877        assert_eq!(result.moved_count, 2);
1878        assert_eq!(result.changes.len(), 2);
1879        assert!(result
1880            .changes
1881            .iter()
1882            .all(|change| change.change_type == ChangeType::Moved));
1883        assert!(result
1884            .changes
1885            .iter()
1886            .all(|change| change.old_file_path.as_deref() == Some("a.ts")));
1887        assert!(result
1888            .changes
1889            .iter()
1890            .all(|change| change.structural_change == Some(true)));
1891    }
1892
1893    #[test]
1894    fn working_diff_uses_staged_rename_map_after_large_unstaged_rewrite() {
1895        let temp = TempDir::new().unwrap();
1896        let repo = Repository::init(temp.path()).unwrap();
1897
1898        let before_noise = (0..200)
1899            .map(|i| format!("// old filler {i} alpha beta gamma"))
1900            .collect::<Vec<_>>()
1901            .join("\n");
1902        let after_noise = (0..200)
1903            .map(|i| format!("// new filler {i} delta epsilon zeta"))
1904            .collect::<Vec<_>>()
1905            .join("\n");
1906        let before = format!(
1907            "{before_noise}\nexport function foo(x: number) {{\n  return x + 1;\n}}\n"
1908        );
1909        let after = format!(
1910            "{after_noise}\nexport function foo(x: number) {{\n  return x + 42;\n}}\n"
1911        );
1912
1913        commit_file(&repo, "a.ts", &before, "init");
1914
1915        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1916        let mut index = repo.index().unwrap();
1917        index.remove_path(Path::new("a.ts")).unwrap();
1918        index.add_path(Path::new("b.ts")).unwrap();
1919        index.write().unwrap();
1920
1921        fs::write(temp.path().join("b.ts"), &after).unwrap();
1922
1923        let bridge = GitBridge::open(temp.path()).unwrap();
1924        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1925
1926        assert!(matches!(scope, DiffScope::Working));
1927        assert_eq!(files.len(), 1);
1928        assert_eq!(files[0].status, FileStatus::Renamed);
1929        assert_eq!(files[0].file_path, "b.ts");
1930        assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1931        assert_eq!(files[0].before_content.as_deref(), Some(before.as_str()));
1932        assert_eq!(files[0].after_content.as_deref(), Some(after.as_str()));
1933
1934        let registry = create_default_registry();
1935        let result = compute_semantic_diff(&files, &registry, None, None);
1936
1937        assert_eq!(result.added_count, 0);
1938        assert_eq!(result.deleted_count, 0);
1939        // Two changes: the rewritten comment block is a Modified orphan, and `foo` is a
1940        // compound Moved change whose body also changed, so it counts toward both
1941        // moved_count and modified_count.
1942        assert_eq!(result.modified_count, 2);
1943        assert_eq!(result.moved_count, 1);
1944        assert!(result
1945            .changes
1946            .iter()
1947            .any(|change| change.change_type == ChangeType::Moved && change.entity_name == "foo"));
1948    }
1949
1950    #[test]
1951    fn explicit_ref_to_working_uses_index_rename_map_after_large_unstaged_rewrite() {
1952        let temp = TempDir::new().unwrap();
1953        let repo = Repository::init(temp.path()).unwrap();
1954
1955        let before_noise = (0..200)
1956            .map(|i| format!("// old filler {i} alpha beta gamma"))
1957            .collect::<Vec<_>>()
1958            .join("\n");
1959        let after_noise = (0..200)
1960            .map(|i| format!("// new filler {i} delta epsilon zeta"))
1961            .collect::<Vec<_>>()
1962            .join("\n");
1963        let before = format!(
1964            "{before_noise}\nexport function foo(x: number) {{\n  return x + 1;\n}}\n"
1965        );
1966        let after = format!(
1967            "{after_noise}\nexport function foo(x: number) {{\n  return x + 42;\n}}\n"
1968        );
1969
1970        commit_file(&repo, "a.ts", &before, "init");
1971
1972        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1973        let mut index = repo.index().unwrap();
1974        index.remove_path(Path::new("a.ts")).unwrap();
1975        index.add_path(Path::new("b.ts")).unwrap();
1976        index.write().unwrap();
1977
1978        fs::write(temp.path().join("b.ts"), &after).unwrap();
1979
1980        let bridge = GitBridge::open(temp.path()).unwrap();
1981        let files = bridge
1982            .get_changed_files(
1983                &DiffScope::RefToWorking {
1984                    refspec: "HEAD".to_string(),
1985                },
1986                &[],
1987            )
1988            .unwrap();
1989
1990        assert_eq!(files.len(), 1);
1991        assert_eq!(files[0].status, FileStatus::Renamed);
1992        assert_eq!(files[0].file_path, "b.ts");
1993        assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1994        assert_eq!(files[0].before_content.as_deref(), Some(before.as_str()));
1995        assert_eq!(files[0].after_content.as_deref(), Some(after.as_str()));
1996
1997        let registry = create_default_registry();
1998        let result = compute_semantic_diff(&files, &registry, None, None);
1999
2000        assert_eq!(result.added_count, 0);
2001        assert_eq!(result.deleted_count, 0);
2002        // Two changes: the rewritten comment block is a Modified orphan, and `foo` is a
2003        // compound Moved change whose body also changed, so it counts toward both
2004        // moved_count and modified_count.
2005        assert_eq!(result.modified_count, 2);
2006        assert_eq!(result.moved_count, 1);
2007        assert!(result
2008            .changes
2009            .iter()
2010            .any(|change| change.change_type == ChangeType::Moved && change.entity_name == "foo"));
2011    }
2012
2013    #[test]
2014    fn staged_rename_map_overrides_wrong_worktree_rename_pairing() {
2015        let temp = TempDir::new().unwrap();
2016        let repo = Repository::init(temp.path()).unwrap();
2017
2018        let a_before = "export function foo(x: number) {\n  return x + 1;\n}\n";
2019        let c_before = "export function foo(x: number) {\n  return x + 42;\n}\n";
2020
2021        commit_file(&repo, "a.ts", a_before, "init a");
2022        commit_file(&repo, "c.ts", c_before, "init c");
2023
2024        fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
2025        let mut index = repo.index().unwrap();
2026        index.remove_path(Path::new("a.ts")).unwrap();
2027        index.add_path(Path::new("b.ts")).unwrap();
2028        index.write().unwrap();
2029
2030        fs::remove_file(temp.path().join("c.ts")).unwrap();
2031        fs::write(temp.path().join("b.ts"), c_before).unwrap();
2032
2033        let bridge = GitBridge::open(temp.path()).unwrap();
2034        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
2035
2036        assert!(matches!(scope, DiffScope::Working));
2037        let renamed = files
2038            .iter()
2039            .find(|file| {
2040                file.status == FileStatus::Renamed
2041                    && file.file_path == "b.ts"
2042                    && file.old_file_path.as_deref() == Some("a.ts")
2043            })
2044            .unwrap();
2045        assert_eq!(renamed.before_content.as_deref(), Some(a_before));
2046        assert_eq!(renamed.after_content.as_deref(), Some(c_before));
2047
2048        let deleted = files
2049            .iter()
2050            .find(|file| file.status == FileStatus::Deleted && file.file_path == "c.ts")
2051            .unwrap();
2052        assert_eq!(deleted.before_content.as_deref(), Some(c_before));
2053        assert_eq!(deleted.after_content.as_deref(), None);
2054        assert!(!files.iter().any(|file| {
2055            file.status == FileStatus::Renamed
2056                && file.file_path == "b.ts"
2057                && file.old_file_path.as_deref() == Some("c.ts")
2058        }));
2059    }
2060
2061    #[test]
2062    fn staged_diff_with_base_ref_compares_index_to_that_ref() {
2063        let temp = TempDir::new().unwrap();
2064        let repo = Repository::init(temp.path()).unwrap();
2065
2066        let v1 = "def foo():\n    return 1\n";
2067        let v2 = "def foo():\n    return 2\n";
2068        let v3 = "def foo():\n    return 3\n";
2069        let v4 = "def foo():\n    return 4\n";
2070
2071        commit_file(&repo, "a.py", v1, "init");
2072        commit_file(&repo, "a.py", v2, "second");
2073        fs::write(temp.path().join("a.py"), v3).unwrap();
2074
2075        let mut index = repo.index().unwrap();
2076        index.add_path(Path::new("a.py")).unwrap();
2077        index.write().unwrap();
2078
2079        fs::write(temp.path().join("a.py"), v4).unwrap();
2080
2081        let bridge = GitBridge::open(temp.path()).unwrap();
2082        let files = bridge
2083            .get_staged_files_with_base_ref("HEAD~1", &[])
2084            .unwrap();
2085
2086        assert_eq!(files.len(), 1);
2087        assert_eq!(files[0].status, FileStatus::Modified);
2088        assert_eq!(files[0].file_path, "a.py");
2089        assert_eq!(files[0].before_content.as_deref(), Some(v1));
2090        assert_eq!(files[0].after_content.as_deref(), Some(v3));
2091
2092        let registry = create_default_registry();
2093        let result = compute_semantic_diff(&files, &registry, None, None);
2094
2095        assert_eq!(result.modified_count, 1);
2096        assert_eq!(result.changes.len(), 1);
2097        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
2098        assert_eq!(result.changes[0].entity_name, "foo");
2099    }
2100
2101    #[test]
2102    fn crlf_only_difference_in_working_file_is_invisible() {
2103        let temp = TempDir::new().unwrap();
2104        let repo = Repository::init(temp.path()).unwrap();
2105
2106        commit_file(&repo, "sample.rs", "fn a() {}\n", "init");
2107        fs::write(temp.path().join("sample.rs"), "fn a() {}\r\n").unwrap();
2108
2109        let bridge = GitBridge::open(temp.path()).unwrap();
2110        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
2111
2112        assert_eq!(files.len(), 1, "expected git to detect the CRLF change as modified");
2113
2114        let before = files[0].before_content.as_deref().unwrap();
2115        let after = files[0].after_content.as_deref().unwrap();
2116
2117        assert_eq!(before, after, "CRLF-only difference should be invisible after normalization");
2118    }
2119
2120    #[test]
2121    fn crlf_stored_in_blob_is_normalized_on_read() {
2122        let temp = TempDir::new().unwrap();
2123        let repo = Repository::init(temp.path()).unwrap();
2124
2125        repo.config().unwrap().set_str("core.autocrlf", "false").unwrap();
2126        commit_file(&repo, "sample.rs", "fn a() {}\r\n", "init");
2127        fs::write(temp.path().join("sample.rs"), "fn a() {}\r\nfn b() {}\r\n").unwrap();
2128
2129        let bridge = GitBridge::open(temp.path()).unwrap();
2130        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
2131
2132        assert_eq!(files.len(), 1, "expected git to detect the modification");
2133
2134        let before = files[0].before_content.as_deref().unwrap();
2135        assert!(!before.contains('\r'), "before_content read from CRLF blob should be normalized to LF");
2136    }
2137}