1use std::env;
2use std::fs;
3use std::path::{Component, Path, PathBuf};
4use std::process::Command;
5use std::sync::{Mutex, OnceLock};
6
7use git2::{Blame, Delta, Diff, DiffFindOptions, DiffOptions, ErrorCode, Oid, Repository};
8use thiserror::Error;
9
10use super::types::{CommitInfo, DiffScope, FileChange, FileCommitInfo, FileStatus};
11use super::types::BlameLineInfo;
12
/// Errors produced by the git bridge.
#[derive(Error, Debug)]
pub enum GitError {
    /// No usable repository: returned when discovery reports `NotFound` or
    /// when the discovered repository has no working directory.
    #[error("not a git repository")]
    NotARepo,
    /// An error reported by `git2`; also used to carry failures of the `git`
    /// command-line tool and pathspec validation messages.
    #[error("git error: {0}")]
    Git2(#[from] git2::Error),
    /// An I/O failure (filesystem access or spawning a subprocess).
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
}
22
/// Handle on an opened repository together with the paths needed to resolve
/// user-supplied pathspecs.
pub struct GitBridge {
    // The libgit2 repository handle.
    repo: Repository,
    // Canonicalized working directory of the repository.
    repo_root: PathBuf,
    // Normalized form of the path passed to `open`; relative pathspecs are
    // joined onto it.
    cwd: PathBuf,
}
28
29impl GitBridge {
30 pub fn open(path: &Path) -> Result<Self, GitError> {
31 let cwd = normalize_open_path(path)?;
32 let repo = match Repository::discover(path) {
33 Ok(repo) => repo,
34 Err(error) if should_retry_with_command_line_safe_directory(&error, path) => {
35 let _guard = owner_validation_lock()
36 .lock()
37 .unwrap_or_else(|poisoned| poisoned.into_inner());
38 let _owner_validation = OwnerValidationDisabled::new()?;
39 let repo = Repository::discover(path);
40 repo.map_err(map_git_error)?
41 }
42 Err(error) => return Err(map_git_error(error)),
43 };
44 let repo_root = repo.workdir().ok_or(GitError::NotARepo)?;
45 let repo_root = fs::canonicalize(repo_root)?;
46 Ok(Self {
47 repo,
48 repo_root,
49 cwd,
50 })
51 }
52
/// Returns the canonicalized working-directory root of the repository.
pub fn repo_root(&self) -> &Path {
    &self.repo_root
}
56
57 pub fn get_remote_url(&self) -> Option<String> {
59 self.repo
60 .find_remote("origin")
61 .ok()
62 .and_then(|r| r.url().map(String::from))
63 }
64
65 pub fn resolve_ref_sha(&self, refspec: &str) -> Option<String> {
67 self.repo
68 .revparse_single(refspec)
69 .ok()
70 .and_then(|obj| obj.peel_to_commit().ok())
71 .map(|c| c.id().to_string())
72 }
73
74 pub fn blame_file(&self, file_path: &Path) -> Result<Blame<'_>, GitError> {
75 Ok(self.repo.blame_file(file_path, None)?)
76 }
77
78 pub fn blame_file_porcelain(&self, file_path: &Path) -> Result<Vec<BlameLineInfo>, GitError> {
79 let output = Command::new("git")
80 .arg("-C")
81 .arg(&self.repo_root)
82 .arg("blame")
83 .arg("--line-porcelain")
84 .arg("--")
85 .arg(file_path)
86 .output()?;
87
88 if !output.status.success() {
89 let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
90 return Err(git_command_error(if stderr.is_empty() {
91 format!("git blame exited with {}", output.status)
92 } else {
93 stderr
94 }));
95 }
96
97 let parsed = parse_blame_porcelain(&String::from_utf8_lossy(&output.stdout));
98 if parsed.is_empty() && !output.stdout.is_empty() {
99 return Err(git_command_error(
100 "failed to parse git blame porcelain output".to_string(),
101 ));
102 }
103
104 Ok(parsed)
105 }
106
107 pub fn commit_summary(&self, oid: Oid) -> Option<String> {
108 self.repo
109 .find_commit(oid)
110 .ok()
111 .and_then(|commit| commit.summary().map(String::from))
112 }
113
114 pub fn get_head_sha(&self) -> Result<String, GitError> {
115 let head = self.repo.head()?;
116 let oid = head.target().ok_or_else(|| {
117 git2::Error::from_str("HEAD has no target")
118 })?;
119 Ok(oid.to_string())
120 }
121
122 pub fn detect_and_get_files(&self, pathspecs: &[String]) -> Result<(DiffScope, Vec<FileChange>), GitError> {
126 let mut working_files = self.get_working_diff_files(pathspecs)?;
128 if !working_files.is_empty() {
129 self.populate_contents(&mut working_files, &DiffScope::Working)?;
130 return Ok((DiffScope::Working, working_files));
131 }
132
133 Ok((DiffScope::Working, Vec::new()))
135 }
136
137 pub fn get_changed_files(&self, scope: &DiffScope, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
139 let mut files = match scope {
140 DiffScope::Working => {
141 self.get_working_diff_files(pathspecs)?
142 }
143 DiffScope::Staged => self.get_staged_diff_files(pathspecs)?,
144 DiffScope::Commit { sha } => self.get_commit_diff_files(sha, pathspecs)?,
145 DiffScope::Range { from, to } => self.get_range_diff_files(from, to, pathspecs)?,
146 DiffScope::RefToWorking { refspec } => self.get_ref_to_working_diff_files(refspec, pathspecs)?,
147 };
148
149 files.retain(|f| !f.file_path.starts_with(".sem/"));
151
152 self.populate_contents(&mut files, scope)?;
153 Ok(files)
154 }
155
156 pub fn get_staged_files_with_base_ref(
157 &self,
158 base: &str,
159 pathspecs: &[String],
160 ) -> Result<Vec<FileChange>, GitError> {
161 let mut files = self.get_staged_diff_files_with_base(base, pathspecs)?;
162 files.retain(|f| !f.file_path.starts_with(".sem/"));
163
164 let base_tree = self.resolve_tree(base)?;
165 for file in files.iter_mut() {
166 if file.status != FileStatus::Deleted {
167 file.after_content = self.read_index_file(&file.file_path);
168 }
169 if file.status != FileStatus::Added {
170 let path = file
171 .old_file_path
172 .as_deref()
173 .unwrap_or(&file.file_path);
174 file.before_content = self.read_blob_from_tree(&base_tree, path);
175 }
176 }
177
178 Ok(files)
179 }
180
181 pub fn resolve_merge_base(&self, ref1: &str, ref2: &str) -> Result<String, GitError> {
183 let obj1 = self.repo.revparse_single(ref1)?;
184 let obj2 = self.repo.revparse_single(ref2)?;
185 let oid = self.repo.merge_base(obj1.id(), obj2.id())?;
186 Ok(oid.to_string())
187 }
188
189 pub fn is_valid_rev(&self, refspec: &str) -> bool {
191 self.repo.revparse_single(refspec).is_ok()
192 }
193
194 fn make_diff_opts(&self, pathspecs: &[String]) -> Result<DiffOptions, GitError> {
195 let mut opts = DiffOptions::new();
196 for spec in self.normalize_pathspecs(pathspecs)? {
197 opts.pathspec(spec.as_str());
198 }
199 Ok(opts)
200 }
201
202 fn normalize_pathspecs(&self, pathspecs: &[String]) -> Result<Vec<String>, GitError> {
203 pathspecs
204 .iter()
205 .map(|spec| self.normalize_pathspec(spec))
206 .collect()
207 }
208
209 fn normalize_pathspec(&self, spec: &str) -> Result<String, GitError> {
210 if spec.is_empty() || spec.starts_with(':') {
211 return Ok(spec.to_string());
212 }
213
214 let spec_path = Path::new(spec);
215 let absolute = if spec_path.is_absolute() {
216 normalize_absolute_pathspec(spec_path)
217 } else {
218 normalize_lexical(&self.cwd.join(spec_path))
219 };
220
221 let repo_root = normalize_lexical(&self.repo_root);
222 let relative =
223 absolute
224 .strip_prefix(&repo_root)
225 .map_err(|_| pathspec_outside_repo_error(spec, &self.repo_root))?;
226
227 if relative.as_os_str().is_empty() {
228 Ok(".".to_string())
229 } else {
230 relative
231 .to_str()
232 .map(|path| path.replace('\\', "/"))
233 .ok_or_else(|| non_utf8_pathspec_error(spec))
234 }
235 }
236
237 fn get_staged_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
238 let head_tree = match self.repo.head() {
239 Ok(head) => {
240 let commit = head.peel_to_commit()?;
241 Some(commit.tree()?)
242 }
243 Err(_) => None, };
245
246 self.get_index_diff_files(head_tree.as_ref(), pathspecs)
247 }
248
249 fn get_staged_diff_files_with_base(
250 &self,
251 base: &str,
252 pathspecs: &[String],
253 ) -> Result<Vec<FileChange>, GitError> {
254 let base_tree = self.resolve_tree(base)?;
255 self.get_index_diff_files(Some(&base_tree), pathspecs)
256 }
257
258 fn get_index_diff_files(
259 &self,
260 base_tree: Option<&git2::Tree<'_>>,
261 pathspecs: &[String],
262 ) -> Result<Vec<FileChange>, GitError> {
263 let mut opts = self.make_diff_opts(pathspecs)?;
264 let mut diff = self.repo.diff_tree_to_index(
265 base_tree,
266 Some(&self.repo.index()?),
267 Some(&mut opts),
268 )?;
269 Self::detect_renames(&mut diff)?;
270
271 Ok(self.diff_to_file_changes(&diff))
272 }
273
274 fn get_working_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
275 let mut opts = self.make_diff_opts(pathspecs)?;
276 opts.include_untracked(false);
277
278 let head_tree = self.resolve_tree("HEAD").ok();
279 let mut diff = match head_tree.as_ref() {
280 Some(head_tree) => self
281 .repo
282 .diff_tree_to_workdir_with_index(Some(head_tree), Some(&mut opts))?,
283 None => self.repo.diff_index_to_workdir(None, Some(&mut opts))?,
284 };
285 Self::detect_renames(&mut diff)?;
286 self.apply_index_rename_map(
287 self.diff_to_file_changes(&diff),
288 head_tree.as_ref(),
289 pathspecs,
290 )
291 }
292
/// Rewrites `files` so that renames detected between `base_tree` and the
/// index are reported as renames.
///
/// For every index rename `old -> new`, if `files` contains both an
/// Added/Renamed entry for `new` and a Deleted entry for `old`, the pair is
/// replaced by a single Renamed entry. Without a `base_tree` the list is
/// returned unchanged.
fn apply_index_rename_map(
    &self,
    mut files: Vec<FileChange>,
    base_tree: Option<&git2::Tree<'_>>,
    pathspecs: &[String],
) -> Result<Vec<FileChange>, GitError> {
    let Some(base_tree) = base_tree else {
        return Ok(files);
    };

    // Only the rename entries of the tree-to-index diff are of interest.
    let index_renames: Vec<FileChange> = self
        .get_index_diff_files(Some(base_tree), pathspecs)?
        .into_iter()
        .filter(|file| file.status == FileStatus::Renamed)
        .collect();

    for rename in index_renames {
        let Some(old_path) = rename.old_file_path.clone() else {
            continue;
        };
        // Entry currently describing the rename destination.
        let target_pos = files
            .iter()
            .position(|file| {
                matches!(file.status, FileStatus::Added | FileStatus::Renamed)
                    && file.file_path == rename.file_path
            });
        // Entry currently describing the rename source as a deletion.
        let deleted_pos = files
            .iter()
            .position(|file| {
                file.status == FileStatus::Deleted && file.file_path == old_path
            });

        if let (Some(target_pos), Some(deleted_pos)) = (target_pos, deleted_pos) {
            // Already recorded as exactly this rename: nothing to do.
            if files[target_pos].status == FileStatus::Renamed
                && files[target_pos].old_file_path.as_deref() == Some(old_path.as_str())
            {
                continue;
            }

            let target_file = files[target_pos].clone();
            let deleted_file = files[deleted_pos].clone();
            // If the target was paired with a different source, that source
            // must be re-reported as a plain deletion further down.
            let displaced_deleted_path =
                if target_file.status == FileStatus::Renamed {
                    target_file
                        .old_file_path
                        .as_ref()
                        .filter(|path| *path != &old_path)
                        .cloned()
                } else {
                    None
                };

            // Drop both original entries; the positions were computed against
            // the list as it was before this removal.
            files = files
                .into_iter()
                .enumerate()
                .filter_map(|(idx, file)| {
                    if idx == target_pos || idx == deleted_pos {
                        None
                    } else {
                        Some(file)
                    }
                })
                .collect();
            // Prefer contents already present on the removed entries, falling
            // back to the base tree / working file.
            let before_content = deleted_file
                .before_content
                .or_else(|| self.read_blob_from_tree(base_tree, &old_path));
            let after_content = target_file
                .after_content
                .or_else(|| self.read_working_file(&target_file.file_path));
            files.push(FileChange {
                file_path: target_file.file_path,
                status: FileStatus::Renamed,
                old_file_path: Some(old_path),
                before_content,
                after_content,
            });
            if let Some(file_path) = displaced_deleted_path {
                let before_content = self.read_blob_from_tree(base_tree, &file_path);
                files.push(FileChange {
                    file_path,
                    status: FileStatus::Deleted,
                    old_file_path: None,
                    before_content,
                    after_content: None,
                });
            }
        }
    }

    Ok(files)
}
384
385 fn get_commit_diff_files(&self, sha: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
386 let obj = self.repo.revparse_single(sha)?;
387 let commit = obj.peel_to_commit()?;
388 let tree = commit.tree()?;
389
390 let parent_tree = if commit.parent_count() > 0 {
391 Some(commit.parent(0)?.tree()?)
392 } else {
393 None
394 };
395
396 let mut opts = self.make_diff_opts(pathspecs)?;
397 let mut diff = self.repo.diff_tree_to_tree(
398 parent_tree.as_ref(),
399 Some(&tree),
400 Some(&mut opts),
401 )?;
402 Self::detect_renames(&mut diff)?;
403
404 Ok(self.diff_to_file_changes(&diff))
405 }
406
407 fn get_range_diff_files(&self, from: &str, to: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
408 let from_obj = self.repo.revparse_single(from)?;
409 let to_obj = self.repo.revparse_single(to)?;
410
411 let from_tree = from_obj.peel_to_commit()?.tree()?;
412 let to_tree = to_obj.peel_to_commit()?.tree()?;
413
414 let mut opts = self.make_diff_opts(pathspecs)?;
415 let mut diff = self.repo.diff_tree_to_tree(
416 Some(&from_tree),
417 Some(&to_tree),
418 Some(&mut opts),
419 )?;
420 Self::detect_renames(&mut diff)?;
421
422 Ok(self.diff_to_file_changes(&diff))
423 }
424
425 fn get_ref_to_working_diff_files(&self, refspec: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
426 let tree = self.resolve_tree(refspec)?;
427 let mut opts = self.make_diff_opts(pathspecs)?;
428 let mut diff = self.repo.diff_tree_to_workdir_with_index(
429 Some(&tree),
430 Some(&mut opts),
431 )?;
432 Self::detect_renames(&mut diff)?;
433 self.apply_index_rename_map(self.diff_to_file_changes(&diff), Some(&tree), pathspecs)
434 }
435
436 fn detect_renames(diff: &mut Diff) -> Result<(), GitError> {
437 let mut opts = DiffFindOptions::new();
438 opts.renames(true);
439 diff.find_similar(Some(&mut opts))?;
440 Ok(())
441 }
442
443 fn diff_to_file_changes(&self, diff: &Diff) -> Vec<FileChange> {
444 let mut files = Vec::new();
445
446 for delta in diff.deltas() {
447 let (status, file_path, old_file_path) = match delta.status() {
448 Delta::Added => {
449 let path = delta
450 .new_file()
451 .path()
452 .and_then(|p| p.to_str())
453 .unwrap_or("")
454 .to_string();
455 (FileStatus::Added, path, None)
456 }
457 Delta::Deleted => {
458 let path = delta
459 .old_file()
460 .path()
461 .and_then(|p| p.to_str())
462 .unwrap_or("")
463 .to_string();
464 (FileStatus::Deleted, path, None)
465 }
466 Delta::Modified => {
467 let path = delta
468 .new_file()
469 .path()
470 .and_then(|p| p.to_str())
471 .unwrap_or("")
472 .to_string();
473 (FileStatus::Modified, path, None)
474 }
475 Delta::Renamed => {
476 let new_path = delta
477 .new_file()
478 .path()
479 .and_then(|p| p.to_str())
480 .unwrap_or("")
481 .to_string();
482 let old_path = delta
483 .old_file()
484 .path()
485 .and_then(|p| p.to_str())
486 .unwrap_or("")
487 .to_string();
488 (FileStatus::Renamed, new_path, Some(old_path))
489 }
490 _ => continue,
491 };
492
493 if !file_path.starts_with(".sem/") {
494 files.push(FileChange {
495 file_path,
496 status,
497 old_file_path,
498 before_content: None,
499 after_content: None,
500 });
501 }
502 }
503
504 files
505 }
506
/// Heuristically decides whether `bytes` are binary rather than text.
///
/// Any NUL byte means binary. Otherwise the data is binary when it is not
/// valid UTF-8, except that an incomplete buffer (`complete == false`) that
/// merely ends in the middle of a multi-byte sequence still counts as text.
fn bytes_look_binary(bytes: &[u8], complete: bool) -> bool {
    if bytes.contains(&0) {
        return true;
    }

    let Err(utf8_error) = std::str::from_utf8(bytes) else {
        return false;
    };
    // `error_len() == None` means the input ended mid-sequence.
    complete || utf8_error.error_len().is_some()
}
517
518 fn populate_contents(
519 &self,
520 files: &mut [FileChange],
521 scope: &DiffScope,
522 ) -> Result<(), GitError> {
523 match scope {
524 DiffScope::Working => {
525 let head_tree = self.resolve_tree("HEAD").ok();
527 for file in files.iter_mut() {
528 if file.status != FileStatus::Deleted {
529 file.after_content = self.read_working_file(&file.file_path);
530 }
531 if file.status != FileStatus::Added {
532 let path = file
533 .old_file_path
534 .as_deref()
535 .unwrap_or(&file.file_path);
536 file.before_content = head_tree
537 .as_ref()
538 .and_then(|t| self.read_blob_from_tree(t, path));
539 }
540 }
541 }
542 DiffScope::Staged => {
543 let head_tree = self.resolve_tree("HEAD").ok();
544 for file in files.iter_mut() {
545 if file.status != FileStatus::Deleted {
546 file.after_content = self
547 .read_index_file(&file.file_path)
548 .or_else(|| self.read_working_file(&file.file_path));
549 }
550 if file.status != FileStatus::Added {
551 let path = file
552 .old_file_path
553 .as_deref()
554 .unwrap_or(&file.file_path);
555 file.before_content = head_tree
556 .as_ref()
557 .and_then(|t| self.read_blob_from_tree(t, path));
558 }
559 }
560 }
561 DiffScope::Commit { sha } => {
562 let after_tree = self.resolve_tree(sha)?;
564 let before_tree = self.resolve_tree(&format!("{sha}~1")).ok();
565 for file in files.iter_mut() {
566 if file.status != FileStatus::Deleted {
567 file.after_content =
568 self.read_blob_from_tree(&after_tree, &file.file_path);
569 }
570 if file.status != FileStatus::Added {
571 let path = file
572 .old_file_path
573 .as_deref()
574 .unwrap_or(&file.file_path);
575 file.before_content = before_tree
576 .as_ref()
577 .and_then(|t| self.read_blob_from_tree(t, path));
578 }
579 }
580 }
581 DiffScope::Range { from, to } => {
582 let after_tree = self.resolve_tree(to)?;
583 let before_tree = self.resolve_tree(from)?;
584 for file in files.iter_mut() {
585 if file.status != FileStatus::Deleted {
586 file.after_content =
587 self.read_blob_from_tree(&after_tree, &file.file_path);
588 }
589 if file.status != FileStatus::Added {
590 let path = file
591 .old_file_path
592 .as_deref()
593 .unwrap_or(&file.file_path);
594 file.before_content =
595 self.read_blob_from_tree(&before_tree, path);
596 }
597 }
598 }
599 DiffScope::RefToWorking { refspec } => {
600 let before_tree = self.resolve_tree(refspec)?;
601 for file in files.iter_mut() {
602 if file.status != FileStatus::Deleted {
603 file.after_content = self.read_working_file(&file.file_path);
604 }
605 if file.status != FileStatus::Added {
606 let path = file
607 .old_file_path
608 .as_deref()
609 .unwrap_or(&file.file_path);
610 file.before_content =
611 self.read_blob_from_tree(&before_tree, path);
612 }
613 }
614 }
615 }
616 Ok(())
617 }
618
619 fn resolve_tree(&self, refspec: &str) -> Result<git2::Tree<'_>, GitError> {
620 let obj = self.repo.revparse_single(refspec)?;
621 let commit = obj.peel_to_commit()?;
622 Ok(commit.tree()?)
623 }
624
/// Converts `\r\n` and lone `\r` line endings to `\n`. Strings without any
/// `\r` are returned unchanged.
fn normalize_line_endings(s: String) -> String {
    if !s.contains('\r') {
        return s;
    }

    let mut unified = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch != '\r' {
            unified.push(ch);
            continue;
        }
        // Swallow the `\n` of a CRLF pair so it yields one newline.
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
        unified.push('\n');
    }
    unified
}
632
633 fn read_blob_from_tree(&self, tree: &git2::Tree, file_path: &str) -> Option<String> {
634 let entry = tree.get_path(Path::new(file_path)).ok()?;
635 let blob = self.repo.find_blob(entry.id()).ok()?;
636 let bytes = blob.content();
637 if blob.is_binary() || Self::bytes_look_binary(bytes, true) {
638 return None;
639 }
640 std::str::from_utf8(bytes)
641 .ok()
642 .map(|s| Self::normalize_line_endings(s.to_string()))
643 }
644
645 fn read_working_file(&self, file_path: &str) -> Option<String> {
646 let full_path = self.repo_root.join(file_path);
647 let bytes = fs::read(full_path).ok()?;
648 if Self::bytes_look_binary(&bytes, true) {
649 return None;
650 }
651 String::from_utf8(bytes)
652 .ok()
653 .map(Self::normalize_line_endings)
654 }
655
656 fn read_index_file(&self, file_path: &str) -> Option<String> {
657 let index = self.repo.index().ok()?;
658 let entry = index.get_path(Path::new(file_path), 0)?;
659 let blob = self.repo.find_blob(entry.id).ok()?;
660 let bytes = blob.content();
661 if blob.is_binary() || Self::bytes_look_binary(bytes, true) {
662 return None;
663 }
664 std::str::from_utf8(bytes)
665 .ok()
666 .map(|s| Self::normalize_line_endings(s.to_string()))
667 }
668
669
670 pub fn read_file_at_ref(&self, refspec: &str, file_path: &str) -> Result<Option<String>, GitError> {
672 let tree = self.resolve_tree(refspec)?;
673 Ok(self.read_blob_from_tree(&tree, file_path))
674 }
675
676 pub fn get_file_commits(&self, file_path: &str, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
679 let mut revwalk = self.repo.revwalk()?;
680 revwalk.push_head()?;
681 revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::TIME)?;
682
683 let mut commits = Vec::new();
684 let path = Path::new(file_path);
685
686 for oid_result in revwalk {
687 let oid = oid_result?;
688 let commit = self.repo.find_commit(oid)?;
689 let tree = commit.tree()?;
690
691 let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
693
694 let file_in_parent = if commit.parent_count() > 0 {
696 commit.parent(0)
697 .ok()
698 .and_then(|p| p.tree().ok())
699 .and_then(|t| t.get_path(path).ok().map(|e| e.id()))
700 } else {
701 None };
703
704 let changed = match (file_in_commit, file_in_parent) {
706 (Some(cur), Some(prev)) => cur != prev, (Some(_), None) => true, (None, Some(_)) => true, (None, None) => false, };
711
712 if changed {
713 let sha = oid.to_string();
714 commits.push(CommitInfo {
715 short_sha: sha[..7.min(sha.len())].to_string(),
716 sha,
717 author: commit.author().name().unwrap_or("unknown").to_string(),
718 date: commit.time().seconds().to_string(),
719 message: commit.message().unwrap_or("").to_string(),
720 });
721
722 if limit != 0 && commits.len() >= limit {
723 break;
724 }
725 }
726 }
727
728 Ok(commits)
729 }
730
/// Returns the commits that touched `file_path`, following renames
/// backwards through history. Each entry records the path the file had at
/// that commit. `limit == 0` means no limit.
///
/// The `git log --follow` based implementation is tried first; the libgit2
/// walk below is used when it yields nothing or the `git` executable is not
/// found. Any other CLI error is returned as-is.
pub fn get_file_commits_follow_renames(
    &self,
    file_path: &str,
    limit: usize,
) -> Result<Vec<FileCommitInfo>, GitError> {
    match self.get_file_commits_follow_renames_cli(file_path, limit) {
        Ok(commits) if !commits.is_empty() => return Ok(commits),
        Ok(_) => {}
        Err(GitError::Io(error)) if error.kind() == std::io::ErrorKind::NotFound => {}
        Err(error) => return Err(error),
    }

    let mut revwalk = self.repo.revwalk()?;
    revwalk.push_head()?;
    revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::TIME)?;

    let mut results = Vec::new();
    // Path the file is known by at the commit currently being visited; it is
    // switched to the older name whenever a rename is crossed.
    let mut tracked_path = file_path.to_string();

    for oid_result in revwalk {
        let oid = oid_result?;
        let commit = self.repo.find_commit(oid)?;
        let tree = commit.tree()?;

        let path = Path::new(&tracked_path);
        let file_in_commit = tree.get_path(path).ok().map(|e| e.id());

        // Only the first parent is compared against.
        let (parent_tree_opt, file_in_parent) = if commit.parent_count() > 0 {
            let parent = commit.parent(0)?;
            let ptree = parent.tree()?;
            let fip = ptree.get_path(path).ok().map(|e| e.id());
            (Some(ptree), fip)
        } else {
            (None, None)
        };

        // Changed = content differs, or the path exists on only one side.
        let changed = match (file_in_commit, file_in_parent) {
            (Some(cur), Some(prev)) => cur != prev,
            (Some(_), None) => true,
            (None, Some(_)) => true,
            (None, None) => false,
        };

        if changed {
            let sha_str = oid.to_string();
            results.push(FileCommitInfo {
                commit: CommitInfo {
                    short_sha: sha_str[..7.min(sha_str.len())].to_string(),
                    sha: sha_str,
                    author: commit.author().name().unwrap_or("unknown").to_string(),
                    date: commit.time().seconds().to_string(),
                    message: commit.message().unwrap_or("").to_string(),
                },
                file_path: tracked_path.clone(),
            });

            if limit != 0 && results.len() >= limit {
                break;
            }
        }

        // A rename can only be involved when the path is missing on one side
        // of a commit that has a parent.
        let should_check_rename =
            parent_tree_opt.is_some() && (file_in_parent.is_none() || file_in_commit.is_none());
        if should_check_rename {
            let mut diff = self.repo.diff_tree_to_tree(
                parent_tree_opt.as_ref(),
                Some(&tree),
                None,
            )?;
            let mut find_opts = DiffFindOptions::new();
            find_opts.renames(true);
            diff.find_similar(Some(&mut find_opts))?;

            let mut found_rename = false;
            for delta in diff.deltas() {
                if delta.status() == Delta::Renamed {
                    let new_path = delta
                        .new_file()
                        .path()
                        .and_then(|p| p.to_str())
                        .unwrap_or("");
                    if new_path == tracked_path {
                        let old_path = delta
                            .old_file()
                            .path()
                            .and_then(|p| p.to_str())
                            .unwrap_or("")
                            .to_string();
                        if !old_path.is_empty() {
                            // Continue the walk under the pre-rename name.
                            tracked_path = old_path;
                            found_rename = true;
                            break;
                        }
                    }
                }
            }

            // Absent from this commit and not explained by a rename: stop
            // walking.
            if !found_rename && file_in_commit.is_none() {
                break;
            }
        }
    }

    Ok(results)
}
844
845 fn get_file_commits_follow_renames_cli(
846 &self,
847 file_path: &str,
848 limit: usize,
849 ) -> Result<Vec<FileCommitInfo>, GitError> {
850 let mut command = Command::new("git");
851 command
852 .arg("-C")
853 .arg(&self.repo_root)
854 .arg("log")
855 .arg("--follow")
856 .arg("--format=\x1e%H\x1f%an\x1f%at\x1f%s")
857 .arg("--name-status");
858 if limit != 0 {
859 command.arg("-n").arg(limit.to_string());
860 }
861 command.arg("--").arg(file_path);
862
863 let output = command.output()?;
864 if !output.status.success() {
865 let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
866 return Err(git_command_error(if stderr.is_empty() {
867 format!("git log exited with {}", output.status)
868 } else {
869 stderr
870 }));
871 }
872
873 let stdout = String::from_utf8_lossy(&output.stdout);
874 let mut tracked_path = file_path.to_string();
875 let mut commits = Vec::new();
876
877 for record in stdout.split('\x1e') {
878 let record = record.trim_start_matches('\n');
879 if record.trim().is_empty() {
880 continue;
881 }
882
883 let mut lines = record.lines();
884 let Some(meta) = lines.next() else {
885 continue;
886 };
887 let mut parts = meta.splitn(4, '\x1f');
888 let Some(sha) = parts.next() else {
889 continue;
890 };
891 let Some(author) = parts.next() else {
892 continue;
893 };
894 let Some(date) = parts.next() else {
895 continue;
896 };
897 let message = parts.next().unwrap_or_default();
898
899 let commit_path = tracked_path.clone();
900 let mut previous_path = None;
901 for line in lines {
902 let fields: Vec<&str> = line.split('\t').collect();
903 if fields.len() >= 3 && fields[0].starts_with('R') && fields[2] == tracked_path {
904 previous_path = Some(fields[1].to_string());
905 }
906 }
907
908 commits.push(FileCommitInfo {
909 commit: CommitInfo {
910 short_sha: sha[..7.min(sha.len())].to_string(),
911 sha: sha.to_string(),
912 author: author.to_string(),
913 date: date.to_string(),
914 message: message.to_string(),
915 },
916 file_path: commit_path,
917 });
918
919 if let Some(previous_path) = previous_path {
920 tracked_path = previous_path;
921 }
922 }
923
924 Ok(commits)
925 }
926
927 pub fn get_commit_changed_files(&self, sha: &str) -> Result<Vec<String>, GitError> {
930 let obj = self.repo.revparse_single(sha)?;
931 let commit = obj.peel_to_commit()?;
932 let tree = commit.tree()?;
933 let parent_tree = if commit.parent_count() > 0 {
934 Some(commit.parent(0)?.tree()?)
935 } else {
936 None
937 };
938 let diff = self.repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None)?;
939 let mut paths = Vec::new();
940 for delta in diff.deltas() {
941 if let Some(p) = delta.new_file().path().and_then(|p| p.to_str()) {
942 paths.push(p.to_string());
943 }
944 if let Some(p) = delta.old_file().path().and_then(|p| p.to_str()) {
946 if !paths.contains(&p.to_string()) {
947 paths.push(p.to_string());
948 }
949 }
950 }
951 Ok(paths)
952 }
953
954 pub fn get_log(&self, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
955 let mut revwalk = self.repo.revwalk()?;
956 revwalk.push_head()?;
957
958 let mut commits = Vec::new();
959 for (i, oid_result) in revwalk.enumerate() {
960 if limit != 0 && i >= limit {
961 break;
962 }
963 let oid = oid_result?;
964 let commit = self.repo.find_commit(oid)?;
965 let sha = oid.to_string();
966 commits.push(CommitInfo {
967 short_sha: sha[..7.min(sha.len())].to_string(),
968 sha,
969 author: commit.author().name().unwrap_or("unknown").to_string(),
970 date: commit.time().seconds().to_string(),
971 message: commit.message().unwrap_or("").to_string(),
972 });
973 }
974
975 Ok(commits)
976 }
977}
978
979fn parse_blame_porcelain(output: &str) -> Vec<BlameLineInfo> {
980 let lines: Vec<&str> = output.lines().collect();
981 let mut parsed = Vec::new();
982 let mut index = 0;
983
984 while index < lines.len() {
985 let Some((raw_sha, line_number)) = parse_blame_header(lines[index]) else {
986 index += 1;
987 continue;
988 };
989 index += 1;
990
991 let mut author = String::new();
992 let mut author_time = None;
993 let mut summary = String::new();
994
995 while index < lines.len() {
996 let line = lines[index];
997 index += 1;
998
999 if line.starts_with('\t') {
1000 break;
1001 } else if let Some(value) = line.strip_prefix("author ") {
1002 author = value.to_string();
1003 } else if let Some(value) = line.strip_prefix("author-time ") {
1004 author_time = value.parse::<i64>().ok();
1005 } else if let Some(value) = line.strip_prefix("summary ") {
1006 summary = value.to_string();
1007 }
1008 }
1009
1010 let sha = raw_sha.trim_start_matches('^');
1011 let commit_sha = if sha.chars().all(|c| c == '0') {
1012 None
1013 } else {
1014 Some(sha.to_string())
1015 };
1016
1017 if author.is_empty() {
1018 author = if commit_sha.is_none() {
1019 "Not Committed Yet".to_string()
1020 } else {
1021 "unknown".to_string()
1022 };
1023 }
1024
1025 parsed.push(BlameLineInfo {
1026 line_number,
1027 commit_sha,
1028 author,
1029 author_time,
1030 summary,
1031 });
1032 }
1033
1034 parsed.sort_by_key(|line| line.line_number);
1035 parsed
1036}
1037
1038fn parse_blame_header(line: &str) -> Option<(&str, usize)> {
1039 let mut parts = line.split_whitespace();
1040 let sha = parts.next()?;
1041 if !is_blame_oid(sha) {
1042 return None;
1043 }
1044 parts.next()?;
1045 let final_line = parts.next()?.parse().ok()?;
1046 Some((sha, final_line))
1047}
1048
/// Reports whether `value` looks like an object id in blame output.
///
/// An optional leading `^` is ignored. Both 40-digit (SHA-1) and 64-digit
/// (SHA-256 object format) hexadecimal ids are accepted, so blame output
/// from SHA-256 repositories is recognized as well.
fn is_blame_oid(value: &str) -> bool {
    let hex = value.strip_prefix('^').unwrap_or(value);
    matches!(hex.len(), 40 | 64) && hex.bytes().all(|byte| byte.is_ascii_hexdigit())
}
1053
1054fn git_command_error(message: String) -> GitError {
1055 GitError::Git2(git2::Error::from_str(&message))
1056}
1057
1058fn map_git_error(error: git2::Error) -> GitError {
1059 if error.code() == ErrorCode::NotFound {
1060 GitError::NotARepo
1061 } else {
1062 GitError::Git2(error)
1063 }
1064}
1065
1066fn should_retry_with_command_line_safe_directory(error: &git2::Error, path: &Path) -> bool {
1067 let safe_directories = command_line_safe_directories();
1068 should_retry_with_safe_directory(error, path, &safe_directories)
1069}
1070
1071fn should_retry_with_safe_directory(error: &git2::Error, path: &Path, safe_directories: &[String]) -> bool {
1072 error.code() == ErrorCode::Owner
1073 && nearest_git_root(path).is_some_and(|repo_root| {
1074 safe_directories.iter().any(|safe_directory| {
1075 safe_directory == "*"
1076 || paths_match(&repo_root, Path::new(safe_directory))
1077 })
1078 })
1079}
1080
/// Collects `safe.directory` values from the `GIT_CONFIG_COUNT` /
/// `GIT_CONFIG_KEY_<n>` / `GIT_CONFIG_VALUE_<n>` environment variables.
///
/// A missing or unparsable count yields an empty list. Keys are compared
/// case-insensitively.
fn command_line_safe_directories() -> Vec<String> {
    let count = match env::var("GIT_CONFIG_COUNT") {
        Ok(raw) => raw.parse::<usize>().unwrap_or(0),
        Err(_) => 0,
    };

    let mut directories = Vec::new();
    for index in 0..count {
        let Ok(key) = env::var(format!("GIT_CONFIG_KEY_{index}")) else {
            continue;
        };
        if !key.eq_ignore_ascii_case("safe.directory") {
            continue;
        }
        if let Ok(value) = env::var(format!("GIT_CONFIG_VALUE_{index}")) {
            directories.push(value);
        }
    }
    directories
}
1098
/// Finds the closest ancestor of `path` (or `path` itself) that contains a
/// `.git` entry and returns it canonicalized.
///
/// The starting path is resolved to an absolute path first, so a relative
/// path such as `.` still walks up through all of its real ancestors; if
/// resolution fails the path is walked as given. When `path` is a file the
/// search starts at its parent directory. Returns `None` when no ancestor
/// contains `.git`.
fn nearest_git_root(path: &Path) -> Option<PathBuf> {
    // `Path::parent` only strips components that are literally present, so a
    // relative path would run out of ancestors after one or two steps.
    let start = fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
    let first_dir = if start.is_file() {
        start.parent()?
    } else {
        start.as_path()
    };

    first_dir
        .ancestors()
        .find(|dir| dir.join(".git").exists())
        .map(|dir| fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf()))
}
1114
/// Compares two paths after canonicalizing each (falling back to the path
/// as given when canonicalization fails). On Windows the comparison ignores
/// ASCII case.
fn paths_match(left: &Path, right: &Path) -> bool {
    let resolve = |path: &Path| fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
    let (left, right) = (resolve(left), resolve(right));

    if !cfg!(windows) {
        return left == right;
    }
    left.to_string_lossy()
        .eq_ignore_ascii_case(&right.to_string_lossy())
}
1126
/// Returns the process-wide mutex used while owner validation is toggled.
/// The mutex is created on first use.
fn owner_validation_lock() -> &'static Mutex<()> {
    static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
    LOCK.get_or_init(Mutex::default)
}
1131
/// Guard that turns libgit2 owner validation off when created and back on
/// when dropped.
struct OwnerValidationDisabled;

impl OwnerValidationDisabled {
    /// Disables owner validation and returns the guard that re-enables it.
    fn new() -> Result<Self, GitError> {
        // NOTE(review): presumably `unsafe` because this changes a
        // process-global libgit2 option; `GitBridge::open` holds
        // `owner_validation_lock()` around the guard's lifetime — confirm no
        // other code toggles it unsynchronized.
        unsafe { git2::opts::set_verify_owner_validation(false)? };
        Ok(Self)
    }
}

impl Drop for OwnerValidationDisabled {
    /// Re-enables owner validation; a failure here is deliberately ignored
    /// because `drop` cannot report it.
    fn drop(&mut self) {
        unsafe {
            let _ = git2::opts::set_verify_owner_validation(true);
        }
    }
}
1150
1151fn normalize_open_path(path: &Path) -> Result<PathBuf, GitError> {
1152 let canonical = match fs::canonicalize(path) {
1153 Ok(canonical) => canonical,
1154 Err(_) if path.is_absolute() => normalize_lexical(path),
1155 Err(_) => normalize_lexical(&env::current_dir()?.join(path)),
1156 };
1157
1158 Ok(if canonical.is_file() {
1159 canonical
1160 .parent()
1161 .map(Path::to_path_buf)
1162 .unwrap_or(canonical)
1163 } else {
1164 canonical
1165 })
1166}
1167
1168fn normalize_absolute_pathspec(path: &Path) -> PathBuf {
1169 let path = normalize_lexical(path);
1170 let Some(leaf) = path.file_name() else {
1171 return fs::canonicalize(&path).unwrap_or(path);
1172 };
1173 let mut trailing_components = vec![leaf.to_os_string()];
1174
1175 let Some(parent) = path.parent() else {
1176 return path;
1177 };
1178
1179 for ancestor in parent.ancestors() {
1180 if ancestor.exists() {
1181 let mut normalized =
1182 fs::canonicalize(ancestor).unwrap_or_else(|_| normalize_lexical(ancestor));
1183 for component in trailing_components.iter().rev() {
1184 normalized.push(component);
1185 }
1186 return normalized;
1187 }
1188
1189 let Some(name) = ancestor.file_name() else {
1190 return path;
1191 };
1192 trailing_components.push(name.to_os_string());
1193 }
1194
1195 path
1196}
1197
1198fn pathspec_outside_repo_error(pathspec: &str, repo_root: &Path) -> GitError {
1199 GitError::Git2(git2::Error::from_str(&format!(
1200 "pathspec '{pathspec}' is outside repository '{}'",
1201 repo_root.display()
1202 )))
1203}
1204
1205fn non_utf8_pathspec_error(pathspec: &str) -> GitError {
1206 GitError::Git2(git2::Error::from_str(&format!(
1207 "pathspec '{pathspec}' is not valid UTF-8 after normalization"
1208 )))
1209}
1210
/// Normalizes `path` purely lexically, without touching the filesystem.
///
/// * `.` components are dropped.
/// * `..` removes the preceding normal component.
/// * `..` at the root of a rooted path is discarded.
/// * Leading `..` components of a relative path are preserved, including
///   several in a row (`../../a` stays `../../a`).
///
/// Symlinks are not resolved, so the result may differ from
/// `fs::canonicalize` for paths that traverse them.
fn normalize_lexical(path: &Path) -> PathBuf {
    let mut normalized = PathBuf::new();

    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // `PathBuf::pop` succeeds on a buffer that is just `..`, which
                // would let a second `..` cancel the first. Stack them instead.
                let ends_with_parent = matches!(
                    normalized.components().next_back(),
                    Some(Component::ParentDir)
                );
                if ends_with_parent || (!normalized.pop() && !normalized.has_root()) {
                    normalized.push("..");
                }
            }
            Component::Normal(part) => normalized.push(part),
            Component::Prefix(prefix) => normalized.push(prefix.as_os_str()),
            Component::RootDir => normalized.push(component.as_os_str()),
        }
    }

    normalized
}
1230
1231#[cfg(test)]
1232mod tests {
1233 use super::*;
1234 use crate::model::change::ChangeType;
1235 use crate::parser::differ::{collect_binary_file_changes, compute_semantic_diff};
1236 use crate::parser::plugins::create_default_registry;
1237 use git2::{ErrorClass, Oid, Repository, Signature};
1238 use tempfile::TempDir;
1239
1240 fn commit_file(repo: &Repository, file_path: &str, contents: &str, message: &str) -> Oid {
1241 fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
1242
1243 let mut index = repo.index().unwrap();
1244 index.add_path(Path::new(file_path)).unwrap();
1245 index.write().unwrap();
1246
1247 let tree_id = index.write_tree().unwrap();
1248 let tree = repo.find_tree(tree_id).unwrap();
1249 let sig = Signature::now("Test User", "test@example.com").unwrap();
1250
1251 match repo.head() {
1252 Ok(head) => {
1253 let parent = repo.find_commit(head.target().unwrap()).unwrap();
1254 repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
1255 .unwrap()
1256 }
1257 Err(_) => repo
1258 .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
1259 .unwrap(),
1260 }
1261 }
1262
1263 fn commit_binary_file(
1264 repo: &Repository,
1265 file_path: &str,
1266 contents: &[u8],
1267 message: &str,
1268 ) -> Oid {
1269 fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
1270
1271 let mut index = repo.index().unwrap();
1272 index.add_path(Path::new(file_path)).unwrap();
1273 index.write().unwrap();
1274
1275 let tree_id = index.write_tree().unwrap();
1276 let tree = repo.find_tree(tree_id).unwrap();
1277 let sig = Signature::now("Test User", "test@example.com").unwrap();
1278
1279 match repo.head() {
1280 Ok(head) => {
1281 let parent = repo.find_commit(head.target().unwrap()).unwrap();
1282 repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
1283 .unwrap()
1284 }
1285 Err(_) => repo
1286 .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
1287 .unwrap(),
1288 }
1289 }
1290
1291 #[test]
1292 fn porcelain_blame_reports_uncommitted_lines() {
1293 let temp = TempDir::new().unwrap();
1294 let repo = Repository::init(temp.path()).unwrap();
1295
1296 commit_file(&repo, "a.py", "def foo():\n return 1\n", "init");
1297 fs::write(temp.path().join("a.py"), "def foo():\n return 2\n").unwrap();
1298
1299 let bridge = GitBridge::open(temp.path()).unwrap();
1300 let blame = bridge.blame_file_porcelain(Path::new("a.py")).unwrap();
1301
1302 assert!(blame[0].commit_sha.is_some());
1303 assert_eq!(blame[1].commit_sha, None);
1304 assert_eq!(blame[1].author, "Not Committed Yet");
1305 }
1306
1307 #[test]
1308 fn clean_worktree_does_not_fall_back_to_head_commit() {
1309 let temp = TempDir::new().unwrap();
1310 let repo = Repository::init(temp.path()).unwrap();
1311
1312 commit_file(&repo, "sample.ts", "export function a() {\n return 1;\n}\n", "init");
1313 commit_file(
1314 &repo,
1315 "sample.ts",
1316 "export function a() {\n return 2;\n}\n",
1317 "change a",
1318 );
1319
1320 let bridge = GitBridge::open(temp.path()).unwrap();
1321 let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1322
1323 assert!(matches!(scope, DiffScope::Working));
1324 assert!(files.is_empty());
1325 }
1326
1327 #[test]
1328 fn owner_error_retries_for_command_line_safe_directory() {
1329 let temp = TempDir::new().unwrap();
1330 Repository::init(temp.path()).unwrap();
1331
1332 let owner_error = git2::Error::new(
1333 ErrorCode::Owner,
1334 ErrorClass::Config,
1335 "owner mismatch",
1336 );
1337 let safe_directories = [temp.path().to_string_lossy().to_string()];
1338
1339 assert!(should_retry_with_safe_directory(
1340 &owner_error,
1341 temp.path(),
1342 &safe_directories,
1343 ));
1344
1345 let other_directories = [temp.path().join("other").to_string_lossy().to_string()];
1346 assert!(!should_retry_with_safe_directory(
1347 &owner_error,
1348 temp.path(),
1349 &other_directories,
1350 ));
1351
1352 let not_found_error = git2::Error::new(
1353 ErrorCode::NotFound,
1354 ErrorClass::Repository,
1355 "not found",
1356 );
1357 assert!(!should_retry_with_safe_directory(
1358 ¬_found_error,
1359 temp.path(),
1360 &["*".to_string()],
1361 ));
1362 }
1363
1364 #[test]
1365 fn explicit_commit_scope_still_reads_head_commit_diff() {
1366 let temp = TempDir::new().unwrap();
1367 let repo = Repository::init(temp.path()).unwrap();
1368
1369 commit_file(&repo, "sample.ts", "export function a() {\n return 1;\n}\n", "init");
1370 let head_oid = commit_file(
1371 &repo,
1372 "sample.ts",
1373 "export function a() {\n return 2;\n}\n",
1374 "change a",
1375 );
1376
1377 let bridge = GitBridge::open(temp.path()).unwrap();
1378 let files = bridge
1379 .get_changed_files(&DiffScope::Commit {
1380 sha: head_oid.to_string(),
1381 }, &[])
1382 .unwrap();
1383
1384 assert_eq!(files.len(), 1);
1385 assert_eq!(files[0].file_path, "sample.ts");
1386 assert_eq!(files[0].status, FileStatus::Modified);
1387 }
1388
1389 #[test]
1390 fn pathspecs_are_normalized_from_open_directory() {
1391 let temp = TempDir::new().unwrap();
1392 let repo = Repository::init(temp.path()).unwrap();
1393 fs::create_dir_all(temp.path().join("pkg")).unwrap();
1394
1395 commit_file(&repo, "pkg/a.py", "def foo():\n return 1\n", "init");
1396 fs::write(temp.path().join("pkg/a.py"), "def foo():\n return 2\n").unwrap();
1397
1398 let bridge = GitBridge::open(&temp.path().join("pkg")).unwrap();
1399 let relative_files = bridge
1400 .get_changed_files(&DiffScope::Working, &["a.py".to_string()])
1401 .unwrap();
1402
1403 assert_eq!(relative_files.len(), 1);
1404 assert_eq!(relative_files[0].file_path, "pkg/a.py");
1405
1406 let absolute_path = temp.path().join("pkg/a.py").to_string_lossy().to_string();
1407 let absolute_files = bridge
1408 .get_changed_files(&DiffScope::Working, &[absolute_path])
1409 .unwrap();
1410
1411 assert_eq!(absolute_files.len(), 1);
1412 assert_eq!(absolute_files[0].file_path, "pkg/a.py");
1413 }
1414
1415 #[test]
1416 fn absolute_deleted_pathspecs_are_normalized_from_existing_parent() {
1417 let temp = TempDir::new().unwrap();
1418 let repo = Repository::init(temp.path()).unwrap();
1419 fs::create_dir_all(temp.path().join("pkg")).unwrap();
1420
1421 commit_file(
1422 &repo,
1423 "pkg/deleted.py",
1424 "def foo():\n return 1\n",
1425 "init",
1426 );
1427 let absolute_path = temp
1428 .path()
1429 .join("pkg/deleted.py")
1430 .to_string_lossy()
1431 .to_string();
1432 fs::remove_file(temp.path().join("pkg/deleted.py")).unwrap();
1433
1434 let bridge = GitBridge::open(&temp.path().join("pkg")).unwrap();
1435 let files = bridge
1436 .get_changed_files(&DiffScope::Working, &[absolute_path])
1437 .unwrap();
1438
1439 assert_eq!(files.len(), 1);
1440 assert_eq!(files[0].file_path, "pkg/deleted.py");
1441 assert_eq!(files[0].status, FileStatus::Deleted);
1442 }
1443
1444 #[test]
1445 fn absolute_missing_pathspecs_preserve_trailing_component_order() {
1446 let temp = TempDir::new().unwrap();
1447 let existing_parent = temp.path().join("existing");
1448 fs::create_dir(&existing_parent).unwrap();
1449
1450 let pathspec = existing_parent.join("missing").join("leaf.py");
1451 let normalized = normalize_absolute_pathspec(&pathspec);
1452
1453 let mut expected = fs::canonicalize(&existing_parent).unwrap();
1454 expected.push("missing");
1455 expected.push("leaf.py");
1456 assert_eq!(normalized, expected);
1457 }
1458
1459 #[test]
1460 fn absolute_pathspecs_outside_repo_are_rejected() {
1461 let repo_dir = TempDir::new().unwrap();
1462 let outside_dir = TempDir::new().unwrap();
1463 let repo = Repository::init(repo_dir.path()).unwrap();
1464
1465 commit_file(&repo, "sample.py", "def foo():\n return 1\n", "init");
1466 fs::write(
1467 repo_dir.path().join("sample.py"),
1468 "def foo():\n return 2\n",
1469 )
1470 .unwrap();
1471 let outside_path = outside_dir.path().join("outside.py");
1472 fs::write(&outside_path, "def outside():\n return 1\n").unwrap();
1473
1474 let bridge = GitBridge::open(repo_dir.path()).unwrap();
1475 let err = bridge
1476 .get_changed_files(
1477 &DiffScope::Working,
1478 &[outside_path.to_string_lossy().to_string()],
1479 )
1480 .unwrap_err();
1481
1482 let message = err.to_string();
1483 assert!(message.contains("pathspec"));
1484 assert!(message.contains("is outside repository"));
1485 }
1486
1487 #[test]
1488 fn working_binary_modification_is_reported_as_binary_change() {
1489 let temp = TempDir::new().unwrap();
1490 let repo = Repository::init(temp.path()).unwrap();
1491
1492 commit_binary_file(&repo, "pic.png", b"\0png-v1\0", "init");
1493 fs::write(temp.path().join("pic.png"), b"\0png-v2\0extra").unwrap();
1494
1495 let bridge = GitBridge::open(temp.path()).unwrap();
1496 let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1497
1498 assert_eq!(files.len(), 1);
1499 assert_eq!(files[0].file_path, "pic.png");
1500 assert_eq!(files[0].status, FileStatus::Modified);
1501 assert!(files[0].before_content.is_none());
1502 assert!(files[0].after_content.is_none());
1503
1504 let binary_changes = collect_binary_file_changes(&files);
1505 let registry = create_default_registry();
1506 let result = compute_semantic_diff(&files, ®istry, None, None);
1507
1508 assert!(result.changes.is_empty());
1509 assert_eq!(result.file_count, 0);
1510 assert_eq!(binary_changes.len(), 1);
1511 assert_eq!(binary_changes[0].file_path, "pic.png");
1512 assert_eq!(binary_changes[0].status, FileStatus::Modified);
1513 }
1514
1515 #[test]
1516 fn staged_binary_add_and_delete_are_reported_as_binary_changes() {
1517 let temp = TempDir::new().unwrap();
1518 let repo = Repository::init(temp.path()).unwrap();
1519
1520 fs::write(temp.path().join("added.png"), b"\0added-binary\0").unwrap();
1521 let mut index = repo.index().unwrap();
1522 index.add_path(Path::new("added.png")).unwrap();
1523 index.write().unwrap();
1524
1525 let bridge = GitBridge::open(temp.path()).unwrap();
1526 let added_files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1527 assert_eq!(added_files.len(), 1);
1528 assert_eq!(added_files[0].file_path, "added.png");
1529 assert_eq!(added_files[0].status, FileStatus::Added);
1530 assert!(added_files[0].before_content.is_none());
1531 assert!(added_files[0].after_content.is_none());
1532 let added_binary_changes = collect_binary_file_changes(&added_files);
1533 assert_eq!(added_binary_changes.len(), 1);
1534 assert_eq!(added_binary_changes[0].file_path, "added.png");
1535
1536 let temp = TempDir::new().unwrap();
1537 let repo = Repository::init(temp.path()).unwrap();
1538 commit_binary_file(&repo, "deleted.png", b"\0deleted-binary\0", "init");
1539 fs::remove_file(temp.path().join("deleted.png")).unwrap();
1540 let mut index = repo.index().unwrap();
1541 index.remove_path(Path::new("deleted.png")).unwrap();
1542 index.write().unwrap();
1543
1544 let bridge = GitBridge::open(temp.path()).unwrap();
1545 let deleted_files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1546 assert_eq!(deleted_files.len(), 1);
1547 assert_eq!(deleted_files[0].file_path, "deleted.png");
1548 assert_eq!(deleted_files[0].status, FileStatus::Deleted);
1549 assert!(deleted_files[0].before_content.is_none());
1550 assert!(deleted_files[0].after_content.is_none());
1551 let deleted_binary_changes = collect_binary_file_changes(&deleted_files);
1552 assert_eq!(deleted_binary_changes.len(), 1);
1553 assert_eq!(deleted_binary_changes[0].file_path, "deleted.png");
1554 }
1555
1556 #[test]
1557 fn partial_utf8_boundary_is_not_treated_as_binary() {
1558 assert!(!GitBridge::bytes_look_binary(&[0xe2, 0x82], false));
1559 assert!(GitBridge::bytes_look_binary(&[0xe2, 0x82], true));
1560 }
1561
1562 #[test]
1563 fn staged_file_rename_is_reported_as_single_rename_with_old_contents() {
1564 let temp = TempDir::new().unwrap();
1565 let repo = Repository::init(temp.path()).unwrap();
1566
1567 let contents = "export function foo() {\n return 1;\n}\n";
1568 commit_file(&repo, "old.ts", contents, "init");
1569
1570 fs::rename(temp.path().join("old.ts"), temp.path().join("new.ts")).unwrap();
1571 let mut index = repo.index().unwrap();
1572 index.remove_path(Path::new("old.ts")).unwrap();
1573 index.add_path(Path::new("new.ts")).unwrap();
1574 index.write().unwrap();
1575
1576 let bridge = GitBridge::open(temp.path()).unwrap();
1577 let files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1578
1579 assert_eq!(files.len(), 1);
1580 assert_eq!(files[0].status, FileStatus::Renamed);
1581 assert_eq!(files[0].file_path, "new.ts");
1582 assert_eq!(files[0].old_file_path.as_deref(), Some("old.ts"));
1583 assert_eq!(files[0].before_content.as_deref(), Some(contents));
1584 assert_eq!(files[0].after_content.as_deref(), Some(contents));
1585 }
1586
1587 #[test]
1588 fn staged_file_rename_with_edit_reports_single_moved_entity() {
1589 let temp = TempDir::new().unwrap();
1590 let repo = Repository::init(temp.path()).unwrap();
1591
1592 let before = "\
1593// shared header 01
1594// shared header 02
1595// shared header 03
1596// shared header 04
1597// shared header 05
1598// shared header 06
1599// shared header 07
1600// shared header 08
1601// shared header 09
1602// shared header 10
1603export function foo() {
1604 return alpha + beta + gamma;
1605}
1606";
1607 let after = before.replace(
1608 "return alpha + beta + gamma;",
1609 "return one + two + three;",
1610 );
1611
1612 commit_file(&repo, "old.ts", before, "init");
1613 fs::rename(temp.path().join("old.ts"), temp.path().join("new.ts")).unwrap();
1614 fs::write(temp.path().join("new.ts"), &after).unwrap();
1615
1616 let mut index = repo.index().unwrap();
1617 index.remove_path(Path::new("old.ts")).unwrap();
1618 index.add_path(Path::new("new.ts")).unwrap();
1619 index.write().unwrap();
1620
1621 let bridge = GitBridge::open(temp.path()).unwrap();
1622 let files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1623 assert_eq!(files.len(), 1);
1624 assert_eq!(files[0].status, FileStatus::Renamed);
1625
1626 let registry = create_default_registry();
1627 let result = compute_semantic_diff(&files, ®istry, None, None);
1628
1629 assert_eq!(result.added_count, 0);
1630 assert_eq!(result.deleted_count, 0);
1631 assert_eq!(result.modified_count, 1);
1634 assert_eq!(result.moved_count, 1);
1635 assert_eq!(result.changes.len(), 1);
1636 assert_eq!(result.changes[0].change_type, ChangeType::Moved);
1637 assert_eq!(result.changes[0].entity_name, "foo");
1638 assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.ts"));
1639 assert_eq!(result.changes[0].structural_change, Some(true));
1640 }
1641
1642 #[test]
1643 fn working_diff_preserves_staged_rename_with_unstaged_edit() {
1644 let temp = TempDir::new().unwrap();
1645 let repo = Repository::init(temp.path()).unwrap();
1646
1647 let before = "\
1648export function foo(x: number) {
1649 return x + 1;
1650}
1651
1652export function bar(y: number) {
1653 return y * 2;
1654}
1655";
1656 let after = "\
1657export function foo(x: number) {
1658 return x + 42;
1659}
1660
1661export function bar(y: number) {
1662 return y * 99;
1663}
1664";
1665
1666 commit_file(&repo, "a.ts", before, "init");
1667
1668 fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1669 let mut index = repo.index().unwrap();
1670 index.remove_path(Path::new("a.ts")).unwrap();
1671 index.add_path(Path::new("b.ts")).unwrap();
1672 index.write().unwrap();
1673
1674 fs::write(temp.path().join("b.ts"), after).unwrap();
1675
1676 let bridge = GitBridge::open(temp.path()).unwrap();
1677 let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1678
1679 assert!(matches!(scope, DiffScope::Working));
1680 assert_eq!(files.len(), 1);
1681 assert_eq!(files[0].status, FileStatus::Renamed);
1682 assert_eq!(files[0].file_path, "b.ts");
1683 assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1684 assert_eq!(files[0].before_content.as_deref(), Some(before));
1685 assert_eq!(files[0].after_content.as_deref(), Some(after));
1686
1687 let registry = create_default_registry();
1688 let result = compute_semantic_diff(&files, ®istry, None, None);
1689
1690 assert_eq!(result.added_count, 0);
1691 assert_eq!(result.deleted_count, 0);
1692 assert_eq!(result.modified_count, 2);
1693 assert_eq!(result.moved_count, 2);
1694 assert_eq!(result.changes.len(), 2);
1695 assert!(result
1696 .changes
1697 .iter()
1698 .all(|change| change.change_type == ChangeType::Moved));
1699 assert!(result
1700 .changes
1701 .iter()
1702 .all(|change| change.old_file_path.as_deref() == Some("a.ts")));
1703 assert!(result
1704 .changes
1705 .iter()
1706 .all(|change| change.structural_change == Some(true)));
1707 }
1708
1709 #[test]
1710 fn working_diff_uses_staged_rename_map_after_large_unstaged_rewrite() {
1711 let temp = TempDir::new().unwrap();
1712 let repo = Repository::init(temp.path()).unwrap();
1713
1714 let before_noise = (0..200)
1715 .map(|i| format!("// old filler {i} alpha beta gamma"))
1716 .collect::<Vec<_>>()
1717 .join("\n");
1718 let after_noise = (0..200)
1719 .map(|i| format!("// new filler {i} delta epsilon zeta"))
1720 .collect::<Vec<_>>()
1721 .join("\n");
1722 let before = format!(
1723 "{before_noise}\nexport function foo(x: number) {{\n return x + 1;\n}}\n"
1724 );
1725 let after = format!(
1726 "{after_noise}\nexport function foo(x: number) {{\n return x + 42;\n}}\n"
1727 );
1728
1729 commit_file(&repo, "a.ts", &before, "init");
1730
1731 fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1732 let mut index = repo.index().unwrap();
1733 index.remove_path(Path::new("a.ts")).unwrap();
1734 index.add_path(Path::new("b.ts")).unwrap();
1735 index.write().unwrap();
1736
1737 fs::write(temp.path().join("b.ts"), &after).unwrap();
1738
1739 let bridge = GitBridge::open(temp.path()).unwrap();
1740 let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1741
1742 assert!(matches!(scope, DiffScope::Working));
1743 assert_eq!(files.len(), 1);
1744 assert_eq!(files[0].status, FileStatus::Renamed);
1745 assert_eq!(files[0].file_path, "b.ts");
1746 assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1747 assert_eq!(files[0].before_content.as_deref(), Some(before.as_str()));
1748 assert_eq!(files[0].after_content.as_deref(), Some(after.as_str()));
1749
1750 let registry = create_default_registry();
1751 let result = compute_semantic_diff(&files, ®istry, None, None);
1752
1753 assert_eq!(result.added_count, 0);
1754 assert_eq!(result.deleted_count, 0);
1755 assert_eq!(result.modified_count, 2);
1759 assert_eq!(result.moved_count, 1);
1760 assert!(result
1761 .changes
1762 .iter()
1763 .any(|change| change.change_type == ChangeType::Moved && change.entity_name == "foo"));
1764 }
1765
1766 #[test]
1767 fn explicit_ref_to_working_uses_index_rename_map_after_large_unstaged_rewrite() {
1768 let temp = TempDir::new().unwrap();
1769 let repo = Repository::init(temp.path()).unwrap();
1770
1771 let before_noise = (0..200)
1772 .map(|i| format!("// old filler {i} alpha beta gamma"))
1773 .collect::<Vec<_>>()
1774 .join("\n");
1775 let after_noise = (0..200)
1776 .map(|i| format!("// new filler {i} delta epsilon zeta"))
1777 .collect::<Vec<_>>()
1778 .join("\n");
1779 let before = format!(
1780 "{before_noise}\nexport function foo(x: number) {{\n return x + 1;\n}}\n"
1781 );
1782 let after = format!(
1783 "{after_noise}\nexport function foo(x: number) {{\n return x + 42;\n}}\n"
1784 );
1785
1786 commit_file(&repo, "a.ts", &before, "init");
1787
1788 fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1789 let mut index = repo.index().unwrap();
1790 index.remove_path(Path::new("a.ts")).unwrap();
1791 index.add_path(Path::new("b.ts")).unwrap();
1792 index.write().unwrap();
1793
1794 fs::write(temp.path().join("b.ts"), &after).unwrap();
1795
1796 let bridge = GitBridge::open(temp.path()).unwrap();
1797 let files = bridge
1798 .get_changed_files(
1799 &DiffScope::RefToWorking {
1800 refspec: "HEAD".to_string(),
1801 },
1802 &[],
1803 )
1804 .unwrap();
1805
1806 assert_eq!(files.len(), 1);
1807 assert_eq!(files[0].status, FileStatus::Renamed);
1808 assert_eq!(files[0].file_path, "b.ts");
1809 assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1810 assert_eq!(files[0].before_content.as_deref(), Some(before.as_str()));
1811 assert_eq!(files[0].after_content.as_deref(), Some(after.as_str()));
1812
1813 let registry = create_default_registry();
1814 let result = compute_semantic_diff(&files, ®istry, None, None);
1815
1816 assert_eq!(result.added_count, 0);
1817 assert_eq!(result.deleted_count, 0);
1818 assert_eq!(result.modified_count, 2);
1822 assert_eq!(result.moved_count, 1);
1823 assert!(result
1824 .changes
1825 .iter()
1826 .any(|change| change.change_type == ChangeType::Moved && change.entity_name == "foo"));
1827 }
1828
1829 #[test]
1830 fn staged_rename_map_overrides_wrong_worktree_rename_pairing() {
1831 let temp = TempDir::new().unwrap();
1832 let repo = Repository::init(temp.path()).unwrap();
1833
1834 let a_before = "export function foo(x: number) {\n return x + 1;\n}\n";
1835 let c_before = "export function foo(x: number) {\n return x + 42;\n}\n";
1836
1837 commit_file(&repo, "a.ts", a_before, "init a");
1838 commit_file(&repo, "c.ts", c_before, "init c");
1839
1840 fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1841 let mut index = repo.index().unwrap();
1842 index.remove_path(Path::new("a.ts")).unwrap();
1843 index.add_path(Path::new("b.ts")).unwrap();
1844 index.write().unwrap();
1845
1846 fs::remove_file(temp.path().join("c.ts")).unwrap();
1847 fs::write(temp.path().join("b.ts"), c_before).unwrap();
1848
1849 let bridge = GitBridge::open(temp.path()).unwrap();
1850 let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1851
1852 assert!(matches!(scope, DiffScope::Working));
1853 let renamed = files
1854 .iter()
1855 .find(|file| {
1856 file.status == FileStatus::Renamed
1857 && file.file_path == "b.ts"
1858 && file.old_file_path.as_deref() == Some("a.ts")
1859 })
1860 .unwrap();
1861 assert_eq!(renamed.before_content.as_deref(), Some(a_before));
1862 assert_eq!(renamed.after_content.as_deref(), Some(c_before));
1863
1864 let deleted = files
1865 .iter()
1866 .find(|file| file.status == FileStatus::Deleted && file.file_path == "c.ts")
1867 .unwrap();
1868 assert_eq!(deleted.before_content.as_deref(), Some(c_before));
1869 assert_eq!(deleted.after_content.as_deref(), None);
1870 assert!(!files.iter().any(|file| {
1871 file.status == FileStatus::Renamed
1872 && file.file_path == "b.ts"
1873 && file.old_file_path.as_deref() == Some("c.ts")
1874 }));
1875 }
1876
1877 #[test]
1878 fn staged_diff_with_base_ref_compares_index_to_that_ref() {
1879 let temp = TempDir::new().unwrap();
1880 let repo = Repository::init(temp.path()).unwrap();
1881
1882 let v1 = "def foo():\n return 1\n";
1883 let v2 = "def foo():\n return 2\n";
1884 let v3 = "def foo():\n return 3\n";
1885 let v4 = "def foo():\n return 4\n";
1886
1887 commit_file(&repo, "a.py", v1, "init");
1888 commit_file(&repo, "a.py", v2, "second");
1889 fs::write(temp.path().join("a.py"), v3).unwrap();
1890
1891 let mut index = repo.index().unwrap();
1892 index.add_path(Path::new("a.py")).unwrap();
1893 index.write().unwrap();
1894
1895 fs::write(temp.path().join("a.py"), v4).unwrap();
1896
1897 let bridge = GitBridge::open(temp.path()).unwrap();
1898 let files = bridge
1899 .get_staged_files_with_base_ref("HEAD~1", &[])
1900 .unwrap();
1901
1902 assert_eq!(files.len(), 1);
1903 assert_eq!(files[0].status, FileStatus::Modified);
1904 assert_eq!(files[0].file_path, "a.py");
1905 assert_eq!(files[0].before_content.as_deref(), Some(v1));
1906 assert_eq!(files[0].after_content.as_deref(), Some(v3));
1907
1908 let registry = create_default_registry();
1909 let result = compute_semantic_diff(&files, ®istry, None, None);
1910
1911 assert_eq!(result.modified_count, 1);
1912 assert_eq!(result.changes.len(), 1);
1913 assert_eq!(result.changes[0].change_type, ChangeType::Modified);
1914 assert_eq!(result.changes[0].entity_name, "foo");
1915 }
1916
1917 #[test]
1918 fn crlf_only_difference_in_working_file_is_invisible() {
1919 let temp = TempDir::new().unwrap();
1920 let repo = Repository::init(temp.path()).unwrap();
1921
1922 commit_file(&repo, "sample.rs", "fn a() {}\n", "init");
1923 fs::write(temp.path().join("sample.rs"), "fn a() {}\r\n").unwrap();
1924
1925 let bridge = GitBridge::open(temp.path()).unwrap();
1926 let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1927
1928 assert_eq!(files.len(), 1, "expected git to detect the CRLF change as modified");
1929
1930 let before = files[0].before_content.as_deref().unwrap();
1931 let after = files[0].after_content.as_deref().unwrap();
1932
1933 assert_eq!(before, after, "CRLF-only difference should be invisible after normalization");
1934 }
1935
1936 #[test]
1937 fn crlf_stored_in_blob_is_normalized_on_read() {
1938 let temp = TempDir::new().unwrap();
1939 let repo = Repository::init(temp.path()).unwrap();
1940
1941 repo.config().unwrap().set_str("core.autocrlf", "false").unwrap();
1942 commit_file(&repo, "sample.rs", "fn a() {}\r\n", "init");
1943 fs::write(temp.path().join("sample.rs"), "fn a() {}\r\nfn b() {}\r\n").unwrap();
1944
1945 let bridge = GitBridge::open(temp.path()).unwrap();
1946 let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1947
1948 assert_eq!(files.len(), 1, "expected git to detect the modification");
1949
1950 let before = files[0].before_content.as_deref().unwrap();
1951 assert!(!before.contains('\r'), "before_content read from CRLF blob should be normalized to LF");
1952 }
1953}