1use std::env;
2use std::fs;
3use std::path::{Component, Path, PathBuf};
4use std::process::Command;
5use std::sync::{Mutex, OnceLock};
6
7use git2::{Blame, Delta, Diff, DiffFindOptions, DiffOptions, ErrorCode, Oid, Repository};
8use thiserror::Error;
9
10use super::types::{CommitInfo, DiffScope, FileChange, FileCommitInfo, FileStatus};
11use super::types::BlameLineInfo;
12
/// Errors surfaced by the git bridge in this module.
#[derive(Error, Debug)]
pub enum GitError {
    /// No repository was found, or the repository has no working directory.
    #[error("not a git repository")]
    NotARepo,
    /// A `git2` failure. Also used in this file to carry messages from failed
    /// `git` command-line invocations and pathspec validation errors.
    #[error("git error: {0}")]
    Git2(#[from] git2::Error),
    /// An I/O failure (spawning `git`, canonicalizing paths, reading the
    /// current directory).
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
}
22
/// A discovered git repository together with the paths used to resolve
/// user-supplied pathspecs.
pub struct GitBridge {
    /// The opened `git2` repository handle.
    repo: Repository,
    /// Canonicalized root of the repository's working directory.
    repo_root: PathBuf,
    /// Normalized directory that relative pathspecs are joined onto.
    cwd: PathBuf,
}
28
29impl GitBridge {
    /// Opens the repository that contains `path`.
    ///
    /// `path` is passed to `Repository::discover`; its normalized form (the
    /// parent directory when `path` is a file) is remembered as the base for
    /// relative pathspecs.
    ///
    /// # Errors
    ///
    /// Returns [`GitError::NotARepo`] when discovery reports `NotFound` or the
    /// repository has no working directory, [`GitError::Io`] when the current
    /// directory or the repository root cannot be resolved, and
    /// [`GitError::Git2`] for any other git failure.
    pub fn open(path: &Path) -> Result<Self, GitError> {
        let cwd = normalize_open_path(path)?;
        let repo = match Repository::discover(path) {
            Ok(repo) => repo,
            // The ownership check failed, but a `safe.directory` entry passed
            // through the GIT_CONFIG_* environment variables covers this
            // repository: retry once with owner validation switched off.
            Err(error) if should_retry_with_command_line_safe_directory(&error, path) => {
                // Serialize the toggle across concurrent `open` calls. A
                // poisoned lock is still usable because it guards no data.
                let _guard = owner_validation_lock()
                    .lock()
                    .unwrap_or_else(|poisoned| poisoned.into_inner());
                // Guard value: validation is re-enabled when it is dropped at
                // the end of this arm, before `_guard` releases the lock.
                let _owner_validation = OwnerValidationDisabled::new()?;
                let repo = Repository::discover(path);
                repo.map_err(map_git_error)?
            }
            Err(error) => return Err(map_git_error(error)),
        };
        // A repository without a working directory is reported as "not a repo".
        let repo_root = repo.workdir().ok_or(GitError::NotARepo)?;
        let repo_root = fs::canonicalize(repo_root)?;
        Ok(Self {
            repo,
            repo_root,
            cwd,
        })
    }
52
53 pub fn repo_root(&self) -> &Path {
54 &self.repo_root
55 }
56
57 pub fn blame_file(&self, file_path: &Path) -> Result<Blame<'_>, GitError> {
58 Ok(self.repo.blame_file(file_path, None)?)
59 }
60
61 pub fn blame_file_porcelain(&self, file_path: &Path) -> Result<Vec<BlameLineInfo>, GitError> {
62 let output = Command::new("git")
63 .arg("-C")
64 .arg(&self.repo_root)
65 .arg("blame")
66 .arg("--line-porcelain")
67 .arg("--")
68 .arg(file_path)
69 .output()?;
70
71 if !output.status.success() {
72 let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
73 return Err(git_command_error(if stderr.is_empty() {
74 format!("git blame exited with {}", output.status)
75 } else {
76 stderr
77 }));
78 }
79
80 let parsed = parse_blame_porcelain(&String::from_utf8_lossy(&output.stdout));
81 if parsed.is_empty() && !output.stdout.is_empty() {
82 return Err(git_command_error(
83 "failed to parse git blame porcelain output".to_string(),
84 ));
85 }
86
87 Ok(parsed)
88 }
89
90 pub fn commit_summary(&self, oid: Oid) -> Option<String> {
91 self.repo
92 .find_commit(oid)
93 .ok()
94 .and_then(|commit| commit.summary().map(String::from))
95 }
96
97 pub fn get_head_sha(&self) -> Result<String, GitError> {
98 let head = self.repo.head()?;
99 let oid = head.target().ok_or_else(|| {
100 git2::Error::from_str("HEAD has no target")
101 })?;
102 Ok(oid.to_string())
103 }
104
105 pub fn detect_and_get_files(&self, pathspecs: &[String]) -> Result<(DiffScope, Vec<FileChange>), GitError> {
109 let mut working_files = self.get_working_diff_files(pathspecs)?;
111 if !working_files.is_empty() {
112 self.populate_contents(&mut working_files, &DiffScope::Working)?;
113 return Ok((DiffScope::Working, working_files));
114 }
115
116 Ok((DiffScope::Working, Vec::new()))
118 }
119
120 pub fn get_changed_files(&self, scope: &DiffScope, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
122 let mut files = match scope {
123 DiffScope::Working => {
124 self.get_working_diff_files(pathspecs)?
125 }
126 DiffScope::Staged => self.get_staged_diff_files(pathspecs)?,
127 DiffScope::Commit { sha } => self.get_commit_diff_files(sha, pathspecs)?,
128 DiffScope::Range { from, to } => self.get_range_diff_files(from, to, pathspecs)?,
129 DiffScope::RefToWorking { refspec } => self.get_ref_to_working_diff_files(refspec, pathspecs)?,
130 };
131
132 files.retain(|f| !f.file_path.starts_with(".sem/"));
134
135 self.populate_contents(&mut files, scope)?;
136 Ok(files)
137 }
138
139 pub fn get_staged_files_with_base_ref(
140 &self,
141 base: &str,
142 pathspecs: &[String],
143 ) -> Result<Vec<FileChange>, GitError> {
144 let mut files = self.get_staged_diff_files_with_base(base, pathspecs)?;
145 files.retain(|f| !f.file_path.starts_with(".sem/"));
146
147 let base_tree = self.resolve_tree(base)?;
148 for file in files.iter_mut() {
149 if file.status != FileStatus::Deleted {
150 file.after_content = self.read_index_file(&file.file_path);
151 }
152 if file.status != FileStatus::Added {
153 let path = file
154 .old_file_path
155 .as_deref()
156 .unwrap_or(&file.file_path);
157 file.before_content = self.read_blob_from_tree(&base_tree, path);
158 }
159 }
160
161 Ok(files)
162 }
163
164 pub fn resolve_merge_base(&self, ref1: &str, ref2: &str) -> Result<String, GitError> {
166 let obj1 = self.repo.revparse_single(ref1)?;
167 let obj2 = self.repo.revparse_single(ref2)?;
168 let oid = self.repo.merge_base(obj1.id(), obj2.id())?;
169 Ok(oid.to_string())
170 }
171
172 pub fn is_valid_rev(&self, refspec: &str) -> bool {
174 self.repo.revparse_single(refspec).is_ok()
175 }
176
177 fn make_diff_opts(&self, pathspecs: &[String]) -> Result<DiffOptions, GitError> {
178 let mut opts = DiffOptions::new();
179 for spec in self.normalize_pathspecs(pathspecs)? {
180 opts.pathspec(spec.as_str());
181 }
182 Ok(opts)
183 }
184
185 fn normalize_pathspecs(&self, pathspecs: &[String]) -> Result<Vec<String>, GitError> {
186 pathspecs
187 .iter()
188 .map(|spec| self.normalize_pathspec(spec))
189 .collect()
190 }
191
192 fn normalize_pathspec(&self, spec: &str) -> Result<String, GitError> {
193 if spec.is_empty() || spec.starts_with(':') {
194 return Ok(spec.to_string());
195 }
196
197 let spec_path = Path::new(spec);
198 let absolute = if spec_path.is_absolute() {
199 normalize_absolute_pathspec(spec_path)
200 } else {
201 normalize_lexical(&self.cwd.join(spec_path))
202 };
203
204 let repo_root = normalize_lexical(&self.repo_root);
205 let relative =
206 absolute
207 .strip_prefix(&repo_root)
208 .map_err(|_| pathspec_outside_repo_error(spec, &self.repo_root))?;
209
210 if relative.as_os_str().is_empty() {
211 Ok(".".to_string())
212 } else {
213 relative
214 .to_str()
215 .map(|path| path.replace('\\', "/"))
216 .ok_or_else(|| non_utf8_pathspec_error(spec))
217 }
218 }
219
220 fn get_staged_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
221 let head_tree = match self.repo.head() {
222 Ok(head) => {
223 let commit = head.peel_to_commit()?;
224 Some(commit.tree()?)
225 }
226 Err(_) => None, };
228
229 self.get_index_diff_files(head_tree.as_ref(), pathspecs)
230 }
231
232 fn get_staged_diff_files_with_base(
233 &self,
234 base: &str,
235 pathspecs: &[String],
236 ) -> Result<Vec<FileChange>, GitError> {
237 let base_tree = self.resolve_tree(base)?;
238 self.get_index_diff_files(Some(&base_tree), pathspecs)
239 }
240
241 fn get_index_diff_files(
242 &self,
243 base_tree: Option<&git2::Tree<'_>>,
244 pathspecs: &[String],
245 ) -> Result<Vec<FileChange>, GitError> {
246 let mut opts = self.make_diff_opts(pathspecs)?;
247 let mut diff = self.repo.diff_tree_to_index(
248 base_tree,
249 Some(&self.repo.index()?),
250 Some(&mut opts),
251 )?;
252 Self::detect_renames(&mut diff)?;
253
254 Ok(self.diff_to_file_changes(&diff))
255 }
256
257 fn get_working_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
258 let mut opts = self.make_diff_opts(pathspecs)?;
259 opts.include_untracked(false);
260
261 let head_tree = self.resolve_tree("HEAD").ok();
262 let mut diff = match head_tree.as_ref() {
263 Some(head_tree) => self
264 .repo
265 .diff_tree_to_workdir_with_index(Some(head_tree), Some(&mut opts))?,
266 None => self.repo.diff_index_to_workdir(None, Some(&mut opts))?,
267 };
268 Self::detect_renames(&mut diff)?;
269 self.apply_index_rename_map(
270 self.diff_to_file_changes(&diff),
271 head_tree.as_ref(),
272 pathspecs,
273 )
274 }
275
    /// Rewrites `files` so that renames visible in the index diff against
    /// `base_tree` are reported as renames.
    ///
    /// For each index rename `old -> new`, if `files` contains both an
    /// added/renamed entry at `new` and a deleted entry at `old`, the pair is
    /// replaced by a single `Renamed` entry appended to the end of the list.
    /// Without a `base_tree` the list is returned unchanged.
    fn apply_index_rename_map(
        &self,
        mut files: Vec<FileChange>,
        base_tree: Option<&git2::Tree<'_>>,
        pathspecs: &[String],
    ) -> Result<Vec<FileChange>, GitError> {
        let Some(base_tree) = base_tree else {
            return Ok(files);
        };

        // Only the renames from the base-tree-to-index diff are of interest.
        let index_renames: Vec<FileChange> = self
            .get_index_diff_files(Some(base_tree), pathspecs)?
            .into_iter()
            .filter(|file| file.status == FileStatus::Renamed)
            .collect();

        for rename in index_renames {
            let Some(old_path) = rename.old_file_path.clone() else {
                continue;
            };
            // Positions are looked up afresh for every rename because `files`
            // is rebuilt below.
            let target_pos = files
                .iter()
                .position(|file| {
                    matches!(file.status, FileStatus::Added | FileStatus::Renamed)
                        && file.file_path == rename.file_path
                });
            let deleted_pos = files
                .iter()
                .position(|file| {
                    file.status == FileStatus::Deleted && file.file_path == old_path
                });

            if let (Some(target_pos), Some(deleted_pos)) = (target_pos, deleted_pos) {
                // Already reported as exactly this rename: nothing to do.
                if files[target_pos].status == FileStatus::Renamed
                    && files[target_pos].old_file_path.as_deref() == Some(old_path.as_str())
                {
                    continue;
                }

                let target_file = files[target_pos].clone();
                let deleted_file = files[deleted_pos].clone();
                // The target was paired with a different old path; that old
                // path is re-emitted below as a plain deletion.
                let displaced_deleted_path =
                    if target_file.status == FileStatus::Renamed {
                        target_file
                            .old_file_path
                            .as_ref()
                            .filter(|path| *path != &old_path)
                            .cloned()
                    } else {
                        None
                    };

                // Drop both original entries, keeping the order of the rest.
                files = files
                    .into_iter()
                    .enumerate()
                    .filter_map(|(idx, file)| {
                        if idx == target_pos || idx == deleted_pos {
                            None
                        } else {
                            Some(file)
                        }
                    })
                    .collect();
                // Keep contents already present; otherwise read them from the
                // base tree and the working directory.
                let before_content = deleted_file
                    .before_content
                    .or_else(|| self.read_blob_from_tree(base_tree, &old_path));
                let after_content = target_file
                    .after_content
                    .or_else(|| self.read_working_file(&target_file.file_path));
                files.push(FileChange {
                    file_path: target_file.file_path,
                    status: FileStatus::Renamed,
                    old_file_path: Some(old_path),
                    before_content,
                    after_content,
                });
                if let Some(file_path) = displaced_deleted_path {
                    let before_content = self.read_blob_from_tree(base_tree, &file_path);
                    files.push(FileChange {
                        file_path,
                        status: FileStatus::Deleted,
                        old_file_path: None,
                        before_content,
                        after_content: None,
                    });
                }
            }
        }

        Ok(files)
    }
367
368 fn get_commit_diff_files(&self, sha: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
369 let obj = self.repo.revparse_single(sha)?;
370 let commit = obj.peel_to_commit()?;
371 let tree = commit.tree()?;
372
373 let parent_tree = if commit.parent_count() > 0 {
374 Some(commit.parent(0)?.tree()?)
375 } else {
376 None
377 };
378
379 let mut opts = self.make_diff_opts(pathspecs)?;
380 let mut diff = self.repo.diff_tree_to_tree(
381 parent_tree.as_ref(),
382 Some(&tree),
383 Some(&mut opts),
384 )?;
385 Self::detect_renames(&mut diff)?;
386
387 Ok(self.diff_to_file_changes(&diff))
388 }
389
390 fn get_range_diff_files(&self, from: &str, to: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
391 let from_obj = self.repo.revparse_single(from)?;
392 let to_obj = self.repo.revparse_single(to)?;
393
394 let from_tree = from_obj.peel_to_commit()?.tree()?;
395 let to_tree = to_obj.peel_to_commit()?.tree()?;
396
397 let mut opts = self.make_diff_opts(pathspecs)?;
398 let mut diff = self.repo.diff_tree_to_tree(
399 Some(&from_tree),
400 Some(&to_tree),
401 Some(&mut opts),
402 )?;
403 Self::detect_renames(&mut diff)?;
404
405 Ok(self.diff_to_file_changes(&diff))
406 }
407
408 fn get_ref_to_working_diff_files(&self, refspec: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
409 let tree = self.resolve_tree(refspec)?;
410 let mut opts = self.make_diff_opts(pathspecs)?;
411 let mut diff = self.repo.diff_tree_to_workdir_with_index(
412 Some(&tree),
413 Some(&mut opts),
414 )?;
415 Self::detect_renames(&mut diff)?;
416 self.apply_index_rename_map(self.diff_to_file_changes(&diff), Some(&tree), pathspecs)
417 }
418
419 fn detect_renames(diff: &mut Diff) -> Result<(), GitError> {
420 let mut opts = DiffFindOptions::new();
421 opts.renames(true);
422 diff.find_similar(Some(&mut opts))?;
423 Ok(())
424 }
425
426 fn diff_to_file_changes(&self, diff: &Diff) -> Vec<FileChange> {
427 let mut files = Vec::new();
428
429 for delta in diff.deltas() {
430 let (status, file_path, old_file_path) = match delta.status() {
431 Delta::Added => {
432 let path = delta
433 .new_file()
434 .path()
435 .and_then(|p| p.to_str())
436 .unwrap_or("")
437 .to_string();
438 (FileStatus::Added, path, None)
439 }
440 Delta::Deleted => {
441 let path = delta
442 .old_file()
443 .path()
444 .and_then(|p| p.to_str())
445 .unwrap_or("")
446 .to_string();
447 (FileStatus::Deleted, path, None)
448 }
449 Delta::Modified => {
450 let path = delta
451 .new_file()
452 .path()
453 .and_then(|p| p.to_str())
454 .unwrap_or("")
455 .to_string();
456 (FileStatus::Modified, path, None)
457 }
458 Delta::Renamed => {
459 let new_path = delta
460 .new_file()
461 .path()
462 .and_then(|p| p.to_str())
463 .unwrap_or("")
464 .to_string();
465 let old_path = delta
466 .old_file()
467 .path()
468 .and_then(|p| p.to_str())
469 .unwrap_or("")
470 .to_string();
471 (FileStatus::Renamed, new_path, Some(old_path))
472 }
473 _ => continue,
474 };
475
476 if !file_path.starts_with(".sem/") {
477 files.push(FileChange {
478 file_path,
479 status,
480 old_file_path,
481 before_content: None,
482 after_content: None,
483 });
484 }
485 }
486
487 files
488 }
489
490 fn bytes_look_binary(bytes: &[u8], complete: bool) -> bool {
491 if bytes.iter().any(|byte| *byte == 0) {
492 return true;
493 }
494
495 match std::str::from_utf8(bytes) {
496 Ok(_) => false,
497 Err(error) => complete || error.error_len().is_some(),
498 }
499 }
500
501 fn populate_contents(
502 &self,
503 files: &mut [FileChange],
504 scope: &DiffScope,
505 ) -> Result<(), GitError> {
506 match scope {
507 DiffScope::Working => {
508 let head_tree = self.resolve_tree("HEAD").ok();
510 for file in files.iter_mut() {
511 if file.status != FileStatus::Deleted {
512 file.after_content = self.read_working_file(&file.file_path);
513 }
514 if file.status != FileStatus::Added {
515 let path = file
516 .old_file_path
517 .as_deref()
518 .unwrap_or(&file.file_path);
519 file.before_content = head_tree
520 .as_ref()
521 .and_then(|t| self.read_blob_from_tree(t, path));
522 }
523 }
524 }
525 DiffScope::Staged => {
526 let head_tree = self.resolve_tree("HEAD").ok();
527 for file in files.iter_mut() {
528 if file.status != FileStatus::Deleted {
529 file.after_content = self
530 .read_index_file(&file.file_path)
531 .or_else(|| self.read_working_file(&file.file_path));
532 }
533 if file.status != FileStatus::Added {
534 let path = file
535 .old_file_path
536 .as_deref()
537 .unwrap_or(&file.file_path);
538 file.before_content = head_tree
539 .as_ref()
540 .and_then(|t| self.read_blob_from_tree(t, path));
541 }
542 }
543 }
544 DiffScope::Commit { sha } => {
545 let after_tree = self.resolve_tree(sha)?;
547 let before_tree = self.resolve_tree(&format!("{sha}~1")).ok();
548 for file in files.iter_mut() {
549 if file.status != FileStatus::Deleted {
550 file.after_content =
551 self.read_blob_from_tree(&after_tree, &file.file_path);
552 }
553 if file.status != FileStatus::Added {
554 let path = file
555 .old_file_path
556 .as_deref()
557 .unwrap_or(&file.file_path);
558 file.before_content = before_tree
559 .as_ref()
560 .and_then(|t| self.read_blob_from_tree(t, path));
561 }
562 }
563 }
564 DiffScope::Range { from, to } => {
565 let after_tree = self.resolve_tree(to)?;
566 let before_tree = self.resolve_tree(from)?;
567 for file in files.iter_mut() {
568 if file.status != FileStatus::Deleted {
569 file.after_content =
570 self.read_blob_from_tree(&after_tree, &file.file_path);
571 }
572 if file.status != FileStatus::Added {
573 let path = file
574 .old_file_path
575 .as_deref()
576 .unwrap_or(&file.file_path);
577 file.before_content =
578 self.read_blob_from_tree(&before_tree, path);
579 }
580 }
581 }
582 DiffScope::RefToWorking { refspec } => {
583 let before_tree = self.resolve_tree(refspec)?;
584 for file in files.iter_mut() {
585 if file.status != FileStatus::Deleted {
586 file.after_content = self.read_working_file(&file.file_path);
587 }
588 if file.status != FileStatus::Added {
589 let path = file
590 .old_file_path
591 .as_deref()
592 .unwrap_or(&file.file_path);
593 file.before_content =
594 self.read_blob_from_tree(&before_tree, path);
595 }
596 }
597 }
598 }
599 Ok(())
600 }
601
602 fn resolve_tree(&self, refspec: &str) -> Result<git2::Tree<'_>, GitError> {
603 let obj = self.repo.revparse_single(refspec)?;
604 let commit = obj.peel_to_commit()?;
605 Ok(commit.tree()?)
606 }
607
608 fn normalize_line_endings(s: String) -> String {
609 if s.contains('\r') {
610 s.replace("\r\n", "\n").replace('\r', "\n")
611 } else {
612 s
613 }
614 }
615
616 fn read_blob_from_tree(&self, tree: &git2::Tree, file_path: &str) -> Option<String> {
617 let entry = tree.get_path(Path::new(file_path)).ok()?;
618 let blob = self.repo.find_blob(entry.id()).ok()?;
619 let bytes = blob.content();
620 if blob.is_binary() || Self::bytes_look_binary(bytes, true) {
621 return None;
622 }
623 std::str::from_utf8(bytes)
624 .ok()
625 .map(|s| Self::normalize_line_endings(s.to_string()))
626 }
627
628 fn read_working_file(&self, file_path: &str) -> Option<String> {
629 let full_path = self.repo_root.join(file_path);
630 let bytes = fs::read(full_path).ok()?;
631 if Self::bytes_look_binary(&bytes, true) {
632 return None;
633 }
634 String::from_utf8(bytes)
635 .ok()
636 .map(Self::normalize_line_endings)
637 }
638
639 fn read_index_file(&self, file_path: &str) -> Option<String> {
640 let index = self.repo.index().ok()?;
641 let entry = index.get_path(Path::new(file_path), 0)?;
642 let blob = self.repo.find_blob(entry.id).ok()?;
643 let bytes = blob.content();
644 if blob.is_binary() || Self::bytes_look_binary(bytes, true) {
645 return None;
646 }
647 std::str::from_utf8(bytes)
648 .ok()
649 .map(|s| Self::normalize_line_endings(s.to_string()))
650 }
651
652
653 pub fn read_file_at_ref(&self, refspec: &str, file_path: &str) -> Result<Option<String>, GitError> {
655 let tree = self.resolve_tree(refspec)?;
656 Ok(self.read_blob_from_tree(&tree, file_path))
657 }
658
659 pub fn get_file_commits(&self, file_path: &str, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
662 let mut revwalk = self.repo.revwalk()?;
663 revwalk.push_head()?;
664 revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::TIME)?;
665
666 let mut commits = Vec::new();
667 let path = Path::new(file_path);
668
669 for oid_result in revwalk {
670 let oid = oid_result?;
671 let commit = self.repo.find_commit(oid)?;
672 let tree = commit.tree()?;
673
674 let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
676
677 let file_in_parent = if commit.parent_count() > 0 {
679 commit.parent(0)
680 .ok()
681 .and_then(|p| p.tree().ok())
682 .and_then(|t| t.get_path(path).ok().map(|e| e.id()))
683 } else {
684 None };
686
687 let changed = match (file_in_commit, file_in_parent) {
689 (Some(cur), Some(prev)) => cur != prev, (Some(_), None) => true, (None, Some(_)) => true, (None, None) => false, };
694
695 if changed {
696 let sha = oid.to_string();
697 commits.push(CommitInfo {
698 short_sha: sha[..7.min(sha.len())].to_string(),
699 sha,
700 author: commit.author().name().unwrap_or("unknown").to_string(),
701 date: commit.time().seconds().to_string(),
702 message: commit.message().unwrap_or("").to_string(),
703 });
704
705 if limit != 0 && commits.len() >= limit {
706 break;
707 }
708 }
709 }
710
711 Ok(commits)
712 }
713
    /// Returns the commits that touched `file_path`, following the file across
    /// renames, each paired with the path the file had at that commit.
    ///
    /// The `git log --follow` based implementation is tried first. The `git2`
    /// walk below is used only when that returns no commits or the `git`
    /// executable is not found; other CLI errors are returned directly.
    /// A `limit` of `0` means no limit.
    pub fn get_file_commits_follow_renames(
        &self,
        file_path: &str,
        limit: usize,
    ) -> Result<Vec<FileCommitInfo>, GitError> {
        match self.get_file_commits_follow_renames_cli(file_path, limit) {
            Ok(commits) if !commits.is_empty() => return Ok(commits),
            Ok(_) => {}
            Err(GitError::Io(error)) if error.kind() == std::io::ErrorKind::NotFound => {}
            Err(error) => return Err(error),
        }

        let mut revwalk = self.repo.revwalk()?;
        revwalk.push_head()?;
        revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::TIME)?;

        let mut results = Vec::new();
        // Path currently being followed; switched to the old name on a rename.
        let mut tracked_path = file_path.to_string();

        for oid_result in revwalk {
            let oid = oid_result?;
            let commit = self.repo.find_commit(oid)?;
            let tree = commit.tree()?;

            let path = Path::new(&tracked_path);
            let file_in_commit = tree.get_path(path).ok().map(|e| e.id());

            // Only the first parent is consulted.
            let (parent_tree_opt, file_in_parent) = if commit.parent_count() > 0 {
                let parent = commit.parent(0)?;
                let ptree = parent.tree()?;
                let fip = ptree.get_path(path).ok().map(|e| e.id());
                (Some(ptree), fip)
            } else {
                (None, None)
            };

            // Changed when the blob id differs or the path appears/disappears.
            let changed = match (file_in_commit, file_in_parent) {
                (Some(cur), Some(prev)) => cur != prev,
                (Some(_), None) => true,
                (None, Some(_)) => true,
                (None, None) => false,
            };

            if changed {
                let sha_str = oid.to_string();
                results.push(FileCommitInfo {
                    commit: CommitInfo {
                        short_sha: sha_str[..7.min(sha_str.len())].to_string(),
                        sha: sha_str,
                        author: commit.author().name().unwrap_or("unknown").to_string(),
                        date: commit.time().seconds().to_string(),
                        message: commit.message().unwrap_or("").to_string(),
                    },
                    file_path: tracked_path.clone(),
                });

                if limit != 0 && results.len() >= limit {
                    break;
                }
            }

            // A rename can only explain a path missing on one side, so the
            // full tree diff is computed just for those commits.
            let should_check_rename =
                parent_tree_opt.is_some() && (file_in_parent.is_none() || file_in_commit.is_none());
            if should_check_rename {
                let mut diff = self.repo.diff_tree_to_tree(
                    parent_tree_opt.as_ref(),
                    Some(&tree),
                    None,
                )?;
                let mut find_opts = DiffFindOptions::new();
                find_opts.renames(true);
                diff.find_similar(Some(&mut find_opts))?;

                let mut found_rename = false;
                for delta in diff.deltas() {
                    if delta.status() == Delta::Renamed {
                        let new_path = delta
                            .new_file()
                            .path()
                            .and_then(|p| p.to_str())
                            .unwrap_or("");
                        if new_path == tracked_path {
                            let old_path = delta
                                .old_file()
                                .path()
                                .and_then(|p| p.to_str())
                                .unwrap_or("")
                                .to_string();
                            if !old_path.is_empty() {
                                // Keep walking older history under the previous name.
                                tracked_path = old_path;
                                found_rename = true;
                                break;
                            }
                        }
                    }
                }

                // No rename found and the file is absent from this commit:
                // stop walking.
                if !found_rename && file_in_commit.is_none() {
                    break;
                }
            }
        }

        Ok(results)
    }
827
    /// Implements rename-following history with
    /// `git log --follow --name-status`, run in the repository root.
    ///
    /// Each commit is emitted as a record that starts with the 0x1E byte and
    /// holds hash, author name, author timestamp and subject separated by
    /// 0x1F bytes, followed by the name-status lines. A `limit` of `0` omits
    /// the `-n` argument.
    ///
    /// # Errors
    ///
    /// Returns [`GitError::Io`] when `git` cannot be spawned and
    /// [`GitError::Git2`] (carrying stderr or the exit status) when it exits
    /// unsuccessfully.
    fn get_file_commits_follow_renames_cli(
        &self,
        file_path: &str,
        limit: usize,
    ) -> Result<Vec<FileCommitInfo>, GitError> {
        let mut command = Command::new("git");
        command
            .arg("-C")
            .arg(&self.repo_root)
            .arg("log")
            .arg("--follow")
            .arg("--format=\x1e%H\x1f%an\x1f%at\x1f%s")
            .arg("--name-status");
        if limit != 0 {
            command.arg("-n").arg(limit.to_string());
        }
        command.arg("--").arg(file_path);

        let output = command.output()?;
        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
            return Err(git_command_error(if stderr.is_empty() {
                format!("git log exited with {}", output.status)
            } else {
                stderr
            }));
        }

        let stdout = String::from_utf8_lossy(&output.stdout);
        // Name the file has at the commit currently being parsed.
        let mut tracked_path = file_path.to_string();
        let mut commits = Vec::new();

        for record in stdout.split('\x1e') {
            let record = record.trim_start_matches('\n');
            if record.trim().is_empty() {
                continue;
            }

            // First line: metadata; remaining lines: name-status entries.
            let mut lines = record.lines();
            let Some(meta) = lines.next() else {
                continue;
            };
            // At most four fields, so the subject keeps any 0x1F bytes it contains.
            let mut parts = meta.splitn(4, '\x1f');
            let Some(sha) = parts.next() else {
                continue;
            };
            let Some(author) = parts.next() else {
                continue;
            };
            let Some(date) = parts.next() else {
                continue;
            };
            let message = parts.next().unwrap_or_default();

            let commit_path = tracked_path.clone();
            let mut previous_path = None;
            for line in lines {
                let fields: Vec<&str> = line.split('\t').collect();
                // A status starting with `R` whose third field is the tracked
                // path: the second field is the name used before this commit.
                if fields.len() >= 3 && fields[0].starts_with('R') && fields[2] == tracked_path {
                    previous_path = Some(fields[1].to_string());
                }
            }

            commits.push(FileCommitInfo {
                commit: CommitInfo {
                    short_sha: sha[..7.min(sha.len())].to_string(),
                    sha: sha.to_string(),
                    author: author.to_string(),
                    date: date.to_string(),
                    message: message.to_string(),
                },
                file_path: commit_path,
            });

            // Later records describe older commits, which used the old name.
            if let Some(previous_path) = previous_path {
                tracked_path = previous_path;
            }
        }

        Ok(commits)
    }
909
910 pub fn get_commit_changed_files(&self, sha: &str) -> Result<Vec<String>, GitError> {
913 let obj = self.repo.revparse_single(sha)?;
914 let commit = obj.peel_to_commit()?;
915 let tree = commit.tree()?;
916 let parent_tree = if commit.parent_count() > 0 {
917 Some(commit.parent(0)?.tree()?)
918 } else {
919 None
920 };
921 let diff = self.repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None)?;
922 let mut paths = Vec::new();
923 for delta in diff.deltas() {
924 if let Some(p) = delta.new_file().path().and_then(|p| p.to_str()) {
925 paths.push(p.to_string());
926 }
927 if let Some(p) = delta.old_file().path().and_then(|p| p.to_str()) {
929 if !paths.contains(&p.to_string()) {
930 paths.push(p.to_string());
931 }
932 }
933 }
934 Ok(paths)
935 }
936
937 pub fn get_log(&self, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
938 let mut revwalk = self.repo.revwalk()?;
939 revwalk.push_head()?;
940
941 let mut commits = Vec::new();
942 for (i, oid_result) in revwalk.enumerate() {
943 if limit != 0 && i >= limit {
944 break;
945 }
946 let oid = oid_result?;
947 let commit = self.repo.find_commit(oid)?;
948 let sha = oid.to_string();
949 commits.push(CommitInfo {
950 short_sha: sha[..7.min(sha.len())].to_string(),
951 sha,
952 author: commit.author().name().unwrap_or("unknown").to_string(),
953 date: commit.time().seconds().to_string(),
954 message: commit.message().unwrap_or("").to_string(),
955 });
956 }
957
958 Ok(commits)
959 }
960}
961
962fn parse_blame_porcelain(output: &str) -> Vec<BlameLineInfo> {
963 let lines: Vec<&str> = output.lines().collect();
964 let mut parsed = Vec::new();
965 let mut index = 0;
966
967 while index < lines.len() {
968 let Some((raw_sha, line_number)) = parse_blame_header(lines[index]) else {
969 index += 1;
970 continue;
971 };
972 index += 1;
973
974 let mut author = String::new();
975 let mut author_time = None;
976 let mut summary = String::new();
977
978 while index < lines.len() {
979 let line = lines[index];
980 index += 1;
981
982 if line.starts_with('\t') {
983 break;
984 } else if let Some(value) = line.strip_prefix("author ") {
985 author = value.to_string();
986 } else if let Some(value) = line.strip_prefix("author-time ") {
987 author_time = value.parse::<i64>().ok();
988 } else if let Some(value) = line.strip_prefix("summary ") {
989 summary = value.to_string();
990 }
991 }
992
993 let sha = raw_sha.trim_start_matches('^');
994 let commit_sha = if sha.chars().all(|c| c == '0') {
995 None
996 } else {
997 Some(sha.to_string())
998 };
999
1000 if author.is_empty() {
1001 author = if commit_sha.is_none() {
1002 "Not Committed Yet".to_string()
1003 } else {
1004 "unknown".to_string()
1005 };
1006 }
1007
1008 parsed.push(BlameLineInfo {
1009 line_number,
1010 commit_sha,
1011 author,
1012 author_time,
1013 summary,
1014 });
1015 }
1016
1017 parsed.sort_by_key(|line| line.line_number);
1018 parsed
1019}
1020
/// Parses a porcelain blame entry header of the form
/// `<oid> <orig-line> <final-line> [<group-size>]`.
///
/// Returns the object id exactly as written (including any leading `^`) and
/// the final line number, or `None` when the line is not a header.
fn parse_blame_header(line: &str) -> Option<(&str, usize)> {
    let mut parts = line.split_whitespace();
    let sha = parts.next()?;
    if !is_blame_oid(sha) {
        return None;
    }
    // The original line number must be present but is not used.
    parts.next()?;
    let final_line = parts.next()?.parse().ok()?;
    Some((sha, final_line))
}

/// Reports whether `value` is a full hexadecimal object id, optionally
/// prefixed with `^`.
///
/// Both 40-digit (SHA-1) and 64-digit (SHA-256) ids are accepted, so blame
/// output from SHA-256 repositories is recognized as well.
fn is_blame_oid(value: &str) -> bool {
    const SHA1_HEX_LEN: usize = 40;
    const SHA256_HEX_LEN: usize = 64;

    let value = value.strip_prefix('^').unwrap_or(value);
    matches!(value.len(), SHA1_HEX_LEN | SHA256_HEX_LEN)
        && value.chars().all(|c| c.is_ascii_hexdigit())
}
1036
1037fn git_command_error(message: String) -> GitError {
1038 GitError::Git2(git2::Error::from_str(&message))
1039}
1040
1041fn map_git_error(error: git2::Error) -> GitError {
1042 if error.code() == ErrorCode::NotFound {
1043 GitError::NotARepo
1044 } else {
1045 GitError::Git2(error)
1046 }
1047}
1048
1049fn should_retry_with_command_line_safe_directory(error: &git2::Error, path: &Path) -> bool {
1050 let safe_directories = command_line_safe_directories();
1051 should_retry_with_safe_directory(error, path, &safe_directories)
1052}
1053
1054fn should_retry_with_safe_directory(error: &git2::Error, path: &Path, safe_directories: &[String]) -> bool {
1055 error.code() == ErrorCode::Owner
1056 && nearest_git_root(path).is_some_and(|repo_root| {
1057 safe_directories.iter().any(|safe_directory| {
1058 safe_directory == "*"
1059 || paths_match(&repo_root, Path::new(safe_directory))
1060 })
1061 })
1062}
1063
/// Collects `safe.directory` values from the `GIT_CONFIG_COUNT` /
/// `GIT_CONFIG_KEY_<n>` / `GIT_CONFIG_VALUE_<n>` environment variables.
///
/// A missing or unparsable count is treated as zero. Keys are compared
/// case-insensitively, and entries with a missing key or value are skipped.
fn command_line_safe_directories() -> Vec<String> {
    let declared = match env::var("GIT_CONFIG_COUNT") {
        Ok(raw) => raw.parse::<usize>().unwrap_or(0),
        Err(_) => 0,
    };

    let mut directories = Vec::new();
    for slot in 0..declared {
        let Ok(key) = env::var(format!("GIT_CONFIG_KEY_{slot}")) else {
            continue;
        };
        if !key.eq_ignore_ascii_case("safe.directory") {
            continue;
        }
        if let Ok(value) = env::var(format!("GIT_CONFIG_VALUE_{slot}")) {
            directories.push(value);
        }
    }
    directories
}
1081
/// Finds the closest directory at or above `path` that contains a `.git`
/// entry, starting from the parent directory when `path` is a file.
///
/// The result is canonicalized when possible and returned as found otherwise.
fn nearest_git_root(path: &Path) -> Option<PathBuf> {
    let start = if path.is_file() { path.parent()? } else { path };

    start
        .ancestors()
        .find(|dir| dir.join(".git").exists())
        .map(|dir| fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf()))
}
1097
/// Compares two paths after canonicalizing each one, falling back to the path
/// as given when canonicalization fails.
///
/// On Windows the comparison ignores ASCII case.
fn paths_match(left: &Path, right: &Path) -> bool {
    let resolve = |path: &Path| fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
    let (left, right) = (resolve(left), resolve(right));

    if !cfg!(windows) {
        return left == right;
    }
    left.to_string_lossy()
        .eq_ignore_ascii_case(&right.to_string_lossy())
}
1109
/// Returns the process-wide mutex used to serialize toggling of owner
/// validation. It is created on first use.
fn owner_validation_lock() -> &'static Mutex<()> {
    static OWNER_VALIDATION: OnceLock<Mutex<()>> = OnceLock::new();
    OWNER_VALIDATION.get_or_init(Mutex::default)
}
1114
/// Guard that turns off `git2` owner validation on creation and turns it back
/// on when dropped.
struct OwnerValidationDisabled;

impl OwnerValidationDisabled {
    /// Disables owner validation and returns the guard.
    ///
    /// # Errors
    ///
    /// Propagates the `git2` error when the option cannot be changed.
    fn new() -> Result<Self, GitError> {
        // SAFETY: the call site visible in this file (`GitBridge::open`) holds
        // `owner_validation_lock()` while this guard exists, so toggles made
        // through that path do not race with each other.
        // NOTE(review): libgit2 use on other threads that bypasses that lock is
        // not covered — confirm against the `git2::opts` safety contract.
        unsafe { git2::opts::set_verify_owner_validation(false)? };
        Ok(Self)
    }
}

impl Drop for OwnerValidationDisabled {
    /// Re-enables owner validation; a failure to do so is ignored.
    fn drop(&mut self) {
        // SAFETY: same reasoning as in `new`; the guard is dropped while the
        // caller in `GitBridge::open` still holds the lock.
        unsafe {
            let _ = git2::opts::set_verify_owner_validation(true);
        }
    }
}
1133
1134fn normalize_open_path(path: &Path) -> Result<PathBuf, GitError> {
1135 let canonical = match fs::canonicalize(path) {
1136 Ok(canonical) => canonical,
1137 Err(_) if path.is_absolute() => normalize_lexical(path),
1138 Err(_) => normalize_lexical(&env::current_dir()?.join(path)),
1139 };
1140
1141 Ok(if canonical.is_file() {
1142 canonical
1143 .parent()
1144 .map(Path::to_path_buf)
1145 .unwrap_or(canonical)
1146 } else {
1147 canonical
1148 })
1149}
1150
1151fn normalize_absolute_pathspec(path: &Path) -> PathBuf {
1152 let path = normalize_lexical(path);
1153 let Some(leaf) = path.file_name() else {
1154 return fs::canonicalize(&path).unwrap_or(path);
1155 };
1156 let mut trailing_components = vec![leaf.to_os_string()];
1157
1158 let Some(parent) = path.parent() else {
1159 return path;
1160 };
1161
1162 for ancestor in parent.ancestors() {
1163 if ancestor.exists() {
1164 let mut normalized =
1165 fs::canonicalize(ancestor).unwrap_or_else(|_| normalize_lexical(ancestor));
1166 for component in trailing_components.iter().rev() {
1167 normalized.push(component);
1168 }
1169 return normalized;
1170 }
1171
1172 let Some(name) = ancestor.file_name() else {
1173 return path;
1174 };
1175 trailing_components.push(name.to_os_string());
1176 }
1177
1178 path
1179}
1180
1181fn pathspec_outside_repo_error(pathspec: &str, repo_root: &Path) -> GitError {
1182 GitError::Git2(git2::Error::from_str(&format!(
1183 "pathspec '{pathspec}' is outside repository '{}'",
1184 repo_root.display()
1185 )))
1186}
1187
1188fn non_utf8_pathspec_error(pathspec: &str) -> GitError {
1189 GitError::Git2(git2::Error::from_str(&format!(
1190 "pathspec '{pathspec}' is not valid UTF-8 after normalization"
1191 )))
1192}
1193
/// Normalizes `path` purely lexically, without touching the filesystem.
///
/// `.` components are dropped and each `..` removes the preceding normal
/// component. A `..` with nothing to cancel is kept for relative paths (so
/// `../../a` stays `../../a`) and discarded for rooted paths, which cannot go
/// above their root. Symlinks are not resolved.
fn normalize_lexical(path: &Path) -> PathBuf {
    let mut normalized = PathBuf::new();

    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // Only a real name may be cancelled. `PathBuf::pop` also succeeds
                // on a trailing `..`, which would make `../..` collapse to an
                // empty path, so inspect the last component explicitly.
                let ends_with_name = matches!(
                    normalized.components().next_back(),
                    Some(Component::Normal(_))
                );
                if ends_with_name {
                    normalized.pop();
                } else if !normalized.has_root() {
                    normalized.push("..");
                }
            }
            Component::Normal(part) => normalized.push(part),
            Component::Prefix(prefix) => normalized.push(prefix.as_os_str()),
            Component::RootDir => normalized.push(component.as_os_str()),
        }
    }

    normalized
}
1213
1214#[cfg(test)]
1215mod tests {
1216 use super::*;
1217 use crate::model::change::ChangeType;
1218 use crate::parser::differ::{collect_binary_file_changes, compute_semantic_diff};
1219 use crate::parser::plugins::create_default_registry;
1220 use git2::{ErrorClass, Oid, Repository, Signature};
1221 use tempfile::TempDir;
1222
1223 fn commit_file(repo: &Repository, file_path: &str, contents: &str, message: &str) -> Oid {
1224 fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
1225
1226 let mut index = repo.index().unwrap();
1227 index.add_path(Path::new(file_path)).unwrap();
1228 index.write().unwrap();
1229
1230 let tree_id = index.write_tree().unwrap();
1231 let tree = repo.find_tree(tree_id).unwrap();
1232 let sig = Signature::now("Test User", "test@example.com").unwrap();
1233
1234 match repo.head() {
1235 Ok(head) => {
1236 let parent = repo.find_commit(head.target().unwrap()).unwrap();
1237 repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
1238 .unwrap()
1239 }
1240 Err(_) => repo
1241 .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
1242 .unwrap(),
1243 }
1244 }
1245
1246 fn commit_binary_file(
1247 repo: &Repository,
1248 file_path: &str,
1249 contents: &[u8],
1250 message: &str,
1251 ) -> Oid {
1252 fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
1253
1254 let mut index = repo.index().unwrap();
1255 index.add_path(Path::new(file_path)).unwrap();
1256 index.write().unwrap();
1257
1258 let tree_id = index.write_tree().unwrap();
1259 let tree = repo.find_tree(tree_id).unwrap();
1260 let sig = Signature::now("Test User", "test@example.com").unwrap();
1261
1262 match repo.head() {
1263 Ok(head) => {
1264 let parent = repo.find_commit(head.target().unwrap()).unwrap();
1265 repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
1266 .unwrap()
1267 }
1268 Err(_) => repo
1269 .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
1270 .unwrap(),
1271 }
1272 }
1273
1274 #[test]
1275 fn porcelain_blame_reports_uncommitted_lines() {
1276 let temp = TempDir::new().unwrap();
1277 let repo = Repository::init(temp.path()).unwrap();
1278
1279 commit_file(&repo, "a.py", "def foo():\n return 1\n", "init");
1280 fs::write(temp.path().join("a.py"), "def foo():\n return 2\n").unwrap();
1281
1282 let bridge = GitBridge::open(temp.path()).unwrap();
1283 let blame = bridge.blame_file_porcelain(Path::new("a.py")).unwrap();
1284
1285 assert!(blame[0].commit_sha.is_some());
1286 assert_eq!(blame[1].commit_sha, None);
1287 assert_eq!(blame[1].author, "Not Committed Yet");
1288 }
1289
1290 #[test]
1291 fn clean_worktree_does_not_fall_back_to_head_commit() {
1292 let temp = TempDir::new().unwrap();
1293 let repo = Repository::init(temp.path()).unwrap();
1294
1295 commit_file(&repo, "sample.ts", "export function a() {\n return 1;\n}\n", "init");
1296 commit_file(
1297 &repo,
1298 "sample.ts",
1299 "export function a() {\n return 2;\n}\n",
1300 "change a",
1301 );
1302
1303 let bridge = GitBridge::open(temp.path()).unwrap();
1304 let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1305
1306 assert!(matches!(scope, DiffScope::Working));
1307 assert!(files.is_empty());
1308 }
1309
1310 #[test]
1311 fn owner_error_retries_for_command_line_safe_directory() {
1312 let temp = TempDir::new().unwrap();
1313 Repository::init(temp.path()).unwrap();
1314
1315 let owner_error = git2::Error::new(
1316 ErrorCode::Owner,
1317 ErrorClass::Config,
1318 "owner mismatch",
1319 );
1320 let safe_directories = [temp.path().to_string_lossy().to_string()];
1321
1322 assert!(should_retry_with_safe_directory(
1323 &owner_error,
1324 temp.path(),
1325 &safe_directories,
1326 ));
1327
1328 let other_directories = [temp.path().join("other").to_string_lossy().to_string()];
1329 assert!(!should_retry_with_safe_directory(
1330 &owner_error,
1331 temp.path(),
1332 &other_directories,
1333 ));
1334
1335 let not_found_error = git2::Error::new(
1336 ErrorCode::NotFound,
1337 ErrorClass::Repository,
1338 "not found",
1339 );
1340 assert!(!should_retry_with_safe_directory(
1341 ¬_found_error,
1342 temp.path(),
1343 &["*".to_string()],
1344 ));
1345 }
1346
1347 #[test]
1348 fn explicit_commit_scope_still_reads_head_commit_diff() {
1349 let temp = TempDir::new().unwrap();
1350 let repo = Repository::init(temp.path()).unwrap();
1351
1352 commit_file(&repo, "sample.ts", "export function a() {\n return 1;\n}\n", "init");
1353 let head_oid = commit_file(
1354 &repo,
1355 "sample.ts",
1356 "export function a() {\n return 2;\n}\n",
1357 "change a",
1358 );
1359
1360 let bridge = GitBridge::open(temp.path()).unwrap();
1361 let files = bridge
1362 .get_changed_files(&DiffScope::Commit {
1363 sha: head_oid.to_string(),
1364 }, &[])
1365 .unwrap();
1366
1367 assert_eq!(files.len(), 1);
1368 assert_eq!(files[0].file_path, "sample.ts");
1369 assert_eq!(files[0].status, FileStatus::Modified);
1370 }
1371
1372 #[test]
1373 fn pathspecs_are_normalized_from_open_directory() {
1374 let temp = TempDir::new().unwrap();
1375 let repo = Repository::init(temp.path()).unwrap();
1376 fs::create_dir_all(temp.path().join("pkg")).unwrap();
1377
1378 commit_file(&repo, "pkg/a.py", "def foo():\n return 1\n", "init");
1379 fs::write(temp.path().join("pkg/a.py"), "def foo():\n return 2\n").unwrap();
1380
1381 let bridge = GitBridge::open(&temp.path().join("pkg")).unwrap();
1382 let relative_files = bridge
1383 .get_changed_files(&DiffScope::Working, &["a.py".to_string()])
1384 .unwrap();
1385
1386 assert_eq!(relative_files.len(), 1);
1387 assert_eq!(relative_files[0].file_path, "pkg/a.py");
1388
1389 let absolute_path = temp.path().join("pkg/a.py").to_string_lossy().to_string();
1390 let absolute_files = bridge
1391 .get_changed_files(&DiffScope::Working, &[absolute_path])
1392 .unwrap();
1393
1394 assert_eq!(absolute_files.len(), 1);
1395 assert_eq!(absolute_files[0].file_path, "pkg/a.py");
1396 }
1397
1398 #[test]
1399 fn absolute_deleted_pathspecs_are_normalized_from_existing_parent() {
1400 let temp = TempDir::new().unwrap();
1401 let repo = Repository::init(temp.path()).unwrap();
1402 fs::create_dir_all(temp.path().join("pkg")).unwrap();
1403
1404 commit_file(
1405 &repo,
1406 "pkg/deleted.py",
1407 "def foo():\n return 1\n",
1408 "init",
1409 );
1410 let absolute_path = temp
1411 .path()
1412 .join("pkg/deleted.py")
1413 .to_string_lossy()
1414 .to_string();
1415 fs::remove_file(temp.path().join("pkg/deleted.py")).unwrap();
1416
1417 let bridge = GitBridge::open(&temp.path().join("pkg")).unwrap();
1418 let files = bridge
1419 .get_changed_files(&DiffScope::Working, &[absolute_path])
1420 .unwrap();
1421
1422 assert_eq!(files.len(), 1);
1423 assert_eq!(files[0].file_path, "pkg/deleted.py");
1424 assert_eq!(files[0].status, FileStatus::Deleted);
1425 }
1426
1427 #[test]
1428 fn absolute_missing_pathspecs_preserve_trailing_component_order() {
1429 let temp = TempDir::new().unwrap();
1430 let existing_parent = temp.path().join("existing");
1431 fs::create_dir(&existing_parent).unwrap();
1432
1433 let pathspec = existing_parent.join("missing").join("leaf.py");
1434 let normalized = normalize_absolute_pathspec(&pathspec);
1435
1436 let mut expected = fs::canonicalize(&existing_parent).unwrap();
1437 expected.push("missing");
1438 expected.push("leaf.py");
1439 assert_eq!(normalized, expected);
1440 }
1441
1442 #[test]
1443 fn absolute_pathspecs_outside_repo_are_rejected() {
1444 let repo_dir = TempDir::new().unwrap();
1445 let outside_dir = TempDir::new().unwrap();
1446 let repo = Repository::init(repo_dir.path()).unwrap();
1447
1448 commit_file(&repo, "sample.py", "def foo():\n return 1\n", "init");
1449 fs::write(
1450 repo_dir.path().join("sample.py"),
1451 "def foo():\n return 2\n",
1452 )
1453 .unwrap();
1454 let outside_path = outside_dir.path().join("outside.py");
1455 fs::write(&outside_path, "def outside():\n return 1\n").unwrap();
1456
1457 let bridge = GitBridge::open(repo_dir.path()).unwrap();
1458 let err = bridge
1459 .get_changed_files(
1460 &DiffScope::Working,
1461 &[outside_path.to_string_lossy().to_string()],
1462 )
1463 .unwrap_err();
1464
1465 let message = err.to_string();
1466 assert!(message.contains("pathspec"));
1467 assert!(message.contains("is outside repository"));
1468 }
1469
1470 #[test]
1471 fn working_binary_modification_is_reported_as_binary_change() {
1472 let temp = TempDir::new().unwrap();
1473 let repo = Repository::init(temp.path()).unwrap();
1474
1475 commit_binary_file(&repo, "pic.png", b"\0png-v1\0", "init");
1476 fs::write(temp.path().join("pic.png"), b"\0png-v2\0extra").unwrap();
1477
1478 let bridge = GitBridge::open(temp.path()).unwrap();
1479 let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1480
1481 assert_eq!(files.len(), 1);
1482 assert_eq!(files[0].file_path, "pic.png");
1483 assert_eq!(files[0].status, FileStatus::Modified);
1484 assert!(files[0].before_content.is_none());
1485 assert!(files[0].after_content.is_none());
1486
1487 let binary_changes = collect_binary_file_changes(&files);
1488 let registry = create_default_registry();
1489 let result = compute_semantic_diff(&files, ®istry, None, None);
1490
1491 assert!(result.changes.is_empty());
1492 assert_eq!(result.file_count, 0);
1493 assert_eq!(binary_changes.len(), 1);
1494 assert_eq!(binary_changes[0].file_path, "pic.png");
1495 assert_eq!(binary_changes[0].status, FileStatus::Modified);
1496 }
1497
1498 #[test]
1499 fn staged_binary_add_and_delete_are_reported_as_binary_changes() {
1500 let temp = TempDir::new().unwrap();
1501 let repo = Repository::init(temp.path()).unwrap();
1502
1503 fs::write(temp.path().join("added.png"), b"\0added-binary\0").unwrap();
1504 let mut index = repo.index().unwrap();
1505 index.add_path(Path::new("added.png")).unwrap();
1506 index.write().unwrap();
1507
1508 let bridge = GitBridge::open(temp.path()).unwrap();
1509 let added_files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1510 assert_eq!(added_files.len(), 1);
1511 assert_eq!(added_files[0].file_path, "added.png");
1512 assert_eq!(added_files[0].status, FileStatus::Added);
1513 assert!(added_files[0].before_content.is_none());
1514 assert!(added_files[0].after_content.is_none());
1515 let added_binary_changes = collect_binary_file_changes(&added_files);
1516 assert_eq!(added_binary_changes.len(), 1);
1517 assert_eq!(added_binary_changes[0].file_path, "added.png");
1518
1519 let temp = TempDir::new().unwrap();
1520 let repo = Repository::init(temp.path()).unwrap();
1521 commit_binary_file(&repo, "deleted.png", b"\0deleted-binary\0", "init");
1522 fs::remove_file(temp.path().join("deleted.png")).unwrap();
1523 let mut index = repo.index().unwrap();
1524 index.remove_path(Path::new("deleted.png")).unwrap();
1525 index.write().unwrap();
1526
1527 let bridge = GitBridge::open(temp.path()).unwrap();
1528 let deleted_files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1529 assert_eq!(deleted_files.len(), 1);
1530 assert_eq!(deleted_files[0].file_path, "deleted.png");
1531 assert_eq!(deleted_files[0].status, FileStatus::Deleted);
1532 assert!(deleted_files[0].before_content.is_none());
1533 assert!(deleted_files[0].after_content.is_none());
1534 let deleted_binary_changes = collect_binary_file_changes(&deleted_files);
1535 assert_eq!(deleted_binary_changes.len(), 1);
1536 assert_eq!(deleted_binary_changes[0].file_path, "deleted.png");
1537 }
1538
1539 #[test]
1540 fn partial_utf8_boundary_is_not_treated_as_binary() {
1541 assert!(!GitBridge::bytes_look_binary(&[0xe2, 0x82], false));
1542 assert!(GitBridge::bytes_look_binary(&[0xe2, 0x82], true));
1543 }
1544
1545 #[test]
1546 fn staged_file_rename_is_reported_as_single_rename_with_old_contents() {
1547 let temp = TempDir::new().unwrap();
1548 let repo = Repository::init(temp.path()).unwrap();
1549
1550 let contents = "export function foo() {\n return 1;\n}\n";
1551 commit_file(&repo, "old.ts", contents, "init");
1552
1553 fs::rename(temp.path().join("old.ts"), temp.path().join("new.ts")).unwrap();
1554 let mut index = repo.index().unwrap();
1555 index.remove_path(Path::new("old.ts")).unwrap();
1556 index.add_path(Path::new("new.ts")).unwrap();
1557 index.write().unwrap();
1558
1559 let bridge = GitBridge::open(temp.path()).unwrap();
1560 let files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1561
1562 assert_eq!(files.len(), 1);
1563 assert_eq!(files[0].status, FileStatus::Renamed);
1564 assert_eq!(files[0].file_path, "new.ts");
1565 assert_eq!(files[0].old_file_path.as_deref(), Some("old.ts"));
1566 assert_eq!(files[0].before_content.as_deref(), Some(contents));
1567 assert_eq!(files[0].after_content.as_deref(), Some(contents));
1568 }
1569
1570 #[test]
1571 fn staged_file_rename_with_edit_reports_single_moved_entity() {
1572 let temp = TempDir::new().unwrap();
1573 let repo = Repository::init(temp.path()).unwrap();
1574
1575 let before = "\
1576// shared header 01
1577// shared header 02
1578// shared header 03
1579// shared header 04
1580// shared header 05
1581// shared header 06
1582// shared header 07
1583// shared header 08
1584// shared header 09
1585// shared header 10
1586export function foo() {
1587 return alpha + beta + gamma;
1588}
1589";
1590 let after = before.replace(
1591 "return alpha + beta + gamma;",
1592 "return one + two + three;",
1593 );
1594
1595 commit_file(&repo, "old.ts", before, "init");
1596 fs::rename(temp.path().join("old.ts"), temp.path().join("new.ts")).unwrap();
1597 fs::write(temp.path().join("new.ts"), &after).unwrap();
1598
1599 let mut index = repo.index().unwrap();
1600 index.remove_path(Path::new("old.ts")).unwrap();
1601 index.add_path(Path::new("new.ts")).unwrap();
1602 index.write().unwrap();
1603
1604 let bridge = GitBridge::open(temp.path()).unwrap();
1605 let files = bridge.get_changed_files(&DiffScope::Staged, &[]).unwrap();
1606 assert_eq!(files.len(), 1);
1607 assert_eq!(files[0].status, FileStatus::Renamed);
1608
1609 let registry = create_default_registry();
1610 let result = compute_semantic_diff(&files, ®istry, None, None);
1611
1612 assert_eq!(result.added_count, 0);
1613 assert_eq!(result.deleted_count, 0);
1614 assert_eq!(result.modified_count, 1);
1617 assert_eq!(result.moved_count, 1);
1618 assert_eq!(result.changes.len(), 1);
1619 assert_eq!(result.changes[0].change_type, ChangeType::Moved);
1620 assert_eq!(result.changes[0].entity_name, "foo");
1621 assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.ts"));
1622 assert_eq!(result.changes[0].structural_change, Some(true));
1623 }
1624
1625 #[test]
1626 fn working_diff_preserves_staged_rename_with_unstaged_edit() {
1627 let temp = TempDir::new().unwrap();
1628 let repo = Repository::init(temp.path()).unwrap();
1629
1630 let before = "\
1631export function foo(x: number) {
1632 return x + 1;
1633}
1634
1635export function bar(y: number) {
1636 return y * 2;
1637}
1638";
1639 let after = "\
1640export function foo(x: number) {
1641 return x + 42;
1642}
1643
1644export function bar(y: number) {
1645 return y * 99;
1646}
1647";
1648
1649 commit_file(&repo, "a.ts", before, "init");
1650
1651 fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1652 let mut index = repo.index().unwrap();
1653 index.remove_path(Path::new("a.ts")).unwrap();
1654 index.add_path(Path::new("b.ts")).unwrap();
1655 index.write().unwrap();
1656
1657 fs::write(temp.path().join("b.ts"), after).unwrap();
1658
1659 let bridge = GitBridge::open(temp.path()).unwrap();
1660 let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1661
1662 assert!(matches!(scope, DiffScope::Working));
1663 assert_eq!(files.len(), 1);
1664 assert_eq!(files[0].status, FileStatus::Renamed);
1665 assert_eq!(files[0].file_path, "b.ts");
1666 assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1667 assert_eq!(files[0].before_content.as_deref(), Some(before));
1668 assert_eq!(files[0].after_content.as_deref(), Some(after));
1669
1670 let registry = create_default_registry();
1671 let result = compute_semantic_diff(&files, ®istry, None, None);
1672
1673 assert_eq!(result.added_count, 0);
1674 assert_eq!(result.deleted_count, 0);
1675 assert_eq!(result.modified_count, 2);
1676 assert_eq!(result.moved_count, 2);
1677 assert_eq!(result.changes.len(), 2);
1678 assert!(result
1679 .changes
1680 .iter()
1681 .all(|change| change.change_type == ChangeType::Moved));
1682 assert!(result
1683 .changes
1684 .iter()
1685 .all(|change| change.old_file_path.as_deref() == Some("a.ts")));
1686 assert!(result
1687 .changes
1688 .iter()
1689 .all(|change| change.structural_change == Some(true)));
1690 }
1691
1692 #[test]
1693 fn working_diff_uses_staged_rename_map_after_large_unstaged_rewrite() {
1694 let temp = TempDir::new().unwrap();
1695 let repo = Repository::init(temp.path()).unwrap();
1696
1697 let before_noise = (0..200)
1698 .map(|i| format!("// old filler {i} alpha beta gamma"))
1699 .collect::<Vec<_>>()
1700 .join("\n");
1701 let after_noise = (0..200)
1702 .map(|i| format!("// new filler {i} delta epsilon zeta"))
1703 .collect::<Vec<_>>()
1704 .join("\n");
1705 let before = format!(
1706 "{before_noise}\nexport function foo(x: number) {{\n return x + 1;\n}}\n"
1707 );
1708 let after = format!(
1709 "{after_noise}\nexport function foo(x: number) {{\n return x + 42;\n}}\n"
1710 );
1711
1712 commit_file(&repo, "a.ts", &before, "init");
1713
1714 fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1715 let mut index = repo.index().unwrap();
1716 index.remove_path(Path::new("a.ts")).unwrap();
1717 index.add_path(Path::new("b.ts")).unwrap();
1718 index.write().unwrap();
1719
1720 fs::write(temp.path().join("b.ts"), &after).unwrap();
1721
1722 let bridge = GitBridge::open(temp.path()).unwrap();
1723 let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1724
1725 assert!(matches!(scope, DiffScope::Working));
1726 assert_eq!(files.len(), 1);
1727 assert_eq!(files[0].status, FileStatus::Renamed);
1728 assert_eq!(files[0].file_path, "b.ts");
1729 assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1730 assert_eq!(files[0].before_content.as_deref(), Some(before.as_str()));
1731 assert_eq!(files[0].after_content.as_deref(), Some(after.as_str()));
1732
1733 let registry = create_default_registry();
1734 let result = compute_semantic_diff(&files, ®istry, None, None);
1735
1736 assert_eq!(result.added_count, 0);
1737 assert_eq!(result.deleted_count, 0);
1738 assert_eq!(result.modified_count, 2);
1742 assert_eq!(result.moved_count, 1);
1743 assert!(result
1744 .changes
1745 .iter()
1746 .any(|change| change.change_type == ChangeType::Moved && change.entity_name == "foo"));
1747 }
1748
1749 #[test]
1750 fn explicit_ref_to_working_uses_index_rename_map_after_large_unstaged_rewrite() {
1751 let temp = TempDir::new().unwrap();
1752 let repo = Repository::init(temp.path()).unwrap();
1753
1754 let before_noise = (0..200)
1755 .map(|i| format!("// old filler {i} alpha beta gamma"))
1756 .collect::<Vec<_>>()
1757 .join("\n");
1758 let after_noise = (0..200)
1759 .map(|i| format!("// new filler {i} delta epsilon zeta"))
1760 .collect::<Vec<_>>()
1761 .join("\n");
1762 let before = format!(
1763 "{before_noise}\nexport function foo(x: number) {{\n return x + 1;\n}}\n"
1764 );
1765 let after = format!(
1766 "{after_noise}\nexport function foo(x: number) {{\n return x + 42;\n}}\n"
1767 );
1768
1769 commit_file(&repo, "a.ts", &before, "init");
1770
1771 fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1772 let mut index = repo.index().unwrap();
1773 index.remove_path(Path::new("a.ts")).unwrap();
1774 index.add_path(Path::new("b.ts")).unwrap();
1775 index.write().unwrap();
1776
1777 fs::write(temp.path().join("b.ts"), &after).unwrap();
1778
1779 let bridge = GitBridge::open(temp.path()).unwrap();
1780 let files = bridge
1781 .get_changed_files(
1782 &DiffScope::RefToWorking {
1783 refspec: "HEAD".to_string(),
1784 },
1785 &[],
1786 )
1787 .unwrap();
1788
1789 assert_eq!(files.len(), 1);
1790 assert_eq!(files[0].status, FileStatus::Renamed);
1791 assert_eq!(files[0].file_path, "b.ts");
1792 assert_eq!(files[0].old_file_path.as_deref(), Some("a.ts"));
1793 assert_eq!(files[0].before_content.as_deref(), Some(before.as_str()));
1794 assert_eq!(files[0].after_content.as_deref(), Some(after.as_str()));
1795
1796 let registry = create_default_registry();
1797 let result = compute_semantic_diff(&files, ®istry, None, None);
1798
1799 assert_eq!(result.added_count, 0);
1800 assert_eq!(result.deleted_count, 0);
1801 assert_eq!(result.modified_count, 2);
1805 assert_eq!(result.moved_count, 1);
1806 assert!(result
1807 .changes
1808 .iter()
1809 .any(|change| change.change_type == ChangeType::Moved && change.entity_name == "foo"));
1810 }
1811
1812 #[test]
1813 fn staged_rename_map_overrides_wrong_worktree_rename_pairing() {
1814 let temp = TempDir::new().unwrap();
1815 let repo = Repository::init(temp.path()).unwrap();
1816
1817 let a_before = "export function foo(x: number) {\n return x + 1;\n}\n";
1818 let c_before = "export function foo(x: number) {\n return x + 42;\n}\n";
1819
1820 commit_file(&repo, "a.ts", a_before, "init a");
1821 commit_file(&repo, "c.ts", c_before, "init c");
1822
1823 fs::rename(temp.path().join("a.ts"), temp.path().join("b.ts")).unwrap();
1824 let mut index = repo.index().unwrap();
1825 index.remove_path(Path::new("a.ts")).unwrap();
1826 index.add_path(Path::new("b.ts")).unwrap();
1827 index.write().unwrap();
1828
1829 fs::remove_file(temp.path().join("c.ts")).unwrap();
1830 fs::write(temp.path().join("b.ts"), c_before).unwrap();
1831
1832 let bridge = GitBridge::open(temp.path()).unwrap();
1833 let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
1834
1835 assert!(matches!(scope, DiffScope::Working));
1836 let renamed = files
1837 .iter()
1838 .find(|file| {
1839 file.status == FileStatus::Renamed
1840 && file.file_path == "b.ts"
1841 && file.old_file_path.as_deref() == Some("a.ts")
1842 })
1843 .unwrap();
1844 assert_eq!(renamed.before_content.as_deref(), Some(a_before));
1845 assert_eq!(renamed.after_content.as_deref(), Some(c_before));
1846
1847 let deleted = files
1848 .iter()
1849 .find(|file| file.status == FileStatus::Deleted && file.file_path == "c.ts")
1850 .unwrap();
1851 assert_eq!(deleted.before_content.as_deref(), Some(c_before));
1852 assert_eq!(deleted.after_content.as_deref(), None);
1853 assert!(!files.iter().any(|file| {
1854 file.status == FileStatus::Renamed
1855 && file.file_path == "b.ts"
1856 && file.old_file_path.as_deref() == Some("c.ts")
1857 }));
1858 }
1859
1860 #[test]
1861 fn staged_diff_with_base_ref_compares_index_to_that_ref() {
1862 let temp = TempDir::new().unwrap();
1863 let repo = Repository::init(temp.path()).unwrap();
1864
1865 let v1 = "def foo():\n return 1\n";
1866 let v2 = "def foo():\n return 2\n";
1867 let v3 = "def foo():\n return 3\n";
1868 let v4 = "def foo():\n return 4\n";
1869
1870 commit_file(&repo, "a.py", v1, "init");
1871 commit_file(&repo, "a.py", v2, "second");
1872 fs::write(temp.path().join("a.py"), v3).unwrap();
1873
1874 let mut index = repo.index().unwrap();
1875 index.add_path(Path::new("a.py")).unwrap();
1876 index.write().unwrap();
1877
1878 fs::write(temp.path().join("a.py"), v4).unwrap();
1879
1880 let bridge = GitBridge::open(temp.path()).unwrap();
1881 let files = bridge
1882 .get_staged_files_with_base_ref("HEAD~1", &[])
1883 .unwrap();
1884
1885 assert_eq!(files.len(), 1);
1886 assert_eq!(files[0].status, FileStatus::Modified);
1887 assert_eq!(files[0].file_path, "a.py");
1888 assert_eq!(files[0].before_content.as_deref(), Some(v1));
1889 assert_eq!(files[0].after_content.as_deref(), Some(v3));
1890
1891 let registry = create_default_registry();
1892 let result = compute_semantic_diff(&files, ®istry, None, None);
1893
1894 assert_eq!(result.modified_count, 1);
1895 assert_eq!(result.changes.len(), 1);
1896 assert_eq!(result.changes[0].change_type, ChangeType::Modified);
1897 assert_eq!(result.changes[0].entity_name, "foo");
1898 }
1899
1900 #[test]
1901 fn crlf_only_difference_in_working_file_is_invisible() {
1902 let temp = TempDir::new().unwrap();
1903 let repo = Repository::init(temp.path()).unwrap();
1904
1905 commit_file(&repo, "sample.rs", "fn a() {}\n", "init");
1906 fs::write(temp.path().join("sample.rs"), "fn a() {}\r\n").unwrap();
1907
1908 let bridge = GitBridge::open(temp.path()).unwrap();
1909 let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1910
1911 assert_eq!(files.len(), 1, "expected git to detect the CRLF change as modified");
1912
1913 let before = files[0].before_content.as_deref().unwrap();
1914 let after = files[0].after_content.as_deref().unwrap();
1915
1916 assert_eq!(before, after, "CRLF-only difference should be invisible after normalization");
1917 }
1918
1919 #[test]
1920 fn crlf_stored_in_blob_is_normalized_on_read() {
1921 let temp = TempDir::new().unwrap();
1922 let repo = Repository::init(temp.path()).unwrap();
1923
1924 repo.config().unwrap().set_str("core.autocrlf", "false").unwrap();
1925 commit_file(&repo, "sample.rs", "fn a() {}\r\n", "init");
1926 fs::write(temp.path().join("sample.rs"), "fn a() {}\r\nfn b() {}\r\n").unwrap();
1927
1928 let bridge = GitBridge::open(temp.path()).unwrap();
1929 let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
1930
1931 assert_eq!(files.len(), 1, "expected git to detect the modification");
1932
1933 let before = files[0].before_content.as_deref().unwrap();
1934 assert!(!before.contains('\r'), "before_content read from CRLF blob should be normalized to LF");
1935 }
1936}