1use std::env;
2use std::fs;
3use std::path::{Path, PathBuf};
4use std::sync::{Mutex, OnceLock};
5
6use git2::{Blame, Delta, Diff, DiffOptions, ErrorCode, Oid, Repository};
7use thiserror::Error;
8
9use super::types::{CommitInfo, DiffScope, FileChange, FileStatus};
10
11#[derive(Error, Debug)]
12pub enum GitError {
13 #[error("not a git repository")]
14 NotARepo,
15 #[error("git error: {0}")]
16 Git2(#[from] git2::Error),
17 #[error("io error: {0}")]
18 Io(#[from] std::io::Error),
19}
20
21pub struct GitBridge {
22 repo: Repository,
23 repo_root: PathBuf,
24}
25
26impl GitBridge {
27 pub fn open(path: &Path) -> Result<Self, GitError> {
28 let repo = match Repository::discover(path) {
29 Ok(repo) => repo,
30 Err(error) if should_retry_with_command_line_safe_directory(&error, path) => {
31 let _guard = owner_validation_lock()
32 .lock()
33 .unwrap_or_else(|poisoned| poisoned.into_inner());
34 let _owner_validation = OwnerValidationDisabled::new()?;
35 let repo = Repository::discover(path);
36 repo.map_err(map_git_error)?
37 }
38 Err(error) => return Err(map_git_error(error)),
39 };
40 let repo_root = repo
41 .workdir()
42 .ok_or(GitError::NotARepo)?
43 .to_path_buf();
44 Ok(Self { repo, repo_root })
45 }
46
47 pub fn repo_root(&self) -> &Path {
48 &self.repo_root
49 }
50
51 pub fn blame_file(&self, file_path: &Path) -> Result<Blame<'_>, GitError> {
52 Ok(self.repo.blame_file(file_path, None)?)
53 }
54
55 pub fn commit_summary(&self, oid: Oid) -> Option<String> {
56 self.repo
57 .find_commit(oid)
58 .ok()
59 .and_then(|commit| commit.summary().map(String::from))
60 }
61
62 pub fn get_head_sha(&self) -> Result<String, GitError> {
63 let head = self.repo.head()?;
64 let oid = head.target().ok_or_else(|| {
65 git2::Error::from_str("HEAD has no target")
66 })?;
67 Ok(oid.to_string())
68 }
69
70 pub fn detect_and_get_files(&self, pathspecs: &[String]) -> Result<(DiffScope, Vec<FileChange>), GitError> {
74 let mut working_files = self.get_working_diff_files(pathspecs)?;
76 if !working_files.is_empty() {
77 self.populate_contents(&mut working_files, &DiffScope::Working)?;
78 return Ok((DiffScope::Working, working_files));
79 }
80
81 Ok((DiffScope::Working, Vec::new()))
83 }
84
85 pub fn get_changed_files(&self, scope: &DiffScope, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
87 let mut files = match scope {
88 DiffScope::Working => {
89 self.get_working_diff_files(pathspecs)?
90 }
91 DiffScope::Staged => self.get_staged_diff_files(pathspecs)?,
92 DiffScope::Commit { sha } => self.get_commit_diff_files(sha, pathspecs)?,
93 DiffScope::Range { from, to } => self.get_range_diff_files(from, to, pathspecs)?,
94 DiffScope::RefToWorking { refspec } => self.get_ref_to_working_diff_files(refspec, pathspecs)?,
95 };
96
97 files.retain(|f| !f.file_path.starts_with(".sem/"));
99
100 self.populate_contents(&mut files, scope)?;
101 Ok(files)
102 }
103
104 pub fn resolve_merge_base(&self, ref1: &str, ref2: &str) -> Result<String, GitError> {
106 let obj1 = self.repo.revparse_single(ref1)?;
107 let obj2 = self.repo.revparse_single(ref2)?;
108 let oid = self.repo.merge_base(obj1.id(), obj2.id())?;
109 Ok(oid.to_string())
110 }
111
112 pub fn is_valid_rev(&self, refspec: &str) -> bool {
114 self.repo.revparse_single(refspec).is_ok()
115 }
116
117 fn make_diff_opts(pathspecs: &[String]) -> DiffOptions {
118 let mut opts = DiffOptions::new();
119 for spec in pathspecs {
120 opts.pathspec(spec.as_str());
121 }
122 opts
123 }
124
125 fn get_staged_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
126 let head_tree = match self.repo.head() {
127 Ok(head) => {
128 let commit = head.peel_to_commit()?;
129 Some(commit.tree()?)
130 }
131 Err(_) => None, };
133
134 let mut opts = Self::make_diff_opts(pathspecs);
135 let diff = self.repo.diff_tree_to_index(
136 head_tree.as_ref(),
137 Some(&self.repo.index()?),
138 Some(&mut opts),
139 )?;
140
141 Ok(self.diff_to_file_changes(&diff))
142 }
143
144 fn get_working_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
145 let mut opts = Self::make_diff_opts(pathspecs);
146 opts.include_untracked(false);
147
148 let diff = self.repo.diff_index_to_workdir(None, Some(&mut opts))?;
149 Ok(self.diff_to_file_changes(&diff))
150 }
151
152 fn get_commit_diff_files(&self, sha: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
153 let obj = self.repo.revparse_single(sha)?;
154 let commit = obj.peel_to_commit()?;
155 let tree = commit.tree()?;
156
157 let parent_tree = if commit.parent_count() > 0 {
158 Some(commit.parent(0)?.tree()?)
159 } else {
160 None
161 };
162
163 let mut opts = Self::make_diff_opts(pathspecs);
164 let diff = self.repo.diff_tree_to_tree(
165 parent_tree.as_ref(),
166 Some(&tree),
167 Some(&mut opts),
168 )?;
169
170 Ok(self.diff_to_file_changes(&diff))
171 }
172
173 fn get_range_diff_files(&self, from: &str, to: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
174 let from_obj = self.repo.revparse_single(from)?;
175 let to_obj = self.repo.revparse_single(to)?;
176
177 let from_tree = from_obj.peel_to_commit()?.tree()?;
178 let to_tree = to_obj.peel_to_commit()?.tree()?;
179
180 let mut opts = Self::make_diff_opts(pathspecs);
181 let diff = self.repo.diff_tree_to_tree(
182 Some(&from_tree),
183 Some(&to_tree),
184 Some(&mut opts),
185 )?;
186
187 Ok(self.diff_to_file_changes(&diff))
188 }
189
190 fn get_ref_to_working_diff_files(&self, refspec: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
191 let tree = self.resolve_tree(refspec)?;
192 let mut opts = Self::make_diff_opts(pathspecs);
193 let diff = self.repo.diff_tree_to_workdir_with_index(
194 Some(&tree),
195 Some(&mut opts),
196 )?;
197 Ok(self.diff_to_file_changes(&diff))
198 }
199
200 fn diff_to_file_changes(&self, diff: &Diff) -> Vec<FileChange> {
201 let mut files = Vec::new();
202
203 for delta in diff.deltas() {
204 let (status, file_path, old_file_path) = match delta.status() {
205 Delta::Added => {
206 let path = delta
207 .new_file()
208 .path()
209 .and_then(|p| p.to_str())
210 .unwrap_or("")
211 .to_string();
212 (FileStatus::Added, path, None)
213 }
214 Delta::Deleted => {
215 let path = delta
216 .old_file()
217 .path()
218 .and_then(|p| p.to_str())
219 .unwrap_or("")
220 .to_string();
221 (FileStatus::Deleted, path, None)
222 }
223 Delta::Modified => {
224 let path = delta
225 .new_file()
226 .path()
227 .and_then(|p| p.to_str())
228 .unwrap_or("")
229 .to_string();
230 (FileStatus::Modified, path, None)
231 }
232 Delta::Renamed => {
233 let new_path = delta
234 .new_file()
235 .path()
236 .and_then(|p| p.to_str())
237 .unwrap_or("")
238 .to_string();
239 let old_path = delta
240 .old_file()
241 .path()
242 .and_then(|p| p.to_str())
243 .unwrap_or("")
244 .to_string();
245 (FileStatus::Renamed, new_path, Some(old_path))
246 }
247 _ => continue,
248 };
249
250 if !file_path.starts_with(".sem/") {
251 files.push(FileChange {
252 file_path,
253 status,
254 old_file_path,
255 before_content: None,
256 after_content: None,
257 });
258 }
259 }
260
261 files
262 }
263
264 fn populate_contents(
265 &self,
266 files: &mut [FileChange],
267 scope: &DiffScope,
268 ) -> Result<(), GitError> {
269 match scope {
270 DiffScope::Working => {
271 let head_tree = self.resolve_tree("HEAD").ok();
273 for file in files.iter_mut() {
274 if file.status != FileStatus::Deleted {
275 file.after_content = self.read_working_file(&file.file_path);
276 }
277 if file.status != FileStatus::Added {
278 file.before_content = head_tree
279 .as_ref()
280 .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
281 }
282 }
283 }
284 DiffScope::Staged => {
285 let head_tree = self.resolve_tree("HEAD").ok();
286 for file in files.iter_mut() {
287 if file.status != FileStatus::Deleted {
288 file.after_content = self
289 .read_index_file(&file.file_path)
290 .or_else(|| self.read_working_file(&file.file_path));
291 }
292 if file.status != FileStatus::Added {
293 file.before_content = head_tree
294 .as_ref()
295 .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
296 }
297 }
298 }
299 DiffScope::Commit { sha } => {
300 let after_tree = self.resolve_tree(sha)?;
302 let before_tree = self.resolve_tree(&format!("{sha}~1")).ok();
303 for file in files.iter_mut() {
304 if file.status != FileStatus::Deleted {
305 file.after_content =
306 self.read_blob_from_tree(&after_tree, &file.file_path);
307 }
308 if file.status != FileStatus::Added {
309 file.before_content = before_tree
310 .as_ref()
311 .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
312 }
313 }
314 }
315 DiffScope::Range { from, to } => {
316 let after_tree = self.resolve_tree(to)?;
317 let before_tree = self.resolve_tree(from)?;
318 for file in files.iter_mut() {
319 if file.status != FileStatus::Deleted {
320 file.after_content =
321 self.read_blob_from_tree(&after_tree, &file.file_path);
322 }
323 if file.status != FileStatus::Added {
324 let path = file
325 .old_file_path
326 .as_deref()
327 .unwrap_or(&file.file_path);
328 file.before_content =
329 self.read_blob_from_tree(&before_tree, path);
330 }
331 }
332 }
333 DiffScope::RefToWorking { refspec } => {
334 let before_tree = self.resolve_tree(refspec)?;
335 for file in files.iter_mut() {
336 if file.status != FileStatus::Deleted {
337 file.after_content = self.read_working_file(&file.file_path);
338 }
339 if file.status != FileStatus::Added {
340 file.before_content =
341 self.read_blob_from_tree(&before_tree, &file.file_path);
342 }
343 }
344 }
345 }
346 Ok(())
347 }
348
349 fn resolve_tree(&self, refspec: &str) -> Result<git2::Tree<'_>, GitError> {
350 let obj = self.repo.revparse_single(refspec)?;
351 let commit = obj.peel_to_commit()?;
352 Ok(commit.tree()?)
353 }
354
/// Rewrites every `\r\n` pair and every lone `\r` in `s` to `\n`.
///
/// Input without any `\r` is handed back untouched.
fn normalize_line_endings(s: String) -> String {
    if !s.contains('\r') {
        return s;
    }

    let mut normalized = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch != '\r' {
            normalized.push(ch);
            continue;
        }
        // A `\r` followed by `\n` collapses into a single newline.
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
        normalized.push('\n');
    }
    normalized
}
362
363 fn read_blob_from_tree(&self, tree: &git2::Tree, file_path: &str) -> Option<String> {
364 let entry = tree.get_path(Path::new(file_path)).ok()?;
365 let blob = self.repo.find_blob(entry.id()).ok()?;
366 std::str::from_utf8(blob.content())
367 .ok()
368 .map(|s| Self::normalize_line_endings(s.to_string()))
369 }
370
371 fn read_working_file(&self, file_path: &str) -> Option<String> {
372 let full_path = self.repo_root.join(file_path);
373 fs::read_to_string(full_path)
374 .ok()
375 .map(Self::normalize_line_endings)
376 }
377
378 fn read_index_file(&self, file_path: &str) -> Option<String> {
379 let index = self.repo.index().ok()?;
380 let entry = index.get_path(Path::new(file_path), 0)?;
381 let blob = self.repo.find_blob(entry.id).ok()?;
382 std::str::from_utf8(blob.content())
383 .ok()
384 .map(|s| Self::normalize_line_endings(s.to_string()))
385 }
386
387
388 pub fn read_file_at_ref(&self, refspec: &str, file_path: &str) -> Result<Option<String>, GitError> {
390 let tree = self.resolve_tree(refspec)?;
391 Ok(self.read_blob_from_tree(&tree, file_path))
392 }
393
394 pub fn get_file_commits(&self, file_path: &str, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
397 let mut revwalk = self.repo.revwalk()?;
398 revwalk.push_head()?;
399 revwalk.set_sorting(git2::Sort::TIME)?;
400
401 let mut commits = Vec::new();
402 let path = Path::new(file_path);
403
404 for oid_result in revwalk {
405 let oid = oid_result?;
406 let commit = self.repo.find_commit(oid)?;
407 let tree = commit.tree()?;
408
409 let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
411
412 let file_in_parent = if commit.parent_count() > 0 {
414 commit.parent(0)
415 .ok()
416 .and_then(|p| p.tree().ok())
417 .and_then(|t| t.get_path(path).ok().map(|e| e.id()))
418 } else {
419 None };
421
422 let changed = match (file_in_commit, file_in_parent) {
424 (Some(cur), Some(prev)) => cur != prev, (Some(_), None) => true, (None, Some(_)) => true, (None, None) => false, };
429
430 if changed {
431 let sha = oid.to_string();
432 commits.push(CommitInfo {
433 short_sha: sha[..7.min(sha.len())].to_string(),
434 sha,
435 author: commit.author().name().unwrap_or("unknown").to_string(),
436 date: commit.time().seconds().to_string(),
437 message: commit.message().unwrap_or("").to_string(),
438 });
439
440 if commits.len() >= limit {
441 break;
442 }
443 }
444 }
445
446 Ok(commits)
447 }
448
449 pub fn get_commit_changed_files(&self, sha: &str) -> Result<Vec<String>, GitError> {
452 let obj = self.repo.revparse_single(sha)?;
453 let commit = obj.peel_to_commit()?;
454 let tree = commit.tree()?;
455 let parent_tree = if commit.parent_count() > 0 {
456 Some(commit.parent(0)?.tree()?)
457 } else {
458 None
459 };
460 let diff = self.repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None)?;
461 let mut paths = Vec::new();
462 for delta in diff.deltas() {
463 if let Some(p) = delta.new_file().path().and_then(|p| p.to_str()) {
464 paths.push(p.to_string());
465 }
466 if let Some(p) = delta.old_file().path().and_then(|p| p.to_str()) {
468 if !paths.contains(&p.to_string()) {
469 paths.push(p.to_string());
470 }
471 }
472 }
473 Ok(paths)
474 }
475
476 pub fn get_log(&self, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
477 let mut revwalk = self.repo.revwalk()?;
478 revwalk.push_head()?;
479
480 let mut commits = Vec::new();
481 for (i, oid_result) in revwalk.enumerate() {
482 if i >= limit {
483 break;
484 }
485 let oid = oid_result?;
486 let commit = self.repo.find_commit(oid)?;
487 let sha = oid.to_string();
488 commits.push(CommitInfo {
489 short_sha: sha[..7.min(sha.len())].to_string(),
490 sha,
491 author: commit.author().name().unwrap_or("unknown").to_string(),
492 date: commit.time().seconds().to_string(),
493 message: commit.message().unwrap_or("").to_string(),
494 });
495 }
496
497 Ok(commits)
498 }
499}
500
501fn map_git_error(error: git2::Error) -> GitError {
502 if error.code() == ErrorCode::NotFound {
503 GitError::NotARepo
504 } else {
505 GitError::Git2(error)
506 }
507}
508
509fn should_retry_with_command_line_safe_directory(error: &git2::Error, path: &Path) -> bool {
510 let safe_directories = command_line_safe_directories();
511 should_retry_with_safe_directory(error, path, &safe_directories)
512}
513
514fn should_retry_with_safe_directory(error: &git2::Error, path: &Path, safe_directories: &[String]) -> bool {
515 error.code() == ErrorCode::Owner
516 && nearest_git_root(path).is_some_and(|repo_root| {
517 safe_directories.iter().any(|safe_directory| {
518 safe_directory == "*"
519 || paths_match(&repo_root, Path::new(safe_directory))
520 })
521 })
522}
523
/// Collects the values of all `safe.directory` entries passed through the
/// `GIT_CONFIG_COUNT` / `GIT_CONFIG_KEY_<n>` / `GIT_CONFIG_VALUE_<n>`
/// environment variables.
///
/// A missing or unparsable count is treated as zero. The key comparison is
/// ASCII case-insensitive; entries whose key or value variable cannot be
/// read are skipped.
fn command_line_safe_directories() -> Vec<String> {
    let declared = match env::var("GIT_CONFIG_COUNT") {
        Ok(raw) => raw.parse::<usize>().unwrap_or_default(),
        Err(_) => 0,
    };

    let mut directories = Vec::new();
    for index in 0..declared {
        let Ok(key) = env::var(format!("GIT_CONFIG_KEY_{index}")) else {
            continue;
        };
        if !key.eq_ignore_ascii_case("safe.directory") {
            continue;
        }
        if let Ok(value) = env::var(format!("GIT_CONFIG_VALUE_{index}")) {
            directories.push(value);
        }
    }
    directories
}
541
/// Walks upward from `path` and returns the first directory that contains a
/// `.git` entry.
///
/// When `path` is a file, the search starts at its parent directory. The
/// result is canonicalized when possible and returned as-is otherwise.
/// Returns `None` when no ancestor contains `.git`.
fn nearest_git_root(path: &Path) -> Option<PathBuf> {
    let start = if path.is_file() { path.parent()? } else { path };

    start
        .ancestors()
        .find(|dir| dir.join(".git").exists())
        .map(|dir| fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf()))
}
557
/// Compares two paths after canonicalizing each one where possible (a path
/// that cannot be canonicalized is compared as given).
///
/// On Windows builds the comparison is ASCII case-insensitive over the
/// lossy string form; elsewhere the paths must be equal.
fn paths_match(left: &Path, right: &Path) -> bool {
    let resolve = |path: &Path| fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
    let (lhs, rhs) = (resolve(left), resolve(right));

    if !cfg!(windows) {
        return lhs == rhs;
    }

    lhs.to_string_lossy()
        .eq_ignore_ascii_case(&rhs.to_string_lossy())
}
569
/// Returns the single process-wide mutex, created on first use, that
/// `GitBridge::open` holds while owner validation is switched off.
fn owner_validation_lock() -> &'static Mutex<()> {
    static CELL: OnceLock<Mutex<()>> = OnceLock::new();
    CELL.get_or_init(Mutex::default)
}
574
575struct OwnerValidationDisabled;
576
577impl OwnerValidationDisabled {
578 fn new() -> Result<Self, GitError> {
579 unsafe { git2::opts::set_verify_owner_validation(false)? };
581 Ok(Self)
582 }
583}
584
585impl Drop for OwnerValidationDisabled {
586 fn drop(&mut self) {
587 unsafe {
589 let _ = git2::opts::set_verify_owner_validation(true);
590 }
591 }
592}
593
594#[cfg(test)]
595mod tests {
596 use super::*;
597 use git2::{ErrorClass, Oid, Repository, Signature};
598 use tempfile::TempDir;
599
600 fn commit_file(repo: &Repository, file_path: &str, contents: &str, message: &str) -> Oid {
601 fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
602
603 let mut index = repo.index().unwrap();
604 index.add_path(Path::new(file_path)).unwrap();
605 index.write().unwrap();
606
607 let tree_id = index.write_tree().unwrap();
608 let tree = repo.find_tree(tree_id).unwrap();
609 let sig = Signature::now("Test User", "test@example.com").unwrap();
610
611 match repo.head() {
612 Ok(head) => {
613 let parent = repo.find_commit(head.target().unwrap()).unwrap();
614 repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
615 .unwrap()
616 }
617 Err(_) => repo
618 .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
619 .unwrap(),
620 }
621 }
622
/// With nothing uncommitted, auto-detection reports the working scope with
/// no files instead of showing the last commit.
#[test]
fn clean_worktree_does_not_fall_back_to_head_commit() {
    let workspace = TempDir::new().unwrap();
    let repo = Repository::init(workspace.path()).unwrap();

    // Two commits to the same file, leaving the working tree clean.
    for (body, message) in [
        ("export function a() {\n return 1;\n}\n", "init"),
        ("export function a() {\n return 2;\n}\n", "change a"),
    ] {
        commit_file(&repo, "sample.ts", body, message);
    }

    let bridge = GitBridge::open(workspace.path()).unwrap();
    let (detected_scope, changed) = bridge.detect_and_get_files(&[]).unwrap();

    assert!(matches!(detected_scope, DiffScope::Working));
    assert!(changed.is_empty());
}
642
/// The retry decision requires an ownership error AND a matching
/// `safe.directory` entry: a matching directory allows the retry, a
/// different directory does not, and a non-ownership error never retries
/// even with the `*` wildcard.
#[test]
fn owner_error_retries_for_command_line_safe_directory() {
    let temp = TempDir::new().unwrap();
    Repository::init(temp.path()).unwrap();

    let owner_error = git2::Error::new(
        ErrorCode::Owner,
        ErrorClass::Config,
        "owner mismatch",
    );
    let safe_directories = [temp.path().to_string_lossy().to_string()];

    assert!(should_retry_with_safe_directory(
        &owner_error,
        temp.path(),
        &safe_directories,
    ));

    let other_directories = [temp.path().join("other").to_string_lossy().to_string()];
    assert!(!should_retry_with_safe_directory(
        &owner_error,
        temp.path(),
        &other_directories,
    ));

    let not_found_error = git2::Error::new(
        ErrorCode::NotFound,
        ErrorClass::Repository,
        "not found",
    );
    // Pass the error by reference, as in the two calls above.
    assert!(!should_retry_with_safe_directory(
        &not_found_error,
        temp.path(),
        &["*".to_string()],
    ));
}
679
/// Asking explicitly for a commit scope reports that commit's change even
/// though the working tree is clean.
#[test]
fn explicit_commit_scope_still_reads_head_commit_diff() {
    let workspace = TempDir::new().unwrap();
    let repo = Repository::init(workspace.path()).unwrap();

    commit_file(&repo, "sample.ts", "export function a() {\n return 1;\n}\n", "init");
    let latest = commit_file(
        &repo,
        "sample.ts",
        "export function a() {\n return 2;\n}\n",
        "change a",
    );

    let bridge = GitBridge::open(workspace.path()).unwrap();
    let scope = DiffScope::Commit {
        sha: latest.to_string(),
    };
    let changed = bridge.get_changed_files(&scope, &[]).unwrap();

    assert_eq!(changed.len(), 1);
    let only = &changed[0];
    assert_eq!(only.file_path, "sample.ts");
    assert_eq!(only.status, FileStatus::Modified);
}
704
/// A working file that differs from the committed one only by CRLF line
/// endings is still listed, but its before and after contents compare equal
/// once line endings are normalized.
#[test]
fn crlf_only_difference_in_working_file_is_invisible() {
    let workspace = TempDir::new().unwrap();
    let repo = Repository::init(workspace.path()).unwrap();

    commit_file(&repo, "sample.rs", "fn a() {}\n", "init");
    // Same text, rewritten on disk with a CRLF terminator.
    fs::write(workspace.path().join("sample.rs"), "fn a() {}\r\n").unwrap();

    let bridge = GitBridge::open(workspace.path()).unwrap();
    let changed = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();

    assert_eq!(changed.len(), 1, "expected git to detect the CRLF change as modified");

    let change = &changed[0];
    let before = change.before_content.as_deref().unwrap();
    let after = change.after_content.as_deref().unwrap();

    assert_eq!(before, after, "CRLF-only difference should be invisible after normalization");
}
723
/// Content committed with CRLF line endings (autocrlf disabled) comes back
/// from the bridge with LF-only line endings on the "before" side.
#[test]
fn crlf_stored_in_blob_is_normalized_on_read() {
    let workspace = TempDir::new().unwrap();
    let repo = Repository::init(workspace.path()).unwrap();

    // Keep git from converting line endings so the blob really holds CRLF.
    let mut config = repo.config().unwrap();
    config.set_str("core.autocrlf", "false").unwrap();

    commit_file(&repo, "sample.rs", "fn a() {}\r\n", "init");
    fs::write(workspace.path().join("sample.rs"), "fn a() {}\r\nfn b() {}\r\n").unwrap();

    let bridge = GitBridge::open(workspace.path()).unwrap();
    let changed = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();

    assert_eq!(changed.len(), 1, "expected git to detect the modification");

    let before = changed[0].before_content.as_deref().unwrap();
    assert!(!before.contains('\r'), "before_content read from CRLF blob should be normalized to LF");
}
741}