1use std::env;
2use std::fs;
3use std::path::{Path, PathBuf};
4use std::sync::{Mutex, OnceLock};
5
6use git2::{Blame, Delta, Diff, DiffOptions, ErrorCode, Oid, Repository};
7use thiserror::Error;
8
9use super::types::{CommitInfo, DiffScope, FileChange, FileStatus};
10
/// Errors returned by [`GitBridge`] and the helper functions in this file.
#[derive(Error, Debug)]
pub enum GitError {
    /// No usable repository: discovery reported "not found", or the repository
    /// that was found has no working directory.
    #[error("not a git repository")]
    NotARepo,
    /// Any other failure reported by `git2`.
    #[error("git error: {0}")]
    Git2(#[from] git2::Error),
    /// Wraps a `std::io::Error`.
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
}
20
/// An opened `git2` repository together with the path of its working directory.
pub struct GitBridge {
    /// Handle to the discovered repository.
    repo: Repository,
    /// Working directory of `repo`, captured once when the bridge is opened.
    repo_root: PathBuf,
}
25
26impl GitBridge {
27 pub fn open(path: &Path) -> Result<Self, GitError> {
28 let repo = match Repository::discover(path) {
29 Ok(repo) => repo,
30 Err(error) if should_retry_with_command_line_safe_directory(&error, path) => {
31 let _guard = owner_validation_lock()
32 .lock()
33 .unwrap_or_else(|poisoned| poisoned.into_inner());
34 let _owner_validation = OwnerValidationDisabled::new()?;
35 let repo = Repository::discover(path);
36 repo.map_err(map_git_error)?
37 }
38 Err(error) => return Err(map_git_error(error)),
39 };
40 let repo_root = repo
41 .workdir()
42 .ok_or(GitError::NotARepo)?
43 .to_path_buf();
44 Ok(Self { repo, repo_root })
45 }
46
47 pub fn repo_root(&self) -> &Path {
48 &self.repo_root
49 }
50
51 pub fn blame_file(&self, file_path: &Path) -> Result<Blame<'_>, GitError> {
52 Ok(self.repo.blame_file(file_path, None)?)
53 }
54
55 pub fn commit_summary(&self, oid: Oid) -> Option<String> {
56 self.repo
57 .find_commit(oid)
58 .ok()
59 .and_then(|commit| commit.summary().map(String::from))
60 }
61
62 pub fn get_head_sha(&self) -> Result<String, GitError> {
63 let head = self.repo.head()?;
64 let oid = head.target().ok_or_else(|| {
65 git2::Error::from_str("HEAD has no target")
66 })?;
67 Ok(oid.to_string())
68 }
69
70 pub fn detect_and_get_files(&self, pathspecs: &[String]) -> Result<(DiffScope, Vec<FileChange>), GitError> {
74 let mut working_files = self.get_working_diff_files(pathspecs)?;
76 if !working_files.is_empty() {
77 self.populate_contents(&mut working_files, &DiffScope::Working)?;
78 return Ok((DiffScope::Working, working_files));
79 }
80
81 Ok((DiffScope::Working, Vec::new()))
83 }
84
85 pub fn get_changed_files(&self, scope: &DiffScope, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
87 let mut files = match scope {
88 DiffScope::Working => {
89 self.get_working_diff_files(pathspecs)?
90 }
91 DiffScope::Staged => self.get_staged_diff_files(pathspecs)?,
92 DiffScope::Commit { sha } => self.get_commit_diff_files(sha, pathspecs)?,
93 DiffScope::Range { from, to } => self.get_range_diff_files(from, to, pathspecs)?,
94 DiffScope::RefToWorking { refspec } => self.get_ref_to_working_diff_files(refspec, pathspecs)?,
95 };
96
97 files.retain(|f| !f.file_path.starts_with(".sem/"));
99
100 self.populate_contents(&mut files, scope)?;
101 Ok(files)
102 }
103
104 pub fn resolve_merge_base(&self, ref1: &str, ref2: &str) -> Result<String, GitError> {
106 let obj1 = self.repo.revparse_single(ref1)?;
107 let obj2 = self.repo.revparse_single(ref2)?;
108 let oid = self.repo.merge_base(obj1.id(), obj2.id())?;
109 Ok(oid.to_string())
110 }
111
112 pub fn is_valid_rev(&self, refspec: &str) -> bool {
114 self.repo.revparse_single(refspec).is_ok()
115 }
116
117 fn make_diff_opts(pathspecs: &[String]) -> DiffOptions {
118 let mut opts = DiffOptions::new();
119 for spec in pathspecs {
120 opts.pathspec(spec.as_str());
121 }
122 opts
123 }
124
125 fn get_staged_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
126 let head_tree = match self.repo.head() {
127 Ok(head) => {
128 let commit = head.peel_to_commit()?;
129 Some(commit.tree()?)
130 }
131 Err(_) => None, };
133
134 let mut opts = Self::make_diff_opts(pathspecs);
135 let diff = self.repo.diff_tree_to_index(
136 head_tree.as_ref(),
137 Some(&self.repo.index()?),
138 Some(&mut opts),
139 )?;
140
141 Ok(self.diff_to_file_changes(&diff))
142 }
143
144 fn get_working_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
145 let mut opts = Self::make_diff_opts(pathspecs);
146 opts.include_untracked(false);
147
148 let diff = self.repo.diff_index_to_workdir(None, Some(&mut opts))?;
149 Ok(self.diff_to_file_changes(&diff))
150 }
151
152 fn get_commit_diff_files(&self, sha: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
153 let obj = self.repo.revparse_single(sha)?;
154 let commit = obj.peel_to_commit()?;
155 let tree = commit.tree()?;
156
157 let parent_tree = if commit.parent_count() > 0 {
158 Some(commit.parent(0)?.tree()?)
159 } else {
160 None
161 };
162
163 let mut opts = Self::make_diff_opts(pathspecs);
164 let diff = self.repo.diff_tree_to_tree(
165 parent_tree.as_ref(),
166 Some(&tree),
167 Some(&mut opts),
168 )?;
169
170 Ok(self.diff_to_file_changes(&diff))
171 }
172
173 fn get_range_diff_files(&self, from: &str, to: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
174 let from_obj = self.repo.revparse_single(from)?;
175 let to_obj = self.repo.revparse_single(to)?;
176
177 let from_tree = from_obj.peel_to_commit()?.tree()?;
178 let to_tree = to_obj.peel_to_commit()?.tree()?;
179
180 let mut opts = Self::make_diff_opts(pathspecs);
181 let diff = self.repo.diff_tree_to_tree(
182 Some(&from_tree),
183 Some(&to_tree),
184 Some(&mut opts),
185 )?;
186
187 Ok(self.diff_to_file_changes(&diff))
188 }
189
190 fn get_ref_to_working_diff_files(&self, refspec: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
191 let tree = self.resolve_tree(refspec)?;
192 let mut opts = Self::make_diff_opts(pathspecs);
193 let diff = self.repo.diff_tree_to_workdir_with_index(
194 Some(&tree),
195 Some(&mut opts),
196 )?;
197 Ok(self.diff_to_file_changes(&diff))
198 }
199
200 fn diff_to_file_changes(&self, diff: &Diff) -> Vec<FileChange> {
201 let mut files = Vec::new();
202
203 for delta in diff.deltas() {
204 let (status, file_path, old_file_path) = match delta.status() {
205 Delta::Added => {
206 let path = delta
207 .new_file()
208 .path()
209 .and_then(|p| p.to_str())
210 .unwrap_or("")
211 .to_string();
212 (FileStatus::Added, path, None)
213 }
214 Delta::Deleted => {
215 let path = delta
216 .old_file()
217 .path()
218 .and_then(|p| p.to_str())
219 .unwrap_or("")
220 .to_string();
221 (FileStatus::Deleted, path, None)
222 }
223 Delta::Modified => {
224 let path = delta
225 .new_file()
226 .path()
227 .and_then(|p| p.to_str())
228 .unwrap_or("")
229 .to_string();
230 (FileStatus::Modified, path, None)
231 }
232 Delta::Renamed => {
233 let new_path = delta
234 .new_file()
235 .path()
236 .and_then(|p| p.to_str())
237 .unwrap_or("")
238 .to_string();
239 let old_path = delta
240 .old_file()
241 .path()
242 .and_then(|p| p.to_str())
243 .unwrap_or("")
244 .to_string();
245 (FileStatus::Renamed, new_path, Some(old_path))
246 }
247 _ => continue,
248 };
249
250 if !file_path.starts_with(".sem/") {
251 files.push(FileChange {
252 file_path,
253 status,
254 old_file_path,
255 before_content: None,
256 after_content: None,
257 });
258 }
259 }
260
261 files
262 }
263
264 fn populate_contents(
265 &self,
266 files: &mut [FileChange],
267 scope: &DiffScope,
268 ) -> Result<(), GitError> {
269 match scope {
270 DiffScope::Working => {
271 let head_tree = self.resolve_tree("HEAD").ok();
273 for file in files.iter_mut() {
274 if file.status != FileStatus::Deleted {
275 file.after_content = self.read_working_file(&file.file_path);
276 }
277 if file.status != FileStatus::Added {
278 file.before_content = head_tree
279 .as_ref()
280 .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
281 }
282 }
283 }
284 DiffScope::Staged => {
285 let head_tree = self.resolve_tree("HEAD").ok();
286 for file in files.iter_mut() {
287 if file.status != FileStatus::Deleted {
288 file.after_content = self
289 .read_index_file(&file.file_path)
290 .or_else(|| self.read_working_file(&file.file_path));
291 }
292 if file.status != FileStatus::Added {
293 file.before_content = head_tree
294 .as_ref()
295 .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
296 }
297 }
298 }
299 DiffScope::Commit { sha } => {
300 let after_tree = self.resolve_tree(sha)?;
302 let before_tree = self.resolve_tree(&format!("{sha}~1")).ok();
303 for file in files.iter_mut() {
304 if file.status != FileStatus::Deleted {
305 file.after_content =
306 self.read_blob_from_tree(&after_tree, &file.file_path);
307 }
308 if file.status != FileStatus::Added {
309 file.before_content = before_tree
310 .as_ref()
311 .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
312 }
313 }
314 }
315 DiffScope::Range { from, to } => {
316 let after_tree = self.resolve_tree(to)?;
317 let before_tree = self.resolve_tree(from)?;
318 for file in files.iter_mut() {
319 if file.status != FileStatus::Deleted {
320 file.after_content =
321 self.read_blob_from_tree(&after_tree, &file.file_path);
322 }
323 if file.status != FileStatus::Added {
324 let path = file
325 .old_file_path
326 .as_deref()
327 .unwrap_or(&file.file_path);
328 file.before_content =
329 self.read_blob_from_tree(&before_tree, path);
330 }
331 }
332 }
333 DiffScope::RefToWorking { refspec } => {
334 let before_tree = self.resolve_tree(refspec)?;
335 for file in files.iter_mut() {
336 if file.status != FileStatus::Deleted {
337 file.after_content = self.read_working_file(&file.file_path);
338 }
339 if file.status != FileStatus::Added {
340 file.before_content =
341 self.read_blob_from_tree(&before_tree, &file.file_path);
342 }
343 }
344 }
345 }
346 Ok(())
347 }
348
349 fn resolve_tree(&self, refspec: &str) -> Result<git2::Tree<'_>, GitError> {
350 let obj = self.repo.revparse_single(refspec)?;
351 let commit = obj.peel_to_commit()?;
352 Ok(commit.tree()?)
353 }
354
/// Converts every `\r\n` pair and every lone `\r` in `s` to `\n`.
///
/// A string without any `\r` is handed back unchanged.
fn normalize_line_endings(s: String) -> String {
    if !s.contains('\r') {
        return s;
    }

    let mut normalized = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch != '\r' {
            normalized.push(ch);
            continue;
        }
        // A `\r` directly followed by `\n` is one line break: swallow the `\n` too.
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
        normalized.push('\n');
    }
    normalized
}
362
363 fn read_blob_from_tree(&self, tree: &git2::Tree, file_path: &str) -> Option<String> {
364 let entry = tree.get_path(Path::new(file_path)).ok()?;
365 let blob = self.repo.find_blob(entry.id()).ok()?;
366 std::str::from_utf8(blob.content())
367 .ok()
368 .map(|s| Self::normalize_line_endings(s.to_string()))
369 }
370
371 fn read_working_file(&self, file_path: &str) -> Option<String> {
372 let full_path = self.repo_root.join(file_path);
373 fs::read_to_string(full_path)
374 .ok()
375 .map(Self::normalize_line_endings)
376 }
377
378 fn read_index_file(&self, file_path: &str) -> Option<String> {
379 let index = self.repo.index().ok()?;
380 let entry = index.get_path(Path::new(file_path), 0)?;
381 let blob = self.repo.find_blob(entry.id).ok()?;
382 std::str::from_utf8(blob.content())
383 .ok()
384 .map(|s| Self::normalize_line_endings(s.to_string()))
385 }
386
387
388 pub fn read_file_at_ref(&self, refspec: &str, file_path: &str) -> Result<Option<String>, GitError> {
390 let tree = self.resolve_tree(refspec)?;
391 Ok(self.read_blob_from_tree(&tree, file_path))
392 }
393
394 pub fn get_file_commits(&self, file_path: &str, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
397 let mut revwalk = self.repo.revwalk()?;
398 revwalk.push_head()?;
399 revwalk.set_sorting(git2::Sort::TIME)?;
400
401 let mut commits = Vec::new();
402 let path = Path::new(file_path);
403
404 for oid_result in revwalk {
405 let oid = oid_result?;
406 let commit = self.repo.find_commit(oid)?;
407 let tree = commit.tree()?;
408
409 let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
411
412 let file_in_parent = if commit.parent_count() > 0 {
414 commit.parent(0)
415 .ok()
416 .and_then(|p| p.tree().ok())
417 .and_then(|t| t.get_path(path).ok().map(|e| e.id()))
418 } else {
419 None };
421
422 let changed = match (file_in_commit, file_in_parent) {
424 (Some(cur), Some(prev)) => cur != prev, (Some(_), None) => true, (None, Some(_)) => true, (None, None) => false, };
429
430 if changed {
431 let sha = oid.to_string();
432 commits.push(CommitInfo {
433 short_sha: sha[..7.min(sha.len())].to_string(),
434 sha,
435 author: commit.author().name().unwrap_or("unknown").to_string(),
436 date: commit.time().seconds().to_string(),
437 message: commit.message().unwrap_or("").to_string(),
438 });
439
440 if commits.len() >= limit {
441 break;
442 }
443 }
444 }
445
446 Ok(commits)
447 }
448
449 pub fn get_log(&self, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
450 let mut revwalk = self.repo.revwalk()?;
451 revwalk.push_head()?;
452
453 let mut commits = Vec::new();
454 for (i, oid_result) in revwalk.enumerate() {
455 if i >= limit {
456 break;
457 }
458 let oid = oid_result?;
459 let commit = self.repo.find_commit(oid)?;
460 let sha = oid.to_string();
461 commits.push(CommitInfo {
462 short_sha: sha[..7.min(sha.len())].to_string(),
463 sha,
464 author: commit.author().name().unwrap_or("unknown").to_string(),
465 date: commit.time().seconds().to_string(),
466 message: commit.message().unwrap_or("").to_string(),
467 });
468 }
469
470 Ok(commits)
471 }
472}
473
474fn map_git_error(error: git2::Error) -> GitError {
475 if error.code() == ErrorCode::NotFound {
476 GitError::NotARepo
477 } else {
478 GitError::Git2(error)
479 }
480}
481
482fn should_retry_with_command_line_safe_directory(error: &git2::Error, path: &Path) -> bool {
483 let safe_directories = command_line_safe_directories();
484 should_retry_with_safe_directory(error, path, &safe_directories)
485}
486
487fn should_retry_with_safe_directory(error: &git2::Error, path: &Path, safe_directories: &[String]) -> bool {
488 error.code() == ErrorCode::Owner
489 && nearest_git_root(path).is_some_and(|repo_root| {
490 safe_directories.iter().any(|safe_directory| {
491 safe_directory == "*"
492 || paths_match(&repo_root, Path::new(safe_directory))
493 })
494 })
495}
496
/// Collects the `safe.directory` values passed through the `GIT_CONFIG_COUNT`,
/// `GIT_CONFIG_KEY_<n>` and `GIT_CONFIG_VALUE_<n>` environment variables.
///
/// The key comparison ignores ASCII case. A missing or unparsable count yields an empty
/// list, and an index whose key or value variable is unreadable is skipped.
fn command_line_safe_directories() -> Vec<String> {
    let declared = match env::var("GIT_CONFIG_COUNT") {
        Ok(raw) => raw.parse::<usize>().unwrap_or(0),
        Err(_) => 0,
    };

    let mut directories = Vec::new();
    for index in 0..declared {
        let Ok(key) = env::var(format!("GIT_CONFIG_KEY_{index}")) else {
            continue;
        };
        if !key.eq_ignore_ascii_case("safe.directory") {
            continue;
        }
        if let Ok(value) = env::var(format!("GIT_CONFIG_VALUE_{index}")) {
            directories.push(value);
        }
    }
    directories
}
514
/// Finds the closest directory at or above `path` that contains a `.git` entry.
///
/// `path` is canonicalized before the upward walk, so a relative path such as `.` given
/// from inside a subdirectory still reaches the repository root instead of stopping at
/// the current directory. When canonicalization fails, the path is walked as given. If
/// `path` is a file, the walk starts at its parent directory.
///
/// Returns the canonical form of the directory found (or the directory as walked when it
/// cannot be canonicalized), or `None` when no ancestor contains `.git`.
fn nearest_git_root(path: &Path) -> Option<PathBuf> {
    let start = fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
    let first_dir = if start.is_file() {
        start.parent()?
    } else {
        start.as_path()
    };

    // `ancestors` yields `first_dir` itself and then each parent in turn.
    first_dir
        .ancestors()
        .find(|dir| dir.join(".git").exists())
        .map(|root| fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf()))
}
530
/// Reports whether `left` and `right` name the same location.
///
/// Each side is canonicalized when possible and compared as given otherwise. On Windows
/// the comparison ignores ASCII case; elsewhere it is exact.
fn paths_match(left: &Path, right: &Path) -> bool {
    let resolve = |path: &Path| fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
    let (left, right) = (resolve(left), resolve(right));

    if !cfg!(windows) {
        return left == right;
    }

    left.to_string_lossy()
        .eq_ignore_ascii_case(&right.to_string_lossy())
}
542
/// Returns the process-wide mutex used to serialize changes to owner validation.
///
/// The mutex is created on first use and the same instance is returned on every call.
fn owner_validation_lock() -> &'static Mutex<()> {
    static OWNER_VALIDATION_MUTEX: OnceLock<Mutex<()>> = OnceLock::new();
    OWNER_VALIDATION_MUTEX.get_or_init(Mutex::default)
}
547
548struct OwnerValidationDisabled;
549
550impl OwnerValidationDisabled {
551 fn new() -> Result<Self, GitError> {
552 unsafe { git2::opts::set_verify_owner_validation(false)? };
554 Ok(Self)
555 }
556}
557
558impl Drop for OwnerValidationDisabled {
559 fn drop(&mut self) {
560 unsafe {
562 let _ = git2::opts::set_verify_owner_validation(true);
563 }
564 }
565}
566
567#[cfg(test)]
568mod tests {
569 use super::*;
570 use git2::{ErrorClass, Oid, Repository, Signature};
571 use tempfile::TempDir;
572
573 fn commit_file(repo: &Repository, file_path: &str, contents: &str, message: &str) -> Oid {
574 fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
575
576 let mut index = repo.index().unwrap();
577 index.add_path(Path::new(file_path)).unwrap();
578 index.write().unwrap();
579
580 let tree_id = index.write_tree().unwrap();
581 let tree = repo.find_tree(tree_id).unwrap();
582 let sig = Signature::now("Test User", "test@example.com").unwrap();
583
584 match repo.head() {
585 Ok(head) => {
586 let parent = repo.find_commit(head.target().unwrap()).unwrap();
587 repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
588 .unwrap()
589 }
590 Err(_) => repo
591 .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
592 .unwrap(),
593 }
594 }
595
/// With two commits and no uncommitted edits, detection must report the working scope
/// with no files instead of showing the latest commit's changes.
#[test]
fn clean_worktree_does_not_fall_back_to_head_commit() {
    let temp = TempDir::new().unwrap();
    let repo = Repository::init(temp.path()).unwrap();

    let revisions = [
        ("export function a() {\n return 1;\n}\n", "init"),
        ("export function a() {\n return 2;\n}\n", "change a"),
    ];
    for (contents, message) in revisions {
        commit_file(&repo, "sample.ts", contents, message);
    }

    let bridge = GitBridge::open(temp.path()).unwrap();
    let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();

    assert!(matches!(scope, DiffScope::Working));
    assert!(files.is_empty());
}
615
/// The retry decision must be positive only for an ownership error whose repository
/// root is covered by a safe-directory entry.
#[test]
fn owner_error_retries_for_command_line_safe_directory() {
    let temp = TempDir::new().unwrap();
    Repository::init(temp.path()).unwrap();

    let owner_error = git2::Error::new(
        ErrorCode::Owner,
        ErrorClass::Config,
        "owner mismatch",
    );

    // The repository root itself is listed: retry.
    let safe_directories = [temp.path().to_string_lossy().to_string()];
    assert!(should_retry_with_safe_directory(
        &owner_error,
        temp.path(),
        &safe_directories,
    ));

    // Only an unrelated directory is listed: no retry.
    let other_directories = [temp.path().join("other").to_string_lossy().to_string()];
    assert!(!should_retry_with_safe_directory(
        &owner_error,
        temp.path(),
        &other_directories,
    ));

    // Any other error code never triggers a retry, even with the `*` wildcard.
    let not_found_error = git2::Error::new(
        ErrorCode::NotFound,
        ErrorClass::Repository,
        "not found",
    );
    assert!(!should_retry_with_safe_directory(
        &not_found_error,
        temp.path(),
        &["*".to_string()],
    ));
}
652
/// Asking explicitly for the `HEAD` commit must report the file that commit modified.
#[test]
fn explicit_commit_scope_still_reads_head_commit_diff() {
    let temp = TempDir::new().unwrap();
    let repo = Repository::init(temp.path()).unwrap();

    commit_file(&repo, "sample.ts", "export function a() {\n return 1;\n}\n", "init");
    let head_oid = commit_file(
        &repo,
        "sample.ts",
        "export function a() {\n return 2;\n}\n",
        "change a",
    );

    let scope = DiffScope::Commit {
        sha: head_oid.to_string(),
    };
    let bridge = GitBridge::open(temp.path()).unwrap();
    let files = bridge.get_changed_files(&scope, &[]).unwrap();

    assert_eq!(files.len(), 1);
    let changed = &files[0];
    assert_eq!(changed.file_path, "sample.ts");
    assert_eq!(changed.status, FileStatus::Modified);
}
677
/// A working file that differs from the committed one only by CRLF line endings must
/// yield identical before/after contents once both sides are normalized.
#[test]
fn crlf_only_difference_in_working_file_is_invisible() {
    let temp = TempDir::new().unwrap();
    let repo = Repository::init(temp.path()).unwrap();

    commit_file(&repo, "sample.rs", "fn a() {}\n", "init");
    let working_copy = temp.path().join("sample.rs");
    fs::write(working_copy, "fn a() {}\r\n").unwrap();

    let bridge = GitBridge::open(temp.path()).unwrap();
    let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();

    assert_eq!(files.len(), 1, "expected git to detect the CRLF change as modified");

    let change = &files[0];
    let before = change.before_content.as_deref().unwrap();
    let after = change.after_content.as_deref().unwrap();

    assert_eq!(before, after, "CRLF-only difference should be invisible after normalization");
}
696
/// A blob committed with CRLF line endings must come back with LF only when it is read
/// as the "before" side of a change.
#[test]
fn crlf_stored_in_blob_is_normalized_on_read() {
    let temp = TempDir::new().unwrap();
    let repo = Repository::init(temp.path()).unwrap();

    // Keep git from rewriting line endings so the CRLF really ends up in the blob.
    let mut config = repo.config().unwrap();
    config.set_str("core.autocrlf", "false").unwrap();

    commit_file(&repo, "sample.rs", "fn a() {}\r\n", "init");
    let working_copy = temp.path().join("sample.rs");
    fs::write(working_copy, "fn a() {}\r\nfn b() {}\r\n").unwrap();

    let bridge = GitBridge::open(temp.path()).unwrap();
    let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();

    assert_eq!(files.len(), 1, "expected git to detect the modification");

    let before = files[0].before_content.as_deref().unwrap();
    assert!(!before.contains('\r'), "before_content read from CRLF blob should be normalized to LF");
}
714}