1use std::fs;
2use std::path::{Path, PathBuf};
3
4use git2::{
5 Delta, Diff, DiffOptions, ErrorCode, Repository,
6};
7use thiserror::Error;
8
9use super::types::{CommitInfo, DiffScope, FileChange, FileStatus};
10
/// Errors produced by [`GitBridge`] operations.
#[derive(Error, Debug)]
pub enum GitError {
    /// No repository was found for the given path, or the repository has no
    /// working directory (see `GitBridge::open`).
    #[error("not a git repository")]
    NotARepo,
    /// An error reported by the `git2` library; converted automatically via `?`.
    #[error("git error: {0}")]
    Git2(#[from] git2::Error),
    /// A standard-library I/O error; converted automatically via `?`.
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
}
20
/// Read-only helper around a `git2::Repository` that lists changed files for a
/// diff scope and loads their before/after text.
pub struct GitBridge {
    /// Handle to the repository found by `GitBridge::open`.
    repo: Repository,
    /// Working directory of `repo`, captured once when the bridge is opened.
    repo_root: PathBuf,
}
25
26impl GitBridge {
27 pub fn open(path: &Path) -> Result<Self, GitError> {
28 let repo = Repository::discover(path).map_err(|e| {
29 if e.code() == ErrorCode::NotFound {
30 GitError::NotARepo
31 } else {
32 GitError::Git2(e)
33 }
34 })?;
35 let repo_root = repo
36 .workdir()
37 .ok_or(GitError::NotARepo)?
38 .to_path_buf();
39 Ok(Self { repo, repo_root })
40 }
41
42 pub fn repo_root(&self) -> &Path {
43 &self.repo_root
44 }
45
46 pub fn get_head_sha(&self) -> Result<String, GitError> {
47 let head = self.repo.head()?;
48 let oid = head.target().ok_or_else(|| {
49 git2::Error::from_str("HEAD has no target")
50 })?;
51 Ok(oid.to_string())
52 }
53
54 pub fn detect_and_get_files(&self, pathspecs: &[String]) -> Result<(DiffScope, Vec<FileChange>), GitError> {
56 let staged_files = self.get_staged_diff_files(pathspecs)?;
58 if !staged_files.is_empty() {
59 let mut files = staged_files;
60 self.populate_contents(&mut files, &DiffScope::Staged)?;
61 return Ok((DiffScope::Staged, files));
62 }
63
64 let mut working_files = self.get_working_diff_files(pathspecs)?;
66
67 if !working_files.is_empty() {
68 self.populate_contents(&mut working_files, &DiffScope::Working)?;
69 return Ok((DiffScope::Working, working_files));
70 }
71
72 Ok((DiffScope::Working, Vec::new()))
74 }
75
76 pub fn get_changed_files(&self, scope: &DiffScope, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
78 let mut files = match scope {
79 DiffScope::Working => {
80 self.get_working_diff_files(pathspecs)?
81 }
82 DiffScope::Staged => self.get_staged_diff_files(pathspecs)?,
83 DiffScope::Commit { sha } => self.get_commit_diff_files(sha, pathspecs)?,
84 DiffScope::Range { from, to } => self.get_range_diff_files(from, to, pathspecs)?,
85 DiffScope::RefToWorking { refspec } => self.get_ref_to_working_diff_files(refspec, pathspecs)?,
86 };
87
88 files.retain(|f| !f.file_path.starts_with(".sem/"));
90
91 self.populate_contents(&mut files, scope)?;
92 Ok(files)
93 }
94
95 pub fn resolve_merge_base(&self, ref1: &str, ref2: &str) -> Result<String, GitError> {
97 let obj1 = self.repo.revparse_single(ref1)?;
98 let obj2 = self.repo.revparse_single(ref2)?;
99 let oid = self.repo.merge_base(obj1.id(), obj2.id())?;
100 Ok(oid.to_string())
101 }
102
103 pub fn is_valid_rev(&self, refspec: &str) -> bool {
105 self.repo.revparse_single(refspec).is_ok()
106 }
107
108 fn make_diff_opts(pathspecs: &[String]) -> DiffOptions {
109 let mut opts = DiffOptions::new();
110 for spec in pathspecs {
111 opts.pathspec(spec.as_str());
112 }
113 opts
114 }
115
116 fn get_staged_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
117 let head_tree = match self.repo.head() {
118 Ok(head) => {
119 let commit = head.peel_to_commit()?;
120 Some(commit.tree()?)
121 }
122 Err(_) => None, };
124
125 let mut opts = Self::make_diff_opts(pathspecs);
126 let diff = self.repo.diff_tree_to_index(
127 head_tree.as_ref(),
128 Some(&self.repo.index()?),
129 Some(&mut opts),
130 )?;
131
132 Ok(self.diff_to_file_changes(&diff))
133 }
134
135 fn get_working_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
136 let mut opts = Self::make_diff_opts(pathspecs);
137 opts.include_untracked(false);
138
139 let diff = self.repo.diff_index_to_workdir(None, Some(&mut opts))?;
140 Ok(self.diff_to_file_changes(&diff))
141 }
142
143 fn get_commit_diff_files(&self, sha: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
144 let obj = self.repo.revparse_single(sha)?;
145 let commit = obj.peel_to_commit()?;
146 let tree = commit.tree()?;
147
148 let parent_tree = if commit.parent_count() > 0 {
149 Some(commit.parent(0)?.tree()?)
150 } else {
151 None
152 };
153
154 let mut opts = Self::make_diff_opts(pathspecs);
155 let diff = self.repo.diff_tree_to_tree(
156 parent_tree.as_ref(),
157 Some(&tree),
158 Some(&mut opts),
159 )?;
160
161 Ok(self.diff_to_file_changes(&diff))
162 }
163
164 fn get_range_diff_files(&self, from: &str, to: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
165 let from_obj = self.repo.revparse_single(from)?;
166 let to_obj = self.repo.revparse_single(to)?;
167
168 let from_tree = from_obj.peel_to_commit()?.tree()?;
169 let to_tree = to_obj.peel_to_commit()?.tree()?;
170
171 let mut opts = Self::make_diff_opts(pathspecs);
172 let diff = self.repo.diff_tree_to_tree(
173 Some(&from_tree),
174 Some(&to_tree),
175 Some(&mut opts),
176 )?;
177
178 Ok(self.diff_to_file_changes(&diff))
179 }
180
181 fn get_ref_to_working_diff_files(&self, refspec: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
182 let tree = self.resolve_tree(refspec)?;
183 let mut opts = Self::make_diff_opts(pathspecs);
184 let diff = self.repo.diff_tree_to_workdir_with_index(
185 Some(&tree),
186 Some(&mut opts),
187 )?;
188 Ok(self.diff_to_file_changes(&diff))
189 }
190
191 fn diff_to_file_changes(&self, diff: &Diff) -> Vec<FileChange> {
192 let mut files = Vec::new();
193
194 for delta in diff.deltas() {
195 let (status, file_path, old_file_path) = match delta.status() {
196 Delta::Added => {
197 let path = delta
198 .new_file()
199 .path()
200 .and_then(|p| p.to_str())
201 .unwrap_or("")
202 .to_string();
203 (FileStatus::Added, path, None)
204 }
205 Delta::Deleted => {
206 let path = delta
207 .old_file()
208 .path()
209 .and_then(|p| p.to_str())
210 .unwrap_or("")
211 .to_string();
212 (FileStatus::Deleted, path, None)
213 }
214 Delta::Modified => {
215 let path = delta
216 .new_file()
217 .path()
218 .and_then(|p| p.to_str())
219 .unwrap_or("")
220 .to_string();
221 (FileStatus::Modified, path, None)
222 }
223 Delta::Renamed => {
224 let new_path = delta
225 .new_file()
226 .path()
227 .and_then(|p| p.to_str())
228 .unwrap_or("")
229 .to_string();
230 let old_path = delta
231 .old_file()
232 .path()
233 .and_then(|p| p.to_str())
234 .unwrap_or("")
235 .to_string();
236 (FileStatus::Renamed, new_path, Some(old_path))
237 }
238 _ => continue,
239 };
240
241 if !file_path.starts_with(".sem/") {
242 files.push(FileChange {
243 file_path,
244 status,
245 old_file_path,
246 before_content: None,
247 after_content: None,
248 });
249 }
250 }
251
252 files
253 }
254
255 fn populate_contents(
256 &self,
257 files: &mut [FileChange],
258 scope: &DiffScope,
259 ) -> Result<(), GitError> {
260 match scope {
261 DiffScope::Working => {
262 let head_tree = self.resolve_tree("HEAD").ok();
264 for file in files.iter_mut() {
265 if file.status != FileStatus::Deleted {
266 file.after_content = self.read_working_file(&file.file_path);
267 }
268 if file.status != FileStatus::Added {
269 file.before_content = head_tree
270 .as_ref()
271 .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
272 }
273 }
274 }
275 DiffScope::Staged => {
276 let head_tree = self.resolve_tree("HEAD").ok();
277 for file in files.iter_mut() {
278 if file.status != FileStatus::Deleted {
279 file.after_content = self
280 .read_index_file(&file.file_path)
281 .or_else(|| self.read_working_file(&file.file_path));
282 }
283 if file.status != FileStatus::Added {
284 file.before_content = head_tree
285 .as_ref()
286 .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
287 }
288 }
289 }
290 DiffScope::Commit { sha } => {
291 let after_tree = self.resolve_tree(sha)?;
293 let before_tree = self.resolve_tree(&format!("{sha}~1")).ok();
294 for file in files.iter_mut() {
295 if file.status != FileStatus::Deleted {
296 file.after_content =
297 self.read_blob_from_tree(&after_tree, &file.file_path);
298 }
299 if file.status != FileStatus::Added {
300 file.before_content = before_tree
301 .as_ref()
302 .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
303 }
304 }
305 }
306 DiffScope::Range { from, to } => {
307 let after_tree = self.resolve_tree(to)?;
308 let before_tree = self.resolve_tree(from)?;
309 for file in files.iter_mut() {
310 if file.status != FileStatus::Deleted {
311 file.after_content =
312 self.read_blob_from_tree(&after_tree, &file.file_path);
313 }
314 if file.status != FileStatus::Added {
315 let path = file
316 .old_file_path
317 .as_deref()
318 .unwrap_or(&file.file_path);
319 file.before_content =
320 self.read_blob_from_tree(&before_tree, path);
321 }
322 }
323 }
324 DiffScope::RefToWorking { refspec } => {
325 let before_tree = self.resolve_tree(refspec)?;
326 for file in files.iter_mut() {
327 if file.status != FileStatus::Deleted {
328 file.after_content = self.read_working_file(&file.file_path);
329 }
330 if file.status != FileStatus::Added {
331 file.before_content =
332 self.read_blob_from_tree(&before_tree, &file.file_path);
333 }
334 }
335 }
336 }
337 Ok(())
338 }
339
340 fn resolve_tree(&self, refspec: &str) -> Result<git2::Tree<'_>, GitError> {
341 let obj = self.repo.revparse_single(refspec)?;
342 let commit = obj.peel_to_commit()?;
343 Ok(commit.tree()?)
344 }
345
346 fn normalize_line_endings(s: String) -> String {
347 if s.contains('\r') {
348 s.replace("\r\n", "\n").replace('\r', "\n")
349 } else {
350 s
351 }
352 }
353
354 fn read_blob_from_tree(&self, tree: &git2::Tree, file_path: &str) -> Option<String> {
355 let entry = tree.get_path(Path::new(file_path)).ok()?;
356 let blob = self.repo.find_blob(entry.id()).ok()?;
357 std::str::from_utf8(blob.content())
358 .ok()
359 .map(|s| Self::normalize_line_endings(s.to_string()))
360 }
361
362 fn read_working_file(&self, file_path: &str) -> Option<String> {
363 let full_path = self.repo_root.join(file_path);
364 fs::read_to_string(full_path)
365 .ok()
366 .map(Self::normalize_line_endings)
367 }
368
369 fn read_index_file(&self, file_path: &str) -> Option<String> {
370 let index = self.repo.index().ok()?;
371 let entry = index.get_path(Path::new(file_path), 0)?;
372 let blob = self.repo.find_blob(entry.id).ok()?;
373 std::str::from_utf8(blob.content())
374 .ok()
375 .map(|s| Self::normalize_line_endings(s.to_string()))
376 }
377
378
379 pub fn read_file_at_ref(&self, refspec: &str, file_path: &str) -> Result<Option<String>, GitError> {
381 let tree = self.resolve_tree(refspec)?;
382 Ok(self.read_blob_from_tree(&tree, file_path))
383 }
384
385 pub fn get_file_commits(&self, file_path: &str, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
388 let mut revwalk = self.repo.revwalk()?;
389 revwalk.push_head()?;
390 revwalk.set_sorting(git2::Sort::TIME)?;
391
392 let mut commits = Vec::new();
393 let path = Path::new(file_path);
394
395 for oid_result in revwalk {
396 let oid = oid_result?;
397 let commit = self.repo.find_commit(oid)?;
398 let tree = commit.tree()?;
399
400 let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
402
403 let file_in_parent = if commit.parent_count() > 0 {
405 commit.parent(0)
406 .ok()
407 .and_then(|p| p.tree().ok())
408 .and_then(|t| t.get_path(path).ok().map(|e| e.id()))
409 } else {
410 None };
412
413 let changed = match (file_in_commit, file_in_parent) {
415 (Some(cur), Some(prev)) => cur != prev, (Some(_), None) => true, (None, Some(_)) => true, (None, None) => false, };
420
421 if changed {
422 let sha = oid.to_string();
423 commits.push(CommitInfo {
424 short_sha: sha[..7.min(sha.len())].to_string(),
425 sha,
426 author: commit.author().name().unwrap_or("unknown").to_string(),
427 date: commit.time().seconds().to_string(),
428 message: commit.message().unwrap_or("").to_string(),
429 });
430
431 if commits.len() >= limit {
432 break;
433 }
434 }
435 }
436
437 Ok(commits)
438 }
439
440 pub fn get_log(&self, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
441 let mut revwalk = self.repo.revwalk()?;
442 revwalk.push_head()?;
443
444 let mut commits = Vec::new();
445 for (i, oid_result) in revwalk.enumerate() {
446 if i >= limit {
447 break;
448 }
449 let oid = oid_result?;
450 let commit = self.repo.find_commit(oid)?;
451 let sha = oid.to_string();
452 commits.push(CommitInfo {
453 short_sha: sha[..7.min(sha.len())].to_string(),
454 sha,
455 author: commit.author().name().unwrap_or("unknown").to_string(),
456 date: commit.time().seconds().to_string(),
457 message: commit.message().unwrap_or("").to_string(),
458 });
459 }
460
461 Ok(commits)
462 }
463}
464
465#[cfg(test)]
466mod tests {
467 use super::*;
468 use git2::{Oid, Repository, Signature};
469 use tempfile::TempDir;
470
471 fn commit_file(repo: &Repository, file_path: &str, contents: &str, message: &str) -> Oid {
472 fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
473
474 let mut index = repo.index().unwrap();
475 index.add_path(Path::new(file_path)).unwrap();
476 index.write().unwrap();
477
478 let tree_id = index.write_tree().unwrap();
479 let tree = repo.find_tree(tree_id).unwrap();
480 let sig = Signature::now("Test User", "test@example.com").unwrap();
481
482 match repo.head() {
483 Ok(head) => {
484 let parent = repo.find_commit(head.target().unwrap()).unwrap();
485 repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
486 .unwrap()
487 }
488 Err(_) => repo
489 .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
490 .unwrap(),
491 }
492 }
493
494 #[test]
495 fn clean_worktree_does_not_fall_back_to_head_commit() {
496 let temp = TempDir::new().unwrap();
497 let repo = Repository::init(temp.path()).unwrap();
498
499 commit_file(&repo, "sample.ts", "export function a() {\n return 1;\n}\n", "init");
500 commit_file(
501 &repo,
502 "sample.ts",
503 "export function a() {\n return 2;\n}\n",
504 "change a",
505 );
506
507 let bridge = GitBridge::open(temp.path()).unwrap();
508 let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();
509
510 assert!(matches!(scope, DiffScope::Working));
511 assert!(files.is_empty());
512 }
513
514 #[test]
515 fn explicit_commit_scope_still_reads_head_commit_diff() {
516 let temp = TempDir::new().unwrap();
517 let repo = Repository::init(temp.path()).unwrap();
518
519 commit_file(&repo, "sample.ts", "export function a() {\n return 1;\n}\n", "init");
520 let head_oid = commit_file(
521 &repo,
522 "sample.ts",
523 "export function a() {\n return 2;\n}\n",
524 "change a",
525 );
526
527 let bridge = GitBridge::open(temp.path()).unwrap();
528 let files = bridge
529 .get_changed_files(&DiffScope::Commit {
530 sha: head_oid.to_string(),
531 }, &[])
532 .unwrap();
533
534 assert_eq!(files.len(), 1);
535 assert_eq!(files[0].file_path, "sample.ts");
536 assert_eq!(files[0].status, FileStatus::Modified);
537 }
538
539 #[test]
540 fn crlf_only_difference_in_working_file_is_invisible() {
541 let temp = TempDir::new().unwrap();
542 let repo = Repository::init(temp.path()).unwrap();
543
544 commit_file(&repo, "sample.rs", "fn a() {}\n", "init");
545 fs::write(temp.path().join("sample.rs"), "fn a() {}\r\n").unwrap();
546
547 let bridge = GitBridge::open(temp.path()).unwrap();
548 let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
549
550 assert_eq!(files.len(), 1, "expected git to detect the CRLF change as modified");
551
552 let before = files[0].before_content.as_deref().unwrap();
553 let after = files[0].after_content.as_deref().unwrap();
554
555 assert_eq!(before, after, "CRLF-only difference should be invisible after normalization");
556 }
557
558 #[test]
559 fn crlf_stored_in_blob_is_normalized_on_read() {
560 let temp = TempDir::new().unwrap();
561 let repo = Repository::init(temp.path()).unwrap();
562
563 repo.config().unwrap().set_str("core.autocrlf", "false").unwrap();
564 commit_file(&repo, "sample.rs", "fn a() {}\r\n", "init");
565 fs::write(temp.path().join("sample.rs"), "fn a() {}\r\nfn b() {}\r\n").unwrap();
566
567 let bridge = GitBridge::open(temp.path()).unwrap();
568 let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();
569
570 assert_eq!(files.len(), 1, "expected git to detect the modification");
571
572 let before = files[0].before_content.as_deref().unwrap();
573 assert!(!before.contains('\r'), "before_content read from CRLF blob should be normalized to LF");
574 }
575}