1use std::fs;
2use std::path::{Path, PathBuf};
3
4use git2::{
5 Delta, Diff, DiffOptions, ErrorCode, Repository, StatusOptions,
6};
7use thiserror::Error;
8
9use super::types::{CommitInfo, DiffScope, FileChange, FileStatus};
10
11#[derive(Error, Debug)]
12pub enum GitError {
13 #[error("not a git repository")]
14 NotARepo,
15 #[error("git error: {0}")]
16 Git2(#[from] git2::Error),
17 #[error("io error: {0}")]
18 Io(#[from] std::io::Error),
19}
20
21pub struct GitBridge {
22 repo: Repository,
23 repo_root: PathBuf,
24}
25
26impl GitBridge {
27 pub fn open(path: &Path) -> Result<Self, GitError> {
28 let repo = Repository::discover(path).map_err(|e| {
29 if e.code() == ErrorCode::NotFound {
30 GitError::NotARepo
31 } else {
32 GitError::Git2(e)
33 }
34 })?;
35 let repo_root = repo
36 .workdir()
37 .ok_or(GitError::NotARepo)?
38 .to_path_buf();
39 Ok(Self { repo, repo_root })
40 }
41
42 pub fn repo_root(&self) -> &Path {
43 &self.repo_root
44 }
45
46 pub fn get_head_sha(&self) -> Result<String, GitError> {
47 let head = self.repo.head()?;
48 let oid = head.target().ok_or_else(|| {
49 git2::Error::from_str("HEAD has no target")
50 })?;
51 Ok(oid.to_string())
52 }
53
54 pub fn detect_and_get_files(&self) -> Result<(DiffScope, Vec<FileChange>), GitError> {
56 let staged_files = self.get_staged_diff_files()?;
58 if !staged_files.is_empty() {
59 let mut files = staged_files;
60 self.populate_contents(&mut files, &DiffScope::Staged)?;
61 return Ok((DiffScope::Staged, files));
62 }
63
64 let mut working_files = self.get_working_diff_files()?;
66 let untracked = self.get_untracked_files()?;
67 working_files.extend(untracked);
68
69 if !working_files.is_empty() {
70 self.populate_contents(&mut working_files, &DiffScope::Working)?;
71 return Ok((DiffScope::Working, working_files));
72 }
73
74 Ok((DiffScope::Working, Vec::new()))
76 }
77
78 pub fn get_changed_files(&self, scope: &DiffScope) -> Result<Vec<FileChange>, GitError> {
80 let mut files = match scope {
81 DiffScope::Working => {
82 let mut files = self.get_working_diff_files()?;
83 let untracked = self.get_untracked_files()?;
84 files.extend(untracked);
85 files
86 }
87 DiffScope::Staged => self.get_staged_diff_files()?,
88 DiffScope::Commit { sha } => self.get_commit_diff_files(sha)?,
89 DiffScope::Range { from, to } => self.get_range_diff_files(from, to)?,
90 };
91
92 files.retain(|f| !f.file_path.starts_with(".sem/"));
94
95 self.populate_contents(&mut files, scope)?;
96 Ok(files)
97 }
98
99 fn get_staged_diff_files(&self) -> Result<Vec<FileChange>, GitError> {
100 let head_tree = match self.repo.head() {
101 Ok(head) => {
102 let commit = head.peel_to_commit()?;
103 Some(commit.tree()?)
104 }
105 Err(_) => None, };
107
108 let diff = self.repo.diff_tree_to_index(
109 head_tree.as_ref(),
110 Some(&self.repo.index()?),
111 None,
112 )?;
113
114 Ok(self.diff_to_file_changes(&diff))
115 }
116
117 fn get_working_diff_files(&self) -> Result<Vec<FileChange>, GitError> {
118 let mut opts = DiffOptions::new();
119 opts.include_untracked(false);
120
121 let diff = self.repo.diff_index_to_workdir(None, Some(&mut opts))?;
122 Ok(self.diff_to_file_changes(&diff))
123 }
124
125 fn get_untracked_files(&self) -> Result<Vec<FileChange>, GitError> {
126 let mut opts = StatusOptions::new();
127 opts.include_untracked(true)
128 .recurse_untracked_dirs(true)
129 .exclude_submodules(true);
130
131 let statuses = self.repo.statuses(Some(&mut opts))?;
132 let mut files = Vec::new();
133
134 for entry in statuses.iter() {
135 if entry.status().contains(git2::Status::WT_NEW) {
136 if let Some(path) = entry.path() {
137 if !path.starts_with(".sem/") {
138 files.push(FileChange {
139 file_path: path.to_string(),
140 status: FileStatus::Added,
141 old_file_path: None,
142 before_content: None,
143 after_content: None,
144 });
145 }
146 }
147 }
148 }
149
150 Ok(files)
151 }
152
153 fn get_commit_diff_files(&self, sha: &str) -> Result<Vec<FileChange>, GitError> {
154 let obj = self.repo.revparse_single(sha)?;
155 let commit = obj.peel_to_commit()?;
156 let tree = commit.tree()?;
157
158 let parent_tree = if commit.parent_count() > 0 {
159 Some(commit.parent(0)?.tree()?)
160 } else {
161 None
162 };
163
164 let diff = self.repo.diff_tree_to_tree(
165 parent_tree.as_ref(),
166 Some(&tree),
167 None,
168 )?;
169
170 Ok(self.diff_to_file_changes(&diff))
171 }
172
173 fn get_range_diff_files(&self, from: &str, to: &str) -> Result<Vec<FileChange>, GitError> {
174 let from_obj = self.repo.revparse_single(from)?;
175 let to_obj = self.repo.revparse_single(to)?;
176
177 let from_tree = from_obj.peel_to_commit()?.tree()?;
178 let to_tree = to_obj.peel_to_commit()?.tree()?;
179
180 let diff = self.repo.diff_tree_to_tree(
181 Some(&from_tree),
182 Some(&to_tree),
183 None,
184 )?;
185
186 Ok(self.diff_to_file_changes(&diff))
187 }
188
189 fn diff_to_file_changes(&self, diff: &Diff) -> Vec<FileChange> {
190 let mut files = Vec::new();
191
192 for delta in diff.deltas() {
193 let (status, file_path, old_file_path) = match delta.status() {
194 Delta::Added => {
195 let path = delta
196 .new_file()
197 .path()
198 .and_then(|p| p.to_str())
199 .unwrap_or("")
200 .to_string();
201 (FileStatus::Added, path, None)
202 }
203 Delta::Deleted => {
204 let path = delta
205 .old_file()
206 .path()
207 .and_then(|p| p.to_str())
208 .unwrap_or("")
209 .to_string();
210 (FileStatus::Deleted, path, None)
211 }
212 Delta::Modified => {
213 let path = delta
214 .new_file()
215 .path()
216 .and_then(|p| p.to_str())
217 .unwrap_or("")
218 .to_string();
219 (FileStatus::Modified, path, None)
220 }
221 Delta::Renamed => {
222 let new_path = delta
223 .new_file()
224 .path()
225 .and_then(|p| p.to_str())
226 .unwrap_or("")
227 .to_string();
228 let old_path = delta
229 .old_file()
230 .path()
231 .and_then(|p| p.to_str())
232 .unwrap_or("")
233 .to_string();
234 (FileStatus::Renamed, new_path, Some(old_path))
235 }
236 _ => continue,
237 };
238
239 if !file_path.starts_with(".sem/") {
240 files.push(FileChange {
241 file_path,
242 status,
243 old_file_path,
244 before_content: None,
245 after_content: None,
246 });
247 }
248 }
249
250 files
251 }
252
253 fn populate_contents(
254 &self,
255 files: &mut [FileChange],
256 scope: &DiffScope,
257 ) -> Result<(), GitError> {
258 match scope {
259 DiffScope::Working => {
260 let head_tree = self.resolve_tree("HEAD").ok();
262 for file in files.iter_mut() {
263 if file.status != FileStatus::Deleted {
264 file.after_content = self.read_working_file(&file.file_path);
265 }
266 if file.status != FileStatus::Added {
267 file.before_content = head_tree
268 .as_ref()
269 .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
270 }
271 }
272 }
273 DiffScope::Staged => {
274 let head_tree = self.resolve_tree("HEAD").ok();
275 for file in files.iter_mut() {
276 if file.status != FileStatus::Deleted {
277 file.after_content = self
278 .read_index_file(&file.file_path)
279 .or_else(|| self.read_working_file(&file.file_path));
280 }
281 if file.status != FileStatus::Added {
282 file.before_content = head_tree
283 .as_ref()
284 .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
285 }
286 }
287 }
288 DiffScope::Commit { sha } => {
289 let after_tree = self.resolve_tree(sha)?;
291 let before_tree = self.resolve_tree(&format!("{sha}~1")).ok();
292 for file in files.iter_mut() {
293 if file.status != FileStatus::Deleted {
294 file.after_content =
295 self.read_blob_from_tree(&after_tree, &file.file_path);
296 }
297 if file.status != FileStatus::Added {
298 file.before_content = before_tree
299 .as_ref()
300 .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
301 }
302 }
303 }
304 DiffScope::Range { from, to } => {
305 let after_tree = self.resolve_tree(to)?;
306 let before_tree = self.resolve_tree(from)?;
307 for file in files.iter_mut() {
308 if file.status != FileStatus::Deleted {
309 file.after_content =
310 self.read_blob_from_tree(&after_tree, &file.file_path);
311 }
312 if file.status != FileStatus::Added {
313 let path = file
314 .old_file_path
315 .as_deref()
316 .unwrap_or(&file.file_path);
317 file.before_content =
318 self.read_blob_from_tree(&before_tree, path);
319 }
320 }
321 }
322 }
323 Ok(())
324 }
325
326 fn resolve_tree(&self, refspec: &str) -> Result<git2::Tree<'_>, GitError> {
327 let obj = self.repo.revparse_single(refspec)?;
328 let commit = obj.peel_to_commit()?;
329 Ok(commit.tree()?)
330 }
331
332 fn normalize_line_endings(s: String) -> String {
333 if s.contains('\r') {
334 s.replace("\r\n", "\n").replace('\r', "\n")
335 } else {
336 s
337 }
338 }
339
340 fn read_blob_from_tree(&self, tree: &git2::Tree, file_path: &str) -> Option<String> {
341 let entry = tree.get_path(Path::new(file_path)).ok()?;
342 let blob = self.repo.find_blob(entry.id()).ok()?;
343 std::str::from_utf8(blob.content())
344 .ok()
345 .map(|s| Self::normalize_line_endings(s.to_string()))
346 }
347
348 fn read_working_file(&self, file_path: &str) -> Option<String> {
349 let full_path = self.repo_root.join(file_path);
350 fs::read_to_string(full_path)
351 .ok()
352 .map(Self::normalize_line_endings)
353 }
354
355 fn read_index_file(&self, file_path: &str) -> Option<String> {
356 let index = self.repo.index().ok()?;
357 let entry = index.get_path(Path::new(file_path), 0)?;
358 let blob = self.repo.find_blob(entry.id).ok()?;
359 std::str::from_utf8(blob.content())
360 .ok()
361 .map(|s| Self::normalize_line_endings(s.to_string()))
362 }
363
364
365 pub fn get_log(&self, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
366 let mut revwalk = self.repo.revwalk()?;
367 revwalk.push_head()?;
368
369 let mut commits = Vec::new();
370 for (i, oid_result) in revwalk.enumerate() {
371 if i >= limit {
372 break;
373 }
374 let oid = oid_result?;
375 let commit = self.repo.find_commit(oid)?;
376 let sha = oid.to_string();
377 commits.push(CommitInfo {
378 short_sha: sha[..7.min(sha.len())].to_string(),
379 sha,
380 author: commit.author().name().unwrap_or("unknown").to_string(),
381 date: commit.time().seconds().to_string(),
382 message: commit.message().unwrap_or("").to_string(),
383 });
384 }
385
386 Ok(commits)
387 }
388}
389
390#[cfg(test)]
391mod tests {
392 use super::*;
393 use git2::{Oid, Repository, Signature};
394 use tempfile::TempDir;
395
396 fn commit_file(repo: &Repository, file_path: &str, contents: &str, message: &str) -> Oid {
397 fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
398
399 let mut index = repo.index().unwrap();
400 index.add_path(Path::new(file_path)).unwrap();
401 index.write().unwrap();
402
403 let tree_id = index.write_tree().unwrap();
404 let tree = repo.find_tree(tree_id).unwrap();
405 let sig = Signature::now("Test User", "test@example.com").unwrap();
406
407 match repo.head() {
408 Ok(head) => {
409 let parent = repo.find_commit(head.target().unwrap()).unwrap();
410 repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
411 .unwrap()
412 }
413 Err(_) => repo
414 .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
415 .unwrap(),
416 }
417 }
418
419 #[test]
420 fn clean_worktree_does_not_fall_back_to_head_commit() {
421 let temp = TempDir::new().unwrap();
422 let repo = Repository::init(temp.path()).unwrap();
423
424 commit_file(&repo, "sample.ts", "export function a() {\n return 1;\n}\n", "init");
425 commit_file(
426 &repo,
427 "sample.ts",
428 "export function a() {\n return 2;\n}\n",
429 "change a",
430 );
431
432 let bridge = GitBridge::open(temp.path()).unwrap();
433 let (scope, files) = bridge.detect_and_get_files().unwrap();
434
435 assert!(matches!(scope, DiffScope::Working));
436 assert!(files.is_empty());
437 }
438
439 #[test]
440 fn explicit_commit_scope_still_reads_head_commit_diff() {
441 let temp = TempDir::new().unwrap();
442 let repo = Repository::init(temp.path()).unwrap();
443
444 commit_file(&repo, "sample.ts", "export function a() {\n return 1;\n}\n", "init");
445 let head_oid = commit_file(
446 &repo,
447 "sample.ts",
448 "export function a() {\n return 2;\n}\n",
449 "change a",
450 );
451
452 let bridge = GitBridge::open(temp.path()).unwrap();
453 let files = bridge
454 .get_changed_files(&DiffScope::Commit {
455 sha: head_oid.to_string(),
456 })
457 .unwrap();
458
459 assert_eq!(files.len(), 1);
460 assert_eq!(files[0].file_path, "sample.ts");
461 assert_eq!(files[0].status, FileStatus::Modified);
462 }
463
464 #[test]
465 fn crlf_only_difference_in_working_file_is_invisible() {
466 let temp = TempDir::new().unwrap();
467 let repo = Repository::init(temp.path()).unwrap();
468
469 commit_file(&repo, "sample.rs", "fn a() {}\n", "init");
470 fs::write(temp.path().join("sample.rs"), "fn a() {}\r\n").unwrap();
471
472 let bridge = GitBridge::open(temp.path()).unwrap();
473 let files = bridge.get_changed_files(&DiffScope::Working).unwrap();
474
475 assert_eq!(files.len(), 1, "expected git to detect the CRLF change as modified");
476
477 let before = files[0].before_content.as_deref().unwrap();
478 let after = files[0].after_content.as_deref().unwrap();
479
480 assert_eq!(before, after, "CRLF-only difference should be invisible after normalization");
481 }
482
483 #[test]
484 fn crlf_stored_in_blob_is_normalized_on_read() {
485 let temp = TempDir::new().unwrap();
486 let repo = Repository::init(temp.path()).unwrap();
487
488 repo.config().unwrap().set_str("core.autocrlf", "false").unwrap();
489 commit_file(&repo, "sample.rs", "fn a() {}\r\n", "init");
490 fs::write(temp.path().join("sample.rs"), "fn a() {}\r\nfn b() {}\r\n").unwrap();
491
492 let bridge = GitBridge::open(temp.path()).unwrap();
493 let files = bridge.get_changed_files(&DiffScope::Working).unwrap();
494
495 assert_eq!(files.len(), 1, "expected git to detect the modification");
496
497 let before = files[0].before_content.as_deref().unwrap();
498 assert!(!before.contains('\r'), "before_content read from CRLF blob should be normalized to LF");
499 }
500}