Skip to main content

sem_core/git/
bridge.rs

1use std::fs;
2use std::path::{Path, PathBuf};
3
4use git2::{
5    Delta, Diff, DiffOptions, ErrorCode, Repository, StatusOptions,
6};
7use thiserror::Error;
8
9use super::types::{CommitInfo, DiffScope, FileChange, FileStatus};
10
/// Errors returned by the git bridge.
#[derive(Error, Debug)]
pub enum GitError {
    /// Repository discovery reported "not found", or the repository has no
    /// working directory.
    #[error("not a git repository")]
    NotARepo,
    /// Any other failure reported by `git2`.
    #[error("git error: {0}")]
    Git2(#[from] git2::Error),
    /// An I/O failure converted from `std::io::Error`.
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
}
20
/// An opened git repository paired with the path of its working directory.
pub struct GitBridge {
    /// Underlying `git2` repository handle.
    repo: Repository,
    /// Working directory of `repo`, captured when the bridge is opened.
    repo_root: PathBuf,
}
25
26impl GitBridge {
27    pub fn open(path: &Path) -> Result<Self, GitError> {
28        let repo = Repository::discover(path).map_err(|e| {
29            if e.code() == ErrorCode::NotFound {
30                GitError::NotARepo
31            } else {
32                GitError::Git2(e)
33            }
34        })?;
35        let repo_root = repo
36            .workdir()
37            .ok_or(GitError::NotARepo)?
38            .to_path_buf();
39        Ok(Self { repo, repo_root })
40    }
41
42    pub fn repo_root(&self) -> &Path {
43        &self.repo_root
44    }
45
46    pub fn get_head_sha(&self) -> Result<String, GitError> {
47        let head = self.repo.head()?;
48        let oid = head.target().ok_or_else(|| {
49            git2::Error::from_str("HEAD has no target")
50        })?;
51        Ok(oid.to_string())
52    }
53
54    /// Combined detect scope + get files in one call (fast path)
55    pub fn detect_and_get_files(&self) -> Result<(DiffScope, Vec<FileChange>), GitError> {
56        // Check for staged changes
57        let staged_files = self.get_staged_diff_files()?;
58        if !staged_files.is_empty() {
59            let mut files = staged_files;
60            self.populate_contents(&mut files, &DiffScope::Staged)?;
61            return Ok((DiffScope::Staged, files));
62        }
63
64        // Check for working tree changes + untracked
65        let mut working_files = self.get_working_diff_files()?;
66        let untracked = self.get_untracked_files()?;
67        working_files.extend(untracked);
68
69        if !working_files.is_empty() {
70            self.populate_contents(&mut working_files, &DiffScope::Working)?;
71            return Ok((DiffScope::Working, working_files));
72        }
73
74        // A clean worktree should report no live changes.
75        Ok((DiffScope::Working, Vec::new()))
76    }
77
78    /// Get changed files for a specific scope
79    pub fn get_changed_files(&self, scope: &DiffScope) -> Result<Vec<FileChange>, GitError> {
80        let mut files = match scope {
81            DiffScope::Working => {
82                let mut files = self.get_working_diff_files()?;
83                let untracked = self.get_untracked_files()?;
84                files.extend(untracked);
85                files
86            }
87            DiffScope::Staged => self.get_staged_diff_files()?,
88            DiffScope::Commit { sha } => self.get_commit_diff_files(sha)?,
89            DiffScope::Range { from, to } => self.get_range_diff_files(from, to)?,
90        };
91
92        // Filter .sem/ files
93        files.retain(|f| !f.file_path.starts_with(".sem/"));
94
95        self.populate_contents(&mut files, scope)?;
96        Ok(files)
97    }
98
99    fn get_staged_diff_files(&self) -> Result<Vec<FileChange>, GitError> {
100        let head_tree = match self.repo.head() {
101            Ok(head) => {
102                let commit = head.peel_to_commit()?;
103                Some(commit.tree()?)
104            }
105            Err(_) => None, // No commits yet
106        };
107
108        let diff = self.repo.diff_tree_to_index(
109            head_tree.as_ref(),
110            Some(&self.repo.index()?),
111            None,
112        )?;
113
114        Ok(self.diff_to_file_changes(&diff))
115    }
116
117    fn get_working_diff_files(&self) -> Result<Vec<FileChange>, GitError> {
118        let mut opts = DiffOptions::new();
119        opts.include_untracked(false);
120
121        let diff = self.repo.diff_index_to_workdir(None, Some(&mut opts))?;
122        Ok(self.diff_to_file_changes(&diff))
123    }
124
125    fn get_untracked_files(&self) -> Result<Vec<FileChange>, GitError> {
126        let mut opts = StatusOptions::new();
127        opts.include_untracked(true)
128            .recurse_untracked_dirs(true)
129            .exclude_submodules(true);
130
131        let statuses = self.repo.statuses(Some(&mut opts))?;
132        let mut files = Vec::new();
133
134        for entry in statuses.iter() {
135            if entry.status().contains(git2::Status::WT_NEW) {
136                if let Some(path) = entry.path() {
137                    if !path.starts_with(".sem/") {
138                        files.push(FileChange {
139                            file_path: path.to_string(),
140                            status: FileStatus::Added,
141                            old_file_path: None,
142                            before_content: None,
143                            after_content: None,
144                        });
145                    }
146                }
147            }
148        }
149
150        Ok(files)
151    }
152
153    fn get_commit_diff_files(&self, sha: &str) -> Result<Vec<FileChange>, GitError> {
154        let obj = self.repo.revparse_single(sha)?;
155        let commit = obj.peel_to_commit()?;
156        let tree = commit.tree()?;
157
158        let parent_tree = if commit.parent_count() > 0 {
159            Some(commit.parent(0)?.tree()?)
160        } else {
161            None
162        };
163
164        let diff = self.repo.diff_tree_to_tree(
165            parent_tree.as_ref(),
166            Some(&tree),
167            None,
168        )?;
169
170        Ok(self.diff_to_file_changes(&diff))
171    }
172
173    fn get_range_diff_files(&self, from: &str, to: &str) -> Result<Vec<FileChange>, GitError> {
174        let from_obj = self.repo.revparse_single(from)?;
175        let to_obj = self.repo.revparse_single(to)?;
176
177        let from_tree = from_obj.peel_to_commit()?.tree()?;
178        let to_tree = to_obj.peel_to_commit()?.tree()?;
179
180        let diff = self.repo.diff_tree_to_tree(
181            Some(&from_tree),
182            Some(&to_tree),
183            None,
184        )?;
185
186        Ok(self.diff_to_file_changes(&diff))
187    }
188
189    fn diff_to_file_changes(&self, diff: &Diff) -> Vec<FileChange> {
190        let mut files = Vec::new();
191
192        for delta in diff.deltas() {
193            let (status, file_path, old_file_path) = match delta.status() {
194                Delta::Added => {
195                    let path = delta
196                        .new_file()
197                        .path()
198                        .and_then(|p| p.to_str())
199                        .unwrap_or("")
200                        .to_string();
201                    (FileStatus::Added, path, None)
202                }
203                Delta::Deleted => {
204                    let path = delta
205                        .old_file()
206                        .path()
207                        .and_then(|p| p.to_str())
208                        .unwrap_or("")
209                        .to_string();
210                    (FileStatus::Deleted, path, None)
211                }
212                Delta::Modified => {
213                    let path = delta
214                        .new_file()
215                        .path()
216                        .and_then(|p| p.to_str())
217                        .unwrap_or("")
218                        .to_string();
219                    (FileStatus::Modified, path, None)
220                }
221                Delta::Renamed => {
222                    let new_path = delta
223                        .new_file()
224                        .path()
225                        .and_then(|p| p.to_str())
226                        .unwrap_or("")
227                        .to_string();
228                    let old_path = delta
229                        .old_file()
230                        .path()
231                        .and_then(|p| p.to_str())
232                        .unwrap_or("")
233                        .to_string();
234                    (FileStatus::Renamed, new_path, Some(old_path))
235                }
236                _ => continue,
237            };
238
239            if !file_path.starts_with(".sem/") {
240                files.push(FileChange {
241                    file_path,
242                    status,
243                    old_file_path,
244                    before_content: None,
245                    after_content: None,
246                });
247            }
248        }
249
250        files
251    }
252
253    fn populate_contents(
254        &self,
255        files: &mut [FileChange],
256        scope: &DiffScope,
257    ) -> Result<(), GitError> {
258        match scope {
259            DiffScope::Working => {
260                // Resolve HEAD tree once for all before_content reads
261                let head_tree = self.resolve_tree("HEAD").ok();
262                for file in files.iter_mut() {
263                    if file.status != FileStatus::Deleted {
264                        file.after_content = self.read_working_file(&file.file_path);
265                    }
266                    if file.status != FileStatus::Added {
267                        file.before_content = head_tree
268                            .as_ref()
269                            .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
270                    }
271                }
272            }
273            DiffScope::Staged => {
274                let head_tree = self.resolve_tree("HEAD").ok();
275                for file in files.iter_mut() {
276                    if file.status != FileStatus::Deleted {
277                        file.after_content = self
278                            .read_index_file(&file.file_path)
279                            .or_else(|| self.read_working_file(&file.file_path));
280                    }
281                    if file.status != FileStatus::Added {
282                        file.before_content = head_tree
283                            .as_ref()
284                            .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
285                    }
286                }
287            }
288            DiffScope::Commit { sha } => {
289                // Resolve both trees once instead of per-file
290                let after_tree = self.resolve_tree(sha)?;
291                let before_tree = self.resolve_tree(&format!("{sha}~1")).ok();
292                for file in files.iter_mut() {
293                    if file.status != FileStatus::Deleted {
294                        file.after_content =
295                            self.read_blob_from_tree(&after_tree, &file.file_path);
296                    }
297                    if file.status != FileStatus::Added {
298                        file.before_content = before_tree
299                            .as_ref()
300                            .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
301                    }
302                }
303            }
304            DiffScope::Range { from, to } => {
305                let after_tree = self.resolve_tree(to)?;
306                let before_tree = self.resolve_tree(from)?;
307                for file in files.iter_mut() {
308                    if file.status != FileStatus::Deleted {
309                        file.after_content =
310                            self.read_blob_from_tree(&after_tree, &file.file_path);
311                    }
312                    if file.status != FileStatus::Added {
313                        let path = file
314                            .old_file_path
315                            .as_deref()
316                            .unwrap_or(&file.file_path);
317                        file.before_content =
318                            self.read_blob_from_tree(&before_tree, path);
319                    }
320                }
321            }
322        }
323        Ok(())
324    }
325
326    fn resolve_tree(&self, refspec: &str) -> Result<git2::Tree<'_>, GitError> {
327        let obj = self.repo.revparse_single(refspec)?;
328        let commit = obj.peel_to_commit()?;
329        Ok(commit.tree()?)
330    }
331
332    fn normalize_line_endings(s: String) -> String {
333        if s.contains('\r') {
334            s.replace("\r\n", "\n").replace('\r', "\n")
335        } else {
336            s
337        }
338    }
339
340    fn read_blob_from_tree(&self, tree: &git2::Tree, file_path: &str) -> Option<String> {
341        let entry = tree.get_path(Path::new(file_path)).ok()?;
342        let blob = self.repo.find_blob(entry.id()).ok()?;
343        std::str::from_utf8(blob.content())
344            .ok()
345            .map(|s| Self::normalize_line_endings(s.to_string()))
346    }
347
348    fn read_working_file(&self, file_path: &str) -> Option<String> {
349        let full_path = self.repo_root.join(file_path);
350        fs::read_to_string(full_path)
351            .ok()
352            .map(Self::normalize_line_endings)
353    }
354
355    fn read_index_file(&self, file_path: &str) -> Option<String> {
356        let index = self.repo.index().ok()?;
357        let entry = index.get_path(Path::new(file_path), 0)?;
358        let blob = self.repo.find_blob(entry.id).ok()?;
359        std::str::from_utf8(blob.content())
360            .ok()
361            .map(|s| Self::normalize_line_endings(s.to_string()))
362    }
363
364
365    pub fn get_log(&self, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
366        let mut revwalk = self.repo.revwalk()?;
367        revwalk.push_head()?;
368
369        let mut commits = Vec::new();
370        for (i, oid_result) in revwalk.enumerate() {
371            if i >= limit {
372                break;
373            }
374            let oid = oid_result?;
375            let commit = self.repo.find_commit(oid)?;
376            let sha = oid.to_string();
377            commits.push(CommitInfo {
378                short_sha: sha[..7.min(sha.len())].to_string(),
379                sha,
380                author: commit.author().name().unwrap_or("unknown").to_string(),
381                date: commit.time().seconds().to_string(),
382                message: commit.message().unwrap_or("").to_string(),
383            });
384        }
385
386        Ok(commits)
387    }
388}
389
390#[cfg(test)]
391mod tests {
392    use super::*;
393    use git2::{Oid, Repository, Signature};
394    use tempfile::TempDir;
395
396    fn commit_file(repo: &Repository, file_path: &str, contents: &str, message: &str) -> Oid {
397        fs::write(repo.workdir().unwrap().join(file_path), contents).unwrap();
398
399        let mut index = repo.index().unwrap();
400        index.add_path(Path::new(file_path)).unwrap();
401        index.write().unwrap();
402
403        let tree_id = index.write_tree().unwrap();
404        let tree = repo.find_tree(tree_id).unwrap();
405        let sig = Signature::now("Test User", "test@example.com").unwrap();
406
407        match repo.head() {
408            Ok(head) => {
409                let parent = repo.find_commit(head.target().unwrap()).unwrap();
410                repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent])
411                    .unwrap()
412            }
413            Err(_) => repo
414                .commit(Some("HEAD"), &sig, &sig, message, &tree, &[])
415                .unwrap(),
416        }
417    }
418
419    #[test]
420    fn clean_worktree_does_not_fall_back_to_head_commit() {
421        let temp = TempDir::new().unwrap();
422        let repo = Repository::init(temp.path()).unwrap();
423
424        commit_file(&repo, "sample.ts", "export function a() {\n  return 1;\n}\n", "init");
425        commit_file(
426            &repo,
427            "sample.ts",
428            "export function a() {\n  return 2;\n}\n",
429            "change a",
430        );
431
432        let bridge = GitBridge::open(temp.path()).unwrap();
433        let (scope, files) = bridge.detect_and_get_files().unwrap();
434
435        assert!(matches!(scope, DiffScope::Working));
436        assert!(files.is_empty());
437    }
438
439    #[test]
440    fn explicit_commit_scope_still_reads_head_commit_diff() {
441        let temp = TempDir::new().unwrap();
442        let repo = Repository::init(temp.path()).unwrap();
443
444        commit_file(&repo, "sample.ts", "export function a() {\n  return 1;\n}\n", "init");
445        let head_oid = commit_file(
446            &repo,
447            "sample.ts",
448            "export function a() {\n  return 2;\n}\n",
449            "change a",
450        );
451
452        let bridge = GitBridge::open(temp.path()).unwrap();
453        let files = bridge
454            .get_changed_files(&DiffScope::Commit {
455                sha: head_oid.to_string(),
456            })
457            .unwrap();
458
459        assert_eq!(files.len(), 1);
460        assert_eq!(files[0].file_path, "sample.ts");
461        assert_eq!(files[0].status, FileStatus::Modified);
462    }
463
464    #[test]
465    fn crlf_only_difference_in_working_file_is_invisible() {
466        let temp = TempDir::new().unwrap();
467        let repo = Repository::init(temp.path()).unwrap();
468
469        commit_file(&repo, "sample.rs", "fn a() {}\n", "init");
470        fs::write(temp.path().join("sample.rs"), "fn a() {}\r\n").unwrap();
471
472        let bridge = GitBridge::open(temp.path()).unwrap();
473        let files = bridge.get_changed_files(&DiffScope::Working).unwrap();
474
475        assert_eq!(files.len(), 1, "expected git to detect the CRLF change as modified");
476
477        let before = files[0].before_content.as_deref().unwrap();
478        let after = files[0].after_content.as_deref().unwrap();
479
480        assert_eq!(before, after, "CRLF-only difference should be invisible after normalization");
481    }
482
483    #[test]
484    fn crlf_stored_in_blob_is_normalized_on_read() {
485        let temp = TempDir::new().unwrap();
486        let repo = Repository::init(temp.path()).unwrap();
487
488        repo.config().unwrap().set_str("core.autocrlf", "false").unwrap();
489        commit_file(&repo, "sample.rs", "fn a() {}\r\n", "init");
490        fs::write(temp.path().join("sample.rs"), "fn a() {}\r\nfn b() {}\r\n").unwrap();
491
492        let bridge = GitBridge::open(temp.path()).unwrap();
493        let files = bridge.get_changed_files(&DiffScope::Working).unwrap();
494
495        assert_eq!(files.len(), 1, "expected git to detect the modification");
496
497        let before = files[0].before_content.as_deref().unwrap();
498        assert!(!before.contains('\r'), "before_content read from CRLF blob should be normalized to LF");
499    }
500}