Skip to main content

sem_core/git/
bridge.rs

1use std::fs;
2use std::path::{Path, PathBuf};
3
4use git2::{
5    Delta, Diff, DiffOptions, ErrorCode, Repository,
6};
7use thiserror::Error;
8
9use super::types::{CommitInfo, DiffScope, FileChange, FileStatus};
10
11#[derive(Error, Debug)]
12pub enum GitError {
13    #[error("not a git repository")]
14    NotARepo,
15    #[error("git error: {0}")]
16    Git2(#[from] git2::Error),
17    #[error("io error: {0}")]
18    Io(#[from] std::io::Error),
19}
20
21pub struct GitBridge {
22    repo: Repository,
23    repo_root: PathBuf,
24}
25
26impl GitBridge {
27    pub fn open(path: &Path) -> Result<Self, GitError> {
28        let repo = Repository::discover(path).map_err(|e| {
29            if e.code() == ErrorCode::NotFound {
30                GitError::NotARepo
31            } else {
32                GitError::Git2(e)
33            }
34        })?;
35        let repo_root = repo
36            .workdir()
37            .ok_or(GitError::NotARepo)?
38            .to_path_buf();
39        Ok(Self { repo, repo_root })
40    }
41
42    pub fn repo_root(&self) -> &Path {
43        &self.repo_root
44    }
45
46    pub fn get_head_sha(&self) -> Result<String, GitError> {
47        let head = self.repo.head()?;
48        let oid = head.target().ok_or_else(|| {
49            git2::Error::from_str("HEAD has no target")
50        })?;
51        Ok(oid.to_string())
52    }
53
54    /// Combined detect scope + get files in one call (fast path)
55    pub fn detect_and_get_files(&self, pathspecs: &[String]) -> Result<(DiffScope, Vec<FileChange>), GitError> {
56        // Check for staged changes
57        let staged_files = self.get_staged_diff_files(pathspecs)?;
58        if !staged_files.is_empty() {
59            let mut files = staged_files;
60            self.populate_contents(&mut files, &DiffScope::Staged)?;
61            return Ok((DiffScope::Staged, files));
62        }
63
64        // Check for working tree changes (match git diff: no untracked files)
65        let mut working_files = self.get_working_diff_files(pathspecs)?;
66
67        if !working_files.is_empty() {
68            self.populate_contents(&mut working_files, &DiffScope::Working)?;
69            return Ok((DiffScope::Working, working_files));
70        }
71
72        // A clean worktree should report no live changes.
73        Ok((DiffScope::Working, Vec::new()))
74    }
75
76    /// Get changed files for a specific scope
77    pub fn get_changed_files(&self, scope: &DiffScope, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
78        let mut files = match scope {
79            DiffScope::Working => {
80                self.get_working_diff_files(pathspecs)?
81            }
82            DiffScope::Staged => self.get_staged_diff_files(pathspecs)?,
83            DiffScope::Commit { sha } => self.get_commit_diff_files(sha, pathspecs)?,
84            DiffScope::Range { from, to } => self.get_range_diff_files(from, to, pathspecs)?,
85            DiffScope::RefToWorking { refspec } => self.get_ref_to_working_diff_files(refspec, pathspecs)?,
86        };
87
88        // Filter .sem/ files
89        files.retain(|f| !f.file_path.starts_with(".sem/"));
90
91        self.populate_contents(&mut files, scope)?;
92        Ok(files)
93    }
94
95    /// Resolve the merge base between two refs
96    pub fn resolve_merge_base(&self, ref1: &str, ref2: &str) -> Result<String, GitError> {
97        let obj1 = self.repo.revparse_single(ref1)?;
98        let obj2 = self.repo.revparse_single(ref2)?;
99        let oid = self.repo.merge_base(obj1.id(), obj2.id())?;
100        Ok(oid.to_string())
101    }
102
103    /// Check if a string resolves to a valid git revision
104    pub fn is_valid_rev(&self, refspec: &str) -> bool {
105        self.repo.revparse_single(refspec).is_ok()
106    }
107
108    fn make_diff_opts(pathspecs: &[String]) -> DiffOptions {
109        let mut opts = DiffOptions::new();
110        for spec in pathspecs {
111            opts.pathspec(spec.as_str());
112        }
113        opts
114    }
115
116    fn get_staged_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
117        let head_tree = match self.repo.head() {
118            Ok(head) => {
119                let commit = head.peel_to_commit()?;
120                Some(commit.tree()?)
121            }
122            Err(_) => None, // No commits yet
123        };
124
125        let mut opts = Self::make_diff_opts(pathspecs);
126        let diff = self.repo.diff_tree_to_index(
127            head_tree.as_ref(),
128            Some(&self.repo.index()?),
129            Some(&mut opts),
130        )?;
131
132        Ok(self.diff_to_file_changes(&diff))
133    }
134
135    fn get_working_diff_files(&self, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
136        let mut opts = Self::make_diff_opts(pathspecs);
137        opts.include_untracked(false);
138
139        let diff = self.repo.diff_index_to_workdir(None, Some(&mut opts))?;
140        Ok(self.diff_to_file_changes(&diff))
141    }
142
143    fn get_commit_diff_files(&self, sha: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
144        let obj = self.repo.revparse_single(sha)?;
145        let commit = obj.peel_to_commit()?;
146        let tree = commit.tree()?;
147
148        let parent_tree = if commit.parent_count() > 0 {
149            Some(commit.parent(0)?.tree()?)
150        } else {
151            None
152        };
153
154        let mut opts = Self::make_diff_opts(pathspecs);
155        let diff = self.repo.diff_tree_to_tree(
156            parent_tree.as_ref(),
157            Some(&tree),
158            Some(&mut opts),
159        )?;
160
161        Ok(self.diff_to_file_changes(&diff))
162    }
163
164    fn get_range_diff_files(&self, from: &str, to: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
165        let from_obj = self.repo.revparse_single(from)?;
166        let to_obj = self.repo.revparse_single(to)?;
167
168        let from_tree = from_obj.peel_to_commit()?.tree()?;
169        let to_tree = to_obj.peel_to_commit()?.tree()?;
170
171        let mut opts = Self::make_diff_opts(pathspecs);
172        let diff = self.repo.diff_tree_to_tree(
173            Some(&from_tree),
174            Some(&to_tree),
175            Some(&mut opts),
176        )?;
177
178        Ok(self.diff_to_file_changes(&diff))
179    }
180
181    fn get_ref_to_working_diff_files(&self, refspec: &str, pathspecs: &[String]) -> Result<Vec<FileChange>, GitError> {
182        let tree = self.resolve_tree(refspec)?;
183        let mut opts = Self::make_diff_opts(pathspecs);
184        let diff = self.repo.diff_tree_to_workdir_with_index(
185            Some(&tree),
186            Some(&mut opts),
187        )?;
188        Ok(self.diff_to_file_changes(&diff))
189    }
190
191    fn diff_to_file_changes(&self, diff: &Diff) -> Vec<FileChange> {
192        let mut files = Vec::new();
193
194        for delta in diff.deltas() {
195            let (status, file_path, old_file_path) = match delta.status() {
196                Delta::Added => {
197                    let path = delta
198                        .new_file()
199                        .path()
200                        .and_then(|p| p.to_str())
201                        .unwrap_or("")
202                        .to_string();
203                    (FileStatus::Added, path, None)
204                }
205                Delta::Deleted => {
206                    let path = delta
207                        .old_file()
208                        .path()
209                        .and_then(|p| p.to_str())
210                        .unwrap_or("")
211                        .to_string();
212                    (FileStatus::Deleted, path, None)
213                }
214                Delta::Modified => {
215                    let path = delta
216                        .new_file()
217                        .path()
218                        .and_then(|p| p.to_str())
219                        .unwrap_or("")
220                        .to_string();
221                    (FileStatus::Modified, path, None)
222                }
223                Delta::Renamed => {
224                    let new_path = delta
225                        .new_file()
226                        .path()
227                        .and_then(|p| p.to_str())
228                        .unwrap_or("")
229                        .to_string();
230                    let old_path = delta
231                        .old_file()
232                        .path()
233                        .and_then(|p| p.to_str())
234                        .unwrap_or("")
235                        .to_string();
236                    (FileStatus::Renamed, new_path, Some(old_path))
237                }
238                _ => continue,
239            };
240
241            if !file_path.starts_with(".sem/") {
242                files.push(FileChange {
243                    file_path,
244                    status,
245                    old_file_path,
246                    before_content: None,
247                    after_content: None,
248                });
249            }
250        }
251
252        files
253    }
254
255    fn populate_contents(
256        &self,
257        files: &mut [FileChange],
258        scope: &DiffScope,
259    ) -> Result<(), GitError> {
260        match scope {
261            DiffScope::Working => {
262                // Resolve HEAD tree once for all before_content reads
263                let head_tree = self.resolve_tree("HEAD").ok();
264                for file in files.iter_mut() {
265                    if file.status != FileStatus::Deleted {
266                        file.after_content = self.read_working_file(&file.file_path);
267                    }
268                    if file.status != FileStatus::Added {
269                        file.before_content = head_tree
270                            .as_ref()
271                            .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
272                    }
273                }
274            }
275            DiffScope::Staged => {
276                let head_tree = self.resolve_tree("HEAD").ok();
277                for file in files.iter_mut() {
278                    if file.status != FileStatus::Deleted {
279                        file.after_content = self
280                            .read_index_file(&file.file_path)
281                            .or_else(|| self.read_working_file(&file.file_path));
282                    }
283                    if file.status != FileStatus::Added {
284                        file.before_content = head_tree
285                            .as_ref()
286                            .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
287                    }
288                }
289            }
290            DiffScope::Commit { sha } => {
291                // Resolve both trees once instead of per-file
292                let after_tree = self.resolve_tree(sha)?;
293                let before_tree = self.resolve_tree(&format!("{sha}~1")).ok();
294                for file in files.iter_mut() {
295                    if file.status != FileStatus::Deleted {
296                        file.after_content =
297                            self.read_blob_from_tree(&after_tree, &file.file_path);
298                    }
299                    if file.status != FileStatus::Added {
300                        file.before_content = before_tree
301                            .as_ref()
302                            .and_then(|t| self.read_blob_from_tree(t, &file.file_path));
303                    }
304                }
305            }
306            DiffScope::Range { from, to } => {
307                let after_tree = self.resolve_tree(to)?;
308                let before_tree = self.resolve_tree(from)?;
309                for file in files.iter_mut() {
310                    if file.status != FileStatus::Deleted {
311                        file.after_content =
312                            self.read_blob_from_tree(&after_tree, &file.file_path);
313                    }
314                    if file.status != FileStatus::Added {
315                        let path = file
316                            .old_file_path
317                            .as_deref()
318                            .unwrap_or(&file.file_path);
319                        file.before_content =
320                            self.read_blob_from_tree(&before_tree, path);
321                    }
322                }
323            }
324            DiffScope::RefToWorking { refspec } => {
325                let before_tree = self.resolve_tree(refspec)?;
326                for file in files.iter_mut() {
327                    if file.status != FileStatus::Deleted {
328                        file.after_content = self.read_working_file(&file.file_path);
329                    }
330                    if file.status != FileStatus::Added {
331                        file.before_content =
332                            self.read_blob_from_tree(&before_tree, &file.file_path);
333                    }
334                }
335            }
336        }
337        Ok(())
338    }
339
340    fn resolve_tree(&self, refspec: &str) -> Result<git2::Tree<'_>, GitError> {
341        let obj = self.repo.revparse_single(refspec)?;
342        let commit = obj.peel_to_commit()?;
343        Ok(commit.tree()?)
344    }
345
346    fn normalize_line_endings(s: String) -> String {
347        if s.contains('\r') {
348            s.replace("\r\n", "\n").replace('\r', "\n")
349        } else {
350            s
351        }
352    }
353
354    fn read_blob_from_tree(&self, tree: &git2::Tree, file_path: &str) -> Option<String> {
355        let entry = tree.get_path(Path::new(file_path)).ok()?;
356        let blob = self.repo.find_blob(entry.id()).ok()?;
357        std::str::from_utf8(blob.content())
358            .ok()
359            .map(|s| Self::normalize_line_endings(s.to_string()))
360    }
361
362    fn read_working_file(&self, file_path: &str) -> Option<String> {
363        let full_path = self.repo_root.join(file_path);
364        fs::read_to_string(full_path)
365            .ok()
366            .map(Self::normalize_line_endings)
367    }
368
369    fn read_index_file(&self, file_path: &str) -> Option<String> {
370        let index = self.repo.index().ok()?;
371        let entry = index.get_path(Path::new(file_path), 0)?;
372        let blob = self.repo.find_blob(entry.id).ok()?;
373        std::str::from_utf8(blob.content())
374            .ok()
375            .map(|s| Self::normalize_line_endings(s.to_string()))
376    }
377
378
379    /// Read file content at a specific git ref (commit SHA, branch, tag, etc.)
380    pub fn read_file_at_ref(&self, refspec: &str, file_path: &str) -> Result<Option<String>, GitError> {
381        let tree = self.resolve_tree(refspec)?;
382        Ok(self.read_blob_from_tree(&tree, file_path))
383    }
384
385    /// Get commits that modified a specific file, walking history from HEAD.
386    /// Returns commits in reverse chronological order (newest first).
387    pub fn get_file_commits(&self, file_path: &str, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
388        let mut revwalk = self.repo.revwalk()?;
389        revwalk.push_head()?;
390        revwalk.set_sorting(git2::Sort::TIME)?;
391
392        let mut commits = Vec::new();
393        let path = Path::new(file_path);
394
395        for oid_result in revwalk {
396            let oid = oid_result?;
397            let commit = self.repo.find_commit(oid)?;
398            let tree = commit.tree()?;
399
400            // Check if this file exists in this commit's tree
401            let file_in_commit = tree.get_path(path).ok().map(|e| e.id());
402
403            // Compare with parent to see if the file changed
404            let file_in_parent = if commit.parent_count() > 0 {
405                commit.parent(0)
406                    .ok()
407                    .and_then(|p| p.tree().ok())
408                    .and_then(|t| t.get_path(path).ok().map(|e| e.id()))
409            } else {
410                None // No parent = initial commit, file was added
411            };
412
413            // Include if file changed between parent and this commit
414            let changed = match (file_in_commit, file_in_parent) {
415                (Some(cur), Some(prev)) => cur != prev,  // content changed
416                (Some(_), None) => true,                   // file added
417                (None, Some(_)) => true,                   // file deleted
418                (None, None) => false,                     // file not present in either
419            };
420
421            if changed {
422                let sha = oid.to_string();
423                commits.push(CommitInfo {
424                    short_sha: sha[..7.min(sha.len())].to_string(),
425                    sha,
426                    author: commit.author().name().unwrap_or("unknown").to_string(),
427                    date: commit.time().seconds().to_string(),
428                    message: commit.message().unwrap_or("").to_string(),
429                });
430
431                if commits.len() >= limit {
432                    break;
433                }
434            }
435        }
436
437        Ok(commits)
438    }
439
440    pub fn get_log(&self, limit: usize) -> Result<Vec<CommitInfo>, GitError> {
441        let mut revwalk = self.repo.revwalk()?;
442        revwalk.push_head()?;
443
444        let mut commits = Vec::new();
445        for (i, oid_result) in revwalk.enumerate() {
446            if i >= limit {
447                break;
448            }
449            let oid = oid_result?;
450            let commit = self.repo.find_commit(oid)?;
451            let sha = oid.to_string();
452            commits.push(CommitInfo {
453                short_sha: sha[..7.min(sha.len())].to_string(),
454                sha,
455                author: commit.author().name().unwrap_or("unknown").to_string(),
456                date: commit.time().seconds().to_string(),
457                message: commit.message().unwrap_or("").to_string(),
458            });
459        }
460
461        Ok(commits)
462    }
463}
464
#[cfg(test)]
mod tests {
    use super::*;
    use git2::{Oid, Repository, Signature};
    use tempfile::TempDir;

    /// Two revisions of the same TypeScript file, differing in one line.
    const RETURNS_ONE: &str = "export function a() {\n  return 1;\n}\n";
    const RETURNS_TWO: &str = "export function a() {\n  return 2;\n}\n";

    /// Creates an empty repository in a fresh temporary directory.
    /// The `TempDir` must be kept alive for as long as the repository is used.
    fn init_repo() -> (TempDir, Repository) {
        let temp = TempDir::new().unwrap();
        let repo = Repository::init(temp.path()).unwrap();
        (temp, repo)
    }

    /// Writes `contents` to `file_path`, stages it and commits it on `HEAD`,
    /// returning the id of the new commit.
    fn commit_file(repo: &Repository, file_path: &str, contents: &str, message: &str) -> Oid {
        let target = repo.workdir().unwrap().join(file_path);
        fs::write(target, contents).unwrap();

        let mut index = repo.index().unwrap();
        index.add_path(Path::new(file_path)).unwrap();
        index.write().unwrap();

        let tree_id = index.write_tree().unwrap();
        let tree = repo.find_tree(tree_id).unwrap();
        let sig = Signature::now("Test User", "test@example.com").unwrap();

        // Zero parents for the first commit, the current HEAD commit otherwise.
        let parent = repo
            .head()
            .ok()
            .map(|head| repo.find_commit(head.target().unwrap()).unwrap());
        let parents: Vec<&git2::Commit<'_>> = parent.iter().collect();

        repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &parents)
            .unwrap()
    }

    /// Repository with `sample.ts` committed twice; returns the second commit id.
    fn repo_with_two_commits() -> (TempDir, Repository, Oid) {
        let (temp, repo) = init_repo();
        commit_file(&repo, "sample.ts", RETURNS_ONE, "init");
        let head_oid = commit_file(&repo, "sample.ts", RETURNS_TWO, "change a");
        (temp, repo, head_oid)
    }

    #[test]
    fn clean_worktree_does_not_fall_back_to_head_commit() {
        let (temp, _repo, _head_oid) = repo_with_two_commits();

        let bridge = GitBridge::open(temp.path()).unwrap();
        let (scope, files) = bridge.detect_and_get_files(&[]).unwrap();

        assert!(matches!(scope, DiffScope::Working));
        assert!(files.is_empty());
    }

    #[test]
    fn explicit_commit_scope_still_reads_head_commit_diff() {
        let (temp, _repo, head_oid) = repo_with_two_commits();

        let bridge = GitBridge::open(temp.path()).unwrap();
        let scope = DiffScope::Commit {
            sha: head_oid.to_string(),
        };
        let files = bridge.get_changed_files(&scope, &[]).unwrap();

        assert_eq!(files.len(), 1);
        assert_eq!(files[0].file_path, "sample.ts");
        assert_eq!(files[0].status, FileStatus::Modified);
    }

    #[test]
    fn crlf_only_difference_in_working_file_is_invisible() {
        let (temp, repo) = init_repo();

        commit_file(&repo, "sample.rs", "fn a() {}\n", "init");
        // Same text on disk, but with Windows line endings.
        fs::write(temp.path().join("sample.rs"), "fn a() {}\r\n").unwrap();

        let bridge = GitBridge::open(temp.path()).unwrap();
        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();

        assert_eq!(files.len(), 1, "expected git to detect the CRLF change as modified");

        let change = &files[0];
        let before = change.before_content.as_deref().unwrap();
        let after = change.after_content.as_deref().unwrap();

        assert_eq!(before, after, "CRLF-only difference should be invisible after normalization");
    }

    #[test]
    fn crlf_stored_in_blob_is_normalized_on_read() {
        let (temp, repo) = init_repo();

        // Keep git from rewriting line endings so the blob really holds CRLF.
        repo.config().unwrap().set_str("core.autocrlf", "false").unwrap();
        commit_file(&repo, "sample.rs", "fn a() {}\r\n", "init");
        fs::write(temp.path().join("sample.rs"), "fn a() {}\r\nfn b() {}\r\n").unwrap();

        let bridge = GitBridge::open(temp.path()).unwrap();
        let files = bridge.get_changed_files(&DiffScope::Working, &[]).unwrap();

        assert_eq!(files.len(), 1, "expected git to detect the modification");

        let before = files[0].before_content.as_deref().unwrap();
        assert!(!before.contains('\r'), "before_content read from CRLF blob should be normalized to LF");
    }
}
575}