git_iris/
git.rs

1use crate::config::Config;
2use crate::context::{ChangeType, CommitContext, ProjectMetadata, RecentCommit, StagedFile};
3use crate::file_analyzers::{self, FileAnalyzer, should_exclude_file};
4use crate::log_debug;
5use anyhow::{Context, Result, anyhow};
6use futures::future::join_all;
7use git2::{DiffOptions, FileMode, Repository, Status, StatusOptions, Tree};
8use std::env;
9use std::fs;
10use std::io;
11use std::path::Path;
12use std::path::PathBuf;
13use std::process::{Command, Stdio};
14use tokio::task;
15
/// Handle to a Git repository on disk.
///
/// Only the location is stored; methods open the underlying
/// `git2::Repository` from this path whenever they need it.
pub struct GitRepo {
    /// Path passed to `Repository::open` each time the repository is opened.
    repo_path: PathBuf,
}
20
21#[derive(Debug)]
22pub struct CommitResult {
23    pub branch: String,
24    pub commit_hash: String,
25    pub files_changed: usize,
26    pub insertions: usize,
27    pub deletions: usize,
28    pub new_files: Vec<(String, FileMode)>,
29}
30
31impl GitRepo {
32    /// Creates a new `GitRepo` instance.
33    ///
34    /// # Arguments
35    ///
36    /// * `repo_path` - The path to the Git repository.
37    ///
38    /// # Returns
39    ///
40    /// A Result containing the `GitRepo` instance or an error.
41    pub fn new(repo_path: &Path) -> Result<Self> {
42        Ok(Self {
43            repo_path: repo_path.to_path_buf(),
44        })
45    }
46
47    /// Open the repository at the stored path
48    pub fn open_repo(&self) -> Result<Repository, git2::Error> {
49        Repository::open(&self.repo_path)
50    }
51
52    /// Retrieves Git information for the repository.
53    ///
54    /// # Arguments
55    ///
56    /// * `_config` - The configuration object (currently unused).
57    ///
58    /// # Returns
59    ///
60    /// A Result containing the `CommitContext` or an error.
61    pub async fn get_git_info(&self, _config: &Config) -> Result<CommitContext> {
62        let repo = self.open_repo()?;
63        log_debug!("Getting git info for repo path: {:?}", repo.path());
64
65        let branch = self.get_current_branch()?;
66        let recent_commits = self.get_recent_commits(5)?;
67        let staged_files = Self::get_file_statuses(&repo)?;
68
69        let changed_files: Vec<String> =
70            staged_files.iter().map(|file| file.path.clone()).collect();
71
72        log_debug!("Changed files for metadata extraction: {:?}", changed_files);
73
74        let project_metadata = self.get_project_metadata(&changed_files).await?;
75
76        log_debug!("Extracted project metadata: {:?}", project_metadata);
77
78        let user_name = repo.config()?.get_string("user.name")?;
79        let user_email = repo.config()?.get_string("user.email")?;
80
81        let context = CommitContext::new(
82            branch,
83            recent_commits,
84            staged_files,
85            project_metadata,
86            user_name,
87            user_email,
88        );
89
90        log_debug!("Git info retrieved successfully");
91        Ok(context)
92    }
93
94    /// Retrieves the current branch name.
95    ///
96    /// # Returns
97    ///
98    /// A Result containing the branch name as a String or an error.
99    fn get_current_branch(&self) -> Result<String> {
100        let repo = self.open_repo()?;
101        let head = repo.head()?;
102        let branch_name = head.shorthand().unwrap_or("HEAD detached").to_string();
103        log_debug!("Current branch: {}", branch_name);
104        Ok(branch_name)
105    }
106
107    /// Retrieves recent commits.
108    ///
109    /// # Arguments
110    ///
111    /// * `count` - The number of recent commits to retrieve.
112    ///
113    /// # Returns
114    ///
115    /// A Result containing a Vec of `RecentCommit` objects or an error.
116    fn get_recent_commits(&self, count: usize) -> Result<Vec<RecentCommit>> {
117        let repo = self.open_repo()?;
118        log_debug!("Fetching {} recent commits", count);
119        let mut revwalk = repo.revwalk()?;
120        revwalk.push_head()?;
121
122        let commits = revwalk
123            .take(count)
124            .map(|oid| {
125                let oid = oid?;
126                let commit = repo.find_commit(oid)?;
127                let author = commit.author();
128                Ok(RecentCommit {
129                    hash: oid.to_string(),
130                    message: commit.message().unwrap_or_default().to_string(),
131                    author: author.name().unwrap_or_default().to_string(),
132                    timestamp: commit.time().seconds().to_string(),
133                })
134            })
135            .collect::<Result<Vec<_>>>()?;
136
137        log_debug!("Retrieved {} recent commits", commits.len());
138        Ok(commits)
139    }
140
141    /// Retrieves commits between two Git references.
142    ///
143    /// # Arguments
144    ///
145    /// * `from` - The starting Git reference.
146    /// * `to` - The ending Git reference.
147    /// * `callback` - A callback function to process each commit.
148    ///
149    /// # Returns
150    ///
151    /// A Result containing a Vec of processed commits or an error.
152    pub fn get_commits_between_with_callback<T, F>(
153        &self,
154        from: &str,
155        to: &str,
156        mut callback: F,
157    ) -> Result<Vec<T>>
158    where
159        F: FnMut(&RecentCommit) -> Result<T>,
160    {
161        let repo = self.open_repo()?;
162        let from_commit = repo.revparse_single(from)?.peel_to_commit()?;
163        let to_commit = repo.revparse_single(to)?.peel_to_commit()?;
164
165        let mut revwalk = repo.revwalk()?;
166        revwalk.push(to_commit.id())?;
167        revwalk.hide(from_commit.id())?;
168
169        revwalk
170            .filter_map(std::result::Result::ok)
171            .map(|id| {
172                let commit = repo.find_commit(id)?;
173                let recent_commit = RecentCommit {
174                    hash: commit.id().to_string(),
175                    message: commit.message().unwrap_or_default().to_string(),
176                    author: commit.author().name().unwrap_or_default().to_string(),
177                    timestamp: commit.time().seconds().to_string(),
178                };
179                callback(&recent_commit)
180            })
181            .collect()
182    }
183
184    /// Retrieves the status of files in the repository.
185    ///
186    /// # Returns
187    ///
188    /// A Result containing a Vec of `StagedFile` objects or an error.
189    fn get_file_statuses(repo: &Repository) -> Result<Vec<StagedFile>> {
190        log_debug!("Getting file statuses");
191        let mut staged_files = Vec::new();
192
193        let mut opts = StatusOptions::new();
194        opts.include_untracked(true);
195        let statuses = repo.statuses(Some(&mut opts))?;
196
197        for entry in statuses.iter() {
198            let path = entry.path().context("Could not get path")?;
199            let status = entry.status();
200
201            if status.is_index_new() || status.is_index_modified() || status.is_index_deleted() {
202                let change_type = if status.is_index_new() {
203                    ChangeType::Added
204                } else if status.is_index_modified() {
205                    ChangeType::Modified
206                } else {
207                    ChangeType::Deleted
208                };
209
210                let should_exclude = should_exclude_file(path);
211                let diff = if should_exclude {
212                    String::from("[Content excluded]")
213                } else {
214                    Self::get_diff_for_file(repo, path)?
215                };
216
217                let content = if should_exclude
218                    || change_type != ChangeType::Modified
219                    || Self::is_binary_diff(&diff)
220                {
221                    None
222                } else {
223                    let path_obj = Path::new(path);
224                    if path_obj.exists() {
225                        Some(fs::read_to_string(path_obj)?)
226                    } else {
227                        None
228                    }
229                };
230
231                let analyzer = file_analyzers::get_analyzer(path);
232                let staged_file = StagedFile {
233                    path: path.to_string(),
234                    change_type: change_type.clone(),
235                    diff: diff.clone(),
236                    analysis: Vec::new(),
237                    content: content.clone(),
238                    content_excluded: should_exclude,
239                };
240
241                let analysis = if should_exclude {
242                    vec!["[Analysis excluded]".to_string()]
243                } else {
244                    analyzer.analyze(path, &staged_file)
245                };
246
247                staged_files.push(StagedFile {
248                    path: path.to_string(),
249                    change_type,
250                    diff,
251                    analysis,
252                    content,
253                    content_excluded: should_exclude,
254                });
255            }
256        }
257
258        log_debug!("Found {} staged files", staged_files.len());
259        Ok(staged_files)
260    }
261
262    /// Retrieves the diff for a specific file.
263    ///
264    /// # Arguments
265    ///
266    /// * `path` - The path of the file to get the diff for.
267    ///
268    /// # Returns
269    ///
270    /// A Result containing the diff as a String or an error.
271    fn get_diff_for_file(repo: &Repository, path: &str) -> Result<String> {
272        log_debug!("Getting diff for file: {}", path);
273        let mut diff_options = DiffOptions::new();
274        diff_options.pathspec(path);
275
276        let tree = Some(repo.head()?.peel_to_tree()?);
277
278        let diff = repo.diff_tree_to_workdir_with_index(tree.as_ref(), Some(&mut diff_options))?;
279
280        let mut diff_string = String::new();
281        diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
282            let origin = match line.origin() {
283                '+' | '-' | ' ' => line.origin(),
284                _ => ' ',
285            };
286            diff_string.push(origin);
287            diff_string.push_str(&String::from_utf8_lossy(line.content()));
288            true
289        })?;
290
291        if Self::is_binary_diff(&diff_string) {
292            Ok("[Binary file changed]".to_string())
293        } else {
294            log_debug!("Generated diff for {} ({} bytes)", path, diff_string.len());
295            Ok(diff_string)
296        }
297    }
298
299    /// Retrieves project metadata for changed files.
300    ///
301    /// # Arguments
302    ///
303    /// * `changed_files` - A slice of Strings representing the changed file paths.
304    ///
305    /// # Returns
306    ///
307    /// A Result containing the `ProjectMetadata` or an error.
308    pub async fn get_project_metadata(&self, changed_files: &[String]) -> Result<ProjectMetadata> {
309        log_debug!(
310            "Getting project metadata for changed files: {:?}",
311            changed_files
312        );
313
314        let metadata_futures = changed_files.iter().map(|file_path| {
315            let file_path = file_path.clone();
316            task::spawn(async move {
317                let file_name = Path::new(&file_path)
318                    .file_name()
319                    .expect("Failed to get file name")
320                    .to_str()
321                    .expect("Failed to convert file name to string");
322                let analyzer: Box<dyn FileAnalyzer + Send + Sync> =
323                    file_analyzers::get_analyzer(file_name);
324
325                log_debug!("Analyzing file: {}", file_path);
326
327                if should_exclude_file(&file_path) {
328                    log_debug!("File excluded: {}", file_path);
329                    None
330                } else if let Ok(content) = tokio::fs::read_to_string(&file_path).await {
331                    let metadata = analyzer.extract_metadata(file_name, &content);
332                    log_debug!("Extracted metadata for {}: {:?}", file_name, metadata);
333                    Some(metadata)
334                } else {
335                    log_debug!("Failed to read file: {}", file_path);
336                    None
337                }
338            })
339        });
340
341        let results = join_all(metadata_futures).await;
342
343        let mut combined_metadata = ProjectMetadata::default();
344        let mut any_file_analyzed = false;
345        for metadata in results.into_iter().flatten().flatten() {
346            log_debug!("Merging metadata: {:?}", metadata);
347            combined_metadata.merge(metadata);
348            any_file_analyzed = true;
349        }
350
351        log_debug!("Final combined metadata: {:?}", combined_metadata);
352
353        if !any_file_analyzed {
354            log_debug!("No files were analyzed!");
355            combined_metadata.language = Some("Unknown".to_string());
356        } else if combined_metadata.language.is_none() {
357            combined_metadata.language = Some("Unknown".to_string());
358        }
359
360        Ok(combined_metadata)
361    }
362
363    /// Commits changes and verifies the commit.
364    ///
365    /// # Arguments
366    ///
367    /// * `message` - The commit message.
368    ///
369    /// # Returns
370    ///
371    /// A Result containing the `CommitResult` or an error.
372    pub fn commit_and_verify(&self, message: &str) -> Result<CommitResult> {
373        match self.commit(message) {
374            Ok(result) => {
375                if let Err(e) = self.execute_hook("post-commit") {
376                    log_debug!("Post-commit hook failed: {}", e);
377                }
378                Ok(result)
379            }
380            Err(e) => {
381                log_debug!("Commit failed: {}", e);
382                Err(e)
383            }
384        }
385    }
386
387    /// Commits changes to the repository.
388    ///
389    /// # Arguments
390    ///
391    /// * `message` - The commit message.
392    ///
393    /// # Returns
394    ///
395    /// A Result containing the `CommitResult` or an error.
396    pub fn commit(&self, message: &str) -> Result<CommitResult> {
397        let repo = self.open_repo()?;
398        let signature = repo.signature()?;
399        let mut index = repo.index()?;
400        let tree_id = index.write_tree()?;
401        let tree = repo.find_tree(tree_id)?;
402        let parent_commit = repo.head()?.peel_to_commit()?;
403        let commit_oid = repo.commit(
404            Some("HEAD"),
405            &signature,
406            &signature,
407            message,
408            &tree,
409            &[&parent_commit],
410        )?;
411
412        let branch_name = repo.head()?.shorthand().unwrap_or("HEAD").to_string();
413        let commit = repo.find_commit(commit_oid)?;
414        let short_hash = commit.id().to_string()[..7].to_string();
415
416        let mut files_changed = 0;
417        let mut insertions = 0;
418        let mut deletions = 0;
419        let mut new_files = Vec::new();
420
421        let diff = repo.diff_tree_to_tree(Some(&parent_commit.tree()?), Some(&tree), None)?;
422
423        diff.print(git2::DiffFormat::NameStatus, |_, _, line| {
424            files_changed += 1;
425            if line.origin() == '+' {
426                insertions += 1;
427            } else if line.origin() == '-' {
428                deletions += 1;
429            }
430            true
431        })?;
432
433        let statuses = repo.statuses(None)?;
434        for entry in statuses.iter() {
435            if entry.status().contains(Status::INDEX_NEW) {
436                new_files.push((
437                    entry.path().context("Could not get path")?.to_string(),
438                    entry
439                        .index_to_workdir()
440                        .context("Could not get index to workdir")?
441                        .new_file()
442                        .mode(),
443                ));
444            }
445        }
446
447        Ok(CommitResult {
448            branch: branch_name,
449            commit_hash: short_hash,
450            files_changed,
451            insertions,
452            deletions,
453            new_files,
454        })
455    }
456
457    /// Retrieves the README content at a specific commit.
458    ///
459    /// # Arguments
460    ///
461    /// * `commit_ish` - A string that resolves to a commit.
462    ///
463    /// # Returns
464    ///
465    /// A Result containing an Option<String> with the README content or an error.
466    pub fn get_readme_at_commit(&self, commit_ish: &str) -> Result<Option<String>> {
467        let repo = self.open_repo()?;
468        let obj = repo.revparse_single(commit_ish)?;
469        let tree = obj.peel_to_tree()?;
470
471        Self::find_readme_in_tree(&repo, &tree)
472            .context("Failed to find and read README at specified commit")
473    }
474
475    /// Finds a README file in the given tree.
476    ///
477    /// # Arguments
478    ///
479    /// * `tree` - A reference to a Git tree.
480    ///
481    /// # Returns
482    ///
483    /// A Result containing an Option<String> with the README content or an error.
484    fn find_readme_in_tree(repo: &Repository, tree: &Tree) -> Result<Option<String>> {
485        log_debug!("Searching for README file in the repository");
486
487        let readme_patterns = [
488            "README.md",
489            "README.markdown",
490            "README.txt",
491            "README",
492            "Readme.md",
493            "readme.md",
494        ];
495
496        for entry in tree {
497            let name = entry.name().unwrap_or("");
498            if readme_patterns
499                .iter()
500                .any(|&pattern| name.eq_ignore_ascii_case(pattern))
501            {
502                let object = entry.to_object(repo)?;
503                if let Some(blob) = object.as_blob() {
504                    if let Ok(content) = std::str::from_utf8(blob.content()) {
505                        log_debug!("README file found: {}", name);
506                        return Ok(Some(content.to_string()));
507                    }
508                }
509            }
510        }
511
512        log_debug!("No README file found");
513        Ok(None)
514    }
515
516    /// Executes a Git hook.
517    ///
518    /// # Arguments
519    ///
520    /// * `hook_name` - The name of the hook to execute.
521    ///
522    /// # Returns
523    ///
524    /// A Result indicating success or an error.
525    pub fn execute_hook(&self, hook_name: &str) -> Result<()> {
526        let repo = self.open_repo()?;
527        let hook_path = repo.path().join("hooks").join(hook_name);
528
529        if hook_path.exists() {
530            log_debug!("Executing hook: {}", hook_name);
531            log_debug!("Hook path: {:?}", hook_path);
532
533            // Get the repository's working directory (top level)
534            let repo_workdir = repo
535                .workdir()
536                .context("Repository has no working directory")?;
537            log_debug!("Repository working directory: {:?}", repo_workdir);
538
539            // Create a command with the proper environment and working directory
540            let mut command = Command::new(&hook_path);
541            command
542                .current_dir(repo_workdir) // Use the repository's working directory, not .git
543                .env("GIT_DIR", repo.path()) // Set GIT_DIR to the .git directory
544                .env("GIT_WORK_TREE", repo_workdir) // Set GIT_WORK_TREE to the working directory
545                .stdout(Stdio::piped())
546                .stderr(Stdio::piped());
547
548            log_debug!("Executing hook command: {:?}", command);
549
550            let mut child = command.spawn()?;
551
552            let stdout = child.stdout.take().context("Could not get stdout")?;
553            let stderr = child.stderr.take().context("Could not get stderr")?;
554
555            std::thread::spawn(move || {
556                io::copy(&mut io::BufReader::new(stdout), &mut io::stdout())
557                    .expect("Failed to copy data to stdout");
558            });
559            std::thread::spawn(move || {
560                io::copy(&mut io::BufReader::new(stderr), &mut io::stderr())
561                    .expect("Failed to copy data to stderr");
562            });
563
564            let status = child.wait()?;
565
566            if !status.success() {
567                return Err(anyhow!(
568                    "Hook '{}' failed with exit code: {:?}",
569                    hook_name,
570                    status.code()
571                ));
572            }
573
574            log_debug!("Hook '{}' executed successfully", hook_name);
575        } else {
576            log_debug!("Hook '{}' not found at {:?}", hook_name, hook_path);
577        }
578
579        Ok(())
580    }
581
582    /// Checks if the current directory is inside a Git work tree.
583    ///
584    /// # Returns
585    ///
586    /// A Result containing a boolean indicating if inside a work tree or an error.
587    pub fn is_inside_work_tree() -> Result<bool> {
588        log_debug!("Checking if inside Git work tree");
589        match Repository::discover(env::current_dir()?) {
590            Ok(repo) => {
591                if repo.is_bare() {
592                    log_debug!("Not inside Git work tree (bare repository)");
593                    Ok(false)
594                } else {
595                    log_debug!("Inside Git work tree");
596                    Ok(true)
597                }
598            }
599            Err(e) => {
600                log_debug!("Error discovering Git repository: {}", e);
601                Err(anyhow!("Not in a Git repository: {}", e))
602            }
603        }
604    }
605
606    fn is_binary_diff(diff: &str) -> bool {
607        diff.contains("Binary files")
608            || diff.contains("GIT binary patch")
609            || diff.contains("[Binary file changed]")
610    }
611}