hindsight_git/
parser.rs

1// Copyright (c) 2026 - present Nicholas D. Crosbie
2// SPDX-License-Identifier: MIT
3
4//! Git log parsing utilities
5//!
6//! This module provides functionality to parse git commits from a repository
7//! using the `git2` crate.
8
9use crate::commit::Commit;
10use crate::error::GitError;
11use chrono::{DateTime, TimeZone, Utc};
12use git2::{DiffOptions, Repository, Sort};
13use serde::{Deserialize, Serialize};
14use std::path::Path;
15
16/// Configuration for walking commits
17#[derive(Debug, Clone, Default)]
18pub struct WalkOptions {
19    /// Maximum number of commits to retrieve
20    pub limit: Option<usize>,
21    /// Start from this commit (defaults to HEAD)
22    pub from_ref: Option<String>,
23    /// Only include commits after this date
24    pub since: Option<DateTime<Utc>>,
25    /// Only include commits before this date
26    pub until: Option<DateTime<Utc>>,
27    /// Include diff information for each commit
28    pub include_diff: bool,
29}
30
31impl WalkOptions {
32    /// Create options for walking the N most recent commits
33    #[must_use]
34    pub fn latest(n: usize) -> Self {
35        Self {
36            limit: Some(n),
37            ..Default::default()
38        }
39    }
40
41    /// Create options with diff extraction enabled
42    #[must_use]
43    pub fn with_diff(mut self) -> Self {
44        self.include_diff = true;
45        self
46    }
47
48    /// Set the starting reference
49    #[must_use]
50    pub fn from(mut self, reference: &str) -> Self {
51        self.from_ref = Some(reference.to_string());
52        self
53    }
54
55    /// Filter commits since a date
56    #[must_use]
57    pub fn since(mut self, date: DateTime<Utc>) -> Self {
58        self.since = Some(date);
59        self
60    }
61
62    /// Filter commits until a date
63    #[must_use]
64    pub fn until(mut self, date: DateTime<Utc>) -> Self {
65        self.until = Some(date);
66        self
67    }
68}
69
70/// Represents file changes in a commit
71#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
72pub struct FileDiff {
73    /// Path to the file
74    pub path: String,
75    /// Change status: "added", "modified", "deleted", "renamed"
76    pub status: String,
77    /// Number of lines added
78    pub insertions: usize,
79    /// Number of lines deleted
80    pub deletions: usize,
81}
82
83/// Summary of all changes in a commit
84#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
85pub struct DiffSummary {
86    /// Number of files changed
87    pub files_changed: usize,
88    /// Total lines added
89    pub insertions: usize,
90    /// Total lines deleted
91    pub deletions: usize,
92    /// Per-file changes
93    pub files: Vec<FileDiff>,
94}
95
96impl DiffSummary {
97    /// Create an empty diff summary
98    #[must_use]
99    pub fn empty() -> Self {
100        Self {
101            files_changed: 0,
102            insertions: 0,
103            deletions: 0,
104            files: Vec::new(),
105        }
106    }
107}
108
109/// A commit with optional diff information
110#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
111pub struct CommitWithDiff {
112    /// The commit data
113    #[serde(flatten)]
114    pub commit: Commit,
115    /// Diff summary (if requested)
116    pub diff: Option<DiffSummary>,
117}
118
119/// A git repository wrapper for parsing commits
120pub struct GitRepo {
121    repo: Repository,
122}
123
124impl GitRepo {
125    /// Open a git repository at the given path
126    ///
127    /// # Errors
128    ///
129    /// Returns `GitError::RepositoryNotFound` if the path is not a git repository.
130    pub fn open(path: impl AsRef<Path>) -> Result<Self, GitError> {
131        let path = path.as_ref();
132        let repo = Repository::open(path).map_err(|_| GitError::RepositoryNotFound {
133            path: path.display().to_string(),
134        })?;
135        Ok(Self { repo })
136    }
137
138    /// Discover and open a git repository containing the given path
139    ///
140    /// This walks up the directory tree to find a `.git` directory.
141    ///
142    /// # Errors
143    ///
144    /// Returns `GitError::RepositoryNotFound` if no repository is found.
145    pub fn discover(path: impl AsRef<Path>) -> Result<Self, GitError> {
146        let path = path.as_ref();
147        let repo = Repository::discover(path).map_err(|_| GitError::RepositoryNotFound {
148            path: path.display().to_string(),
149        })?;
150        Ok(Self { repo })
151    }
152
153    /// Check if the repository is bare
154    #[must_use]
155    pub fn is_bare(&self) -> bool {
156        self.repo.is_bare()
157    }
158
159    /// Get the repository path
160    #[must_use]
161    pub fn path(&self) -> &Path {
162        self.repo.path()
163    }
164
165    /// Get the working directory path (None for bare repos)
166    #[must_use]
167    pub fn workdir(&self) -> Option<&Path> {
168        self.repo.workdir()
169    }
170
171    /// Walk commits according to the given options
172    ///
173    /// # Errors
174    ///
175    /// Returns `GitError` if the repository cannot be walked.
176    pub fn walk_commits(&self, options: &WalkOptions) -> Result<Vec<CommitWithDiff>, GitError> {
177        let mut revwalk = self.repo.revwalk()?;
178        revwalk.set_sorting(Sort::TIME | Sort::TOPOLOGICAL)?;
179
180        // Start from specified ref or HEAD
181        if let Some(ref from_ref) = options.from_ref {
182            let oid = self.repo.revparse_single(from_ref)?.id();
183            revwalk.push(oid)?;
184        } else {
185            revwalk.push_head()?;
186        }
187
188        let mut commits = Vec::new();
189        let limit = options.limit.unwrap_or(usize::MAX);
190
191        for oid_result in revwalk {
192            if commits.len() >= limit {
193                break;
194            }
195
196            let oid = oid_result?;
197            let git_commit = self.repo.find_commit(oid)?;
198
199            // Convert timestamp
200            let time = git_commit.time();
201            let timestamp = Utc
202                .timestamp_opt(time.seconds(), 0)
203                .single()
204                .unwrap_or_else(Utc::now);
205
206            // Apply date filters
207            if let Some(since) = options.since
208                && timestamp < since
209            {
210                continue;
211            }
212            if let Some(until) = options.until
213                && timestamp > until
214            {
215                continue;
216            }
217
218            // Extract commit data
219            let commit = self.extract_commit(&git_commit, timestamp)?;
220
221            // Extract diff if requested
222            let diff = if options.include_diff {
223                Some(self.extract_diff(&git_commit)?)
224            } else {
225                None
226            };
227
228            commits.push(CommitWithDiff { commit, diff });
229        }
230
231        Ok(commits)
232    }
233
234    /// Extract commit metadata from a git2 commit
235    fn extract_commit(
236        &self,
237        git_commit: &git2::Commit<'_>,
238        timestamp: DateTime<Utc>,
239    ) -> Result<Commit, GitError> {
240        let sha = git_commit.id().to_string();
241        let message = git_commit.message().unwrap_or("").to_string();
242        let author = git_commit.author().name().unwrap_or("Unknown").to_string();
243        let author_email = git_commit.author().email().unwrap_or("").to_string();
244
245        let parents: Vec<String> = git_commit.parents().map(|p| p.id().to_string()).collect();
246
247        Ok(Commit {
248            sha,
249            message,
250            author,
251            author_email,
252            timestamp,
253            parents,
254        })
255    }
256
257    /// Extract diff summary for a commit
258    fn extract_diff(&self, git_commit: &git2::Commit<'_>) -> Result<DiffSummary, GitError> {
259        let tree = git_commit.tree()?;
260
261        // Get parent tree (or empty for root commits)
262        let parent_tree = if git_commit.parent_count() > 0 {
263            Some(git_commit.parent(0)?.tree()?)
264        } else {
265            None
266        };
267
268        let mut opts = DiffOptions::new();
269        opts.ignore_whitespace(false);
270
271        let diff =
272            self.repo
273                .diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), Some(&mut opts))?;
274
275        let stats = diff.stats()?;
276        let mut files = Vec::new();
277
278        for delta in diff.deltas() {
279            let path = delta
280                .new_file()
281                .path()
282                .or_else(|| delta.old_file().path())
283                .map(|p| p.display().to_string())
284                .unwrap_or_else(|| "<unknown>".to_string());
285
286            let status = match delta.status() {
287                git2::Delta::Added => "added",
288                git2::Delta::Deleted => "deleted",
289                git2::Delta::Modified => "modified",
290                git2::Delta::Renamed => "renamed",
291                git2::Delta::Copied => "copied",
292                _ => "unknown",
293            }
294            .to_string();
295
296            // Note: per-file stats require iterating hunks, which is expensive
297            // We'll use 0 for per-file and rely on total stats
298            files.push(FileDiff {
299                path,
300                status,
301                insertions: 0,
302                deletions: 0,
303            });
304        }
305
306        Ok(DiffSummary {
307            files_changed: stats.files_changed(),
308            insertions: stats.insertions(),
309            deletions: stats.deletions(),
310            files,
311        })
312    }
313
314    /// Get a single commit by SHA or reference
315    ///
316    /// # Errors
317    ///
318    /// Returns `GitError::InvalidReference` if the reference cannot be resolved.
319    pub fn get_commit(&self, reference: &str) -> Result<CommitWithDiff, GitError> {
320        let obj = self
321            .repo
322            .revparse_single(reference)
323            .map_err(|_| GitError::InvalidReference {
324                reference: reference.to_string(),
325            })?;
326
327        let git_commit = obj
328            .peel_to_commit()
329            .map_err(|_| GitError::InvalidReference {
330                reference: reference.to_string(),
331            })?;
332
333        let time = git_commit.time();
334        let timestamp = Utc
335            .timestamp_opt(time.seconds(), 0)
336            .single()
337            .unwrap_or_else(Utc::now);
338
339        let commit = self.extract_commit(&git_commit, timestamp)?;
340        let diff = Some(self.extract_diff(&git_commit)?);
341
342        Ok(CommitWithDiff { commit, diff })
343    }
344
345    /// Get the HEAD commit SHA
346    ///
347    /// # Errors
348    ///
349    /// Returns `GitError` if HEAD cannot be resolved.
350    pub fn head_sha(&self) -> Result<String, GitError> {
351        let head = self.repo.head()?;
352        let oid = head.target().ok_or_else(|| GitError::InvalidReference {
353            reference: "HEAD".to_string(),
354        })?;
355        Ok(oid.to_string())
356    }
357
358    /// Count commits in the repository
359    ///
360    /// # Errors
361    ///
362    /// Returns `GitError` if the repository cannot be walked.
363    pub fn commit_count(&self) -> Result<usize, GitError> {
364        let mut revwalk = self.repo.revwalk()?;
365        revwalk.push_head()?;
366        Ok(revwalk.count())
367    }
368}
369
#[cfg(test)]
mod tests {
    use super::*;
    use similar_asserts::assert_eq;
    use std::env;

    /// Read the crate directory from the `CARGO_MANIFEST_DIR` environment variable.
    fn manifest_dir() -> String {
        env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set")
    }

    /// Open the repository located two directory levels above the crate manifest.
    fn get_repo() -> GitRepo {
        let crate_dir = manifest_dir();
        // ancestors(): [crate dir, its parent, its grandparent, ...]
        let repo_root = Path::new(&crate_dir).ancestors().nth(2).unwrap();
        GitRepo::open(repo_root).expect("Should open repo")
    }

    // --- opening / discovering -------------------------------------------

    #[test]
    fn test_open_repository() {
        let repo = get_repo();
        let bare = repo.is_bare();
        assert!(!bare);
        assert!(repo.workdir().is_some());
    }

    #[test]
    fn test_discover_repository() {
        let start = manifest_dir();
        let repo = GitRepo::discover(&start).expect("Should discover repo");
        assert!(!repo.is_bare());
    }

    #[test]
    fn test_open_nonexistent_repository() {
        let result = GitRepo::open("/nonexistent/path");
        assert!(result.is_err());
        let Err(GitError::RepositoryNotFound { path }) = result else {
            panic!("Expected RepositoryNotFound error");
        };
        assert!(path.contains("nonexistent"));
    }

    // --- reading commits --------------------------------------------------

    #[test]
    fn test_head_sha() {
        let sha = get_repo().head_sha().expect("Should get HEAD");
        assert!(Commit::is_valid_sha(&sha), "HEAD SHA should be valid");
    }

    #[test]
    fn test_walk_commits_limit() {
        let commits = get_repo()
            .walk_commits(&WalkOptions::latest(5))
            .expect("Should walk commits");
        assert!(commits.len() <= 5);
        assert!(!commits.is_empty());
    }

    #[test]
    fn test_walk_commits_with_diff() {
        let commits = get_repo()
            .walk_commits(&WalkOptions::latest(3).with_diff())
            .expect("Should walk commits");

        assert!(!commits.is_empty());
        assert!(
            commits.iter().all(|cwc| cwc.diff.is_some()),
            "Diff should be included"
        );
    }

    #[test]
    fn test_commit_extraction_fields() {
        let commits = get_repo()
            .walk_commits(&WalkOptions::latest(1))
            .expect("Should walk commits");

        assert_eq!(commits.len(), 1);
        let CommitWithDiff { commit, .. } = &commits[0];

        assert!(Commit::is_valid_sha(&commit.sha));
        assert!(!commit.message.is_empty());
        assert!(!commit.author.is_empty());
    }

    #[test]
    fn test_get_commit_by_sha() {
        let repo = get_repo();
        let head_sha = repo.head_sha().expect("Should get HEAD");

        let fetched = repo.get_commit(&head_sha).expect("Should get commit");
        assert_eq!(fetched.commit.sha, head_sha);
        assert!(fetched.diff.is_some());
    }

    #[test]
    fn test_get_commit_by_ref() {
        let fetched = get_repo()
            .get_commit("HEAD")
            .expect("Should get HEAD commit");
        assert!(Commit::is_valid_sha(&fetched.commit.sha));
    }

    #[test]
    fn test_get_invalid_reference() {
        let outcome = get_repo().get_commit("nonexistent-ref-12345");
        assert!(outcome.is_err());
    }

    #[test]
    fn test_commit_count() {
        let total = get_repo().commit_count().expect("Should count commits");
        assert!(total > 0, "Repository should have commits");
    }

    // --- pure data types (no repository needed) ---------------------------

    #[test]
    fn test_walk_options_builder() {
        let WalkOptions {
            limit,
            from_ref,
            include_diff,
            ..
        } = WalkOptions::latest(10).with_diff().from("main");

        assert_eq!(limit, Some(10));
        assert!(include_diff);
        assert_eq!(from_ref, Some("main".to_string()));
    }

    #[test]
    fn test_diff_summary_serialization() {
        let lib_rs = FileDiff {
            path: "src/lib.rs".to_string(),
            status: "modified".to_string(),
            insertions: 30,
            deletions: 5,
        };
        let diff = DiffSummary {
            files_changed: 3,
            insertions: 42,
            deletions: 7,
            files: vec![lib_rs],
        };

        let json = serde_json::to_string(&diff).expect("Should serialize");
        for key in ["files_changed", "insertions"] {
            assert!(json.contains(key));
        }

        // Round-trip must give back an equal value.
        let deserialized: DiffSummary = serde_json::from_str(&json).expect("Should deserialize");
        assert_eq!(diff, deserialized);
    }

    #[test]
    fn test_commit_with_diff_serialization() {
        let commit = Commit {
            sha: "a".repeat(40),
            message: "Test".to_string(),
            author: "Author".to_string(),
            author_email: "author@example.com".to_string(),
            timestamp: Utc::now(),
            parents: vec![],
        };
        let cwc = CommitWithDiff {
            commit,
            diff: Some(DiffSummary::empty()),
        };

        let json = serde_json::to_string(&cwc).expect("Should serialize");
        for key in ["sha", "diff"] {
            assert!(json.contains(key));
        }
    }
}