Skip to main content

git_stats/
repo.rs

1//! Infrastructure: the only module that talks to gix and the filesystem.
2//!
3//! [`Repo`] has two constructors. [`Repo::open`] reads a real repository;
4//! [`Repo::create_null`] returns an in-memory stand-in driven by canned commits,
5//! so application and logic code can be tested without touching git.
6
7use rayon::prelude::*;
8
9use crate::error::{Error, Result};
10use crate::model::{Author, CommitMeta, DiffStat, Trailer};
11
12/// A commit yielded by [`Repo::walk`]: its logic-facing metadata plus an opaque
13/// handle the wrapper uses to compute the numstat later.
14pub struct WalkedCommit {
15    pub meta: CommitMeta,
16    is_merge: bool,
17    handle: Handle,
18}
19
20/// How to obtain a commit's numstat: from gix (real) or from a canned value (null).
21enum Handle {
22    Real(gix::ObjectId),
23    Null(DiffStat),
24}
25
/// A canned commit used to build a nulled [`Repo`] in tests.
///
/// [`Repo::walk`] on a nulled repository copies these fields into the
/// [`WalkedCommit`]s it returns, one per entry and in the same order.
#[derive(Debug, Clone)]
pub struct NulledCommit {
    /// Metadata handed back as the walked commit's `meta`.
    pub meta: CommitMeta,
    /// Numstat reported for this commit, unless `is_merge` is set.
    pub diff: DiffStat,
    /// Marks the commit as a merge; merges report the default `DiffStat`
    /// instead of `diff`.
    pub is_merge: bool,
}
33
/// Source of commits behind a [`Repo`]: a real gix repository or canned data.
enum Backend {
    // Boxed because a `ThreadSafeRepository` is far larger than the null variant.
    /// A repository opened through gix; converted to a thread-local handle
    /// whenever it is used.
    Real(Box<gix::ThreadSafeRepository>),
    /// Canned commits served by a nulled repository.
    Null(Vec<NulledCommit>),
}
39
/// A git repository, or a nulled stand-in for it.
///
/// Construct one with [`Repo::open`] (real repository) or
/// [`Repo::create_null`] (canned commits).
pub struct Repo {
    /// Decides whether calls go to gix or to the canned commits.
    backend: Backend,
}
44
45impl Repo {
46    /// Open the repository containing `path`, searching upward like git does.
47    ///
48    /// # Errors
49    ///
50    /// Returns an error if no git repository can be discovered from `path`.
51    pub fn open(path: impl AsRef<std::path::Path>) -> Result<Self> {
52        let repo = gix::discover(path).map_err(|e| Error::OpenRepository(Box::new(e)))?;
53        Ok(Self {
54            backend: Backend::Real(Box::new(repo.into_sync())),
55        })
56    }
57
58    /// Build a nulled repository whose [`walk`](Self::walk) returns the given
59    /// commits verbatim. The revision range is ignored, since range resolution
60    /// is real-git behavior exercised separately by integration tests.
61    #[must_use]
62    pub fn create_null(commits: Vec<NulledCommit>) -> Self {
63        Self {
64            backend: Backend::Null(commits),
65        }
66    }
67
68    /// Read commit metadata for every commit in `range`.
69    ///
70    /// # Errors
71    ///
72    /// Returns an error if the range cannot be resolved or a commit object
73    /// cannot be read.
74    pub fn walk(&self, range: &str) -> Result<Vec<WalkedCommit>> {
75        match &self.backend {
76            Backend::Real(tsr) => walk_real(&tsr.to_thread_local(), range),
77            Backend::Null(commits) => Ok(commits
78                .iter()
79                .map(|c| WalkedCommit {
80                    meta: c.meta.clone(),
81                    is_merge: c.is_merge,
82                    handle: Handle::Null(c.diff),
83                })
84                .collect()),
85        }
86    }
87
88    /// Compute the numstat for each given commit. Merge commits contribute
89    /// nothing, matching `git log --numstat`'s default.
90    ///
91    /// # Errors
92    ///
93    /// Returns an error if a commit's tree or its parent's tree cannot be
94    /// diffed.
95    pub fn numstats(&self, commits: &[&WalkedCommit]) -> Result<Vec<DiffStat>> {
96        match &self.backend {
97            Backend::Real(tsr) => commits
98                .par_iter()
99                .map_init(
100                    || {
101                        let mut repo = tsr.to_thread_local();
102                        // Per-thread object cache so repeated tree/blob reads during
103                        // diffing hit memory instead of the pack. 8 MiB comfortably
104                        // holds a commit's trees at negligible per-thread cost.
105                        repo.object_cache_size_if_unset(8 * 1024 * 1024);
106                        repo
107                    },
108                    |repo, c| diffstat(c, Some(repo)),
109                )
110                .collect(),
111            Backend::Null(_) => commits.iter().map(|c| diffstat(c, None)).collect(),
112        }
113    }
114}
115
116/// A commit's numstat. Merge commits contribute nothing (matching
117/// `git log --numstat`); canned (null) commits return their stored value, and
118/// real commits are diffed against their first parent using `repo`.
119fn diffstat(commit: &WalkedCommit, repo: Option<&gix::Repository>) -> Result<DiffStat> {
120    if commit.is_merge {
121        return Ok(DiffStat::default());
122    }
123    match (&commit.handle, repo) {
124        (Handle::Null(diff), _) => Ok(*diff),
125        (Handle::Real(id), Some(repo)) => numstat_real(repo, *id),
126        // A real handle only ever comes from the real backend, which always
127        // supplies a repo, so this arm is unreachable in practice.
128        (Handle::Real(_), None) => Ok(DiffStat::default()),
129    }
130}
131
132fn walk_real(repo: &gix::Repository, range: &str) -> Result<Vec<WalkedCommit>> {
133    let (tips, hidden) = resolve_range(repo, range)?;
134    let mailmap = repo.open_mailmap();
135    let walk = repo
136        .rev_walk(tips)
137        .with_hidden(hidden)
138        .all()
139        .map_err(|e| Error::WalkRange {
140            range: range.to_string(),
141            source: Box::new(e),
142        })?;
143    let mut out = Vec::new();
144    for info in walk {
145        let info = info.map_err(|e| Error::ReadCommit(Box::new(e)))?;
146        let commit = repo
147            .find_commit(info.id)
148            .map_err(|e| Error::ReadCommit(Box::new(e)))?;
149        let is_merge = commit.parent_ids().take(2).count() > 1;
150        out.push(WalkedCommit {
151            meta: commit_meta(&commit, &mailmap)?,
152            is_merge,
153            handle: Handle::Real(info.id),
154        });
155    }
156    Ok(out)
157}
158
159fn commit_meta(commit: &gix::Commit, mailmap: &gix::mailmap::Snapshot) -> Result<CommitMeta> {
160    let author = mailmap.resolve(
161        commit
162            .author()
163            .map_err(|e| Error::ReadCommit(Box::new(e)))?,
164    );
165    let time_seconds = commit
166        .committer()
167        .map_err(|e| Error::ReadCommit(Box::new(e)))?
168        .seconds();
169    Ok(CommitMeta {
170        author: Author {
171            name: author.name.to_string(),
172            email: author.email.to_string(),
173        },
174        time_seconds,
175        trailers: parse_trailers(commit)?,
176    })
177}
178
179fn parse_trailers(commit: &gix::Commit) -> Result<Vec<Trailer>> {
180    let message = commit
181        .message()
182        .map_err(|e| Error::ReadCommit(Box::new(e)))?;
183    let Some(body) = message.body() else {
184        return Ok(Vec::new());
185    };
186    Ok(body
187        .trailers()
188        .map(|t| Trailer {
189            token: t.token.to_string(),
190            value: t.value.to_string(),
191        })
192        .collect())
193}
194
195fn numstat_real(repo: &gix::Repository, id: gix::ObjectId) -> Result<DiffStat> {
196    let commit = repo
197        .find_commit(id)
198        .map_err(|e| Error::ReadCommit(Box::new(e)))?;
199    let new_tree = commit.tree().map_err(|e| Error::ReadCommit(Box::new(e)))?;
200    let old_tree = match commit.parent_ids().next() {
201        Some(parent) => repo
202            .find_commit(parent.detach())
203            .map_err(|e| Error::ReadCommit(Box::new(e)))?
204            .tree()
205            .map_err(|e| Error::ReadCommit(Box::new(e)))?,
206        None => repo.empty_tree(),
207    };
208    let stats = old_tree
209        .changes()
210        .map_err(|e| Error::DiffStats(Box::new(e)))?
211        .stats(&new_tree)
212        .map_err(|e| Error::DiffStats(Box::new(e)))?;
213    Ok(DiffStat {
214        insertions: stats.lines_added,
215        deletions: stats.lines_removed,
216        files: stats.files_changed,
217    })
218}
219
/// Tips to walk from, and commits to hide, for a revision range.
///
/// The first element is handed to the revision walk as its starting tips; the
/// second is handed to it as the set of hidden commits.
type RangeEnds = (Vec<gix::ObjectId>, Vec<gix::ObjectId>);
222
223/// Resolve a revision range into walk tips and hidden commits. Each endpoint is
224/// fully git-spelled (refs, short hashes, `@{n}`, ...); only the `..`/`...`
225/// operators are interpreted here. Exotic gitrevisions(7) forms are unsupported.
226fn resolve_range(repo: &gix::Repository, range: &str) -> Result<RangeEnds> {
227    if let Some((a, b)) = range.split_once("...") {
228        let a = single(repo, default_head(a))?;
229        let b = single(repo, default_head(b))?;
230        return Ok((vec![a, b], merge_bases(repo, a, b)));
231    }
232    if let Some((a, b)) = range.split_once("..") {
233        let excluded = single(repo, default_head(a))?;
234        let included = single(repo, default_head(b))?;
235        return Ok((vec![included], vec![excluded]));
236    }
237    Ok((vec![single(repo, range)?], Vec::new()))
238}
239
/// Substitute `"HEAD"` for an omitted (empty) range endpoint; any other
/// revision is returned unchanged.
fn default_head(rev: &str) -> &str {
    match rev {
        "" => "HEAD",
        named => named,
    }
}
243
244fn single(repo: &gix::Repository, rev: &str) -> Result<gix::ObjectId> {
245    Ok(repo
246        .rev_parse_single(rev)
247        .map_err(|e| Error::ResolveRevision {
248            revision: rev.to_string(),
249            source: Box::new(e),
250        })?
251        .detach())
252}
253
254fn merge_bases(repo: &gix::Repository, a: gix::ObjectId, b: gix::ObjectId) -> Vec<gix::ObjectId> {
255    // Disjoint histories have no merge base; then nothing is hidden and the
256    // symmetric difference is simply everything reachable from either tip.
257    match repo.merge_base(a, b) {
258        Ok(base) => vec![base.detach()],
259        Err(_) => Vec::new(),
260    }
261}
262
263/// Parse a `--since`/`--until` style date into seconds since the Unix epoch.
264/// Accepts ISO 8601, RFC 2822, unix timestamps, and relative spellings like
265/// "2 weeks ago" (a documented subset of git's approxidate).
266///
267/// # Errors
268///
269/// Returns an error if the input is not a recognized date format.
270pub fn parse_date(input: Option<&str>) -> Result<Option<i64>> {
271    let Some(s) = input else { return Ok(None) };
272    let now = std::time::SystemTime::now();
273    let time = gix::date::parse(s, Some(now)).map_err(|e| Error::InvalidDate {
274        input: s.to_string(),
275        message: e.to_string(),
276    })?;
277    Ok(Some(time.seconds))
278}
279
#[cfg(test)]
mod tests {
    use super::parse_date;

    /// No input means no bound: the result is `Ok(None)`.
    #[test]
    fn parse_date_returns_none_for_no_input() {
        let parsed = parse_date(None).unwrap();
        assert!(parsed.is_none());
    }

    /// A plain ISO calendar date parses to some timestamp.
    #[test]
    fn parse_date_accepts_an_iso_date() {
        let parsed = parse_date(Some("2020-01-01")).unwrap();
        assert!(parsed.is_some());
    }

    /// Text that is not a date is reported as an error.
    #[test]
    fn parse_date_rejects_unparseable_input() {
        let outcome = parse_date(Some("not-a-real-date"));
        assert!(outcome.is_err());
    }
}