mdbook_gitinfo/
git.rs

1//! Utility module for running Git commands.
2//!
3//! This module provides helpers for interacting with a Git repository,
4//! primarily to extract metadata (commit hash, tag, timestamp, branch).
5//!
//! Fallible functions return [`mdbook_preprocessor::errors::Error`] on failure so
//! they can be integrated directly into the `mdbook` preprocessor error flow.
8//!
9//! See also:
10//! - [`get_git_output`] — Run arbitrary Git commands and capture output.
11//! - [`verify_branch`] — Convenience wrapper to check branch existence.
12
13use mdbook_preprocessor::errors::Error;
14use std::collections::BTreeSet;
15use std::ffi::OsStr;
16use std::path::Path;
17use std::process::{Command, Stdio};
18
19/// Run a Git command and return the trimmed `stdout` output as a [`String`].
20///
21/// This is the central utility for invoking Git. It is used by the
22/// `mdbook-gitinfo` preprocessor to fetch commit information such as:
23/// - short or long commit hash
24/// - nearest tag
25/// - commit date/time
26///
27/// See also: [`verify_branch`], which builds on this function to check
28/// if a branch exists locally.
29///
30/// # Type Parameters
31///
32/// - `I`: An iterator of arguments (e.g., a string slice array).
33/// - `S`: Each argument, convertible to [`OsStr`].
34///
35/// # Arguments
36///
37/// * `args` — Git command-line arguments (e.g., `["rev-parse", "HEAD"]`).
38/// * `dir` — Path to the Git repository root or working directory.
39///
40/// # Returns
41///
42/// * `Ok(String)` — Trimmed `stdout` output from Git.
43/// * `Err(Error)` — If Git fails to launch or exits with non-zero status.
44///
45/// # Errors
46///
47/// This function returns an [`Error`] if:
48/// - The `git` binary is missing or fails to start.
49/// - The command returns a non-zero exit code.
50/// - The output cannot be decoded as UTF-8.
51///
52/// # Example
53///
54/// ```no_run
55/// use std::path::Path;
56/// use mdbook_gitinfo::git::get_git_output;
57///
58/// let hash = get_git_output(["rev-parse", "--short", "HEAD"], Path::new("."))
59///     .expect("failed to get commit hash");
60/// println!("Current short commit hash: {}", hash);
61/// ```
62pub fn get_git_output<I, S>(args: I, dir: &Path) -> Result<String, Error>
63where
64    I: IntoIterator<Item = S>,
65    S: AsRef<OsStr>,
66{
67    let output = Command::new("git")
68        .args(args)
69        .current_dir(dir)
70        .stdout(Stdio::piped())
71        .output()
72        .map_err(|e| Error::msg(format!("Git command failed: {e}")))?;
73
74    if output.status.success() {
75        Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
76    } else {
77        Err(Error::msg("Git command returned non-zero exit code"))
78    }
79}
80
81/// Verify that a branch exists locally in the given repository.
82///
83/// Internally runs:
84/// ```text
85/// git rev-parse --verify <branch>
86/// ```
87///
88/// This is a thin wrapper around [`get_git_output`], returning `true` if the
89/// Git call succeeds and `false` otherwise.
90///
91/// # Arguments
92///
93/// * `branch` — The name of the branch to check.
94/// * `dir` — Path to the Git repository root or working directory.
95///
96/// # Returns
97///
98/// * `true` if the branch exists locally.
99/// * `false` otherwise.
100///
101/// # Example
102///
103/// ```no_run
104/// use std::path::Path;
105/// use mdbook_gitinfo::git::verify_branch;
106///
107/// let dir = Path::new(".");
108/// if !verify_branch("dev", dir) {
109///     eprintln!("Branch 'dev' not found, falling back to 'main'");
110/// }
111/// ```
112pub fn verify_branch(branch: &str, dir: &Path) -> bool {
113    get_git_output(["rev-parse", "--verify", branch], dir).is_ok()
114}
115
116/// Return the latest tag name, preferring tags reachable from the given branch's HEAD.
117/// Falls back to global (by creator date) when describe fails.
118/// Returns "No tags found" if not tag found
119pub fn latest_tag_for_branch(branch: &str, dir: &std::path::Path) -> String {
120    // Prefer a tag reachable from branch HEAD
121    if let Ok(t) = get_git_output(["describe", "--tags", "--abbrev=0", branch], dir) {
122        if !t.trim().is_empty() {
123            return t;
124        }
125    }
126
127    // Fallback: newest tag by creator date
128    match get_git_output(["tag", "--sort=-creatordate"], dir) {
129        Ok(list) => {
130            if let Some(first) = list.lines().find(|l| !l.trim().is_empty()) {
131                return first.trim().to_string();
132            }
133        }
134        Err(_) => {}
135    }
136
137    "No tags found".to_string()
138}
139
/// Derive a GitHub username from a GitHub noreply email address.
///
/// Recognised shapes:
/// - `username@users.noreply.github.com`
/// - `12345+username@users.noreply.github.com`
///
/// Returns `None` when the address does not end with the noreply domain or
/// when nothing but whitespace precedes it. The domain comparison is
/// case-sensitive.
fn github_username_from_email(email: &str) -> Option<String> {
    const NOREPLY_DOMAIN: &str = "@users.noreply.github.com";

    let local_part = email.strip_suffix(NOREPLY_DOMAIN)?.trim();
    if local_part.is_empty() {
        return None;
    }

    // "<id>+<login>": keep what follows the first '+'. If that remainder is
    // blank, fall back to the whole local part.
    let login = local_part
        .split_once('+')
        .map(|(_id, rest)| rest.trim())
        .filter(|rest| !rest.is_empty())
        .unwrap_or(local_part);

    Some(login.to_string())
}
166
/// Heuristic check that `u` could be a GitHub username.
///
/// Accepts 1–39 bytes drawn from `[A-Za-z0-9-]` where neither the first nor
/// the last character is `-`. Any non-ASCII input is rejected.
fn is_plausible_github_username(u: &str) -> bool {
    let length_ok = (1..=39).contains(&u.len());
    let hyphen_at_edge = u.starts_with('-') || u.ends_with('-');

    // Byte-wise scan: every byte of a multi-byte character is >= 0x80 and so
    // fails the ASCII test.
    length_ok
        && !hyphen_at_edge
        && u.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-')
}
178
179/// Retrieve contributor usernames from `git shortlog -sne --all`.
180///
181/// Strategy:
182/// 1) Prefer the *author name* if it looks like a GitHub username.
183/// 2) Otherwise, fallback to extracting a username from GitHub noreply email.
184///
185/// Returns a unique, sorted list of inferred GitHub usernames.
186pub fn get_contributor_usernames_from_shortlog(dir: &Path) -> Result<Vec<String>, Error> {
187    let raw = get_git_output(["shortlog", "-sne", "--all"], dir)
188        .map_err(|e| Error::msg(format!("unable to get contributors: {e}")))?;
189
190    let mut set = BTreeSet::<String>::new();
191
192    for line in raw.lines() {
193        // Expected: "  42  Name <email>"
194        let line = line.trim();
195        if line.is_empty() {
196            continue;
197        }
198
199        // Split count from rest
200        let mut parts = line.splitn(2, char::is_whitespace);
201        let _count_str = parts.next().unwrap_or("");
202        let rest = parts.next().unwrap_or("").trim();
203        if rest.is_empty() {
204            continue;
205        }
206
207        // Extract name and optional email
208        let (name, email) = if let Some((n, e)) = rest.rsplit_once('<') {
209            let email = e.trim_end_matches('>').trim();
210            (n.trim(), Some(email))
211        } else {
212            (rest, None)
213        };
214
215        // 1) Prefer author name (if plausible)
216        if !name.is_empty() && is_plausible_github_username(name) {
217            set.insert(name.to_string());
218            continue;
219        }
220
221        // 2) Fallback to email-derived username (GitHub noreply only)
222        if let Some(email) = email {
223            if let Some(u) = github_username_from_email(email) {
224                if is_plausible_github_username(&u) {
225                    set.insert(u);
226                }
227            }
228        }
229    }
230
231    Ok(set.into_iter().collect())
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// An unknown Git subcommand must surface as `Err`.
    #[test]
    fn returns_error_on_invalid_git_command() {
        let cwd = PathBuf::from(".");
        let outcome = get_git_output(["non-existent-command"], &cwd);
        assert!(outcome.is_err());
    }
}
244}