mdbook_gitinfo/git.rs
1//! Utility module for running Git commands.
2//!
3//! This module provides helpers for interacting with a Git repository,
4//! primarily to extract metadata (commit hash, tag, timestamp, branch).
5//!
//! Fallible functions return [`mdbook_preprocessor::errors::Error`] on failure so
//! they can be integrated directly into the `mdbook` preprocessor error flow.
8//!
9//! See also:
10//! - [`get_git_output`] — Run arbitrary Git commands and capture output.
11//! - [`verify_branch`] — Convenience wrapper to check branch existence.
12
13use mdbook_preprocessor::errors::Error;
14use std::collections::BTreeSet;
15use std::ffi::OsStr;
16use std::path::Path;
17use std::process::{Command, Stdio};
18
19/// Run a Git command and return the trimmed `stdout` output as a [`String`].
20///
21/// This is the central utility for invoking Git. It is used by the
22/// `mdbook-gitinfo` preprocessor to fetch commit information such as:
23/// - short or long commit hash
24/// - nearest tag
25/// - commit date/time
26///
27/// See also: [`verify_branch`], which builds on this function to check
28/// if a branch exists locally.
29///
30/// # Type Parameters
31///
32/// - `I`: An iterator of arguments (e.g., a string slice array).
33/// - `S`: Each argument, convertible to [`OsStr`].
34///
35/// # Arguments
36///
37/// * `args` — Git command-line arguments (e.g., `["rev-parse", "HEAD"]`).
38/// * `dir` — Path to the Git repository root or working directory.
39///
40/// # Returns
41///
42/// * `Ok(String)` — Trimmed `stdout` output from Git.
43/// * `Err(Error)` — If Git fails to launch or exits with non-zero status.
44///
45/// # Errors
46///
47/// This function returns an [`Error`] if:
48/// - The `git` binary is missing or fails to start.
49/// - The command returns a non-zero exit code.
50/// - The output cannot be decoded as UTF-8.
51///
52/// # Example
53///
54/// ```no_run
55/// use std::path::Path;
56/// use mdbook_gitinfo::git::get_git_output;
57///
58/// let hash = get_git_output(["rev-parse", "--short", "HEAD"], Path::new("."))
59/// .expect("failed to get commit hash");
60/// println!("Current short commit hash: {}", hash);
61/// ```
62pub fn get_git_output<I, S>(args: I, dir: &Path) -> Result<String, Error>
63where
64 I: IntoIterator<Item = S>,
65 S: AsRef<OsStr>,
66{
67 let output = Command::new("git")
68 .args(args)
69 .current_dir(dir)
70 .stdout(Stdio::piped())
71 .output()
72 .map_err(|e| Error::msg(format!("Git command failed: {e}")))?;
73
74 if output.status.success() {
75 Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
76 } else {
77 Err(Error::msg("Git command returned non-zero exit code"))
78 }
79}
80
81/// Verify that a branch exists locally in the given repository.
82///
83/// Internally runs:
84/// ```text
85/// git rev-parse --verify <branch>
86/// ```
87///
88/// This is a thin wrapper around [`get_git_output`], returning `true` if the
89/// Git call succeeds and `false` otherwise.
90///
91/// # Arguments
92///
93/// * `branch` — The name of the branch to check.
94/// * `dir` — Path to the Git repository root or working directory.
95///
96/// # Returns
97///
98/// * `true` if the branch exists locally.
99/// * `false` otherwise.
100///
101/// # Example
102///
103/// ```no_run
104/// use std::path::Path;
105/// use mdbook_gitinfo::git::verify_branch;
106///
107/// let dir = Path::new(".");
108/// if !verify_branch("dev", dir) {
109/// eprintln!("Branch 'dev' not found, falling back to 'main'");
110/// }
111/// ```
112pub fn verify_branch(branch: &str, dir: &Path) -> bool {
113 get_git_output(["rev-parse", "--verify", branch], dir).is_ok()
114}
115
116/// Return the latest tag name, preferring tags reachable from the given branch's HEAD.
117/// Falls back to global (by creator date) when describe fails.
118/// Returns "No tags found" if not tag found
119pub fn latest_tag_for_branch(branch: &str, dir: &std::path::Path) -> String {
120 // Prefer a tag reachable from branch HEAD
121 if let Ok(t) = get_git_output(["describe", "--tags", "--abbrev=0", branch], dir) {
122 if !t.trim().is_empty() {
123 return t;
124 }
125 }
126
127 // Fallback: newest tag by creator date
128 match get_git_output(["tag", "--sort=-creatordate"], dir) {
129 Ok(list) => {
130 if let Some(first) = list.lines().find(|l| !l.trim().is_empty()) {
131 return first.trim().to_string();
132 }
133 }
134 Err(_) => {}
135 }
136
137 "No tags found".to_string()
138}
139
/// Extract a GitHub username from a GitHub noreply email address.
///
/// Supported patterns:
/// - `username@users.noreply.github.com`
/// - `12345+username@users.noreply.github.com`
///
/// Returns `None` when `email` does not end with the noreply domain, or when
/// no username remains after removing the domain and the optional
/// `<id>+` prefix (e.g. `@users.noreply.github.com` or
/// `12345+@users.noreply.github.com`).
///
/// The result is not validated as a GitHub username; callers that need that
/// should check it separately.
fn github_username_from_email(email: &str) -> Option<String> {
    const SUFFIX: &str = "@users.noreply.github.com";

    // `strip_suffix` does the domain check and the slicing in one step.
    let local = email.strip_suffix(SUFFIX)?.trim();

    // Everything up to the first '+' is the account id prefix; the username
    // is whatever follows it. Without a '+', the whole local part is the name.
    let username = match local.split_once('+') {
        Some((_id, name)) => name.trim(),
        None => local,
    };

    (!username.is_empty()).then(|| username.to_string())
}
166
/// Heuristic check that `u` could be a GitHub username.
///
/// Accepts strings of 1 to 39 bytes made up solely of ASCII letters, ASCII
/// digits and `-`, where neither the first nor the last character is `-`.
/// Anything else (spaces, underscores, non-ASCII text, ...) is rejected.
fn is_plausible_github_username(u: &str) -> bool {
    let within_length = (1..=39).contains(&u.len());
    let hyphen_at_edge = u.starts_with('-') || u.ends_with('-');

    within_length
        && !hyphen_at_edge
        && u.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-')
}
178
179/// Retrieve contributor usernames from `git shortlog -sne --all`.
180///
181/// Strategy:
182/// 1) Prefer the *author name* if it looks like a GitHub username.
183/// 2) Otherwise, fallback to extracting a username from GitHub noreply email.
184///
185/// Returns a unique, sorted list of inferred GitHub usernames.
186pub fn get_contributor_usernames_from_shortlog(dir: &Path) -> Result<Vec<String>, Error> {
187 let raw = get_git_output(["shortlog", "-sne", "--all"], dir)
188 .map_err(|e| Error::msg(format!("unable to get contributors: {e}")))?;
189
190 let mut set = BTreeSet::<String>::new();
191
192 for line in raw.lines() {
193 // Expected: " 42 Name <email>"
194 let line = line.trim();
195 if line.is_empty() {
196 continue;
197 }
198
199 // Split count from rest
200 let mut parts = line.splitn(2, char::is_whitespace);
201 let _count_str = parts.next().unwrap_or("");
202 let rest = parts.next().unwrap_or("").trim();
203 if rest.is_empty() {
204 continue;
205 }
206
207 // Extract name and optional email
208 let (name, email) = if let Some((n, e)) = rest.rsplit_once('<') {
209 let email = e.trim_end_matches('>').trim();
210 (n.trim(), Some(email))
211 } else {
212 (rest, None)
213 };
214
215 // 1) Prefer author name (if plausible)
216 if !name.is_empty() && is_plausible_github_username(name) {
217 set.insert(name.to_string());
218 continue;
219 }
220
221 // 2) Fallback to email-derived username (GitHub noreply only)
222 if let Some(email) = email {
223 if let Some(u) = github_username_from_email(email) {
224 if is_plausible_github_username(&u) {
225 set.insert(u);
226 }
227 }
228 }
229 }
230
231 Ok(set.into_iter().collect())
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// An unknown Git subcommand must surface as an `Err` rather than as
    /// empty output.
    #[test]
    fn returns_error_on_invalid_git_command() {
        let cwd = PathBuf::from(".");
        let outcome = get_git_output(["non-existent-command"], &cwd);
        assert!(outcome.is_err());
    }
}
244}