Skip to main content

alint_rules/
git_commit_message.rs

//! `git_commit_message` — assert commit messages match a shape
//! (regex, max subject length, body required).
//!
//! Two modes:
//!
//! - **HEAD-only** (default, `since:` omitted): validate the tip
//!   commit. Right shape for push-trigger CI and post-commit hooks.
//! - **Range** (`since:` set): validate every commit reachable from
//!   `HEAD` but not from `since`. Right shape for pull-request CI,
//!   where `actions/checkout` checks out a synthetic merge commit
//!   that the rule should never see. Set `since:` to the PR base
//!   SHA (typically via `${ALINT_BASE_SHA}` env interpolation, with
//!   the env var sourced from `github.event.pull_request.base.sha`
//!   in the workflow). Merge commits in the range are skipped by
//!   default (`include_merges: false`); set `include_merges: true`
//!   to lint them too.
//!
//! Use cases: enforce Conventional Commits / Angular-style
//! prefixes, cap the subject at a screen-friendly width (50–72),
//! require commits that fix issues to include a body linking the
//! issue.
//!
//! Outside a git repo, with no commits yet, or when `git` isn't on
//! PATH, the rule silently no-ops. Same advisory posture as
//! `git_tracked_only` and `git_no_denied_paths`: a rule about git
//! only fires when there's git to inspect.
//!
//! Bad `since:` refs (typo, or a shallow-clone gotcha that left
//! the ref out of local objects) hard-fail with a hint to widen
//! the checkout. The user asked for a range; silently falling back
//! to HEAD-only would mask the misconfiguration.
//!
//! Check-only — alint can't rewrite the user's commit message, and
//! `git commit --amend` is a sensitive operation we don't automate.
35
36use alint_core::git::{
37    CommitRangeError, CommitRecord, commit_messages_in_range, head_commit_message,
38};
39use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Violation};
40use regex::Regex;
41use serde::Deserialize;
42
43#[derive(Debug, Deserialize)]
44#[serde(deny_unknown_fields)]
45struct Options {
46    /// Regex the full commit message (subject + body, joined with
47    /// newlines) must match. When omitted, no regex check is
48    /// applied. Use `(?s)` to make `.` match newlines if you want
49    /// to assert about content past the subject.
50    #[serde(default)]
51    pattern: Option<String>,
52    /// Maximum length of the subject line (the message's first
53    /// line, before any body). When omitted, no length cap.
54    /// Common values: 50 (Tim Pope's recommendation), 72 (GitHub's
55    /// PR-title cutoff).
56    #[serde(default)]
57    subject_max_length: Option<usize>,
58    /// When `true`, the message must have a non-empty body — at
59    /// least one line of content after the subject's trailing
60    /// blank line. Useful for mandating an explanation on `fix:`
61    /// commits etc.
62    #[serde(default)]
63    requires_body: bool,
64    /// Git ref to use as the base of the commit range. When set,
65    /// the rule validates every commit in `<since>..HEAD` instead
66    /// of just `HEAD`. Anything `git rev-parse` accepts works: a
67    /// 40-char or abbreviated SHA, a branch (`origin/main`), a tag
68    /// (`v1.2.3`), or a relative ref (`HEAD~5`). Supports POSIX
69    /// `${VAR}` and `${VAR:-default}` env-var interpolation so CI
70    /// can pass a SHA in via an env var (see the GitHub Actions
71    /// integration doc for the canonical recipe).
72    #[serde(default)]
73    since: Option<String>,
74    /// When validating a range (`since:` set), include merge
75    /// commits in the set of commits to check. Default `false`
76    /// because merge commits in a PR context are typically the
77    /// synthetic merge `actions/checkout` produces (with an
78    /// auto-generated subject the rule would always flag) or
79    /// maintainer-resolved merges from the base branch (also
80    /// uninteresting). Set `true` to lint them anyway. Has no
81    /// effect when `since:` is unset.
82    #[serde(default)]
83    include_merges: bool,
84}
85
86#[derive(Debug)]
87pub struct GitCommitMessageRule {
88    id: String,
89    level: Level,
90    policy_url: Option<String>,
91    message_override: Option<String>,
92    pattern: Option<Regex>,
93    subject_max_length: Option<usize>,
94    requires_body: bool,
95    /// `since:` value as written in the config, with `${VAR}`
96    /// interpolation NOT yet performed. Resolution is deferred to
97    /// evaluate-time so env vars exported by CI steps after rule
98    /// load are picked up correctly.
99    since_raw: Option<String>,
100    include_merges: bool,
101}
102
103impl Rule for GitCommitMessageRule {
104    alint_core::rule_common_impl!();
105
106    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
107        let mut violations = Vec::new();
108
109        // Resolve `since:` once at evaluate-time so `${VAR}` env
110        // expansions reflect the current process environment.
111        let since = match &self.since_raw {
112            None => None,
113            Some(raw) => match expand_env(raw, env_lookup) {
114                Ok(resolved) => Some(resolved),
115                Err(missing) => {
116                    return Err(Error::rule_config(
117                        &self.id,
118                        format!(
119                            "`since:` references undefined env var `{missing}` \
120                             and has no default. Either set the env var (for \
121                             example, `ALINT_BASE_SHA` from \
122                             `github.event.pull_request.base.sha` in a GitHub \
123                             Actions workflow) or use the `${{VAR:-default}}` \
124                             default-value syntax."
125                        ),
126                    ));
127                }
128            },
129        };
130
131        let commits = match &since {
132            None => match head_commit_message(ctx.root) {
133                Some(message) => vec![CommitRecord {
134                    sha: "HEAD".to_string(),
135                    message,
136                }],
137                None => return Ok(violations), // silent no-op
138            },
139            Some(since) => {
140                match commit_messages_in_range(ctx.root, since, self.include_merges) {
141                    Ok(None) => return Ok(violations), // not a git repo: silent
142                    Ok(Some(records)) => records,
143                    Err(CommitRangeError::BadRange { stderr }) => {
144                        return Err(Error::rule_config(
145                            &self.id,
146                            format!(
147                                "could not resolve commit range `{since}..HEAD`: {stderr}. \
148                                 Common cause: shallow clone. In a GitHub Actions PR \
149                                 workflow, use `actions/checkout@v4` with \
150                                 `fetch-depth: 0` so the base ref is reachable."
151                            ),
152                        ));
153                    }
154                }
155            }
156        };
157
158        for commit in &commits {
159            self.check_one(commit, &mut violations);
160        }
161
162        Ok(violations)
163    }
164}
165
166impl GitCommitMessageRule {
167    fn check_one(&self, commit: &CommitRecord, violations: &mut Vec<Violation>) {
168        let (subject, body) = split_subject_body(&commit.message);
169
170        if let Some(re) = &self.pattern
171            && !re.is_match(&commit.message)
172        {
173            violations.push(self.make_violation(format_msg(
174                commit,
175                subject,
176                &format!("commit message does not match pattern `{}`", re.as_str()),
177            )));
178        }
179
180        if let Some(max) = self.subject_max_length
181            && subject.chars().count() > max
182        {
183            violations.push(self.make_violation(format_msg(
184                commit,
185                subject,
186                &format!(
187                    "commit subject is {} chars; max allowed is {max}",
188                    subject.chars().count(),
189                ),
190            )));
191        }
192
193        if self.requires_body && body.trim().is_empty() {
194            violations.push(self.make_violation(format_msg(
195                commit,
196                subject,
197                "commit message has no body; this rule requires one",
198            )));
199        }
200    }
201
202    fn make_violation(&self, default_msg: String) -> Violation {
203        Violation::new(self.message_override.clone().unwrap_or(default_msg))
204    }
205}
206
207/// Render a violation message with the commit SHA + a trimmed
208/// subject snippet for context. SHA is "HEAD" in single-commit
209/// mode (the helper synthesises it) and an abbreviated SHA in
210/// range mode. Subject is truncated to ~60 chars so violation
211/// output stays readable.
212fn format_msg(commit: &CommitRecord, subject: &str, what: &str) -> String {
213    const SUBJECT_PREVIEW_MAX: usize = 60;
214    let preview: String = subject.chars().take(SUBJECT_PREVIEW_MAX).collect();
215    let ellipsis = if subject.chars().count() > SUBJECT_PREVIEW_MAX {
216        "…"
217    } else {
218        ""
219    };
220    format!(
221        "commit {}: {what} (subject: \"{preview}{ellipsis}\")",
222        commit.sha
223    )
224}
225
/// Split a commit message into `(subject, body)`.
///
/// The subject is the first line (without its terminating `\n`).
/// The body is everything after that line, minus exactly one
/// blank separator line if present — the canonical
/// `"subject\n\nbody"` shape. A message with no blank separator
/// (`"subject\nrest"`) still yields `"rest"` as its body; only a
/// single-line message has an empty body. Extra blank lines after
/// the separator stay in the body — they're unusual but we don't
/// want to silently swallow user content. Trailing whitespace on
/// the subject is preserved as-is — the length check counts it.
fn split_subject_body(message: &str) -> (&str, &str) {
    match message.split_once('\n') {
        None => (message, ""),
        Some((subject, rest)) => (subject, rest.strip_prefix('\n').unwrap_or(rest)),
    }
}
240
/// Expand POSIX-style env-var references in a `since:` value. The
/// syntax is intentionally narrow:
///
/// - `${VAR}` — substitute the value of `VAR`. If `VAR` is unset
///   *or empty*, returns `Err(var-name)` so the rule can hard-fail
///   with a CI-friendly hint.
/// - `${VAR:-default}` — substitute `VAR`, or `default` when
///   `VAR` is unset or empty. `default` may not itself contain
///   `${`, `}` or `:-` — keep the surface small.
/// - Bare text — left as-is. So `since: origin/main` works
///   unchanged.
/// - An unclosed `${` (no `}` after it) is kept literally.
///
/// Multiple `${...}` references in one value are supported. The
/// double-brace GitHub Actions syntax (`${{ ... }}`) is NOT
/// interpolated by alint; it has to be rendered by Actions before
/// the YAML is read, which only works in workflow files, not in
/// `.alint.yml`. If such text does reach this function it is
/// parsed as a reference to a variable literally named
/// `{ ... ` and fails as undefined unless `lookup` knows that
/// name. The single-brace form is the alint convention.
///
/// `lookup` is an explicit env-var resolver (typically
/// `|name| std::env::var(name).ok()`); injecting it keeps the
/// pure-string parsing testable without `set_var` calls (the
/// crate forbids unsafe code, and Rust 2024 marks `set_var` /
/// `remove_var` unsafe).
///
/// # Errors
///
/// `Err(name)` carries the name of the first reference that
/// resolved to nothing and had no `:-default`.
fn expand_env<F>(input: &str, lookup: F) -> std::result::Result<String, String>
where
    F: Fn(&str) -> Option<String>,
{
    let mut out = String::with_capacity(input.len());
    let mut rest = input;
    while let Some((literal, after_open)) = rest.split_once("${") {
        out.push_str(literal);
        let Some((inner, tail)) = after_open.split_once('}') else {
            // Unclosed `${...`. Treat as literal, don't error, and
            // keep scanning what follows for further references.
            out.push_str("${");
            rest = after_open;
            continue;
        };
        let (name, default) = match inner.split_once(":-") {
            Some((name, default)) => (name, Some(default)),
            None => (inner, None),
        };
        // An empty value is treated exactly like an unset one.
        match lookup(name).filter(|value| !value.is_empty()) {
            Some(value) => out.push_str(&value),
            None => out.push_str(default.ok_or_else(|| name.to_string())?),
        }
        rest = tail;
    }
    out.push_str(rest);
    Ok(out)
}
298
/// Production lookup: read `name` from the process environment.
///
/// Returns `None` when the variable is unset, and also when its
/// value is not valid Unicode (`std::env::var` reports that as an
/// error, which is discarded here), so such a value behaves like
/// an unset variable during `since:` expansion.
fn env_lookup(name: &str) -> Option<String> {
    std::env::var(name).ok()
}
303
304pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
305    let opts: Options = spec
306        .deserialize_options()
307        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
308
309    if opts.pattern.is_none() && opts.subject_max_length.is_none() && !opts.requires_body {
310        return Err(Error::rule_config(
311            &spec.id,
312            "git_commit_message needs at least one of `pattern:`, `subject_max_length:`, \
313             or `requires_body: true`",
314        ));
315    }
316    if spec.fix.is_some() {
317        return Err(Error::rule_config(
318            &spec.id,
319            "git_commit_message has no fix op",
320        ));
321    }
322    if opts.include_merges && opts.since.is_none() {
323        return Err(Error::rule_config(
324            &spec.id,
325            "`include_merges: true` has no effect without `since:`. Either remove it \
326             or set `since:` to enable range mode.",
327        ));
328    }
329
330    let pattern = opts
331        .pattern
332        .as_deref()
333        .map(|p| {
334            Regex::new(p).map_err(|e| {
335                Error::rule_config(&spec.id, format!("invalid `pattern:` regex `{p}`: {e}"))
336            })
337        })
338        .transpose()?;
339
340    Ok(Box::new(GitCommitMessageRule {
341        id: spec.id.clone(),
342        level: spec.level,
343        policy_url: spec.policy_url.clone(),
344        message_override: spec.message.clone(),
345        pattern,
346        subject_max_length: opts.subject_max_length,
347        requires_body: opts.requires_body,
348        since_raw: opts.since,
349        include_merges: opts.include_merges,
350    }))
351}
352
#[cfg(test)]
mod tests {
    use super::*;

    // ----- subject / body splitting -----------------------------

    #[test]
    fn split_one_line_message() {
        let (subj, body) = split_subject_body("just a subject");
        assert_eq!(subj, "just a subject");
        assert_eq!(body, "");
    }

    #[test]
    fn split_subject_body_with_canonical_blank_line() {
        let (subj, body) = split_subject_body("Add feature\n\nLong description here.\nMore.");
        assert_eq!(subj, "Add feature");
        assert_eq!(body, "Long description here.\nMore.");
    }

    #[test]
    fn split_subject_no_blank_separator() {
        // git-style messages should have a blank line, but a
        // message can also arrive as "first\nsecond" with no
        // separator. Treat the second line on as body even
        // without one.
        let (subj, body) = split_subject_body("subject\nrest of content");
        assert_eq!(subj, "subject");
        assert_eq!(body, "rest of content");
    }

    #[test]
    fn split_keeps_extra_blank_lines_in_body() {
        // Only one separator line is consumed; anything past it,
        // including further blank lines, belongs to the body.
        let (subj, body) = split_subject_body("subject\n\n\nbody");
        assert_eq!(subj, "subject");
        assert_eq!(body, "\nbody");
    }

    #[test]
    fn pattern_rejects_unrelated_subject() {
        let re = Regex::new(r"^(feat|fix|chore): ").unwrap();
        assert!(!re.is_match("WIP changes"));
        assert!(re.is_match("feat: add markdown formatter"));
    }

    #[test]
    fn subject_length_uses_chars_not_bytes() {
        // Multi-byte unicode in the subject should count by
        // `char`, not by byte — a subject of 50 emoji is 50
        // chars, not 200 bytes.
        let subj = "🚀".repeat(50);
        assert_eq!(subj.chars().count(), 50);
        assert_eq!(subj.len(), 50 * 4); // bytes
    }

    #[test]
    fn requires_body_detects_subject_only() {
        let (_, body) = split_subject_body("just a subject");
        assert!(body.trim().is_empty());
    }

    #[test]
    fn requires_body_accepts_canonical_form() {
        let (_, body) = split_subject_body("subject\n\nbody content");
        assert!(!body.trim().is_empty());
    }

    // ----- format_msg formatting --------------------------------

    #[test]
    fn format_msg_renders_sha_and_subject() {
        let commit = CommitRecord {
            sha: "a1b2c3d".to_string(),
            message: "fix: thing".to_string(),
        };
        let s = format_msg(&commit, "fix: thing", "subject too long");
        assert!(s.contains("commit a1b2c3d"));
        assert!(s.contains("fix: thing"));
        assert!(s.contains("subject too long"));
    }

    #[test]
    fn format_msg_truncates_long_subjects() {
        let long_subject = "x".repeat(120);
        let commit = CommitRecord {
            sha: "abc1234".to_string(),
            message: long_subject.clone(),
        };
        let s = format_msg(&commit, &long_subject, "too long");
        // Subject preview is capped at 60 chars + ellipsis.
        assert!(s.contains(&"x".repeat(60)));
        assert!(s.contains('…'));
        assert!(!s.contains(&"x".repeat(61)));
    }

    // ----- env-var interpolation --------------------------------

    /// Build a fake env lookup from a list of (name, value) pairs.
    /// Anything not in the list is treated as unset.
    fn fake_env<'a>(pairs: &'a [(&'a str, &'a str)]) -> impl Fn(&str) -> Option<String> + 'a {
        move |name: &str| {
            pairs
                .iter()
                .find(|(k, _)| *k == name)
                .map(|(_, v)| (*v).to_string())
        }
    }

    #[test]
    fn expand_env_passthrough_for_bare_string() {
        let env = fake_env(&[]);
        assert_eq!(expand_env("origin/main", &env).unwrap(), "origin/main");
        assert_eq!(expand_env("v0.9.20", &env).unwrap(), "v0.9.20");
        assert_eq!(
            expand_env("abc1234567890abcdef1234567890abcdef12345678", &env).unwrap(),
            "abc1234567890abcdef1234567890abcdef12345678"
        );
    }

    #[test]
    fn expand_env_substitutes_simple_var() {
        let env = fake_env(&[("ALINT_BASE_SHA", "deadbeef")]);
        assert_eq!(expand_env("${ALINT_BASE_SHA}", &env).unwrap(), "deadbeef");
    }

    #[test]
    fn expand_env_default_used_when_var_unset() {
        let env = fake_env(&[]);
        assert_eq!(
            expand_env("${MISSING:-origin/main}", &env).unwrap(),
            "origin/main"
        );
    }

    #[test]
    fn expand_env_default_used_when_var_empty() {
        let env = fake_env(&[("EMPTY", "")]);
        assert_eq!(
            expand_env("${EMPTY:-origin/main}", &env).unwrap(),
            "origin/main"
        );
    }

    #[test]
    fn expand_env_errors_when_var_unset_and_no_default() {
        let env = fake_env(&[]);
        let err = expand_env("${NOPE}", &env).unwrap_err();
        assert_eq!(err, "NOPE");
    }

    #[test]
    fn expand_env_errors_when_var_empty_and_no_default() {
        // A set-but-empty variable is treated like an unset one,
        // so without a `:-default` it is an error too.
        let env = fake_env(&[("EMPTY", "")]);
        let err = expand_env("${EMPTY}", &env).unwrap_err();
        assert_eq!(err, "EMPTY");
    }

    #[test]
    fn expand_env_handles_multiple_references() {
        let env = fake_env(&[("A", "foo"), ("B", "bar")]);
        assert_eq!(expand_env("${A}-${B}", &env).unwrap(), "foo-bar");
    }

    #[test]
    fn expand_env_handles_text_around_var() {
        let env = fake_env(&[("SHA", "abc1234")]);
        assert_eq!(
            expand_env("refs/${SHA}/head", &env).unwrap(),
            "refs/abc1234/head"
        );
    }

    #[test]
    fn expand_env_ignores_unclosed_brace() {
        // Don't crash or error on a value that contains `${` with
        // no closing `}` — keep it as literal text.
        let env = fake_env(&[]);
        assert_eq!(expand_env("foo${unclosed", &env).unwrap(), "foo${unclosed");
    }

    // ----- build() validation -----------------------------------

    /// Parse a `RuleSpec` from a fixed TOML header (id, kind,
    /// level) followed by the test-specific option lines.
    fn spec(extra: &str) -> RuleSpec {
        let mut full =
            String::from("id = \"test-rule\"\nkind = \"git_commit_message\"\nlevel = \"error\"\n");
        full.push_str(extra);
        toml::from_str(&full).unwrap()
    }

    #[test]
    fn build_requires_at_least_one_assertion() {
        // No pattern, no subject_max_length, no requires_body. The
        // rule has nothing to check; build() rejects.
        let s = spec("");
        let err = build(&s).unwrap_err();
        assert!(err.to_string().contains("at least one of"));
    }

    #[test]
    fn build_rejects_include_merges_without_since() {
        // include_merges only makes sense alongside since:.
        // Surfacing this at config-load time prevents silent
        // no-ops.
        let s = spec("requires_body = true\ninclude_merges = true\n");
        let err = build(&s).unwrap_err();
        assert!(
            err.to_string().contains("include_merges"),
            "expected include_merges hint, got: {err}"
        );
    }

    #[test]
    fn build_accepts_since_with_other_options() {
        let s = spec("pattern = \"^feat: \"\nsince = \"origin/main\"\n");
        assert!(build(&s).is_ok());
    }

    #[test]
    fn build_accepts_since_with_include_merges() {
        let s = spec("subject_max_length = 50\nsince = \"origin/main\"\ninclude_merges = true\n");
        assert!(build(&s).is_ok());
    }
}