Skip to main content

alint_rules/
git_commit_message.rs

1//! `git_commit_message` — assert commit messages match a shape
2//! (regex, max subject length, body required).
3//!
4//! Two modes:
5//!
6//! - **HEAD-only** (default, `since:` omitted): validate the tip
7//!   commit. Right shape for push-trigger CI and post-commit hooks.
8//! - **Range** (`since:` set): validate every commit reachable from
9//!   `HEAD` but not from `since`. Right shape for pull-request CI,
10//!   where `actions/checkout` checks out a synthetic merge commit
//!   that the rule should never see. Set `since:` to the PR base
//!   SHA (typically via `{{env.ALINT_BASE_SHA}}` env interpolation —
//!   the legacy `${ALINT_BASE_SHA}` form still works but is
//!   deprecated — with the env var sourced from
//!   `github.event.pull_request.base.sha` in the workflow). Merge
//!   commits in the range are skipped by
15//!   default (`include_merges: false`); set `include_merges: true`
16//!   to lint them too.
17//!
18//! Use cases: enforce Conventional Commits / Angular-style
19//! prefixes, cap the subject at a screen-friendly width (50–72),
20//! require commits that fix issues to include a body linking the
21//! issue.
22//!
23//! Outside a git repo, with no commits yet, or when `git` isn't on
24//! PATH, the rule silently no-ops. Same advisory posture as
25//! `git_tracked_only` and `git_no_denied_paths`: a rule about git
26//! only fires when there's git to inspect.
27//!
28//! Bad `since:` refs (typo, or a shallow-clone gotcha that left
29//! the ref out of local objects) hard-fail with a hint to widen
30//! the checkout. The user asked for a range; silently falling back
31//! to HEAD-only would mask the misconfiguration.
32//!
33//! Check-only — alint can't rewrite the user's commit message, and
34//! `git commit --amend` is a sensitive operation we don't automate.
35
36use alint_core::git::{
37    CommitRangeError, CommitRecord, commit_messages_in_range, head_commit_message,
38};
39use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Violation};
40use regex::Regex;
41use serde::Deserialize;
42
43#[derive(Debug, Deserialize)]
44#[serde(deny_unknown_fields)]
45struct Options {
46    /// Regex the full commit message (subject + body, joined with
47    /// newlines) must match. When omitted, no regex check is
48    /// applied. Use `(?s)` to make `.` match newlines if you want
49    /// to assert about content past the subject.
50    #[serde(default)]
51    pattern: Option<String>,
52    /// Maximum length of the subject line (the message's first
53    /// line, before any body). When omitted, no length cap.
54    /// Common values: 50 (Tim Pope's recommendation), 72 (GitHub's
55    /// PR-title cutoff).
56    #[serde(default)]
57    subject_max_length: Option<usize>,
58    /// When `true`, the message must have a non-empty body — at
59    /// least one line of content after the subject's trailing
60    /// blank line. Useful for mandating an explanation on `fix:`
61    /// commits etc.
62    #[serde(default)]
63    requires_body: bool,
64    /// Git ref to use as the base of the commit range. When set,
65    /// the rule validates every commit in `<since>..HEAD` instead
66    /// of just `HEAD`. Anything `git rev-parse` accepts works: a
67    /// 40-char or abbreviated SHA, a branch (`origin/main`), a tag
68    /// (`v1.2.3`), or a relative ref (`HEAD~5`). Supports POSIX
69    /// `${VAR}` and `${VAR:-default}` env-var interpolation so CI
70    /// can pass a SHA in via an env var (see the GitHub Actions
71    /// integration doc for the canonical recipe).
72    #[serde(default)]
73    since: Option<String>,
74    /// When validating a range (`since:` set), include merge
75    /// commits in the set of commits to check. Default `false`
76    /// because merge commits in a PR context are typically the
77    /// synthetic merge `actions/checkout` produces (with an
78    /// auto-generated subject the rule would always flag) or
79    /// maintainer-resolved merges from the base branch (also
80    /// uninteresting). Set `true` to lint them anyway. Has no
81    /// effect when `since:` is unset.
82    #[serde(default)]
83    include_merges: bool,
84}
85
86#[derive(Debug)]
87pub struct GitCommitMessageRule {
88    id: String,
89    level: Level,
90    policy_url: Option<String>,
91    message_override: Option<String>,
92    pattern: Option<Regex>,
93    subject_max_length: Option<usize>,
94    requires_body: bool,
95    /// `since:` value as written in the config, with `${VAR}`
96    /// interpolation NOT yet performed. Resolution is deferred to
97    /// evaluate-time so env vars exported by CI steps after rule
98    /// load are picked up correctly.
99    since_raw: Option<String>,
100    include_merges: bool,
101}
102
103impl Rule for GitCommitMessageRule {
104    alint_core::rule_common_impl!();
105
106    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
107        let mut violations = Vec::new();
108
109        // Resolve `since:` once at evaluate-time so `${VAR}` env
110        // expansions reflect the current process environment.
111        let since = match &self.since_raw {
112            None => None,
113            Some(raw) => match expand_env(raw, env_lookup) {
114                Ok(resolved) => Some(resolved),
115                Err(missing) => {
116                    return Err(Error::rule_config(
117                        &self.id,
118                        format!(
119                            "`since:` references undefined env var `{missing}` \
120                             and has no default. Either set the env var (for \
121                             example, `ALINT_BASE_SHA` from \
122                             `github.event.pull_request.base.sha` in a GitHub \
123                             Actions workflow) or use the `${{VAR:-default}}` \
124                             default-value syntax."
125                        ),
126                    ));
127                }
128            },
129        };
130
131        let commits = match &since {
132            None => match head_commit_message(ctx.root) {
133                Some(message) => vec![CommitRecord {
134                    sha: "HEAD".to_string(),
135                    message,
136                    author_name: String::new(),
137                    author_email: String::new(),
138                }],
139                None => return Ok(violations), // silent no-op
140            },
141            Some(since) => {
142                match commit_messages_in_range(ctx.root, since, self.include_merges) {
143                    Ok(None) => return Ok(violations), // not a git repo: silent
144                    Ok(Some(records)) => records,
145                    Err(CommitRangeError::BadRange { stderr }) => {
146                        return Err(Error::rule_config(
147                            &self.id,
148                            format!(
149                                "could not resolve commit range `{since}..HEAD`: {stderr}. \
150                                 Common cause: shallow clone. In a GitHub Actions PR \
151                                 workflow, use `actions/checkout@v4` with \
152                                 `fetch-depth: 0` so the base ref is reachable."
153                            ),
154                        ));
155                    }
156                }
157            }
158        };
159
160        for commit in &commits {
161            self.check_one(commit, &mut violations);
162        }
163
164        Ok(violations)
165    }
166}
167
168impl GitCommitMessageRule {
169    fn check_one(&self, commit: &CommitRecord, violations: &mut Vec<Violation>) {
170        let (subject, body) = split_subject_body(&commit.message);
171
172        if let Some(re) = &self.pattern
173            && !re.is_match(&commit.message)
174        {
175            violations.push(self.make_violation(format_msg(
176                commit,
177                subject,
178                &format!("commit message does not match pattern `{}`", re.as_str()),
179            )));
180        }
181
182        if let Some(max) = self.subject_max_length
183            && subject.chars().count() > max
184        {
185            violations.push(self.make_violation(format_msg(
186                commit,
187                subject,
188                &format!(
189                    "commit subject is {} chars; max allowed is {max}",
190                    subject.chars().count(),
191                ),
192            )));
193        }
194
195        if self.requires_body && body.trim().is_empty() {
196            violations.push(self.make_violation(format_msg(
197                commit,
198                subject,
199                "commit message has no body; this rule requires one",
200            )));
201        }
202    }
203
204    fn make_violation(&self, default_msg: String) -> Violation {
205        Violation::new(self.message_override.clone().unwrap_or(default_msg))
206    }
207}
208
209/// Render a violation message with the commit SHA + a trimmed
210/// subject snippet for context. SHA is "HEAD" in single-commit
211/// mode (the helper synthesises it) and an abbreviated SHA in
212/// range mode. Subject is truncated to ~60 chars so violation
213/// output stays readable.
214fn format_msg(commit: &CommitRecord, subject: &str, what: &str) -> String {
215    const SUBJECT_PREVIEW_MAX: usize = 60;
216    let preview: String = subject.chars().take(SUBJECT_PREVIEW_MAX).collect();
217    let ellipsis = if subject.chars().count() > SUBJECT_PREVIEW_MAX {
218        "…"
219    } else {
220        ""
221    };
222    format!(
223        "commit {}: {what} (subject: \"{preview}{ellipsis}\")",
224        commit.sha
225    )
226}
227
/// Break a commit message into `(subject, body)`.
///
/// The subject is the text before the first newline (the whole
/// message when there is none, in which case the body is empty).
/// The body is everything after that newline, minus one further
/// newline if it comes immediately next — the canonical
/// `"subject\n\nbody"` shape. A message without the blank separator
/// still yields its remaining lines as the body, and any extra
/// blank lines stay in the body rather than being swallowed.
/// Nothing is trimmed: trailing whitespace on the subject is kept,
/// so the length check counts it.
fn split_subject_body(message: &str) -> (&str, &str) {
    let Some(first_newline) = message.find('\n') else {
        return (message, "");
    };
    let subject = &message[..first_newline];
    let after_subject = &message[first_newline + 1..];
    // Drop exactly one separator line, never more.
    let body = match after_subject.strip_prefix('\n') {
        Some(past_separator) => past_separator,
        None => after_subject,
    };
    (subject, body)
}
242
/// Expand POSIX-style env-var references in a `since:` value.
///
/// Supported grammar, kept deliberately small:
///
/// - `${VAR}` — replaced by the value `lookup` returns for `VAR`.
///   If that value is missing or empty, the function stops and
///   returns `Err` carrying the variable name, so the caller can
///   produce a CI-friendly hint.
/// - `${VAR:-default}` — replaced by `VAR`'s value, or by
///   `default` when the value is missing or empty. The reference
///   ends at the first `}` and the name ends at the first `:-`,
///   so `default` cannot contain `}`.
/// - Everything else is copied through untouched, so
///   `since: origin/main` works unchanged. A `${` with no closing
///   `}` anywhere after it is copied through as literal text too.
///
/// Several references may appear in one value. Substituted values
/// are not re-scanned. The double-brace GitHub Actions syntax
/// (`${{ ... }}`) is NOT an alint feature; Actions renders it in
/// workflow files before alint ever runs.
///
/// `lookup` is the env-var resolver (in production,
/// [`env_lookup`]). Taking it as a parameter keeps the string
/// parsing testable without mutating the process environment.
fn expand_env<F>(input: &str, lookup: F) -> std::result::Result<String, String>
where
    F: Fn(&str) -> Option<String>,
{
    let mut expanded = String::with_capacity(input.len());
    let mut remaining = input;

    while let Some((literal, tail)) = remaining.split_once("${") {
        expanded.push_str(literal);

        match tail.split_once('}') {
            // No closing brace: keep the `${` as plain text and
            // carry on scanning what follows it.
            None => {
                expanded.push_str("${");
                remaining = tail;
            }
            Some((reference, after_close)) => {
                let (var, fallback) = match reference.split_once(":-") {
                    Some((var, fallback)) => (var, Some(fallback)),
                    None => (reference, None),
                };
                // An empty value is treated the same as unset.
                let value = lookup(var).filter(|v| !v.is_empty());
                match (value, fallback) {
                    (Some(v), _) => expanded.push_str(&v),
                    (None, Some(d)) => expanded.push_str(d),
                    (None, None) => return Err(var.to_string()),
                }
                remaining = after_close;
            }
        }
    }

    expanded.push_str(remaining);
    Ok(expanded)
}
300
/// The resolver `expand_env` is given outside of tests: it reads
/// `name` from the process environment. Any failure reported by
/// `std::env::var` is mapped to `None`.
fn env_lookup(name: &str) -> Option<String> {
    let value = std::env::var(name);
    value.ok()
}
305
/// Translate legacy POSIX references into the canonical template
/// syntax, for the "write this instead" part of the deprecation
/// warning:
///
/// - `${VAR}` becomes `{{env.VAR}}`
/// - `${VAR:-default}` becomes `{{env.VAR | default('default')}}`
///
/// References are recognised the same way [`expand_env`] does it
/// (first `}` closes, first `:-` splits name from default). Text
/// outside references, and a `${` that is never closed, are copied
/// through unchanged. The default text is inserted as-is, without
/// any quoting or escaping.
fn posix_to_env_template(input: &str) -> String {
    let mut rendered = String::with_capacity(input.len());
    let mut remaining = input;

    while let Some((literal, tail)) = remaining.split_once("${") {
        rendered.push_str(literal);

        let Some((reference, after_close)) = tail.split_once('}') else {
            // Unclosed: not a reference, keep it verbatim.
            rendered.push_str("${");
            remaining = tail;
            continue;
        };

        rendered.push_str("{{env.");
        match reference.split_once(":-") {
            Some((name, def)) => {
                rendered.push_str(name);
                rendered.push_str(" | default('");
                rendered.push_str(def);
                rendered.push_str("')");
            }
            None => rendered.push_str(reference),
        }
        rendered.push_str("}}");

        remaining = after_close;
    }

    rendered.push_str(remaining);
    rendered
}
336
337pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
338    let opts: Options = spec
339        .deserialize_options()
340        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
341
342    if opts.pattern.is_none() && opts.subject_max_length.is_none() && !opts.requires_body {
343        return Err(Error::rule_config(
344            &spec.id,
345            "git_commit_message needs at least one of `pattern:`, `subject_max_length:`, \
346             or `requires_body: true`",
347        ));
348    }
349    if spec.fix.is_some() {
350        return Err(Error::rule_config(
351            &spec.id,
352            "git_commit_message has no fix op",
353        ));
354    }
355    if opts.include_merges && opts.since.is_none() {
356        return Err(Error::rule_config(
357            &spec.id,
358            "`include_merges: true` has no effect without `since:`. Either remove it \
359             or set `since:` to enable range mode.",
360        ));
361    }
362
363    // Deprecation: v0.9.21 shipped POSIX `${VAR}` interpolation on
364    // `since:` only. v0.11 generalises env interpolation to the
365    // canonical `{{env.X}}` form (resolved at config load by
366    // `alint-dsl`), so a `since:` value still written as `${VAR}`
367    // is a legacy syntax. It keeps working this one minor (expanded
368    // at evaluate time by `expand_env`) but warns; v1.0 removes it.
369    // Scoped to `since:` because `${VAR}` was never interpolated in
370    // any other field — a literal `${` elsewhere is just a literal.
371    if let Some(raw) = &opts.since {
372        if raw.contains("${") {
373            eprintln!(
374                "alint: warning: rule {:?}: `since: {raw}` uses the deprecated v0.9.21 \
375                 `${{VAR}}` interpolation syntax. The canonical v0.11+ form is `{}`; \
376                 the `${{VAR}}` form will be removed in v1.0. \
377                 See https://alint.org/docs/configuration/#variable-interpolation.",
378                spec.id,
379                posix_to_env_template(raw),
380            );
381        }
382    }
383
384    let pattern = opts
385        .pattern
386        .as_deref()
387        .map(|p| {
388            Regex::new(p).map_err(|e| {
389                Error::rule_config(&spec.id, format!("invalid `pattern:` regex `{p}`: {e}"))
390            })
391        })
392        .transpose()?;
393
394    Ok(Box::new(GitCommitMessageRule {
395        id: spec.id.clone(),
396        level: spec.level,
397        policy_url: spec.policy_url.clone(),
398        message_override: spec.message.clone(),
399        pattern,
400        subject_max_length: opts.subject_max_length,
401        requires_body: opts.requires_body,
402        since_raw: opts.since,
403        include_merges: opts.include_merges,
404    }))
405}
406
#[cfg(test)]
mod tests {
    use super::*;

    // ----- subject / body splitting -----------------------------

    #[test]
    fn split_one_line_message() {
        assert_eq!(
            split_subject_body("just a subject"),
            ("just a subject", "")
        );
    }

    #[test]
    fn split_subject_body_with_canonical_blank_line() {
        let message = "Add feature\n\nLong description here.\nMore.";
        assert_eq!(
            split_subject_body(message),
            ("Add feature", "Long description here.\nMore.")
        );
    }

    #[test]
    fn split_subject_no_blank_separator() {
        // The blank separator is conventional, not mandatory: when
        // it is missing, whatever follows the first newline is
        // still reported as the body.
        assert_eq!(
            split_subject_body("subject\nrest of content"),
            ("subject", "rest of content")
        );
    }

    #[test]
    fn pattern_rejects_unrelated_subject() {
        let conventional = Regex::new(r"^(feat|fix|chore): ").unwrap();
        assert!(conventional.is_match("feat: add markdown formatter"));
        assert!(!conventional.is_match("WIP changes"));
    }

    #[test]
    fn subject_length_uses_chars_not_bytes() {
        // The length cap counts `char`s (Unicode scalar values),
        // so fifty 4-byte emoji are a 50-char subject even though
        // they occupy 200 bytes.
        let subject = "🚀".repeat(50);
        assert_eq!(subject.len(), 50 * 4); // bytes
        assert_eq!(subject.chars().count(), 50);
    }

    #[test]
    fn requires_body_detects_subject_only() {
        let (_subject, body) = split_subject_body("just a subject");
        assert!(body.trim().is_empty());
    }

    #[test]
    fn requires_body_accepts_canonical_form() {
        let (_subject, body) = split_subject_body("subject\n\nbody content");
        assert!(!body.trim().is_empty());
    }

    // ----- format_msg formatting --------------------------------

    /// A commit record with the given sha and message and blank
    /// author fields.
    fn record(sha: &str, message: &str) -> CommitRecord {
        CommitRecord {
            sha: sha.to_string(),
            message: message.to_string(),
            author_name: String::new(),
            author_email: String::new(),
        }
    }

    #[test]
    fn format_msg_renders_sha_and_subject() {
        let commit = record("a1b2c3d", "fix: thing");
        let rendered = format_msg(&commit, "fix: thing", "subject too long");
        assert!(rendered.contains("commit a1b2c3d"));
        assert!(rendered.contains("fix: thing"));
        assert!(rendered.contains("subject too long"));
    }

    #[test]
    fn format_msg_truncates_long_subjects() {
        let long_subject = "x".repeat(120);
        let commit = record("abc1234", &long_subject);
        let rendered = format_msg(&commit, &long_subject, "too long");
        // Exactly 60 chars of the subject survive, followed by an
        // ellipsis.
        assert!(rendered.contains(&"x".repeat(60)));
        assert!(!rendered.contains(&"x".repeat(61)));
        assert!(rendered.contains('…'));
    }

    // ----- env-var interpolation --------------------------------

    /// A stand-in for the process environment: names listed in
    /// `pairs` resolve to their paired value, every other name is
    /// unset.
    fn fake_env<'a>(pairs: &'a [(&'a str, &'a str)]) -> impl Fn(&str) -> Option<String> + 'a {
        move |wanted: &str| {
            for (key, value) in pairs {
                if *key == wanted {
                    return Some((*value).to_string());
                }
            }
            None
        }
    }

    #[test]
    fn expand_env_passthrough_for_bare_string() {
        let env = fake_env(&[]);
        for literal in [
            "origin/main",
            "v0.9.20",
            "abc1234567890abcdef1234567890abcdef12345678",
        ] {
            assert_eq!(expand_env(literal, &env).unwrap(), literal);
        }
    }

    #[test]
    fn expand_env_substitutes_simple_var() {
        let env = fake_env(&[("ALINT_BASE_SHA", "deadbeef")]);
        let resolved = expand_env("${ALINT_BASE_SHA}", &env).unwrap();
        assert_eq!(resolved, "deadbeef");
    }

    #[test]
    fn expand_env_default_used_when_var_unset() {
        let env = fake_env(&[]);
        let resolved = expand_env("${MISSING:-origin/main}", &env).unwrap();
        assert_eq!(resolved, "origin/main");
    }

    #[test]
    fn expand_env_default_used_when_var_empty() {
        let env = fake_env(&[("EMPTY", "")]);
        let resolved = expand_env("${EMPTY:-origin/main}", &env).unwrap();
        assert_eq!(resolved, "origin/main");
    }

    #[test]
    fn expand_env_errors_when_var_unset_and_no_default() {
        let env = fake_env(&[]);
        let missing = expand_env("${NOPE}", &env).unwrap_err();
        assert_eq!(missing, "NOPE");
    }

    #[test]
    fn expand_env_handles_multiple_references() {
        let env = fake_env(&[("A", "foo"), ("B", "bar")]);
        let resolved = expand_env("${A}-${B}", &env).unwrap();
        assert_eq!(resolved, "foo-bar");
    }

    #[test]
    fn expand_env_handles_text_around_var() {
        let env = fake_env(&[("SHA", "abc1234")]);
        let resolved = expand_env("refs/${SHA}/head", &env).unwrap();
        assert_eq!(resolved, "refs/abc1234/head");
    }

    #[test]
    fn expand_env_ignores_unclosed_brace() {
        // A `${` that is never closed is not a reference; it must
        // come back verbatim instead of erroring or panicking.
        let env = fake_env(&[]);
        let resolved = expand_env("foo${unclosed", &env).unwrap();
        assert_eq!(resolved, "foo${unclosed");
    }

    // ----- build() validation -----------------------------------

    /// Parse a `RuleSpec` from the fixed id/kind/level header plus
    /// the caller's extra option lines.
    fn spec(options_toml: &str) -> RuleSpec {
        let full = format!(
            "id = \"test-rule\"\nkind = \"git_commit_message\"\nlevel = \"error\"\n{options_toml}"
        );
        toml::from_str(&full).unwrap()
    }

    #[test]
    fn build_requires_at_least_one_assertion() {
        // With no pattern, no subject cap and no required body
        // there is nothing to check, so build() must refuse.
        let err = build(&spec("")).unwrap_err();
        assert!(err.to_string().contains("at least one of"));
    }

    #[test]
    fn build_rejects_include_merges_without_since() {
        // `include_merges` is meaningless outside range mode;
        // rejecting it at load time beats a silent no-op.
        let err = build(&spec("requires_body = true\ninclude_merges = true\n")).unwrap_err();
        assert!(
            err.to_string().contains("include_merges"),
            "expected include_merges hint, got: {err}"
        );
    }

    #[test]
    fn posix_to_env_template_converts_simple_and_default() {
        let cases = [
            ("${ALINT_BASE_SHA}", "{{env.ALINT_BASE_SHA}}"),
            (
                "${BASE:-origin/main}",
                "{{env.BASE | default('origin/main')}}",
            ),
            // Bare text passes through; an embedded reference is
            // converted in place.
            ("origin/main", "origin/main"),
            ("refs/${REF}", "refs/{{env.REF}}"),
        ];
        for (legacy, canonical) in cases {
            assert_eq!(posix_to_env_template(legacy), canonical);
        }
    }

    #[test]
    fn build_accepts_legacy_posix_since_with_deprecation() {
        // The deprecation is a stderr warning, not an error, so
        // existing v0.9.21 configs keep loading; the value is
        // still expanded later by `expand_env`.
        let legacy = spec("requires_body = true\nsince = \"${ALINT_BASE_SHA}\"\n");
        assert!(build(&legacy).is_ok());
    }

    #[test]
    fn build_accepts_canonical_template_since() {
        // Normally the DSL interp pass resolves `{{env.X}}` before
        // the rule sees it. Here no interp runs, so the text
        // arrives as a literal ref; it must build without taking
        // the `${VAR}` deprecation path.
        let canonical = spec("requires_body = true\nsince = \"{{env.ALINT_BASE_SHA}}\"\n");
        assert!(build(&canonical).is_ok());
    }

    #[test]
    fn build_accepts_since_with_other_options() {
        let with_pattern = spec("pattern = \"^feat: \"\nsince = \"origin/main\"\n");
        assert!(build(&with_pattern).is_ok());
    }

    #[test]
    fn build_accepts_since_with_include_merges() {
        let with_merges =
            spec("subject_max_length = 50\nsince = \"origin/main\"\ninclude_merges = true\n");
        assert!(build(&with_merges).is_ok());
    }
}