Skip to main content

torii_lib/vcs/
commit_scan.rs

1//! Commit policy enforcement.
2//!
3//! Loads a `policies/commits.toml` file and evaluates each commit in a range
4//! against the rules. Used by:
5//!
6//!   - `torii scan commits [--all] [--since N]` (CLI)
7//!   - pre-save hook (planned)
8//!   - CI gates (future, server-side)
9//!
10//! Schema (all keys optional):
11//!
12//!   forbid_trailers     = ["regex", …]
13//!   require_trailers    = ["regex", …]
14//!   forbid_subjects     = ["regex", …]
15//!   author_email_matches = "regex"
16//!   subject_max_length  = 72
17//!   subject_min_length  = 8
18//!   require_conventional = true
19//!
20//! All regexes are case-insensitive by default.
21
22use std::path::{Path, PathBuf};
23
24use regex::Regex;
25use serde::Deserialize;
26
27use crate::error::{Result, ToriiError};
28
29/// Raw TOML shape — directly deserialised from `policies/commits.toml`.
30#[derive(Debug, Default, Deserialize)]
31struct RawPolicy {
32    #[serde(default)]
33    forbid_trailers: Vec<String>,
34    #[serde(default)]
35    require_trailers: Vec<String>,
36    #[serde(default)]
37    forbid_subjects: Vec<String>,
38    #[serde(default)]
39    author_email_matches: Option<String>,
40    #[serde(default)]
41    subject_max_length: Option<usize>,
42    #[serde(default)]
43    subject_min_length: Option<usize>,
44    #[serde(default)]
45    require_conventional: bool,
46}
47
48/// Compiled, ready-to-run policy.
49pub struct CompiledCommitPolicy {
50    forbid_trailers: Vec<Regex>,
51    require_trailers: Vec<Regex>,
52    forbid_subjects: Vec<Regex>,
53    author_email_matches: Option<Regex>,
54    subject_max_length: Option<usize>,
55    subject_min_length: Option<usize>,
56    require_conventional: bool,
57}
58
/// A single policy rule that a single commit failed.
///
/// One commit can produce several `Violation`s, one per failed rule.
#[derive(Clone, Debug)]
pub struct Violation {
    /// Full commit OID — kept for future `--fix` mode that needs to
    /// rewrite the exact commit.
    #[allow(dead_code)]
    pub commit_id: String,
    /// First seven characters of `commit_id`, for display.
    pub commit_short: String,
    /// Trimmed first line of the commit message.
    pub subject: String,
    /// Name of the policy key that was violated (e.g. `forbid_trailers`).
    pub rule: String,
    /// Human-readable explanation of why the rule failed.
    pub detail: String,
}
71
72impl CompiledCommitPolicy {
73    pub fn from_toml(src: &str) -> Result<Self> {
74        let raw: RawPolicy = toml::from_str(src)
75            .map_err(|e| ToriiError::InvalidConfig(format!("parse policy TOML: {}", e)))?;
76        let mut p = CompiledCommitPolicy {
77            forbid_trailers: Vec::new(),
78            require_trailers: Vec::new(),
79            forbid_subjects: Vec::new(),
80            author_email_matches: None,
81            subject_max_length: raw.subject_max_length,
82            subject_min_length: raw.subject_min_length,
83            require_conventional: raw.require_conventional,
84        };
85        for pat in &raw.forbid_trailers {
86            p.forbid_trailers.push(compile(pat)?);
87        }
88        for pat in &raw.require_trailers {
89            p.require_trailers.push(compile(pat)?);
90        }
91        for pat in &raw.forbid_subjects {
92            p.forbid_subjects.push(compile(pat)?);
93        }
94        if let Some(pat) = &raw.author_email_matches {
95            p.author_email_matches = Some(compile(pat)?);
96        }
97        Ok(p)
98    }
99
100    /// Load + compile a policy file. Returns `Ok(None)` if file does not
101    /// exist (callers can decide whether absent policy is silent or an error).
102    pub fn load(path: &Path) -> Result<Option<Self>> {
103        if !path.exists() {
104            return Ok(None);
105        }
106        let src = std::fs::read_to_string(path)
107            .map_err(|e| ToriiError::Fs(format!("read {}: {}", path.display(), e)))?;
108        Ok(Some(Self::from_toml(&src)?))
109    }
110
111    /// Evaluate a commit. Returns 0+ violations.
112    pub fn check(
113        &self,
114        commit_id: &str,
115        author_email: &str,
116        message: &str,
117    ) -> Vec<Violation> {
118        let short: String = commit_id.chars().take(7).collect();
119        let subject = message.lines().next().unwrap_or("").trim().to_string();
120        let mut out = Vec::new();
121
122        let push = |out: &mut Vec<Violation>, rule: &str, detail: String| {
123            out.push(Violation {
124                commit_id: commit_id.to_string(),
125                commit_short: short.clone(),
126                subject: subject.clone(),
127                rule: rule.to_string(),
128                detail,
129            });
130        };
131
132        for re in &self.forbid_trailers {
133            for line in message.lines() {
134                if re.is_match(line) {
135                    push(
136                        &mut out,
137                        "forbid_trailers",
138                        format!("matches /{}/: `{}`", re.as_str(), line.trim()),
139                    );
140                    break; // one match per rule per commit is enough
141                }
142            }
143        }
144
145        for re in &self.require_trailers {
146            let found = message.lines().any(|l| re.is_match(l));
147            if !found {
148                push(
149                    &mut out,
150                    "require_trailers",
151                    format!("no line matches /{}/", re.as_str()),
152                );
153            }
154        }
155
156        for re in &self.forbid_subjects {
157            if re.is_match(&subject) {
158                push(
159                    &mut out,
160                    "forbid_subjects",
161                    format!("subject matches /{}/", re.as_str()),
162                );
163            }
164        }
165
166        if let Some(re) = &self.author_email_matches {
167            if !re.is_match(author_email) {
168                push(
169                    &mut out,
170                    "author_email_matches",
171                    format!("`{}` doesn't match /{}/", author_email, re.as_str()),
172                );
173            }
174        }
175
176        if let Some(max) = self.subject_max_length {
177            let len = subject.chars().count();
178            if len > max {
179                push(
180                    &mut out,
181                    "subject_max_length",
182                    format!("subject is {} chars (max {})", len, max),
183                );
184            }
185        }
186        if let Some(min) = self.subject_min_length {
187            let len = subject.chars().count();
188            if len < min {
189                push(
190                    &mut out,
191                    "subject_min_length",
192                    format!("subject is {} chars (min {})", len, min),
193                );
194            }
195        }
196
197        if self.require_conventional && !is_conventional(&subject) {
198            push(
199                &mut out,
200                "require_conventional",
201                "subject doesn't match `<type>(scope?): description`".to_string(),
202            );
203        }
204
205        out
206    }
207}
208
209fn compile(pat: &str) -> Result<Regex> {
210    // Default to case-insensitive — trailers / authors are usually compared
211    // without caring about case.
212    let with_flag = format!("(?i){}", pat);
213    Regex::new(&with_flag)
214        .map_err(|e| ToriiError::InvalidConfig(format!("bad regex /{}/: {}", pat, e)))
215}
216
/// Conventional Commits subject:
///   feat: ...
///   feat(scope): ...
///   fix!: ...   (breaking)
///   chore(release)!: ...
///
/// Returns `true` when the text before the first `:` is one of the known
/// types, optionally followed by a parenthesised scope and/or a `!` breaking
/// marker. Only that prefix is inspected; the description after the colon is
/// not validated.
///
/// A scope that is opened but not closed at the end of the prefix
/// (e.g. `feat(scope: x`) makes the subject non-conventional. Such input
/// must never panic, because subjects come straight from commit history.
fn is_conventional(subject: &str) -> bool {
    const TYPES: &[&str] = &[
        "feat", "fix", "docs", "style", "refactor", "perf", "test",
        "build", "ci", "chore", "revert",
    ];

    let Some((head, _description)) = subject.split_once(':') else {
        return false;
    };
    // A trailing `!` marks a breaking change and is not part of the type/scope.
    let head = head.strip_suffix('!').unwrap_or(head);

    let ty = match head.split_once('(') {
        // `rest` is everything after the first `(`; it must end with the
        // closing `)` for the scope to be well-formed.
        Some((ty, rest)) => {
            if !rest.ends_with(')') {
                return false;
            }
            ty
        }
        None => head,
    };
    TYPES.contains(&ty)
}
239
/// Where the commit policy file lives by default, relative to a repo root:
/// `<repo_root>/policies/commits.toml`.
pub fn default_policy_path(repo_root: &Path) -> PathBuf {
    let mut policy_file = repo_root.to_path_buf();
    policy_file.push("policies");
    policy_file.push("commits.toml");
    policy_file
}
244
245/// Convenience: scan a range of commits with a loaded policy.
246/// `since_oid` = inclusive end of range (older). If None, walks all of HEAD.
247pub fn scan_repo(
248    repo: &git2::Repository,
249    policy: &CompiledCommitPolicy,
250    limit: usize,
251) -> Result<Vec<Violation>> {
252    let mut walk = repo.revwalk().map_err(ToriiError::Git)?;
253    walk.push_head().map_err(ToriiError::Git)?;
254    let mut all = Vec::new();
255    for oid in walk.take(limit) {
256        let oid = oid.map_err(ToriiError::Git)?;
257        let commit = repo.find_commit(oid).map_err(ToriiError::Git)?;
258        let id = oid.to_string();
259        let email = commit.author().email().unwrap_or("").to_string();
260        let msg = commit.message().unwrap_or("").to_string();
261        all.extend(policy.check(&id, &email, &msg));
262    }
263    Ok(all)
264}
265
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile a policy from inline TOML, panicking on invalid input.
    fn pol(src: &str) -> CompiledCommitPolicy {
        CompiledCommitPolicy::from_toml(src).unwrap()
    }

    #[test]
    fn forbid_trailer_catches_claude() {
        let p = pol(r#"forbid_trailers = ["Co-Authored-By:.*Claude"]"#);
        let v = p.check(
            "abc123",
            "x@y",
            "feat: stuff\n\nCo-Authored-By: Claude Sonnet <noreply@anthropic.com>",
        );
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].rule, "forbid_trailers");
    }

    #[test]
    fn require_trailer_missing() {
        let p = pol(r#"require_trailers = ["Signed-off-by:"]"#);
        let v = p.check("abc", "x@y", "feat: stuff");
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].rule, "require_trailers");
    }

    #[test]
    fn require_trailer_present_no_violation() {
        let p = pol(r#"require_trailers = ["Signed-off-by:"]"#);
        let v = p.check("abc", "x@y", "feat: stuff\n\nSigned-off-by: A B <a@b>");
        assert!(v.is_empty());
    }

    #[test]
    fn subject_length_limits() {
        let p = pol("subject_max_length = 10\nsubject_min_length = 5");
        assert_eq!(p.check("a", "x@y", "ok done").len(), 0);
        assert_eq!(p.check("a", "x@y", "x").len(), 1); // too short
        assert_eq!(p.check("a", "x@y", "way too long subject here").len(), 1); // too long
    }

    #[test]
    fn forbid_subject() {
        let p = pol(r#"forbid_subjects = ["^(wip|tmp)$"]"#);
        assert_eq!(p.check("a", "x@y", "wip").len(), 1);
        assert_eq!(p.check("a", "x@y", "feat: real").len(), 0);
    }

    #[test]
    fn author_email_mismatch() {
        let p = pol(r#"author_email_matches = ".*@paski\\.dev$""#);
        assert_eq!(p.check("a", "x@y.com", "feat: x").len(), 1);
        assert_eq!(p.check("a", "me@paski.dev", "feat: x").len(), 0);
    }

    #[test]
    fn conventional_commits() {
        let p = pol("require_conventional = true");
        assert_eq!(p.check("a", "x@y", "feat: ok").len(), 0);
        assert_eq!(p.check("a", "x@y", "feat(scope): ok").len(), 0);
        assert_eq!(p.check("a", "x@y", "fix!: breaking").len(), 0);
        assert_eq!(p.check("a", "x@y", "random message").len(), 1);
        assert_eq!(p.check("a", "x@y", "wibble: unknown type").len(), 1);
    }

    #[test]
    fn is_conventional_helper() {
        assert!(is_conventional("feat: x"));
        assert!(is_conventional("feat(scope): x"));
        assert!(is_conventional("fix!: x"));
        assert!(is_conventional("chore(release)!: x"));
        assert!(!is_conventional("random"));
        assert!(!is_conventional("frob: x"));
    }

    #[test]
    fn empty_policy_is_valid() {
        let p = pol("");
        assert!(p.check("a", "x@y", "anything").is_empty());
    }

    #[test]
    fn comments_and_unknown_keys_ok() {
        // The input includes a key the schema does not define, so the test
        // exercises what its name says: unknown keys are tolerated and the
        // known keys next to them still take effect.
        let p = pol("# comment\nsome_future_key = \"ignored\"\nrequire_conventional = true");
        assert_eq!(p.check("a", "x@y", "wibble").len(), 1);
    }
}