Skip to main content

koala_core/invariant/
user_toml.rs

1//! User-defined invariants from `.koala/invariants/*.toml`.
2//!
3//! Each TOML file may declare any number of `[[rule]]` blocks. A rule
4//! has an `id`, `category`, `intent`, optional `adr`, and a `match`
5//! table that drives the (small set of) supported predicates:
6//!
7//! ```toml
8//! [[rule]]
9//! id       = "biz.no-todo-in-public-api"
10//! category = "docs"
11//! intent   = "Public API doc-comments must not contain TODO/FIXME."
12//! adr      = "ADR-0019"
13//! match    = { kind = "forbid-substring", glob = "crates/**/*.rs", needle = "/// TODO" }
14//! ```
15//!
16//! Two `kind`s are supported in v1.0:
17//!   - `forbid-substring` — fail if any file matching `glob` contains
18//!     `needle`.
19//!   - `require-substring` — fail if no file matching `glob` contains
20//!     `needle`.
21//!
22//! Anything richer (property-based, perf bench, regex) lives in
23//! native invariants — TOML stays declarative on purpose.
24
25use crate::invariant::{Category, Context, Invariant, Outcome};
26use serde::Deserialize;
27use std::fs;
28use std::path::{Path, PathBuf};
29
/// Directory, relative to the repository root, that is scanned for
/// user-defined invariant `*.toml` files.
const USER_DIR: &str = ".koala/invariants";
31
32#[derive(Debug, Deserialize)]
33struct File {
34    #[serde(default)]
35    rule: Vec<UserRule>,
36}
37
38#[derive(Debug, Deserialize, Clone)]
39struct UserRule {
40    id: String,
41    category: String,
42    intent: String,
43    #[serde(default)]
44    adr: Option<String>,
45    #[serde(rename = "match")]
46    match_: MatchSpec,
47}
48
49#[derive(Debug, Deserialize, Clone)]
50#[serde(tag = "kind")]
51enum MatchSpec {
52    #[serde(rename = "forbid-substring")]
53    ForbidSubstring { glob: String, needle: String },
54    #[serde(rename = "require-substring")]
55    RequireSubstring { glob: String, needle: String },
56}
57
58#[derive(Debug)]
59pub enum LoadError {
60    Io { path: PathBuf, err: std::io::Error },
61    Parse { path: PathBuf, err: toml::de::Error },
62    BadCategory { id: String, value: String },
63}
64
65impl std::fmt::Display for LoadError {
66    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67        match self {
68            Self::Io { path, err } => write!(f, "io ({}): {err}", path.display()),
69            Self::Parse { path, err } => write!(f, "parse ({}): {err}", path.display()),
70            Self::BadCategory { id, value } => write!(
71                f,
72                "rule `{id}`: unknown category `{value}` (expected arch / deps / docs / \
73                 governance / health / security)"
74            ),
75        }
76    }
77}
78
79impl std::error::Error for LoadError {}
80
81#[derive(Debug)]
82pub struct UserDefinedInvariant {
83    id: String,
84    category: Category,
85    intent: String,
86    adr: Option<String>,
87    spec: MatchSpec,
88}
89
90impl UserDefinedInvariant {
91    pub fn category_label(&self) -> &str {
92        self.category.as_str()
93    }
94}
95
96impl Invariant for UserDefinedInvariant {
97    fn id(&self) -> &'static str {
98        // Trait wants &'static str; leak the id once. Fine for
99        // long-running CLI runs.
100        Box::leak(self.id.clone().into_boxed_str())
101    }
102
103    fn category(&self) -> Category {
104        self.category
105    }
106
107    fn intent(&self) -> &'static str {
108        Box::leak(self.intent.clone().into_boxed_str())
109    }
110
111    fn adr(&self) -> Option<&'static str> {
112        self.adr.clone().map(|s| &*Box::leak(s.into_boxed_str()))
113    }
114
115    fn evaluate(&self, ctx: &Context) -> Outcome {
116        match &self.spec {
117            MatchSpec::ForbidSubstring { glob, needle } => evaluate_forbid(ctx, glob, needle),
118            MatchSpec::RequireSubstring { glob, needle } => evaluate_require(ctx, glob, needle),
119        }
120    }
121}
122
123fn evaluate_forbid(ctx: &Context, glob: &str, needle: &str) -> Outcome {
124    let mut hits = Vec::new();
125    for path in walk_glob(ctx.root(), glob) {
126        let Ok(text) = fs::read_to_string(&path) else {
127            continue;
128        };
129        if text.contains(needle) {
130            hits.push(rel_display(&path, ctx.root()));
131        }
132    }
133    if hits.is_empty() {
134        Outcome::pass()
135    } else {
136        Outcome::fail_repro(
137            format!(
138                "{n} file(s) contain forbidden substring `{needle}`:\n  {body}",
139                n = hits.len(),
140                body = hits.join("\n  ")
141            ),
142            format!("rg -F '{needle}' {glob}"),
143        )
144    }
145}
146
147fn evaluate_require(ctx: &Context, glob: &str, needle: &str) -> Outcome {
148    let any_present = walk_glob(ctx.root(), glob).into_iter().any(|p| {
149        fs::read_to_string(&p)
150            .map(|t| t.contains(needle))
151            .unwrap_or(false)
152    });
153    if any_present {
154        Outcome::pass()
155    } else {
156        Outcome::fail_repro(
157            format!("no file matching `{glob}` contains required substring `{needle}`"),
158            format!("rg -F '{needle}' {glob}"),
159        )
160    }
161}
162
/// Formats `p` for display: relative to `root` when `p` lies under it
/// (otherwise unchanged), with every backslash turned into `/`.
fn rel_display(p: &Path, root: &Path) -> String {
    let shown = match p.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => p,
    };
    let rendered = shown.display().to_string();
    rendered.replace('\\', "/")
}
170
171/// Minimal glob: only supports `**` (anywhere) and `*` (one path
172/// segment, no `/`). Sufficient for the v1.0 declarative ruleset.
173fn walk_glob(root: &Path, glob: &str) -> Vec<PathBuf> {
174    let mut out = Vec::new();
175    for entry in walkdir::WalkDir::new(root).into_iter().flatten() {
176        if !entry.file_type().is_file() {
177            continue;
178        }
179        let p = entry.path();
180        let Some(rel) = p.strip_prefix(root).ok() else {
181            continue;
182        };
183        let rel = rel.to_string_lossy().replace('\\', "/");
184        if glob_match(glob, &rel) {
185            out.push(p.to_path_buf());
186        }
187    }
188    out
189}
190
191fn glob_match(pattern: &str, text: &str) -> bool {
192    let segs: Vec<&str> = pattern.split('/').collect();
193    let parts: Vec<&str> = text.split('/').collect();
194    glob_segments(&segs, &parts)
195}
196
197fn glob_segments(pat: &[&str], text: &[&str]) -> bool {
198    if pat.is_empty() {
199        return text.is_empty();
200    }
201    let head = pat[0];
202    let rest_pat = &pat[1..];
203    if head == "**" {
204        // Match zero or more text segments.
205        if glob_segments(rest_pat, text) {
206            return true;
207        }
208        for i in 1..=text.len() {
209            if glob_segments(rest_pat, &text[i..]) {
210                return true;
211            }
212        }
213        return false;
214    }
215    if text.is_empty() {
216        return false;
217    }
218    if !segment_match(head, text[0]) {
219        return false;
220    }
221    glob_segments(rest_pat, &text[1..])
222}
223
/// Matches a single path segment against a single pattern segment,
/// byte by byte. `*` in the pattern matches any (possibly empty) run of
/// bytes; every other pattern byte must match literally.
fn segment_match(pat: &str, text: &str) -> bool {
    let pattern = pat.as_bytes();
    let subject = text.as_bytes();
    let (mut p, mut s) = (0usize, 0usize);
    // For the most recent `*`: the pattern index just after it, and the
    // subject index up to which that `*` currently absorbs input.
    let mut backtrack: Option<(usize, usize)> = None;

    while s < subject.len() {
        match pattern.get(p) {
            Some(&b'*') => {
                p += 1;
                backtrack = Some((p, s));
            }
            Some(&literal) if literal == subject[s] => {
                p += 1;
                s += 1;
            }
            _ => match backtrack {
                // Mismatch after a `*`: let the star absorb one more byte
                // and retry from just past it.
                Some((resume_p, absorbed)) => {
                    let next = absorbed + 1;
                    backtrack = Some((resume_p, next));
                    p = resume_p;
                    s = next;
                }
                None => return false,
            },
        }
    }

    // Subject exhausted: the leftover pattern may only consist of `*`.
    pattern[p..].iter().all(|&b| b == b'*')
}
257
258pub fn load_all(repo_root: &Path) -> Result<Vec<UserDefinedInvariant>, LoadError> {
259    let dir = repo_root.join(USER_DIR);
260    let Ok(read) = fs::read_dir(&dir) else {
261        return Ok(Vec::new());
262    };
263    let mut out = Vec::new();
264    for entry in read.flatten() {
265        let path = entry.path();
266        if path.extension().and_then(|s| s.to_str()) != Some("toml") {
267            continue;
268        }
269        let text = fs::read_to_string(&path).map_err(|err| LoadError::Io {
270            path: path.clone(),
271            err,
272        })?;
273        let file: File = toml::from_str(&text).map_err(|err| LoadError::Parse {
274            path: path.clone(),
275            err,
276        })?;
277        for r in file.rule {
278            let category = parse_category(&r.id, &r.category)?;
279            out.push(UserDefinedInvariant {
280                id: r.id,
281                category,
282                intent: r.intent,
283                adr: r.adr,
284                spec: r.match_,
285            });
286        }
287    }
288    out.sort_by(|a, b| a.id.cmp(&b.id));
289    Ok(out)
290}
291
292fn parse_category(id: &str, value: &str) -> Result<Category, LoadError> {
293    Ok(match value {
294        "arch" => Category::Arch,
295        "deps" => Category::Deps,
296        "docs" => Category::Docs,
297        "governance" => Category::Governance,
298        "health" => Category::Health,
299        "security" => Category::Security,
300        other => {
301            return Err(LoadError::BadCategory {
302                id: id.to_string(),
303                value: other.to_string(),
304            })
305        }
306    })
307}
308
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Writes `body` to `root/rel`, creating parent directories first.
    fn write(root: &Path, rel: &str, body: &str) {
        let target = root.join(rel);
        let parent = target.parent().unwrap();
        fs::create_dir_all(parent).unwrap();
        fs::write(&target, body).unwrap();
    }

    /// `*` stays within one segment; `**` spans zero or more segments.
    #[test]
    fn glob_matches_simple_patterns() {
        let should_match = [
            ("crates/**/*.rs", "crates/koala-core/src/lib.rs"),
            ("crates/*/Cargo.toml", "crates/koala-core/Cargo.toml"),
            ("**/README.md", "README.md"),
            ("**/README.md", "wiki/README.md"),
        ];
        for (pattern, path) in should_match {
            assert!(glob_match(pattern, path));
        }
        assert!(!glob_match("crates/*/Cargo.toml", "crates/a/b/Cargo.toml"));
    }

    /// A forbid-substring rule loads from TOML and flips from pass to
    /// fail once an offending file appears.
    #[test]
    fn user_defined_toml_loaded() {
        let rules_toml = r#"
[[rule]]
id       = "biz.no-fixme-in-src"
category = "health"
intent   = "Code under crates/ must not ship FIXME markers."
adr      = "ADR-0019"

[rule.match]
kind   = "forbid-substring"
glob   = "crates/**/*.rs"
needle = "FIXME"
"#;
        let repo = TempDir::new().unwrap();
        write(repo.path(), ".koala/invariants/biz.toml", rules_toml);

        let loaded = load_all(repo.path()).unwrap();
        assert_eq!(loaded.len(), 1);
        let rule = &loaded[0];
        assert_eq!(rule.id(), "biz.no-fixme-in-src");
        assert_eq!(rule.category().as_str(), "health");
        assert_eq!(rule.adr(), Some("ADR-0019"));

        // No matching files yet → pass.
        let ctx = Context::new(repo.path().to_path_buf());
        assert!(matches!(rule.evaluate(&ctx), Outcome::Pass { .. }));

        // Add a file with FIXME → fail.
        write(
            repo.path(),
            "crates/x/src/lib.rs",
            "// FIXME: rewrite\npub fn k() {}\n",
        );
        let out = rule.evaluate(&ctx);
        assert!(matches!(out, Outcome::Fail { .. }), "{out:?}");
    }

    /// A require-substring rule fails until a matching file contains
    /// the needle.
    #[test]
    fn require_substring_rule() {
        let rules_toml = r#"
[[rule]]
id       = "biz.readme-mentions-license"
category = "docs"
intent   = "README must mention the license."

[rule.match]
kind   = "require-substring"
glob   = "README.md"
needle = "Apache-2.0"
"#;
        let repo = TempDir::new().unwrap();
        write(repo.path(), ".koala/invariants/docs.toml", rules_toml);

        let loaded = load_all(repo.path()).unwrap();
        let rule = &loaded[0];

        // Missing → fail.
        let ctx = Context::new(repo.path().to_path_buf());
        assert!(matches!(rule.evaluate(&ctx), Outcome::Fail { .. }));

        // Present → pass.
        write(
            repo.path(),
            "README.md",
            "# Project\n\nLicense: Apache-2.0\n",
        );
        assert!(matches!(rule.evaluate(&ctx), Outcome::Pass { .. }));
    }

    /// A repository without the invariants directory has no user rules.
    #[test]
    fn missing_user_dir_returns_empty() {
        let repo = TempDir::new().unwrap();
        let loaded = load_all(repo.path()).unwrap();
        assert!(loaded.is_empty());
    }

    /// An unknown category name aborts loading with `BadCategory`.
    #[test]
    fn bad_category_is_rejected() {
        let rules_toml = r#"
[[rule]]
id       = "biz.x"
category = "nonsense"
intent   = "x"

[rule.match]
kind   = "forbid-substring"
glob   = "**/*"
needle = "x"
"#;
        let repo = TempDir::new().unwrap();
        write(repo.path(), ".koala/invariants/bad.toml", rules_toml);

        let err = load_all(repo.path()).unwrap_err();
        assert!(matches!(err, LoadError::BadCategory { .. }));
    }
}
434}