Skip to main content

alint_core/when/
mod.rs

1//! The `when` expression language — bounded DSL for gating rules on facts.
2//!
3//! Grammar (hand-written recursive-descent; no parser combinator):
4//!
5//! ```text
6//! expr       := or_expr
7//! or_expr    := and_expr ('or' and_expr)*
8//! and_expr   := not_expr ('and' not_expr)*
9//! not_expr   := ['not'] cmp_expr
10//! cmp_expr   := primary [cmp_op primary]
11//! cmp_op     := '==' | '!=' | '<' | '<=' | '>' | '>=' | 'in' | 'matches'
12//! primary    := literal | ident_or_call | '(' expr ')'
13//! literal    := STRING | INT | BOOL | 'null' | list
14//! list       := '[' [expr (',' expr)*] ']'
15//! ident_or_call := NS '.' NAME ['(' [expr (',' expr)*] ')']
16//! NS         := 'facts' | 'vars' | 'iter'
17//! ```
18//!
19//! Design choices (all load-bearing):
20//!
21//! - **No arithmetic.** Only comparison.
22//! - **Function calls limited to a fixed set on the `iter` namespace.**
23//!   `iter.has_file("Cargo.toml")` is supported; arbitrary user-defined
24//!   calls are not. Use declared `facts:` for repo-level computation.
25//! - **`iter.*` is only meaningful in iteration contexts** (per-iteration
26//!   `when_iter:` on `for_each_*`, and nested rules' `when:`). Outside
27//!   those, `iter.X` evaluates to `null` and `iter.has_file(_)` to `false`.
28//! - **`matches` RHS must be a string literal.** This lets us compile the
29//!   regex at parse time; dynamic patterns stay out of the hot path.
30//! - **Short-circuit `and` / `or`.** Unevaluated branches don't even touch
31//!   their subtree.
32//! - **Type coercion is explicit, not silent.** Comparing `Int` to `String`
33//!   is an error, not `false`.
34
35use std::collections::HashMap;
36use std::path::Path;
37
38use regex::Regex;
39use thiserror::Error;
40
41use crate::facts::{FactValue, FactValues};
42use crate::walker::FileIndex;
43
44// ─── Errors ──────────────────────────────────────────────────────────
45
46#[derive(Debug, Error)]
47pub enum WhenError {
48    #[error("when parse error at column {pos}: {message}")]
49    Parse { pos: usize, message: String },
50    #[error("when evaluation error: {0}")]
51    Eval(String),
52    #[error("invalid regex in `matches`: {0}")]
53    Regex(String),
54}
55
56// ─── Value (evaluation-time) ─────────────────────────────────────────
57
/// A value produced while evaluating a `when` expression.
#[derive(Debug, Clone)]
pub enum Value {
    Bool(bool),
    Int(i64),
    String(String),
    List(Vec<Value>),
    Null,
}

impl Value {
    /// Collapse this value to a boolean.
    ///
    /// `Null`, `false`, `0`, the empty string and the empty list are
    /// falsy; every other value is truthy.
    pub fn truthy(&self) -> bool {
        let falsy = match self {
            Self::Null => true,
            Self::Bool(flag) => !*flag,
            Self::Int(number) => *number == 0,
            Self::String(text) => text.is_empty(),
            Self::List(items) => items.is_empty(),
        };
        !falsy
    }

    /// Lower-case name of the variant, for use in diagnostics.
    fn type_name(&self) -> &'static str {
        match self {
            Self::Null => "null",
            Self::List(_) => "list",
            Self::String(_) => "string",
            Self::Int(_) => "int",
            Self::Bool(_) => "bool",
        }
    }
}
88
89impl From<&FactValue> for Value {
90    fn from(f: &FactValue) -> Self {
91        match f {
92            FactValue::Bool(b) => Self::Bool(*b),
93            FactValue::Int(n) => Self::Int(*n),
94            FactValue::String(s) => Self::String(s.clone()),
95        }
96    }
97}
98
99// ─── AST ─────────────────────────────────────────────────────────────
100
/// The `NS` prefix of an identifier or call (`facts`, `vars`, `iter`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Namespace {
    /// `facts.NAME`.
    Facts,
    /// `vars.NAME`.
    Vars,
    /// `iter.NAME` — the per-iteration context. It carries data only
    /// when an `IterEnv` has been attached to the evaluator (via
    /// [`WhenEnv::with_iter`]). Without one, `iter.X` evaluates to
    /// `null` and `iter.has_file(_)` to `false`, in line with the
    /// "missing fact is falsy" rule.
    Iter,
}
112
/// Binary comparison operators carried by [`WhenExpr::Cmp`].
///
/// The grammar's `matches` operator has no variant here: it is
/// represented by the dedicated [`WhenExpr::Matches`] node.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CmpOp {
    /// `==`
    Eq,
    /// `!=`
    Ne,
    /// `<`
    Lt,
    /// `<=`
    Le,
    /// `>`
    Gt,
    /// `>=`
    Ge,
    /// `in`
    In,
}
123
124#[derive(Debug, Clone)]
125pub enum WhenExpr {
126    Literal(Value),
127    Ident {
128        ns: Namespace,
129        name: String,
130    },
131    /// `<ns>.<method>(args...)`. Currently only the `iter`
132    /// namespace exposes callable methods; an unknown
133    /// (namespace, method) pair is rejected at parse time so
134    /// typos don't silently coerce to `null` like value-style
135    /// idents do.
136    Call {
137        ns: Namespace,
138        method: String,
139        args: Vec<WhenExpr>,
140    },
141    Not(Box<WhenExpr>),
142    And(Box<WhenExpr>, Box<WhenExpr>),
143    Or(Box<WhenExpr>, Box<WhenExpr>),
144    Cmp {
145        left: Box<WhenExpr>,
146        op: CmpOp,
147        right: Box<WhenExpr>,
148    },
149    /// `left matches <compiled regex>` — RHS is compiled at parse time.
150    Matches {
151        left: Box<WhenExpr>,
152        pattern: Regex,
153    },
154    List(Vec<WhenExpr>),
155}
156
157// ─── Evaluation environment ──────────────────────────────────────────
158
159#[derive(Debug)]
160pub struct WhenEnv<'a> {
161    pub facts: &'a FactValues,
162    pub vars: &'a HashMap<String, String>,
163    /// Per-iteration context, populated when this `WhenEnv`
164    /// gates an iterated rule (`for_each_dir` /
165    /// `for_each_file` / `every_matching_has`). `None` for
166    /// top-level rule gating, where `iter.*` references
167    /// resolve to falsy / null per the "unknown fact is
168    /// falsy" convention.
169    pub iter: Option<IterEnv<'a>>,
170}
171
172impl<'a> WhenEnv<'a> {
173    /// Construct a `WhenEnv` without iteration context — the
174    /// shape every existing call site uses. `iter.*` references
175    /// in the expression resolve to null / false.
176    #[must_use]
177    pub fn new(facts: &'a FactValues, vars: &'a HashMap<String, String>) -> Self {
178        Self {
179            facts,
180            vars,
181            iter: None,
182        }
183    }
184
185    /// Attach an iteration context. The same `WhenEnv` shape can
186    /// then evaluate `iter.path`, `iter.basename`, and
187    /// `iter.has_file(...)` against the supplied path + index.
188    #[must_use]
189    pub fn with_iter(mut self, iter: IterEnv<'a>) -> Self {
190        self.iter = Some(iter);
191        self
192    }
193}
194
195/// Iteration context exposed to `when:` expressions through the
196/// `iter.*` namespace. Built once per iterated entry by
197/// `for_each_*` rules and threaded into both the outer
198/// `when_iter:` filter and any nested rule's `when:`.
199#[derive(Debug, Clone, Copy)]
200pub struct IterEnv<'a> {
201    /// Relative path of the iterated entry (as walker reported).
202    pub path: &'a Path,
203    /// Whether the iterated entry is a directory. `iter.has_file`
204    /// only does meaningful work when this is `true`; for files
205    /// it returns `false`.
206    pub is_dir: bool,
207    /// File index, used by `iter.has_file(pattern)` to look up
208    /// children of the iterated path.
209    pub index: &'a FileIndex,
210}
211
212// ─── Public entry points ─────────────────────────────────────────────
213
214pub fn parse(src: &str) -> Result<WhenExpr, WhenError> {
215    parse_inner(src).map_err(|e| enrich_diagnostic(src, e))
216}
217
218fn parse_inner(src: &str) -> Result<WhenExpr, WhenError> {
219    let tokens = lex(src)?;
220    let mut p = Parser::new(tokens);
221    let expr = p.parse_expr()?;
222    p.expect_eof()?;
223    Ok(expr)
224}
225
226/// Enrich a [`WhenError::Parse`] with domain-specific hints for the
227/// pitfalls catalogued in `docs/development/CONFIG-AUTHORING.md` § 12:
228///
229/// - **#12a** — `&&` / `||` / `!` symbols → suggest `and` / `or` / `not`.
230/// - **#12b** — `iter.foo.bar(` method-call shapes → suggest the
231///   `matches` operator or the bounded iter accessor set.
232///
233/// Only applies to `WhenError::Parse`; evaluation errors pass through
234/// unchanged. The original message is preserved; hints are appended on
235/// new lines so callers that just `Display` the error still get the
236/// position info.
237fn enrich_diagnostic(src: &str, err: WhenError) -> WhenError {
238    let WhenError::Parse { pos, message } = err else {
239        // Eval / Regex errors don't have positional context to
240        // diagnose; pass them through unchanged.
241        return err;
242    };
243    let hint = symbol_keyword_hint(src, pos).or_else(|| method_call_hint(src, pos));
244    match hint {
245        Some(h) => WhenError::Parse {
246            pos,
247            message: format!("{message}\n  hint: {h}"),
248        },
249        None => WhenError::Parse { pos, message },
250    }
251}
252
/// Detect a C-style logical symbol (`&&` / `||` / `!`) at `pos` and
/// return the matching keyword suggestion. Pitfall #12a.
///
/// `pos` is used as a byte index into `src`. Returns `None` when `pos`
/// is out of range or the byte there is not one of the symbols.
///
/// - `&` / `|` produce a hint only when the preceding byte is not the
///   same symbol, i.e. when `pos` sits on the first character of the
///   pair (a lone `&` / `|` is hinted too).
/// - `!` produces a hint unless it is immediately followed by `=`:
///   `!=` is a valid comparison operator, so a parse error positioned
///   on it has some other cause and "use `not`" would mislead.
fn symbol_keyword_hint(src: &str, pos: usize) -> Option<&'static str> {
    let bytes = src.as_bytes();
    let at = *bytes.get(pos)?;
    let prev = pos.checked_sub(1).and_then(|p| bytes.get(p)).copied();
    let next = pos.checked_add(1).and_then(|n| bytes.get(n)).copied();

    match at {
        b'&' if prev != Some(b'&') => {
            Some("`&&` is not a `when:` operator. Use the keyword `and` instead.")
        }
        b'|' if prev != Some(b'|') => {
            Some("`||` is not a `when:` operator. Use the keyword `or` instead.")
        }
        b'!' if next != Some(b'=') => {
            Some("`!` is not a `when:` operator. Use the keyword `not` instead.")
        }
        _ => None,
    }
}
273
274/// Detect `iter.foo.bar(` method-call shapes anywhere in `src`
275/// and return a hint. Pitfall #12b.
276///
277/// The `iter.*` accessors are a fixed set: `iter.path`,
278/// `iter.basename`, `iter.parent_name`, `iter.is_dir`,
279/// `iter.has_file(...)`. There are no string method calls; use the
280/// `matches` operator for regex matching.
281///
282/// We use a global regex rather than a position-relative check
283/// because the lexer's failure column for `iter.path.contains("foo")`
284/// is on the second `.`, not the open paren — the position alone
285/// doesn't carry enough context to infer the bad shape.
286fn method_call_hint(src: &str, _pos: usize) -> Option<&'static str> {
287    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
288    let re = RE.get_or_init(|| {
289        // `iter.<ident>.<ident>(` — a double-dot chain off iter that
290        // ends in a function-call-shaped token. Catches
291        // `iter.path.contains(...)`, `iter.basename.starts_with(...)`,
292        // `iter.parent_name.ends_with(...)`, etc.
293        regex::Regex::new(r"\biter\.\w+\.\w+\s*\(").expect("static regex")
294    });
295    if re.is_match(src) {
296        return Some(
297            "`iter.*` accessors are a fixed set; method calls aren't supported. Use the `matches` \
298             operator for regex matching, e.g. `iter.path matches \"node_modules\"`. The supported \
299             accessors are documented in `docs/development/CONFIG-AUTHORING.md` § 12b.",
300        );
301    }
302    None
303}
304
305impl WhenExpr {
306    pub fn evaluate(&self, env: &WhenEnv<'_>) -> Result<bool, WhenError> {
307        let v = eval(self, env)?;
308        Ok(v.truthy())
309    }
310}
311
312mod eval;
313mod lexer;
314mod parser;
315
316use eval::eval;
317use lexer::lex;
318use parser::Parser;
319
320// ─── Tests ───────────────────────────────────────────────────────────
321
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: a small set of facts and vars used by every test.
    fn env() -> (FactValues, HashMap<String, String>) {
        let mut f = FactValues::new();
        f.insert("is_rust".into(), FactValue::Bool(true));
        f.insert("is_node".into(), FactValue::Bool(false));
        f.insert("n_files".into(), FactValue::Int(42));
        f.insert("primary".into(), FactValue::String("Rust".into()));
        let mut v = HashMap::new();
        v.insert("org".into(), "Acme Corp".into());
        v.insert("year".into(), "2026".into());
        (f, v)
    }

    /// Parse and evaluate `src` against the fixture with no iteration
    /// context; panics if either step fails.
    fn check(src: &str) -> bool {
        let (facts, vars) = env();
        let expr = parse(src).unwrap();
        expr.evaluate(&WhenEnv::new(&facts, &vars)).unwrap()
    }

    #[test]
    fn simple_facts() {
        assert!(check("facts.is_rust"));
        assert!(!check("facts.is_node"));
        assert!(check("not facts.is_node"));
    }

    #[test]
    fn integer_comparison() {
        assert!(check("facts.n_files > 0"));
        assert!(check("facts.n_files == 42"));
        assert!(!check("facts.n_files < 10"));
        assert!(check("facts.n_files >= 42"));
    }

    #[test]
    fn string_equality() {
        assert!(check("facts.primary == \"Rust\""));
        assert!(!check("facts.primary == \"Go\""));
    }

    #[test]
    fn logical_ops_short_circuit() {
        assert!(check("facts.is_rust and facts.n_files > 0"));
        assert!(check("facts.is_node or facts.is_rust"));
        assert!(!check("facts.is_node and facts.nonexistent == 5"));
    }

    #[test]
    fn in_list() {
        assert!(check("facts.primary in [\"Rust\", \"Go\"]"));
        assert!(!check("facts.primary in [\"Python\", \"Java\"]"));
    }

    #[test]
    fn in_string_is_substring() {
        assert!(check("\"cme\" in vars.org"));
        assert!(!check("\"Xyz\" in vars.org"));
    }

    #[test]
    fn matches_regex() {
        assert!(check("vars.org matches \"^Acme\""));
        assert!(check("vars.year matches \"^\\\\d{4}$\""));
        assert!(!check("vars.org matches \"^Xyz\""));
    }

    #[test]
    fn parentheses_override_precedence() {
        assert!(check(
            "(facts.is_node or facts.is_rust) and facts.n_files > 0"
        ));
        // Precedence: and binds tighter than or, so this is
        // `is_node or (is_rust and is_node)` == false or (true and false) == false.
        assert!(!check("facts.is_node or facts.is_rust and facts.is_node"));
    }

    #[test]
    fn unknown_facts_are_null_and_falsy() {
        assert!(!check("facts.nonexistent"));
        assert!(check("not facts.nonexistent"));
    }

    #[test]
    fn unknown_vars_are_null() {
        assert!(!check("vars.not_set"));
    }

    #[test]
    fn null_equals_null() {
        assert!(check("facts.nonexistent == null"));
    }

    #[test]
    fn parse_rejects_bare_equals() {
        let e = parse("facts.x = 1").unwrap_err();
        // `matches!` only yields a bool; it must be asserted to have any effect.
        assert!(
            matches!(e, WhenError::Parse { .. }),
            "expected parse error, got {e:?}"
        );
    }

    #[test]
    fn parse_rejects_bang_alone() {
        let e = parse("!facts.x").unwrap_err();
        assert!(
            matches!(e, WhenError::Parse { .. }),
            "expected parse error, got {e:?}"
        );
    }

    #[test]
    fn parse_rejects_invalid_identifier_namespace() {
        let e = parse("ctx.x").unwrap_err();
        let WhenError::Parse { message, .. } = e else {
            panic!("expected parse error, got {e:?}");
        };
        assert!(message.contains("facts.NAME"), "msg: {message}");
    }

    #[test]
    fn parse_rejects_matches_with_non_literal_rhs() {
        let e = parse("vars.org matches vars.pattern").unwrap_err();
        let WhenError::Parse { message, .. } = e else {
            panic!("expected parse error, got {e:?}");
        };
        assert!(message.contains("string literal"), "msg: {message}");
    }

    #[test]
    fn parse_rejects_invalid_regex() {
        let e = parse("vars.org matches \"[unclosed\"").unwrap_err();
        assert!(
            matches!(e, WhenError::Regex(_)),
            "expected regex error, got {e:?}"
        );
    }

    #[test]
    fn evaluate_rejects_ordering_mixed_types() {
        let (facts, vars) = env();
        let expr = parse("facts.primary > facts.n_files").unwrap();
        let result = expr.evaluate(&WhenEnv::new(&facts, &vars));
        assert!(result.is_err());
    }

    #[test]
    fn string_escapes() {
        let (facts, vars) = env();
        let expr = parse("vars.org == \"Acme Corp\"").unwrap();
        assert!(expr.evaluate(&WhenEnv::new(&facts, &vars)).unwrap());
    }

    #[test]
    fn nested_not_and_or() {
        assert!(check(
            "not (facts.is_node or (facts.n_files == 0 and facts.is_rust))"
        ));
    }

    // ─── iter namespace ──────────────────────────────────────────

    use crate::walker::{FileEntry, FileIndex};
    use std::path::Path;

    /// Build a `FileIndex` from `(path, is_dir)` pairs; every entry
    /// gets a size of 1.
    fn idx(paths: &[(&str, bool)]) -> FileIndex {
        FileIndex::from_entries(
            paths
                .iter()
                .map(|(p, is_dir)| FileEntry {
                    path: Path::new(p).into(),
                    is_dir: *is_dir,
                    size: 1,
                })
                .collect(),
        )
    }

    /// Like `check`, but with an iteration context attached.
    fn check_iter(src: &str, iter_path: &Path, is_dir: bool, index: &FileIndex) -> bool {
        let (facts, vars) = env();
        let expr = parse(src).unwrap();
        let when_env = WhenEnv::new(&facts, &vars).with_iter(IterEnv {
            path: iter_path,
            is_dir,
            index,
        });
        expr.evaluate(&when_env).unwrap()
    }

    #[test]
    fn iter_namespace_parses_and_resolves_value_fields() {
        let index = idx(&[("crates/alint-core", true)]);
        assert!(check_iter(
            "iter.path == \"crates/alint-core\"",
            Path::new("crates/alint-core"),
            true,
            &index,
        ));
        assert!(check_iter(
            "iter.basename == \"alint-core\"",
            Path::new("crates/alint-core"),
            true,
            &index,
        ));
        assert!(check_iter(
            "iter.parent_name == \"crates\"",
            Path::new("crates/alint-core"),
            true,
            &index,
        ));
        assert!(check_iter(
            "iter.is_dir",
            Path::new("crates/alint-core"),
            true,
            &index,
        ));
    }

    #[test]
    fn iter_has_file_matches_literal_child() {
        let index = idx(&[
            ("crates/alint-core", true),
            ("crates/alint-core/Cargo.toml", false),
            ("crates/alint-core/src", true),
            ("crates/alint-core/src/lib.rs", false),
            ("crates/other", true),
            ("crates/other/Cargo.toml", false),
        ]);
        assert!(check_iter(
            "iter.has_file(\"Cargo.toml\")",
            Path::new("crates/alint-core"),
            true,
            &index,
        ));
        assert!(!check_iter(
            "iter.has_file(\"package.json\")",
            Path::new("crates/alint-core"),
            true,
            &index,
        ));
    }

    #[test]
    fn iter_has_file_supports_recursive_glob() {
        let index = idx(&[
            ("pkg", true),
            ("pkg/src", true),
            ("pkg/src/main.rs", false),
            ("pkg/src/inner", true),
            ("pkg/src/inner/lib.rs", false),
        ]);
        assert!(check_iter(
            "iter.has_file(\"**/*.rs\")",
            Path::new("pkg"),
            true,
            &index,
        ));
        assert!(!check_iter(
            "iter.has_file(\"**/*.py\")",
            Path::new("pkg"),
            true,
            &index,
        ));
    }

    #[test]
    fn iter_has_file_returns_false_for_file_iteration() {
        let index = idx(&[("a.rs", false)]);
        assert!(!check_iter(
            "iter.has_file(\"x\")",
            Path::new("a.rs"),
            false,
            &index,
        ));
    }

    #[test]
    fn iter_references_outside_iter_context_are_falsy() {
        // Outside an iteration, `iter.X` resolves to null and
        // `iter.has_file(...)` to false — same "missing fact"
        // convention that `facts.unknown` already follows.
        assert!(!check("iter.path"));
        assert!(check("iter.path == null"));
        assert!(!check("iter.has_file(\"X\")"));
    }

    #[test]
    fn iter_has_file_can_compose_with_boolean_logic() {
        let index = idx(&[("pkg", true), ("pkg/Cargo.toml", false), ("other", true)]);
        assert!(check_iter(
            "iter.has_file(\"Cargo.toml\") and iter.is_dir",
            Path::new("pkg"),
            true,
            &index,
        ));
        assert!(!check_iter(
            "iter.has_file(\"BUILD\") or iter.has_file(\"BUILD.bazel\")",
            Path::new("pkg"),
            true,
            &index,
        ));
    }

    #[test]
    fn parse_rejects_call_on_non_iter_namespace() {
        let e = parse("facts.something(\"x\")").unwrap_err();
        let WhenError::Parse { message, .. } = e else {
            panic!("expected parse error, got {e:?}");
        };
        assert!(
            message.contains("only available on `iter`"),
            "msg: {message}"
        );
    }

    #[test]
    fn parse_rejects_unknown_iter_method() {
        let e = parse("iter.bogus(\"x\")").unwrap_err();
        let WhenError::Parse { message, .. } = e else {
            panic!("expected parse error, got {e:?}");
        };
        assert!(message.contains("unknown iter method"), "msg: {message}");
    }

    #[test]
    fn evaluate_rejects_has_file_with_non_string_arg() {
        let (facts, vars) = env();
        let index = FileIndex::default();
        let expr = parse("iter.has_file(42)").unwrap();
        let when_env = WhenEnv::new(&facts, &vars).with_iter(IterEnv {
            path: Path::new("p"),
            is_dir: true,
            index: &index,
        });
        let err = expr.evaluate(&when_env).unwrap_err();
        let WhenError::Eval(msg) = err else {
            panic!("expected eval error, got {err:?}");
        };
        assert!(msg.contains("must be a string"), "msg: {msg}");
    }
}