Skip to main content

alint_rules/
for_each_dir.rs

1//! `for_each_dir` — iterate over every directory matching `select:` and
2//! evaluate a nested `require:` block against each. Path-template tokens
3//! in the nested specs are pre-substituted per iteration using the
4//! iterated directory as the anchor.
5//!
6//! Token conventions (shared with `for_each_file` and `pair`):
7//!
8//! - `{path}` — full relative path of the iterated entry.
9//! - `{dir}`  — parent directory of the iterated entry.
10//! - `{basename}` — name of the iterated entry.
11//! - `{stem}` — name with the final extension stripped.
12//! - `{ext}` — final extension without the dot.
13//! - `{parent_name}` — name of the entry's parent directory.
14//!
15//! When iterating *directories*, use `{path}` to name the iterated dir
16//! itself (e.g. `"{path}/mod.rs"` to require a `mod.rs` inside it). Use
17//! `{dir}` only when you need the parent of the matched entry.
18//!
19//! Canonical shape — for every direct subdirectory of `src/`, require a
20//! `mod.rs`:
21//!
22//! ```yaml
23//! - id: every-module-has-mod
24//!   kind: for_each_dir
25//!   select: "src/*"
26//!   require:
27//!     - kind: file_exists
28//!       paths: "{path}/mod.rs"
29//!   level: error
30//! ```
31
32use alint_core::template::PathTokens;
33use alint_core::when::{IterEnv, WhenExpr};
34use alint_core::{
35    CompiledNestedSpec, Context, Error, Level, NestedRuleSpec, Result, Rule, RuleSpec, Scope,
36    Violation,
37};
38use serde::Deserialize;
39
40#[derive(Debug, Deserialize)]
41#[serde(deny_unknown_fields)]
42struct Options {
43    select: String,
44    /// Optional per-iteration filter — evaluated against each
45    /// iterated entry's `iter` context. Common shape:
46    /// `iter.has_file("Cargo.toml")` to scope the iteration to
47    /// directories that look like a workspace member.
48    #[serde(default)]
49    when_iter: Option<String>,
50    require: Vec<NestedRuleSpec>,
51}
52
53#[derive(Debug)]
54pub struct ForEachDirRule {
55    id: String,
56    level: Level,
57    policy_url: Option<String>,
58    select_scope: Scope,
59    when_iter: Option<WhenExpr>,
60    require: Vec<CompiledNestedSpec>,
61}
62
63impl Rule for ForEachDirRule {
64    fn id(&self) -> &str {
65        &self.id
66    }
67    fn level(&self) -> Level {
68        self.level
69    }
70    fn policy_url(&self) -> Option<&str> {
71        self.policy_url.as_deref()
72    }
73
74    fn requires_full_index(&self) -> bool {
75        // Cross-file: per-directory verdicts depend on what's in
76        // each iterated dir as a whole, not just changed entries.
77        // A `for_each_dir` over `src/*` requiring `mod.rs` must
78        // see every `src/*` even if only one file inside it
79        // changed. Per roadmap, opts out of `--changed` filtering.
80        true
81    }
82
83    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
84        evaluate_for_each(
85            &self.id,
86            self.level,
87            &self.select_scope,
88            self.when_iter.as_ref(),
89            &self.require,
90            ctx,
91            IterateMode::Dirs,
92        )
93    }
94}
95
96pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
97    alint_core::reject_scope_filter_on_cross_file(spec, "for_each_dir")?;
98    let opts: Options = spec
99        .deserialize_options()
100        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
101    if opts.require.is_empty() {
102        return Err(Error::rule_config(
103            &spec.id,
104            "for_each_dir requires at least one nested rule under `require:`",
105        ));
106    }
107    let select_scope = Scope::from_patterns(&[opts.select])?;
108    let when_iter = parse_when_iter(spec, opts.when_iter.as_deref())?;
109    let require = compile_nested_require(&spec.id, opts.require)?;
110    Ok(Box::new(ForEachDirRule {
111        id: spec.id.clone(),
112        level: spec.level,
113        policy_url: spec.policy_url.clone(),
114        select_scope,
115        when_iter,
116        require,
117    }))
118}
119
120/// Pre-compile each `NestedRuleSpec` in `require:` so its
121/// `when:` source is parsed exactly once at rule-build time.
122/// Shared by `for_each_dir`, `for_each_file`, and
123/// `every_matching_has` — all three accept nested rules with
124/// optional `when:` clauses, and all three pre-v0.9.12 re-
125/// parsed the source per iteration. This helper is the single
126/// place new cross-file iteration rules thread their require
127/// list through.
128pub(crate) fn compile_nested_require(
129    parent_id: &str,
130    require: Vec<NestedRuleSpec>,
131) -> Result<Vec<CompiledNestedSpec>> {
132    require
133        .into_iter()
134        .enumerate()
135        .map(|(idx, spec)| CompiledNestedSpec::compile(spec, parent_id, idx))
136        .collect()
137}
138
139/// Compile a `when_iter:` source string into a `WhenExpr` at
140/// rule-build time. Public to the crate so the sibling
141/// `for_each_file` and `every_matching_has` rules can reuse the
142/// same error shape.
143pub(crate) fn parse_when_iter(spec: &RuleSpec, src: Option<&str>) -> Result<Option<WhenExpr>> {
144    let Some(src) = src else { return Ok(None) };
145    alint_core::when::parse(src)
146        .map(Some)
147        .map_err(|e| Error::rule_config(&spec.id, format!("invalid `when_iter:`: {e}")))
148}
149
/// Selects which index entries [`evaluate_for_each`] walks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum IterateMode {
    /// Walk the index's directories only.
    Dirs,
    /// Walk the index's files only.
    Files,
    /// Walk directories first, then files — the mode used by
    /// `every_matching_has`.
    Both,
}
158
159/// Shared evaluation logic for `for_each_dir`, `for_each_file`, and
160/// `every_matching_has`. `mode` selects which entries to iterate.
161/// `when_iter` (compiled at rule-build time) gates each iteration:
162/// when present and false for an entry, that entry is skipped
163/// before any nested rule is built or evaluated.
164///
165/// 108 lines after the v0.9.8 literal-path bypass landed —
166/// extracting the bypass into a separate helper would require
167/// threading the `parent_id` / level / current entry / nested
168/// spec through 5 args, and the bypass and the fallback path
169/// share the violation-attribution loop. Reads better
170/// top-to-bottom as one phased dispatcher.
171#[allow(clippy::too_many_lines)]
172pub(crate) fn evaluate_for_each(
173    parent_id: &str,
174    level: Level,
175    select_scope: &Scope,
176    when_iter: Option<&WhenExpr>,
177    require: &[CompiledNestedSpec],
178    ctx: &Context<'_>,
179    mode: IterateMode,
180) -> Result<Vec<Violation>> {
181    let Some(registry) = ctx.registry else {
182        return Err(Error::Other(format!(
183            "rule {parent_id}: nested-rule evaluation needs a RuleRegistry in the Context \
184             (likely an Engine constructed without one)",
185        )));
186    };
187
188    let entries: Box<dyn Iterator<Item = _>> = match mode {
189        IterateMode::Dirs => Box::new(ctx.index.dirs()),
190        IterateMode::Files => Box::new(ctx.index.files()),
191        IterateMode::Both => Box::new(ctx.index.dirs().chain(ctx.index.files())),
192    };
193
194    let mut violations = Vec::new();
195    for entry in entries {
196        if !select_scope.matches(&entry.path, ctx.index) {
197            continue;
198        }
199
200        // Per-iteration `when_iter:` filter. Cheap to evaluate
201        // (one IterEnv build + one expression walk per matched
202        // entry); skips the nested-rule build entirely on a
203        // false verdict, which is the whole point of the field.
204        let iter_env = IterEnv {
205            path: &entry.path,
206            is_dir: entry.is_dir,
207            index: ctx.index,
208        };
209        if let Some(expr) = when_iter {
210            if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
211                let env = alint_core::WhenEnv {
212                    facts,
213                    vars,
214                    iter: Some(iter_env),
215                };
216                match expr.evaluate(&env) {
217                    Ok(true) => {}
218                    Ok(false) => continue,
219                    Err(e) => {
220                        violations.push(
221                            Violation::new(format!("{parent_id}: when_iter error: {e}"))
222                                .with_path(entry.path.clone()),
223                        );
224                        continue;
225                    }
226                }
227            }
228        }
229
230        let tokens = PathTokens::from_path(&entry.path);
231        for (i, nested) in require.iter().enumerate() {
232            // v0.9.12: nested `when:` is pre-compiled at rule-
233            // build time (`CompiledNestedSpec`) — gate on the
234            // already-parsed expression instead of re-parsing
235            // the source per iteration. Same `iter.*` context
236            // is available so a nested rule can reach back to
237            // the iteration just like the outer `when_iter:`
238            // does. We instantiate the per-iteration spec only
239            // AFTER the gate so a falsy `when:` skips both the
240            // template-render work AND the registry build.
241            if let Some(expr) = &nested.when {
242                if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
243                    let env = alint_core::WhenEnv {
244                        facts,
245                        vars,
246                        iter: Some(iter_env),
247                    };
248                    match expr.evaluate(&env) {
249                        Ok(true) => {}
250                        Ok(false) => continue,
251                        Err(e) => {
252                            violations.push(
253                                Violation::new(format!(
254                                    "{parent_id}: nested rule #{i} when error: {e}"
255                                ))
256                                .with_path(entry.path.clone()),
257                            );
258                            continue;
259                        }
260                    }
261                }
262            }
263            let nested_spec = nested.spec.instantiate(parent_id, i, level, &tokens);
264            let nested_rule = match registry.build(&nested_spec) {
265                Ok(r) => r,
266                Err(e) => {
267                    violations.push(
268                        Violation::new(format!(
269                            "{parent_id}: failed to build nested rule #{i} for {}: {e}",
270                            entry.path.display()
271                        ))
272                        .with_path(entry.path.clone()),
273                    );
274                    continue;
275                }
276            };
277            // v0.9.8: when the nested rule's `paths:` template
278            // resolved to a single literal path AND the rule is
279            // a per-file rule, bypass `rule.evaluate(ctx)` —
280            // which would iterate `ctx.index.files()` (1M
281            // entries) for a single-target lookup — and dispatch
282            // via `evaluate_file` against the in-index entry
283            // directly. Closes the v0.9.7 → v0.9.8 cliff for the
284            // canonical for_each_file × per-file-content-rule
285            // shape (S7's `every-lib-has-content` was 484s under
286            // v0.9.7's full-index scan; this drops it to a few
287            // milliseconds × N iterations).
288            //
289            // For non-per-file rules (e.g. `file_exists`,
290            // `toml_path_matches`), fall through to the rule's
291            // own evaluate — file_exists has its own literal-
292            // path fast path (contains_file lookup) since
293            // v0.9.5; toml_path_matches reads the file
294            // directly without scanning the full index.
295            // v0.9.10: a single `path_scope().matches(literal, ctx.index)`
296            // covers both the path-glob AND the per-rule
297            // `scope_filter` ancestor predicate, since `Scope`
298            // now owns its `Option<ScopeFilter>` and `matches`
299            // consults it. The earlier v0.9.9
300            // `nested_rule.scope_filter()` guard this bypass
301            // had is no longer needed.
302            if let Some(literal) = nested_spec_single_literal(&nested_spec)
303                && let Some(pf) = nested_rule.as_per_file()
304                && pf.path_scope().matches(&literal, ctx.index)
305            {
306                let nested_violations = evaluate_one_per_file_rule(parent_id, i, &literal, pf, ctx);
307                for mut v in nested_violations {
308                    if v.path.is_none() {
309                        v.path = Some(entry.path.clone());
310                    }
311                    violations.push(v);
312                }
313                continue;
314            }
315            let nested_violations = nested_rule.evaluate(ctx)?;
316            for mut v in nested_violations {
317                if v.path.is_none() {
318                    v.path = Some(entry.path.clone());
319                }
320                violations.push(v);
321            }
322        }
323    }
324    Ok(violations)
325}
326
327/// Extract a single literal relative path from a nested rule
328/// spec's `paths:` field, or `None` if the spec carries multiple
329/// patterns / a glob / an include-exclude shape. Used by
330/// [`evaluate_for_each`] to detect when a per-file nested rule
331/// can be dispatched via `evaluate_file` against a single
332/// in-index entry instead of going through the rule's own
333/// O(N) full-index scan.
334///
335/// Conservative: returns `None` for any pattern containing a
336/// glob metacharacter, even when the metacharacter is escaped —
337/// the bench cliff this exists to fix is the canonical
338/// `paths: "{path}/<basename>"` shape, which always resolves to
339/// a literal post-template-expansion. False positives here
340/// would silently bypass the rule's own glob handling.
341fn nested_spec_single_literal(spec: &alint_core::RuleSpec) -> Option<std::path::PathBuf> {
342    use alint_core::PathsSpec;
343    let paths = spec.paths.as_ref()?;
344    let single: &str = match paths {
345        PathsSpec::Single(s) => s,
346        PathsSpec::Many(v) if v.len() == 1 => &v[0],
347        _ => return None,
348    };
349    if single.is_empty() || single.starts_with('!') {
350        return None;
351    }
352    if single
353        .chars()
354        .any(|c| matches!(c, '*' | '?' | '[' | ']' | '{' | '}'))
355    {
356        return None;
357    }
358    Some(std::path::PathBuf::from(single))
359}
360
361/// Read the in-index file at `literal` once, dispatch to the
362/// per-file rule's `evaluate_file`, and return any violations
363/// (with `parent_id`-flavoured rule-error prefixing on failure
364/// to match the rule-major path's shape).
365fn evaluate_one_per_file_rule(
366    parent_id: &str,
367    nested_i: usize,
368    literal: &std::path::Path,
369    pf: &dyn alint_core::PerFileRule,
370    ctx: &Context<'_>,
371) -> Vec<Violation> {
372    if !ctx.index.contains_file(literal) {
373        // No in-index file at this path — same observable result
374        // as the rule's own `evaluate` would produce when its
375        // path_scope matches no files (i.e. zero violations).
376        return Vec::new();
377    }
378    let abs = ctx.root.join(literal);
379    let Ok(bytes) = std::fs::read(&abs) else {
380        // Mirror the rule-major behaviour: silent skip on read
381        // failure (permission flake, race with mid-walk delete).
382        return Vec::new();
383    };
384    match pf.evaluate_file(ctx, literal, &bytes) {
385        Ok(vs) => vs,
386        Err(e) => vec![Violation::new(format!(
387            "{parent_id}: nested rule #{nested_i} error on {}: {e}",
388            literal.display()
389        ))],
390    }
391}
392
#[cfg(test)]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex};
    use std::path::Path;

    /// Build an in-memory index from `(path, is_dir)` pairs; every
    /// entry gets a size of 1.
    fn make_index(entries: &[(&str, bool)]) -> FileIndex {
        FileIndex::from_entries(
            entries
                .iter()
                .map(|&(raw, is_dir)| FileEntry {
                    path: Path::new(raw).into(),
                    is_dir,
                    size: 1,
                })
                .collect(),
        )
    }

    /// Evaluate `rule` against an index built from `files`, using the
    /// builtin registry and no facts, vars, or git data.
    fn run(rule: &ForEachDirRule, files: &[(&str, bool)]) -> Vec<Violation> {
        let file_index = make_index(files);
        let rules = crate::builtin_registry();
        let ctx = Context {
            root: Path::new("/"),
            index: &file_index,
            registry: Some(&rules),
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        rule.evaluate(&ctx).unwrap()
    }

    /// An error-level rule with id `t`, no `when_iter`, and the given
    /// selection pattern and nested specs.
    fn make_rule(select: &str, nested: Vec<NestedRuleSpec>) -> ForEachDirRule {
        ForEachDirRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            select_scope: Scope::from_patterns(&[select.to_string()]).unwrap(),
            when_iter: None,
            require: compile_nested_require("t", nested).unwrap(),
        }
    }

    /// A nested `file_exists` spec for `path`, built through YAML so
    /// the test takes the same route as a user-written config.
    fn file_exists_spec(path: &str) -> NestedRuleSpec {
        let yaml = format!("kind: file_exists\npaths: \"{path}\"\n");
        serde_yaml_ng::from_str(&yaml).unwrap()
    }

    #[test]
    fn passes_when_every_dir_has_required_file() {
        let files = [
            ("src", true),
            ("src/foo", true),
            ("src/foo/mod.rs", false),
            ("src/bar", true),
            ("src/bar/mod.rs", false),
        ];
        let r = make_rule("src/*", vec![file_exists_spec("{path}/mod.rs")]);
        let v = run(&r, &files);
        assert!(v.is_empty(), "unexpected: {v:?}");
    }

    #[test]
    fn violates_when_a_dir_missing_required_file() {
        // `src/bar` is the only selected dir without a `mod.rs`.
        let files = [
            ("src", true),
            ("src/foo", true),
            ("src/foo/mod.rs", false),
            ("src/bar", true),
        ];
        let r = make_rule("src/*", vec![file_exists_spec("{path}/mod.rs")]);
        let v = run(&r, &files);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("src/bar")));
    }

    #[test]
    fn no_matched_dirs_means_no_violations() {
        let files = [("src", true), ("src/foo", true)];
        let r = make_rule("components/*", vec![file_exists_spec("{dir}/index.tsx")]);
        let v = run(&r, &files);
        assert!(v.is_empty());
    }

    #[test]
    fn every_require_rule_evaluated_per_dir() {
        // `src/foo` has a `mod.rs` but no README.
        let files = [
            ("src", true),
            ("src/foo", true),
            ("src/foo/mod.rs", false),
        ];
        let nested = vec![
            file_exists_spec("{path}/mod.rs"),
            file_exists_spec("{path}/README.md"),
        ];
        let r = make_rule("src/*", nested);
        let v = run(&r, &files);
        assert_eq!(v.len(), 1);
        assert!(
            v[0].message.contains("README"),
            "expected README in message; got {:?}",
            v[0].message
        );
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // `for_each_dir` is a cross-file rule (it reports
        // `requires_full_index() == true`), while `scope_filter` is
        // only supported on per-file rules. `build` must refuse the
        // combination with a message that states the restriction
        // and names the offending kind.
        let yaml = r#"
id: t
kind: for_each_dir
select: "src/*"
require:
  - kind: file_exists
    paths: "{path}/mod.rs"
level: error
scope_filter:
  has_ancestor: Cargo.toml
"#;
        let spec = crate::test_support::spec_yaml(yaml);
        let err = build(&spec).unwrap_err().to_string();
        assert!(
            err.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {err}",
        );
        assert!(
            err.contains("for_each_dir"),
            "expected message to name the cross-file kind, got: {err}",
        );
    }
}