Skip to main content

alint_rules/
for_each_dir.rs

1//! `for_each_dir` — iterate over every directory matching `select:` and
2//! evaluate a nested `require:` block against each. Path-template tokens
3//! in the nested specs are pre-substituted per iteration using the
4//! iterated directory as the anchor.
5//!
6//! Token conventions (shared with `for_each_file` and `pair`):
7//!
8//! - `{path}` — full relative path of the iterated entry.
9//! - `{dir}`  — parent directory of the iterated entry.
10//! - `{basename}` — name of the iterated entry.
11//! - `{stem}` — name with the final extension stripped.
12//! - `{ext}` — final extension without the dot.
13//! - `{parent_name}` — name of the entry's parent directory.
14//!
15//! When iterating *directories*, use `{path}` to name the iterated dir
16//! itself (e.g. `"{path}/mod.rs"` to require a `mod.rs` inside it). Use
17//! `{dir}` only when you need the parent of the matched entry.
18//!
19//! Canonical shape — for every direct subdirectory of `src/`, require a
20//! `mod.rs`:
21//!
22//! ```yaml
23//! - id: every-module-has-mod
24//!   kind: for_each_dir
25//!   select: "src/*"
26//!   require:
27//!     - kind: file_exists
28//!       paths: "{path}/mod.rs"
29//!   level: error
30//! ```
31
32use alint_core::template::PathTokens;
33use alint_core::when::{IterEnv, WhenExpr};
34use alint_core::{
35    CompiledNestedSpec, Context, Error, Level, NestedRuleSpec, Result, Rule, RuleSpec, Scope,
36    Violation,
37};
38use serde::Deserialize;
39
40#[derive(Debug, Deserialize)]
41#[serde(deny_unknown_fields)]
42struct Options {
43    select: String,
44    /// Optional per-iteration filter — evaluated against each
45    /// iterated entry's `iter` context. Common shape:
46    /// `iter.has_file("Cargo.toml")` to scope the iteration to
47    /// directories that look like a workspace member.
48    #[serde(default)]
49    when_iter: Option<String>,
50    require: Vec<NestedRuleSpec>,
51}
52
53#[derive(Debug)]
54pub struct ForEachDirRule {
55    id: String,
56    level: Level,
57    policy_url: Option<String>,
58    select_scope: Scope,
59    when_iter: Option<WhenExpr>,
60    require: Vec<CompiledNestedSpec>,
61}
62
63impl Rule for ForEachDirRule {
64    alint_core::rule_common_impl!();
65
66    fn requires_full_index(&self) -> bool {
67        // Cross-file: per-directory verdicts depend on what's in
68        // each iterated dir as a whole, not just changed entries.
69        // A `for_each_dir` over `src/*` requiring `mod.rs` must
70        // see every `src/*` even if only one file inside it
71        // changed. Per roadmap, opts out of `--changed` filtering.
72        true
73    }
74
75    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
76        evaluate_for_each(
77            &self.id,
78            self.level,
79            &self.select_scope,
80            self.when_iter.as_ref(),
81            &self.require,
82            ctx,
83            IterateMode::Dirs,
84        )
85    }
86}
87
88pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
89    alint_core::reject_scope_filter_on_cross_file(spec, "for_each_dir")?;
90    let opts: Options = spec
91        .deserialize_options()
92        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
93    if opts.require.is_empty() {
94        return Err(Error::rule_config(
95            &spec.id,
96            "for_each_dir requires at least one nested rule under `require:`",
97        ));
98    }
99    let select_scope = Scope::from_patterns(&[opts.select])?;
100    let when_iter = parse_when_iter(spec, opts.when_iter.as_deref())?;
101    let require = compile_nested_require(&spec.id, opts.require)?;
102    Ok(Box::new(ForEachDirRule {
103        id: spec.id.clone(),
104        level: spec.level,
105        policy_url: spec.policy_url.clone(),
106        select_scope,
107        when_iter,
108        require,
109    }))
110}
111
112/// Pre-compile each `NestedRuleSpec` in `require:` so its
113/// `when:` source is parsed exactly once at rule-build time.
114/// Shared by `for_each_dir`, `for_each_file`, and
115/// `every_matching_has` — all three accept nested rules with
116/// optional `when:` clauses, and all three pre-v0.9.12 re-
117/// parsed the source per iteration. This helper is the single
118/// place new cross-file iteration rules thread their require
119/// list through.
120pub(crate) fn compile_nested_require(
121    parent_id: &str,
122    require: Vec<NestedRuleSpec>,
123) -> Result<Vec<CompiledNestedSpec>> {
124    require
125        .into_iter()
126        .enumerate()
127        .map(|(idx, spec)| CompiledNestedSpec::compile(spec, parent_id, idx))
128        .collect()
129}
130
131/// Compile a `when_iter:` source string into a `WhenExpr` at
132/// rule-build time. Public to the crate so the sibling
133/// `for_each_file` and `every_matching_has` rules can reuse the
134/// same error shape.
135pub(crate) fn parse_when_iter(spec: &RuleSpec, src: Option<&str>) -> Result<Option<WhenExpr>> {
136    let Some(src) = src else { return Ok(None) };
137    alint_core::when::parse(src)
138        .map(Some)
139        .map_err(|e| Error::rule_config(&spec.id, format!("invalid `when_iter:`: {e}")))
140}
141
/// Selects which index entries [`evaluate_for_each`] walks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum IterateMode {
    /// Walk directories only.
    Dirs,
    /// Walk files only.
    Files,
    /// Walk directories first, then files — used by
    /// `every_matching_has`.
    Both,
}
150
151/// Shared evaluation logic for `for_each_dir`, `for_each_file`, and
152/// `every_matching_has`. `mode` selects which entries to iterate.
153/// `when_iter` (compiled at rule-build time) gates each iteration:
154/// when present and false for an entry, that entry is skipped
155/// before any nested rule is built or evaluated.
156///
157/// 108 lines after the v0.9.8 literal-path bypass landed —
158/// extracting the bypass into a separate helper would require
159/// threading the `parent_id` / level / current entry / nested
160/// spec through 5 args, and the bypass and the fallback path
161/// share the violation-attribution loop. Reads better
162/// top-to-bottom as one phased dispatcher.
163#[allow(clippy::too_many_lines)]
164pub(crate) fn evaluate_for_each(
165    parent_id: &str,
166    level: Level,
167    select_scope: &Scope,
168    when_iter: Option<&WhenExpr>,
169    require: &[CompiledNestedSpec],
170    ctx: &Context<'_>,
171    mode: IterateMode,
172) -> Result<Vec<Violation>> {
173    let Some(registry) = ctx.registry else {
174        return Err(Error::Other(format!(
175            "rule {parent_id}: nested-rule evaluation needs a RuleRegistry in the Context \
176             (likely an Engine constructed without one)",
177        )));
178    };
179
180    let entries: Box<dyn Iterator<Item = _>> = match mode {
181        IterateMode::Dirs => Box::new(ctx.index.dirs()),
182        IterateMode::Files => Box::new(ctx.index.files()),
183        IterateMode::Both => Box::new(ctx.index.dirs().chain(ctx.index.files())),
184    };
185
186    let mut violations = Vec::new();
187    for entry in entries {
188        if !select_scope.matches(&entry.path, ctx.index) {
189            continue;
190        }
191
192        // Per-iteration `when_iter:` filter. Cheap to evaluate
193        // (one IterEnv build + one expression walk per matched
194        // entry); skips the nested-rule build entirely on a
195        // false verdict, which is the whole point of the field.
196        let iter_env = IterEnv {
197            path: &entry.path,
198            is_dir: entry.is_dir,
199            index: ctx.index,
200        };
201        if let Some(expr) = when_iter {
202            if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
203                let env = alint_core::WhenEnv {
204                    facts,
205                    vars,
206                    iter: Some(iter_env),
207                };
208                match expr.evaluate(&env) {
209                    Ok(true) => {}
210                    Ok(false) => continue,
211                    Err(e) => {
212                        violations.push(
213                            Violation::new(format!("{parent_id}: when_iter error: {e}"))
214                                .with_path(entry.path.clone()),
215                        );
216                        continue;
217                    }
218                }
219            }
220        }
221
222        let tokens = PathTokens::from_path(&entry.path);
223        for (i, nested) in require.iter().enumerate() {
224            // v0.9.12: nested `when:` is pre-compiled at rule-
225            // build time (`CompiledNestedSpec`) — gate on the
226            // already-parsed expression instead of re-parsing
227            // the source per iteration. Same `iter.*` context
228            // is available so a nested rule can reach back to
229            // the iteration just like the outer `when_iter:`
230            // does. We instantiate the per-iteration spec only
231            // AFTER the gate so a falsy `when:` skips both the
232            // template-render work AND the registry build.
233            if let Some(expr) = &nested.when {
234                if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
235                    let env = alint_core::WhenEnv {
236                        facts,
237                        vars,
238                        iter: Some(iter_env),
239                    };
240                    match expr.evaluate(&env) {
241                        Ok(true) => {}
242                        Ok(false) => continue,
243                        Err(e) => {
244                            violations.push(
245                                Violation::new(format!(
246                                    "{parent_id}: nested rule #{i} when error: {e}"
247                                ))
248                                .with_path(entry.path.clone()),
249                            );
250                            continue;
251                        }
252                    }
253                }
254            }
255            let nested_spec = nested.spec.instantiate(parent_id, i, level, &tokens);
256            let nested_rule = match registry.build(&nested_spec) {
257                Ok(r) => r,
258                Err(e) => {
259                    violations.push(
260                        Violation::new(format!(
261                            "{parent_id}: failed to build nested rule #{i} for {}: {e}",
262                            entry.path.display()
263                        ))
264                        .with_path(entry.path.clone()),
265                    );
266                    continue;
267                }
268            };
269            // v0.9.8: when the nested rule's `paths:` template
270            // resolved to a single literal path AND the rule is
271            // a per-file rule, bypass `rule.evaluate(ctx)` —
272            // which would iterate `ctx.index.files()` (1M
273            // entries) for a single-target lookup — and dispatch
274            // via `evaluate_file` against the in-index entry
275            // directly. Closes the v0.9.7 → v0.9.8 cliff for the
276            // canonical for_each_file × per-file-content-rule
277            // shape (S7's `every-lib-has-content` was 484s under
278            // v0.9.7's full-index scan; this drops it to a few
279            // milliseconds × N iterations).
280            //
281            // For non-per-file rules (e.g. `file_exists`,
282            // `toml_path_matches`), fall through to the rule's
283            // own evaluate — file_exists has its own literal-
284            // path fast path (contains_file lookup) since
285            // v0.9.5; toml_path_matches reads the file
286            // directly without scanning the full index.
287            // v0.9.10: a single `path_scope().matches(literal, ctx.index)`
288            // covers both the path-glob AND the per-rule
289            // `scope_filter` ancestor predicate, since `Scope`
290            // now owns its `Option<ScopeFilter>` and `matches`
291            // consults it. The earlier v0.9.9
292            // `nested_rule.scope_filter()` guard this bypass
293            // had is no longer needed.
294            if let Some(literal) = nested_spec_single_literal(&nested_spec)
295                && let Some(pf) = nested_rule.as_per_file()
296                && pf.path_scope().matches(&literal, ctx.index)
297            {
298                let nested_violations = evaluate_one_per_file_rule(parent_id, i, &literal, pf, ctx);
299                for mut v in nested_violations {
300                    if v.path.is_none() {
301                        v.path = Some(entry.path.clone());
302                    }
303                    violations.push(v);
304                }
305                continue;
306            }
307            let nested_violations = nested_rule.evaluate(ctx)?;
308            for mut v in nested_violations {
309                if v.path.is_none() {
310                    v.path = Some(entry.path.clone());
311                }
312                violations.push(v);
313            }
314        }
315    }
316    Ok(violations)
317}
318
319/// Extract a single literal relative path from a nested rule
320/// spec's `paths:` field, or `None` if the spec carries multiple
321/// patterns / a glob / an include-exclude shape. Used by
322/// [`evaluate_for_each`] to detect when a per-file nested rule
323/// can be dispatched via `evaluate_file` against a single
324/// in-index entry instead of going through the rule's own
325/// O(N) full-index scan.
326///
327/// Conservative: returns `None` for any pattern containing a
328/// glob metacharacter, even when the metacharacter is escaped —
329/// the bench cliff this exists to fix is the canonical
330/// `paths: "{path}/<basename>"` shape, which always resolves to
331/// a literal post-template-expansion. False positives here
332/// would silently bypass the rule's own glob handling.
333fn nested_spec_single_literal(spec: &alint_core::RuleSpec) -> Option<std::path::PathBuf> {
334    use alint_core::PathsSpec;
335    let paths = spec.paths.as_ref()?;
336    let single: &str = match paths {
337        PathsSpec::Single(s) => s,
338        PathsSpec::Many(v) if v.len() == 1 => &v[0],
339        _ => return None,
340    };
341    if single.is_empty() || single.starts_with('!') {
342        return None;
343    }
344    if single
345        .chars()
346        .any(|c| matches!(c, '*' | '?' | '[' | ']' | '{' | '}'))
347    {
348        return None;
349    }
350    Some(std::path::PathBuf::from(single))
351}
352
353/// Read the in-index file at `literal` once, dispatch to the
354/// per-file rule's `evaluate_file`, and return any violations
355/// (with `parent_id`-flavoured rule-error prefixing on failure
356/// to match the rule-major path's shape).
357fn evaluate_one_per_file_rule(
358    parent_id: &str,
359    nested_i: usize,
360    literal: &std::path::Path,
361    pf: &dyn alint_core::PerFileRule,
362    ctx: &Context<'_>,
363) -> Vec<Violation> {
364    if !ctx.index.contains_file(literal) {
365        // No in-index file at this path — same observable result
366        // as the rule's own `evaluate` would produce when its
367        // path_scope matches no files (i.e. zero violations).
368        return Vec::new();
369    }
370    let abs = ctx.root.join(literal);
371    let bytes = match crate::io::read_capped(&abs) {
372        Ok(b) => b,
373        Err(crate::io::ReadCapError::TooLarge(n)) => {
374            // Over the 256 MiB whole-file cap — surface a clear
375            // violation rather than silently skipping (which used
376            // to mask an OOM-DoS surface on hostile / accidental
377            // multi-GB files reached via a `for_each_dir` literal
378            // path).
379            return vec![
380                Violation::new(format!(
381                    "{parent_id}: nested rule #{nested_i} cannot analyze {} \
382                     — file is too large ({n} bytes; {} MiB cap)",
383                    literal.display(),
384                    crate::io::MAX_ANALYZE_BYTES / (1024 * 1024),
385                ))
386                .with_path(literal),
387            ];
388        }
389        Err(crate::io::ReadCapError::Io(_)) => {
390            // Mirror the rule-major behaviour: silent skip on read
391            // failure (permission flake, race with mid-walk delete).
392            return Vec::new();
393        }
394    };
395    match pf.evaluate_file(ctx, literal, &bytes) {
396        Ok(vs) => vs,
397        Err(e) => vec![Violation::new(format!(
398            "{parent_id}: nested rule #{nested_i} error on {}: {e}",
399            literal.display()
400        ))],
401    }
402}
403
#[cfg(test)]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex, RuleRegistry};
    use std::path::Path;

    /// Build an in-memory index from `(path, is_dir)` pairs; every
    /// entry gets a size of 1.
    fn index(entries: &[(&str, bool)]) -> FileIndex {
        let listed = entries
            .iter()
            .map(|&(p, is_dir)| FileEntry {
                path: Path::new(p).into(),
                is_dir,
                size: 1,
            })
            .collect();
        FileIndex::from_entries(listed)
    }

    /// The crate's built-in registry, needed to build nested rules.
    fn registry() -> RuleRegistry {
        crate::builtin_registry()
    }

    /// Evaluate `rule` against an index made from `files`, with a
    /// registry attached and no facts, vars, or git data.
    fn eval_with(rule: &ForEachDirRule, files: &[(&str, bool)]) -> Vec<Violation> {
        let file_index = index(files);
        let rules = registry();
        let ctx = Context {
            root: Path::new("/"),
            index: &file_index,
            registry: Some(&rules),
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        rule.evaluate(&ctx).unwrap()
    }

    /// Assemble a rule with id `t`, error level, and no `when_iter`.
    fn rule(select: &str, require: Vec<NestedRuleSpec>) -> ForEachDirRule {
        let select_scope = Scope::from_patterns(&[select.to_string()]).unwrap();
        ForEachDirRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            select_scope,
            when_iter: None,
            require: compile_nested_require("t", require).unwrap(),
        }
    }

    /// A nested `file_exists` spec for `path`, built through YAML so
    /// the test takes the same route production users take.
    fn require_file_exists(path: &str) -> NestedRuleSpec {
        let yaml = format!("kind: file_exists\npaths: \"{path}\"\n");
        serde_yaml_ng::from_str(&yaml).unwrap()
    }

    #[test]
    fn passes_when_every_dir_has_required_file() {
        let tree = [
            ("src", true),
            ("src/foo", true),
            ("src/foo/mod.rs", false),
            ("src/bar", true),
            ("src/bar/mod.rs", false),
        ];
        let r = rule("src/*", vec![require_file_exists("{path}/mod.rs")]);
        let v = eval_with(&r, &tree);
        assert!(v.is_empty(), "unexpected: {v:?}");
    }

    #[test]
    fn violates_when_a_dir_missing_required_file() {
        let tree = [
            ("src", true),
            ("src/foo", true),
            ("src/foo/mod.rs", false),
            ("src/bar", true), // no mod.rs
        ];
        let r = rule("src/*", vec![require_file_exists("{path}/mod.rs")]);
        let v = eval_with(&r, &tree);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("src/bar")));
    }

    #[test]
    fn no_matched_dirs_means_no_violations() {
        let tree = [("src", true), ("src/foo", true)];
        let r = rule("components/*", vec![require_file_exists("{dir}/index.tsx")]);
        let v = eval_with(&r, &tree);
        assert!(v.is_empty());
    }

    #[test]
    fn every_require_rule_evaluated_per_dir() {
        let nested = vec![
            require_file_exists("{path}/mod.rs"),
            require_file_exists("{path}/README.md"),
        ];
        let tree = [
            ("src", true),
            ("src/foo", true),
            ("src/foo/mod.rs", false), // has mod.rs, missing README
        ];
        let r = rule("src/*", nested);
        let v = eval_with(&r, &tree);
        assert_eq!(v.len(), 1);
        assert!(
            v[0].message.contains("README"),
            "expected README in message; got {:?}",
            v[0].message
        );
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // `for_each_dir` is a cross-file rule (`requires_full_index`
        // is true) and `scope_filter` is for per-file rules only, so
        // the build path must refuse it with a clear message that
        // points at the `for_each_dir` + `when_iter:` alternative.
        let yaml = r#"
id: t
kind: for_each_dir
select: "src/*"
require:
  - kind: file_exists
    paths: "{path}/mod.rs"
level: error
scope_filter:
  has_ancestor: Cargo.toml
"#;
        let spec = crate::test_support::spec_yaml(yaml);
        let err = build(&spec).unwrap_err().to_string();
        assert!(
            err.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {err}",
        );
        assert!(
            err.contains("for_each_dir"),
            "expected message to name the cross-file kind, got: {err}",
        );
    }
}