Skip to main content

alint_rules/
for_each_dir.rs

1//! `for_each_dir` — iterate over every directory matching `select:` and
2//! evaluate a nested `require:` block against each. Path-template tokens
3//! in the nested specs are pre-substituted per iteration using the
4//! iterated directory as the anchor.
5//!
6//! Token conventions (shared with `for_each_file` and `pair`):
7//!
8//! - `{path}` — full relative path of the iterated entry.
9//! - `{dir}`  — parent directory of the iterated entry.
10//! - `{basename}` — name of the iterated entry.
11//! - `{stem}` — name with the final extension stripped.
12//! - `{ext}` — final extension without the dot.
13//! - `{parent_name}` — name of the entry's parent directory.
14//!
15//! When iterating *directories*, use `{path}` to name the iterated dir
16//! itself (e.g. `"{path}/mod.rs"` to require a `mod.rs` inside it). Use
17//! `{dir}` only when you need the parent of the matched entry.
18//!
19//! Canonical shape — for every direct subdirectory of `src/`, require a
20//! `mod.rs`:
21//!
22//! ```yaml
23//! - id: every-module-has-mod
24//!   kind: for_each_dir
25//!   select: "src/*"
26//!   require:
27//!     - kind: file_exists
28//!       paths: "{path}/mod.rs"
29//!   level: error
30//! ```
31
32use alint_core::template::PathTokens;
33use alint_core::when::{IterEnv, WhenExpr};
34use alint_core::{Context, Error, Level, NestedRuleSpec, Result, Rule, RuleSpec, Scope, Violation};
35use serde::Deserialize;
36
37#[derive(Debug, Deserialize)]
38#[serde(deny_unknown_fields)]
39struct Options {
40    select: String,
41    /// Optional per-iteration filter — evaluated against each
42    /// iterated entry's `iter` context. Common shape:
43    /// `iter.has_file("Cargo.toml")` to scope the iteration to
44    /// directories that look like a workspace member.
45    #[serde(default)]
46    when_iter: Option<String>,
47    require: Vec<NestedRuleSpec>,
48}
49
50#[derive(Debug)]
51pub struct ForEachDirRule {
52    id: String,
53    level: Level,
54    policy_url: Option<String>,
55    select_scope: Scope,
56    when_iter: Option<WhenExpr>,
57    require: Vec<NestedRuleSpec>,
58}
59
60impl Rule for ForEachDirRule {
61    fn id(&self) -> &str {
62        &self.id
63    }
64    fn level(&self) -> Level {
65        self.level
66    }
67    fn policy_url(&self) -> Option<&str> {
68        self.policy_url.as_deref()
69    }
70
71    fn requires_full_index(&self) -> bool {
72        // Cross-file: per-directory verdicts depend on what's in
73        // each iterated dir as a whole, not just changed entries.
74        // A `for_each_dir` over `src/*` requiring `mod.rs` must
75        // see every `src/*` even if only one file inside it
76        // changed. Per roadmap, opts out of `--changed` filtering.
77        true
78    }
79
80    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
81        evaluate_for_each(
82            &self.id,
83            self.level,
84            &self.select_scope,
85            self.when_iter.as_ref(),
86            &self.require,
87            ctx,
88            IterateMode::Dirs,
89        )
90    }
91}
92
93pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
94    alint_core::reject_scope_filter_on_cross_file(spec, "for_each_dir")?;
95    let opts: Options = spec
96        .deserialize_options()
97        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
98    if opts.require.is_empty() {
99        return Err(Error::rule_config(
100            &spec.id,
101            "for_each_dir requires at least one nested rule under `require:`",
102        ));
103    }
104    let select_scope = Scope::from_patterns(&[opts.select])?;
105    let when_iter = parse_when_iter(spec, opts.when_iter.as_deref())?;
106    Ok(Box::new(ForEachDirRule {
107        id: spec.id.clone(),
108        level: spec.level,
109        policy_url: spec.policy_url.clone(),
110        select_scope,
111        when_iter,
112        require: opts.require,
113    }))
114}
115
116/// Compile a `when_iter:` source string into a `WhenExpr` at
117/// rule-build time. Public to the crate so the sibling
118/// `for_each_file` and `every_matching_has` rules can reuse the
119/// same error shape.
120pub(crate) fn parse_when_iter(spec: &RuleSpec, src: Option<&str>) -> Result<Option<WhenExpr>> {
121    let Some(src) = src else { return Ok(None) };
122    alint_core::when::parse(src)
123        .map(Some)
124        .map_err(|e| Error::rule_config(&spec.id, format!("invalid `when_iter:`: {e}")))
125}
126
/// Selects which index entries [`evaluate_for_each`] walks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum IterateMode {
    /// Directory entries only.
    Dirs,
    /// File entries only.
    Files,
    /// Directories followed by files — used by `every_matching_has`.
    Both,
}
135
136/// Shared evaluation logic for `for_each_dir`, `for_each_file`, and
137/// `every_matching_has`. `mode` selects which entries to iterate.
138/// `when_iter` (compiled at rule-build time) gates each iteration:
139/// when present and false for an entry, that entry is skipped
140/// before any nested rule is built or evaluated.
141///
142/// 108 lines after the v0.9.8 literal-path bypass landed —
143/// extracting the bypass into a separate helper would require
144/// threading the `parent_id` / level / current entry / nested
145/// spec through 5 args, and the bypass and the fallback path
146/// share the violation-attribution loop. Reads better
147/// top-to-bottom as one phased dispatcher.
148#[allow(clippy::too_many_lines)]
149pub(crate) fn evaluate_for_each(
150    parent_id: &str,
151    level: Level,
152    select_scope: &Scope,
153    when_iter: Option<&WhenExpr>,
154    require: &[NestedRuleSpec],
155    ctx: &Context<'_>,
156    mode: IterateMode,
157) -> Result<Vec<Violation>> {
158    let Some(registry) = ctx.registry else {
159        return Err(Error::Other(format!(
160            "rule {parent_id}: nested-rule evaluation needs a RuleRegistry in the Context \
161             (likely an Engine constructed without one)",
162        )));
163    };
164
165    let entries: Box<dyn Iterator<Item = _>> = match mode {
166        IterateMode::Dirs => Box::new(ctx.index.dirs()),
167        IterateMode::Files => Box::new(ctx.index.files()),
168        IterateMode::Both => Box::new(ctx.index.dirs().chain(ctx.index.files())),
169    };
170
171    let mut violations = Vec::new();
172    for entry in entries {
173        if !select_scope.matches(&entry.path) {
174            continue;
175        }
176
177        // Per-iteration `when_iter:` filter. Cheap to evaluate
178        // (one IterEnv build + one expression walk per matched
179        // entry); skips the nested-rule build entirely on a
180        // false verdict, which is the whole point of the field.
181        let iter_env = IterEnv {
182            path: &entry.path,
183            is_dir: entry.is_dir,
184            index: ctx.index,
185        };
186        if let Some(expr) = when_iter {
187            if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
188                let env = alint_core::WhenEnv {
189                    facts,
190                    vars,
191                    iter: Some(iter_env),
192                };
193                match expr.evaluate(&env) {
194                    Ok(true) => {}
195                    Ok(false) => continue,
196                    Err(e) => {
197                        violations.push(
198                            Violation::new(format!("{parent_id}: when_iter error: {e}"))
199                                .with_path(entry.path.clone()),
200                        );
201                        continue;
202                    }
203                }
204            }
205        }
206
207        let tokens = PathTokens::from_path(&entry.path);
208        for (i, nested) in require.iter().enumerate() {
209            let nested_spec = nested.instantiate(parent_id, i, level, &tokens);
210            // Gate the nested rule on its `when:` clause (if
211            // present). Same `iter.*` context is available, so a
212            // nested rule can reach back to the iteration just
213            // like the outer `when_iter:` does.
214            if let Some(when_src) = &nested_spec.when {
215                if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
216                    let expr = alint_core::when::parse(when_src).map_err(|e| {
217                        Error::rule_config(
218                            parent_id,
219                            format!("nested rule #{i}: invalid when: {e}"),
220                        )
221                    })?;
222                    let env = alint_core::WhenEnv {
223                        facts,
224                        vars,
225                        iter: Some(iter_env),
226                    };
227                    match expr.evaluate(&env) {
228                        Ok(true) => {}
229                        Ok(false) => continue,
230                        Err(e) => {
231                            violations.push(
232                                Violation::new(format!(
233                                    "{parent_id}: nested rule #{i} when error: {e}"
234                                ))
235                                .with_path(entry.path.clone()),
236                            );
237                            continue;
238                        }
239                    }
240                }
241            }
242            let nested_rule = match registry.build(&nested_spec) {
243                Ok(r) => r,
244                Err(e) => {
245                    violations.push(
246                        Violation::new(format!(
247                            "{parent_id}: failed to build nested rule #{i} for {}: {e}",
248                            entry.path.display()
249                        ))
250                        .with_path(entry.path.clone()),
251                    );
252                    continue;
253                }
254            };
255            // v0.9.8: when the nested rule's `paths:` template
256            // resolved to a single literal path AND the rule is
257            // a per-file rule, bypass `rule.evaluate(ctx)` —
258            // which would iterate `ctx.index.files()` (1M
259            // entries) for a single-target lookup — and dispatch
260            // via `evaluate_file` against the in-index entry
261            // directly. Closes the v0.9.7 → v0.9.8 cliff for the
262            // canonical for_each_file × per-file-content-rule
263            // shape (S7's `every-lib-has-content` was 484s under
264            // v0.9.7's full-index scan; this drops it to a few
265            // milliseconds × N iterations).
266            //
267            // For non-per-file rules (e.g. `file_exists`,
268            // `toml_path_matches`), fall through to the rule's
269            // own evaluate — file_exists has its own literal-
270            // path fast path (contains_file lookup) since
271            // v0.9.5; toml_path_matches reads the file
272            // directly without scanning the full index.
273            if let Some(literal) = nested_spec_single_literal(&nested_spec)
274                && let Some(pf) = nested_rule.as_per_file()
275                && pf.path_scope().matches(&literal)
276            {
277                let nested_violations = evaluate_one_per_file_rule(parent_id, i, &literal, pf, ctx);
278                for mut v in nested_violations {
279                    if v.path.is_none() {
280                        v.path = Some(entry.path.clone());
281                    }
282                    violations.push(v);
283                }
284                continue;
285            }
286            let nested_violations = nested_rule.evaluate(ctx)?;
287            for mut v in nested_violations {
288                if v.path.is_none() {
289                    v.path = Some(entry.path.clone());
290                }
291                violations.push(v);
292            }
293        }
294    }
295    Ok(violations)
296}
297
298/// Extract a single literal relative path from a nested rule
299/// spec's `paths:` field, or `None` if the spec carries multiple
300/// patterns / a glob / an include-exclude shape. Used by
301/// [`evaluate_for_each`] to detect when a per-file nested rule
302/// can be dispatched via `evaluate_file` against a single
303/// in-index entry instead of going through the rule's own
304/// O(N) full-index scan.
305///
306/// Conservative: returns `None` for any pattern containing a
307/// glob metacharacter, even when the metacharacter is escaped —
308/// the bench cliff this exists to fix is the canonical
309/// `paths: "{path}/<basename>"` shape, which always resolves to
310/// a literal post-template-expansion. False positives here
311/// would silently bypass the rule's own glob handling.
312fn nested_spec_single_literal(spec: &alint_core::RuleSpec) -> Option<std::path::PathBuf> {
313    use alint_core::PathsSpec;
314    let paths = spec.paths.as_ref()?;
315    let single: &str = match paths {
316        PathsSpec::Single(s) => s,
317        PathsSpec::Many(v) if v.len() == 1 => &v[0],
318        _ => return None,
319    };
320    if single.is_empty() || single.starts_with('!') {
321        return None;
322    }
323    if single
324        .chars()
325        .any(|c| matches!(c, '*' | '?' | '[' | ']' | '{' | '}'))
326    {
327        return None;
328    }
329    Some(std::path::PathBuf::from(single))
330}
331
332/// Read the in-index file at `literal` once, dispatch to the
333/// per-file rule's `evaluate_file`, and return any violations
334/// (with `parent_id`-flavoured rule-error prefixing on failure
335/// to match the rule-major path's shape).
336fn evaluate_one_per_file_rule(
337    parent_id: &str,
338    nested_i: usize,
339    literal: &std::path::Path,
340    pf: &dyn alint_core::PerFileRule,
341    ctx: &Context<'_>,
342) -> Vec<Violation> {
343    if !ctx.index.contains_file(literal) {
344        // No in-index file at this path — same observable result
345        // as the rule's own `evaluate` would produce when its
346        // path_scope matches no files (i.e. zero violations).
347        return Vec::new();
348    }
349    let abs = ctx.root.join(literal);
350    let Ok(bytes) = std::fs::read(&abs) else {
351        // Mirror the rule-major behaviour: silent skip on read
352        // failure (permission flake, race with mid-walk delete).
353        return Vec::new();
354    };
355    match pf.evaluate_file(ctx, literal, &bytes) {
356        Ok(vs) => vs,
357        Err(e) => vec![Violation::new(format!(
358            "{parent_id}: nested rule #{nested_i} error on {}: {e}",
359            literal.display()
360        ))],
361    }
362}
363
#[cfg(test)]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex, RuleRegistry};
    use std::path::Path;

    /// Build an in-memory index from `(path, is_dir)` pairs; every
    /// entry gets a size of 1.
    fn index(entries: &[(&str, bool)]) -> FileIndex {
        FileIndex::from_entries(
            entries
                .iter()
                .map(|&(p, is_dir)| FileEntry {
                    path: Path::new(p).into(),
                    is_dir,
                    size: 1,
                })
                .collect(),
        )
    }

    /// The registry nested rules are built from.
    fn registry() -> RuleRegistry {
        crate::builtin_registry()
    }

    /// Evaluate `rule` against an index made of `files`, with a
    /// registry but no facts, vars, or git data in the context.
    fn eval_with(rule: &ForEachDirRule, files: &[(&str, bool)]) -> Vec<Violation> {
        let file_index = index(files);
        let rule_registry = registry();
        let ctx = Context {
            root: Path::new("/"),
            index: &file_index,
            registry: Some(&rule_registry),
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        rule.evaluate(&ctx).unwrap()
    }

    /// An error-level rule with id `t`, no `when_iter:`, selecting
    /// `select` and requiring `require`.
    fn rule(select: &str, require: Vec<NestedRuleSpec>) -> ForEachDirRule {
        let select_scope = Scope::from_patterns(&[select.to_string()]).unwrap();
        ForEachDirRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            select_scope,
            when_iter: None,
            require,
        }
    }

    /// A nested `file_exists` spec for `path`, parsed from YAML so the
    /// test goes through the same route as a user's config.
    fn require_file_exists(path: &str) -> NestedRuleSpec {
        let yaml = format!("kind: file_exists\npaths: \"{path}\"\n");
        serde_yaml_ng::from_str(&yaml).unwrap()
    }

    #[test]
    fn passes_when_every_dir_has_required_file() {
        let tree = [
            ("src", true),
            ("src/foo", true),
            ("src/foo/mod.rs", false),
            ("src/bar", true),
            ("src/bar/mod.rs", false),
        ];
        let r = rule("src/*", vec![require_file_exists("{path}/mod.rs")]);
        let v = eval_with(&r, &tree);
        assert!(v.is_empty(), "unexpected: {v:?}");
    }

    #[test]
    fn violates_when_a_dir_missing_required_file() {
        // `src/bar` has no `mod.rs`.
        let tree = [
            ("src", true),
            ("src/foo", true),
            ("src/foo/mod.rs", false),
            ("src/bar", true),
        ];
        let r = rule("src/*", vec![require_file_exists("{path}/mod.rs")]);
        let v = eval_with(&r, &tree);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("src/bar")));
    }

    #[test]
    fn no_matched_dirs_means_no_violations() {
        let tree = [("src", true), ("src/foo", true)];
        let r = rule("components/*", vec![require_file_exists("{dir}/index.tsx")]);
        let v = eval_with(&r, &tree);
        assert!(v.is_empty());
    }

    #[test]
    fn every_require_rule_evaluated_per_dir() {
        // `src/foo` has `mod.rs` but no README.
        let tree = [
            ("src", true),
            ("src/foo", true),
            ("src/foo/mod.rs", false),
        ];
        let nested = vec![
            require_file_exists("{path}/mod.rs"),
            require_file_exists("{path}/README.md"),
        ];
        let r = rule("src/*", nested);
        let v = eval_with(&r, &tree);
        assert_eq!(v.len(), 1);
        assert!(
            v[0].message.contains("README"),
            "expected README in message; got {:?}",
            v[0].message
        );
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // `for_each_dir` is a cross-file rule (`requires_full_index`
        // is true) and `scope_filter` is for per-file rules only, so
        // `build` must refuse it with a message that names the kind.
        let yaml = r#"
id: t
kind: for_each_dir
select: "src/*"
require:
  - kind: file_exists
    paths: "{path}/mod.rs"
level: error
scope_filter:
  has_ancestor: Cargo.toml
"#;
        let spec = crate::test_support::spec_yaml(yaml);
        let err = build(&spec).unwrap_err().to_string();
        assert!(
            err.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {err}",
        );
        assert!(
            err.contains("for_each_dir"),
            "expected message to name the cross-file kind, got: {err}",
        );
    }
}