alint_rules/for_each_dir.rs
1//! `for_each_dir` — iterate over every directory matching `select:` and
2//! evaluate a nested `require:` block against each. Path-template tokens
3//! in the nested specs are pre-substituted per iteration using the
4//! iterated directory as the anchor.
5//!
6//! Token conventions (shared with `for_each_file` and `pair`):
7//!
8//! - `{path}` — full relative path of the iterated entry.
9//! - `{dir}` — parent directory of the iterated entry.
10//! - `{basename}` — name of the iterated entry.
11//! - `{stem}` — name with the final extension stripped.
12//! - `{ext}` — final extension without the dot.
13//! - `{parent_name}` — name of the entry's parent directory.
14//!
15//! When iterating *directories*, use `{path}` to name the iterated dir
16//! itself (e.g. `"{path}/mod.rs"` to require a `mod.rs` inside it). Use
17//! `{dir}` only when you need the parent of the matched entry.
18//!
19//! Canonical shape — for every direct subdirectory of `src/`, require a
20//! `mod.rs`:
21//!
//! ```yaml
//! - id: every-module-has-mod
//!   kind: for_each_dir
//!   select: "src/*"
//!   require:
//!     - kind: file_exists
//!       paths: "{path}/mod.rs"
//!   level: error
//! ```
31
32use alint_core::template::PathTokens;
33use alint_core::when::{IterEnv, WhenExpr};
34use alint_core::{
35 CompiledNestedSpec, Context, Error, Level, NestedRuleSpec, Result, Rule, RuleSpec, Scope,
36 Violation,
37};
38use serde::Deserialize;
39
40#[derive(Debug, Deserialize)]
41#[serde(deny_unknown_fields)]
42struct Options {
43 select: String,
44 /// Optional per-iteration filter — evaluated against each
45 /// iterated entry's `iter` context. Common shape:
46 /// `iter.has_file("Cargo.toml")` to scope the iteration to
47 /// directories that look like a workspace member.
48 #[serde(default)]
49 when_iter: Option<String>,
50 require: Vec<NestedRuleSpec>,
51}
52
53#[derive(Debug)]
54pub struct ForEachDirRule {
55 id: String,
56 level: Level,
57 policy_url: Option<String>,
58 select_scope: Scope,
59 when_iter: Option<WhenExpr>,
60 require: Vec<CompiledNestedSpec>,
61}
62
63impl Rule for ForEachDirRule {
64 alint_core::rule_common_impl!();
65
66 fn requires_full_index(&self) -> bool {
67 // Cross-file: per-directory verdicts depend on what's in
68 // each iterated dir as a whole, not just changed entries.
69 // A `for_each_dir` over `src/*` requiring `mod.rs` must
70 // see every `src/*` even if only one file inside it
71 // changed. Per roadmap, opts out of `--changed` filtering.
72 true
73 }
74
75 fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
76 evaluate_for_each(
77 &self.id,
78 self.level,
79 &self.select_scope,
80 self.when_iter.as_ref(),
81 &self.require,
82 ctx,
83 IterateMode::Dirs,
84 )
85 }
86}
87
88pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
89 alint_core::reject_scope_filter_on_cross_file(spec, "for_each_dir")?;
90 let opts: Options = spec
91 .deserialize_options()
92 .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
93 if opts.require.is_empty() {
94 return Err(Error::rule_config(
95 &spec.id,
96 "for_each_dir requires at least one nested rule under `require:`",
97 ));
98 }
99 let select_scope = Scope::from_patterns(&[opts.select])?;
100 let when_iter = parse_when_iter(spec, opts.when_iter.as_deref())?;
101 let require = compile_nested_require(&spec.id, opts.require)?;
102 Ok(Box::new(ForEachDirRule {
103 id: spec.id.clone(),
104 level: spec.level,
105 policy_url: spec.policy_url.clone(),
106 select_scope,
107 when_iter,
108 require,
109 }))
110}
111
112/// Pre-compile each `NestedRuleSpec` in `require:` so its
113/// `when:` source is parsed exactly once at rule-build time.
114/// Shared by `for_each_dir`, `for_each_file`, and
115/// `every_matching_has` — all three accept nested rules with
116/// optional `when:` clauses, and all three pre-v0.9.12 re-
117/// parsed the source per iteration. This helper is the single
118/// place new cross-file iteration rules thread their require
119/// list through.
120pub(crate) fn compile_nested_require(
121 parent_id: &str,
122 require: Vec<NestedRuleSpec>,
123) -> Result<Vec<CompiledNestedSpec>> {
124 require
125 .into_iter()
126 .enumerate()
127 .map(|(idx, spec)| CompiledNestedSpec::compile(spec, parent_id, idx))
128 .collect()
129}
130
131/// Compile a `when_iter:` source string into a `WhenExpr` at
132/// rule-build time. Public to the crate so the sibling
133/// `for_each_file` and `every_matching_has` rules can reuse the
134/// same error shape.
135pub(crate) fn parse_when_iter(spec: &RuleSpec, src: Option<&str>) -> Result<Option<WhenExpr>> {
136 let Some(src) = src else { return Ok(None) };
137 alint_core::when::parse(src)
138 .map(Some)
139 .map_err(|e| Error::rule_config(&spec.id, format!("invalid `when_iter:`: {e}")))
140}
141
/// Selects which index entries [`evaluate_for_each`] walks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum IterateMode {
    /// Directories only.
    Dirs,
    /// Files only.
    Files,
    /// Directories first, then files — used by `every_matching_has`.
    Both,
}
150
151/// Shared evaluation logic for `for_each_dir`, `for_each_file`, and
152/// `every_matching_has`. `mode` selects which entries to iterate.
153/// `when_iter` (compiled at rule-build time) gates each iteration:
154/// when present and false for an entry, that entry is skipped
155/// before any nested rule is built or evaluated.
156///
157/// 108 lines after the v0.9.8 literal-path bypass landed —
158/// extracting the bypass into a separate helper would require
159/// threading the `parent_id` / level / current entry / nested
160/// spec through 5 args, and the bypass and the fallback path
161/// share the violation-attribution loop. Reads better
162/// top-to-bottom as one phased dispatcher.
163#[allow(clippy::too_many_lines)]
164pub(crate) fn evaluate_for_each(
165 parent_id: &str,
166 level: Level,
167 select_scope: &Scope,
168 when_iter: Option<&WhenExpr>,
169 require: &[CompiledNestedSpec],
170 ctx: &Context<'_>,
171 mode: IterateMode,
172) -> Result<Vec<Violation>> {
173 let Some(registry) = ctx.registry else {
174 return Err(Error::Other(format!(
175 "rule {parent_id}: nested-rule evaluation needs a RuleRegistry in the Context \
176 (likely an Engine constructed without one)",
177 )));
178 };
179
180 let entries: Box<dyn Iterator<Item = _>> = match mode {
181 IterateMode::Dirs => Box::new(ctx.index.dirs()),
182 IterateMode::Files => Box::new(ctx.index.files()),
183 IterateMode::Both => Box::new(ctx.index.dirs().chain(ctx.index.files())),
184 };
185
186 let mut violations = Vec::new();
187 for entry in entries {
188 if !select_scope.matches(&entry.path, ctx.index) {
189 continue;
190 }
191
192 // Per-iteration `when_iter:` filter. Cheap to evaluate
193 // (one IterEnv build + one expression walk per matched
194 // entry); skips the nested-rule build entirely on a
195 // false verdict, which is the whole point of the field.
196 let iter_env = IterEnv {
197 path: &entry.path,
198 is_dir: entry.is_dir,
199 index: ctx.index,
200 };
201 if let Some(expr) = when_iter {
202 if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
203 let env = alint_core::WhenEnv {
204 facts,
205 vars,
206 iter: Some(iter_env),
207 };
208 match expr.evaluate(&env) {
209 Ok(true) => {}
210 Ok(false) => continue,
211 Err(e) => {
212 violations.push(
213 Violation::new(format!("{parent_id}: when_iter error: {e}"))
214 .with_path(entry.path.clone()),
215 );
216 continue;
217 }
218 }
219 }
220 }
221
222 let tokens = PathTokens::from_path(&entry.path);
223 for (i, nested) in require.iter().enumerate() {
224 // v0.9.12: nested `when:` is pre-compiled at rule-
225 // build time (`CompiledNestedSpec`) — gate on the
226 // already-parsed expression instead of re-parsing
227 // the source per iteration. Same `iter.*` context
228 // is available so a nested rule can reach back to
229 // the iteration just like the outer `when_iter:`
230 // does. We instantiate the per-iteration spec only
231 // AFTER the gate so a falsy `when:` skips both the
232 // template-render work AND the registry build.
233 if let Some(expr) = &nested.when {
234 if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
235 let env = alint_core::WhenEnv {
236 facts,
237 vars,
238 iter: Some(iter_env),
239 };
240 match expr.evaluate(&env) {
241 Ok(true) => {}
242 Ok(false) => continue,
243 Err(e) => {
244 violations.push(
245 Violation::new(format!(
246 "{parent_id}: nested rule #{i} when error: {e}"
247 ))
248 .with_path(entry.path.clone()),
249 );
250 continue;
251 }
252 }
253 }
254 }
255 let nested_spec = nested.spec.instantiate(parent_id, i, level, &tokens);
256 let nested_rule = match registry.build(&nested_spec) {
257 Ok(r) => r,
258 Err(e) => {
259 violations.push(
260 Violation::new(format!(
261 "{parent_id}: failed to build nested rule #{i} for {}: {e}",
262 entry.path.display()
263 ))
264 .with_path(entry.path.clone()),
265 );
266 continue;
267 }
268 };
269 // v0.9.8: when the nested rule's `paths:` template
270 // resolved to a single literal path AND the rule is
271 // a per-file rule, bypass `rule.evaluate(ctx)` —
272 // which would iterate `ctx.index.files()` (1M
273 // entries) for a single-target lookup — and dispatch
274 // via `evaluate_file` against the in-index entry
275 // directly. Closes the v0.9.7 → v0.9.8 cliff for the
276 // canonical for_each_file × per-file-content-rule
277 // shape (S7's `every-lib-has-content` was 484s under
278 // v0.9.7's full-index scan; this drops it to a few
279 // milliseconds × N iterations).
280 //
281 // For non-per-file rules (e.g. `file_exists`,
282 // `toml_path_matches`), fall through to the rule's
283 // own evaluate — file_exists has its own literal-
284 // path fast path (contains_file lookup) since
285 // v0.9.5; toml_path_matches reads the file
286 // directly without scanning the full index.
287 // v0.9.10: a single `path_scope().matches(literal, ctx.index)`
288 // covers both the path-glob AND the per-rule
289 // `scope_filter` ancestor predicate, since `Scope`
290 // now owns its `Option<ScopeFilter>` and `matches`
291 // consults it. The earlier v0.9.9
292 // `nested_rule.scope_filter()` guard this bypass
293 // had is no longer needed.
294 if let Some(literal) = nested_spec_single_literal(&nested_spec)
295 && let Some(pf) = nested_rule.as_per_file()
296 && pf.path_scope().matches(&literal, ctx.index)
297 {
298 let nested_violations = evaluate_one_per_file_rule(parent_id, i, &literal, pf, ctx);
299 for mut v in nested_violations {
300 if v.path.is_none() {
301 v.path = Some(entry.path.clone());
302 }
303 violations.push(v);
304 }
305 continue;
306 }
307 let nested_violations = nested_rule.evaluate(ctx)?;
308 for mut v in nested_violations {
309 if v.path.is_none() {
310 v.path = Some(entry.path.clone());
311 }
312 violations.push(v);
313 }
314 }
315 }
316 Ok(violations)
317}
318
319/// Extract a single literal relative path from a nested rule
320/// spec's `paths:` field, or `None` if the spec carries multiple
321/// patterns / a glob / an include-exclude shape. Used by
322/// [`evaluate_for_each`] to detect when a per-file nested rule
323/// can be dispatched via `evaluate_file` against a single
324/// in-index entry instead of going through the rule's own
325/// O(N) full-index scan.
326///
327/// Conservative: returns `None` for any pattern containing a
328/// glob metacharacter, even when the metacharacter is escaped —
329/// the bench cliff this exists to fix is the canonical
330/// `paths: "{path}/<basename>"` shape, which always resolves to
331/// a literal post-template-expansion. False positives here
332/// would silently bypass the rule's own glob handling.
333fn nested_spec_single_literal(spec: &alint_core::RuleSpec) -> Option<std::path::PathBuf> {
334 use alint_core::PathsSpec;
335 let paths = spec.paths.as_ref()?;
336 let single: &str = match paths {
337 PathsSpec::Single(s) => s,
338 PathsSpec::Many(v) if v.len() == 1 => &v[0],
339 _ => return None,
340 };
341 if single.is_empty() || single.starts_with('!') {
342 return None;
343 }
344 if single
345 .chars()
346 .any(|c| matches!(c, '*' | '?' | '[' | ']' | '{' | '}'))
347 {
348 return None;
349 }
350 Some(std::path::PathBuf::from(single))
351}
352
353/// Read the in-index file at `literal` once, dispatch to the
354/// per-file rule's `evaluate_file`, and return any violations
355/// (with `parent_id`-flavoured rule-error prefixing on failure
356/// to match the rule-major path's shape).
357fn evaluate_one_per_file_rule(
358 parent_id: &str,
359 nested_i: usize,
360 literal: &std::path::Path,
361 pf: &dyn alint_core::PerFileRule,
362 ctx: &Context<'_>,
363) -> Vec<Violation> {
364 if !ctx.index.contains_file(literal) {
365 // No in-index file at this path — same observable result
366 // as the rule's own `evaluate` would produce when its
367 // path_scope matches no files (i.e. zero violations).
368 return Vec::new();
369 }
370 let abs = ctx.root.join(literal);
371 let Ok(bytes) = std::fs::read(&abs) else {
372 // Mirror the rule-major behaviour: silent skip on read
373 // failure (permission flake, race with mid-walk delete).
374 return Vec::new();
375 };
376 match pf.evaluate_file(ctx, literal, &bytes) {
377 Ok(vs) => vs,
378 Err(e) => vec![Violation::new(format!(
379 "{parent_id}: nested rule #{nested_i} error on {}: {e}",
380 literal.display()
381 ))],
382 }
383}
384
#[cfg(test)]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex, RuleRegistry};
    use std::path::Path;

    /// Build an in-memory index from `(path, is_dir)` pairs; every entry
    /// gets a size of 1.
    fn index(entries: &[(&str, bool)]) -> FileIndex {
        FileIndex::from_entries(
            entries
                .iter()
                .map(|&(raw, is_dir)| FileEntry {
                    path: Path::new(raw).into(),
                    is_dir,
                    size: 1,
                })
                .collect(),
        )
    }

    /// The builtin registry, so nested rule kinds resolve as in production.
    fn registry() -> RuleRegistry {
        crate::builtin_registry()
    }

    /// Evaluate `rule` against an index built from `files`, with a
    /// registry but without facts, vars, or git data.
    fn eval_with(rule: &ForEachDirRule, files: &[(&str, bool)]) -> Vec<Violation> {
        let file_index = index(files);
        let reg = registry();
        let ctx = Context {
            root: Path::new("/"),
            index: &file_index,
            registry: Some(&reg),
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        rule.evaluate(&ctx).unwrap()
    }

    /// An error-level rule with id `t`, no `when_iter`, and the given
    /// selection pattern and nested specs.
    fn rule(select: &str, require: Vec<NestedRuleSpec>) -> ForEachDirRule {
        let select_scope = Scope::from_patterns(&[select.to_string()]).unwrap();
        let require = compile_nested_require("t", require).unwrap();
        ForEachDirRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            select_scope,
            when_iter: None,
            require,
        }
    }

    /// A nested `file_exists` spec for `path`, built via YAML to exercise
    /// the same path production users take.
    fn require_file_exists(path: &str) -> NestedRuleSpec {
        let yaml = format!("kind: file_exists\npaths: \"{path}\"\n");
        serde_yaml_ng::from_str(&yaml).unwrap()
    }

    #[test]
    fn passes_when_every_dir_has_required_file() {
        let files = [
            ("src", true),
            ("src/foo", true),
            ("src/foo/mod.rs", false),
            ("src/bar", true),
            ("src/bar/mod.rs", false),
        ];
        let under_test = rule("src/*", vec![require_file_exists("{path}/mod.rs")]);
        let violations = eval_with(&under_test, &files);
        assert!(violations.is_empty(), "unexpected: {:?}", violations);
    }

    #[test]
    fn violates_when_a_dir_missing_required_file() {
        let files = [
            ("src", true),
            ("src/foo", true),
            ("src/foo/mod.rs", false),
            ("src/bar", true), // no mod.rs
        ];
        let under_test = rule("src/*", vec![require_file_exists("{path}/mod.rs")]);
        let violations = eval_with(&under_test, &files);
        assert_eq!(violations.len(), 1);
        assert_eq!(violations[0].path.as_deref(), Some(Path::new("src/bar")));
    }

    #[test]
    fn no_matched_dirs_means_no_violations() {
        let under_test = rule("components/*", vec![require_file_exists("{dir}/index.tsx")]);
        let violations = eval_with(&under_test, &[("src", true), ("src/foo", true)]);
        assert!(violations.is_empty());
    }

    #[test]
    fn every_require_rule_evaluated_per_dir() {
        let nested = vec![
            require_file_exists("{path}/mod.rs"),
            require_file_exists("{path}/README.md"),
        ];
        let files = [
            ("src", true),
            ("src/foo", true),
            ("src/foo/mod.rs", false), // has mod.rs, missing README
        ];
        let violations = eval_with(&rule("src/*", nested), &files);
        assert_eq!(violations.len(), 1);
        assert!(
            violations[0].message.contains("README"),
            "expected README in message; got {:?}",
            violations[0].message
        );
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // for_each_dir is a cross-file rule (requires_full_index =
        // true) while scope_filter is for per-file rules only. The
        // build path must reject it with a clear message pointing at
        // the for_each_dir + when_iter: alternative.
        let yaml = r#"
id: t
kind: for_each_dir
select: "src/*"
require:
  - kind: file_exists
    paths: "{path}/mod.rs"
level: error
scope_filter:
  has_ancestor: Cargo.toml
"#;
        let spec = crate::test_support::spec_yaml(yaml);
        let message = build(&spec).unwrap_err().to_string();
        assert!(
            message.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {message}",
        );
        assert!(
            message.contains("for_each_dir"),
            "expected message to name the cross-file kind, got: {message}",
        );
    }
}