alint_rules/for_each_dir.rs
1//! `for_each_dir` — iterate over every directory matching `select:` and
2//! evaluate a nested `require:` block against each. Path-template tokens
3//! in the nested specs are pre-substituted per iteration using the
4//! iterated directory as the anchor.
5//!
6//! Token conventions (shared with `for_each_file` and `pair`):
7//!
8//! - `{path}` — full relative path of the iterated entry.
9//! - `{dir}` — parent directory of the iterated entry.
10//! - `{basename}` — name of the iterated entry.
11//! - `{stem}` — name with the final extension stripped.
12//! - `{ext}` — final extension without the dot.
13//! - `{parent_name}` — name of the entry's parent directory.
14//!
15//! When iterating *directories*, use `{path}` to name the iterated dir
16//! itself (e.g. `"{path}/mod.rs"` to require a `mod.rs` inside it). Use
17//! `{dir}` only when you need the parent of the matched entry.
18//!
19//! Canonical shape — for every direct subdirectory of `src/`, require a
20//! `mod.rs`:
21//!
//! ```yaml
//! - id: every-module-has-mod
//!   kind: for_each_dir
//!   select: "src/*"
//!   require:
//!     - kind: file_exists
//!       paths: "{path}/mod.rs"
//!   level: error
//! ```
31
32use alint_core::template::PathTokens;
33use alint_core::when::{IterEnv, WhenExpr};
34use alint_core::{
35 CompiledNestedSpec, Context, Error, Level, NestedRuleSpec, Result, Rule, RuleSpec, Scope,
36 Violation,
37};
38use serde::Deserialize;
39
40#[derive(Debug, Deserialize)]
41#[serde(deny_unknown_fields)]
42struct Options {
43 select: String,
44 /// Optional per-iteration filter — evaluated against each
45 /// iterated entry's `iter` context. Common shape:
46 /// `iter.has_file("Cargo.toml")` to scope the iteration to
47 /// directories that look like a workspace member.
48 #[serde(default)]
49 when_iter: Option<String>,
50 require: Vec<NestedRuleSpec>,
51}
52
53#[derive(Debug)]
54pub struct ForEachDirRule {
55 id: String,
56 level: Level,
57 policy_url: Option<String>,
58 select_scope: Scope,
59 when_iter: Option<WhenExpr>,
60 require: Vec<CompiledNestedSpec>,
61}
62
63impl Rule for ForEachDirRule {
64 alint_core::rule_common_impl!();
65
66 fn requires_full_index(&self) -> bool {
67 // Cross-file: per-directory verdicts depend on what's in
68 // each iterated dir as a whole, not just changed entries.
69 // A `for_each_dir` over `src/*` requiring `mod.rs` must
70 // see every `src/*` even if only one file inside it
71 // changed. Per roadmap, opts out of `--changed` filtering.
72 true
73 }
74
75 fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
76 evaluate_for_each(
77 &self.id,
78 self.level,
79 &self.select_scope,
80 self.when_iter.as_ref(),
81 &self.require,
82 ctx,
83 IterateMode::Dirs,
84 )
85 }
86}
87
88pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
89 alint_core::reject_scope_filter_on_cross_file(spec, "for_each_dir")?;
90 let opts: Options = spec
91 .deserialize_options()
92 .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
93 if opts.require.is_empty() {
94 return Err(Error::rule_config(
95 &spec.id,
96 "for_each_dir requires at least one nested rule under `require:`",
97 ));
98 }
99 let select_scope = Scope::from_patterns(&[opts.select])?;
100 let when_iter = parse_when_iter(spec, opts.when_iter.as_deref())?;
101 let require = compile_nested_require(&spec.id, opts.require)?;
102 Ok(Box::new(ForEachDirRule {
103 id: spec.id.clone(),
104 level: spec.level,
105 policy_url: spec.policy_url.clone(),
106 select_scope,
107 when_iter,
108 require,
109 }))
110}
111
112/// Pre-compile each `NestedRuleSpec` in `require:` so its
113/// `when:` source is parsed exactly once at rule-build time.
114/// Shared by `for_each_dir`, `for_each_file`, and
115/// `every_matching_has` — all three accept nested rules with
116/// optional `when:` clauses, and all three pre-v0.9.12 re-
117/// parsed the source per iteration. This helper is the single
118/// place new cross-file iteration rules thread their require
119/// list through.
120pub(crate) fn compile_nested_require(
121 parent_id: &str,
122 require: Vec<NestedRuleSpec>,
123) -> Result<Vec<CompiledNestedSpec>> {
124 require
125 .into_iter()
126 .enumerate()
127 .map(|(idx, spec)| CompiledNestedSpec::compile(spec, parent_id, idx))
128 .collect()
129}
130
131/// Compile a `when_iter:` source string into a `WhenExpr` at
132/// rule-build time. Public to the crate so the sibling
133/// `for_each_file` and `every_matching_has` rules can reuse the
134/// same error shape.
135pub(crate) fn parse_when_iter(spec: &RuleSpec, src: Option<&str>) -> Result<Option<WhenExpr>> {
136 let Some(src) = src else { return Ok(None) };
137 alint_core::when::parse(src)
138 .map(Some)
139 .map_err(|e| Error::rule_config(&spec.id, format!("invalid `when_iter:`: {e}")))
140}
141
/// Which index entries [`evaluate_for_each`] walks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum IterateMode {
    /// Directory entries only.
    Dirs,
    /// File entries only.
    Files,
    /// Directories first, then files — used by `every_matching_has`.
    Both,
}
150
151/// Shared evaluation logic for `for_each_dir`, `for_each_file`, and
152/// `every_matching_has`. `mode` selects which entries to iterate.
153/// `when_iter` (compiled at rule-build time) gates each iteration:
154/// when present and false for an entry, that entry is skipped
155/// before any nested rule is built or evaluated.
156///
157/// 108 lines after the v0.9.8 literal-path bypass landed —
158/// extracting the bypass into a separate helper would require
159/// threading the `parent_id` / level / current entry / nested
160/// spec through 5 args, and the bypass and the fallback path
161/// share the violation-attribution loop. Reads better
162/// top-to-bottom as one phased dispatcher.
163#[allow(clippy::too_many_lines)]
164pub(crate) fn evaluate_for_each(
165 parent_id: &str,
166 level: Level,
167 select_scope: &Scope,
168 when_iter: Option<&WhenExpr>,
169 require: &[CompiledNestedSpec],
170 ctx: &Context<'_>,
171 mode: IterateMode,
172) -> Result<Vec<Violation>> {
173 let Some(registry) = ctx.registry else {
174 return Err(Error::Other(format!(
175 "rule {parent_id}: nested-rule evaluation needs a RuleRegistry in the Context \
176 (likely an Engine constructed without one)",
177 )));
178 };
179
180 let entries: Box<dyn Iterator<Item = _>> = match mode {
181 IterateMode::Dirs => Box::new(ctx.index.dirs()),
182 IterateMode::Files => Box::new(ctx.index.files()),
183 IterateMode::Both => Box::new(ctx.index.dirs().chain(ctx.index.files())),
184 };
185
186 let mut violations = Vec::new();
187 for entry in entries {
188 if !select_scope.matches(&entry.path, ctx.index) {
189 continue;
190 }
191
192 // Per-iteration `when_iter:` filter. Cheap to evaluate
193 // (one IterEnv build + one expression walk per matched
194 // entry); skips the nested-rule build entirely on a
195 // false verdict, which is the whole point of the field.
196 let iter_env = IterEnv {
197 path: &entry.path,
198 is_dir: entry.is_dir,
199 index: ctx.index,
200 };
201 if let Some(expr) = when_iter {
202 if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
203 let env = alint_core::WhenEnv {
204 facts,
205 vars,
206 iter: Some(iter_env),
207 };
208 match expr.evaluate(&env) {
209 Ok(true) => {}
210 Ok(false) => continue,
211 Err(e) => {
212 violations.push(
213 Violation::new(format!("{parent_id}: when_iter error: {e}"))
214 .with_path(entry.path.clone()),
215 );
216 continue;
217 }
218 }
219 }
220 }
221
222 let tokens = PathTokens::from_path(&entry.path);
223 for (i, nested) in require.iter().enumerate() {
224 // v0.9.12: nested `when:` is pre-compiled at rule-
225 // build time (`CompiledNestedSpec`) — gate on the
226 // already-parsed expression instead of re-parsing
227 // the source per iteration. Same `iter.*` context
228 // is available so a nested rule can reach back to
229 // the iteration just like the outer `when_iter:`
230 // does. We instantiate the per-iteration spec only
231 // AFTER the gate so a falsy `when:` skips both the
232 // template-render work AND the registry build.
233 if let Some(expr) = &nested.when {
234 if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
235 let env = alint_core::WhenEnv {
236 facts,
237 vars,
238 iter: Some(iter_env),
239 };
240 match expr.evaluate(&env) {
241 Ok(true) => {}
242 Ok(false) => continue,
243 Err(e) => {
244 violations.push(
245 Violation::new(format!(
246 "{parent_id}: nested rule #{i} when error: {e}"
247 ))
248 .with_path(entry.path.clone()),
249 );
250 continue;
251 }
252 }
253 }
254 }
255 let nested_spec = nested.spec.instantiate(parent_id, i, level, &tokens);
256 let nested_rule = match registry.build(&nested_spec) {
257 Ok(r) => r,
258 Err(e) => {
259 violations.push(
260 Violation::new(format!(
261 "{parent_id}: failed to build nested rule #{i} for {}: {e}",
262 entry.path.display()
263 ))
264 .with_path(entry.path.clone()),
265 );
266 continue;
267 }
268 };
269 // v0.9.8: when the nested rule's `paths:` template
270 // resolved to a single literal path AND the rule is
271 // a per-file rule, bypass `rule.evaluate(ctx)` —
272 // which would iterate `ctx.index.files()` (1M
273 // entries) for a single-target lookup — and dispatch
274 // via `evaluate_file` against the in-index entry
275 // directly. Closes the v0.9.7 → v0.9.8 cliff for the
276 // canonical for_each_file × per-file-content-rule
277 // shape (S7's `every-lib-has-content` was 484s under
278 // v0.9.7's full-index scan; this drops it to a few
279 // milliseconds × N iterations).
280 //
281 // For non-per-file rules (e.g. `file_exists`,
282 // `toml_path_matches`), fall through to the rule's
283 // own evaluate — file_exists has its own literal-
284 // path fast path (contains_file lookup) since
285 // v0.9.5; toml_path_matches reads the file
286 // directly without scanning the full index.
287 // v0.9.10: a single `path_scope().matches(literal, ctx.index)`
288 // covers both the path-glob AND the per-rule
289 // `scope_filter` ancestor predicate, since `Scope`
290 // now owns its `Option<ScopeFilter>` and `matches`
291 // consults it. The earlier v0.9.9
292 // `nested_rule.scope_filter()` guard this bypass
293 // had is no longer needed.
294 if let Some(literal) = nested_spec_single_literal(&nested_spec)
295 && let Some(pf) = nested_rule.as_per_file()
296 && pf.path_scope().matches(&literal, ctx.index)
297 {
298 let nested_violations = evaluate_one_per_file_rule(parent_id, i, &literal, pf, ctx);
299 for mut v in nested_violations {
300 if v.path.is_none() {
301 v.path = Some(entry.path.clone());
302 }
303 violations.push(v);
304 }
305 continue;
306 }
307 let nested_violations = nested_rule.evaluate(ctx)?;
308 for mut v in nested_violations {
309 if v.path.is_none() {
310 v.path = Some(entry.path.clone());
311 }
312 violations.push(v);
313 }
314 }
315 }
316 Ok(violations)
317}
318
319/// Extract a single literal relative path from a nested rule
320/// spec's `paths:` field, or `None` if the spec carries multiple
321/// patterns / a glob / an include-exclude shape. Used by
322/// [`evaluate_for_each`] to detect when a per-file nested rule
323/// can be dispatched via `evaluate_file` against a single
324/// in-index entry instead of going through the rule's own
325/// O(N) full-index scan.
326///
327/// Conservative: returns `None` for any pattern containing a
328/// glob metacharacter, even when the metacharacter is escaped —
329/// the bench cliff this exists to fix is the canonical
330/// `paths: "{path}/<basename>"` shape, which always resolves to
331/// a literal post-template-expansion. False positives here
332/// would silently bypass the rule's own glob handling.
333fn nested_spec_single_literal(spec: &alint_core::RuleSpec) -> Option<std::path::PathBuf> {
334 use alint_core::PathsSpec;
335 let paths = spec.paths.as_ref()?;
336 let single: &str = match paths {
337 PathsSpec::Single(s) => s,
338 PathsSpec::Many(v) if v.len() == 1 => &v[0],
339 _ => return None,
340 };
341 if single.is_empty() || single.starts_with('!') {
342 return None;
343 }
344 if single
345 .chars()
346 .any(|c| matches!(c, '*' | '?' | '[' | ']' | '{' | '}'))
347 {
348 return None;
349 }
350 Some(std::path::PathBuf::from(single))
351}
352
353/// Read the in-index file at `literal` once, dispatch to the
354/// per-file rule's `evaluate_file`, and return any violations
355/// (with `parent_id`-flavoured rule-error prefixing on failure
356/// to match the rule-major path's shape).
357fn evaluate_one_per_file_rule(
358 parent_id: &str,
359 nested_i: usize,
360 literal: &std::path::Path,
361 pf: &dyn alint_core::PerFileRule,
362 ctx: &Context<'_>,
363) -> Vec<Violation> {
364 if !ctx.index.contains_file(literal) {
365 // No in-index file at this path — same observable result
366 // as the rule's own `evaluate` would produce when its
367 // path_scope matches no files (i.e. zero violations).
368 return Vec::new();
369 }
370 let abs = ctx.root.join(literal);
371 let bytes = match crate::io::read_capped(&abs) {
372 Ok(b) => b,
373 Err(crate::io::ReadCapError::TooLarge(n)) => {
374 // Over the 256 MiB whole-file cap — surface a clear
375 // violation rather than silently skipping (which used
376 // to mask an OOM-DoS surface on hostile / accidental
377 // multi-GB files reached via a `for_each_dir` literal
378 // path).
379 return vec![
380 Violation::new(format!(
381 "{parent_id}: nested rule #{nested_i} cannot analyze {} \
382 — file is too large ({n} bytes; {} MiB cap)",
383 literal.display(),
384 crate::io::MAX_ANALYZE_BYTES / (1024 * 1024),
385 ))
386 .with_path(literal),
387 ];
388 }
389 Err(crate::io::ReadCapError::Io(_)) => {
390 // Mirror the rule-major behaviour: silent skip on read
391 // failure (permission flake, race with mid-walk delete).
392 return Vec::new();
393 }
394 };
395 match pf.evaluate_file(ctx, literal, &bytes) {
396 Ok(vs) => vs,
397 Err(e) => vec![Violation::new(format!(
398 "{parent_id}: nested rule #{nested_i} error on {}: {e}",
399 literal.display()
400 ))],
401 }
402}
403
#[cfg(test)]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex, RuleRegistry};
    use std::path::Path;

    /// Build an in-memory index from `(path, is_dir)` pairs.
    fn index(entries: &[(&str, bool)]) -> FileIndex {
        FileIndex::from_entries(
            entries
                .iter()
                .map(|(p, is_dir)| FileEntry {
                    path: Path::new(p).into(),
                    is_dir: *is_dir,
                    size: 1,
                })
                .collect(),
        )
    }

    /// Registry with every built-in rule kind, so nested specs resolve.
    fn registry() -> RuleRegistry {
        crate::builtin_registry()
    }

    /// Evaluate `rule` against an index built from `files`, with a
    /// registry but no facts, vars, or git data in the context.
    fn eval_with(rule: &ForEachDirRule, files: &[(&str, bool)]) -> Vec<Violation> {
        let idx = index(files);
        let reg = registry();
        let ctx = Context {
            root: Path::new("/"),
            index: &idx,
            registry: Some(&reg),
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        rule.evaluate(&ctx).unwrap()
    }

    /// Construct an error-level rule with id `t`, no `when_iter`, and
    /// the given selection pattern and nested requirements.
    fn rule(select: &str, require: Vec<NestedRuleSpec>) -> ForEachDirRule {
        let require = compile_nested_require("t", require).unwrap();
        ForEachDirRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            select_scope: Scope::from_patterns(&[select.to_string()]).unwrap(),
            when_iter: None,
            require,
        }
    }

    /// Nested `file_exists` spec for `path` (may contain tokens).
    fn require_file_exists(path: &str) -> NestedRuleSpec {
        // Build via YAML to exercise the same path production users take.
        let yaml = format!("kind: file_exists\npaths: \"{path}\"\n");
        serde_yaml_ng::from_str(&yaml).unwrap()
    }

    #[test]
    fn passes_when_every_dir_has_required_file() {
        let r = rule("src/*", vec![require_file_exists("{path}/mod.rs")]);
        let v = eval_with(
            &r,
            &[
                ("src", true),
                ("src/foo", true),
                ("src/foo/mod.rs", false),
                ("src/bar", true),
                ("src/bar/mod.rs", false),
            ],
        );
        assert!(v.is_empty(), "unexpected: {v:?}");
    }

    #[test]
    fn violates_when_a_dir_missing_required_file() {
        let r = rule("src/*", vec![require_file_exists("{path}/mod.rs")]);
        let v = eval_with(
            &r,
            &[
                ("src", true),
                ("src/foo", true),
                ("src/foo/mod.rs", false),
                ("src/bar", true), // no mod.rs
            ],
        );
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("src/bar")));
    }

    #[test]
    fn no_matched_dirs_means_no_violations() {
        let r = rule("components/*", vec![require_file_exists("{dir}/index.tsx")]);
        let v = eval_with(&r, &[("src", true), ("src/foo", true)]);
        assert!(v.is_empty());
    }

    #[test]
    fn every_require_rule_evaluated_per_dir() {
        let r = rule(
            "src/*",
            vec![
                require_file_exists("{path}/mod.rs"),
                require_file_exists("{path}/README.md"),
            ],
        );
        let v = eval_with(
            &r,
            &[
                ("src", true),
                ("src/foo", true),
                ("src/foo/mod.rs", false), // has mod.rs, missing README
            ],
        );
        assert_eq!(v.len(), 1);
        assert!(
            v[0].message.contains("README"),
            "expected README in message; got {:?}",
            v[0].message
        );
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // for_each_dir is a cross-file rule (requires_full_index =
        // true); scope_filter is per-file-rules-only. The build
        // path must reject it with a clear message pointing at
        // the for_each_dir + when_iter: alternative.
        let yaml = r#"
id: t
kind: for_each_dir
select: "src/*"
require:
  - kind: file_exists
    paths: "{path}/mod.rs"
level: error
scope_filter:
  has_ancestor: Cargo.toml
"#;
        let spec = crate::test_support::spec_yaml(yaml);
        let err = build(&spec).unwrap_err().to_string();
        assert!(
            err.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {err}",
        );
        assert!(
            err.contains("for_each_dir"),
            "expected message to name the cross-file kind, got: {err}",
        );
    }
}