alint_rules/for_each_dir.rs
1//! `for_each_dir` — iterate over every directory matching `select:` and
2//! evaluate a nested `require:` block against each. Path-template tokens
3//! in the nested specs are pre-substituted per iteration using the
4//! iterated directory as the anchor.
5//!
6//! Token conventions (shared with `for_each_file` and `pair`):
7//!
8//! - `{path}` — full relative path of the iterated entry.
9//! - `{dir}` — parent directory of the iterated entry.
10//! - `{basename}` — name of the iterated entry.
11//! - `{stem}` — name with the final extension stripped.
12//! - `{ext}` — final extension without the dot.
13//! - `{parent_name}` — name of the entry's parent directory.
14//!
15//! When iterating *directories*, use `{path}` to name the iterated dir
16//! itself (e.g. `"{path}/mod.rs"` to require a `mod.rs` inside it). Use
17//! `{dir}` only when you need the parent of the matched entry.
18//!
19//! Canonical shape — for every direct subdirectory of `src/`, require a
20//! `mod.rs`:
21//!
//! ```yaml
//! - id: every-module-has-mod
//!   kind: for_each_dir
//!   select: "src/*"
//!   require:
//!     - kind: file_exists
//!       paths: "{path}/mod.rs"
//!   level: error
//! ```
31
32use alint_core::template::PathTokens;
33use alint_core::when::{IterEnv, WhenExpr};
34use alint_core::{Context, Error, Level, NestedRuleSpec, Result, Rule, RuleSpec, Scope, Violation};
35use serde::Deserialize;
36
37#[derive(Debug, Deserialize)]
38#[serde(deny_unknown_fields)]
39struct Options {
40 select: String,
41 /// Optional per-iteration filter — evaluated against each
42 /// iterated entry's `iter` context. Common shape:
43 /// `iter.has_file("Cargo.toml")` to scope the iteration to
44 /// directories that look like a workspace member.
45 #[serde(default)]
46 when_iter: Option<String>,
47 require: Vec<NestedRuleSpec>,
48}
49
50#[derive(Debug)]
51pub struct ForEachDirRule {
52 id: String,
53 level: Level,
54 policy_url: Option<String>,
55 select_scope: Scope,
56 when_iter: Option<WhenExpr>,
57 require: Vec<NestedRuleSpec>,
58}
59
60impl Rule for ForEachDirRule {
61 fn id(&self) -> &str {
62 &self.id
63 }
64 fn level(&self) -> Level {
65 self.level
66 }
67 fn policy_url(&self) -> Option<&str> {
68 self.policy_url.as_deref()
69 }
70
71 fn requires_full_index(&self) -> bool {
72 // Cross-file: per-directory verdicts depend on what's in
73 // each iterated dir as a whole, not just changed entries.
74 // A `for_each_dir` over `src/*` requiring `mod.rs` must
75 // see every `src/*` even if only one file inside it
76 // changed. Per roadmap, opts out of `--changed` filtering.
77 true
78 }
79
80 fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
81 evaluate_for_each(
82 &self.id,
83 self.level,
84 &self.select_scope,
85 self.when_iter.as_ref(),
86 &self.require,
87 ctx,
88 IterateMode::Dirs,
89 )
90 }
91}
92
93pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
94 alint_core::reject_scope_filter_on_cross_file(spec, "for_each_dir")?;
95 let opts: Options = spec
96 .deserialize_options()
97 .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
98 if opts.require.is_empty() {
99 return Err(Error::rule_config(
100 &spec.id,
101 "for_each_dir requires at least one nested rule under `require:`",
102 ));
103 }
104 let select_scope = Scope::from_patterns(&[opts.select])?;
105 let when_iter = parse_when_iter(spec, opts.when_iter.as_deref())?;
106 Ok(Box::new(ForEachDirRule {
107 id: spec.id.clone(),
108 level: spec.level,
109 policy_url: spec.policy_url.clone(),
110 select_scope,
111 when_iter,
112 require: opts.require,
113 }))
114}
115
116/// Compile a `when_iter:` source string into a `WhenExpr` at
117/// rule-build time. Public to the crate so the sibling
118/// `for_each_file` and `every_matching_has` rules can reuse the
119/// same error shape.
120pub(crate) fn parse_when_iter(spec: &RuleSpec, src: Option<&str>) -> Result<Option<WhenExpr>> {
121 let Some(src) = src else { return Ok(None) };
122 alint_core::when::parse(src)
123 .map(Some)
124 .map_err(|e| Error::rule_config(&spec.id, format!("invalid `when_iter:`: {e}")))
125}
126
/// Selects which index entries [`evaluate_for_each`] walks.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum IterateMode {
    /// Directories only.
    Dirs,
    /// Files only.
    Files,
    /// Directories first, then files — used by `every_matching_has`.
    Both,
}
135
136/// Shared evaluation logic for `for_each_dir`, `for_each_file`, and
137/// `every_matching_has`. `mode` selects which entries to iterate.
138/// `when_iter` (compiled at rule-build time) gates each iteration:
139/// when present and false for an entry, that entry is skipped
140/// before any nested rule is built or evaluated.
141///
142/// 108 lines after the v0.9.8 literal-path bypass landed —
143/// extracting the bypass into a separate helper would require
144/// threading the `parent_id` / level / current entry / nested
145/// spec through 5 args, and the bypass and the fallback path
146/// share the violation-attribution loop. Reads better
147/// top-to-bottom as one phased dispatcher.
148#[allow(clippy::too_many_lines)]
149pub(crate) fn evaluate_for_each(
150 parent_id: &str,
151 level: Level,
152 select_scope: &Scope,
153 when_iter: Option<&WhenExpr>,
154 require: &[NestedRuleSpec],
155 ctx: &Context<'_>,
156 mode: IterateMode,
157) -> Result<Vec<Violation>> {
158 let Some(registry) = ctx.registry else {
159 return Err(Error::Other(format!(
160 "rule {parent_id}: nested-rule evaluation needs a RuleRegistry in the Context \
161 (likely an Engine constructed without one)",
162 )));
163 };
164
165 let entries: Box<dyn Iterator<Item = _>> = match mode {
166 IterateMode::Dirs => Box::new(ctx.index.dirs()),
167 IterateMode::Files => Box::new(ctx.index.files()),
168 IterateMode::Both => Box::new(ctx.index.dirs().chain(ctx.index.files())),
169 };
170
171 let mut violations = Vec::new();
172 for entry in entries {
173 if !select_scope.matches(&entry.path) {
174 continue;
175 }
176
177 // Per-iteration `when_iter:` filter. Cheap to evaluate
178 // (one IterEnv build + one expression walk per matched
179 // entry); skips the nested-rule build entirely on a
180 // false verdict, which is the whole point of the field.
181 let iter_env = IterEnv {
182 path: &entry.path,
183 is_dir: entry.is_dir,
184 index: ctx.index,
185 };
186 if let Some(expr) = when_iter {
187 if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
188 let env = alint_core::WhenEnv {
189 facts,
190 vars,
191 iter: Some(iter_env),
192 };
193 match expr.evaluate(&env) {
194 Ok(true) => {}
195 Ok(false) => continue,
196 Err(e) => {
197 violations.push(
198 Violation::new(format!("{parent_id}: when_iter error: {e}"))
199 .with_path(entry.path.clone()),
200 );
201 continue;
202 }
203 }
204 }
205 }
206
207 let tokens = PathTokens::from_path(&entry.path);
208 for (i, nested) in require.iter().enumerate() {
209 let nested_spec = nested.instantiate(parent_id, i, level, &tokens);
210 // Gate the nested rule on its `when:` clause (if
211 // present). Same `iter.*` context is available, so a
212 // nested rule can reach back to the iteration just
213 // like the outer `when_iter:` does.
214 if let Some(when_src) = &nested_spec.when {
215 if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
216 let expr = alint_core::when::parse(when_src).map_err(|e| {
217 Error::rule_config(
218 parent_id,
219 format!("nested rule #{i}: invalid when: {e}"),
220 )
221 })?;
222 let env = alint_core::WhenEnv {
223 facts,
224 vars,
225 iter: Some(iter_env),
226 };
227 match expr.evaluate(&env) {
228 Ok(true) => {}
229 Ok(false) => continue,
230 Err(e) => {
231 violations.push(
232 Violation::new(format!(
233 "{parent_id}: nested rule #{i} when error: {e}"
234 ))
235 .with_path(entry.path.clone()),
236 );
237 continue;
238 }
239 }
240 }
241 }
242 let nested_rule = match registry.build(&nested_spec) {
243 Ok(r) => r,
244 Err(e) => {
245 violations.push(
246 Violation::new(format!(
247 "{parent_id}: failed to build nested rule #{i} for {}: {e}",
248 entry.path.display()
249 ))
250 .with_path(entry.path.clone()),
251 );
252 continue;
253 }
254 };
255 // v0.9.8: when the nested rule's `paths:` template
256 // resolved to a single literal path AND the rule is
257 // a per-file rule, bypass `rule.evaluate(ctx)` —
258 // which would iterate `ctx.index.files()` (1M
259 // entries) for a single-target lookup — and dispatch
260 // via `evaluate_file` against the in-index entry
261 // directly. Closes the v0.9.7 → v0.9.8 cliff for the
262 // canonical for_each_file × per-file-content-rule
263 // shape (S7's `every-lib-has-content` was 484s under
264 // v0.9.7's full-index scan; this drops it to a few
265 // milliseconds × N iterations).
266 //
267 // For non-per-file rules (e.g. `file_exists`,
268 // `toml_path_matches`), fall through to the rule's
269 // own evaluate — file_exists has its own literal-
270 // path fast path (contains_file lookup) since
271 // v0.9.5; toml_path_matches reads the file
272 // directly without scanning the full index.
273 // v0.9.9: also gate on `nested_rule.scope_filter()`
274 // so the bypass produces the same observable result
275 // as the rule's own `evaluate` would. Without this
276 // guard, a nested rule that carries `scope_filter:`
277 // (propagated through `NestedRuleSpec.scope_filter`)
278 // would have the bypass execute against the literal
279 // regardless of whether the literal's ancestor chain
280 // satisfies the filter — divergent from the rule-
281 // major fallback at the `nested_rule.evaluate(ctx)`
282 // arm below. We consult the `Rule`-side accessor
283 // (already overridden by every per-file rule whose
284 // spec carries `scope_filter:`) instead of duplicating
285 // it onto `PerFileRule`.
286 if let Some(literal) = nested_spec_single_literal(&nested_spec)
287 && let Some(pf) = nested_rule.as_per_file()
288 && pf.path_scope().matches(&literal)
289 && nested_rule
290 .scope_filter()
291 .is_none_or(|f| f.matches(&literal, ctx.index))
292 {
293 let nested_violations = evaluate_one_per_file_rule(parent_id, i, &literal, pf, ctx);
294 for mut v in nested_violations {
295 if v.path.is_none() {
296 v.path = Some(entry.path.clone());
297 }
298 violations.push(v);
299 }
300 continue;
301 }
302 let nested_violations = nested_rule.evaluate(ctx)?;
303 for mut v in nested_violations {
304 if v.path.is_none() {
305 v.path = Some(entry.path.clone());
306 }
307 violations.push(v);
308 }
309 }
310 }
311 Ok(violations)
312}
313
314/// Extract a single literal relative path from a nested rule
315/// spec's `paths:` field, or `None` if the spec carries multiple
316/// patterns / a glob / an include-exclude shape. Used by
317/// [`evaluate_for_each`] to detect when a per-file nested rule
318/// can be dispatched via `evaluate_file` against a single
319/// in-index entry instead of going through the rule's own
320/// O(N) full-index scan.
321///
322/// Conservative: returns `None` for any pattern containing a
323/// glob metacharacter, even when the metacharacter is escaped —
324/// the bench cliff this exists to fix is the canonical
325/// `paths: "{path}/<basename>"` shape, which always resolves to
326/// a literal post-template-expansion. False positives here
327/// would silently bypass the rule's own glob handling.
328fn nested_spec_single_literal(spec: &alint_core::RuleSpec) -> Option<std::path::PathBuf> {
329 use alint_core::PathsSpec;
330 let paths = spec.paths.as_ref()?;
331 let single: &str = match paths {
332 PathsSpec::Single(s) => s,
333 PathsSpec::Many(v) if v.len() == 1 => &v[0],
334 _ => return None,
335 };
336 if single.is_empty() || single.starts_with('!') {
337 return None;
338 }
339 if single
340 .chars()
341 .any(|c| matches!(c, '*' | '?' | '[' | ']' | '{' | '}'))
342 {
343 return None;
344 }
345 Some(std::path::PathBuf::from(single))
346}
347
348/// Read the in-index file at `literal` once, dispatch to the
349/// per-file rule's `evaluate_file`, and return any violations
350/// (with `parent_id`-flavoured rule-error prefixing on failure
351/// to match the rule-major path's shape).
352fn evaluate_one_per_file_rule(
353 parent_id: &str,
354 nested_i: usize,
355 literal: &std::path::Path,
356 pf: &dyn alint_core::PerFileRule,
357 ctx: &Context<'_>,
358) -> Vec<Violation> {
359 if !ctx.index.contains_file(literal) {
360 // No in-index file at this path — same observable result
361 // as the rule's own `evaluate` would produce when its
362 // path_scope matches no files (i.e. zero violations).
363 return Vec::new();
364 }
365 let abs = ctx.root.join(literal);
366 let Ok(bytes) = std::fs::read(&abs) else {
367 // Mirror the rule-major behaviour: silent skip on read
368 // failure (permission flake, race with mid-walk delete).
369 return Vec::new();
370 };
371 match pf.evaluate_file(ctx, literal, &bytes) {
372 Ok(vs) => vs,
373 Err(e) => vec![Violation::new(format!(
374 "{parent_id}: nested rule #{nested_i} error on {}: {e}",
375 literal.display()
376 ))],
377 }
378}
379
#[cfg(test)]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex, RuleRegistry};
    use std::path::Path;

    /// Build an in-memory index from `(path, is_dir)` pairs. Every
    /// entry gets a size of 1.
    fn index(entries: &[(&str, bool)]) -> FileIndex {
        FileIndex::from_entries(
            entries
                .iter()
                .map(|(p, is_dir)| FileEntry {
                    path: Path::new(p).into(),
                    is_dir: *is_dir,
                    size: 1,
                })
                .collect(),
        )
    }

    /// The crate's built-in registry, needed to build nested rules.
    fn registry() -> RuleRegistry {
        crate::builtin_registry()
    }

    /// Evaluate `rule` against an index made from `files`, with a
    /// registry but no facts, vars, or git data in the context.
    fn eval_with(rule: &ForEachDirRule, files: &[(&str, bool)]) -> Vec<Violation> {
        let idx = index(files);
        let reg = registry();
        let ctx = Context {
            root: Path::new("/"),
            index: &idx,
            registry: Some(&reg),
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        rule.evaluate(&ctx).unwrap()
    }

    /// Construct a rule directly (bypassing `build`) with id `t`,
    /// error level, and no `when_iter`.
    fn rule(select: &str, require: Vec<NestedRuleSpec>) -> ForEachDirRule {
        ForEachDirRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            select_scope: Scope::from_patterns(&[select.to_string()]).unwrap(),
            when_iter: None,
            require,
        }
    }

    /// A nested `file_exists` spec for the given `paths:` template.
    fn require_file_exists(path: &str) -> NestedRuleSpec {
        // Build via YAML to exercise the same path production users take.
        let yaml = format!("kind: file_exists\npaths: \"{path}\"\n");
        serde_yaml_ng::from_str(&yaml).unwrap()
    }

    #[test]
    fn passes_when_every_dir_has_required_file() {
        let r = rule("src/*", vec![require_file_exists("{path}/mod.rs")]);
        let v = eval_with(
            &r,
            &[
                ("src", true),
                ("src/foo", true),
                ("src/foo/mod.rs", false),
                ("src/bar", true),
                ("src/bar/mod.rs", false),
            ],
        );
        assert!(v.is_empty(), "unexpected: {v:?}");
    }

    #[test]
    fn violates_when_a_dir_missing_required_file() {
        let r = rule("src/*", vec![require_file_exists("{path}/mod.rs")]);
        let v = eval_with(
            &r,
            &[
                ("src", true),
                ("src/foo", true),
                ("src/foo/mod.rs", false),
                ("src/bar", true), // no mod.rs
            ],
        );
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("src/bar")));
    }

    #[test]
    fn no_matched_dirs_means_no_violations() {
        let r = rule("components/*", vec![require_file_exists("{dir}/index.tsx")]);
        let v = eval_with(&r, &[("src", true), ("src/foo", true)]);
        assert!(v.is_empty());
    }

    #[test]
    fn every_require_rule_evaluated_per_dir() {
        let r = rule(
            "src/*",
            vec![
                require_file_exists("{path}/mod.rs"),
                require_file_exists("{path}/README.md"),
            ],
        );
        let v = eval_with(
            &r,
            &[
                ("src", true),
                ("src/foo", true),
                ("src/foo/mod.rs", false), // has mod.rs, missing README
            ],
        );
        assert_eq!(v.len(), 1);
        assert!(
            v[0].message.contains("README"),
            "expected README in message; got {:?}",
            v[0].message
        );
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // for_each_dir is a cross-file rule (requires_full_index =
        // true); scope_filter is per-file-rules-only. The build
        // path must reject it with a clear message pointing at
        // the for_each_dir + when_iter: alternative.
        let yaml = r#"
id: t
kind: for_each_dir
select: "src/*"
require:
  - kind: file_exists
    paths: "{path}/mod.rs"
level: error
scope_filter:
  has_ancestor: Cargo.toml
"#;
        let spec = crate::test_support::spec_yaml(yaml);
        let err = build(&spec).unwrap_err().to_string();
        assert!(
            err.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {err}",
        );
        assert!(
            err.contains("for_each_dir"),
            "expected message to name the cross-file kind, got: {err}",
        );
    }
}