// alint-rules 0.10.2

//! `for_each_dir` — iterate over every directory matching `select:` and
//! evaluate a nested `require:` block against each. Path-template tokens
//! in the nested specs are pre-substituted per iteration using the
//! iterated directory as the anchor.
//!
//! Token conventions (shared with `for_each_file` and `pair`):
//!
//! - `{path}` — full relative path of the iterated entry.
//! - `{dir}`  — parent directory of the iterated entry.
//! - `{basename}` — name of the iterated entry.
//! - `{stem}` — name with the final extension stripped.
//! - `{ext}` — final extension without the dot.
//! - `{parent_name}` — name of the entry's parent directory.
//!
//! When iterating *directories*, use `{path}` to name the iterated dir
//! itself (e.g. `"{path}/mod.rs"` to require a `mod.rs` inside it). Use
//! `{dir}` only when you need the parent of the matched entry.
//!
//! Canonical shape — for every direct subdirectory of `src/`, require a
//! `mod.rs`:
//!
//! ```yaml
//! - id: every-module-has-mod
//!   kind: for_each_dir
//!   select: "src/*"
//!   require:
//!     - kind: file_exists
//!       paths: "{path}/mod.rs"
//!   level: error
//! ```

use alint_core::template::PathTokens;
use alint_core::when::{IterEnv, WhenExpr};
use alint_core::{
    CompiledNestedSpec, Context, Error, Level, NestedRuleSpec, Result, Rule, RuleSpec, Scope,
    Violation,
};
use serde::Deserialize;

/// Raw `for_each_dir` options as deserialized from a rule spec in
/// [`build`]. Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// Pattern selecting which entries to iterate (e.g. `"src/*"`).
    /// Compiled into a `Scope` by [`build`].
    select: String,
    /// Optional per-iteration filter — evaluated against each
    /// iterated entry's `iter` context. Common shape:
    /// `iter.has_file("Cargo.toml")` to scope the iteration to
    /// directories that look like a workspace member.
    ///
    /// Kept as source text here; [`build`] parses it once via
    /// [`parse_when_iter`]. Absent from the config means "no filter".
    #[serde(default)]
    when_iter: Option<String>,
    /// Nested rule specs evaluated once per selected entry. [`build`]
    /// rejects an empty list.
    require: Vec<NestedRuleSpec>,
}

/// Compiled `for_each_dir` rule: everything [`build`] parsed out of the
/// spec, ready to be handed to [`evaluate_for_each`].
#[derive(Debug)]
pub struct ForEachDirRule {
    /// Rule id copied from the spec; used as the prefix in diagnostics
    /// produced while iterating.
    id: String,
    /// Level copied from the spec and passed down when nested specs
    /// are instantiated.
    level: Level,
    /// Copied from the spec. Not read in this file directly —
    /// NOTE(review): presumably consumed by `rule_common_impl!()`; confirm.
    policy_url: Option<String>,
    /// Compiled form of the `select:` pattern.
    select_scope: Scope,
    /// Pre-parsed `when_iter:` expression, if one was configured.
    when_iter: Option<WhenExpr>,
    /// Pre-compiled nested rules from `require:` (never empty when the
    /// rule was produced by [`build`]).
    require: Vec<CompiledNestedSpec>,
}

impl Rule for ForEachDirRule {
    // NOTE(review): macro body is defined in `alint_core`; presumably it
    // supplies the boilerplate accessors shared by every rule (id /
    // level / policy_url) — confirm against the macro definition.
    alint_core::rule_common_impl!();

    /// Always `true`: this rule needs the whole file index, not just
    /// the changed subset.
    fn requires_full_index(&self) -> bool {
        // Cross-file: per-directory verdicts depend on what's in
        // each iterated dir as a whole, not just changed entries.
        // A `for_each_dir` over `src/*` requiring `mod.rs` must
        // see every `src/*` even if only one file inside it
        // changed. Per roadmap, opts out of `--changed` filtering.
        true
    }

    /// Evaluate the nested `require:` rules against every directory
    /// matched by `select:` by delegating to the shared
    /// [`evaluate_for_each`] driver in [`IterateMode::Dirs`] mode.
    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
        evaluate_for_each(
            &self.id,
            self.level,
            &self.select_scope,
            self.when_iter.as_ref(),
            &self.require,
            ctx,
            IterateMode::Dirs,
        )
    }
}

pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
    alint_core::reject_scope_filter_on_cross_file(spec, "for_each_dir")?;
    let opts: Options = spec
        .deserialize_options()
        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
    if opts.require.is_empty() {
        return Err(Error::rule_config(
            &spec.id,
            "for_each_dir requires at least one nested rule under `require:`",
        ));
    }
    let select_scope = Scope::from_patterns(&[opts.select])?;
    let when_iter = parse_when_iter(spec, opts.when_iter.as_deref())?;
    let require = compile_nested_require(&spec.id, opts.require)?;
    Ok(Box::new(ForEachDirRule {
        id: spec.id.clone(),
        level: spec.level,
        policy_url: spec.policy_url.clone(),
        select_scope,
        when_iter,
        require,
    }))
}

/// Compile every `NestedRuleSpec` under `require:` up front, so each
/// nested `when:` source is parsed exactly once at rule-build time
/// rather than once per iteration (as all three callers did before
/// v0.9.12).
///
/// Shared by `for_each_dir`, `for_each_file`, and
/// `every_matching_has`; new cross-file iteration rules should route
/// their require list through here as well.
///
/// Each spec is compiled with `parent_id` and its zero-based position
/// in `require`. Compilation stops at the first failure.
///
/// # Errors
///
/// Returns the first error reported by `CompiledNestedSpec::compile`.
pub(crate) fn compile_nested_require(
    parent_id: &str,
    require: Vec<NestedRuleSpec>,
) -> Result<Vec<CompiledNestedSpec>> {
    let mut compiled = Vec::with_capacity(require.len());
    for (position, nested) in require.into_iter().enumerate() {
        let ready = CompiledNestedSpec::compile(nested, parent_id, position)?;
        compiled.push(ready);
    }
    Ok(compiled)
}

/// Compile a `when_iter:` source string into a `WhenExpr` at
/// rule-build time. Public to the crate so the sibling
/// `for_each_file` and `every_matching_has` rules can reuse the
/// same error shape.
pub(crate) fn parse_when_iter(spec: &RuleSpec, src: Option<&str>) -> Result<Option<WhenExpr>> {
    let Some(src) = src else { return Ok(None) };
    alint_core::when::parse(src)
        .map(Some)
        .map_err(|e| Error::rule_config(&spec.id, format!("invalid `when_iter:`: {e}")))
}

/// What to iterate in [`evaluate_for_each`].
///
/// Selects which index iterator(s) feed the loop; the `select:` scope
/// is applied to whatever this yields.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum IterateMode {
    /// Directory entries only (`ctx.index.dirs()`) — used by
    /// `for_each_dir` in this file.
    Dirs,
    /// File entries only (`ctx.index.files()`).
    Files,
    /// Both files and dirs (dirs first) — used by `every_matching_has`.
    Both,
}

/// Shared evaluation logic for `for_each_dir`, `for_each_file`, and
/// `every_matching_has`. `mode` selects which entries to iterate.
/// `when_iter` (compiled at rule-build time) gates each iteration:
/// when present and false for an entry, that entry is skipped
/// before any nested rule is built or evaluated.
///
/// 108 lines after the v0.9.8 literal-path bypass landed —
/// extracting the bypass into a separate helper would require
/// threading the `parent_id` / level / current entry / nested
/// spec through 5 args, and the bypass and the fallback path
/// share the violation-attribution loop. Reads better
/// top-to-bottom as one phased dispatcher.
#[allow(clippy::too_many_lines)]
pub(crate) fn evaluate_for_each(
    parent_id: &str,
    level: Level,
    select_scope: &Scope,
    when_iter: Option<&WhenExpr>,
    require: &[CompiledNestedSpec],
    ctx: &Context<'_>,
    mode: IterateMode,
) -> Result<Vec<Violation>> {
    let Some(registry) = ctx.registry else {
        return Err(Error::Other(format!(
            "rule {parent_id}: nested-rule evaluation needs a RuleRegistry in the Context \
             (likely an Engine constructed without one)",
        )));
    };

    let entries: Box<dyn Iterator<Item = _>> = match mode {
        IterateMode::Dirs => Box::new(ctx.index.dirs()),
        IterateMode::Files => Box::new(ctx.index.files()),
        IterateMode::Both => Box::new(ctx.index.dirs().chain(ctx.index.files())),
    };

    let mut violations = Vec::new();
    for entry in entries {
        if !select_scope.matches(&entry.path, ctx.index) {
            continue;
        }

        // Per-iteration `when_iter:` filter. Cheap to evaluate
        // (one IterEnv build + one expression walk per matched
        // entry); skips the nested-rule build entirely on a
        // false verdict, which is the whole point of the field.
        let iter_env = IterEnv {
            path: &entry.path,
            is_dir: entry.is_dir,
            index: ctx.index,
        };
        if let Some(expr) = when_iter {
            if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
                let env = alint_core::WhenEnv {
                    facts,
                    vars,
                    iter: Some(iter_env),
                };
                match expr.evaluate(&env) {
                    Ok(true) => {}
                    Ok(false) => continue,
                    Err(e) => {
                        violations.push(
                            Violation::new(format!("{parent_id}: when_iter error: {e}"))
                                .with_path(entry.path.clone()),
                        );
                        continue;
                    }
                }
            }
        }

        let tokens = PathTokens::from_path(&entry.path);
        for (i, nested) in require.iter().enumerate() {
            // v0.9.12: nested `when:` is pre-compiled at rule-
            // build time (`CompiledNestedSpec`) — gate on the
            // already-parsed expression instead of re-parsing
            // the source per iteration. Same `iter.*` context
            // is available so a nested rule can reach back to
            // the iteration just like the outer `when_iter:`
            // does. We instantiate the per-iteration spec only
            // AFTER the gate so a falsy `when:` skips both the
            // template-render work AND the registry build.
            if let Some(expr) = &nested.when {
                if let (Some(facts), Some(vars)) = (ctx.facts, ctx.vars) {
                    let env = alint_core::WhenEnv {
                        facts,
                        vars,
                        iter: Some(iter_env),
                    };
                    match expr.evaluate(&env) {
                        Ok(true) => {}
                        Ok(false) => continue,
                        Err(e) => {
                            violations.push(
                                Violation::new(format!(
                                    "{parent_id}: nested rule #{i} when error: {e}"
                                ))
                                .with_path(entry.path.clone()),
                            );
                            continue;
                        }
                    }
                }
            }
            let nested_spec = nested.spec.instantiate(parent_id, i, level, &tokens);
            let nested_rule = match registry.build(&nested_spec) {
                Ok(r) => r,
                Err(e) => {
                    violations.push(
                        Violation::new(format!(
                            "{parent_id}: failed to build nested rule #{i} for {}: {e}",
                            entry.path.display()
                        ))
                        .with_path(entry.path.clone()),
                    );
                    continue;
                }
            };
            // v0.9.8: when the nested rule's `paths:` template
            // resolved to a single literal path AND the rule is
            // a per-file rule, bypass `rule.evaluate(ctx)` —
            // which would iterate `ctx.index.files()` (1M
            // entries) for a single-target lookup — and dispatch
            // via `evaluate_file` against the in-index entry
            // directly. Closes the v0.9.7 → v0.9.8 cliff for the
            // canonical for_each_file × per-file-content-rule
            // shape (S7's `every-lib-has-content` was 484s under
            // v0.9.7's full-index scan; this drops it to a few
            // milliseconds × N iterations).
            //
            // For non-per-file rules (e.g. `file_exists`,
            // `toml_path_matches`), fall through to the rule's
            // own evaluate — file_exists has its own literal-
            // path fast path (contains_file lookup) since
            // v0.9.5; toml_path_matches reads the file
            // directly without scanning the full index.
            // v0.9.10: a single `path_scope().matches(literal, ctx.index)`
            // covers both the path-glob AND the per-rule
            // `scope_filter` ancestor predicate, since `Scope`
            // now owns its `Option<ScopeFilter>` and `matches`
            // consults it. The earlier v0.9.9
            // `nested_rule.scope_filter()` guard this bypass
            // had is no longer needed.
            if let Some(literal) = nested_spec_single_literal(&nested_spec)
                && let Some(pf) = nested_rule.as_per_file()
                && pf.path_scope().matches(&literal, ctx.index)
            {
                let nested_violations = evaluate_one_per_file_rule(parent_id, i, &literal, pf, ctx);
                for mut v in nested_violations {
                    if v.path.is_none() {
                        v.path = Some(entry.path.clone());
                    }
                    violations.push(v);
                }
                continue;
            }
            let nested_violations = nested_rule.evaluate(ctx)?;
            for mut v in nested_violations {
                if v.path.is_none() {
                    v.path = Some(entry.path.clone());
                }
                violations.push(v);
            }
        }
    }
    Ok(violations)
}

/// Return the one literal relative path named by a nested rule spec's
/// `paths:` field, or `None` when the field is absent or is anything
/// other than exactly one plain path (several patterns, a glob, an
/// include/exclude shape, an empty string, or a `!`-prefixed entry).
///
/// [`evaluate_for_each`] uses this to decide whether a per-file nested
/// rule can be dispatched via `evaluate_file` against a single
/// in-index entry instead of running the rule's own full-index scan.
///
/// Deliberately conservative: any glob metacharacter (`*`, `?`, `[`,
/// `]`, `{`, `}`) disqualifies the pattern, even if it is escaped. The
/// shape this exists for is `paths: "{path}/<basename>"`, which is
/// always a plain literal once the template is expanded; a false
/// positive here would silently bypass the rule's own glob handling.
fn nested_spec_single_literal(spec: &alint_core::RuleSpec) -> Option<std::path::PathBuf> {
    use alint_core::PathsSpec;

    const GLOB_META: [char; 6] = ['*', '?', '[', ']', '{', '}'];

    let candidate: &str = match spec.paths.as_ref()? {
        PathsSpec::Single(only) => only,
        PathsSpec::Many(list) if list.len() == 1 => &list[0],
        _ => return None,
    };

    let disqualified =
        candidate.is_empty() || candidate.starts_with('!') || candidate.contains(GLOB_META);
    if disqualified {
        None
    } else {
        Some(std::path::PathBuf::from(candidate))
    }
}

/// Run one per-file rule against the single in-index file at
/// `literal`: read it once (size-capped), hand the bytes to
/// `evaluate_file`, and return whatever violations result.
///
/// Outcomes:
///
/// - `literal` is not a file in the index → no violations.
/// - the read fails with an I/O error → no violations (silent skip).
/// - the file exceeds the read cap → one violation on `literal`.
/// - `evaluate_file` fails → one violation carrying the error text,
///   prefixed with `parent_id` and the nested rule's position to match
///   the rule-major path's shape.
fn evaluate_one_per_file_rule(
    parent_id: &str,
    nested_i: usize,
    literal: &std::path::Path,
    pf: &dyn alint_core::PerFileRule,
    ctx: &Context<'_>,
) -> Vec<Violation> {
    // Not in the index: same observable result as the rule's own
    // `evaluate` when its path scope matches no files.
    if !ctx.index.contains_file(literal) {
        return Vec::new();
    }

    let on_disk = ctx.root.join(literal);
    let contents = match crate::io::read_capped(&on_disk) {
        Ok(contents) => contents,
        // Mirrors the rule-major behaviour: a read failure (permission
        // flake, file deleted mid-walk) is skipped without comment.
        Err(crate::io::ReadCapError::Io(_)) => return Vec::new(),
        // Over the whole-file cap (256 MiB): report it instead of
        // skipping, since a silent skip used to mask an OOM-DoS
        // surface for multi-GB files reached via a literal path.
        Err(crate::io::ReadCapError::TooLarge(n)) => {
            let message = format!(
                "{parent_id}: nested rule #{nested_i} cannot analyze {} \
                 — file is too large ({n} bytes; {} MiB cap)",
                literal.display(),
                crate::io::MAX_ANALYZE_BYTES / (1024 * 1024),
            );
            return vec![Violation::new(message).with_path(literal)];
        }
    };

    pf.evaluate_file(ctx, literal, &contents).unwrap_or_else(|e| {
        vec![Violation::new(format!(
            "{parent_id}: nested rule #{nested_i} error on {}: {e}",
            literal.display()
        ))]
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex, RuleRegistry};
    use std::path::Path;

    /// Build an in-memory `FileIndex` from `(relative path, is_dir)`
    /// pairs. Every entry gets a placeholder size of 1.
    fn index(entries: &[(&str, bool)]) -> FileIndex {
        FileIndex::from_entries(
            entries
                .iter()
                .map(|(p, is_dir)| FileEntry {
                    path: std::path::Path::new(p).into(),
                    is_dir: *is_dir,
                    size: 1,
                })
                .collect(),
        )
    }

    /// Registry with the crate's built-in rule kinds, so nested
    /// `kind: file_exists` specs can be built.
    fn registry() -> RuleRegistry {
        crate::builtin_registry()
    }

    /// Evaluate `rule` against an index built from `files`, using a
    /// context with a registry but no facts / vars / git data. Panics
    /// if evaluation returns an error.
    fn eval_with(rule: &ForEachDirRule, files: &[(&str, bool)]) -> Vec<Violation> {
        let idx = index(files);
        let reg = registry();
        let ctx = Context {
            root: Path::new("/"),
            index: &idx,
            registry: Some(&reg),
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        rule.evaluate(&ctx).unwrap()
    }

    /// Construct a `ForEachDirRule` directly (bypassing `build`) with
    /// id `"t"`, error level, no `when_iter`, and the given nested
    /// rules.
    fn rule(select: &str, require: Vec<NestedRuleSpec>) -> ForEachDirRule {
        let require = compile_nested_require("t", require).unwrap();
        ForEachDirRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            select_scope: Scope::from_patterns(&[select.to_string()]).unwrap(),
            when_iter: None,
            require,
        }
    }

    /// Nested `file_exists` spec whose `paths:` is the given template.
    fn require_file_exists(path: &str) -> NestedRuleSpec {
        // Build via YAML to exercise the same path production users take.
        let yaml = format!("kind: file_exists\npaths: \"{path}\"\n");
        serde_yaml_ng::from_str(&yaml).unwrap()
    }

    // Every selected directory contains the required file → clean run.
    #[test]
    fn passes_when_every_dir_has_required_file() {
        let r = rule("src/*", vec![require_file_exists("{path}/mod.rs")]);
        let v = eval_with(
            &r,
            &[
                ("src", true),
                ("src/foo", true),
                ("src/foo/mod.rs", false),
                ("src/bar", true),
                ("src/bar/mod.rs", false),
            ],
        );
        assert!(v.is_empty(), "unexpected: {v:?}");
    }

    // One directory lacks the file → exactly one violation, attributed
    // to that directory.
    #[test]
    fn violates_when_a_dir_missing_required_file() {
        let r = rule("src/*", vec![require_file_exists("{path}/mod.rs")]);
        let v = eval_with(
            &r,
            &[
                ("src", true),
                ("src/foo", true),
                ("src/foo/mod.rs", false),
                ("src/bar", true), // no mod.rs
            ],
        );
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("src/bar")));
    }

    // `select:` matches nothing in the index → nested rules never run.
    #[test]
    fn no_matched_dirs_means_no_violations() {
        let r = rule("components/*", vec![require_file_exists("{dir}/index.tsx")]);
        let v = eval_with(&r, &[("src", true), ("src/foo", true)]);
        assert!(v.is_empty());
    }

    // Each nested rule is evaluated independently per directory: only
    // the unmet requirement is reported.
    #[test]
    fn every_require_rule_evaluated_per_dir() {
        let r = rule(
            "src/*",
            vec![
                require_file_exists("{path}/mod.rs"),
                require_file_exists("{path}/README.md"),
            ],
        );
        let v = eval_with(
            &r,
            &[
                ("src", true),
                ("src/foo", true),
                ("src/foo/mod.rs", false), // has mod.rs, missing README
            ],
        );
        assert_eq!(v.len(), 1);
        assert!(
            v[0].message.contains("README"),
            "expected README in message; got {:?}",
            v[0].message
        );
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // for_each_dir is a cross-file rule (requires_full_index =
        // true); scope_filter is per-file-rules-only. The build
        // path must reject it with a clear message pointing at
        // the for_each_dir + when_iter: alternative.
        let yaml = r#"
id: t
kind: for_each_dir
select: "src/*"
require:
  - kind: file_exists
    paths: "{path}/mod.rs"
level: error
scope_filter:
  has_ancestor: Cargo.toml
"#;
        let spec = crate::test_support::spec_yaml(yaml);
        let err = build(&spec).unwrap_err().to_string();
        assert!(
            err.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {err}",
        );
        assert!(
            err.contains("for_each_dir"),
            "expected message to name the cross-file kind, got: {err}",
        );
    }
}