Skip to main content

alint_core/
engine.rs

1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4use std::sync::atomic::{AtomicU64, Ordering};
5use std::time::Instant;
6
7use rayon::prelude::*;
8
9use crate::error::{Error, Result};
10use crate::facts::{FactSpec, FactValues, evaluate_facts};
11use crate::registry::RuleRegistry;
12use crate::report::{FixItem, FixReport, FixRuleResult, FixStatus, Report};
13use crate::rule::{Context, FixContext, FixOutcome, Rule, RuleResult, Violation};
14use crate::walker::FileIndex;
15use crate::when::{WhenEnv, WhenExpr};
16
/// Cheap helper: emit a `tracing::info!` event named `engine.phase`
/// carrying the phase label, the microseconds elapsed since `start`
/// (`elapsed_us`), plus arbitrary key/value pairs.
/// Used by the engine's phase + per-rule timing breakdown so a
/// scaling profile (`RUST_LOG=alint_core::engine=info` at
/// 10k/100k/1M) can show which phase (or rule) is growing
/// super-linearly. The event is info-level, so it is only recorded
/// when info is enabled for this target; the only unconditional
/// work is reading the elapsed time.
///
/// Usage: `phase!(start_instant, "label", key = value, ...)`. The
/// trailing comma is optional.
macro_rules! phase {
    ($start:expr, $phase:expr $(, $k:ident = $v:expr)* $(,)?) => {
        // u128 → u64 saturating conversion: `elapsed_us` overflows u64
        // only after ~584,000 years of wall time. `try_from` +
        // `unwrap_or(u64::MAX)` pegs that (practically unreachable)
        // overflow at u64::MAX instead of silently wrapping the way an
        // `as` cast would, which keeps log readers honest.
        let elapsed_us: u64 =
            u64::try_from($start.elapsed().as_micros()).unwrap_or(u64::MAX);
        tracing::info!(
            phase = $phase,
            elapsed_us = elapsed_us,
            $($k = $v,)*
            "engine.phase",
        );
    };
}
42
/// Pre-filtered `FileIndex`es for git-tracked rules. v0.9.11
/// structural fix lets the engine narrow the index handed to
/// each opted-in rule, so the rule's `evaluate()` no longer
/// needs to do its own `is_git_tracked(...)` check per file
/// (the `git_tracked_only`-silently-dropped recurrence-risk
/// shape that audit-tested in v0.9.10 is closed).
///
/// Each field is an `Option<FileIndex>` so the engine only pays
/// the build cost for modes that at least one rule opts into;
/// a `None` field means no index was built for that mode.
#[derive(Debug)]
struct GitTrackedIndexes {
    /// Index containing only files where `git_tracked.contains(path)`.
    /// Handed to rules with [`GitTrackedMode::FileOnly`].
    file_only: Option<FileIndex>,
    /// Index containing dirs where `dir_has_tracked_files(path,
    /// &git_tracked)` plus tracked files. Handed to rules with
    /// [`GitTrackedMode::DirAware`].
    dir_aware: Option<FileIndex>,
}
62
/// Return of [`Engine::collect_live_per_file_entries`]: the per-file
/// entries that should evaluate this run (paired with their position in
/// `self.entries`), plus any `when`-evaluation-error results to emit
/// verbatim.
///
/// Tuple layout: `.0` is the live `(entry index, entry)` list, `.1` is
/// the `(entry index, RuleResult)` list for rules whose `when`
/// expression failed to evaluate.
type LivePerFileEntries<'a> = (Vec<(usize, &'a RuleEntry)>, Vec<(usize, RuleResult)>);
68
/// A rule bundled with an optional `when` expression. Rules with a `when`
/// that evaluates to false at runtime are skipped (no `RuleResult` is
/// produced) — same observable effect as `level: off`, but gated on facts.
#[derive(Debug)]
pub struct RuleEntry {
    /// The rule to evaluate.
    pub rule: Box<dyn Rule>,
    /// Optional gate; `None` means the rule is never skipped by a
    /// `when` check.
    pub when: Option<WhenExpr>,
}
77
78impl RuleEntry {
79    pub fn new(rule: Box<dyn Rule>) -> Self {
80        Self { rule, when: None }
81    }
82
83    #[must_use]
84    pub fn with_when(mut self, expr: WhenExpr) -> Self {
85        self.when = Some(expr);
86        self
87    }
88}
89
/// Executes a set of rules against a pre-built [`FileIndex`].
///
/// The engine owns a [`RuleRegistry`] so cross-file rules (e.g.
/// `for_each_dir`) can build nested rules on demand during evaluation.
/// Optional `facts` and `vars` (set via the builder chain) are evaluated
/// at run time and threaded into each rule's [`Context`] and into the
/// `when` expression evaluator that gates rules.
#[derive(Debug)]
pub struct Engine {
    /// Loaded rules, each with its optional `when` gate. Reports list
    /// results in this order.
    entries: Vec<RuleEntry>,
    /// Registry handed to every rule [`Context`] the engine builds.
    registry: RuleRegistry,
    /// Fact specifications evaluated (via `evaluate_facts`) when a run
    /// starts.
    facts: Vec<FactSpec>,
    /// User variables, exposed to rules through [`Context`] and to
    /// `when` expressions through `WhenEnv`.
    vars: HashMap<String, String>,
    /// Size limit applied when fixing; constructors default it to
    /// `Some(1 << 20)`. NOTE(review): unit and the meaning of `None`
    /// are not visible in this part of the file — presumably bytes and
    /// "no limit"; confirm against the fix path.
    fix_size_limit: Option<u64>,
    /// In `--changed` mode, the set of paths (relative to root)
    /// that the user wants linted. `None` means "full check"; the
    /// engine bypasses every changed-set short-circuit. See
    /// [`Engine::with_changed_paths`] for the contract.
    changed_paths: Option<HashSet<PathBuf>>,
}
110
111impl Engine {
112    /// Backward-compatible: wrap each rule in a [`RuleEntry`] with no `when`.
113    pub fn new(rules: Vec<Box<dyn Rule>>, registry: RuleRegistry) -> Self {
114        let entries = rules.into_iter().map(RuleEntry::new).collect();
115        Self {
116            entries,
117            registry,
118            facts: Vec::new(),
119            vars: HashMap::new(),
120            fix_size_limit: Some(1 << 20),
121            changed_paths: None,
122        }
123    }
124
125    /// Construct from rule entries (each carrying an optional `when`).
126    pub fn from_entries(entries: Vec<RuleEntry>, registry: RuleRegistry) -> Self {
127        Self {
128            entries,
129            registry,
130            facts: Vec::new(),
131            vars: HashMap::new(),
132            fix_size_limit: Some(1 << 20),
133            changed_paths: None,
134        }
135    }
136
137    #[must_use]
138    pub fn with_fix_size_limit(mut self, limit: Option<u64>) -> Self {
139        self.fix_size_limit = limit;
140        self
141    }
142
143    #[must_use]
144    pub fn with_facts(mut self, facts: Vec<FactSpec>) -> Self {
145        self.facts = facts;
146        self
147    }
148
149    #[must_use]
150    pub fn with_vars(mut self, vars: HashMap<String, String>) -> Self {
151        self.vars = vars;
152        self
153    }
154
155    /// Restrict evaluation to the given set of paths (relative to
156    /// the alint root). Per-file rules see a [`FileIndex`]
157    /// filtered to only these paths; rules that override
158    /// [`Rule::requires_full_index`] (cross-file + existence
159    /// rules) still see the full index but are skipped when
160    /// their [`Rule::path_scope`] doesn't intersect the set.
161    ///
162    /// An empty set short-circuits to a no-op report — there's
163    /// nothing to lint. Pass `None` (or omit) to disable
164    /// `--changed` semantics entirely.
165    #[must_use]
166    pub fn with_changed_paths(mut self, set: HashSet<PathBuf>) -> Self {
167        self.changed_paths = Some(set);
168        self
169    }
170
    /// Number of rule entries loaded into this engine.
    pub fn rule_count(&self) -> usize {
        self.entries.len()
    }
174
175    /// The fixer for the loaded rule with this id, if the rule declares
176    /// one. Lets a caller (the LSP server) build an "Apply fix" edit for
177    /// a specific violation without re-deriving the rule set.
178    pub fn fixer_for(&self, rule_id: &str) -> Option<&dyn crate::rule::Fixer> {
179        self.entries
180            .iter()
181            .find(|e| e.rule.id() == rule_id)
182            .and_then(|e| e.rule.fixer())
183    }
184
185    /// Whether the loaded rule with this id is a per-file rule (the kind
186    /// [`Engine::run_for_file`] re-evaluates). Lets the LSP server tell
187    /// per-file findings (refreshed on every edit) apart from cross-file
188    /// ones (refreshed only on save), so it can preserve the latter
189    /// while re-running the former. Unknown ids return `false`.
190    pub fn is_per_file(&self, rule_id: &str) -> bool {
191        self.entries
192            .iter()
193            .find(|e| e.rule.id() == rule_id)
194            .is_some_and(|e| e.rule.as_per_file().is_some())
195    }
196
197    // ~125 lines but each block has its own purpose (changed-set
198    // short-circuit, fact eval, git probe, filtered-index build,
199    // cross-file partition, per-file partition, assembly). Splitting
200    // would mean threading the same ~6-arg context tuple through
201    // four helpers that share lifetimes — net worse for the reader.
202    // The function reads top-to-bottom as one phased pipeline.
203    #[allow(clippy::too_many_lines)]
204    pub fn run(&self, root: &Path, index: &FileIndex) -> Result<Report> {
205        let t_total = Instant::now();
206        // Empty changed-set fast path: nothing to lint, return
207        // an empty report rather than walk the entries list at
208        // all. Saves the fact-evaluation pass too.
209        if self.changed_paths.as_ref().is_some_and(HashSet::is_empty) {
210            return Ok(Report {
211                results: Vec::new(),
212            });
213        }
214
215        let t_facts = Instant::now();
216        let fact_values = evaluate_facts(&self.facts, root, index)?;
217        phase!(t_facts, "evaluate_facts", facts = self.facts.len() as u64);
218
219        let t_git = Instant::now();
220        let git_tracked = self.collect_git_tracked_if_needed(root);
221        let git_blame = self.build_blame_cache_if_needed(root);
222        phase!(t_git, "git_setup");
223
224        let t_filter = Instant::now();
225        let filtered_index = self.build_filtered_index(index);
226        phase!(
227            t_filter,
228            "build_filtered_index",
229            files = index.entries.len() as u64,
230        );
231
232        let t_git_idx = Instant::now();
233        let git_tracked_indexes = self.build_git_tracked_indexes(index, git_tracked.as_ref());
234        phase!(
235            t_git_idx,
236            "build_git_tracked_indexes",
237            built = u64::from(git_tracked_indexes.is_some()),
238        );
239
240        let full_ctx = Context {
241            root,
242            index,
243            registry: Some(&self.registry),
244            facts: Some(&fact_values),
245            vars: Some(&self.vars),
246            git_tracked: git_tracked.as_ref(),
247            git_blame: git_blame.as_ref(),
248        };
249        let filtered_ctx = filtered_index.as_ref().map(|fi| Context {
250            root,
251            index: fi,
252            registry: Some(&self.registry),
253            facts: Some(&fact_values),
254            vars: Some(&self.vars),
255            git_tracked: git_tracked.as_ref(),
256            git_blame: git_blame.as_ref(),
257        });
258        let git_file_only_ctx = git_tracked_indexes
259            .as_ref()
260            .and_then(|gti| gti.file_only.as_ref())
261            .map(|fi| Context {
262                root,
263                index: fi,
264                registry: Some(&self.registry),
265                facts: Some(&fact_values),
266                vars: Some(&self.vars),
267                git_tracked: git_tracked.as_ref(),
268                git_blame: git_blame.as_ref(),
269            });
270        let git_dir_aware_ctx = git_tracked_indexes
271            .as_ref()
272            .and_then(|gti| gti.dir_aware.as_ref())
273            .map(|fi| Context {
274                root,
275                index: fi,
276                registry: Some(&self.registry),
277                facts: Some(&fact_values),
278                vars: Some(&self.vars),
279                git_tracked: git_tracked.as_ref(),
280                git_blame: git_blame.as_ref(),
281            });
282        let when_env = WhenEnv {
283            facts: &fact_values,
284            vars: &self.vars,
285            iter: None,
286            env: None,
287        };
288
289        // Per-rule wall-time accumulator for the cross-file
290        // partition. One AtomicU64 per entry, indexed by
291        // entry position in `self.entries`. Workers add their
292        // rule's elapsed nanoseconds atomically; we dump the
293        // breakdown after the partition completes. Per-rule
294        // timing in a parallel partition is necessarily
295        // wall-time (a single rule can't span threads), so
296        // the totals here = sum of per-thread elapsed across
297        // workers, which still localises which rule dominates.
298        let cross_rule_ns: Vec<AtomicU64> =
299            (0..self.entries.len()).map(|_| AtomicU64::new(0)).collect();
300
301        // Cross-file partition: rules that don't opt into the
302        // file-major dispatch path (cross-file rules + per-file
303        // rules that haven't migrated yet). Same parallelism
304        // shape as v0.9.2 — rule-major par_iter.
305        let t_cross = Instant::now();
306        let cross_results: Vec<(usize, RuleResult)> = self
307            .entries
308            .par_iter()
309            .enumerate()
310            .filter_map(|(idx, entry)| {
311                if entry.rule.as_per_file().is_some() {
312                    return None;
313                }
314                if self.skip_for_changed(entry.rule.as_ref(), full_ctx.index) {
315                    return None;
316                }
317                let ctx = pick_ctx(
318                    entry.rule.as_ref(),
319                    &full_ctx,
320                    filtered_ctx.as_ref(),
321                    git_file_only_ctx.as_ref(),
322                    git_dir_aware_ctx.as_ref(),
323                );
324                let t_rule = Instant::now();
325                let result = run_entry(entry, ctx, &when_env, &fact_values);
326                // u128 → u64 saturating: same rationale as the
327                // `phase!` macro — elapsed_ns overflows u64 only
328                // after ~584 years per rule, and we want lossy
329                // truncation rather than a runtime panic on the
330                // hot path.
331                #[allow(clippy::cast_possible_truncation)]
332                let elapsed_ns = t_rule.elapsed().as_nanos() as u64;
333                cross_rule_ns[idx].fetch_add(elapsed_ns, Ordering::Relaxed);
334                result.map(|rr| (idx, rr))
335            })
336            .collect();
337        phase!(
338            t_cross,
339            "cross_file_partition",
340            rules = self
341                .entries
342                .iter()
343                .filter(|e| e.rule.as_per_file().is_none())
344                .count() as u64,
345        );
346        // Per-rule cross-file dump: skip zero-elapsed slots
347        // (rules that ran on the per-file path or were
348        // skipped by `--changed`). Sorted descending by
349        // elapsed so the worst offenders are at the top of
350        // the log.
351        if tracing::level_enabled!(tracing::Level::INFO) {
352            let mut rows: Vec<(&str, u64)> = self
353                .entries
354                .iter()
355                .enumerate()
356                .filter_map(|(idx, entry)| {
357                    let ns = cross_rule_ns[idx].load(Ordering::Relaxed);
358                    if ns == 0 {
359                        return None;
360                    }
361                    Some((entry.rule.id(), ns))
362                })
363                .collect();
364            rows.sort_by_key(|(_, ns)| std::cmp::Reverse(*ns));
365            for (rule_id, ns) in rows {
366                tracing::info!(
367                    phase = "cross_file_rule",
368                    rule = rule_id,
369                    elapsed_us = ns / 1000,
370                    "engine.phase",
371                );
372            }
373        }
374
375        // Resolve `scope_filter.changed_since:` diffs once, before the
376        // per-file dispatch reads the per-file `Scope::matches` cache.
377        self.resolve_changed_paths(root, index)?;
378
379        // Per-file partition: file-major loop reads each file
380        // once and dispatches to every per-file rule whose scope
381        // matches. Coalesces N reads of one file across N rules
382        // sharing it.
383        let t_per_file = Instant::now();
384        let per_file_results = self.run_per_file(root, &full_ctx, filtered_ctx.as_ref(), &when_env);
385        phase!(
386            t_per_file,
387            "per_file_partition",
388            rules = self
389                .entries
390                .iter()
391                .filter(|e| e.rule.as_per_file().is_some())
392                .count() as u64,
393        );
394
395        // Final assembly preserves `self.entries` order so the
396        // output Vec is deterministic + tests that index by
397        // position keep working. Each entry slot fills from
398        // either the cross-file or per-file partition; rules
399        // filtered out (by `--changed` scope, `when: false`, or
400        // passing with no violations) leave their slot empty.
401        let t_assembly = Instant::now();
402        let mut cross_by_idx: HashMap<usize, RuleResult> = cross_results.into_iter().collect();
403        let mut per_file_by_idx: HashMap<usize, RuleResult> =
404            per_file_results.into_iter().collect();
405        let mut results = Vec::with_capacity(self.entries.len());
406        for idx in 0..self.entries.len() {
407            if let Some(rr) = cross_by_idx.remove(&idx) {
408                results.push(rr);
409            } else if let Some(rr) = per_file_by_idx.remove(&idx) {
410                results.push(rr);
411            }
412        }
413        phase!(t_assembly, "assembly", results = results.len() as u64);
414        phase!(t_total, "engine_run_total");
415        Ok(Report { results })
416    }
417
418    /// Per-file dispatch loop. Walks `index.files()` in parallel
419    /// and, for each file, calls every applicable per-file rule's
420    /// `evaluate_file` against a single `std::fs::read`. Returns
421    /// `(entry-index, RuleResult)` tuples for every per-file
422    /// rule that emitted at least one violation; passing rules
423    /// (zero violations) are omitted, matching the rule-major
424    /// path's semantics.
425    #[allow(clippy::too_many_lines)]
426    fn run_per_file<'a>(
427        &'a self,
428        root: &'a Path,
429        full_ctx: &'a Context<'a>,
430        filtered_ctx: Option<&'a Context<'a>>,
431        when_env: &'a WhenEnv<'a>,
432    ) -> Vec<(usize, RuleResult)> {
433        let (live, when_errors) = self.collect_live_per_file_entries(full_ctx.index, when_env);
434        if live.is_empty() {
435            return when_errors;
436        }
437
438        let per_file_ctx = filtered_ctx.unwrap_or(full_ctx);
439
440        // Each file-major iteration produces a Vec of
441        // `(entry-index, Violation)` tuples. The flatten
442        // gathers them all; aggregation below buckets them by
443        // entry-index back into per-rule `RuleResult`s.
444        //
445        // We iterate `index.entries` (a Vec) via `par_iter()`
446        // and filter out directories *inside* the parallel
447        // pipeline rather than calling `index.files().par_bridge()`.
448        // `par_bridge` wraps a sequential iterator using a
449        // Mutex-guarded channel; at 1M entries that lock turns
450        // into a contention bottleneck across 24 worker
451        // threads. The native `par_iter` on the underlying Vec
452        // uses Rayon's work-stealing slabs instead — same
453        // observable iteration, no shared lock on the hot
454        // path.
455        let by_file: Vec<(usize, Violation)> = per_file_ctx
456            .index
457            .entries
458            .par_iter()
459            .filter(|e| !e.is_dir)
460            .flat_map_iter(|file_entry| {
461                // 1. Decide which per-file rules apply to this
462                // file. Per-file rules expose their scope via
463                // `PerFileRule::path_scope`; we filter on it
464                // before any I/O so files no rule cares about
465                // never get read. Carrying `entry_idx` through
466                // here avoids an O(L) `position` lookup per
467                // applicable rule per file inside the inner
468                // dispatch loop below.
469                let applicable: Vec<(usize, &RuleEntry)> = live
470                    .iter()
471                    .filter(|(_, entry)| {
472                        // 1a. Path-scope glob — cheap, dropping
473                        // files no rule cares about before any
474                        // further work.
475                        // v0.9.10: `Scope::matches` consults both
476                        // path-glob AND `scope_filter` in one
477                        // call (Scope owns its optional filter
478                        // since the v0.9.10 structural fix). The
479                        // separate v0.9.6 `entry.rule.scope_filter()`
480                        // check this used to do is now folded in.
481                        entry
482                            .rule
483                            .as_per_file()
484                            .expect("live entries are per-file rules by construction")
485                            .path_scope()
486                            .matches(&file_entry.path, per_file_ctx.index)
487                    })
488                    .map(|(idx, entry)| (*idx, *entry))
489                    .collect();
490                if applicable.is_empty() {
491                    return Vec::new();
492                }
493                // 2. Read once. Read failures (file deleted
494                // mid-walk, permission flake) skip the file
495                // silently — same shape as today's per-rule
496                // `let Ok(bytes) = std::fs::read(...) else
497                // continue;`.
498                let abs = root.join(&file_entry.path);
499                let Ok(bytes) = std::fs::read(&abs) else {
500                    return Vec::new();
501                };
502                // 3. Dispatch. Every applicable rule sees the
503                // same byte slice; the file is read exactly once
504                // even though N rules may produce violations
505                // against it.
506                let mut out: Vec<(usize, Violation)> = Vec::new();
507                for (entry_idx, entry) in applicable {
508                    let pf = entry
509                        .rule
510                        .as_per_file()
511                        .expect("live entries are per-file rules by construction");
512                    let result = pf.evaluate_file(per_file_ctx, &file_entry.path, &bytes);
513                    match result {
514                        Ok(vs) => {
515                            for v in vs {
516                                out.push((entry_idx, v));
517                            }
518                        }
519                        Err(e) => {
520                            out.push((entry_idx, Violation::new(format!("rule error: {e}"))));
521                        }
522                    }
523                }
524                out
525            })
526            .collect();
527
528        // Bucket violations by entry-index, then rebuild
529        // `RuleResult` per live entry preserving each rule's
530        // metadata (level / policy_url / is_fixable).
531        let mut bucket: HashMap<usize, Vec<Violation>> = HashMap::new();
532        for (idx, v) in by_file {
533            bucket.entry(idx).or_default().push(v);
534        }
535        let mut results = when_errors;
536        for (idx, entry) in live {
537            // A live per-file rule that produced no violations is a
538            // passing rule — emit an empty-violations `RuleResult` so
539            // it appears in the pass count, matching the cross-file
540            // path (which always emits a result). Previously these
541            // were dropped, so a silently-passing per-file rule was
542            // missing from "All N rule(s) passed" (the count read as 0).
543            let violations = bucket.remove(&idx).unwrap_or_default();
544            results.push((
545                idx,
546                RuleResult::new(
547                    Arc::from(entry.rule.id()),
548                    entry.rule.level(),
549                    entry.rule.policy_url().map(Arc::from),
550                    violations,
551                    entry.rule.fixer().is_some(),
552                ),
553            ));
554        }
555        results
556    }
557
558    /// Pre-filter the per-file entries that should evaluate this run:
559    /// opt-in via `as_per_file`, not skipped by `--changed`, and `when`
560    /// resolved. `when` evaluates against constant facts + vars (no
561    /// `iter` namespace at the engine level), so its verdict is
562    /// independent of the file being scanned — resolve it once per rule
563    /// here rather than per file. A `when` error short-circuits to a
564    /// per-rule result carrying the error message, matching the
565    /// rule-major path's `run_entry` for parity.
566    ///
567    /// Returns `(live entries, when-error results)`. Shared by
568    /// [`Engine::run`]'s file-major loop and [`Engine::run_for_file`].
569    fn collect_live_per_file_entries<'a>(
570        &'a self,
571        index: &FileIndex,
572        when_env: &WhenEnv<'_>,
573    ) -> LivePerFileEntries<'a> {
574        let mut live: Vec<(usize, &RuleEntry)> = Vec::new();
575        let mut when_errors: Vec<(usize, RuleResult)> = Vec::new();
576        for (idx, entry) in self.entries.iter().enumerate() {
577            if entry.rule.as_per_file().is_none() {
578                continue;
579            }
580            if self.skip_for_changed(entry.rule.as_ref(), index) {
581                continue;
582            }
583            if let Some(expr) = &entry.when {
584                match expr.evaluate(when_env) {
585                    Ok(true) => {}
586                    Ok(false) => continue,
587                    Err(e) => {
588                        when_errors.push((
589                            idx,
590                            RuleResult {
591                                rule_id: Arc::from(entry.rule.id()),
592                                level: entry.rule.level(),
593                                policy_url: entry.rule.policy_url().map(Arc::from),
594                                violations: vec![Violation::new(format!(
595                                    "when evaluation error: {e}"
596                                ))],
597                                notes: Vec::new(),
598                                is_fixable: entry.rule.fixer().is_some(),
599                            },
600                        ));
601                        continue;
602                    }
603                }
604            }
605            live.push((idx, entry));
606        }
607        (live, when_errors)
608    }
609
610    /// Re-evaluate only the per-file rules that apply to a single file,
611    /// using caller-supplied `bytes` (the LSP server's in-memory edited
612    /// copy is authoritative for unsaved edits — see
613    /// `docs/design/v0.11/single_file_reevaluation.md`). The cost is
614    /// proportional to *one* file's evaluation, not the whole tree's, so
615    /// an editor can call this on every (debounced) keystroke.
616    ///
617    /// Cross-file rules (those without an `as_per_file` view) are
618    /// intentionally NOT run — the caller re-runs those on save, and
619    /// only the ones whose scope intersects the changed file. `when:` is
620    /// resolved once against the engine's constant facts/vars, exactly
621    /// as [`Engine::run`] does.
622    ///
623    /// Returns [`Error::FileNotInIndex`] when `file_path` isn't in the
624    /// cached `index` — distinct from "ran but found nothing." The
625    /// caller reads it as "this file is excluded from linting"
626    /// (`.gitignore` / `ignore:` / outside the walked tree).
627    pub fn run_for_file(
628        &self,
629        root: &Path,
630        index: &FileIndex,
631        file_path: &Path,
632        bytes: &[u8],
633    ) -> Result<Vec<RuleResult>> {
634        if !index.contains_file(file_path) {
635            return Err(Error::file_not_in_index(file_path));
636        }
637
638        // Facts are constant for an index's lifetime, so cache them on
639        // the index: the LSP calls `run_for_file` on every keystroke and
640        // re-scanning the tree for facts each time would dominate the
641        // cost. First call computes + caches; the rest reuse.
642        let fact_values: &FactValues = if let Some(values) = index.cached_facts() {
643            values
644        } else {
645            let computed = evaluate_facts(&self.facts, root, index)?;
646            index.set_facts(computed);
647            index.cached_facts().expect("facts just set on the index")
648        };
649        let git_tracked = self.collect_git_tracked_if_needed(root);
650        let git_blame = self.build_blame_cache_if_needed(root);
651        // Per-file rules may carry `scope_filter.changed_since:`; resolve
652        // (and cache) the diff before any `Scope::matches` reads it.
653        self.resolve_changed_paths(root, index)?;
654
655        let ctx = Context {
656            root,
657            index,
658            registry: Some(&self.registry),
659            facts: Some(fact_values),
660            vars: Some(&self.vars),
661            git_tracked: git_tracked.as_ref(),
662            git_blame: git_blame.as_ref(),
663        };
664        let when_env = WhenEnv {
665            facts: fact_values,
666            vars: &self.vars,
667            iter: None,
668            env: None,
669        };
670
671        let (live, when_errors) = self.collect_live_per_file_entries(index, &when_env);
672
673        // Dispatch each in-scope rule against the supplied bytes. The
674        // raw violations (notes included) are bucketed by entry index;
675        // `RuleResult::new` partitions notes out below. A rule that is
676        // applicable but emits nothing leaves no bucket entry → no
677        // result, matching `run`'s "passing rules omitted" semantics.
678        let mut bucket: HashMap<usize, Vec<Violation>> = HashMap::new();
679        for (idx, entry) in &live {
680            let pf = entry
681                .rule
682                .as_per_file()
683                .expect("live entries are per-file rules by construction");
684            if !pf.path_scope().matches(file_path, index) {
685                continue;
686            }
687            match pf.evaluate_file(&ctx, file_path, bytes) {
688                Ok(vs) => {
689                    if !vs.is_empty() {
690                        bucket.entry(*idx).or_default().extend(vs);
691                    }
692                }
693                Err(e) => bucket
694                    .entry(*idx)
695                    .or_default()
696                    .push(Violation::new(format!("rule error: {e}"))),
697            }
698        }
699
700        let mut by_idx: HashMap<usize, RuleResult> = when_errors.into_iter().collect();
701        for (idx, entry) in &live {
702            if let Some(violations) = bucket.remove(idx) {
703                by_idx.insert(
704                    *idx,
705                    RuleResult::new(
706                        Arc::from(entry.rule.id()),
707                        entry.rule.level(),
708                        entry.rule.policy_url().map(Arc::from),
709                        violations,
710                        entry.rule.fixer().is_some(),
711                    ),
712                );
713            }
714        }
715        // Preserve `self.entries` order, mirroring `run`'s assembly.
716        let mut results = Vec::with_capacity(by_idx.len());
717        for idx in 0..self.entries.len() {
718            if let Some(rr) = by_idx.remove(&idx) {
719                results.push(rr);
720            }
721        }
722        Ok(results)
723    }
724
725    /// Evaluate every rule and apply fixers for their violations.
726    /// Fixes run sequentially — rules whose fixers touch the filesystem
727    /// must not race. Rules with no fixer contribute
728    /// [`FixStatus::Unfixable`] entries so the caller sees them in the
729    /// report. Rules that pass (no violations) are omitted from the
730    /// result, same as [`Engine::run`]'s usual behaviour.
731    #[allow(clippy::too_many_lines)]
732    pub fn fix(&self, root: &Path, index: &FileIndex, dry_run: bool) -> Result<FixReport> {
733        if self.changed_paths.as_ref().is_some_and(HashSet::is_empty) {
734            return Ok(FixReport {
735                results: Vec::new(),
736            });
737        }
738
739        let fact_values = evaluate_facts(&self.facts, root, index)?;
740        let git_tracked = self.collect_git_tracked_if_needed(root);
741        let git_blame = self.build_blame_cache_if_needed(root);
742        let filtered_index = self.build_filtered_index(index);
743        let git_tracked_indexes = self.build_git_tracked_indexes(index, git_tracked.as_ref());
744        let full_ctx = Context {
745            root,
746            index,
747            registry: Some(&self.registry),
748            facts: Some(&fact_values),
749            vars: Some(&self.vars),
750            git_tracked: git_tracked.as_ref(),
751            git_blame: git_blame.as_ref(),
752        };
753        let filtered_ctx = filtered_index.as_ref().map(|fi| Context {
754            root,
755            index: fi,
756            registry: Some(&self.registry),
757            facts: Some(&fact_values),
758            vars: Some(&self.vars),
759            git_tracked: git_tracked.as_ref(),
760            git_blame: git_blame.as_ref(),
761        });
762        let git_file_only_ctx = git_tracked_indexes
763            .as_ref()
764            .and_then(|gti| gti.file_only.as_ref())
765            .map(|fi| Context {
766                root,
767                index: fi,
768                registry: Some(&self.registry),
769                facts: Some(&fact_values),
770                vars: Some(&self.vars),
771                git_tracked: git_tracked.as_ref(),
772                git_blame: git_blame.as_ref(),
773            });
774        let git_dir_aware_ctx = git_tracked_indexes
775            .as_ref()
776            .and_then(|gti| gti.dir_aware.as_ref())
777            .map(|fi| Context {
778                root,
779                index: fi,
780                registry: Some(&self.registry),
781                facts: Some(&fact_values),
782                vars: Some(&self.vars),
783                git_tracked: git_tracked.as_ref(),
784                git_blame: git_blame.as_ref(),
785            });
786        let when_env = WhenEnv {
787            facts: &fact_values,
788            vars: &self.vars,
789            iter: None,
790            env: None,
791        };
792        let fix_ctx = FixContext {
793            root,
794            dry_run,
795            fix_size_limit: self.fix_size_limit,
796        };
797
798        // Same `scope_filter.changed_since:` resolution as `run`, so a
799        // fix pass respects per-rule diff scoping too.
800        self.resolve_changed_paths(root, index)?;
801
802        let mut results: Vec<FixRuleResult> = Vec::new();
803        for entry in &self.entries {
804            if self.skip_for_changed(entry.rule.as_ref(), full_ctx.index) {
805                continue;
806            }
807            let ctx = pick_ctx(
808                entry.rule.as_ref(),
809                &full_ctx,
810                filtered_ctx.as_ref(),
811                git_file_only_ctx.as_ref(),
812                git_dir_aware_ctx.as_ref(),
813            );
814            if let Some(expr) = &entry.when {
815                match expr.evaluate(&when_env) {
816                    Ok(true) => {}
817                    Ok(false) => continue,
818                    Err(e) => {
819                        results.push(FixRuleResult {
820                            rule_id: Arc::from(entry.rule.id()),
821                            level: entry.rule.level(),
822                            items: vec![FixItem {
823                                violation: Violation::new(format!("when evaluation error: {e}")),
824                                status: FixStatus::Unfixable,
825                            }],
826                        });
827                        continue;
828                    }
829                }
830            }
831            let violations = match entry.rule.evaluate(ctx) {
832                Ok(v) => v,
833                Err(e) => vec![Violation::new(format!("rule error: {e}"))],
834            };
835            if violations.is_empty() {
836                continue;
837            }
838            let fixer = entry.rule.fixer();
839            let items: Vec<FixItem> = violations
840                .into_iter()
841                .map(|v| {
842                    let status = match fixer {
843                        Some(f) => match f.apply(&v, &fix_ctx) {
844                            Ok(FixOutcome::Applied(s)) => FixStatus::Applied(s),
845                            Ok(FixOutcome::Skipped(s)) => FixStatus::Skipped(s),
846                            Err(e) => FixStatus::Skipped(format!("fix error: {e}")),
847                        },
848                        None => FixStatus::Unfixable,
849                    };
850                    FixItem {
851                        violation: v,
852                        status,
853                    }
854                })
855                .collect();
856            results.push(FixRuleResult {
857                rule_id: Arc::from(entry.rule.id()),
858                level: entry.rule.level(),
859                items,
860            });
861        }
862        Ok(FixReport { results })
863    }
864
865    /// Collect git's tracked-paths set, but only if at least one
866    /// loaded rule asked for it. Most repos / configs never opt
867    /// in, so this returns `None` zero-cost in the common case.
868    /// Inside a non-git directory, or when `git` exits non-zero
869    /// (corrupt repo, missing binary), the helper also returns
870    /// `None` — rules that consult it then treat every entry as
871    /// "untracked," which is the right default for absence-style
872    /// rules with `git_tracked_only: true`.
873    fn collect_git_tracked_if_needed(
874        &self,
875        root: &Path,
876    ) -> Option<std::collections::HashSet<std::path::PathBuf>> {
877        let any_wants = self
878            .entries
879            .iter()
880            .any(|e| e.rule.git_tracked_mode() != crate::rule::GitTrackedMode::Off);
881        if !any_wants {
882            return None;
883        }
884        crate::git::collect_tracked_paths(root)
885    }
886
887    /// Build the per-file `git blame` cache when at least one
888    /// loaded rule asked for it. Returns `None` otherwise — the
889    /// common case (most configs have no `git_blame_age` rules)
890    /// pays nothing. The cache itself is empty at construction;
891    /// rules trigger blame on first access per file.
892    ///
893    /// We use [`crate::git::collect_tracked_paths`] as the
894    /// is-this-a-git-repo probe so the rule no-ops cleanly
895    /// outside a repo without per-file blame failures littering
896    /// the cache. When the user opts into BOTH `git_tracked_only`
897    /// and `git_blame_age`, the probe runs once via
898    /// [`Engine::collect_git_tracked_if_needed`] and once here —
899    /// negligible cost (sub-ms) compared to the blame work.
900    fn build_blame_cache_if_needed(&self, root: &Path) -> Option<crate::git::BlameCache> {
901        let any_wants = self.entries.iter().any(|e| e.rule.wants_git_blame());
902        if !any_wants {
903            return None;
904        }
905        // Probe: a non-git workspace short-circuits to `None` so
906        // the rule's "silent no-op outside git" path is exercised
907        // at the engine level rather than per-file.
908        crate::git::collect_tracked_paths(root)?;
909        Some(crate::git::BlameCache::new(root.to_path_buf()))
910    }
911
912    /// Build a [`FileIndex`] containing only the entries the user
913    /// said they care about (the `--changed` set). Returns `None`
914    /// when no changed-set is configured — callers fall back to
915    /// the full index.
916    fn build_filtered_index(&self, full: &FileIndex) -> Option<FileIndex> {
917        let set = self.changed_paths.as_ref()?;
918        let entries = full
919            .entries
920            .iter()
921            .filter(|e| set.contains(&*e.path))
922            .cloned()
923            .collect();
924        Some(FileIndex::from_entries(entries))
925    }
926
927    /// Build the per-mode pre-filtered indexes for git-tracked
928    /// rules. v0.9.11 structural fix for the
929    /// `git_tracked_only`-silently-dropped recurrence-risk
930    /// shape (see `docs/design/v0.9/git-tracked-filtered-index.md`).
931    ///
932    /// Returns `None` when no rule opts in (no
933    /// `GitTrackedMode::FileOnly` or `DirAware` declared) OR
934    /// when the tracked-set is unavailable (no git repo). When
935    /// `Some`, contains:
936    ///
937    /// - `file_only`: files where `tracked.contains(path)`. The
938    ///   index `file_exists`-style rules iterate via
939    ///   `ctx.index.files()`. Dirs are dropped (file-mode rules
940    ///   don't iterate dirs).
941    /// - `dir_aware`: dirs where `dir_has_tracked_files(path,
942    ///   tracked)`. The index `dir_exists`-style rules iterate
943    ///   via `ctx.index.dirs()`. Tracked files are also kept so
944    ///   any nested per-file consultation by these rules still
945    ///   works against the same index.
946    ///
947    /// Build cost: O(N) per mode (one `HashSet` lookup or one
948    /// `dir_has_tracked_files` walk per entry). Amortised across
949    /// however many rules opt into each mode.
950    fn build_git_tracked_indexes(
951        &self,
952        full: &FileIndex,
953        tracked: Option<&std::collections::HashSet<std::path::PathBuf>>,
954    ) -> Option<GitTrackedIndexes> {
955        let mut any_file_only = false;
956        let mut any_dir_aware = false;
957        for entry in &self.entries {
958            match entry.rule.git_tracked_mode() {
959                crate::rule::GitTrackedMode::Off => {}
960                crate::rule::GitTrackedMode::FileOnly => any_file_only = true,
961                crate::rule::GitTrackedMode::DirAware => any_dir_aware = true,
962            }
963        }
964        if !any_file_only && !any_dir_aware {
965            return None;
966        }
967
968        // No git repo (or `git ls-files` failed): build EMPTY
969        // indexes for the modes that rules opt into. Preserves
970        // the pre-v0.9.11 silent-no-op semantics — rules that
971        // require git_tracked_only outside a git repo iterate
972        // an empty index and fire zero violations, matching
973        // user expectations for the "don't let X be committed"
974        // pattern.
975        let Some(tracked) = tracked else {
976            return Some(GitTrackedIndexes {
977                file_only: any_file_only.then(|| FileIndex::from_entries(Vec::new())),
978                dir_aware: any_dir_aware.then(|| FileIndex::from_entries(Vec::new())),
979            });
980        };
981
982        let file_only = if any_file_only {
983            let entries = full
984                .entries
985                .iter()
986                .filter(|e| !e.is_dir && tracked.contains(&*e.path))
987                .cloned()
988                .collect();
989            Some(FileIndex::from_entries(entries))
990        } else {
991            None
992        };
993
994        let dir_aware = if any_dir_aware {
995            let entries = full
996                .entries
997                .iter()
998                .filter(|e| {
999                    if e.is_dir {
1000                        crate::git::dir_has_tracked_files(&e.path, tracked)
1001                    } else {
1002                        tracked.contains(&*e.path)
1003                    }
1004                })
1005                .cloned()
1006                .collect();
1007            Some(FileIndex::from_entries(entries))
1008        } else {
1009            None
1010        };
1011
1012        Some(GitTrackedIndexes {
1013            file_only,
1014            dir_aware,
1015        })
1016    }
1017
1018    /// True when `--changed` mode is active AND the rule's
1019    /// `path_scope` exists AND no path in the changed-set
1020    /// satisfies it. Cross-file rules return `path_scope = None`
1021    /// per the roadmap contract — so they always return `false`
1022    /// here (i.e. never skipped).
1023    fn skip_for_changed(&self, rule: &dyn Rule, index: &FileIndex) -> bool {
1024        let Some(set) = &self.changed_paths else {
1025            return false;
1026        };
1027        let Some(scope) = rule.path_scope() else {
1028            return false;
1029        };
1030        !set.iter().any(|p| scope.matches(p, index))
1031    }
1032
1033    /// Resolve every distinct `scope_filter.changed_since:` ref across
1034    /// the rule set and cache each `<ref>...HEAD` diff on the index,
1035    /// once per run (before any `Scope::matches` reads it). A ref that
1036    /// isn't a git repo caches an empty set — the documented silent
1037    /// no-op. A ref that doesn't resolve *inside* a repo is a hard
1038    /// error with a shallow-clone hint, so the misconfiguration
1039    /// surfaces instead of silently matching nothing.
1040    fn resolve_changed_paths(&self, root: &Path, index: &FileIndex) -> Result<()> {
1041        if index.changed_paths_initialized() {
1042            return Ok(());
1043        }
1044        let mut refs: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();
1045        for entry in &self.entries {
1046            // Per-file rules expose their scope via `PerFileRule::path_scope`
1047            // (the `Rule::path_scope` default is `None`); rule-major rules
1048            // expose it via `Rule::path_scope`. changed_since is a per-file
1049            // concept, so prefer the per-file scope, falling back to the
1050            // rule-level one.
1051            let scope = entry
1052                .rule
1053                .as_per_file()
1054                .map(super::rule::PerFileRule::path_scope)
1055                .or_else(|| entry.rule.path_scope());
1056            if let Some(scope) = scope
1057                && let Some(filter) = scope.scope_filter()
1058                && let Some(since) = filter.changed_since()
1059            {
1060                refs.insert(since);
1061            }
1062        }
1063        if refs.is_empty() {
1064            return Ok(());
1065        }
1066        let mut map = std::collections::HashMap::new();
1067        for since in refs {
1068            match crate::git::collect_changed_paths_checked(root, since) {
1069                Ok(Some(set)) => {
1070                    map.insert(since.to_string(), set);
1071                }
1072                Ok(None) => {
1073                    map.insert(since.to_string(), std::collections::HashSet::new());
1074                }
1075                Err(crate::git::CommitRangeError::BadRange { stderr }) => {
1076                    return Err(crate::error::Error::Other(format!(
1077                        "scope_filter.changed_since: could not resolve `{since}...HEAD`: \
1078                         {stderr}. Common cause: shallow clone. In a GitHub Actions PR \
1079                         workflow, use `actions/checkout@v4` with `fetch-depth: 0` so the \
1080                         base ref is reachable."
1081                    )));
1082                }
1083            }
1084        }
1085        index.set_changed_paths(map);
1086        Ok(())
1087    }
1088}
1089
1090/// Pick the [`Context`] a rule should evaluate against:
1091/// `full_ctx` if it [`requires_full_index`](Rule::requires_full_index),
1092/// otherwise the changed-only filtered context (falling back to
1093/// `full_ctx` when no `--changed` set is configured).
1094fn pick_ctx<'a>(
1095    rule: &dyn Rule,
1096    full_ctx: &'a Context<'a>,
1097    filtered_ctx: Option<&'a Context<'a>>,
1098    git_file_only_ctx: Option<&'a Context<'a>>,
1099    git_dir_aware_ctx: Option<&'a Context<'a>>,
1100) -> &'a Context<'a> {
1101    // v0.9.11: git-tracked filtering wins over both `--changed`
1102    // filtering and the full-index path. The 4 existence rules
1103    // that opt in already declare `requires_full_index = true`
1104    // (their verdict needs the whole tree, not just the changed
1105    // subset), so this substitution is safe — we're swapping
1106    // their full-index Context for a pre-narrowed one.
1107    match rule.git_tracked_mode() {
1108        crate::rule::GitTrackedMode::FileOnly => {
1109            return git_file_only_ctx.unwrap_or(full_ctx);
1110        }
1111        crate::rule::GitTrackedMode::DirAware => {
1112            return git_dir_aware_ctx.unwrap_or(full_ctx);
1113        }
1114        crate::rule::GitTrackedMode::Off => {}
1115    }
1116    if rule.requires_full_index() {
1117        full_ctx
1118    } else {
1119        filtered_ctx.unwrap_or(full_ctx)
1120    }
1121}
1122
1123fn run_entry(
1124    entry: &RuleEntry,
1125    ctx: &Context<'_>,
1126    when_env: &WhenEnv<'_>,
1127    _facts: &FactValues,
1128) -> Option<RuleResult> {
1129    if let Some(expr) = &entry.when {
1130        match expr.evaluate(when_env) {
1131            Ok(true) => {} // proceed
1132            Ok(false) => return None,
1133            Err(e) => {
1134                return Some(RuleResult {
1135                    rule_id: Arc::from(entry.rule.id()),
1136                    level: entry.rule.level(),
1137                    policy_url: entry.rule.policy_url().map(Arc::from),
1138                    violations: vec![Violation::new(format!("when evaluation error: {e}"))],
1139                    notes: Vec::new(),
1140                    is_fixable: entry.rule.fixer().is_some(),
1141                });
1142            }
1143        }
1144    }
1145    Some(run_one(entry.rule.as_ref(), ctx))
1146}
1147
1148fn run_one(rule: &dyn Rule, ctx: &Context<'_>) -> RuleResult {
1149    let violations = match rule.evaluate(ctx) {
1150        Ok(v) => v,
1151        Err(e) => vec![Violation::new(format!("rule error: {e}"))],
1152    };
1153    // `new` partitions any note-flagged violations into `notes`.
1154    RuleResult::new(
1155        Arc::from(rule.id()),
1156        rule.level(),
1157        rule.policy_url().map(Arc::from),
1158        violations,
1159        rule.fixer().is_some(),
1160    )
1161}
1162
1163#[cfg(test)]
1164mod tests {
1165    use super::*;
1166    use crate::level::Level;
1167    use crate::scope::Scope;
1168    use crate::walker::FileEntry;
1169    use std::path::Path;
1170
1171    /// Stub rule: emits one violation per matched file in scope.
1172    /// Configurable to advertise `requires_full_index` for
1173    /// cross-file rule simulation, and a `path_scope` for
1174    /// changed-mode tests.
1175    #[derive(Debug)]
1176    struct StubRule {
1177        id: String,
1178        level: Level,
1179        scope: Scope,
1180        full_index: bool,
1181        expose_scope: bool,
1182    }
1183
1184    impl Rule for StubRule {
1185        fn id(&self) -> &str {
1186            &self.id
1187        }
1188        fn level(&self) -> Level {
1189            self.level
1190        }
1191        fn requires_full_index(&self) -> bool {
1192            self.full_index
1193        }
1194        fn path_scope(&self) -> Option<&Scope> {
1195            self.expose_scope.then_some(&self.scope)
1196        }
1197        fn evaluate(&self, ctx: &Context<'_>) -> crate::error::Result<Vec<Violation>> {
1198            let mut out = Vec::new();
1199            for entry in ctx.index.files() {
1200                if self.scope.matches(&entry.path, ctx.index) {
1201                    out.push(Violation::new("hit").with_path(entry.path.clone()));
1202                }
1203            }
1204            Ok(out)
1205        }
1206    }
1207
1208    fn stub(id: &str, glob: &str) -> Box<dyn Rule> {
1209        Box::new(StubRule {
1210            id: id.into(),
1211            level: Level::Error,
1212            scope: Scope::from_patterns(&[glob.to_string()]).unwrap(),
1213            full_index: false,
1214            expose_scope: true,
1215        })
1216    }
1217
1218    fn full_index_stub(id: &str) -> Box<dyn Rule> {
1219        Box::new(StubRule {
1220            id: id.into(),
1221            level: Level::Error,
1222            scope: Scope::match_all(),
1223            full_index: true,
1224            expose_scope: false,
1225        })
1226    }
1227
1228    fn idx(paths: &[&str]) -> FileIndex {
1229        FileIndex::from_entries(
1230            paths
1231                .iter()
1232                .map(|p| FileEntry {
1233                    path: std::path::Path::new(p).into(),
1234                    is_dir: false,
1235                    size: 0,
1236                })
1237                .collect(),
1238        )
1239    }
1240
1241    #[test]
1242    fn run_empty_returns_empty_report() {
1243        let engine = Engine::new(Vec::new(), RuleRegistry::new());
1244        let report = engine.run(Path::new("/fake"), &idx(&["a.rs"])).unwrap();
1245        assert!(report.results.is_empty());
1246    }
1247
1248    #[test]
1249    fn run_single_rule_emits_per_match() {
1250        let engine = Engine::new(vec![stub("t", "**/*.rs")], RuleRegistry::new());
1251        let report = engine
1252            .run(
1253                Path::new("/fake"),
1254                &idx(&["src/a.rs", "src/b.rs", "README.md"]),
1255            )
1256            .unwrap();
1257        assert_eq!(report.results.len(), 1);
1258        assert_eq!(report.results[0].violations.len(), 2);
1259    }
1260
1261    #[test]
1262    fn run_with_empty_changed_set_short_circuits() {
1263        // Per the contract: empty `--changed` set means "lint
1264        // nothing"; the engine returns an empty Report without
1265        // even evaluating facts.
1266        let engine = Engine::new(vec![stub("t", "**/*.rs")], RuleRegistry::new())
1267            .with_changed_paths(HashSet::new());
1268        let report = engine.run(Path::new("/fake"), &idx(&["src/a.rs"])).unwrap();
1269        assert!(report.results.is_empty());
1270    }
1271
1272    #[test]
1273    fn changed_mode_skips_rule_whose_scope_misses_diff() {
1274        // Rule scoped to `src/**`; changed-set has only docs/
1275        // → rule skipped (no result emitted).
1276        let mut changed = HashSet::new();
1277        changed.insert(std::path::PathBuf::from("docs/README.md"));
1278        let engine = Engine::new(vec![stub("src-rule", "src/**/*.rs")], RuleRegistry::new())
1279            .with_changed_paths(changed);
1280        let report = engine
1281            .run(Path::new("/fake"), &idx(&["src/a.rs", "docs/README.md"]))
1282            .unwrap();
1283        assert!(
1284            report.results.is_empty(),
1285            "out-of-scope rule should be skipped: {:?}",
1286            report.results,
1287        );
1288    }
1289
1290    #[test]
1291    fn changed_mode_runs_rule_whose_scope_intersects_diff() {
1292        let mut changed = HashSet::new();
1293        changed.insert(std::path::PathBuf::from("src/a.rs"));
1294        let engine = Engine::new(vec![stub("src-rule", "src/**/*.rs")], RuleRegistry::new())
1295            .with_changed_paths(changed);
1296        let report = engine
1297            .run(Path::new("/fake"), &idx(&["src/a.rs", "src/b.rs"]))
1298            .unwrap();
1299        // Filtered index: only `src/a.rs` is visible. Rule
1300        // matches it → 1 violation.
1301        assert_eq!(report.results.len(), 1);
1302        assert_eq!(report.results[0].violations.len(), 1);
1303    }
1304
1305    #[test]
1306    fn requires_full_index_rule_runs_unconditionally_in_changed_mode() {
1307        // A rule with `requires_full_index = true` and no
1308        // `path_scope` opts out of the changed-set filter
1309        // entirely — its verdict is over the whole tree.
1310        let mut changed = HashSet::new();
1311        changed.insert(std::path::PathBuf::from("docs/README.md"));
1312        let engine = Engine::new(vec![full_index_stub("cross")], RuleRegistry::new())
1313            .with_changed_paths(changed);
1314        let report = engine
1315            .run(Path::new("/fake"), &idx(&["src/a.rs", "docs/README.md"]))
1316            .unwrap();
1317        // `cross` ran against the full index (not the filtered
1318        // one), so it sees both files.
1319        assert_eq!(report.results.len(), 1);
1320        assert_eq!(report.results[0].violations.len(), 2);
1321    }
1322
1323    #[test]
1324    fn rule_count_reflects_number_of_entries() {
1325        let engine = Engine::new(
1326            vec![stub("a", "**"), stub("b", "**"), stub("c", "**")],
1327            RuleRegistry::new(),
1328        );
1329        assert_eq!(engine.rule_count(), 3);
1330    }
1331
1332    #[test]
1333    fn from_entries_constructor_supports_when_clauses() {
1334        // A rule wrapped with a `when: false` expression should
1335        // be skipped during run — no result emitted.
1336        let entry = RuleEntry::new(stub("gated", "**/*.rs"))
1337            .with_when(crate::when::parse("false").unwrap());
1338        let engine = Engine::from_entries(vec![entry], RuleRegistry::new());
1339        let report = engine.run(Path::new("/fake"), &idx(&["a.rs"])).unwrap();
1340        assert!(
1341            report.results.is_empty(),
1342            "when-false rule must be skipped: {:?}",
1343            report.results,
1344        );
1345    }
1346
1347    #[test]
1348    fn fix_size_limit_default_is_one_mib() {
1349        // The builder default; tests that override engines via
1350        // `with_fix_size_limit` rely on this baseline.
1351        let engine = Engine::new(Vec::new(), RuleRegistry::new());
1352        // Implementation detail intentionally exposed for tests.
1353        // We can only verify the value indirectly via `with_*`
1354        // returning a different limit; assert the builder works.
1355        let updated = engine.with_fix_size_limit(Some(42));
1356        assert_eq!(updated.rule_count(), 0);
1357    }
1358
1359    #[test]
1360    fn skip_for_changed_returns_false_for_full_check() {
1361        // No `--changed` set → rule never skipped on that basis.
1362        let engine = Engine::new(vec![stub("t", "**/*.rs")], RuleRegistry::new());
1363        let report = engine.run(Path::new("/fake"), &idx(&["a.rs"])).unwrap();
1364        assert_eq!(report.results.len(), 1);
1365    }
1366
1367    /// Per-file rule that emits one violation per file based on
1368    /// the byte content prefix. Used to verify the file-major
1369    /// dispatch path actually hands the bytes to the rule and
1370    /// aggregates the violations correctly.
1371    #[derive(Debug)]
1372    struct PerFileStub {
1373        id: String,
1374        scope: Scope,
1375        prefix: Vec<u8>,
1376    }
1377
1378    impl Rule for PerFileStub {
1379        fn id(&self) -> &str {
1380            &self.id
1381        }
1382        fn level(&self) -> Level {
1383            Level::Error
1384        }
1385        fn evaluate(&self, _ctx: &Context<'_>) -> crate::error::Result<Vec<Violation>> {
1386            // Rule-major fallback: not exercised when
1387            // `as_per_file` is set + the engine routes to the
1388            // file-major loop.
1389            Ok(Vec::new())
1390        }
1391        fn as_per_file(&self) -> Option<&dyn crate::PerFileRule> {
1392            Some(self)
1393        }
1394    }
1395
1396    impl crate::PerFileRule for PerFileStub {
1397        fn path_scope(&self) -> &Scope {
1398            &self.scope
1399        }
1400        fn evaluate_file(
1401            &self,
1402            _ctx: &Context<'_>,
1403            path: &std::path::Path,
1404            bytes: &[u8],
1405        ) -> crate::error::Result<Vec<Violation>> {
1406            if !bytes.starts_with(&self.prefix) {
1407                return Ok(vec![
1408                    Violation::new("missing prefix")
1409                        .with_path(std::sync::Arc::<std::path::Path>::from(path)),
1410                ]);
1411            }
1412            Ok(Vec::new())
1413        }
1414    }
1415
1416    #[test]
1417    fn dispatch_flip_routes_per_file_rule_through_file_major_loop() {
1418        // Real filesystem so the engine's `std::fs::read` works.
1419        // The PerFileStub fires when a file does NOT start with
1420        // `MAGIC` — exercises the slice-handing-in path end-to-end.
1421        let tmp = tempfile::tempdir().unwrap();
1422        std::fs::write(tmp.path().join("good.txt"), b"MAGIC + payload").unwrap();
1423        std::fs::write(tmp.path().join("bad.txt"), b"no magic here").unwrap();
1424
1425        let rule = Box::new(PerFileStub {
1426            id: "needs-magic".into(),
1427            scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
1428            prefix: b"MAGIC".to_vec(),
1429        });
1430        let engine = Engine::new(vec![rule], RuleRegistry::new());
1431
1432        let opts = crate::WalkOptions::default();
1433        let index = crate::walk(tmp.path(), &opts).unwrap();
1434        let report = engine.run(tmp.path(), &index).unwrap();
1435
1436        assert_eq!(report.results.len(), 1, "results: {:?}", report.results);
1437        let r = &report.results[0];
1438        assert_eq!(&*r.rule_id, "needs-magic");
1439        assert_eq!(r.violations.len(), 1, "violations: {:?}", r.violations);
1440        assert_eq!(
1441            r.violations[0].path.as_deref(),
1442            Some(std::path::Path::new("bad.txt")),
1443        );
1444    }
1445
#[test]
fn dispatch_flip_aggregates_multiple_per_file_rules() {
    // Two per-file stubs share the `**/*.txt` scope, so the
    // file-major loop reads each file once and hands the same
    // byte buffer to both rules. What is under test is the
    // aggregation step: violations must be bucketed by the rule
    // that raised them, not by the file they were found in.
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(root.join("a.txt"), b"ZZZ stuff").unwrap();
    std::fs::write(root.join("b.txt"), b"BBB stuff").unwrap();

    let txt_only = ["**/*.txt".to_string()];
    let wants_aaa = Box::new(PerFileStub {
        id: "needs-AAA".into(),
        scope: Scope::from_patterns(&txt_only).unwrap(),
        prefix: b"AAA".to_vec(),
    });
    let wants_bbb = Box::new(PerFileStub {
        id: "needs-BBB".into(),
        scope: Scope::from_patterns(&txt_only).unwrap(),
        prefix: b"BBB".to_vec(),
    });
    let engine = Engine::new(vec![wants_aaa, wants_bbb], RuleRegistry::new());

    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();
    let report = engine.run(root, &index).unwrap();

    // Key the results by rule id so the checks below do not rely
    // on the order in which the report lists them.
    let mut by_rule: HashMap<&str, &RuleResult> = HashMap::new();
    for result in &report.results {
        by_rule.insert(&*result.rule_id, result);
    }
    assert_eq!(
        by_rule.len(),
        2,
        "expected both rules in the report: {:?}",
        report.results
    );

    // Neither file starts with "AAA", so `needs-AAA` flags both.
    assert_eq!(by_rule["needs-AAA"].violations.len(), 2);
    // Only `a.txt` lacks the "BBB" prefix, so `needs-BBB` flags
    // exactly that one file.
    assert_eq!(by_rule["needs-BBB"].violations.len(), 1);
    let flagged = by_rule["needs-BBB"].violations[0].path.as_deref();
    assert_eq!(flagged, Some(std::path::Path::new("a.txt")));
}
1490
#[test]
fn passing_per_file_rule_appears_in_the_report() {
    // A live per-file rule with zero violations is still a rule
    // that ran and PASSED. It must show up in the report with an
    // empty violation list so the pass count ("All N rule(s)
    // passed") includes it, the same way the cross-file path
    // always emits a result. Before this behaviour such rules
    // were dropped and the summary read "All 0 rule(s) passed".
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(root.join("a.txt"), b"MAGIC ok").unwrap();

    let txt_only = ["**/*.txt".to_string()];
    let magic_rule = Box::new(PerFileStub {
        id: "needs-magic".into(),
        scope: Scope::from_patterns(&txt_only).unwrap(),
        prefix: b"MAGIC".to_vec(),
    });
    let engine = Engine::new(vec![magic_rule], RuleRegistry::new());

    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();
    let report = engine.run(root, &index).unwrap();

    // Exactly one result, carrying no violations, counted as one
    // passing rule.
    assert_eq!(report.results.len(), 1, "results: {:?}", report.results);
    let only = &report.results[0];
    assert!(only.violations.is_empty());
    assert_eq!(report.passing_rules(), 1);
}
1517
#[test]
fn run_for_file_runs_only_in_scope_per_file_rules() {
    // Fixture with three rules:
    //   * a per-file stub scoped to `.txt` (in scope for the
    //     file being checked),
    //   * a per-file stub scoped to `.rs` (out of scope),
    //   * a cross-file stub, which `run_for_file` must never run.
    // Only the first one is expected to produce a result.
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(root.join("a.txt"), b"no magic").unwrap();

    let txt_rule = Box::new(PerFileStub {
        id: "txt-needs-magic".into(),
        scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
        prefix: b"MAGIC".to_vec(),
    });
    let rs_rule = Box::new(PerFileStub {
        id: "rs-needs-magic".into(),
        scope: Scope::from_patterns(&["**/*.rs".to_string()]).unwrap(),
        prefix: b"MAGIC".to_vec(),
    });
    let cross_rule = stub("cross", "**/*.txt");
    let engine = Engine::new(vec![txt_rule, rs_rule, cross_rule], RuleRegistry::new());

    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();
    let target = Path::new("a.txt");
    let results = engine
        .run_for_file(root, &index, target, b"no magic")
        .unwrap();

    assert_eq!(results.len(), 1, "results: {results:?}");
    let only = &results[0];
    assert_eq!(&*only.rule_id, "txt-needs-magic");
    assert_eq!(only.violations.len(), 1);
}
1549
#[test]
fn run_for_file_uses_supplied_bytes_not_disk() {
    // The LSP unsaved-edit contract: the bytes handed to
    // `run_for_file` take precedence over whatever is on disk.
    // Here the on-disk file satisfies the rule while the
    // in-memory edit does not, so a violation proves the
    // supplied buffer was the one inspected.
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(root.join("a.txt"), b"MAGIC on disk").unwrap();

    let magic_rule = Box::new(PerFileStub {
        id: "needs-magic".into(),
        scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
        prefix: b"MAGIC".to_vec(),
    });
    let engine = Engine::new(vec![magic_rule], RuleRegistry::new());
    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();

    let edited_path = Path::new("a.txt");
    let results = engine
        .run_for_file(root, &index, edited_path, b"edited, no prefix")
        .unwrap();

    assert_eq!(results.len(), 1, "edited bytes should fail the rule");
    assert_eq!(&*results[0].rule_id, "needs-magic");
}
1572
#[test]
fn run_for_file_passing_rule_omitted() {
    // The buffer passed to `run_for_file` starts with the
    // required prefix, so the single rule has nothing to report
    // and the returned list is expected to be empty.
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(root.join("a.txt"), b"whatever").unwrap();

    let magic_rule = Box::new(PerFileStub {
        id: "needs-magic".into(),
        scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
        prefix: b"MAGIC".to_vec(),
    });
    let engine = Engine::new(vec![magic_rule], RuleRegistry::new());
    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();

    let checked_path = Path::new("a.txt");
    let results = engine
        .run_for_file(root, &index, checked_path, b"MAGIC passes")
        .unwrap();

    assert!(
        results.is_empty(),
        "passing rule must be omitted: {results:?}"
    );
}
1592
#[test]
fn is_per_file_classifies_rules() {
    // One per-file stub and one cross-file stub are registered;
    // `is_per_file` should say yes only for the former, and no
    // for an id the engine has never seen.
    let everything = ["**/*".to_string()];
    let per_file_rule = Box::new(PerFileStub {
        id: "pf".into(),
        scope: Scope::from_patterns(&everything).unwrap(),
        prefix: b"X".to_vec(),
    });
    let cross_rule = stub("cross", "**/*");
    let engine = Engine::new(vec![per_file_rule, cross_rule], RuleRegistry::new());

    assert!(engine.is_per_file("pf"));
    assert!(!engine.is_per_file("cross"));
    assert!(!engine.is_per_file("unknown-id"));
}
1606
#[test]
fn run_for_file_caches_facts_on_index() {
    // A freshly walked index carries no cached facts; the first
    // `run_for_file` call is expected to populate them, and a
    // repeat call must still succeed.
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(root.join("a.txt"), b"x").unwrap();

    let per_file_rule = Box::new(PerFileStub {
        id: "pf".into(),
        scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
        prefix: b"MAGIC".to_vec(),
    });
    let engine = Engine::new(vec![per_file_rule], RuleRegistry::new());
    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();
    let target = Path::new("a.txt");

    // Nothing cached before the engine has touched the index.
    assert!(index.cached_facts().is_none());

    engine.run_for_file(root, &index, target, b"x").unwrap();
    assert!(
        index.cached_facts().is_some(),
        "facts should be cached after the first run_for_file"
    );

    // Same call again: it only has to complete without error.
    engine.run_for_file(root, &index, target, b"x").unwrap();
}
1632
#[test]
fn run_for_file_errors_when_file_not_in_index() {
    // Only `a.txt` exists in the walked tree; asking the engine
    // about `ghost.txt` must yield `Error::FileNotInIndex`.
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(root.join("a.txt"), b"x").unwrap();

    let engine = Engine::new(vec![], RuleRegistry::new());
    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();

    let missing = Path::new("ghost.txt");
    let err = engine
        .run_for_file(root, &index, missing, b"x")
        .unwrap_err();

    assert!(
        matches!(err, Error::FileNotInIndex { .. }),
        "expected FileNotInIndex, got: {err:?}"
    );
}
1647
#[test]
fn dispatch_flip_preserves_cross_file_rules_unchanged() {
    // A rule that leaves `as_per_file` at its default (`None`)
    // stays on the rule-major path. Registering it next to a
    // per-file rule should yield a result for each of them.
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    std::fs::write(root.join("a.txt"), b"hi").unwrap();

    let cross_rule = stub("cross", "**/*.txt");
    let magic_rule = Box::new(PerFileStub {
        id: "needs-magic".into(),
        scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
        prefix: b"MAGIC".to_vec(),
    });
    let engine = Engine::new(vec![cross_rule, magic_rule], RuleRegistry::new());

    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();
    let report = engine.run(root, &index).unwrap();

    assert_eq!(report.results.len(), 2, "results: {:?}", report.results);

    // Results come back in entry-registration order: the
    // cross-file rule was registered first.
    let first = &report.results[0];
    let second = &report.results[1];
    assert_eq!(&*first.rule_id, "cross");
    assert_eq!(&*second.rule_id, "needs-magic");
}
1673}