Skip to main content

alint_core/
engine.rs

1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use crate::error::Result;
8use crate::facts::{FactSpec, FactValues, evaluate_facts};
9use crate::registry::RuleRegistry;
10use crate::report::{FixItem, FixReport, FixRuleResult, FixStatus, Report};
11use crate::rule::{Context, FixContext, FixOutcome, Rule, RuleResult, Violation};
12use crate::walker::FileIndex;
13use crate::when::{WhenEnv, WhenExpr};
14
/// A rule bundled with an optional `when` expression. Rules with a `when`
/// that evaluates to false at runtime are skipped (no `RuleResult` is
/// produced) — same observable effect as `level: off`, but gated on facts.
///
/// A `when` expression that fails to evaluate does not skip the rule
/// silently: the engine reports a single "when evaluation error" violation
/// for it instead.
#[derive(Debug)]
pub struct RuleEntry {
    /// The rule implementation the engine evaluates.
    pub rule: Box<dyn Rule>,
    /// Optional gate; `None` means the rule is never skipped on `when` grounds.
    pub when: Option<WhenExpr>,
}
23
24impl RuleEntry {
25    pub fn new(rule: Box<dyn Rule>) -> Self {
26        Self { rule, when: None }
27    }
28
29    #[must_use]
30    pub fn with_when(mut self, expr: WhenExpr) -> Self {
31        self.when = Some(expr);
32        self
33    }
34}
35
/// Executes a set of rules against a pre-built [`FileIndex`].
///
/// The engine owns a [`RuleRegistry`] so cross-file rules (e.g.
/// `for_each_dir`) can build nested rules on demand during evaluation.
/// Optional `facts` and `vars` (set via the builder chain) are evaluated
/// at run time and threaded into each rule's [`Context`] and into the
/// `when` expression evaluator that gates rules.
#[derive(Debug)]
pub struct Engine {
    /// Rules (each with its optional `when` gate). Report entries are
    /// emitted in this order.
    entries: Vec<RuleEntry>,
    /// Handed to every rule through `Context::registry`.
    registry: RuleRegistry,
    /// Fact specifications, evaluated once at the start of each
    /// `run` / `fix` call.
    facts: Vec<FactSpec>,
    /// User variables exposed to rules (via `Context`) and to `when`
    /// expressions (via `WhenEnv`).
    vars: HashMap<String, String>,
    /// Forwarded to fixers through `FixContext`. Both constructors default
    /// it to `Some(1 << 20)` — presumably a byte cap on files a fixer may
    /// rewrite; confirm against `FixContext`.
    fix_size_limit: Option<u64>,
    /// In `--changed` mode, the set of paths (relative to root)
    /// that the user wants linted. `None` means "full check"; the
    /// engine bypasses every changed-set short-circuit. See
    /// [`Engine::with_changed_paths`] for the contract.
    changed_paths: Option<HashSet<PathBuf>>,
}
56
57impl Engine {
58    /// Backward-compatible: wrap each rule in a [`RuleEntry`] with no `when`.
59    pub fn new(rules: Vec<Box<dyn Rule>>, registry: RuleRegistry) -> Self {
60        let entries = rules.into_iter().map(RuleEntry::new).collect();
61        Self {
62            entries,
63            registry,
64            facts: Vec::new(),
65            vars: HashMap::new(),
66            fix_size_limit: Some(1 << 20),
67            changed_paths: None,
68        }
69    }
70
71    /// Construct from rule entries (each carrying an optional `when`).
72    pub fn from_entries(entries: Vec<RuleEntry>, registry: RuleRegistry) -> Self {
73        Self {
74            entries,
75            registry,
76            facts: Vec::new(),
77            vars: HashMap::new(),
78            fix_size_limit: Some(1 << 20),
79            changed_paths: None,
80        }
81    }
82
83    #[must_use]
84    pub fn with_fix_size_limit(mut self, limit: Option<u64>) -> Self {
85        self.fix_size_limit = limit;
86        self
87    }
88
89    #[must_use]
90    pub fn with_facts(mut self, facts: Vec<FactSpec>) -> Self {
91        self.facts = facts;
92        self
93    }
94
95    #[must_use]
96    pub fn with_vars(mut self, vars: HashMap<String, String>) -> Self {
97        self.vars = vars;
98        self
99    }
100
101    /// Restrict evaluation to the given set of paths (relative to
102    /// the alint root). Per-file rules see a [`FileIndex`]
103    /// filtered to only these paths; rules that override
104    /// [`Rule::requires_full_index`] (cross-file + existence
105    /// rules) still see the full index but are skipped when
106    /// their [`Rule::path_scope`] doesn't intersect the set.
107    ///
108    /// An empty set short-circuits to a no-op report — there's
109    /// nothing to lint. Pass `None` (or omit) to disable
110    /// `--changed` semantics entirely.
111    #[must_use]
112    pub fn with_changed_paths(mut self, set: HashSet<PathBuf>) -> Self {
113        self.changed_paths = Some(set);
114        self
115    }
116
    /// Number of rule entries loaded into the engine. Entries that a `when`
    /// gate or the `--changed` scope would skip at run time are still counted.
    pub fn rule_count(&self) -> usize {
        self.entries.len()
    }
120
121    pub fn run(&self, root: &Path, index: &FileIndex) -> Result<Report> {
122        // Empty changed-set fast path: nothing to lint, return
123        // an empty report rather than walk the entries list at
124        // all. Saves the fact-evaluation pass too.
125        if self.changed_paths.as_ref().is_some_and(HashSet::is_empty) {
126            return Ok(Report {
127                results: Vec::new(),
128            });
129        }
130
131        let fact_values = evaluate_facts(&self.facts, root, index)?;
132        let git_tracked = self.collect_git_tracked_if_needed(root);
133        let git_blame = self.build_blame_cache_if_needed(root);
134        let filtered_index = self.build_filtered_index(index);
135        let full_ctx = Context {
136            root,
137            index,
138            registry: Some(&self.registry),
139            facts: Some(&fact_values),
140            vars: Some(&self.vars),
141            git_tracked: git_tracked.as_ref(),
142            git_blame: git_blame.as_ref(),
143        };
144        let filtered_ctx = filtered_index.as_ref().map(|fi| Context {
145            root,
146            index: fi,
147            registry: Some(&self.registry),
148            facts: Some(&fact_values),
149            vars: Some(&self.vars),
150            git_tracked: git_tracked.as_ref(),
151            git_blame: git_blame.as_ref(),
152        });
153        let when_env = WhenEnv {
154            facts: &fact_values,
155            vars: &self.vars,
156            iter: None,
157        };
158
159        // Cross-file partition: rules that don't opt into the
160        // file-major dispatch path (cross-file rules + per-file
161        // rules that haven't migrated yet). Same parallelism
162        // shape as v0.9.2 — rule-major par_iter.
163        let cross_results: Vec<(usize, RuleResult)> = self
164            .entries
165            .par_iter()
166            .enumerate()
167            .filter_map(|(idx, entry)| {
168                if entry.rule.as_per_file().is_some() {
169                    return None;
170                }
171                if self.skip_for_changed(entry.rule.as_ref()) {
172                    return None;
173                }
174                let ctx = pick_ctx(entry.rule.as_ref(), &full_ctx, filtered_ctx.as_ref());
175                run_entry(entry, ctx, &when_env, &fact_values).map(|rr| (idx, rr))
176            })
177            .collect();
178
179        // Per-file partition: file-major loop reads each file
180        // once and dispatches to every per-file rule whose scope
181        // matches. Coalesces N reads of one file across N rules
182        // sharing it.
183        let per_file_results = self.run_per_file(root, &full_ctx, filtered_ctx.as_ref(), &when_env);
184
185        // Final assembly preserves `self.entries` order so the
186        // output Vec is deterministic + tests that index by
187        // position keep working. Each entry slot fills from
188        // either the cross-file or per-file partition; rules
189        // filtered out (by `--changed` scope, `when: false`, or
190        // passing with no violations) leave their slot empty.
191        let mut cross_by_idx: HashMap<usize, RuleResult> = cross_results.into_iter().collect();
192        let mut per_file_by_idx: HashMap<usize, RuleResult> =
193            per_file_results.into_iter().collect();
194        let mut results = Vec::with_capacity(self.entries.len());
195        for idx in 0..self.entries.len() {
196            if let Some(rr) = cross_by_idx.remove(&idx) {
197                results.push(rr);
198            } else if let Some(rr) = per_file_by_idx.remove(&idx) {
199                results.push(rr);
200            }
201        }
202        Ok(Report { results })
203    }
204
205    /// Per-file dispatch loop. Walks `index.files()` in parallel
206    /// and, for each file, calls every applicable per-file rule's
207    /// `evaluate_file` against a single `std::fs::read`. Returns
208    /// `(entry-index, RuleResult)` tuples for every per-file
209    /// rule that emitted at least one violation; passing rules
210    /// (zero violations) are omitted, matching the rule-major
211    /// path's semantics.
212    #[allow(clippy::too_many_lines)]
213    fn run_per_file<'a>(
214        &'a self,
215        root: &'a Path,
216        full_ctx: &'a Context<'a>,
217        filtered_ctx: Option<&'a Context<'a>>,
218        when_env: &'a WhenEnv<'a>,
219    ) -> Vec<(usize, RuleResult)> {
220        // Pre-filter live per-file entries: opt-in via
221        // `as_per_file`, not skipped by `--changed`, and `when`
222        // resolved. `when` evaluates against constant facts +
223        // vars (no `iter` namespace at the engine level), so its
224        // verdict is independent of the file being scanned —
225        // resolve it once per rule before entering the file
226        // loop. `when` errors short-circuit to a per-rule result
227        // with the error message; behaviour matches the
228        // rule-major path's `run_entry` for parity.
229        let mut live: Vec<(usize, &RuleEntry)> = Vec::new();
230        let mut when_errors: Vec<(usize, RuleResult)> = Vec::new();
231        for (idx, entry) in self.entries.iter().enumerate() {
232            if entry.rule.as_per_file().is_none() {
233                continue;
234            }
235            if self.skip_for_changed(entry.rule.as_ref()) {
236                continue;
237            }
238            if let Some(expr) = &entry.when {
239                match expr.evaluate(when_env) {
240                    Ok(true) => {}
241                    Ok(false) => continue,
242                    Err(e) => {
243                        when_errors.push((
244                            idx,
245                            RuleResult {
246                                rule_id: Arc::from(entry.rule.id()),
247                                level: entry.rule.level(),
248                                policy_url: entry.rule.policy_url().map(Arc::from),
249                                violations: vec![Violation::new(format!(
250                                    "when evaluation error: {e}"
251                                ))],
252                                is_fixable: entry.rule.fixer().is_some(),
253                            },
254                        ));
255                        continue;
256                    }
257                }
258            }
259            live.push((idx, entry));
260        }
261        if live.is_empty() {
262            return when_errors;
263        }
264
265        let per_file_ctx = filtered_ctx.unwrap_or(full_ctx);
266
267        // Each file-major iteration produces a Vec of
268        // `(entry-index, Violation)` tuples. The flatten
269        // gathers them all; aggregation below buckets them by
270        // entry-index back into per-rule `RuleResult`s.
271        let by_file: Vec<(usize, Violation)> = per_file_ctx
272            .index
273            .files()
274            .par_bridge()
275            .flat_map_iter(|file_entry| {
276                // 1. Decide which per-file rules apply to this
277                // file. Per-file rules expose their scope via
278                // `PerFileRule::path_scope`; we filter on it
279                // before any I/O so files no rule cares about
280                // never get read.
281                let applicable: Vec<&RuleEntry> = live
282                    .iter()
283                    .filter(|(_, entry)| {
284                        entry
285                            .rule
286                            .as_per_file()
287                            .expect("live entries are per-file rules by construction")
288                            .path_scope()
289                            .matches(&file_entry.path)
290                    })
291                    .map(|(_, entry)| *entry)
292                    .collect();
293                if applicable.is_empty() {
294                    return Vec::new();
295                }
296                // 2. Read once. Read failures (file deleted
297                // mid-walk, permission flake) skip the file
298                // silently — same shape as today's per-rule
299                // `let Ok(bytes) = std::fs::read(...) else
300                // continue;`.
301                let abs = root.join(&file_entry.path);
302                let Ok(bytes) = std::fs::read(&abs) else {
303                    return Vec::new();
304                };
305                // 3. Dispatch. Every applicable rule sees the
306                // same byte slice; the file is read exactly once
307                // even though N rules may produce violations
308                // against it.
309                let mut out: Vec<(usize, Violation)> = Vec::new();
310                for entry in applicable {
311                    let live_idx = live
312                        .iter()
313                        .position(|(_, e)| std::ptr::eq(*e, entry))
314                        .expect("applicable entry must be in live list");
315                    let entry_idx = live[live_idx].0;
316                    let pf = entry
317                        .rule
318                        .as_per_file()
319                        .expect("live entries are per-file rules by construction");
320                    let result = pf.evaluate_file(per_file_ctx, &file_entry.path, &bytes);
321                    match result {
322                        Ok(vs) => {
323                            for v in vs {
324                                out.push((entry_idx, v));
325                            }
326                        }
327                        Err(e) => {
328                            out.push((entry_idx, Violation::new(format!("rule error: {e}"))));
329                        }
330                    }
331                }
332                out
333            })
334            .collect();
335
336        // Bucket violations by entry-index, then rebuild
337        // `RuleResult` per live entry preserving each rule's
338        // metadata (level / policy_url / is_fixable).
339        let mut bucket: HashMap<usize, Vec<Violation>> = HashMap::new();
340        for (idx, v) in by_file {
341            bucket.entry(idx).or_default().push(v);
342        }
343        let mut results = when_errors;
344        for (idx, entry) in live {
345            let Some(violations) = bucket.remove(&idx) else {
346                // Rule was applicable to zero files (or every
347                // file was empty / unreadable) — passing rule;
348                // omit, matching today's behaviour.
349                continue;
350            };
351            results.push((
352                idx,
353                RuleResult {
354                    rule_id: Arc::from(entry.rule.id()),
355                    level: entry.rule.level(),
356                    policy_url: entry.rule.policy_url().map(Arc::from),
357                    violations,
358                    is_fixable: entry.rule.fixer().is_some(),
359                },
360            ));
361        }
362        results
363    }
364
365    /// Evaluate every rule and apply fixers for their violations.
366    /// Fixes run sequentially — rules whose fixers touch the filesystem
367    /// must not race. Rules with no fixer contribute
368    /// [`FixStatus::Unfixable`] entries so the caller sees them in the
369    /// report. Rules that pass (no violations) are omitted from the
370    /// result, same as [`Engine::run`]'s usual behaviour.
371    pub fn fix(&self, root: &Path, index: &FileIndex, dry_run: bool) -> Result<FixReport> {
372        if self.changed_paths.as_ref().is_some_and(HashSet::is_empty) {
373            return Ok(FixReport {
374                results: Vec::new(),
375            });
376        }
377
378        let fact_values = evaluate_facts(&self.facts, root, index)?;
379        let git_tracked = self.collect_git_tracked_if_needed(root);
380        let git_blame = self.build_blame_cache_if_needed(root);
381        let filtered_index = self.build_filtered_index(index);
382        let full_ctx = Context {
383            root,
384            index,
385            registry: Some(&self.registry),
386            facts: Some(&fact_values),
387            vars: Some(&self.vars),
388            git_tracked: git_tracked.as_ref(),
389            git_blame: git_blame.as_ref(),
390        };
391        let filtered_ctx = filtered_index.as_ref().map(|fi| Context {
392            root,
393            index: fi,
394            registry: Some(&self.registry),
395            facts: Some(&fact_values),
396            vars: Some(&self.vars),
397            git_tracked: git_tracked.as_ref(),
398            git_blame: git_blame.as_ref(),
399        });
400        let when_env = WhenEnv {
401            facts: &fact_values,
402            vars: &self.vars,
403            iter: None,
404        };
405        let fix_ctx = FixContext {
406            root,
407            dry_run,
408            fix_size_limit: self.fix_size_limit,
409        };
410
411        let mut results: Vec<FixRuleResult> = Vec::new();
412        for entry in &self.entries {
413            if self.skip_for_changed(entry.rule.as_ref()) {
414                continue;
415            }
416            let ctx = pick_ctx(entry.rule.as_ref(), &full_ctx, filtered_ctx.as_ref());
417            if let Some(expr) = &entry.when {
418                match expr.evaluate(&when_env) {
419                    Ok(true) => {}
420                    Ok(false) => continue,
421                    Err(e) => {
422                        results.push(FixRuleResult {
423                            rule_id: Arc::from(entry.rule.id()),
424                            level: entry.rule.level(),
425                            items: vec![FixItem {
426                                violation: Violation::new(format!("when evaluation error: {e}")),
427                                status: FixStatus::Unfixable,
428                            }],
429                        });
430                        continue;
431                    }
432                }
433            }
434            let violations = match entry.rule.evaluate(ctx) {
435                Ok(v) => v,
436                Err(e) => vec![Violation::new(format!("rule error: {e}"))],
437            };
438            if violations.is_empty() {
439                continue;
440            }
441            let fixer = entry.rule.fixer();
442            let items: Vec<FixItem> = violations
443                .into_iter()
444                .map(|v| {
445                    let status = match fixer {
446                        Some(f) => match f.apply(&v, &fix_ctx) {
447                            Ok(FixOutcome::Applied(s)) => FixStatus::Applied(s),
448                            Ok(FixOutcome::Skipped(s)) => FixStatus::Skipped(s),
449                            Err(e) => FixStatus::Skipped(format!("fix error: {e}")),
450                        },
451                        None => FixStatus::Unfixable,
452                    };
453                    FixItem {
454                        violation: v,
455                        status,
456                    }
457                })
458                .collect();
459            results.push(FixRuleResult {
460                rule_id: Arc::from(entry.rule.id()),
461                level: entry.rule.level(),
462                items,
463            });
464        }
465        Ok(FixReport { results })
466    }
467
468    /// Collect git's tracked-paths set, but only if at least one
469    /// loaded rule asked for it. Most repos / configs never opt
470    /// in, so this returns `None` zero-cost in the common case.
471    /// Inside a non-git directory, or when `git` exits non-zero
472    /// (corrupt repo, missing binary), the helper also returns
473    /// `None` — rules that consult it then treat every entry as
474    /// "untracked," which is the right default for absence-style
475    /// rules with `git_tracked_only: true`.
476    fn collect_git_tracked_if_needed(
477        &self,
478        root: &Path,
479    ) -> Option<std::collections::HashSet<std::path::PathBuf>> {
480        let any_wants = self.entries.iter().any(|e| e.rule.wants_git_tracked());
481        if !any_wants {
482            return None;
483        }
484        crate::git::collect_tracked_paths(root)
485    }
486
487    /// Build the per-file `git blame` cache when at least one
488    /// loaded rule asked for it. Returns `None` otherwise — the
489    /// common case (most configs have no `git_blame_age` rules)
490    /// pays nothing. The cache itself is empty at construction;
491    /// rules trigger blame on first access per file.
492    ///
493    /// We use [`crate::git::collect_tracked_paths`] as the
494    /// is-this-a-git-repo probe so the rule no-ops cleanly
495    /// outside a repo without per-file blame failures littering
496    /// the cache. When the user opts into BOTH `git_tracked_only`
497    /// and `git_blame_age`, the probe runs once via
498    /// [`Engine::collect_git_tracked_if_needed`] and once here —
499    /// negligible cost (sub-ms) compared to the blame work.
500    fn build_blame_cache_if_needed(&self, root: &Path) -> Option<crate::git::BlameCache> {
501        let any_wants = self.entries.iter().any(|e| e.rule.wants_git_blame());
502        if !any_wants {
503            return None;
504        }
505        // Probe: a non-git workspace short-circuits to `None` so
506        // the rule's "silent no-op outside git" path is exercised
507        // at the engine level rather than per-file.
508        crate::git::collect_tracked_paths(root)?;
509        Some(crate::git::BlameCache::new(root.to_path_buf()))
510    }
511
512    /// Build a [`FileIndex`] containing only the entries the user
513    /// said they care about (the `--changed` set). Returns `None`
514    /// when no changed-set is configured — callers fall back to
515    /// the full index.
516    fn build_filtered_index(&self, full: &FileIndex) -> Option<FileIndex> {
517        let set = self.changed_paths.as_ref()?;
518        let entries = full
519            .entries
520            .iter()
521            .filter(|e| set.contains(&*e.path))
522            .cloned()
523            .collect();
524        Some(FileIndex { entries })
525    }
526
527    /// True when `--changed` mode is active AND the rule's
528    /// `path_scope` exists AND no path in the changed-set
529    /// satisfies it. Cross-file rules return `path_scope = None`
530    /// per the roadmap contract — so they always return `false`
531    /// here (i.e. never skipped).
532    fn skip_for_changed(&self, rule: &dyn Rule) -> bool {
533        let Some(set) = &self.changed_paths else {
534            return false;
535        };
536        let Some(scope) = rule.path_scope() else {
537            return false;
538        };
539        !set.iter().any(|p| scope.matches(p))
540    }
541}
542
543/// Pick the [`Context`] a rule should evaluate against:
544/// `full_ctx` if it [`requires_full_index`](Rule::requires_full_index),
545/// otherwise the changed-only filtered context (falling back to
546/// `full_ctx` when no `--changed` set is configured).
547fn pick_ctx<'a>(
548    rule: &dyn Rule,
549    full_ctx: &'a Context<'a>,
550    filtered_ctx: Option<&'a Context<'a>>,
551) -> &'a Context<'a> {
552    if rule.requires_full_index() {
553        full_ctx
554    } else {
555        filtered_ctx.unwrap_or(full_ctx)
556    }
557}
558
559fn run_entry(
560    entry: &RuleEntry,
561    ctx: &Context<'_>,
562    when_env: &WhenEnv<'_>,
563    _facts: &FactValues,
564) -> Option<RuleResult> {
565    if let Some(expr) = &entry.when {
566        match expr.evaluate(when_env) {
567            Ok(true) => {} // proceed
568            Ok(false) => return None,
569            Err(e) => {
570                return Some(RuleResult {
571                    rule_id: Arc::from(entry.rule.id()),
572                    level: entry.rule.level(),
573                    policy_url: entry.rule.policy_url().map(Arc::from),
574                    violations: vec![Violation::new(format!("when evaluation error: {e}"))],
575                    is_fixable: entry.rule.fixer().is_some(),
576                });
577            }
578        }
579    }
580    Some(run_one(entry.rule.as_ref(), ctx))
581}
582
583fn run_one(rule: &dyn Rule, ctx: &Context<'_>) -> RuleResult {
584    let violations = match rule.evaluate(ctx) {
585        Ok(v) => v,
586        Err(e) => vec![Violation::new(format!("rule error: {e}"))],
587    };
588    RuleResult {
589        rule_id: Arc::from(rule.id()),
590        level: rule.level(),
591        policy_url: rule.policy_url().map(Arc::from),
592        violations,
593        is_fixable: rule.fixer().is_some(),
594    }
595}
596
597#[cfg(test)]
598mod tests {
599    use super::*;
600    use crate::level::Level;
601    use crate::scope::Scope;
602    use crate::walker::FileEntry;
603    use std::path::Path;
604
    /// Stub rule: emits one violation per matched file in scope.
    /// Configurable to advertise `requires_full_index` for
    /// cross-file rule simulation, and a `path_scope` for
    /// changed-mode tests.
    #[derive(Debug)]
    struct StubRule {
        /// Identifier returned from `Rule::id`.
        id: String,
        /// Severity returned from `Rule::level`.
        level: Level,
        /// Files matching this scope each yield one "hit" violation.
        scope: Scope,
        /// Value returned from `Rule::requires_full_index`.
        full_index: bool,
        /// When true, `Rule::path_scope` exposes `scope`; otherwise it
        /// returns `None`.
        expose_scope: bool,
    }
617
618    impl Rule for StubRule {
619        fn id(&self) -> &str {
620            &self.id
621        }
622        fn level(&self) -> Level {
623            self.level
624        }
625        fn requires_full_index(&self) -> bool {
626            self.full_index
627        }
628        fn path_scope(&self) -> Option<&Scope> {
629            self.expose_scope.then_some(&self.scope)
630        }
631        fn evaluate(&self, ctx: &Context<'_>) -> crate::error::Result<Vec<Violation>> {
632            let mut out = Vec::new();
633            for entry in ctx.index.files() {
634                if self.scope.matches(&entry.path) {
635                    out.push(Violation::new("hit").with_path(entry.path.clone()));
636                }
637            }
638            Ok(out)
639        }
640    }
641
642    fn stub(id: &str, glob: &str) -> Box<dyn Rule> {
643        Box::new(StubRule {
644            id: id.into(),
645            level: Level::Error,
646            scope: Scope::from_patterns(&[glob.to_string()]).unwrap(),
647            full_index: false,
648            expose_scope: true,
649        })
650    }
651
652    fn full_index_stub(id: &str) -> Box<dyn Rule> {
653        Box::new(StubRule {
654            id: id.into(),
655            level: Level::Error,
656            scope: Scope::match_all(),
657            full_index: true,
658            expose_scope: false,
659        })
660    }
661
662    fn idx(paths: &[&str]) -> FileIndex {
663        FileIndex {
664            entries: paths
665                .iter()
666                .map(|p| FileEntry {
667                    path: std::path::Path::new(p).into(),
668                    is_dir: false,
669                    size: 0,
670                })
671                .collect(),
672        }
673    }
674
675    #[test]
676    fn run_empty_returns_empty_report() {
677        let engine = Engine::new(Vec::new(), RuleRegistry::new());
678        let report = engine.run(Path::new("/fake"), &idx(&["a.rs"])).unwrap();
679        assert!(report.results.is_empty());
680    }
681
682    #[test]
683    fn run_single_rule_emits_per_match() {
684        let engine = Engine::new(vec![stub("t", "**/*.rs")], RuleRegistry::new());
685        let report = engine
686            .run(
687                Path::new("/fake"),
688                &idx(&["src/a.rs", "src/b.rs", "README.md"]),
689            )
690            .unwrap();
691        assert_eq!(report.results.len(), 1);
692        assert_eq!(report.results[0].violations.len(), 2);
693    }
694
695    #[test]
696    fn run_with_empty_changed_set_short_circuits() {
697        // Per the contract: empty `--changed` set means "lint
698        // nothing"; the engine returns an empty Report without
699        // even evaluating facts.
700        let engine = Engine::new(vec![stub("t", "**/*.rs")], RuleRegistry::new())
701            .with_changed_paths(HashSet::new());
702        let report = engine.run(Path::new("/fake"), &idx(&["src/a.rs"])).unwrap();
703        assert!(report.results.is_empty());
704    }
705
706    #[test]
707    fn changed_mode_skips_rule_whose_scope_misses_diff() {
708        // Rule scoped to `src/**`; changed-set has only docs/
709        // → rule skipped (no result emitted).
710        let mut changed = HashSet::new();
711        changed.insert(std::path::PathBuf::from("docs/README.md"));
712        let engine = Engine::new(vec![stub("src-rule", "src/**/*.rs")], RuleRegistry::new())
713            .with_changed_paths(changed);
714        let report = engine
715            .run(Path::new("/fake"), &idx(&["src/a.rs", "docs/README.md"]))
716            .unwrap();
717        assert!(
718            report.results.is_empty(),
719            "out-of-scope rule should be skipped: {:?}",
720            report.results,
721        );
722    }
723
724    #[test]
725    fn changed_mode_runs_rule_whose_scope_intersects_diff() {
726        let mut changed = HashSet::new();
727        changed.insert(std::path::PathBuf::from("src/a.rs"));
728        let engine = Engine::new(vec![stub("src-rule", "src/**/*.rs")], RuleRegistry::new())
729            .with_changed_paths(changed);
730        let report = engine
731            .run(Path::new("/fake"), &idx(&["src/a.rs", "src/b.rs"]))
732            .unwrap();
733        // Filtered index: only `src/a.rs` is visible. Rule
734        // matches it → 1 violation.
735        assert_eq!(report.results.len(), 1);
736        assert_eq!(report.results[0].violations.len(), 1);
737    }
738
739    #[test]
740    fn requires_full_index_rule_runs_unconditionally_in_changed_mode() {
741        // A rule with `requires_full_index = true` and no
742        // `path_scope` opts out of the changed-set filter
743        // entirely — its verdict is over the whole tree.
744        let mut changed = HashSet::new();
745        changed.insert(std::path::PathBuf::from("docs/README.md"));
746        let engine = Engine::new(vec![full_index_stub("cross")], RuleRegistry::new())
747            .with_changed_paths(changed);
748        let report = engine
749            .run(Path::new("/fake"), &idx(&["src/a.rs", "docs/README.md"]))
750            .unwrap();
751        // `cross` ran against the full index (not the filtered
752        // one), so it sees both files.
753        assert_eq!(report.results.len(), 1);
754        assert_eq!(report.results[0].violations.len(), 2);
755    }
756
757    #[test]
758    fn rule_count_reflects_number_of_entries() {
759        let engine = Engine::new(
760            vec![stub("a", "**"), stub("b", "**"), stub("c", "**")],
761            RuleRegistry::new(),
762        );
763        assert_eq!(engine.rule_count(), 3);
764    }
765
766    #[test]
767    fn from_entries_constructor_supports_when_clauses() {
768        // A rule wrapped with a `when: false` expression should
769        // be skipped during run — no result emitted.
770        let entry = RuleEntry::new(stub("gated", "**/*.rs"))
771            .with_when(crate::when::parse("false").unwrap());
772        let engine = Engine::from_entries(vec![entry], RuleRegistry::new());
773        let report = engine.run(Path::new("/fake"), &idx(&["a.rs"])).unwrap();
774        assert!(
775            report.results.is_empty(),
776            "when-false rule must be skipped: {:?}",
777            report.results,
778        );
779    }
780
781    #[test]
782    fn fix_size_limit_default_is_one_mib() {
783        // The builder default; tests that override engines via
784        // `with_fix_size_limit` rely on this baseline.
785        let engine = Engine::new(Vec::new(), RuleRegistry::new());
786        // Implementation detail intentionally exposed for tests.
787        // We can only verify the value indirectly via `with_*`
788        // returning a different limit; assert the builder works.
789        let updated = engine.with_fix_size_limit(Some(42));
790        assert_eq!(updated.rule_count(), 0);
791    }
792
793    #[test]
794    fn skip_for_changed_returns_false_for_full_check() {
795        // No `--changed` set → rule never skipped on that basis.
796        let engine = Engine::new(vec![stub("t", "**/*.rs")], RuleRegistry::new());
797        let report = engine.run(Path::new("/fake"), &idx(&["a.rs"])).unwrap();
798        assert_eq!(report.results.len(), 1);
799    }
800
801    /// Per-file rule that emits one violation per file based on
802    /// the byte content prefix. Used to verify the file-major
803    /// dispatch path actually hands the bytes to the rule and
804    /// aggregates the violations correctly.
805    #[derive(Debug)]
806    struct PerFileStub {
807        id: String,
808        scope: Scope,
809        prefix: Vec<u8>,
810    }
811
812    impl Rule for PerFileStub {
813        fn id(&self) -> &str {
814            &self.id
815        }
816        fn level(&self) -> Level {
817            Level::Error
818        }
819        fn evaluate(&self, _ctx: &Context<'_>) -> crate::error::Result<Vec<Violation>> {
820            // Rule-major fallback: not exercised when
821            // `as_per_file` is set + the engine routes to the
822            // file-major loop.
823            Ok(Vec::new())
824        }
825        fn as_per_file(&self) -> Option<&dyn crate::PerFileRule> {
826            Some(self)
827        }
828    }
829
830    impl crate::PerFileRule for PerFileStub {
831        fn path_scope(&self) -> &Scope {
832            &self.scope
833        }
834        fn evaluate_file(
835            &self,
836            _ctx: &Context<'_>,
837            path: &std::path::Path,
838            bytes: &[u8],
839        ) -> crate::error::Result<Vec<Violation>> {
840            if !bytes.starts_with(&self.prefix) {
841                return Ok(vec![
842                    Violation::new("missing prefix")
843                        .with_path(std::sync::Arc::<std::path::Path>::from(path)),
844                ]);
845            }
846            Ok(Vec::new())
847        }
848    }
849
850    #[test]
851    fn dispatch_flip_routes_per_file_rule_through_file_major_loop() {
852        // Real filesystem so the engine's `std::fs::read` works.
853        // The PerFileStub fires when a file does NOT start with
854        // `MAGIC` — exercises the slice-handing-in path end-to-end.
855        let tmp = tempfile::tempdir().unwrap();
856        std::fs::write(tmp.path().join("good.txt"), b"MAGIC + payload").unwrap();
857        std::fs::write(tmp.path().join("bad.txt"), b"no magic here").unwrap();
858
859        let rule = Box::new(PerFileStub {
860            id: "needs-magic".into(),
861            scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
862            prefix: b"MAGIC".to_vec(),
863        });
864        let engine = Engine::new(vec![rule], RuleRegistry::new());
865
866        let opts = crate::WalkOptions::default();
867        let index = crate::walk(tmp.path(), &opts).unwrap();
868        let report = engine.run(tmp.path(), &index).unwrap();
869
870        assert_eq!(report.results.len(), 1, "results: {:?}", report.results);
871        let r = &report.results[0];
872        assert_eq!(&*r.rule_id, "needs-magic");
873        assert_eq!(r.violations.len(), 1, "violations: {:?}", r.violations);
874        assert_eq!(
875            r.violations[0].path.as_deref(),
876            Some(std::path::Path::new("bad.txt")),
877        );
878    }
879
880    #[test]
881    fn dispatch_flip_aggregates_multiple_per_file_rules() {
882        // Two per-file rules sharing one scope: the file-major
883        // loop reads each file once and dispatches both rules
884        // against the same byte buffer. Verifies the aggregation
885        // step buckets violations per rule correctly (not
886        // per-file).
887        let tmp = tempfile::tempdir().unwrap();
888        std::fs::write(tmp.path().join("a.txt"), b"ZZZ stuff").unwrap();
889        std::fs::write(tmp.path().join("b.txt"), b"BBB stuff").unwrap();
890
891        let rule_a = Box::new(PerFileStub {
892            id: "needs-AAA".into(),
893            scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
894            prefix: b"AAA".to_vec(),
895        });
896        let rule_b = Box::new(PerFileStub {
897            id: "needs-BBB".into(),
898            scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
899            prefix: b"BBB".to_vec(),
900        });
901        let engine = Engine::new(vec![rule_a, rule_b], RuleRegistry::new());
902
903        let opts = crate::WalkOptions::default();
904        let index = crate::walk(tmp.path(), &opts).unwrap();
905        let report = engine.run(tmp.path(), &index).unwrap();
906
907        // `needs-AAA` fires on both files (neither starts with
908        // "AAA"). `needs-BBB` fires only on `a.txt`.
909        let by_id: HashMap<&str, &RuleResult> =
910            report.results.iter().map(|r| (&*r.rule_id, r)).collect();
911        assert_eq!(
912            by_id.len(),
913            2,
914            "expected both rules in the report: {:?}",
915            report.results
916        );
917        assert_eq!(by_id["needs-AAA"].violations.len(), 2);
918        assert_eq!(by_id["needs-BBB"].violations.len(), 1);
919        assert_eq!(
920            by_id["needs-BBB"].violations[0].path.as_deref(),
921            Some(std::path::Path::new("a.txt")),
922        );
923    }
924
925    #[test]
926    fn dispatch_flip_passes_when_no_violations() {
927        // A per-file rule that finds no violations in any file
928        // should be omitted from the report entirely (matching
929        // the rule-major path's "passing rules omitted"
930        // semantics).
931        let tmp = tempfile::tempdir().unwrap();
932        std::fs::write(tmp.path().join("a.txt"), b"MAGIC ok").unwrap();
933
934        let rule = Box::new(PerFileStub {
935            id: "needs-magic".into(),
936            scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
937            prefix: b"MAGIC".to_vec(),
938        });
939        let engine = Engine::new(vec![rule], RuleRegistry::new());
940
941        let opts = crate::WalkOptions::default();
942        let index = crate::walk(tmp.path(), &opts).unwrap();
943        let report = engine.run(tmp.path(), &index).unwrap();
944
945        assert!(report.results.is_empty(), "results: {:?}", report.results);
946    }
947
948    #[test]
949    fn dispatch_flip_preserves_cross_file_rules_unchanged() {
950        // A rule that opts out of `as_per_file` (the default
951        // `None`) keeps the rule-major path. Mixing with a
952        // per-file rule should produce both results.
953        let tmp = tempfile::tempdir().unwrap();
954        std::fs::write(tmp.path().join("a.txt"), b"hi").unwrap();
955
956        let cross_rule = stub("cross", "**/*.txt");
957        let per_file_rule = Box::new(PerFileStub {
958            id: "needs-magic".into(),
959            scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
960            prefix: b"MAGIC".to_vec(),
961        });
962        let engine = Engine::new(vec![cross_rule, per_file_rule], RuleRegistry::new());
963
964        let opts = crate::WalkOptions::default();
965        let index = crate::walk(tmp.path(), &opts).unwrap();
966        let report = engine.run(tmp.path(), &index).unwrap();
967
968        assert_eq!(report.results.len(), 2, "results: {:?}", report.results);
969        // Order follows entry-registration order.
970        assert_eq!(&*report.results[0].rule_id, "cross");
971        assert_eq!(&*report.results[1].rule_id, "needs-magic");
972    }
973}