1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4use std::sync::atomic::{AtomicU64, Ordering};
5use std::time::Instant;
6
7use rayon::prelude::*;
8
9use crate::error::{Error, Result};
10use crate::facts::{FactSpec, FactValues, evaluate_facts};
11use crate::registry::RuleRegistry;
12use crate::report::{FixItem, FixReport, FixRuleResult, FixStatus, Report};
13use crate::rule::{Context, FixContext, FixOutcome, Rule, RuleResult, Violation};
14use crate::walker::FileIndex;
15use crate::when::{WhenEnv, WhenExpr};
16
/// Emits a single `engine.phase` tracing event at INFO level.
///
/// `$timer` is an expression exposing `elapsed()` (call sites pass an
/// `Instant`); `$label` becomes the `phase` field of the event. Any trailing
/// `key = value` pairs are forwarded as additional fields, and a trailing
/// comma is accepted.
macro_rules! phase {
    ($timer:expr, $label:expr $(, $field:ident = $value:expr)* $(,)?) => {
        // The u128 -> u64 narrowing is accepted on purpose; the lint is
        // silenced for this one statement only.
        #[allow(clippy::cast_possible_truncation)]
        let elapsed_us: u64 = $timer.elapsed().as_micros() as u64;
        tracing::info!(
            phase = $label,
            elapsed_us = elapsed_us,
            $($field = $value,)*
            "engine.phase",
        );
    };
}
42
/// File-index views restricted to git-tracked entries, built by
/// `Engine::build_git_tracked_indexes` for rules whose `git_tracked_mode()`
/// is not `Off`.
#[derive(Debug)]
struct GitTrackedIndexes {
    /// Populated when at least one rule reports `GitTrackedMode::FileOnly`;
    /// holds only non-directory entries found in the tracked set.
    file_only: Option<FileIndex>,
    /// Populated when at least one rule reports `GitTrackedMode::DirAware`;
    /// holds tracked files plus directories for which
    /// `crate::git::dir_has_tracked_files` returns true.
    dir_aware: Option<FileIndex>,
}
62
/// Return shape of `Engine::collect_live_per_file_entries`.
///
/// The first vector lists the per-file rules that should run, each paired
/// with its position in `Engine::entries`. The second vector carries
/// ready-made results for rules whose `when` expression failed to evaluate.
type LivePerFileEntries<'a> = (Vec<(usize, &'a RuleEntry)>, Vec<(usize, RuleResult)>);
68
/// A rule together with the optional `when` expression that gates it.
#[derive(Debug)]
pub struct RuleEntry {
    /// The rule implementation to evaluate.
    pub rule: Box<dyn Rule>,
    /// Optional gate: the rule is skipped when this evaluates to `false`,
    /// and an evaluation error is reported as a violation of the rule.
    pub when: Option<WhenExpr>,
}
77
78impl RuleEntry {
79 pub fn new(rule: Box<dyn Rule>) -> Self {
80 Self { rule, when: None }
81 }
82
83 #[must_use]
84 pub fn with_when(mut self, expr: WhenExpr) -> Self {
85 self.when = Some(expr);
86 self
87 }
88}
89
/// Holds the configured rules and the shared inputs needed to evaluate or
/// fix them against a file index.
#[derive(Debug)]
pub struct Engine {
    /// Rules in declaration order; `run` emits results in this order.
    entries: Vec<RuleEntry>,
    /// Exposed to every rule through `Context::registry`.
    registry: RuleRegistry,
    /// Fact specifications passed to `evaluate_facts`.
    facts: Vec<FactSpec>,
    /// Variables exposed through `Context::vars` and `WhenEnv::vars`.
    vars: HashMap<String, String>,
    /// Forwarded to fixers as `FixContext::fix_size_limit`; both
    /// constructors initialise it to `Some(1 << 20)`.
    fix_size_limit: Option<u64>,
    /// Optional changed-path set. `Some(empty)` makes `run`/`fix` return an
    /// empty report; `Some(non-empty)` narrows the index and skips rules
    /// whose scope matches none of the paths.
    changed_paths: Option<HashSet<PathBuf>>,
}
110
111impl Engine {
112 pub fn new(rules: Vec<Box<dyn Rule>>, registry: RuleRegistry) -> Self {
114 let entries = rules.into_iter().map(RuleEntry::new).collect();
115 Self {
116 entries,
117 registry,
118 facts: Vec::new(),
119 vars: HashMap::new(),
120 fix_size_limit: Some(1 << 20),
121 changed_paths: None,
122 }
123 }
124
125 pub fn from_entries(entries: Vec<RuleEntry>, registry: RuleRegistry) -> Self {
127 Self {
128 entries,
129 registry,
130 facts: Vec::new(),
131 vars: HashMap::new(),
132 fix_size_limit: Some(1 << 20),
133 changed_paths: None,
134 }
135 }
136
137 #[must_use]
138 pub fn with_fix_size_limit(mut self, limit: Option<u64>) -> Self {
139 self.fix_size_limit = limit;
140 self
141 }
142
143 #[must_use]
144 pub fn with_facts(mut self, facts: Vec<FactSpec>) -> Self {
145 self.facts = facts;
146 self
147 }
148
149 #[must_use]
150 pub fn with_vars(mut self, vars: HashMap<String, String>) -> Self {
151 self.vars = vars;
152 self
153 }
154
155 #[must_use]
166 pub fn with_changed_paths(mut self, set: HashSet<PathBuf>) -> Self {
167 self.changed_paths = Some(set);
168 self
169 }
170
171 pub fn rule_count(&self) -> usize {
172 self.entries.len()
173 }
174
175 pub fn fixer_for(&self, rule_id: &str) -> Option<&dyn crate::rule::Fixer> {
179 self.entries
180 .iter()
181 .find(|e| e.rule.id() == rule_id)
182 .and_then(|e| e.rule.fixer())
183 }
184
185 pub fn is_per_file(&self, rule_id: &str) -> bool {
191 self.entries
192 .iter()
193 .find(|e| e.rule.id() == rule_id)
194 .is_some_and(|e| e.rule.as_per_file().is_some())
195 }
196
197 #[allow(clippy::too_many_lines)]
204 pub fn run(&self, root: &Path, index: &FileIndex) -> Result<Report> {
205 let t_total = Instant::now();
206 if self.changed_paths.as_ref().is_some_and(HashSet::is_empty) {
210 return Ok(Report {
211 results: Vec::new(),
212 });
213 }
214
215 let t_facts = Instant::now();
216 let fact_values = evaluate_facts(&self.facts, root, index)?;
217 phase!(t_facts, "evaluate_facts", facts = self.facts.len() as u64);
218
219 let t_git = Instant::now();
220 let git_tracked = self.collect_git_tracked_if_needed(root);
221 let git_blame = self.build_blame_cache_if_needed(root);
222 phase!(t_git, "git_setup");
223
224 let t_filter = Instant::now();
225 let filtered_index = self.build_filtered_index(index);
226 phase!(
227 t_filter,
228 "build_filtered_index",
229 files = index.entries.len() as u64,
230 );
231
232 let t_git_idx = Instant::now();
233 let git_tracked_indexes = self.build_git_tracked_indexes(index, git_tracked.as_ref());
234 phase!(
235 t_git_idx,
236 "build_git_tracked_indexes",
237 built = u64::from(git_tracked_indexes.is_some()),
238 );
239
240 let full_ctx = Context {
241 root,
242 index,
243 registry: Some(&self.registry),
244 facts: Some(&fact_values),
245 vars: Some(&self.vars),
246 git_tracked: git_tracked.as_ref(),
247 git_blame: git_blame.as_ref(),
248 };
249 let filtered_ctx = filtered_index.as_ref().map(|fi| Context {
250 root,
251 index: fi,
252 registry: Some(&self.registry),
253 facts: Some(&fact_values),
254 vars: Some(&self.vars),
255 git_tracked: git_tracked.as_ref(),
256 git_blame: git_blame.as_ref(),
257 });
258 let git_file_only_ctx = git_tracked_indexes
259 .as_ref()
260 .and_then(|gti| gti.file_only.as_ref())
261 .map(|fi| Context {
262 root,
263 index: fi,
264 registry: Some(&self.registry),
265 facts: Some(&fact_values),
266 vars: Some(&self.vars),
267 git_tracked: git_tracked.as_ref(),
268 git_blame: git_blame.as_ref(),
269 });
270 let git_dir_aware_ctx = git_tracked_indexes
271 .as_ref()
272 .and_then(|gti| gti.dir_aware.as_ref())
273 .map(|fi| Context {
274 root,
275 index: fi,
276 registry: Some(&self.registry),
277 facts: Some(&fact_values),
278 vars: Some(&self.vars),
279 git_tracked: git_tracked.as_ref(),
280 git_blame: git_blame.as_ref(),
281 });
282 let when_env = WhenEnv {
283 facts: &fact_values,
284 vars: &self.vars,
285 iter: None,
286 env: None,
287 };
288
289 let cross_rule_ns: Vec<AtomicU64> =
299 (0..self.entries.len()).map(|_| AtomicU64::new(0)).collect();
300
301 let t_cross = Instant::now();
306 let cross_results: Vec<(usize, RuleResult)> = self
307 .entries
308 .par_iter()
309 .enumerate()
310 .filter_map(|(idx, entry)| {
311 if entry.rule.as_per_file().is_some() {
312 return None;
313 }
314 if self.skip_for_changed(entry.rule.as_ref(), full_ctx.index) {
315 return None;
316 }
317 let ctx = pick_ctx(
318 entry.rule.as_ref(),
319 &full_ctx,
320 filtered_ctx.as_ref(),
321 git_file_only_ctx.as_ref(),
322 git_dir_aware_ctx.as_ref(),
323 );
324 let t_rule = Instant::now();
325 let result = run_entry(entry, ctx, &when_env, &fact_values);
326 #[allow(clippy::cast_possible_truncation)]
332 let elapsed_ns = t_rule.elapsed().as_nanos() as u64;
333 cross_rule_ns[idx].fetch_add(elapsed_ns, Ordering::Relaxed);
334 result.map(|rr| (idx, rr))
335 })
336 .collect();
337 phase!(
338 t_cross,
339 "cross_file_partition",
340 rules = self
341 .entries
342 .iter()
343 .filter(|e| e.rule.as_per_file().is_none())
344 .count() as u64,
345 );
346 if tracing::level_enabled!(tracing::Level::INFO) {
352 let mut rows: Vec<(&str, u64)> = self
353 .entries
354 .iter()
355 .enumerate()
356 .filter_map(|(idx, entry)| {
357 let ns = cross_rule_ns[idx].load(Ordering::Relaxed);
358 if ns == 0 {
359 return None;
360 }
361 Some((entry.rule.id(), ns))
362 })
363 .collect();
364 rows.sort_by_key(|(_, ns)| std::cmp::Reverse(*ns));
365 for (rule_id, ns) in rows {
366 tracing::info!(
367 phase = "cross_file_rule",
368 rule = rule_id,
369 elapsed_us = ns / 1000,
370 "engine.phase",
371 );
372 }
373 }
374
375 self.resolve_changed_paths(root, index)?;
378
379 let t_per_file = Instant::now();
384 let per_file_results = self.run_per_file(root, &full_ctx, filtered_ctx.as_ref(), &when_env);
385 phase!(
386 t_per_file,
387 "per_file_partition",
388 rules = self
389 .entries
390 .iter()
391 .filter(|e| e.rule.as_per_file().is_some())
392 .count() as u64,
393 );
394
395 let t_assembly = Instant::now();
402 let mut cross_by_idx: HashMap<usize, RuleResult> = cross_results.into_iter().collect();
403 let mut per_file_by_idx: HashMap<usize, RuleResult> =
404 per_file_results.into_iter().collect();
405 let mut results = Vec::with_capacity(self.entries.len());
406 for idx in 0..self.entries.len() {
407 if let Some(rr) = cross_by_idx.remove(&idx) {
408 results.push(rr);
409 } else if let Some(rr) = per_file_by_idx.remove(&idx) {
410 results.push(rr);
411 }
412 }
413 phase!(t_assembly, "assembly", results = results.len() as u64);
414 phase!(t_total, "engine_run_total");
415 Ok(Report { results })
416 }
417
418 #[allow(clippy::too_many_lines)]
426 fn run_per_file<'a>(
427 &'a self,
428 root: &'a Path,
429 full_ctx: &'a Context<'a>,
430 filtered_ctx: Option<&'a Context<'a>>,
431 when_env: &'a WhenEnv<'a>,
432 ) -> Vec<(usize, RuleResult)> {
433 let (live, when_errors) = self.collect_live_per_file_entries(full_ctx.index, when_env);
434 if live.is_empty() {
435 return when_errors;
436 }
437
438 let per_file_ctx = filtered_ctx.unwrap_or(full_ctx);
439
440 let by_file: Vec<(usize, Violation)> = per_file_ctx
456 .index
457 .entries
458 .par_iter()
459 .filter(|e| !e.is_dir)
460 .flat_map_iter(|file_entry| {
461 let applicable: Vec<(usize, &RuleEntry)> = live
470 .iter()
471 .filter(|(_, entry)| {
472 entry
482 .rule
483 .as_per_file()
484 .expect("live entries are per-file rules by construction")
485 .path_scope()
486 .matches(&file_entry.path, per_file_ctx.index)
487 })
488 .map(|(idx, entry)| (*idx, *entry))
489 .collect();
490 if applicable.is_empty() {
491 return Vec::new();
492 }
493 let abs = root.join(&file_entry.path);
499 let Ok(bytes) = std::fs::read(&abs) else {
500 return Vec::new();
501 };
502 let mut out: Vec<(usize, Violation)> = Vec::new();
507 for (entry_idx, entry) in applicable {
508 let pf = entry
509 .rule
510 .as_per_file()
511 .expect("live entries are per-file rules by construction");
512 let result = pf.evaluate_file(per_file_ctx, &file_entry.path, &bytes);
513 match result {
514 Ok(vs) => {
515 for v in vs {
516 out.push((entry_idx, v));
517 }
518 }
519 Err(e) => {
520 out.push((entry_idx, Violation::new(format!("rule error: {e}"))));
521 }
522 }
523 }
524 out
525 })
526 .collect();
527
528 let mut bucket: HashMap<usize, Vec<Violation>> = HashMap::new();
532 for (idx, v) in by_file {
533 bucket.entry(idx).or_default().push(v);
534 }
535 let mut results = when_errors;
536 for (idx, entry) in live {
537 let violations = bucket.remove(&idx).unwrap_or_default();
544 results.push((
545 idx,
546 RuleResult::new(
547 Arc::from(entry.rule.id()),
548 entry.rule.level(),
549 entry.rule.policy_url().map(Arc::from),
550 violations,
551 entry.rule.fixer().is_some(),
552 ),
553 ));
554 }
555 results
556 }
557
558 fn collect_live_per_file_entries<'a>(
570 &'a self,
571 index: &FileIndex,
572 when_env: &WhenEnv<'_>,
573 ) -> LivePerFileEntries<'a> {
574 let mut live: Vec<(usize, &RuleEntry)> = Vec::new();
575 let mut when_errors: Vec<(usize, RuleResult)> = Vec::new();
576 for (idx, entry) in self.entries.iter().enumerate() {
577 if entry.rule.as_per_file().is_none() {
578 continue;
579 }
580 if self.skip_for_changed(entry.rule.as_ref(), index) {
581 continue;
582 }
583 if let Some(expr) = &entry.when {
584 match expr.evaluate(when_env) {
585 Ok(true) => {}
586 Ok(false) => continue,
587 Err(e) => {
588 when_errors.push((
589 idx,
590 RuleResult {
591 rule_id: Arc::from(entry.rule.id()),
592 level: entry.rule.level(),
593 policy_url: entry.rule.policy_url().map(Arc::from),
594 violations: vec![Violation::new(format!(
595 "when evaluation error: {e}"
596 ))],
597 notes: Vec::new(),
598 is_fixable: entry.rule.fixer().is_some(),
599 },
600 ));
601 continue;
602 }
603 }
604 }
605 live.push((idx, entry));
606 }
607 (live, when_errors)
608 }
609
610 pub fn run_for_file(
628 &self,
629 root: &Path,
630 index: &FileIndex,
631 file_path: &Path,
632 bytes: &[u8],
633 ) -> Result<Vec<RuleResult>> {
634 if !index.contains_file(file_path) {
635 return Err(Error::file_not_in_index(file_path));
636 }
637
638 let fact_values: &FactValues = if let Some(values) = index.cached_facts() {
643 values
644 } else {
645 let computed = evaluate_facts(&self.facts, root, index)?;
646 index.set_facts(computed);
647 index.cached_facts().expect("facts just set on the index")
648 };
649 let git_tracked = self.collect_git_tracked_if_needed(root);
650 let git_blame = self.build_blame_cache_if_needed(root);
651 self.resolve_changed_paths(root, index)?;
654
655 let ctx = Context {
656 root,
657 index,
658 registry: Some(&self.registry),
659 facts: Some(fact_values),
660 vars: Some(&self.vars),
661 git_tracked: git_tracked.as_ref(),
662 git_blame: git_blame.as_ref(),
663 };
664 let when_env = WhenEnv {
665 facts: fact_values,
666 vars: &self.vars,
667 iter: None,
668 env: None,
669 };
670
671 let (live, when_errors) = self.collect_live_per_file_entries(index, &when_env);
672
673 let mut bucket: HashMap<usize, Vec<Violation>> = HashMap::new();
679 for (idx, entry) in &live {
680 let pf = entry
681 .rule
682 .as_per_file()
683 .expect("live entries are per-file rules by construction");
684 if !pf.path_scope().matches(file_path, index) {
685 continue;
686 }
687 match pf.evaluate_file(&ctx, file_path, bytes) {
688 Ok(vs) => {
689 if !vs.is_empty() {
690 bucket.entry(*idx).or_default().extend(vs);
691 }
692 }
693 Err(e) => bucket
694 .entry(*idx)
695 .or_default()
696 .push(Violation::new(format!("rule error: {e}"))),
697 }
698 }
699
700 let mut by_idx: HashMap<usize, RuleResult> = when_errors.into_iter().collect();
701 for (idx, entry) in &live {
702 if let Some(violations) = bucket.remove(idx) {
703 by_idx.insert(
704 *idx,
705 RuleResult::new(
706 Arc::from(entry.rule.id()),
707 entry.rule.level(),
708 entry.rule.policy_url().map(Arc::from),
709 violations,
710 entry.rule.fixer().is_some(),
711 ),
712 );
713 }
714 }
715 let mut results = Vec::with_capacity(by_idx.len());
717 for idx in 0..self.entries.len() {
718 if let Some(rr) = by_idx.remove(&idx) {
719 results.push(rr);
720 }
721 }
722 Ok(results)
723 }
724
725 #[allow(clippy::too_many_lines)]
732 pub fn fix(&self, root: &Path, index: &FileIndex, dry_run: bool) -> Result<FixReport> {
733 if self.changed_paths.as_ref().is_some_and(HashSet::is_empty) {
734 return Ok(FixReport {
735 results: Vec::new(),
736 });
737 }
738
739 let fact_values = evaluate_facts(&self.facts, root, index)?;
740 let git_tracked = self.collect_git_tracked_if_needed(root);
741 let git_blame = self.build_blame_cache_if_needed(root);
742 let filtered_index = self.build_filtered_index(index);
743 let git_tracked_indexes = self.build_git_tracked_indexes(index, git_tracked.as_ref());
744 let full_ctx = Context {
745 root,
746 index,
747 registry: Some(&self.registry),
748 facts: Some(&fact_values),
749 vars: Some(&self.vars),
750 git_tracked: git_tracked.as_ref(),
751 git_blame: git_blame.as_ref(),
752 };
753 let filtered_ctx = filtered_index.as_ref().map(|fi| Context {
754 root,
755 index: fi,
756 registry: Some(&self.registry),
757 facts: Some(&fact_values),
758 vars: Some(&self.vars),
759 git_tracked: git_tracked.as_ref(),
760 git_blame: git_blame.as_ref(),
761 });
762 let git_file_only_ctx = git_tracked_indexes
763 .as_ref()
764 .and_then(|gti| gti.file_only.as_ref())
765 .map(|fi| Context {
766 root,
767 index: fi,
768 registry: Some(&self.registry),
769 facts: Some(&fact_values),
770 vars: Some(&self.vars),
771 git_tracked: git_tracked.as_ref(),
772 git_blame: git_blame.as_ref(),
773 });
774 let git_dir_aware_ctx = git_tracked_indexes
775 .as_ref()
776 .and_then(|gti| gti.dir_aware.as_ref())
777 .map(|fi| Context {
778 root,
779 index: fi,
780 registry: Some(&self.registry),
781 facts: Some(&fact_values),
782 vars: Some(&self.vars),
783 git_tracked: git_tracked.as_ref(),
784 git_blame: git_blame.as_ref(),
785 });
786 let when_env = WhenEnv {
787 facts: &fact_values,
788 vars: &self.vars,
789 iter: None,
790 env: None,
791 };
792 let fix_ctx = FixContext {
793 root,
794 dry_run,
795 fix_size_limit: self.fix_size_limit,
796 };
797
798 self.resolve_changed_paths(root, index)?;
801
802 let mut results: Vec<FixRuleResult> = Vec::new();
803 for entry in &self.entries {
804 if self.skip_for_changed(entry.rule.as_ref(), full_ctx.index) {
805 continue;
806 }
807 let ctx = pick_ctx(
808 entry.rule.as_ref(),
809 &full_ctx,
810 filtered_ctx.as_ref(),
811 git_file_only_ctx.as_ref(),
812 git_dir_aware_ctx.as_ref(),
813 );
814 if let Some(expr) = &entry.when {
815 match expr.evaluate(&when_env) {
816 Ok(true) => {}
817 Ok(false) => continue,
818 Err(e) => {
819 results.push(FixRuleResult {
820 rule_id: Arc::from(entry.rule.id()),
821 level: entry.rule.level(),
822 items: vec![FixItem {
823 violation: Violation::new(format!("when evaluation error: {e}")),
824 status: FixStatus::Unfixable,
825 }],
826 });
827 continue;
828 }
829 }
830 }
831 let violations = match entry.rule.evaluate(ctx) {
832 Ok(v) => v,
833 Err(e) => vec![Violation::new(format!("rule error: {e}"))],
834 };
835 if violations.is_empty() {
836 continue;
837 }
838 let fixer = entry.rule.fixer();
839 let items: Vec<FixItem> = violations
840 .into_iter()
841 .map(|v| {
842 let status = match fixer {
843 Some(f) => match f.apply(&v, &fix_ctx) {
844 Ok(FixOutcome::Applied(s)) => FixStatus::Applied(s),
845 Ok(FixOutcome::Skipped(s)) => FixStatus::Skipped(s),
846 Err(e) => FixStatus::Skipped(format!("fix error: {e}")),
847 },
848 None => FixStatus::Unfixable,
849 };
850 FixItem {
851 violation: v,
852 status,
853 }
854 })
855 .collect();
856 results.push(FixRuleResult {
857 rule_id: Arc::from(entry.rule.id()),
858 level: entry.rule.level(),
859 items,
860 });
861 }
862 Ok(FixReport { results })
863 }
864
865 fn collect_git_tracked_if_needed(
874 &self,
875 root: &Path,
876 ) -> Option<std::collections::HashSet<std::path::PathBuf>> {
877 let any_wants = self
878 .entries
879 .iter()
880 .any(|e| e.rule.git_tracked_mode() != crate::rule::GitTrackedMode::Off);
881 if !any_wants {
882 return None;
883 }
884 crate::git::collect_tracked_paths(root)
885 }
886
887 fn build_blame_cache_if_needed(&self, root: &Path) -> Option<crate::git::BlameCache> {
901 let any_wants = self.entries.iter().any(|e| e.rule.wants_git_blame());
902 if !any_wants {
903 return None;
904 }
905 crate::git::collect_tracked_paths(root)?;
909 Some(crate::git::BlameCache::new(root.to_path_buf()))
910 }
911
912 fn build_filtered_index(&self, full: &FileIndex) -> Option<FileIndex> {
917 let set = self.changed_paths.as_ref()?;
918 let entries = full
919 .entries
920 .iter()
921 .filter(|e| set.contains(&*e.path))
922 .cloned()
923 .collect();
924 Some(FileIndex::from_entries(entries))
925 }
926
927 fn build_git_tracked_indexes(
951 &self,
952 full: &FileIndex,
953 tracked: Option<&std::collections::HashSet<std::path::PathBuf>>,
954 ) -> Option<GitTrackedIndexes> {
955 let mut any_file_only = false;
956 let mut any_dir_aware = false;
957 for entry in &self.entries {
958 match entry.rule.git_tracked_mode() {
959 crate::rule::GitTrackedMode::Off => {}
960 crate::rule::GitTrackedMode::FileOnly => any_file_only = true,
961 crate::rule::GitTrackedMode::DirAware => any_dir_aware = true,
962 }
963 }
964 if !any_file_only && !any_dir_aware {
965 return None;
966 }
967
968 let Some(tracked) = tracked else {
976 return Some(GitTrackedIndexes {
977 file_only: any_file_only.then(|| FileIndex::from_entries(Vec::new())),
978 dir_aware: any_dir_aware.then(|| FileIndex::from_entries(Vec::new())),
979 });
980 };
981
982 let file_only = if any_file_only {
983 let entries = full
984 .entries
985 .iter()
986 .filter(|e| !e.is_dir && tracked.contains(&*e.path))
987 .cloned()
988 .collect();
989 Some(FileIndex::from_entries(entries))
990 } else {
991 None
992 };
993
994 let dir_aware = if any_dir_aware {
995 let entries = full
996 .entries
997 .iter()
998 .filter(|e| {
999 if e.is_dir {
1000 crate::git::dir_has_tracked_files(&e.path, tracked)
1001 } else {
1002 tracked.contains(&*e.path)
1003 }
1004 })
1005 .cloned()
1006 .collect();
1007 Some(FileIndex::from_entries(entries))
1008 } else {
1009 None
1010 };
1011
1012 Some(GitTrackedIndexes {
1013 file_only,
1014 dir_aware,
1015 })
1016 }
1017
1018 fn skip_for_changed(&self, rule: &dyn Rule, index: &FileIndex) -> bool {
1024 let Some(set) = &self.changed_paths else {
1025 return false;
1026 };
1027 let Some(scope) = rule.path_scope() else {
1028 return false;
1029 };
1030 !set.iter().any(|p| scope.matches(p, index))
1031 }
1032
1033 fn resolve_changed_paths(&self, root: &Path, index: &FileIndex) -> Result<()> {
1041 if index.changed_paths_initialized() {
1042 return Ok(());
1043 }
1044 let mut refs: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();
1045 for entry in &self.entries {
1046 let scope = entry
1052 .rule
1053 .as_per_file()
1054 .map(super::rule::PerFileRule::path_scope)
1055 .or_else(|| entry.rule.path_scope());
1056 if let Some(scope) = scope
1057 && let Some(filter) = scope.scope_filter()
1058 && let Some(since) = filter.changed_since()
1059 {
1060 refs.insert(since);
1061 }
1062 }
1063 if refs.is_empty() {
1064 return Ok(());
1065 }
1066 let mut map = std::collections::HashMap::new();
1067 for since in refs {
1068 match crate::git::collect_changed_paths_checked(root, since) {
1069 Ok(Some(set)) => {
1070 map.insert(since.to_string(), set);
1071 }
1072 Ok(None) => {
1073 map.insert(since.to_string(), std::collections::HashSet::new());
1074 }
1075 Err(crate::git::CommitRangeError::BadRange { stderr }) => {
1076 return Err(crate::error::Error::Other(format!(
1077 "scope_filter.changed_since: could not resolve `{since}...HEAD`: \
1078 {stderr}. Common cause: shallow clone. In a GitHub Actions PR \
1079 workflow, use `actions/checkout@v4` with `fetch-depth: 0` so the \
1080 base ref is reachable."
1081 )));
1082 }
1083 }
1084 }
1085 index.set_changed_paths(map);
1086 Ok(())
1087 }
1088}
1089
1090fn pick_ctx<'a>(
1095 rule: &dyn Rule,
1096 full_ctx: &'a Context<'a>,
1097 filtered_ctx: Option<&'a Context<'a>>,
1098 git_file_only_ctx: Option<&'a Context<'a>>,
1099 git_dir_aware_ctx: Option<&'a Context<'a>>,
1100) -> &'a Context<'a> {
1101 match rule.git_tracked_mode() {
1108 crate::rule::GitTrackedMode::FileOnly => {
1109 return git_file_only_ctx.unwrap_or(full_ctx);
1110 }
1111 crate::rule::GitTrackedMode::DirAware => {
1112 return git_dir_aware_ctx.unwrap_or(full_ctx);
1113 }
1114 crate::rule::GitTrackedMode::Off => {}
1115 }
1116 if rule.requires_full_index() {
1117 full_ctx
1118 } else {
1119 filtered_ctx.unwrap_or(full_ctx)
1120 }
1121}
1122
1123fn run_entry(
1124 entry: &RuleEntry,
1125 ctx: &Context<'_>,
1126 when_env: &WhenEnv<'_>,
1127 _facts: &FactValues,
1128) -> Option<RuleResult> {
1129 if let Some(expr) = &entry.when {
1130 match expr.evaluate(when_env) {
1131 Ok(true) => {} Ok(false) => return None,
1133 Err(e) => {
1134 return Some(RuleResult {
1135 rule_id: Arc::from(entry.rule.id()),
1136 level: entry.rule.level(),
1137 policy_url: entry.rule.policy_url().map(Arc::from),
1138 violations: vec![Violation::new(format!("when evaluation error: {e}"))],
1139 notes: Vec::new(),
1140 is_fixable: entry.rule.fixer().is_some(),
1141 });
1142 }
1143 }
1144 }
1145 Some(run_one(entry.rule.as_ref(), ctx))
1146}
1147
1148fn run_one(rule: &dyn Rule, ctx: &Context<'_>) -> RuleResult {
1149 let violations = match rule.evaluate(ctx) {
1150 Ok(v) => v,
1151 Err(e) => vec![Violation::new(format!("rule error: {e}"))],
1152 };
1153 RuleResult::new(
1155 Arc::from(rule.id()),
1156 rule.level(),
1157 rule.policy_url().map(Arc::from),
1158 violations,
1159 rule.fixer().is_some(),
1160 )
1161}
1162
1163#[cfg(test)]
1164mod tests {
1165 use super::*;
1166 use crate::level::Level;
1167 use crate::scope::Scope;
1168 use crate::walker::FileEntry;
1169 use std::path::Path;
1170
    /// Cross-file test rule: emits one "hit" violation per indexed file that
    /// matches `scope`.
    #[derive(Debug)]
    struct StubRule {
        /// Value returned by `Rule::id`.
        id: String,
        /// Value returned by `Rule::level`.
        level: Level,
        /// Scope used by `evaluate` to select files.
        scope: Scope,
        /// Value returned by `Rule::requires_full_index`.
        full_index: bool,
        /// Whether `Rule::path_scope` exposes `scope` (`true`) or `None`.
        expose_scope: bool,
    }
1183
1184 impl Rule for StubRule {
1185 fn id(&self) -> &str {
1186 &self.id
1187 }
1188 fn level(&self) -> Level {
1189 self.level
1190 }
1191 fn requires_full_index(&self) -> bool {
1192 self.full_index
1193 }
1194 fn path_scope(&self) -> Option<&Scope> {
1195 self.expose_scope.then_some(&self.scope)
1196 }
1197 fn evaluate(&self, ctx: &Context<'_>) -> crate::error::Result<Vec<Violation>> {
1198 let mut out = Vec::new();
1199 for entry in ctx.index.files() {
1200 if self.scope.matches(&entry.path, ctx.index) {
1201 out.push(Violation::new("hit").with_path(entry.path.clone()));
1202 }
1203 }
1204 Ok(out)
1205 }
1206 }
1207
1208 fn stub(id: &str, glob: &str) -> Box<dyn Rule> {
1209 Box::new(StubRule {
1210 id: id.into(),
1211 level: Level::Error,
1212 scope: Scope::from_patterns(&[glob.to_string()]).unwrap(),
1213 full_index: false,
1214 expose_scope: true,
1215 })
1216 }
1217
1218 fn full_index_stub(id: &str) -> Box<dyn Rule> {
1219 Box::new(StubRule {
1220 id: id.into(),
1221 level: Level::Error,
1222 scope: Scope::match_all(),
1223 full_index: true,
1224 expose_scope: false,
1225 })
1226 }
1227
1228 fn idx(paths: &[&str]) -> FileIndex {
1229 FileIndex::from_entries(
1230 paths
1231 .iter()
1232 .map(|p| FileEntry {
1233 path: std::path::Path::new(p).into(),
1234 is_dir: false,
1235 size: 0,
1236 })
1237 .collect(),
1238 )
1239 }
1240
1241 #[test]
1242 fn run_empty_returns_empty_report() {
1243 let engine = Engine::new(Vec::new(), RuleRegistry::new());
1244 let report = engine.run(Path::new("/fake"), &idx(&["a.rs"])).unwrap();
1245 assert!(report.results.is_empty());
1246 }
1247
1248 #[test]
1249 fn run_single_rule_emits_per_match() {
1250 let engine = Engine::new(vec![stub("t", "**/*.rs")], RuleRegistry::new());
1251 let report = engine
1252 .run(
1253 Path::new("/fake"),
1254 &idx(&["src/a.rs", "src/b.rs", "README.md"]),
1255 )
1256 .unwrap();
1257 assert_eq!(report.results.len(), 1);
1258 assert_eq!(report.results[0].violations.len(), 2);
1259 }
1260
1261 #[test]
1262 fn run_with_empty_changed_set_short_circuits() {
1263 let engine = Engine::new(vec![stub("t", "**/*.rs")], RuleRegistry::new())
1267 .with_changed_paths(HashSet::new());
1268 let report = engine.run(Path::new("/fake"), &idx(&["src/a.rs"])).unwrap();
1269 assert!(report.results.is_empty());
1270 }
1271
1272 #[test]
1273 fn changed_mode_skips_rule_whose_scope_misses_diff() {
1274 let mut changed = HashSet::new();
1277 changed.insert(std::path::PathBuf::from("docs/README.md"));
1278 let engine = Engine::new(vec![stub("src-rule", "src/**/*.rs")], RuleRegistry::new())
1279 .with_changed_paths(changed);
1280 let report = engine
1281 .run(Path::new("/fake"), &idx(&["src/a.rs", "docs/README.md"]))
1282 .unwrap();
1283 assert!(
1284 report.results.is_empty(),
1285 "out-of-scope rule should be skipped: {:?}",
1286 report.results,
1287 );
1288 }
1289
1290 #[test]
1291 fn changed_mode_runs_rule_whose_scope_intersects_diff() {
1292 let mut changed = HashSet::new();
1293 changed.insert(std::path::PathBuf::from("src/a.rs"));
1294 let engine = Engine::new(vec![stub("src-rule", "src/**/*.rs")], RuleRegistry::new())
1295 .with_changed_paths(changed);
1296 let report = engine
1297 .run(Path::new("/fake"), &idx(&["src/a.rs", "src/b.rs"]))
1298 .unwrap();
1299 assert_eq!(report.results.len(), 1);
1302 assert_eq!(report.results[0].violations.len(), 1);
1303 }
1304
1305 #[test]
1306 fn requires_full_index_rule_runs_unconditionally_in_changed_mode() {
1307 let mut changed = HashSet::new();
1311 changed.insert(std::path::PathBuf::from("docs/README.md"));
1312 let engine = Engine::new(vec![full_index_stub("cross")], RuleRegistry::new())
1313 .with_changed_paths(changed);
1314 let report = engine
1315 .run(Path::new("/fake"), &idx(&["src/a.rs", "docs/README.md"]))
1316 .unwrap();
1317 assert_eq!(report.results.len(), 1);
1320 assert_eq!(report.results[0].violations.len(), 2);
1321 }
1322
1323 #[test]
1324 fn rule_count_reflects_number_of_entries() {
1325 let engine = Engine::new(
1326 vec![stub("a", "**"), stub("b", "**"), stub("c", "**")],
1327 RuleRegistry::new(),
1328 );
1329 assert_eq!(engine.rule_count(), 3);
1330 }
1331
1332 #[test]
1333 fn from_entries_constructor_supports_when_clauses() {
1334 let entry = RuleEntry::new(stub("gated", "**/*.rs"))
1337 .with_when(crate::when::parse("false").unwrap());
1338 let engine = Engine::from_entries(vec![entry], RuleRegistry::new());
1339 let report = engine.run(Path::new("/fake"), &idx(&["a.rs"])).unwrap();
1340 assert!(
1341 report.results.is_empty(),
1342 "when-false rule must be skipped: {:?}",
1343 report.results,
1344 );
1345 }
1346
1347 #[test]
1348 fn fix_size_limit_default_is_one_mib() {
1349 let engine = Engine::new(Vec::new(), RuleRegistry::new());
1352 let updated = engine.with_fix_size_limit(Some(42));
1356 assert_eq!(updated.rule_count(), 0);
1357 }
1358
1359 #[test]
1360 fn skip_for_changed_returns_false_for_full_check() {
1361 let engine = Engine::new(vec![stub("t", "**/*.rs")], RuleRegistry::new());
1363 let report = engine.run(Path::new("/fake"), &idx(&["a.rs"])).unwrap();
1364 assert_eq!(report.results.len(), 1);
1365 }
1366
    /// Per-file test rule: flags any in-scope file whose bytes do not start
    /// with `prefix`.
    #[derive(Debug)]
    struct PerFileStub {
        /// Value returned by `Rule::id`.
        id: String,
        /// Scope returned by `PerFileRule::path_scope`.
        scope: Scope,
        /// Byte prefix every in-scope file is expected to start with.
        prefix: Vec<u8>,
    }
1377
1378 impl Rule for PerFileStub {
1379 fn id(&self) -> &str {
1380 &self.id
1381 }
1382 fn level(&self) -> Level {
1383 Level::Error
1384 }
1385 fn evaluate(&self, _ctx: &Context<'_>) -> crate::error::Result<Vec<Violation>> {
1386 Ok(Vec::new())
1390 }
1391 fn as_per_file(&self) -> Option<&dyn crate::PerFileRule> {
1392 Some(self)
1393 }
1394 }
1395
1396 impl crate::PerFileRule for PerFileStub {
1397 fn path_scope(&self) -> &Scope {
1398 &self.scope
1399 }
1400 fn evaluate_file(
1401 &self,
1402 _ctx: &Context<'_>,
1403 path: &std::path::Path,
1404 bytes: &[u8],
1405 ) -> crate::error::Result<Vec<Violation>> {
1406 if !bytes.starts_with(&self.prefix) {
1407 return Ok(vec![
1408 Violation::new("missing prefix")
1409 .with_path(std::sync::Arc::<std::path::Path>::from(path)),
1410 ]);
1411 }
1412 Ok(Vec::new())
1413 }
1414 }
1415
1416 #[test]
1417 fn dispatch_flip_routes_per_file_rule_through_file_major_loop() {
1418 let tmp = tempfile::tempdir().unwrap();
1422 std::fs::write(tmp.path().join("good.txt"), b"MAGIC + payload").unwrap();
1423 std::fs::write(tmp.path().join("bad.txt"), b"no magic here").unwrap();
1424
1425 let rule = Box::new(PerFileStub {
1426 id: "needs-magic".into(),
1427 scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
1428 prefix: b"MAGIC".to_vec(),
1429 });
1430 let engine = Engine::new(vec![rule], RuleRegistry::new());
1431
1432 let opts = crate::WalkOptions::default();
1433 let index = crate::walk(tmp.path(), &opts).unwrap();
1434 let report = engine.run(tmp.path(), &index).unwrap();
1435
1436 assert_eq!(report.results.len(), 1, "results: {:?}", report.results);
1437 let r = &report.results[0];
1438 assert_eq!(&*r.rule_id, "needs-magic");
1439 assert_eq!(r.violations.len(), 1, "violations: {:?}", r.violations);
1440 assert_eq!(
1441 r.violations[0].path.as_deref(),
1442 Some(std::path::Path::new("bad.txt")),
1443 );
1444 }
1445
// Two per-file rules share the `**/*.txt` scope. The report is expected to
// contain one entry per rule, each with its own list of violations.
// NOTE(review): assumes `PerFileStub` flags files that do not start with
// `prefix`; its definition is outside this section.
#[test]
fn dispatch_flip_aggregates_multiple_per_file_rules() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();

    // Fixture: neither file starts with "AAA"; only `b.txt` starts with "BBB".
    for (name, body) in [("a.txt", &b"ZZZ stuff"[..]), ("b.txt", &b"BBB stuff"[..])] {
        std::fs::write(root.join(name), body).unwrap();
    }

    let txt_scope = || Scope::from_patterns(&["**/*.txt".to_string()]).unwrap();
    let needs_aaa = Box::new(PerFileStub {
        id: "needs-AAA".into(),
        scope: txt_scope(),
        prefix: b"AAA".to_vec(),
    });
    let needs_bbb = Box::new(PerFileStub {
        id: "needs-BBB".into(),
        scope: txt_scope(),
        prefix: b"BBB".to_vec(),
    });
    let engine = Engine::new(vec![needs_aaa, needs_bbb], RuleRegistry::new());

    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();
    let report = engine.run(root, &index).unwrap();

    // Key the results by rule id so the assertions do not depend on ordering.
    let mut by_id: HashMap<&str, &RuleResult> = HashMap::new();
    for result in &report.results {
        by_id.insert(&*result.rule_id, result);
    }
    assert_eq!(
        by_id.len(),
        2,
        "expected both rules in the report: {:?}",
        report.results
    );

    let aaa = by_id["needs-AAA"];
    let bbb = by_id["needs-BBB"];
    assert_eq!(aaa.violations.len(), 2);
    assert_eq!(bbb.violations.len(), 1);
    // The single "BBB" violation must point at the file lacking that prefix.
    assert_eq!(
        bbb.violations[0].path.as_deref(),
        Some(std::path::Path::new("a.txt")),
    );
}
1490
// A per-file rule with nothing to flag must still show up in the full-run
// report: one result entry, no violations, counted as a passing rule.
#[test]
fn passing_per_file_rule_appears_in_the_report() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    // The only file already carries the required prefix.
    std::fs::write(root.join("a.txt"), b"MAGIC ok").unwrap();

    let needs_magic = Box::new(PerFileStub {
        id: "needs-magic".into(),
        scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
        prefix: b"MAGIC".to_vec(),
    });
    let engine = Engine::new(vec![needs_magic], RuleRegistry::new());

    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();
    let report = engine.run(root, &index).unwrap();

    assert_eq!(report.results.len(), 1, "results: {:?}", report.results);
    let only = &report.results[0];
    assert!(only.violations.is_empty());
    assert_eq!(report.passing_rules(), 1);
}
1517
// `run_for_file` on `a.txt` is expected to report only the per-file rule
// whose scope matches the file. The `*.rs`-scoped per-file rule and the rule
// built by `stub` must not appear in the output.
#[test]
fn run_for_file_runs_only_in_scope_per_file_rules() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    let contents = b"no magic";
    std::fs::write(root.join("a.txt"), contents).unwrap();

    let magic_rule = |id: &str, glob: &str| {
        Box::new(PerFileStub {
            id: id.into(),
            scope: Scope::from_patterns(&[glob.to_string()]).unwrap(),
            prefix: b"MAGIC".to_vec(),
        })
    };
    let in_scope = magic_rule("txt-needs-magic", "**/*.txt");
    let out_of_scope = magic_rule("rs-needs-magic", "**/*.rs");
    let cross = stub("cross", "**/*.txt");
    let engine = Engine::new(vec![in_scope, out_of_scope, cross], RuleRegistry::new());

    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();
    let results = engine
        .run_for_file(root, &index, Path::new("a.txt"), contents)
        .unwrap();

    assert_eq!(results.len(), 1, "results: {results:?}");
    let only = &results[0];
    assert_eq!(&*only.rule_id, "txt-needs-magic");
    assert_eq!(only.violations.len(), 1);
}
1549
// The bytes handed to `run_for_file` are what gets evaluated, not the file on
// disk: the on-disk copy starts with "MAGIC", the supplied buffer does not,
// and the rule is expected to be reported.
#[test]
fn run_for_file_uses_supplied_bytes_not_disk() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    std::fs::write(root.join("a.txt"), b"MAGIC on disk").unwrap();

    let needs_magic = Box::new(PerFileStub {
        id: "needs-magic".into(),
        scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
        prefix: b"MAGIC".to_vec(),
    });
    let engine = Engine::new(vec![needs_magic], RuleRegistry::new());
    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();

    // Simulates an unsaved edit that dropped the prefix.
    let edited = b"edited, no prefix";
    let results = engine
        .run_for_file(root, &index, Path::new("a.txt"), edited)
        .unwrap();

    assert_eq!(results.len(), 1, "edited bytes should fail the rule");
    assert_eq!(&*results[0].rule_id, "needs-magic");
}
1572
// When the supplied bytes satisfy the rule, `run_for_file` is expected to
// return no entry for it at all.
#[test]
fn run_for_file_passing_rule_omitted() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    std::fs::write(root.join("a.txt"), b"whatever").unwrap();

    let needs_magic = Box::new(PerFileStub {
        id: "needs-magic".into(),
        scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
        prefix: b"MAGIC".to_vec(),
    });
    let engine = Engine::new(vec![needs_magic], RuleRegistry::new());
    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();

    // The in-memory buffer carries the prefix even though the disk copy does not.
    let passing = b"MAGIC passes";
    let results = engine
        .run_for_file(root, &index, Path::new("a.txt"), passing)
        .unwrap();

    assert!(
        results.is_empty(),
        "passing rule must be omitted: {results:?}"
    );
}
1592
// `Engine::is_per_file` is expected to be true for the `PerFileStub` rule and
// false both for the rule built by `stub` and for an id the engine does not know.
#[test]
fn is_per_file_classifies_rules() {
    let per_file = Box::new(PerFileStub {
        id: "pf".into(),
        scope: Scope::from_patterns(&["**/*".to_string()]).unwrap(),
        prefix: b"X".to_vec(),
    });
    let cross_file = stub("cross", "**/*");
    let engine = Engine::new(vec![per_file, cross_file], RuleRegistry::new());

    assert!(engine.is_per_file("pf"));
    for not_per_file in ["cross", "unknown-id"] {
        assert!(!engine.is_per_file(not_per_file));
    }
}
1606
// A freshly walked index has no cached facts. The first `run_for_file` call
// is expected to populate them, and a second call must still succeed.
#[test]
fn run_for_file_caches_facts_on_index() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    let contents = b"x";
    std::fs::write(root.join("a.txt"), contents).unwrap();

    let per_file = Box::new(PerFileStub {
        id: "pf".into(),
        scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
        prefix: b"MAGIC".to_vec(),
    });
    let engine = Engine::new(vec![per_file], RuleRegistry::new());
    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();
    let target = Path::new("a.txt");

    // Before any run: nothing cached.
    assert!(index.cached_facts().is_none());

    // First run: the cache is expected to be filled.
    engine.run_for_file(root, &index, target, contents).unwrap();
    assert!(
        index.cached_facts().is_some(),
        "facts should be cached after the first run_for_file"
    );

    // Second run against the same index: only required not to error.
    engine.run_for_file(root, &index, target, contents).unwrap();
}
1632
// Asking `run_for_file` about a path the walk never saw (`ghost.txt`) is
// expected to fail with `Error::FileNotInIndex`, even with no rules loaded.
#[test]
fn run_for_file_errors_when_file_not_in_index() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    // Only `a.txt` exists, so the index cannot contain `ghost.txt`.
    std::fs::write(root.join("a.txt"), b"x").unwrap();

    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();
    let engine = Engine::new(vec![], RuleRegistry::new());

    let outcome = engine.run_for_file(root, &index, Path::new("ghost.txt"), b"x");
    let err = outcome.unwrap_err();
    assert!(
        matches!(err, Error::FileNotInIndex { .. }),
        "expected FileNotInIndex, got: {err:?}"
    );
}
1647
// A full run with one rule built by `stub` and one `PerFileStub` rule is
// expected to report both, in the order they were handed to `Engine::new`.
#[test]
fn dispatch_flip_preserves_cross_file_rules_unchanged() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();
    std::fs::write(root.join("a.txt"), b"hi").unwrap();

    let cross_rule = stub("cross", "**/*.txt");
    let per_file_rule = Box::new(PerFileStub {
        id: "needs-magic".into(),
        scope: Scope::from_patterns(&["**/*.txt".to_string()]).unwrap(),
        prefix: b"MAGIC".to_vec(),
    });
    let engine = Engine::new(vec![cross_rule, per_file_rule], RuleRegistry::new());

    let index = crate::walk(root, &crate::WalkOptions::default()).unwrap();
    let report = engine.run(root, &index).unwrap();

    assert_eq!(report.results.len(), 2, "results: {:?}", report.results);
    // Positions are checked explicitly: `cross` first, then the per-file rule.
    let (first, second) = (&report.results[0], &report.results[1]);
    assert_eq!(&*first.rule_id, "cross");
    assert_eq!(&*second.rule_id, "needs-magic");
}
1673}