alint_core/rule.rs
1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::path::Path;
4use std::sync::Arc;
5
6use crate::error::Result;
7use crate::facts::FactValues;
8use crate::level::Level;
9use crate::registry::RuleRegistry;
10use crate::walker::FileIndex;
11
/// One finding reported by a rule.
///
/// `path` is stored as an [`Arc<Path>`] so that a rule can share the
/// path it already holds (for example
/// [`FileEntry::path`](crate::walker::FileEntry::path)) by bumping an
/// atomic refcount instead of copying the path bytes. At 100k
/// violations this saves 100k path-byte allocations.
///
/// `message` is a [`Cow<'static, str>`]: messages templated per match
/// are carried as `Cow::Owned(String)`, while fixed messages may be
/// carried as `Cow::Borrowed("…")` without allocating. Consumers that
/// format or serialize a violation are expected to go through the
/// inner `&str` / `&Path` (those impls are not part of this block).
#[derive(Debug, Clone)]
pub struct Violation {
    /// Path the violation refers to; `None` when the violation is not
    /// tied to a particular path.
    pub path: Option<Arc<Path>>,
    /// Human-readable description of the problem.
    pub message: Cow<'static, str>,
    /// Line of the finding, when the rule supplied a location.
    pub line: Option<usize>,
    /// Column of the finding, when the rule supplied a location.
    pub column: Option<usize>,
}

impl Violation {
    /// Start a violation that carries only `message`; the path, line
    /// and column are all left as `None` until set via the `with_*`
    /// builders.
    pub fn new(message: impl Into<Cow<'static, str>>) -> Self {
        let message = message.into();
        Self {
            message,
            path: None,
            line: None,
            column: None,
        }
    }

    /// Return the violation with its path set. Any value convertible
    /// into `Arc<Path>` is accepted. The usual call is
    /// `.with_path(entry.path.clone())`, where `entry.path` is an
    /// `Arc<Path>` owned by the [`FileIndex`], so only the [`Arc`] is
    /// cloned (an atomic refcount bump), not the bytes. `PathBuf`,
    /// `&Path` and `Box<Path>` also work through std's `From` impls;
    /// wrap an ad-hoc `&str` as `Path::new("a.rs")` first.
    #[must_use]
    pub fn with_path(self, path: impl Into<Arc<Path>>) -> Self {
        Self {
            path: Some(path.into()),
            ..self
        }
    }

    /// Return the violation with both `line` and `column` set.
    #[must_use]
    pub fn with_location(self, line: usize, column: usize) -> Self {
        Self {
            line: Some(line),
            column: Some(column),
            ..self
        }
    }
}
64
/// The collected outcome of evaluating a single rule.
///
/// `rule_id` holds an [`Arc<str>`]: the engine builds it once
/// per rule run and shares it across every violation that rule
/// produces, saving N-1 allocations per rule. `policy_url`
/// follows the same shape via [`Arc<str>`] — set once per rule,
/// shared across violations.
#[derive(Debug, Clone)]
pub struct RuleResult {
    /// Identifier of the rule this result belongs to.
    pub rule_id: Arc<str>,
    /// The [`Level`] recorded for the rule.
    pub level: Level,
    /// Policy URL recorded for the rule, if it has one.
    pub policy_url: Option<Arc<str>>,
    /// Every violation the rule produced; empty when the rule passed.
    pub violations: Vec<Violation>,
    /// Whether the rule declares a [`Fixer`] — surfaced here so
    /// the human formatter can tag violations as `fixable`
    /// without threading the rule registry into the renderer.
    pub is_fixable: bool,
}

impl RuleResult {
    /// True when the rule produced no violations, i.e.
    /// `violations` is empty.
    pub fn passed(&self) -> bool {
        self.violations.is_empty()
    }
}
89
/// Execution context handed to each rule during evaluation.
///
/// - `root` — presumably the directory the lint run is rooted at
///   (NOTE(review): confirm against the engine that builds this).
/// - `index` — the [`FileIndex`] the rule evaluates against.
/// - `registry` — available for rules that need to build and evaluate nested
///   rules at runtime (e.g. `for_each_dir`). Tests that don't exercise
///   nested evaluation can set this to `None`.
/// - `facts` — resolved fact values, computed once per `Engine::run`.
/// - `vars` — user-supplied string variables from the config's `vars:` section.
/// - `git_tracked` — set of repo paths reported by `git ls-files`,
///   computed once per run when at least one rule has
///   `git_tracked_only: true`. `None` outside a git repo or when
///   no rule asked for it. Rules that opt in consult it via
///   [`Context::is_git_tracked`].
/// - `git_blame` — per-file `git blame` cache, computed lazily
///   when at least one rule reports `wants_git_blame()`. `None`
///   when no rule asked for it. Rules consult it via
///   [`crate::git::BlameCache::get`]; both "outside a git repo"
///   and "blame failed for this file" surface as a `None`
///   lookup, which the rule treats as "silent no-op."
#[derive(Debug)]
pub struct Context<'a> {
    pub root: &'a Path,
    pub index: &'a FileIndex,
    pub registry: Option<&'a RuleRegistry>,
    pub facts: Option<&'a FactValues>,
    pub vars: Option<&'a HashMap<String, String>>,
    pub git_tracked: Option<&'a std::collections::HashSet<std::path::PathBuf>>,
    pub git_blame: Option<&'a crate::git::BlameCache>,
}
118
119impl Context<'_> {
120 /// True if `rel_path` is in git's index. Returns `false` when
121 /// no tracked-set was computed (no git repo, or no rule asked
122 /// for it). Rules that opt into `git_tracked_only` therefore
123 /// silently skip every entry outside a git repo, which is the
124 /// right behaviour for the canonical "don't let X be
125 /// committed" use case.
126 pub fn is_git_tracked(&self, rel_path: &Path) -> bool {
127 match self.git_tracked {
128 Some(set) => set.contains(rel_path),
129 None => false,
130 }
131 }
132
133 /// True if the directory at `rel_path` contains at least one
134 /// git-tracked file. Used by `dir_*` rules opting into
135 /// `git_tracked_only`. Same `None`-means-untracked semantics
136 /// as [`Context::is_git_tracked`].
137 pub fn dir_has_tracked_files(&self, rel_path: &Path) -> bool {
138 match self.git_tracked {
139 Some(set) => crate::git::dir_has_tracked_files(rel_path, set),
140 None => false,
141 }
142 }
143}
144
/// Trait every built-in and plugin rule implements.
///
/// Only [`Rule::id`], [`Rule::level`] and [`Rule::evaluate`] are
/// required; every other method has a default that opts the rule
/// out of the corresponding engine feature.
pub trait Rule: Send + Sync + std::fmt::Debug {
    /// Identifier of this rule.
    fn id(&self) -> &str;
    /// The [`Level`] associated with this rule.
    fn level(&self) -> Level;
    /// Policy URL associated with this rule, if any. Default `None`.
    fn policy_url(&self) -> Option<&str> {
        None
    }
    /// Whether this rule needs the git-tracked-paths set on
    /// [`Context`]. Default `false`; rule kinds that support
    /// `git_tracked_only` override to return `true` only when
    /// the user actually opted in. The engine collects the set
    /// (via `git ls-files`) once per run when ANY rule returns
    /// `true`, so the cost is paid at most once even if many
    /// rules opt in.
    fn wants_git_tracked(&self) -> bool {
        false
    }

    /// Whether this rule needs `git blame` output on
    /// [`Context`]. Default `false`; the `git_blame_age` rule
    /// kind overrides to return `true`. The engine builds the
    /// shared [`crate::git::BlameCache`] once per run when any
    /// rule opts in, so multiple blame-aware rules over
    /// overlapping `paths:` re-use the parsed result.
    fn wants_git_blame(&self) -> bool {
        false
    }

    /// In `--changed` mode, return `true` to evaluate this rule
    /// against the **full** [`FileIndex`] rather than the
    /// changed-only filtered subset. Default `false` (per-file
    /// semantics — the rule sees only changed files in scope).
    ///
    /// Cross-file rules (`pair`, `for_each_dir`,
    /// `every_matching_has`, `unique_by`, `dir_contains`,
    /// `dir_only_contains`) override to `true` because their
    /// inputs span the whole tree by definition — a verdict on
    /// the changed file depends on what's still in the rest of
    /// the tree. Existence rules (`file_exists`, `file_absent`,
    /// `dir_exists`, `dir_absent`) likewise consult the whole
    /// tree to answer "is X present?" correctly.
    fn requires_full_index(&self) -> bool {
        false
    }

    /// In `--changed` mode, return the [`Scope`](crate::Scope)
    /// this rule is scoped to (typically the rule's `paths:`
    /// field). The engine intersects the scope with the
    /// changed-set; rules whose scope doesn't intersect are
    /// skipped, which is the optimisation `--changed` exists
    /// for.
    ///
    /// Default `None` ("no scope information") means the rule is
    /// always evaluated. Cross-file rules deliberately leave this
    /// as `None` (they always evaluate per the roadmap contract).
    /// Per-file rules with a single `Scope` field should override
    /// to return `Some(&self.scope)`.
    fn path_scope(&self) -> Option<&crate::scope::Scope> {
        None
    }

    /// Per-file ancestor-manifest gate. Default `None` (no
    /// filter); existing rules don't change behaviour.
    /// Per-file rules whose spec carries a `scope_filter:`
    /// field override to return `Some(&self.scope_filter)`.
    ///
    /// Cross-file rules MUST return `None`; the build-time
    /// validation in
    /// [`crate::reject_scope_filter_on_cross_file`]
    /// enforces this from the rule-builder side. Returning
    /// `Some` from a cross-file rule is a programming error
    /// — the engine consults this method only on the
    /// per-file dispatch path.
    fn scope_filter(&self) -> Option<&crate::ScopeFilter> {
        None
    }

    /// Evaluate the rule against `ctx` and return the violations
    /// it found; an empty vector means the rule found nothing to
    /// report. Failures to evaluate are returned as `Err` rather
    /// than as violations.
    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>>;

    /// Optional automatic-fix strategy. Rules whose violations can be
    /// mechanically corrected (e.g. creating a missing file, removing a
    /// forbidden one, renaming to the correct case) return a
    /// [`Fixer`] here; the default implementation reports the rule as
    /// unfixable.
    fn fixer(&self) -> Option<&dyn Fixer> {
        None
    }

    /// Opt into the file-major dispatch path. Per-file rules that
    /// can evaluate one file at a time given a pre-loaded byte
    /// slice override this to return `Some(self)`; cross-file
    /// rules and any rule with `requires_full_index() == true`
    /// leave it as `None` and keep evaluating under the rule-
    /// major loop.
    ///
    /// When the engine has multiple per-file rules sharing one
    /// scope, the file-major loop reads each matched file once
    /// and dispatches to every applicable per-file rule against
    /// the same byte buffer — coalescing N reads of one file
    /// into 1.
    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
        None
    }
}
249
/// File-major dispatch entry-point for a per-file rule.
///
/// Rules that can evaluate one file at a time given a pre-loaded
/// byte slice implement this trait alongside [`Rule`] and opt
/// into the file-major path via [`Rule::as_per_file`]. The
/// engine reads each file once per evaluation pass and calls
/// `evaluate_file` on every per-file rule whose
/// [`path_scope`](PerFileRule::path_scope) matches that file —
/// avoiding the per-rule `std::fs::read` the rule-major loop
/// would otherwise duplicate.
///
/// Implementations MUST NOT call `std::fs::read` themselves; the
/// `bytes` argument is the engine's already-read content. The
/// rule's existing [`Rule::evaluate`] implementation (which does
/// read the file) stays in place as the rule-major fallback —
/// it's still the path used by `alint fix` (sequential
/// filesystem mutation rules out coalesced reads there) and by
/// fallback test harnesses.
pub trait PerFileRule: Send + Sync + std::fmt::Debug {
    /// The rule's scope. The engine checks
    /// `path_scope().matches(path)` before calling
    /// `evaluate_file`; a rule that returns
    /// [`Scope::match_all`](crate::scope::Scope::match_all) is
    /// in scope for every file.
    ///
    /// Unlike [`Rule::path_scope`], this is not optional: a
    /// per-file rule always reports a scope.
    fn path_scope(&self) -> &crate::scope::Scope;

    /// Evaluate one file given the engine's already-read byte
    /// content. The `path` is the relative path from the lint
    /// root; the rule should `with_path(path.into())` (or clone
    /// the matched [`FileEntry::path`](crate::walker::FileEntry::path)
    /// if it has one in hand) on emitted violations.
    ///
    /// Returns the violations found in this one file (empty when
    /// there are none); evaluation failures are returned as `Err`.
    fn evaluate_file(&self, ctx: &Context<'_>, path: &Path, bytes: &[u8])
        -> Result<Vec<Violation>>;

    /// Optional hint: the number of bytes the rule needs in order
    /// to evaluate, i.e. the least the engine would have to read
    /// on this rule's behalf. Default `None` means "I need the
    /// whole file." The engine in v0.9.3 reads the whole file
    /// regardless and hands it to every applicable rule — the
    /// hint is reserved for a future engine-side bounded-read
    /// optimisation.
    fn max_bytes_needed(&self) -> Option<usize> {
        None
    }
}
294
/// Runtime context for applying a fix.
///
/// Passed by reference to [`Fixer::apply`] and to the
/// `check_fix_size` / `read_for_fix` helpers.
#[derive(Debug)]
pub struct FixContext<'a> {
    /// Presumably the directory the fix run is rooted at
    /// (NOTE(review): confirm against the caller that builds this).
    pub root: &'a Path,
    /// When true, fixers must describe what they would do without
    /// touching the filesystem.
    pub dry_run: bool,
    /// Max bytes a content-editing fix will read + rewrite.
    /// `None` means no cap. Honored by the `read_for_fix` helper
    /// (and any custom fixer that opts in).
    pub fix_size_limit: Option<u64>,
}
307
/// The result of applying (or simulating) one fix against one violation.
///
/// Both variants are successful outcomes; a fixer reports a hard
/// failure through `Result::Err` instead.
#[derive(Debug, Clone)]
pub enum FixOutcome {
    /// The fix was applied (or would be, under `dry_run`). The string
    /// is a human-readable one-liner — e.g. `"created LICENSE"`,
    /// `"would remove target/debug.log"`.
    Applied(String),
    /// The fixer intentionally did nothing; the string explains why
    /// (e.g. `"already exists"`, `"no path on violation"`). This is
    /// distinct from a hard error returned via `Result::Err`.
    Skipped(String),
}
320
/// A mechanical corrector for a specific rule's violations.
///
/// A rule exposes its fixer through [`Rule::fixer`].
pub trait Fixer: Send + Sync + std::fmt::Debug {
    /// Short human-readable summary of what this fixer does,
    /// independent of any specific violation.
    fn describe(&self) -> String;

    /// Apply the fix against a single violation.
    ///
    /// Returns [`FixOutcome::Applied`] when the fix was made (or
    /// would be made, when `ctx.dry_run` is set) and
    /// [`FixOutcome::Skipped`] when the fixer deliberately did
    /// nothing. Hard failures are returned as `Err`.
    fn apply(&self, violation: &Violation, ctx: &FixContext<'_>) -> Result<FixOutcome>;
}
330
/// Result of [`read_for_fix`] — either the bytes of the file,
/// or a [`FixOutcome::Skipped`] the caller should return.
///
/// Content-editing fixers (`file_prepend`, `file_append`,
/// `file_trim_trailing_whitespace`, …) funnel their initial read
/// through this helper so the `fix_size_limit` guard is enforced
/// uniformly: over-limit files are reported as `Skipped` with a
/// clear reason, and a one-line warning is printed to stderr so
/// scripted runs notice.
#[derive(Debug)]
pub enum ReadForFix {
    /// The file was within the limit; holds its full contents.
    Bytes(Vec<u8>),
    /// The file was over the limit and was not read; holds the
    /// outcome the fixer should return to its caller.
    Skipped(FixOutcome),
}
345
346/// Check whether `abs` is within the `fix_size_limit` on `ctx`.
347/// Returns `Some(outcome)` when the file is over-limit (the
348/// caller returns this directly); returns `None` when the fix
349/// can proceed. Emits a one-line stderr warning on over-limit.
350///
351/// Use this in fixers that modify the file without reading the
352/// full body (e.g. streaming append). For read-modify-write
353/// flows, prefer [`read_for_fix`] which folds the check in.
354pub fn check_fix_size(
355 abs: &Path,
356 display_path: &std::path::Path,
357 ctx: &FixContext<'_>,
358) -> Result<Option<FixOutcome>> {
359 let Some(limit) = ctx.fix_size_limit else {
360 return Ok(None);
361 };
362 let metadata = std::fs::metadata(abs).map_err(|source| crate::error::Error::Io {
363 path: abs.to_path_buf(),
364 source,
365 })?;
366 if metadata.len() > limit {
367 let reason = format!(
368 "{} is {} bytes; exceeds fix_size_limit ({}). Raise \
369 `fix_size_limit` in .alint.yml (or set it to `null` to disable) \
370 to fix files this large.",
371 display_path.display(),
372 metadata.len(),
373 limit,
374 );
375 eprintln!("alint: warning: {reason}");
376 return Ok(Some(FixOutcome::Skipped(reason)));
377 }
378 Ok(None)
379}
380
381/// Read `abs` subject to the size limit on `ctx`. Over-limit
382/// files return `ReadForFix::Skipped(Outcome::Skipped(_))` and
383/// emit a one-line stderr warning; in-limit files return
384/// `ReadForFix::Bytes(...)`. Pass-through I/O errors propagate.
385pub fn read_for_fix(
386 abs: &Path,
387 display_path: &std::path::Path,
388 ctx: &FixContext<'_>,
389) -> Result<ReadForFix> {
390 if let Some(outcome) = check_fix_size(abs, display_path, ctx)? {
391 return Ok(ReadForFix::Skipped(outcome));
392 }
393 let bytes = std::fs::read(abs).map_err(|source| crate::error::Error::Io {
394 path: abs.to_path_buf(),
395 source,
396 })?;
397 Ok(ReadForFix::Bytes(bytes))
398}
399
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use std::path::PathBuf;

    /// An index with no entries; enough for tests that never walk it.
    fn empty_index() -> FileIndex {
        FileIndex::default()
    }

    /// Build a `Context` over `index` with every optional input left
    /// as `None` except the git-tracked set, which is the only part
    /// these tests vary.
    fn context_with<'a>(
        index: &'a FileIndex,
        git_tracked: Option<&'a HashSet<PathBuf>>,
    ) -> Context<'a> {
        Context {
            root: Path::new("/tmp"),
            index,
            registry: None,
            facts: None,
            vars: None,
            git_tracked,
            git_blame: None,
        }
    }

    /// Build a non-dry-run `FixContext` rooted at `root` with the
    /// given size limit.
    fn fix_context(root: &Path, fix_size_limit: Option<u64>) -> FixContext<'_> {
        FixContext {
            root,
            dry_run: false,
            fix_size_limit,
        }
    }

    #[test]
    fn violation_builder_sets_fields_via_chain() {
        let v = Violation::new("trailing whitespace")
            .with_path(Path::new("src/main.rs"))
            .with_location(12, 4);
        assert_eq!(v.message, "trailing whitespace");
        assert_eq!(v.path.as_deref(), Some(Path::new("src/main.rs")));
        assert_eq!(v.line, Some(12));
        assert_eq!(v.column, Some(4));
    }

    #[test]
    fn violation_new_starts_with_no_path_or_location() {
        let v = Violation::new("global note");
        assert!(v.path.is_none());
        assert!(v.line.is_none());
        assert!(v.column.is_none());
    }

    #[test]
    fn rule_result_passed_iff_violations_empty() {
        let mut r = RuleResult {
            rule_id: "x".into(),
            level: Level::Error,
            policy_url: None,
            violations: Vec::new(),
            is_fixable: false,
        };
        assert!(r.passed());
        r.violations.push(Violation::new("oops"));
        assert!(!r.passed());
    }

    #[test]
    fn context_is_git_tracked_returns_false_outside_repo() {
        let idx = empty_index();
        // `None` models both "outside a repo" and "no rule opted in".
        let ctx = context_with(&idx, None);
        assert!(!ctx.is_git_tracked(Path::new("anything.rs")));
        assert!(!ctx.dir_has_tracked_files(Path::new("src")));
    }

    #[test]
    fn context_is_git_tracked_consults_set_when_present() {
        let mut tracked: HashSet<PathBuf> = HashSet::new();
        tracked.insert(PathBuf::from("src/main.rs"));
        let idx = empty_index();
        let ctx = context_with(&idx, Some(&tracked));
        assert!(ctx.is_git_tracked(Path::new("src/main.rs")));
        assert!(!ctx.is_git_tracked(Path::new("README.md")));
    }

    /// Stand-in `Rule` impl that returns the trait defaults.
    /// Lets us assert the documented defaults without dragging
    /// in a real registered rule.
    #[derive(Debug)]
    struct DefaultRule;

    impl Rule for DefaultRule {
        fn id(&self) -> &'static str {
            "default"
        }
        fn level(&self) -> Level {
            Level::Warning
        }
        fn evaluate(&self, _ctx: &Context<'_>) -> Result<Vec<Violation>> {
            Ok(Vec::new())
        }
    }

    #[test]
    fn rule_trait_defaults_are_safe_no_ops() {
        let r = DefaultRule;
        assert_eq!(r.policy_url(), None);
        assert!(!r.wants_git_tracked());
        assert!(!r.wants_git_blame());
        assert!(!r.requires_full_index());
        assert!(r.path_scope().is_none());
        assert!(r.scope_filter().is_none());
        assert!(r.fixer().is_none());
        assert!(r.as_per_file().is_none());
    }

    #[test]
    fn check_fix_size_returns_none_when_limit_disabled() {
        let dir = tempfile::tempdir().unwrap();
        let f = dir.path().join("a.txt");
        std::fs::write(&f, b"hello").unwrap();
        let ctx = fix_context(dir.path(), None);
        let outcome = check_fix_size(&f, Path::new("a.txt"), &ctx).unwrap();
        assert!(outcome.is_none());
    }

    #[test]
    fn check_fix_size_skips_over_limit_files() {
        let dir = tempfile::tempdir().unwrap();
        let f = dir.path().join("big.txt");
        std::fs::write(&f, vec![b'x'; 1024]).unwrap();
        let ctx = fix_context(dir.path(), Some(64));
        let outcome = check_fix_size(&f, Path::new("big.txt"), &ctx).unwrap();
        match outcome {
            Some(FixOutcome::Skipped(reason)) => {
                assert!(reason.contains("exceeds fix_size_limit"));
                assert!(reason.contains("big.txt"));
            }
            other => panic!("expected Skipped, got {other:?}"),
        }
    }

    #[test]
    fn check_fix_size_allows_file_exactly_at_limit() {
        // The guard is a strict "greater than": a file whose size
        // equals the limit must still be fixable.
        let dir = tempfile::tempdir().unwrap();
        let f = dir.path().join("edge.txt");
        std::fs::write(&f, vec![b'x'; 64]).unwrap();
        let ctx = fix_context(dir.path(), Some(64));
        let outcome = check_fix_size(&f, Path::new("edge.txt"), &ctx).unwrap();
        assert!(outcome.is_none());
    }

    #[test]
    fn read_for_fix_returns_bytes_when_in_limit() {
        let dir = tempfile::tempdir().unwrap();
        let f = dir.path().join("a.txt");
        std::fs::write(&f, b"hello").unwrap();
        let ctx = fix_context(dir.path(), Some(1 << 20));
        match read_for_fix(&f, Path::new("a.txt"), &ctx).unwrap() {
            ReadForFix::Bytes(b) => assert_eq!(b, b"hello"),
            ReadForFix::Skipped(_) => panic!("expected Bytes, got Skipped"),
        }
    }

    #[test]
    fn read_for_fix_returns_skipped_when_over_limit() {
        let dir = tempfile::tempdir().unwrap();
        let f = dir.path().join("big.txt");
        std::fs::write(&f, vec![b'x'; 1024]).unwrap();
        let ctx = fix_context(dir.path(), Some(64));
        match read_for_fix(&f, Path::new("big.txt"), &ctx).unwrap() {
            ReadForFix::Skipped(FixOutcome::Skipped(_)) => {}
            ReadForFix::Skipped(FixOutcome::Applied(_)) => {
                panic!("expected Skipped, got Skipped(Applied)")
            }
            ReadForFix::Bytes(_) => panic!("expected Skipped, got Bytes"),
        }
    }

    #[test]
    fn read_for_fix_propagates_io_error_for_missing_file() {
        let dir = tempfile::tempdir().unwrap();
        let missing = dir.path().join("missing.txt");

        // With a limit, the size lookup is what fails.
        let limited = fix_context(dir.path(), Some(64));
        assert!(read_for_fix(&missing, Path::new("missing.txt"), &limited).is_err());

        // Without a limit, the size lookup is skipped and the read fails.
        let unlimited = fix_context(dir.path(), None);
        assert!(read_for_fix(&missing, Path::new("missing.txt"), &unlimited).is_err());
    }

    #[test]
    fn fix_outcome_variants_are_constructible() {
        // Sanity: documented variant shapes haven't drifted.
        let _applied = FixOutcome::Applied("created LICENSE".into());
        let _skipped = FixOutcome::Skipped("already exists".into());
    }
}