Skip to main content

alint_rules/
file_exists.rs

1//! `file_exists` — require that at least one file matching any of the given
2//! globs exists in the repository.
3
4use std::path::{Path, PathBuf};
5
6use alint_core::{
7    Context, Error, FixSpec, Fixer, Level, PathsSpec, Result, Rule, RuleSpec, Scope, Violation,
8};
9use serde::Deserialize;
10
11use crate::fixers::FileCreateFixer;
12
13#[derive(Debug, Deserialize)]
14#[serde(deny_unknown_fields)]
15struct Options {
16    #[serde(default)]
17    root_only: bool,
18}
19
20#[derive(Debug)]
21pub struct FileExistsRule {
22    id: String,
23    level: Level,
24    policy_url: Option<String>,
25    message: Option<String>,
26    scope: Scope,
27    patterns: Vec<String>,
28    /// `Some(paths)` when every entry in `patterns` is a literal
29    /// path (no glob metacharacters, no `!` excludes) and the
30    /// rule does not opt into `git_tracked_only`. The fast path
31    /// uses these to do O(1) `FileIndex::contains_file` lookups
32    /// instead of iterating every entry through
33    /// `Scope::matches`. At 1M files in a 5,000-package
34    /// monorepo, `for_each_dir` rules spawn one nested
35    /// `file_exists` per directory; without this short-circuit
36    /// each one is an O(N) scan and the fan-out becomes
37    /// O(D × N). With it, they collapse to O(D) lookups.
38    literal_paths: Option<Vec<PathBuf>>,
39    root_only: bool,
40    /// When `true`, only consider walked entries that are also
41    /// in git's index. Outside a git repo this becomes a silent
42    /// no-op — no entries qualify, so the rule reports the
43    /// "missing" violation as if no file existed.
44    git_tracked_only: bool,
45    /// When `Some(false)`, the literal-path fast path also
46    /// checks the filesystem directly via `ctx.root.join(p)` —
47    /// finds files that are present-on-disk but
48    /// `.gitignore`-masked from the walker (closes the
49    /// `bazel-style "tracked AND gitignored"` pattern from
50    /// pitfall #18 in `docs/development/CONFIG-AUTHORING.md`).
51    /// Default `None` (inherit workspace `respect_gitignore`).
52    respect_gitignore: Option<bool>,
53    fixer: Option<FileCreateFixer>,
54}
55
/// Reports whether `pattern` is a plain path rather than a glob.
///
/// A pattern is literal when it does not start with the `!` exclude
/// marker and contains none of the glob metacharacters
/// `*`, `?`, `[`, `]`, `{`, `}`. Literal patterns can be resolved with a
/// single [`alint_core::FileIndex::contains_file`] lookup instead of an
/// O(N) scope-match scan over the index.
fn is_literal_path(pattern: &str) -> bool {
    const GLOB_META: &[char] = &['*', '?', '[', ']', '{', '}'];

    if pattern.starts_with('!') {
        return false;
    }
    !pattern.contains(GLOB_META)
}
67
68/// True iff `paths` is a flat list (single string or `Many`)
69/// with no excludes — `IncludeExclude` form is excluded since
70/// the fast path can't honour excludes by hash lookup alone.
71fn paths_spec_has_no_excludes(spec: &PathsSpec) -> bool {
72    match spec {
73        PathsSpec::Single(_) | PathsSpec::Many(_) => true,
74        PathsSpec::IncludeExclude { exclude, .. } => exclude.is_empty(),
75    }
76}
77
78impl FileExistsRule {
79    fn describe_patterns(&self) -> String {
80        self.patterns.join(", ")
81    }
82}
83
84impl Rule for FileExistsRule {
85    fn id(&self) -> &str {
86        &self.id
87    }
88    fn level(&self) -> Level {
89        self.level
90    }
91    fn policy_url(&self) -> Option<&str> {
92        self.policy_url.as_deref()
93    }
94
95    fn git_tracked_mode(&self) -> alint_core::GitTrackedMode {
96        if self.git_tracked_only {
97            alint_core::GitTrackedMode::FileOnly
98        } else {
99            alint_core::GitTrackedMode::Off
100        }
101    }
102
103    fn requires_full_index(&self) -> bool {
104        // Existence is an aggregate verdict over the whole tree —
105        // "is at least one matching file present?". In `--changed`
106        // mode, evaluate against the full index (so an unchanged
107        // LICENSE still counts) but let the engine skip the rule
108        // entirely when its scope doesn't intersect the diff.
109        true
110    }
111
112    fn path_scope(&self) -> Option<&Scope> {
113        Some(&self.scope)
114    }
115
116    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
117        let found = if let Some(literals) = self.literal_paths.as_ref() {
118            // Fast path: each pattern is a literal relative
119            // path. Hash-lookup against the index's lazily-
120            // built path set is O(1) per pattern; for
121            // `for_each_dir`-spawned rules at 1M scale this is
122            // the difference between O(D × N) and O(D).
123            //
124            // Pitfall #18 (per-rule `respect_gitignore: false`):
125            // when set, also check the filesystem directly so a
126            // `.bazelversion`-style tracked-but-gitignored file is
127            // found even though the walker pre-filtered it out.
128            // Direct stat is O(1) per literal regardless of tree
129            // size, so the cost is bounded.
130            let bypass_walker_for_ignored = self.respect_gitignore == Some(false);
131            literals.iter().any(|p| {
132                if self.root_only && literal_is_nested(p) {
133                    return false;
134                }
135                if ctx.index.contains_file(p) {
136                    return true;
137                }
138                if bypass_walker_for_ignored && ctx.root.join(p).is_file() {
139                    return true;
140                }
141                false
142            })
143        } else {
144            // Slow path: glob patterns. v0.9.11: when
145            // `git_tracked_only` is set the engine hands us a
146            // pre-filtered `ctx.index` (file_only mode), so the
147            // per-entry `is_git_tracked` check that lived here
148            // pre-v0.9.11 is no longer needed — `ctx.index.files()`
149            // already iterates only tracked files.
150            ctx.index.files().any(|entry| {
151                if self.root_only && entry.path.components().count() != 1 {
152                    return false;
153                }
154                if !self.scope.matches(&entry.path, ctx.index) {
155                    return false;
156                }
157                true
158            })
159        };
160        if found {
161            Ok(Vec::new())
162        } else {
163            let message = self.message.clone().unwrap_or_else(|| {
164                let scope = if self.root_only {
165                    " at the repo root"
166                } else {
167                    ""
168                };
169                let tracked = if self.git_tracked_only {
170                    " (tracked in git)"
171                } else {
172                    ""
173                };
174                format!(
175                    "expected a file matching [{}]{scope}{tracked}",
176                    self.describe_patterns()
177                )
178            });
179            Ok(vec![Violation::new(message)])
180        }
181    }
182
183    fn fixer(&self) -> Option<&dyn Fixer> {
184        self.fixer.as_ref().map(|f| f as &dyn Fixer)
185    }
186}
187
188pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
189    alint_core::reject_scope_filter_on_cross_file(spec, "file_exists")?;
190    let Some(paths) = &spec.paths else {
191        return Err(Error::rule_config(
192            &spec.id,
193            "file_exists requires a `paths` field",
194        ));
195    };
196    let patterns = patterns_of(paths);
197    let scope = Scope::from_paths_spec(paths)?;
198    let opts: Options = spec
199        .deserialize_options()
200        .unwrap_or(Options { root_only: false });
201    // The fast path needs every pattern to be a plain relative
202    // path (no glob metacharacters, no `!` exclude). v0.9.11:
203    // `git_tracked_only` no longer disqualifies the fast path
204    // — the engine routes the rule to a tracked-files-only
205    // pre-filtered index, so `FileIndex::contains_file` against
206    // that index naturally returns true iff the literal is BOTH
207    // present AND tracked. When all preconditions hold,
208    // `literal_paths` carries the parsed `PathBuf`s ready for
209    // `FileIndex::contains_file` lookup at evaluate time.
210    let literal_paths =
211        if paths_spec_has_no_excludes(paths) && patterns.iter().all(|p| is_literal_path(p)) {
212            Some(patterns.iter().map(PathBuf::from).collect())
213        } else {
214            None
215        };
216    let fixer = match &spec.fix {
217        Some(FixSpec::FileCreate { file_create: cfg }) => {
218            let target = cfg
219                .path
220                .clone()
221                .or_else(|| first_literal_path(&patterns))
222                .ok_or_else(|| {
223                    Error::rule_config(
224                        &spec.id,
225                        "fix.file_create needs a `path` — none of the rule's `paths:` \
226                         entries is a literal filename",
227                    )
228                })?;
229            let source = alint_core::resolve_content_source(
230                &spec.id,
231                "file_create",
232                &cfg.content,
233                &cfg.content_from,
234            )?;
235            Some(FileCreateFixer::new(target, source, cfg.create_parents))
236        }
237        Some(other) => {
238            return Err(Error::rule_config(
239                &spec.id,
240                format!("fix.{} is not compatible with file_exists", other.op_name()),
241            ));
242        }
243        None => None,
244    };
245    Ok(Box::new(FileExistsRule {
246        id: spec.id.clone(),
247        level: spec.level,
248        policy_url: spec.policy_url.clone(),
249        message: spec.message.clone(),
250        scope,
251        patterns,
252        literal_paths,
253        root_only: opts.root_only,
254        git_tracked_only: spec.git_tracked_only,
255        respect_gitignore: spec.respect_gitignore,
256        fixer,
257    }))
258}
259
/// Reports whether a literal `paths:` entry is anything other than a
/// single path component.
///
/// This is the fast-path counterpart of the
/// `entry.path.components().count() != 1` test that the scope-match
/// scan applies to honour `root_only`.
fn literal_is_nested(p: &Path) -> bool {
    let mut parts = p.components();
    // Exactly one component: a first one exists and nothing follows it.
    let is_single_component = parts.next().is_some() && parts.next().is_none();
    !is_single_component
}
267
/// Best-effort: return the first entry in `patterns` that is usable as
/// a file path to create.
///
/// An entry qualifies when it is not a `!`-prefixed exclude and
/// contains none of the glob-opening metacharacters `*`, `?`, `[`, `{`.
/// Exclude entries are skipped because they name what must *not*
/// match; treating one as the target would make the fixer create a
/// file whose name literally starts with `!`.
///
/// Returns `None` if no entry qualifies — in that case the caller must
/// require an explicit `fix.file_create.path`.
fn first_literal_path(patterns: &[String]) -> Option<PathBuf> {
    patterns
        .iter()
        .find(|p| {
            !p.starts_with('!') && !p.chars().any(|c| matches!(c, '*' | '?' | '[' | '{'))
        })
        .map(PathBuf::from)
}
278
279fn patterns_of(spec: &PathsSpec) -> Vec<String> {
280    match spec {
281        PathsSpec::Single(s) => vec![s.clone()],
282        PathsSpec::Many(v) => v.clone(),
283        PathsSpec::IncludeExclude { include, .. } => include.clone(),
284    }
285}
286
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, index, spec_yaml};
    use std::path::Path;

    /// Builds a rule from `yaml`, panicking if the spec is rejected.
    fn rule_from(yaml: &str) -> Box<dyn Rule> {
        build(&spec_yaml(yaml)).unwrap()
    }

    /// Builds from `yaml`, expecting failure, and returns the rendered
    /// error text.
    fn build_error(yaml: &str) -> String {
        build(&spec_yaml(yaml)).unwrap_err().to_string()
    }

    #[test]
    fn build_rejects_missing_paths_field() {
        let err = build_error(concat!(
            "id: t\n",
            "kind: file_exists\n",
            "level: error\n",
        ));
        assert!(err.contains("paths"), "unexpected: {err}");
    }

    #[test]
    fn build_accepts_root_only_option() {
        // `root_only: true` is the one supported option, so the build
        // must succeed. Unknown options are tolerated here because the
        // build path falls back to defaults via `.unwrap_or(default)`;
        // typos are caught earlier, at config-load time, by the JSON
        // Schema and DSL loader — the right layer for that check.
        let outcome = build(&spec_yaml(concat!(
            "id: t\n",
            "kind: file_exists\n",
            "paths: \"LICENSE\"\n",
            "level: error\n",
            "root_only: true\n",
        )));
        assert!(outcome.is_ok());
    }

    #[test]
    fn build_rejects_incompatible_fix_op() {
        // Only `file_create` is supported. Any other op, such as
        // `file_remove`, must produce a clear config error so that a
        // typo cannot silently disable the fix path.
        let err = build_error(concat!(
            "id: t\n",
            "kind: file_exists\n",
            "paths: \"LICENSE\"\n",
            "level: error\n",
            "fix:\n",
            "  file_remove: {}\n",
        ));
        assert!(err.contains("file_remove"), "unexpected: {err}");
    }

    #[test]
    fn build_file_create_needs_explicit_path_for_glob_only_paths() {
        // With nothing but globs in `paths:` there is no literal
        // target to default to, so `fix.file_create.path` is required.
        let err = build_error(concat!(
            "id: t\n",
            "kind: file_exists\n",
            "paths: \"docs/**/*.md\"\n",
            "level: error\n",
            "fix:\n",
            "  file_create:\n",
            "    content: \"# title\\n\"\n",
        ));
        assert!(err.contains("path"), "unexpected: {err}");
    }

    #[test]
    fn evaluate_passes_when_matching_file_present() {
        let rule = rule_from(concat!(
            "id: t\n",
            "kind: file_exists\n",
            "paths: \"README.md\"\n",
            "level: error\n",
        ));
        let files = index(&["README.md", "Cargo.toml"]);
        let v = rule.evaluate(&ctx(Path::new("/fake"), &files)).unwrap();
        assert!(v.is_empty(), "unexpected violations: {v:?}");
    }

    #[test]
    fn evaluate_fires_when_no_matching_file_present() {
        let rule = rule_from(concat!(
            "id: t\n",
            "kind: file_exists\n",
            "paths: \"LICENSE\"\n",
            "level: error\n",
        ));
        let files = index(&["README.md"]);
        let v = rule.evaluate(&ctx(Path::new("/fake"), &files)).unwrap();
        assert_eq!(v.len(), 1, "expected one violation; got: {v:?}");
    }

    #[test]
    fn evaluate_root_only_excludes_nested_matches() {
        // Under `root_only: true` only paths without a parent
        // component count: `LICENSE` would, `pkg/LICENSE` does not.
        let rule = rule_from(concat!(
            "id: t\n",
            "kind: file_exists\n",
            "paths: \"LICENSE\"\n",
            "level: error\n",
            "root_only: true\n",
        ));
        let nested_only = index(&["pkg/LICENSE"]);
        let v = rule
            .evaluate(&ctx(Path::new("/fake"), &nested_only))
            .unwrap();
        assert_eq!(v.len(), 1, "nested match shouldn't satisfy root_only");
    }

    #[test]
    fn first_literal_path_picks_first_non_glob() {
        let patterns: Vec<String> = ["docs/**/*.md", "LICENSE", "README.md"]
            .iter()
            .map(|entry| (*entry).to_string())
            .collect();
        let picked = first_literal_path(&patterns);
        assert_eq!(picked.as_deref(), Some(Path::new("LICENSE")));
    }

    #[test]
    fn first_literal_path_returns_none_when_all_glob() {
        let patterns: Vec<String> = ["docs/**/*.md", "src/[a-z]*.rs"]
            .iter()
            .map(|entry| (*entry).to_string())
            .collect();
        assert!(first_literal_path(&patterns).is_none());
    }

    #[test]
    fn patterns_of_handles_every_paths_spec_shape() {
        let single = PathsSpec::Single("a".into());
        assert_eq!(patterns_of(&single), vec!["a"]);

        let many = PathsSpec::Many(vec!["a".into(), "b".into()]);
        assert_eq!(patterns_of(&many), vec!["a", "b"]);

        let include_exclude = PathsSpec::IncludeExclude {
            include: vec!["a".into()],
            exclude: vec!["b".into()],
        };
        assert_eq!(patterns_of(&include_exclude), vec!["a"]);
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // file_exists is a cross-file rule (requires_full_index =
        // true), while scope_filter applies to per-file rules only.
        // The build must reject it with a clear message that points
        // at the for_each_dir + when_iter: alternative.
        let err = build_error(concat!(
            "\n",
            "id: t\n",
            "kind: file_exists\n",
            "paths: \"LICENSE\"\n",
            "level: error\n",
            "scope_filter:\n",
            "  has_ancestor: Cargo.toml\n",
        ));
        assert!(
            err.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {err}",
        );
        assert!(
            err.contains("file_exists"),
            "expected message to name the cross-file kind, got: {err}",
        );
    }
}