Skip to main content

alint_rules/
file_exists.rs

1//! `file_exists` — require that at least one file matching any of the given
2//! globs exists in the repository.
3
4use std::path::{Path, PathBuf};
5
6use alint_core::{
7    Context, Error, FixSpec, Fixer, Level, PathsSpec, Result, Rule, RuleSpec, Scope, Violation,
8};
9use serde::Deserialize;
10
11use crate::fixers::FileCreateFixer;
12
/// Rule-specific options read from the rule spec.
///
/// Unknown keys are rejected by serde (`deny_unknown_fields`); note
/// that `build` falls back to the defaults when deserialization fails.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// When `true`, only a file whose path has a single component
    /// (i.e. sits at the repo root) can satisfy the rule. Defaults to
    /// `false` when the key is absent.
    #[serde(default)]
    root_only: bool,
}
19
/// A configured `file_exists` rule: passes when at least one file
/// matching `patterns` is present, otherwise reports one violation.
#[derive(Debug)]
pub struct FileExistsRule {
    /// Rule identifier, copied from the spec.
    id: String,
    /// Severity level, copied from the spec.
    level: Level,
    /// Optional policy link, copied from the spec.
    policy_url: Option<String>,
    /// Custom violation message. When `None`, `evaluate` builds a
    /// default "expected a file matching […]" message.
    message: Option<String>,
    /// Compiled scope built from the spec's `paths`; used by the
    /// slow (glob) path and returned from `path_scope`.
    scope: Scope,
    /// The `paths` patterns as written in the spec (the include list
    /// for the include/exclude form). Used for the default message
    /// and for picking a default fixer target.
    patterns: Vec<String>,
    /// `Some(paths)` when every entry in `patterns` is a literal
    /// path (no glob metacharacters, no `!` excludes) and the
    /// `paths` spec carries no exclude list. `git_tracked_only`
    /// does not disable it: `build` no longer checks that flag.
    /// The fast path uses these to do O(1)
    /// `FileIndex::contains_file` lookups instead of iterating
    /// every entry through `Scope::matches`. At 1M files in a
    /// 5,000-package monorepo, `for_each_dir` rules spawn one
    /// nested `file_exists` per directory; without this
    /// short-circuit each one is an O(N) scan and the fan-out
    /// becomes O(D × N). With it, they collapse to O(D) lookups.
    literal_paths: Option<Vec<PathBuf>>,
    /// When `true`, nested paths never satisfy the rule; only
    /// single-component (repo-root) paths count.
    root_only: bool,
    /// When `true`, only consider walked entries that are also
    /// in git's index. Outside a git repo this becomes a silent
    /// no-op — no entries qualify, so the rule reports the
    /// "missing" violation as if no file existed.
    git_tracked_only: bool,
    /// When `Some(false)`, the literal-path fast path also
    /// checks the filesystem directly via `ctx.root.join(p)` —
    /// finds files that are present-on-disk but
    /// `.gitignore`-masked from the walker (closes the
    /// `bazel-style "tracked AND gitignored"` pattern from
    /// pitfall #18 in `docs/development/CONFIG-AUTHORING.md`).
    /// Default `None` (inherit workspace `respect_gitignore`).
    respect_gitignore: Option<bool>,
    /// Optional `file_create` fixer, present only when the spec
    /// configures `fix.file_create`.
    fixer: Option<FileCreateFixer>,
}
55
/// Reports whether `pattern` can be treated as a plain path string.
///
/// A pattern qualifies when it does not start with the `!` exclude
/// marker and contains none of the glob metacharacters
/// `*`, `?`, `[`, `]`, `{`, `}`. Qualifying patterns can be resolved
/// with an O(1) hash-set lookup via
/// [`alint_core::FileIndex::contains_file`] rather than an O(N)
/// scope-match scan.
fn is_literal_path(pattern: &str) -> bool {
    const GLOB_META: [char; 6] = ['*', '?', '[', ']', '{', '}'];

    // A leading `!` marks an exclude entry, never a literal path.
    if pattern.starts_with('!') {
        return false;
    }
    !pattern.contains(&GLOB_META[..])
}
67
68/// True iff `paths` is a flat list (single string or `Many`)
69/// with no excludes — `IncludeExclude` form is excluded since
70/// the fast path can't honour excludes by hash lookup alone.
71fn paths_spec_has_no_excludes(spec: &PathsSpec) -> bool {
72    match spec {
73        PathsSpec::Single(_) | PathsSpec::Many(_) => true,
74        PathsSpec::IncludeExclude { exclude, .. } => exclude.is_empty(),
75    }
76}
77
78impl FileExistsRule {
79    fn describe_patterns(&self) -> String {
80        self.patterns.join(", ")
81    }
82}
83
84impl Rule for FileExistsRule {
85    alint_core::rule_common_impl!();
86
87    fn git_tracked_mode(&self) -> alint_core::GitTrackedMode {
88        if self.git_tracked_only {
89            alint_core::GitTrackedMode::FileOnly
90        } else {
91            alint_core::GitTrackedMode::Off
92        }
93    }
94
95    fn requires_full_index(&self) -> bool {
96        // Existence is an aggregate verdict over the whole tree —
97        // "is at least one matching file present?". In `--changed`
98        // mode, evaluate against the full index (so an unchanged
99        // LICENSE still counts) but let the engine skip the rule
100        // entirely when its scope doesn't intersect the diff.
101        true
102    }
103
104    fn path_scope(&self) -> Option<&Scope> {
105        Some(&self.scope)
106    }
107
108    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
109        let found = if let Some(literals) = self.literal_paths.as_ref() {
110            // Fast path: each pattern is a literal relative
111            // path. Hash-lookup against the index's lazily-
112            // built path set is O(1) per pattern; for
113            // `for_each_dir`-spawned rules at 1M scale this is
114            // the difference between O(D × N) and O(D).
115            //
116            // Pitfall #18 (per-rule `respect_gitignore: false`):
117            // when set, also check the filesystem directly so a
118            // `.bazelversion`-style tracked-but-gitignored file is
119            // found even though the walker pre-filtered it out.
120            // Direct stat is O(1) per literal regardless of tree
121            // size, so the cost is bounded.
122            let bypass_walker_for_ignored = self.respect_gitignore == Some(false);
123            literals.iter().any(|p| {
124                if self.root_only && literal_is_nested(p) {
125                    return false;
126                }
127                if ctx.index.contains_file(p) {
128                    return true;
129                }
130                if bypass_walker_for_ignored && ctx.root.join(p).is_file() {
131                    return true;
132                }
133                false
134            })
135        } else {
136            // Slow path: glob patterns. v0.9.11: when
137            // `git_tracked_only` is set the engine hands us a
138            // pre-filtered `ctx.index` (file_only mode), so the
139            // per-entry `is_git_tracked` check that lived here
140            // pre-v0.9.11 is no longer needed — `ctx.index.files()`
141            // already iterates only tracked files.
142            ctx.index.files().any(|entry| {
143                if self.root_only && entry.path.components().count() != 1 {
144                    return false;
145                }
146                if !self.scope.matches(&entry.path, ctx.index) {
147                    return false;
148                }
149                true
150            })
151        };
152        if found {
153            Ok(Vec::new())
154        } else {
155            let message = self.message.clone().unwrap_or_else(|| {
156                let scope = if self.root_only {
157                    " at the repo root"
158                } else {
159                    ""
160                };
161                let tracked = if self.git_tracked_only {
162                    " (tracked in git)"
163                } else {
164                    ""
165                };
166                format!(
167                    "expected a file matching [{}]{scope}{tracked}",
168                    self.describe_patterns()
169                )
170            });
171            Ok(vec![Violation::new(message)])
172        }
173    }
174
175    fn fixer(&self) -> Option<&dyn Fixer> {
176        self.fixer.as_ref().map(|f| f as &dyn Fixer)
177    }
178}
179
180pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
181    alint_core::reject_scope_filter_on_cross_file(spec, "file_exists")?;
182    let Some(paths) = &spec.paths else {
183        return Err(Error::rule_config(
184            &spec.id,
185            "file_exists requires a `paths` field",
186        ));
187    };
188    let patterns = patterns_of(paths);
189    let scope = Scope::from_paths_spec(paths)?;
190    let opts: Options = spec
191        .deserialize_options()
192        .unwrap_or(Options { root_only: false });
193    // The fast path needs every pattern to be a plain relative
194    // path (no glob metacharacters, no `!` exclude). v0.9.11:
195    // `git_tracked_only` no longer disqualifies the fast path
196    // — the engine routes the rule to a tracked-files-only
197    // pre-filtered index, so `FileIndex::contains_file` against
198    // that index naturally returns true iff the literal is BOTH
199    // present AND tracked. When all preconditions hold,
200    // `literal_paths` carries the parsed `PathBuf`s ready for
201    // `FileIndex::contains_file` lookup at evaluate time.
202    let literal_paths =
203        if paths_spec_has_no_excludes(paths) && patterns.iter().all(|p| is_literal_path(p)) {
204            Some(patterns.iter().map(PathBuf::from).collect())
205        } else {
206            None
207        };
208    let fixer = match &spec.fix {
209        Some(FixSpec::FileCreate { file_create: cfg }) => {
210            let target = cfg
211                .path
212                .clone()
213                .or_else(|| first_literal_path(&patterns))
214                .ok_or_else(|| {
215                    Error::rule_config(
216                        &spec.id,
217                        "fix.file_create needs a `path` — none of the rule's `paths:` \
218                         entries is a literal filename",
219                    )
220                })?;
221            let source = alint_core::resolve_content_source(
222                &spec.id,
223                "file_create",
224                &cfg.content,
225                &cfg.content_from,
226            )?;
227            Some(FileCreateFixer::new(target, source, cfg.create_parents))
228        }
229        Some(other) => {
230            return Err(Error::rule_config(
231                &spec.id,
232                format!("fix.{} is not compatible with file_exists", other.op_name()),
233            ));
234        }
235        None => None,
236    };
237    Ok(Box::new(FileExistsRule {
238        id: spec.id.clone(),
239        level: spec.level,
240        policy_url: spec.policy_url.clone(),
241        message: spec.message.clone(),
242        scope,
243        patterns,
244        literal_paths,
245        root_only: opts.root_only,
246        git_tracked_only: spec.git_tracked_only,
247        respect_gitignore: spec.respect_gitignore,
248        fixer,
249    }))
250}
251
/// Reports whether a literal `paths:` entry is anything other than a
/// single path component.
///
/// This is the fast-path counterpart of the slow path's
/// `entry.path.components().count() != 1` test for `root_only`.
fn literal_is_nested(p: &Path) -> bool {
    let mut parts = p.components();
    // Exactly one component means a first one exists and a second
    // one does not.
    let single_component = parts.next().is_some() && parts.next().is_none();
    !single_component
}
259
/// Best-effort: return the first entry in `patterns` that is usable
/// as a file path — it has no glob metacharacters (`*`, `?`, `[`,
/// `{`) and is not a `!`-prefixed exclude entry.
///
/// Exclude entries are skipped because they name files that must NOT
/// match; using one as the `file_create` target would create a file
/// whose name starts with `!`.
///
/// Returns `None` if no entry qualifies — in that case the caller
/// must require an explicit `fix.file_create.path`.
fn first_literal_path(patterns: &[String]) -> Option<PathBuf> {
    patterns
        .iter()
        .find(|p| {
            !p.starts_with('!') && !p.chars().any(|c| matches!(c, '*' | '?' | '[' | '{'))
        })
        .map(PathBuf::from)
}
270
271fn patterns_of(spec: &PathsSpec) -> Vec<String> {
272    match spec {
273        PathsSpec::Single(s) => vec![s.clone()],
274        PathsSpec::Many(v) => v.clone(),
275        PathsSpec::IncludeExclude { include, .. } => include.clone(),
276    }
277}
278
// Unit tests for the `file_exists` rule: build-time validation,
// evaluation against a fake index, and the private path helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, index, spec_yaml};
    use std::path::Path;

    #[test]
    fn build_rejects_missing_paths_field() {
        // A spec without `paths:` must fail to build, and the error
        // must mention the missing field.
        let spec = spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             level: error\n",
        );
        let err = build(&spec).unwrap_err().to_string();
        assert!(err.contains("paths"), "unexpected: {err}");
    }

    #[test]
    fn build_accepts_root_only_option() {
        // `root_only: true` is the supported option; building
        // it should succeed and produce a configured rule.
        // (Unknown options are tolerated by file_exists' build
        // path via `.unwrap_or(default)`; the JSON Schema and
        // DSL loader catch typos at config-load time before
        // we get here, which is the right layer for that
        // check.)
        let spec = spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"LICENSE\"\n\
             level: error\n\
             root_only: true\n",
        );
        assert!(build(&spec).is_ok());
    }

    #[test]
    fn build_rejects_incompatible_fix_op() {
        // file_exists supports `file_create` only; `file_remove`
        // (or any other op) must surface a clear config error so
        // a typo doesn't silently disable the fix path.
        let spec = spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"LICENSE\"\n\
             level: error\n\
             fix:\n  \
               file_remove: {}\n",
        );
        let err = build(&spec).unwrap_err().to_string();
        assert!(err.contains("file_remove"), "unexpected: {err}");
    }

    #[test]
    fn build_file_create_needs_explicit_path_for_glob_only_paths() {
        // When every entry in `paths:` is a glob, the fixer
        // can't pick a literal target; the user must supply
        // `fix.file_create.path` explicitly.
        let spec = spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"docs/**/*.md\"\n\
             level: error\n\
             fix:\n  \
               file_create:\n    \
                 content: \"# title\\n\"\n",
        );
        let err = build(&spec).unwrap_err().to_string();
        assert!(err.contains("path"), "unexpected: {err}");
    }

    #[test]
    fn evaluate_passes_when_matching_file_present() {
        // The index contains the literal path, so no violation is
        // expected.
        let spec = spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"README.md\"\n\
             level: error\n",
        );
        let rule = build(&spec).unwrap();
        let idx = index(&["README.md", "Cargo.toml"]);
        let v = rule.evaluate(&ctx(Path::new("/fake"), &idx)).unwrap();
        assert!(v.is_empty(), "unexpected violations: {v:?}");
    }

    #[test]
    fn evaluate_fires_when_no_matching_file_present() {
        // The index lacks the requested file, so exactly one
        // violation is expected.
        let spec = spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"LICENSE\"\n\
             level: error\n",
        );
        let rule = build(&spec).unwrap();
        let idx = index(&["README.md"]);
        let v = rule.evaluate(&ctx(Path::new("/fake"), &idx)).unwrap();
        assert_eq!(v.len(), 1, "expected one violation; got: {v:?}");
    }

    #[test]
    fn evaluate_root_only_excludes_nested_matches() {
        // `root_only: true` only counts entries whose path has
        // no parent component — `LICENSE` qualifies,
        // `pkg/LICENSE` does not.
        let spec = spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"LICENSE\"\n\
             level: error\n\
             root_only: true\n",
        );
        let rule = build(&spec).unwrap();
        let idx_only_nested = index(&["pkg/LICENSE"]);
        let v = rule
            .evaluate(&ctx(Path::new("/fake"), &idx_only_nested))
            .unwrap();
        assert_eq!(v.len(), 1, "nested match shouldn't satisfy root_only");
    }

    #[test]
    fn first_literal_path_picks_first_non_glob() {
        // The leading glob is skipped; the first glob-free entry wins.
        let patterns = vec!["docs/**/*.md".into(), "LICENSE".into(), "README.md".into()];
        assert_eq!(
            first_literal_path(&patterns).as_deref(),
            Some(Path::new("LICENSE")),
        );
    }

    #[test]
    fn first_literal_path_returns_none_when_all_glob() {
        // No entry is usable as a concrete path.
        let patterns = vec!["docs/**/*.md".into(), "src/[a-z]*.rs".into()];
        assert!(first_literal_path(&patterns).is_none());
    }

    #[test]
    fn patterns_of_handles_every_paths_spec_shape() {
        // Single, list, and include/exclude forms; for the last one
        // only the `include` entries are returned.
        assert_eq!(patterns_of(&PathsSpec::Single("a".into())), vec!["a"]);
        assert_eq!(
            patterns_of(&PathsSpec::Many(vec!["a".into(), "b".into()])),
            vec!["a", "b"],
        );
        assert_eq!(
            patterns_of(&PathsSpec::IncludeExclude {
                include: vec!["a".into()],
                exclude: vec!["b".into()],
            }),
            vec!["a"],
        );
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // file_exists is a cross-file rule (requires_full_index =
        // true); scope_filter is per-file-rules-only. The build
        // path must reject it with a clear message pointing at
        // the for_each_dir + when_iter: alternative.
        let yaml = r#"
id: t
kind: file_exists
paths: "LICENSE"
level: error
scope_filter:
  has_ancestor: Cargo.toml
"#;
        let spec = spec_yaml(yaml);
        let err = build(&spec).unwrap_err().to_string();
        assert!(
            err.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {err}",
        );
        assert!(
            err.contains("file_exists"),
            "expected message to name the cross-file kind, got: {err}",
        );
    }
}