Skip to main content

alint_rules/
file_exists.rs

1//! `file_exists` — require that at least one file matching any of the given
2//! globs exists in the repository.
3
4use std::path::{Path, PathBuf};
5
6use alint_core::{
7    Context, Error, FixSpec, Fixer, Level, PathsSpec, Result, Rule, RuleSpec, Scope, Violation,
8};
9use serde::Deserialize;
10
11use crate::fixers::FileCreateFixer;
12
/// Rule-specific options deserialized from the rule spec. Unknown keys
/// are rejected at deserialization time (`deny_unknown_fields`).
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// When `true`, only files whose path has exactly one component
    /// (i.e. files at the repository root) can satisfy the rule.
    /// Defaults to `false` when the key is omitted.
    #[serde(default)]
    root_only: bool,
}
19
/// Rule that passes when at least one indexed file satisfies the
/// configured `paths`, and reports a single violation otherwise.
#[derive(Debug)]
pub struct FileExistsRule {
    /// Rule identifier, returned by `Rule::id`.
    id: String,
    /// Severity level, returned by `Rule::level`.
    level: Level,
    /// Optional policy link, returned by `Rule::policy_url`.
    policy_url: Option<String>,
    /// Optional user-supplied violation message; when `None`, a
    /// default message listing the patterns is generated.
    message: Option<String>,
    /// Compiled matcher built from the rule's `paths` spec; used by
    /// the slow (glob) evaluation path.
    scope: Scope,
    /// The raw include patterns, kept for messages and for picking a
    /// default `file_create` target.
    patterns: Vec<String>,
    /// `Some(paths)` when every entry in `patterns` is a literal
    /// path (no glob metacharacters, no `!` excludes) and the
    /// `paths` spec carries no excludes. `git_tracked_only` does
    /// not disqualify the fast path. The fast path
    /// uses these to do O(1) `FileIndex::contains_file` lookups
    /// instead of iterating every entry through
    /// `Scope::matches`. At 1M files in a 5,000-package
    /// monorepo, `for_each_dir` rules spawn one nested
    /// `file_exists` per directory; without this short-circuit
    /// each one is an O(N) scan and the fan-out becomes
    /// O(D × N). With it, they collapse to O(D) lookups.
    literal_paths: Option<Vec<PathBuf>>,
    /// When `true`, only root-level files (single path component)
    /// can satisfy the rule.
    root_only: bool,
    /// When `true`, only consider walked entries that are also
    /// in git's index. Outside a git repo this becomes a silent
    /// no-op — no entries qualify, so the rule reports the
    /// "missing" violation as if no file existed.
    git_tracked_only: bool,
    /// Optional fixer that creates the missing file; `None` when the
    /// rule spec has no `fix` section.
    fixer: Option<FileCreateFixer>,
}
47
/// Reports whether `pattern` names a single concrete path rather than
/// a glob or an exclusion.
///
/// A pattern qualifies when it does not begin with the `!` exclude
/// marker and contains none of the glob metacharacters
/// `*`, `?`, `[`, `]`, `{`, `}`. Literal patterns can be resolved by a
/// hash-set lookup ([`alint_core::FileIndex::contains_file`]) instead
/// of scanning every indexed entry through the scope matcher.
fn is_literal_path(pattern: &str) -> bool {
    const GLOB_META: [char; 6] = ['*', '?', '[', ']', '{', '}'];

    if pattern.starts_with('!') {
        return false;
    }
    pattern.chars().all(|ch| !GLOB_META.contains(&ch))
}
59
60/// True iff `paths` is a flat list (single string or `Many`)
61/// with no excludes — `IncludeExclude` form is excluded since
62/// the fast path can't honour excludes by hash lookup alone.
63fn paths_spec_has_no_excludes(spec: &PathsSpec) -> bool {
64    match spec {
65        PathsSpec::Single(_) | PathsSpec::Many(_) => true,
66        PathsSpec::IncludeExclude { exclude, .. } => exclude.is_empty(),
67    }
68}
69
70impl FileExistsRule {
71    fn describe_patterns(&self) -> String {
72        self.patterns.join(", ")
73    }
74}
75
76impl Rule for FileExistsRule {
77    fn id(&self) -> &str {
78        &self.id
79    }
80    fn level(&self) -> Level {
81        self.level
82    }
83    fn policy_url(&self) -> Option<&str> {
84        self.policy_url.as_deref()
85    }
86
87    fn git_tracked_mode(&self) -> alint_core::GitTrackedMode {
88        if self.git_tracked_only {
89            alint_core::GitTrackedMode::FileOnly
90        } else {
91            alint_core::GitTrackedMode::Off
92        }
93    }
94
95    fn requires_full_index(&self) -> bool {
96        // Existence is an aggregate verdict over the whole tree —
97        // "is at least one matching file present?". In `--changed`
98        // mode, evaluate against the full index (so an unchanged
99        // LICENSE still counts) but let the engine skip the rule
100        // entirely when its scope doesn't intersect the diff.
101        true
102    }
103
104    fn path_scope(&self) -> Option<&Scope> {
105        Some(&self.scope)
106    }
107
108    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
109        let found = if let Some(literals) = self.literal_paths.as_ref() {
110            // Fast path: each pattern is a literal relative
111            // path. Hash-lookup against the index's lazily-
112            // built path set is O(1) per pattern; for
113            // `for_each_dir`-spawned rules at 1M scale this is
114            // the difference between O(D × N) and O(D).
115            literals.iter().any(|p| {
116                if self.root_only && literal_is_nested(p) {
117                    return false;
118                }
119                ctx.index.contains_file(p)
120            })
121        } else {
122            // Slow path: glob patterns. v0.9.11: when
123            // `git_tracked_only` is set the engine hands us a
124            // pre-filtered `ctx.index` (file_only mode), so the
125            // per-entry `is_git_tracked` check that lived here
126            // pre-v0.9.11 is no longer needed — `ctx.index.files()`
127            // already iterates only tracked files.
128            ctx.index.files().any(|entry| {
129                if self.root_only && entry.path.components().count() != 1 {
130                    return false;
131                }
132                if !self.scope.matches(&entry.path, ctx.index) {
133                    return false;
134                }
135                true
136            })
137        };
138        if found {
139            Ok(Vec::new())
140        } else {
141            let message = self.message.clone().unwrap_or_else(|| {
142                let scope = if self.root_only {
143                    " at the repo root"
144                } else {
145                    ""
146                };
147                let tracked = if self.git_tracked_only {
148                    " (tracked in git)"
149                } else {
150                    ""
151                };
152                format!(
153                    "expected a file matching [{}]{scope}{tracked}",
154                    self.describe_patterns()
155                )
156            });
157            Ok(vec![Violation::new(message)])
158        }
159    }
160
161    fn fixer(&self) -> Option<&dyn Fixer> {
162        self.fixer.as_ref().map(|f| f as &dyn Fixer)
163    }
164}
165
166pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
167    alint_core::reject_scope_filter_on_cross_file(spec, "file_exists")?;
168    let Some(paths) = &spec.paths else {
169        return Err(Error::rule_config(
170            &spec.id,
171            "file_exists requires a `paths` field",
172        ));
173    };
174    let patterns = patterns_of(paths);
175    let scope = Scope::from_paths_spec(paths)?;
176    let opts: Options = spec
177        .deserialize_options()
178        .unwrap_or(Options { root_only: false });
179    // The fast path needs every pattern to be a plain relative
180    // path (no glob metacharacters, no `!` exclude). v0.9.11:
181    // `git_tracked_only` no longer disqualifies the fast path
182    // — the engine routes the rule to a tracked-files-only
183    // pre-filtered index, so `FileIndex::contains_file` against
184    // that index naturally returns true iff the literal is BOTH
185    // present AND tracked. When all preconditions hold,
186    // `literal_paths` carries the parsed `PathBuf`s ready for
187    // `FileIndex::contains_file` lookup at evaluate time.
188    let literal_paths =
189        if paths_spec_has_no_excludes(paths) && patterns.iter().all(|p| is_literal_path(p)) {
190            Some(patterns.iter().map(PathBuf::from).collect())
191        } else {
192            None
193        };
194    let fixer = match &spec.fix {
195        Some(FixSpec::FileCreate { file_create: cfg }) => {
196            let target = cfg
197                .path
198                .clone()
199                .or_else(|| first_literal_path(&patterns))
200                .ok_or_else(|| {
201                    Error::rule_config(
202                        &spec.id,
203                        "fix.file_create needs a `path` — none of the rule's `paths:` \
204                         entries is a literal filename",
205                    )
206                })?;
207            let source = alint_core::resolve_content_source(
208                &spec.id,
209                "file_create",
210                &cfg.content,
211                &cfg.content_from,
212            )?;
213            Some(FileCreateFixer::new(target, source, cfg.create_parents))
214        }
215        Some(other) => {
216            return Err(Error::rule_config(
217                &spec.id,
218                format!("fix.{} is not compatible with file_exists", other.op_name()),
219            ));
220        }
221        None => None,
222    };
223    Ok(Box::new(FileExistsRule {
224        id: spec.id.clone(),
225        level: spec.level,
226        policy_url: spec.policy_url.clone(),
227        message: spec.message.clone(),
228        scope,
229        patterns,
230        literal_paths,
231        root_only: opts.root_only,
232        git_tracked_only: spec.git_tracked_only,
233        fixer,
234    }))
235}
236
/// Reports whether a literal `paths:` entry is anything other than a
/// single path component.
///
/// This is the fast-path counterpart of the slow path's
/// `entry.path.components().count() != 1` test, so `root_only` is
/// honoured identically whether or not a scope-match scan runs.
fn literal_is_nested(p: &Path) -> bool {
    let component_count = p.components().count();
    component_count != 1
}
244
/// Best-effort: return the first entry in `patterns` that is usable as a
/// concrete file path — it contains no glob metacharacters (`*`, `?`,
/// `[`, `{`) and does not start with the `!` exclude marker. Returns
/// `None` if no pattern qualifies — in that case the caller must require
/// an explicit `fix.file_create.path`.
///
/// `!`-prefixed entries are skipped because they negate a match rather
/// than name a file; choosing one as the creation target would produce a
/// file whose name literally begins with `!`.
fn first_literal_path(patterns: &[String]) -> Option<PathBuf> {
    patterns
        .iter()
        .find(|p| {
            // Exclude entries are never valid creation targets.
            !p.starts_with('!') && !p.chars().any(|c| matches!(c, '*' | '?' | '[' | '{'))
        })
        .map(PathBuf::from)
}
255
256fn patterns_of(spec: &PathsSpec) -> Vec<String> {
257    match spec {
258        PathsSpec::Single(s) => vec![s.clone()],
259        PathsSpec::Many(v) => v.clone(),
260        PathsSpec::IncludeExclude { include, .. } => include.clone(),
261    }
262}
263
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, index, spec_yaml};
    use std::path::Path;

    // ---- build(): configuration validation ----

    #[test]
    fn build_rejects_missing_paths_field() {
        // No `paths:` key at all — the error must mention it.
        let err = build(&spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             level: error\n",
        ))
        .unwrap_err()
        .to_string();
        assert!(err.contains("paths"), "unexpected: {err}");
    }

    #[test]
    fn build_accepts_root_only_option() {
        // `root_only: true` is the one supported option, so the
        // build must succeed. Unknown options are tolerated here
        // because `build` falls back to defaults via
        // `.unwrap_or(...)`; typos are expected to be caught
        // earlier, by the JSON Schema and DSL loader at
        // config-load time.
        let spec = spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"LICENSE\"\n\
             level: error\n\
             root_only: true\n",
        );
        let built = build(&spec);
        assert!(built.is_ok());
    }

    #[test]
    fn build_rejects_incompatible_fix_op() {
        // Only `file_create` is supported. Any other op (here
        // `file_remove`) has to produce a config error naming the
        // op, so a typo cannot silently disable the fix path.
        let err = build(&spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"LICENSE\"\n\
             level: error\n\
             fix:\n  \
               file_remove: {}\n",
        ))
        .unwrap_err()
        .to_string();
        assert!(err.contains("file_remove"), "unexpected: {err}");
    }

    #[test]
    fn build_file_create_needs_explicit_path_for_glob_only_paths() {
        // With nothing but globs in `paths:` there is no literal
        // target to infer, so `fix.file_create.path` is mandatory.
        let err = build(&spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"docs/**/*.md\"\n\
             level: error\n\
             fix:\n  \
               file_create:\n    \
                 content: \"# title\\n\"\n",
        ))
        .unwrap_err()
        .to_string();
        assert!(err.contains("path"), "unexpected: {err}");
    }

    // ---- evaluate(): verdicts ----

    #[test]
    fn evaluate_passes_when_matching_file_present() {
        let idx = index(&["README.md", "Cargo.toml"]);
        let rule = build(&spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"README.md\"\n\
             level: error\n",
        ))
        .unwrap();
        let root = Path::new("/fake");
        let v = rule.evaluate(&ctx(root, &idx)).unwrap();
        assert!(v.is_empty(), "unexpected violations: {v:?}");
    }

    #[test]
    fn evaluate_fires_when_no_matching_file_present() {
        let idx = index(&["README.md"]);
        let rule = build(&spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"LICENSE\"\n\
             level: error\n",
        ))
        .unwrap();
        let root = Path::new("/fake");
        let v = rule.evaluate(&ctx(root, &idx)).unwrap();
        assert_eq!(v.len(), 1, "expected one violation; got: {v:?}");
    }

    #[test]
    fn evaluate_root_only_excludes_nested_matches() {
        // Under `root_only: true` a path only counts when it has no
        // parent component: `LICENSE` would, `pkg/LICENSE` doesn't.
        let idx_only_nested = index(&["pkg/LICENSE"]);
        let rule = build(&spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"LICENSE\"\n\
             level: error\n\
             root_only: true\n",
        ))
        .unwrap();
        let root = Path::new("/fake");
        let v = rule.evaluate(&ctx(root, &idx_only_nested)).unwrap();
        assert_eq!(v.len(), 1, "nested match shouldn't satisfy root_only");
    }

    // ---- helpers ----

    #[test]
    fn first_literal_path_picks_first_non_glob() {
        let patterns: Vec<String> = ["docs/**/*.md", "LICENSE", "README.md"]
            .iter()
            .map(|s| (*s).to_string())
            .collect();
        let picked = first_literal_path(&patterns);
        assert_eq!(picked.as_deref(), Some(Path::new("LICENSE")));
    }

    #[test]
    fn first_literal_path_returns_none_when_all_glob() {
        let patterns: Vec<String> = ["docs/**/*.md", "src/[a-z]*.rs"]
            .iter()
            .map(|s| (*s).to_string())
            .collect();
        assert!(first_literal_path(&patterns).is_none());
    }

    #[test]
    fn patterns_of_handles_every_paths_spec_shape() {
        let single = PathsSpec::Single("a".into());
        assert_eq!(patterns_of(&single), vec!["a"]);

        let many = PathsSpec::Many(vec!["a".into(), "b".into()]);
        assert_eq!(patterns_of(&many), vec!["a", "b"]);

        // Only the include side is reported; excludes are dropped.
        let include_exclude = PathsSpec::IncludeExclude {
            include: vec!["a".into()],
            exclude: vec!["b".into()],
        };
        assert_eq!(patterns_of(&include_exclude), vec!["a"]);
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // `file_exists` is a cross-file rule (it requires the full
        // index), while `scope_filter` is for per-file rules only.
        // Building must fail with a message that says so and that
        // names the offending rule kind.
        let yaml = r#"
id: t
kind: file_exists
paths: "LICENSE"
level: error
scope_filter:
  has_ancestor: Cargo.toml
"#;
        let err = build(&spec_yaml(yaml)).unwrap_err().to_string();
        assert!(
            err.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {err}",
        );
        assert!(
            err.contains("file_exists"),
            "expected message to name the cross-file kind, got: {err}",
        );
    }
}
435            err.contains("file_exists"),
436            "expected message to name the cross-file kind, got: {err}",
437        );
438    }
439}