Skip to main content

alint_rules/
file_exists.rs

1//! `file_exists` — require that at least one file matching any of the given
2//! globs exists in the repository.
3
4use std::path::{Path, PathBuf};
5
6use alint_core::{
7    Context, Error, FixSpec, Fixer, Level, PathsSpec, Result, Rule, RuleSpec, Scope, Violation,
8};
9use serde::Deserialize;
10
11use crate::fixers::FileCreateFixer;
12
13#[derive(Debug, Deserialize)]
14#[serde(deny_unknown_fields)]
15struct Options {
16    #[serde(default)]
17    root_only: bool,
18}
19
20#[derive(Debug)]
21pub struct FileExistsRule {
22    id: String,
23    level: Level,
24    policy_url: Option<String>,
25    message: Option<String>,
26    scope: Scope,
27    patterns: Vec<String>,
28    /// `Some(paths)` when every entry in `patterns` is a literal
29    /// path (no glob metacharacters, no `!` excludes) and the
30    /// rule does not opt into `git_tracked_only`. The fast path
31    /// uses these to do O(1) `FileIndex::contains_file` lookups
32    /// instead of iterating every entry through
33    /// `Scope::matches`. At 1M files in a 5,000-package
34    /// monorepo, `for_each_dir` rules spawn one nested
35    /// `file_exists` per directory; without this short-circuit
36    /// each one is an O(N) scan and the fan-out becomes
37    /// O(D × N). With it, they collapse to O(D) lookups.
38    literal_paths: Option<Vec<PathBuf>>,
39    root_only: bool,
40    /// When `true`, only consider walked entries that are also
41    /// in git's index. Outside a git repo this becomes a silent
42    /// no-op — no entries qualify, so the rule reports the
43    /// "missing" violation as if no file existed.
44    git_tracked_only: bool,
45    fixer: Option<FileCreateFixer>,
46}
47
/// Reports whether `pattern` is a plain literal path.
///
/// A literal has no leading `!` (exclude marker) and contains none of
/// the glob metacharacters `*`, `?`, `[`, `]`, `{`, `}`. Literal
/// patterns can be resolved with a direct
/// [`alint_core::FileIndex::contains_file`] lookup rather than a
/// scope-match scan over every entry.
fn is_literal_path(pattern: &str) -> bool {
    const GLOB_META: [char; 6] = ['*', '?', '[', ']', '{', '}'];

    // A leading `!` marks an exclude entry, never a literal path.
    if pattern.starts_with('!') {
        return false;
    }
    pattern.chars().all(|ch| !GLOB_META.contains(&ch))
}
59
60/// True iff `paths` is a flat list (single string or `Many`)
61/// with no excludes — `IncludeExclude` form is excluded since
62/// the fast path can't honour excludes by hash lookup alone.
63fn paths_spec_has_no_excludes(spec: &PathsSpec) -> bool {
64    match spec {
65        PathsSpec::Single(_) | PathsSpec::Many(_) => true,
66        PathsSpec::IncludeExclude { exclude, .. } => exclude.is_empty(),
67    }
68}
69
70impl FileExistsRule {
71    fn describe_patterns(&self) -> String {
72        self.patterns.join(", ")
73    }
74}
75
76impl Rule for FileExistsRule {
77    fn id(&self) -> &str {
78        &self.id
79    }
80    fn level(&self) -> Level {
81        self.level
82    }
83    fn policy_url(&self) -> Option<&str> {
84        self.policy_url.as_deref()
85    }
86
87    fn wants_git_tracked(&self) -> bool {
88        self.git_tracked_only
89    }
90
91    fn requires_full_index(&self) -> bool {
92        // Existence is an aggregate verdict over the whole tree —
93        // "is at least one matching file present?". In `--changed`
94        // mode, evaluate against the full index (so an unchanged
95        // LICENSE still counts) but let the engine skip the rule
96        // entirely when its scope doesn't intersect the diff.
97        true
98    }
99
100    fn path_scope(&self) -> Option<&Scope> {
101        Some(&self.scope)
102    }
103
104    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
105        let found = if let Some(literals) = self.literal_paths.as_ref() {
106            // Fast path: each pattern is a literal relative
107            // path. Hash-lookup against the index's lazily-
108            // built path set is O(1) per pattern; for
109            // `for_each_dir`-spawned rules at 1M scale this is
110            // the difference between O(D × N) and O(D).
111            literals.iter().any(|p| {
112                if self.root_only && literal_is_nested(p) {
113                    return false;
114                }
115                ctx.index.contains_file(p)
116            })
117        } else {
118            // Slow path: glob patterns and/or `git_tracked_only`
119            // require iterating every entry. Same shape as the
120            // pre-v0.10 implementation — preserved verbatim so
121            // glob-using rules keep their existing semantics.
122            ctx.index.files().any(|entry| {
123                if self.root_only && entry.path.components().count() != 1 {
124                    return false;
125                }
126                if !self.scope.matches(&entry.path) {
127                    return false;
128                }
129                if self.git_tracked_only && !ctx.is_git_tracked(&entry.path) {
130                    return false;
131                }
132                true
133            })
134        };
135        if found {
136            Ok(Vec::new())
137        } else {
138            let message = self.message.clone().unwrap_or_else(|| {
139                let scope = if self.root_only {
140                    " at the repo root"
141                } else {
142                    ""
143                };
144                let tracked = if self.git_tracked_only {
145                    " (tracked in git)"
146                } else {
147                    ""
148                };
149                format!(
150                    "expected a file matching [{}]{scope}{tracked}",
151                    self.describe_patterns()
152                )
153            });
154            Ok(vec![Violation::new(message)])
155        }
156    }
157
158    fn fixer(&self) -> Option<&dyn Fixer> {
159        self.fixer.as_ref().map(|f| f as &dyn Fixer)
160    }
161}
162
163pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
164    alint_core::reject_scope_filter_on_cross_file(spec, "file_exists")?;
165    let Some(paths) = &spec.paths else {
166        return Err(Error::rule_config(
167            &spec.id,
168            "file_exists requires a `paths` field",
169        ));
170    };
171    let patterns = patterns_of(paths);
172    let scope = Scope::from_paths_spec(paths)?;
173    let opts: Options = spec
174        .deserialize_options()
175        .unwrap_or(Options { root_only: false });
176    // The fast path needs every pattern to be a plain relative
177    // path (no glob metacharacters, no `!` exclude) AND the
178    // rule must not opt into `git_tracked_only` (which requires
179    // a per-entry callback). When all preconditions hold,
180    // `literal_paths` carries the parsed `PathBuf`s ready for
181    // `FileIndex::contains_file` lookup at evaluate time.
182    let literal_paths = if !spec.git_tracked_only
183        && paths_spec_has_no_excludes(paths)
184        && patterns.iter().all(|p| is_literal_path(p))
185    {
186        Some(patterns.iter().map(PathBuf::from).collect())
187    } else {
188        None
189    };
190    let fixer = match &spec.fix {
191        Some(FixSpec::FileCreate { file_create: cfg }) => {
192            let target = cfg
193                .path
194                .clone()
195                .or_else(|| first_literal_path(&patterns))
196                .ok_or_else(|| {
197                    Error::rule_config(
198                        &spec.id,
199                        "fix.file_create needs a `path` — none of the rule's `paths:` \
200                         entries is a literal filename",
201                    )
202                })?;
203            let source = alint_core::resolve_content_source(
204                &spec.id,
205                "file_create",
206                &cfg.content,
207                &cfg.content_from,
208            )?;
209            Some(FileCreateFixer::new(target, source, cfg.create_parents))
210        }
211        Some(other) => {
212            return Err(Error::rule_config(
213                &spec.id,
214                format!("fix.{} is not compatible with file_exists", other.op_name()),
215            ));
216        }
217        None => None,
218    };
219    Ok(Box::new(FileExistsRule {
220        id: spec.id.clone(),
221        level: spec.level,
222        policy_url: spec.policy_url.clone(),
223        message: spec.message.clone(),
224        scope,
225        patterns,
226        literal_paths,
227        root_only: opts.root_only,
228        git_tracked_only: spec.git_tracked_only,
229        fixer,
230    }))
231}
232
/// Reports whether a literal `paths:` entry is anything other than a
/// single path component.
///
/// This is the fast-path counterpart of the
/// `entry.path.components().count() != 1` test that the scanning path
/// uses to honour `root_only`.
fn literal_is_nested(p: &Path) -> bool {
    let mut parts = p.components();
    // Root-level means exactly one component: a first one exists and
    // nothing follows it.
    let single_component = parts.next().is_some() && parts.next().is_none();
    !single_component
}
240
/// Best-effort: returns the first entry in `patterns` that is usable
/// as a concrete file path.
///
/// An entry is skipped when it starts with `!` (an exclude entry, not
/// a path to create) or contains any of the glob metacharacters `*`,
/// `?`, `[`, `{`. Returns `None` when no entry qualifies — the caller
/// must then require an explicit `fix.file_create.path`.
fn first_literal_path(patterns: &[String]) -> Option<PathBuf> {
    patterns
        .iter()
        .find(|p| {
            // Exclude entries must never become the file-create target;
            // otherwise `["!foo", "LICENSE"]` would yield a file
            // literally named `!foo`.
            !p.starts_with('!') && !p.chars().any(|c| matches!(c, '*' | '?' | '[' | '{'))
        })
        .map(PathBuf::from)
}
251
252fn patterns_of(spec: &PathsSpec) -> Vec<String> {
253    match spec {
254        PathsSpec::Single(s) => vec![s.clone()],
255        PathsSpec::Many(v) => v.clone(),
256        PathsSpec::IncludeExclude { include, .. } => include.clone(),
257    }
258}
259
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, index, spec_yaml};
    use std::path::Path;

    /// Builds a rule from `yaml`, expects the build to fail, and
    /// returns the rendered error message.
    fn build_error(yaml: &str) -> String {
        build(&spec_yaml(yaml)).unwrap_err().to_string()
    }

    #[test]
    fn build_rejects_missing_paths_field() {
        let msg = build_error(
            "id: t\n\
             kind: file_exists\n\
             level: error\n",
        );
        assert!(msg.contains("paths"), "unexpected: {msg}");
    }

    #[test]
    fn build_accepts_root_only_option() {
        // `root_only: true` is the one supported option, so building
        // with it must succeed. (The build path itself tolerates
        // options it cannot deserialize by falling back to the
        // defaults; typos are meant to be caught earlier, by the JSON
        // Schema and DSL loader at config-load time.)
        let rule_spec = spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"LICENSE\"\n\
             level: error\n\
             root_only: true\n",
        );
        let built = build(&rule_spec);
        assert!(built.is_ok());
    }

    #[test]
    fn build_rejects_incompatible_fix_op() {
        // Only `file_create` is supported here. Any other op, such as
        // `file_remove`, has to produce a clear config error instead
        // of silently disabling the fix.
        let msg = build_error(
            "id: t\n\
             kind: file_exists\n\
             paths: \"LICENSE\"\n\
             level: error\n\
             fix:\n  \
               file_remove: {}\n",
        );
        assert!(msg.contains("file_remove"), "unexpected: {msg}");
    }

    #[test]
    fn build_file_create_needs_explicit_path_for_glob_only_paths() {
        // With nothing but globs in `paths:` there is no literal
        // target to fall back on, so `fix.file_create.path` becomes
        // mandatory.
        let msg = build_error(
            "id: t\n\
             kind: file_exists\n\
             paths: \"docs/**/*.md\"\n\
             level: error\n\
             fix:\n  \
               file_create:\n    \
                 content: \"# title\\n\"\n",
        );
        assert!(msg.contains("path"), "unexpected: {msg}");
    }

    #[test]
    fn evaluate_passes_when_matching_file_present() {
        let rule_spec = spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"README.md\"\n\
             level: error\n",
        );
        let rule = build(&rule_spec).unwrap();
        let files = index(&["README.md", "Cargo.toml"]);
        let violations = rule.evaluate(&ctx(Path::new("/fake"), &files)).unwrap();
        assert!(
            violations.is_empty(),
            "unexpected violations: {violations:?}"
        );
    }

    #[test]
    fn evaluate_fires_when_no_matching_file_present() {
        let rule_spec = spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"LICENSE\"\n\
             level: error\n",
        );
        let rule = build(&rule_spec).unwrap();
        let files = index(&["README.md"]);
        let violations = rule.evaluate(&ctx(Path::new("/fake"), &files)).unwrap();
        assert_eq!(
            violations.len(),
            1,
            "expected one violation; got: {violations:?}"
        );
    }

    #[test]
    fn evaluate_root_only_excludes_nested_matches() {
        // Under `root_only: true` a path counts only when it has no
        // parent component: `LICENSE` would qualify, `pkg/LICENSE`
        // must not.
        let rule_spec = spec_yaml(
            "id: t\n\
             kind: file_exists\n\
             paths: \"LICENSE\"\n\
             level: error\n\
             root_only: true\n",
        );
        let rule = build(&rule_spec).unwrap();
        let nested_only = index(&["pkg/LICENSE"]);
        let violations = rule
            .evaluate(&ctx(Path::new("/fake"), &nested_only))
            .unwrap();
        assert_eq!(
            violations.len(),
            1,
            "nested match shouldn't satisfy root_only"
        );
    }

    #[test]
    fn first_literal_path_picks_first_non_glob() {
        let patterns = vec![
            String::from("docs/**/*.md"),
            String::from("LICENSE"),
            String::from("README.md"),
        ];
        let picked = first_literal_path(&patterns);
        assert_eq!(picked.as_deref(), Some(Path::new("LICENSE")));
    }

    #[test]
    fn first_literal_path_returns_none_when_all_glob() {
        let patterns = vec![String::from("docs/**/*.md"), String::from("src/[a-z]*.rs")];
        assert!(first_literal_path(&patterns).is_none());
    }

    #[test]
    fn patterns_of_handles_every_paths_spec_shape() {
        let single = PathsSpec::Single("a".into());
        assert_eq!(patterns_of(&single), vec!["a"]);

        let many = PathsSpec::Many(vec!["a".into(), "b".into()]);
        assert_eq!(patterns_of(&many), vec!["a", "b"]);

        let include_exclude = PathsSpec::IncludeExclude {
            include: vec!["a".into()],
            exclude: vec!["b".into()],
        };
        assert_eq!(patterns_of(&include_exclude), vec!["a"]);
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // file_exists is a cross-file rule (requires_full_index =
        // true), whereas scope_filter only applies to per-file rules.
        // Building must fail with a message that says so and names
        // the offending kind.
        let yaml = r#"
id: t
kind: file_exists
paths: "LICENSE"
level: error
scope_filter:
  has_ancestor: Cargo.toml
"#;
        let msg = build_error(yaml);
        assert!(
            msg.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {msg}",
        );
        assert!(
            msg.contains("file_exists"),
            "expected message to name the cross-file kind, got: {msg}",
        );
    }
}