Skip to main content

alint_rules/
registry_paths_resolve.rs

//! `registry_paths_resolve` — a manifest file enumerates
//! path-like entries; each must resolve to an on-disk artefact.
//! Optional reverse "orphan" check: on-disk artefacts in a
//! declared space that no entry references.
//!
//! Cross-file: reads one manifest and resolves its entries
//! against the engine `FileIndex` (O(1) per entry via the lazy
//! path-set). Design + rationale + open-question resolutions:
//! `docs/design/v0.10/registry_paths_resolve.md`.
//!
//! ```yaml
//! - id: cargo-workspace-members-resolve
//!   kind: registry_paths_resolve
//!   source: Cargo.toml
//!   extract: { toml: "$.workspace.members[*]" }
//!   base: registry_dir          # registry_dir (default) | lint_root | "<path>"
//!   entries_are_globs: true
//!   expect: dir                 # any (default) | file | dir
//!   must_contain: Cargo.toml
//!   exclude_query: "$.workspace.exclude[*]"
//!   orphans: { space: "crates/*", unreferenced: warn }
//!   level: error
//! ```
24
25use std::collections::HashSet;
26use std::path::{Path, PathBuf};
27
28use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
29use regex::Regex;
30use serde::Deserialize;
31
32use crate::extract::{Extract, ExtractSpec, extract_values, is_non_literal};
33
34#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Eq)]
35#[serde(rename_all = "lowercase")]
36enum Expect {
37    #[default]
38    Any,
39    File,
40    Dir,
41}
42
43#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Eq)]
44#[serde(rename_all = "lowercase")]
45enum Severity {
46    #[default]
47    Warn,
48    Error,
49    Off,
50}
51
52#[derive(Debug, Clone, Deserialize)]
53#[serde(deny_unknown_fields)]
54struct OrphansSpec {
55    /// Glob of on-disk artefacts that should each be referenced.
56    space: String,
57    #[serde(default)]
58    unreferenced: Severity,
59}
60
61#[derive(Debug, Deserialize)]
62#[serde(deny_unknown_fields)]
63struct Options {
64    source: String,
65    extract: ExtractSpec,
66    #[serde(default)]
67    base: Option<String>,
68    #[serde(default)]
69    entries_are_globs: bool,
70    #[serde(default)]
71    expect: Expect,
72    #[serde(default)]
73    must_contain: Option<String>,
74    #[serde(default)]
75    exclude_query: Option<String>,
76    #[serde(default)]
77    orphans: Option<OrphansSpec>,
78}
79
/// Where registry entries are resolved from.
#[derive(Clone, Debug)]
enum Base {
    /// Directory containing the registry file (default; matches
    /// Cargo / npm semantics + alint's nested-manifest model).
    RegistryDir,
    /// The lint root.
    LintRoot,
    /// An explicit path, relative to the lint root.
    Explicit(PathBuf),
}

impl Base {
    /// Interpret the raw `base` option.
    ///
    /// A missing value and the keyword `registry_dir` both select
    /// [`Base::RegistryDir`]; `lint_root` selects [`Base::LintRoot`]; any
    /// other string is taken verbatim as an explicit path.
    fn parse(raw: Option<&str>) -> Self {
        raw.map_or(Self::RegistryDir, |keyword| match keyword {
            "registry_dir" => Self::RegistryDir,
            "lint_root" => Self::LintRoot,
            explicit => Self::Explicit(PathBuf::from(explicit)),
        })
    }
}
101
102#[derive(Debug)]
103pub struct RegistryPathsResolveRule {
104    id: String,
105    level: Level,
106    policy_url: Option<String>,
107    message: Option<String>,
108    source: String,
109    registry_scope: Option<Scope>,
110    extract: Extract,
111    base: Base,
112    entries_are_globs: bool,
113    expect: Expect,
114    must_contain: Option<String>,
115    exclude_query: Option<String>,
116    orphans: Option<OrphansSpec>,
117}
118
119impl Rule for RegistryPathsResolveRule {
120    alint_core::rule_common_impl!();
121
122    fn requires_full_index(&self) -> bool {
123        // Cross-file: an entry's verdict depends on whether its
124        // target exists anywhere in the tree, and the orphan
125        // check needs the whole index — never `--changed`-scoped.
126        true
127    }
128
129    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
130        let mut violations = Vec::new();
131
132        // Directory existence: build the dir path-set once per
133        // eval (O(D)); per-entry lookups are then O(1), matching
134        // `contains_file`'s scaling so the rule stays index-fast.
135        let dir_set: HashSet<&Path> = if self.expect == Expect::Dir
136            || self.expect == Expect::Any
137            || self.must_contain.is_some()
138        {
139            ctx.index.dirs().map(|e| &*e.path).collect()
140        } else {
141            HashSet::new()
142        };
143
144        for registry_rel in self.registry_files(ctx) {
145            let abs = ctx.root.join(&registry_rel);
146            let text = match crate::io::read_capped(&abs) {
147                Ok(b) => String::from_utf8_lossy(&b).into_owned(),
148                Err(e) => {
149                    let why = match e {
150                        crate::io::ReadCapError::TooLarge(n) => {
151                            format!("is too large to analyze ({n} bytes; 256 MiB cap)")
152                        }
153                        crate::io::ReadCapError::Io(e) => {
154                            format!("could not be read: {e}")
155                        }
156                    };
157                    violations.push(
158                        Violation::new(format!("registry file {} {why}", registry_rel.display()))
159                            .with_path(registry_rel.clone()),
160                    );
161                    continue;
162                }
163            };
164
165            let (entries, skipped) = match self.extract_entries(&text) {
166                Ok(v) => v,
167                Err(e) => {
168                    violations.push(
169                        Violation::new(format!(
170                            "registry file {} could not be parsed for `extract`: {e}",
171                            registry_rel.display()
172                        ))
173                        .with_path(registry_rel.clone()),
174                    );
175                    continue;
176                }
177            };
178            // Non-literal (computed/interpolated) entries are
179            // intentionally skipped, not failed. The skip is
180            // silent in v0.10 — `alint check` has no
181            // informational-finding / `--explain` channel;
182            // visibly surfacing the skip list is a tracked
183            // v0.11 item (see the design doc).
184            let _ = skipped;
185
186            let excluded = self.excluded_entries(&text);
187            let base_dir = self.base_dir(&registry_rel);
188
189            let mut covered: Vec<PathBuf> = Vec::new();
190            for entry in &entries {
191                if excluded.contains(entry) {
192                    continue;
193                }
194                let resolved = normalise(&base_dir.join(entry));
195                if self.entries_are_globs {
196                    let matches = Self::glob_matches(ctx, &resolved);
197                    if matches.is_empty() {
198                        violations.push(self.violation(
199                            &registry_rel,
200                            entry,
201                            "matched no path on disk",
202                        ));
203                    } else {
204                        covered.extend(matches);
205                    }
206                    continue;
207                }
208                covered.push(resolved.clone());
209                if let Some(reason) = self.existence_problem(ctx, &resolved, &dir_set) {
210                    violations.push(self.violation(&registry_rel, entry, &reason));
211                }
212            }
213
214            // Globbed entries still need existence/kind checks on
215            // each expansion (a `crates/*` match must satisfy
216            // `must_contain`, etc.).
217            if self.entries_are_globs {
218                for p in &covered {
219                    if let Some(reason) = self.existence_problem(ctx, p, &dir_set) {
220                        violations.push(self.violation(
221                            &registry_rel,
222                            &p.display().to_string(),
223                            &reason,
224                        ));
225                    }
226                }
227            }
228
229            self.check_orphans(ctx, &registry_rel, &covered, &mut violations);
230        }
231
232        Ok(violations)
233    }
234}
235
236impl RegistryPathsResolveRule {
237    /// The registry file(s): a literal path, or every index path
238    /// matching the glob.
239    fn registry_files(&self, ctx: &Context<'_>) -> Vec<PathBuf> {
240        match &self.registry_scope {
241            None => vec![PathBuf::from(&self.source)],
242            Some(scope) => ctx
243                .index
244                .files()
245                .filter(|e| scope.matches(&e.path, ctx.index))
246                .map(|e| e.path.to_path_buf())
247                .collect(),
248        }
249    }
250
251    fn base_dir(&self, registry_rel: &Path) -> PathBuf {
252        match &self.base {
253            Base::RegistryDir => registry_rel
254                .parent()
255                .map(Path::to_path_buf)
256                .unwrap_or_default(),
257            Base::LintRoot => PathBuf::new(),
258            Base::Explicit(p) => p.clone(),
259        }
260    }
261
262    fn extract_entries(&self, text: &str) -> std::result::Result<(Vec<String>, usize), String> {
263        let raw = extract_values(&self.extract, text)?;
264        let before = raw.len();
265        let kept: Vec<String> = raw.into_iter().filter(|e| !is_non_literal(e)).collect();
266        let skipped = before - kept.len();
267        Ok((kept, skipped))
268    }
269
270    fn excluded_entries(&self, text: &str) -> HashSet<String> {
271        let Some(q) = &self.exclude_query else {
272            return HashSet::new();
273        };
274        // exclude_query is a structured query; for line/regex
275        // registries it has no meaning, so fall back to a TOML
276        // read (a misconfig surfaces as an empty set, not a panic).
277        let ex = match &self.extract {
278            Extract::Json(_) => Extract::Json(q.clone()),
279            Extract::Yaml(_) => Extract::Yaml(q.clone()),
280            _ => Extract::Toml(q.clone()),
281        };
282        extract_values(&ex, text)
283            .map(|v| v.into_iter().collect())
284            .unwrap_or_default()
285    }
286
287    /// Reverse-completeness: on-disk artefacts under `orphans.space`
288    /// that no (post-expansion) entry covered.
289    fn check_orphans(
290        &self,
291        ctx: &Context<'_>,
292        registry_rel: &Path,
293        covered: &[PathBuf],
294        out: &mut Vec<Violation>,
295    ) {
296        let Some(orph) = &self.orphans else {
297            return;
298        };
299        if orph.unreferenced == Severity::Off {
300            return;
301        }
302        let covered_set: HashSet<&Path> = covered.iter().map(PathBuf::as_path).collect();
303        let Ok(space) = Scope::from_patterns(std::slice::from_ref(&orph.space)) else {
304            return;
305        };
306        for e in ctx.index.files() {
307            if space.matches(&e.path, ctx.index) && !covered_set.contains(&*e.path) {
308                out.push(
309                    Violation::new(format!(
310                        "{} is under `{}` but no entry in {} references it",
311                        e.path.display(),
312                        orph.space,
313                        registry_rel.display(),
314                    ))
315                    .with_path(e.path.clone()),
316                );
317            }
318        }
319    }
320
321    fn glob_matches(ctx: &Context<'_>, pattern: &Path) -> Vec<PathBuf> {
322        let pat = pattern.to_string_lossy().into_owned();
323        let Ok(scope) = Scope::from_patterns(&[pat]) else {
324            return Vec::new();
325        };
326        ctx.index
327            .files()
328            .filter(|e| scope.matches(&e.path, ctx.index))
329            .map(|e| e.path.to_path_buf())
330            .chain(
331                ctx.index
332                    .dirs()
333                    .filter(|e| scope.matches(&e.path, ctx.index))
334                    .map(|e| e.path.to_path_buf()),
335            )
336            .collect()
337    }
338
339    /// `None` => the resolved path is fine. `Some(reason)` => a
340    /// violation message fragment.
341    fn existence_problem(
342        &self,
343        ctx: &Context<'_>,
344        path: &Path,
345        dir_set: &HashSet<&Path>,
346    ) -> Option<String> {
347        let is_file = ctx.index.contains_file(path);
348        let is_dir = dir_set.contains(path);
349        match self.expect {
350            Expect::File => {
351                if !is_file {
352                    return Some("does not resolve to a file on disk".into());
353                }
354            }
355            Expect::Dir => {
356                if !is_dir {
357                    return Some("does not resolve to a directory on disk".into());
358                }
359            }
360            Expect::Any => {
361                if !is_file && !is_dir {
362                    return Some("does not resolve to any path on disk".into());
363                }
364            }
365        }
366        if let Some(mc) = &self.must_contain {
367            // Only meaningful when the entry is a directory.
368            if is_dir && !ctx.index.contains_file(&path.join(mc)) {
369                return Some(format!("resolves to a directory missing `{mc}`"));
370            }
371        }
372        None
373    }
374
375    fn violation(&self, registry: &Path, entry: &str, reason: &str) -> Violation {
376        let msg = self
377            .message
378            .clone()
379            .unwrap_or_else(|| format!("{}: entry {entry:?} {reason}", registry.display()));
380        Violation::new(msg).with_path(registry.to_path_buf())
381    }
382}
383
/// Collapse `a/./b` and `a/b/../c` so index lookups (which key on
/// the walked relative path) match. Does not touch the
/// filesystem.
///
/// A `..` that has nothing left to cancel is kept (`../x` stays `../x`,
/// `a/../../x` becomes `../x`) rather than dropped: dropping it would turn
/// an entry that escapes the base into an unrelated in-tree path and check
/// the wrong artefact. A `..` directly under a root or prefix stays at that
/// root (`/../x` becomes `/x`).
fn normalise(p: &Path) -> PathBuf {
    use std::path::Component::{CurDir, Normal, ParentDir, Prefix, RootDir};

    let mut out = PathBuf::new();
    for comp in p.components() {
        match comp {
            CurDir => {}
            ParentDir => {
                // Classify the current tail before mutating `out`.
                let (poppable, at_root) = match out.components().next_back() {
                    Some(Normal(_)) => (true, false),
                    Some(RootDir | Prefix(_)) => (false, true),
                    Some(ParentDir | CurDir) | None => (false, false),
                };
                if poppable {
                    out.pop();
                } else if !at_root {
                    out.push("..");
                }
            }
            Normal(c) => out.push(c),
            RootDir | Prefix(_) => out.push(comp.as_os_str()),
        }
    }
    out
}
402
403pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
404    alint_core::reject_scope_filter_on_cross_file(spec, "registry_paths_resolve")?;
405    let opts: Options = spec
406        .deserialize_options()
407        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
408
409    if opts.source.trim().is_empty() {
410        return Err(Error::rule_config(
411            &spec.id,
412            "registry_paths_resolve `source` must not be empty",
413        ));
414    }
415    // A glob source is resolved against the index; a literal one
416    // is read directly. `is_glob` mirrors the structured-path /
417    // file_exists literal test.
418    let is_glob = opts
419        .source
420        .chars()
421        .any(|c| matches!(c, '*' | '?' | '[' | ']' | '{' | '}'));
422    let registry_scope = if is_glob {
423        Some(
424            Scope::from_patterns(std::slice::from_ref(&opts.source))
425                .map_err(|e| Error::rule_config(&spec.id, format!("invalid `source` glob: {e}")))?,
426        )
427    } else {
428        None
429    };
430    let extract = opts
431        .extract
432        .resolve()
433        .map_err(|e| Error::rule_config(&spec.id, format!("invalid `extract`: {e}")))?;
434    if let Extract::Regex(p) = &extract {
435        Regex::new(p)
436            .map_err(|e| Error::rule_config(&spec.id, format!("invalid `extract.regex`: {e}")))?;
437    }
438
439    Ok(Box::new(RegistryPathsResolveRule {
440        id: spec.id.clone(),
441        level: spec.level,
442        policy_url: spec.policy_url.clone(),
443        message: spec.message.clone(),
444        source: opts.source,
445        registry_scope,
446        extract,
447        base: Base::parse(opts.base.as_deref()),
448        entries_are_globs: opts.entries_are_globs,
449        expect: opts.expect,
450        must_contain: opts.must_contain,
451        exclude_query: opts.exclude_query,
452        orphans: opts.orphans,
453    }))
454}
455
#[cfg(test)]
mod tests {
    use super::*;
    use crate::extract::LinesOpts;
    use alint_core::{FileEntry, FileIndex};

    /// In-memory index holding `files` (size 1) followed by `dirs` (size 0).
    fn index(files: &[&str], dirs: &[&str]) -> FileIndex {
        let as_entry = |p: &&str, is_dir: bool| FileEntry {
            path: Path::new(p).into(),
            is_dir,
            size: if is_dir { 0 } else { 1 },
        };
        let entries: Vec<FileEntry> = files
            .iter()
            .map(|p| as_entry(p, false))
            .chain(dirs.iter().map(|p| as_entry(p, true)))
            .collect();
        FileIndex::from_entries(entries)
    }

    /// Assemble a rule straight from options, bypassing `build` (so the
    /// source is always treated as a literal path).
    fn rule(opts: Options) -> RegistryPathsResolveRule {
        let Options {
            source,
            extract,
            base,
            entries_are_globs,
            expect,
            must_contain,
            exclude_query,
            orphans,
        } = opts;
        RegistryPathsResolveRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: None,
            source,
            registry_scope: None,
            extract: extract.resolve().expect("test extract valid"),
            base: Base::parse(base.as_deref()),
            entries_are_globs,
            expect,
            must_contain,
            exclude_query,
            orphans,
        }
    }

    /// Default options for `source` + `extract`; tests tweak fields as needed.
    fn opts(source: &str, extract: Extract) -> Options {
        Options {
            source: source.into(),
            extract: extract.into(),
            base: None,
            entries_are_globs: false,
            expect: Expect::Any,
            must_contain: None,
            exclude_query: None,
            orphans: None,
        }
    }

    /// Evaluate `r` against `idx`, rooted at `root`.
    fn eval(r: &RegistryPathsResolveRule, root: &Path, idx: &FileIndex) -> Vec<Violation> {
        let ctx = Context {
            root,
            index: idx,
            registry: None,
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        r.evaluate(&ctx).unwrap()
    }

    /// Fresh temp dir containing one registry file `name` with `body`.
    fn registry(name: &str, body: &str) -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join(name), body).unwrap();
        dir
    }

    /// TOML extractor for the Cargo workspace member list.
    fn members() -> Extract {
        Extract::Toml("$.workspace.members[*]".into())
    }

    #[test]
    fn lines_entries_resolve_pass_and_fail() {
        let dir = registry("MANIFEST", "src/a.rs\nsrc/b.rs\n# a comment\n");
        let r = rule(opts("MANIFEST", Extract::Lines(LinesOpts::default())));

        // Both present -> pass.
        let all_present = index(&["src/a.rs", "src/b.rs", "MANIFEST"], &[]);
        let v = eval(&r, dir.path(), &all_present);
        assert!(v.is_empty(), "{v:?}");

        // b.rs missing -> one violation.
        let b_missing = index(&["src/a.rs", "MANIFEST"], &[]);
        let v = eval(&r, dir.path(), &b_missing);
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains("src/b.rs"));
    }

    #[test]
    fn toml_workspace_members_expect_dir_must_contain() {
        let dir = registry(
            "Cargo.toml",
            "[workspace]\nmembers = [\"crates/core\", \"crates/cli\"]\n",
        );
        let mut o = opts("Cargo.toml", members());
        o.expect = Expect::Dir;
        o.must_contain = Some("Cargo.toml".into());
        let r = rule(o);
        let crate_dirs = ["crates/core", "crates/cli"];

        // Both crate dirs exist and contain Cargo.toml -> pass.
        let complete = index(
            &[
                "crates/core/Cargo.toml",
                "crates/cli/Cargo.toml",
                "Cargo.toml",
            ],
            &crate_dirs,
        );
        assert!(eval(&r, dir.path(), &complete).is_empty());

        // cli dir missing its Cargo.toml -> must_contain violation.
        let cli_incomplete = index(&["crates/core/Cargo.toml", "Cargo.toml"], &crate_dirs);
        let v = eval(&r, dir.path(), &cli_incomplete);
        assert_eq!(v.len(), 1, "{v:?}");
        assert!(v[0].message.contains("crates/cli"));
    }

    #[test]
    fn non_literal_entries_are_skipped_not_failed() {
        let dir = registry(
            "pkgs.nix",
            "callPackage ./pkgs/real {}\ncallPackage ${pkgs.x}/lib {}\n",
        );
        let r = rule(opts(
            "pkgs.nix",
            Extract::Regex(r"callPackage\s+(\S+)".into()),
        ));
        // Only the literal `./pkgs/real` is checked; the genuinely
        // interpolated `${pkgs.x}/lib` entry is skipped (not a violation).
        // Narrowed is_non_literal: the captured token must carry a real
        // `${`/`$(`/`{{`/`+ ` marker — a bare `(.`/`$` no longer over-skips
        // a real literal path (v0.10 post-audit P2).
        let idx = index(&["pkgs.nix"], &["pkgs/real"]);
        let v = eval(&r, dir.path(), &idx);
        assert!(v.is_empty(), "non-literal must be skipped, got {v:?}");
    }

    #[test]
    fn entries_are_globs_zero_match_is_a_violation() {
        let dir = registry("Cargo.toml", "[workspace]\nmembers = [\"crates/*\"]\n");
        let mut o = opts("Cargo.toml", members());
        o.entries_are_globs = true;
        let r = rule(o);
        // No crates/* on disk -> the glob matched nothing.
        let idx = index(&["Cargo.toml"], &[]);
        let v = eval(&r, dir.path(), &idx);
        assert_eq!(v.len(), 1, "{v:?}");
        assert!(v[0].message.contains("no path"));
    }

    #[test]
    fn orphans_flags_unreferenced_dir() {
        let dir = registry("Cargo.toml", "[workspace]\nmembers = [\"crates/a\"]\n");
        let mut o = opts("Cargo.toml", members());
        o.orphans = Some(OrphansSpec {
            space: "crates/*/Cargo.toml".into(),
            unreferenced: Severity::Error,
        });
        let r = rule(o);
        // crates/b exists on disk but isn't a member -> orphan.
        let idx = index(
            &["crates/a/Cargo.toml", "crates/b/Cargo.toml", "Cargo.toml"],
            &["crates/a", "crates/b"],
        );
        let v = eval(&r, dir.path(), &idx);
        assert!(
            v.iter().any(|x| x.message.contains("crates/b/Cargo.toml")),
            "expected crates/b flagged as orphan, got {v:?}"
        );
    }

    #[test]
    fn exclude_query_subtracts_before_checking() {
        let dir = registry(
            "Cargo.toml",
            "[workspace]\nmembers = [\"a\", \"b\"]\nexclude = [\"b\"]\n",
        );
        let mut o = opts("Cargo.toml", members());
        o.exclude_query = Some("$.workspace.exclude[*]".into());
        o.expect = Expect::Dir;
        let r = rule(o);
        // `b` is excluded, so its absence must not fail; `a` exists.
        let idx = index(&["Cargo.toml"], &["a"]);
        assert!(eval(&r, dir.path(), &idx).is_empty());
    }
}
655        // `b` is excluded, so its absence must not fail; `a` exists.
656        let idx = index(&["Cargo.toml"], &["a"]);
657        assert!(eval(&r, dir.path(), &idx).is_empty());
658    }
659}