use std::collections::HashSet;
use std::path::{Path, PathBuf};
use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
use regex::Regex;
use serde::Deserialize;
use crate::extract::{Extract, ExtractSpec, extract_values, is_non_literal};
/// Kind of filesystem object a registry entry is required to resolve to.
///
/// Deserialized from the lowercase variant name (`any`, `file`, `dir`).
#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum Expect {
/// Either an indexed file or an indexed directory is accepted (the default).
#[default]
Any,
/// The entry must be a file in the index.
File,
/// The entry must be a directory in the index.
Dir,
}
/// Setting for the orphan (unreferenced path) check.
///
/// Deserialized from the lowercase variant name (`warn`, `error`, `off`).
/// NOTE(review): within this file only `Off` changes behaviour (it disables
/// the orphan check); `Warn` and `Error` are not distinguished here.
#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum Severity {
/// Default setting.
#[default]
Warn,
Error,
/// Skip the orphan check entirely.
Off,
}
/// Configuration of the orphan check: indexed files matching `space` that no
/// registry entry covers are reported.
///
/// Unknown keys are rejected at deserialization time.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
struct OrphansSpec {
/// Glob pattern selecting the indexed files that are expected to be referenced.
space: String,
/// Orphan reporting setting; defaults to `Severity::Warn` when omitted.
#[serde(default)]
unreferenced: Severity,
}
/// User-facing options of the `registry_paths_resolve` rule, as deserialized
/// from the rule spec. Unknown keys are rejected.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
/// Path of the registry file, or a glob selecting several registry files
/// (treated as a glob when it contains any of `* ? [ ] { }`).
source: String,
/// How entries are extracted from the registry file's text.
extract: ExtractSpec,
/// Directory entries are resolved against: `"registry_dir"` (also the
/// default when omitted), `"lint_root"`, or any other string taken as an
/// explicit path.
#[serde(default)]
base: Option<String>,
/// When `true`, each entry is a glob pattern that must match at least one
/// indexed path instead of naming a single path.
#[serde(default)]
entries_are_globs: bool,
/// Kind of object each entry must resolve to; defaults to `Expect::Any`.
#[serde(default)]
expect: Expect,
/// Relative path that must exist as an indexed file inside every entry
/// that resolves to a directory.
#[serde(default)]
must_contain: Option<String>,
/// Query run against the same registry text; the values it yields are
/// removed from the entry list before checking. Evaluated as JSON or YAML
/// when `extract` is of that kind, and as TOML for every other kind.
#[serde(default)]
exclude_query: Option<String>,
/// Optional orphan check configuration; no orphan check when omitted.
#[serde(default)]
orphans: Option<OrphansSpec>,
}
/// Directory that registry entries are resolved against.
#[derive(Debug, Clone)]
enum Base {
    /// The directory containing the registry file itself.
    RegistryDir,
    /// The root of the linted tree.
    LintRoot,
    /// A caller-supplied directory.
    Explicit(PathBuf),
}

impl Base {
    /// Interprets the raw `base` option.
    ///
    /// A missing value and the keyword `"registry_dir"` both select
    /// [`Base::RegistryDir`]; `"lint_root"` selects [`Base::LintRoot`]; every
    /// other string is taken verbatim as an explicit path.
    fn parse(raw: Option<&str>) -> Self {
        let Some(keyword) = raw else {
            return Self::RegistryDir;
        };
        if keyword == "registry_dir" {
            Self::RegistryDir
        } else if keyword == "lint_root" {
            Self::LintRoot
        } else {
            Self::Explicit(PathBuf::from(keyword))
        }
    }
}
/// Rule that checks that the paths listed in one or more registry files
/// resolve to paths present in the file index.
#[derive(Debug)]
pub struct RegistryPathsResolveRule {
// Rule metadata copied from the `RuleSpec` in `build`.
// NOTE(review): presumably consumed by `rule_common_impl!()` — confirm against the macro.
id: String,
level: Level,
policy_url: Option<String>,
/// Optional custom message; when set it replaces the generated per-entry
/// message in `violation`.
message: Option<String>,
/// The `source` option as given; used directly as the registry path when
/// `registry_scope` is `None`.
source: String,
/// Compiled scope for `source` when it is a glob; `None` for a plain path.
registry_scope: Option<Scope>,
/// Resolved extraction strategy for registry entries.
extract: Extract,
/// Directory entries are resolved against.
base: Base,
/// Whether entries are glob patterns rather than single paths.
entries_are_globs: bool,
/// Kind of object each entry must resolve to.
expect: Expect,
/// Relative file that must exist inside entries resolving to a directory.
must_contain: Option<String>,
/// Query whose extracted values are removed from the entry list.
exclude_query: Option<String>,
/// Orphan check configuration, if any.
orphans: Option<OrphansSpec>,
}
impl Rule for RegistryPathsResolveRule {
    alint_core::rule_common_impl!();

    /// Always `true`: entries are resolved against `ctx.index`, so the rule
    /// asks for the full index.
    fn requires_full_index(&self) -> bool {
        true
    }

    /// Checks every registry file selected by `source` and collects violations.
    ///
    /// For each registry file this:
    /// 1. reads it through `crate::io::read_capped`; a read failure is
    ///    reported as a violation on the registry file and the file is skipped;
    /// 2. extracts the literal entries (an extraction failure is likewise a
    ///    violation) and drops the ones yielded by `exclude_query`;
    /// 3. resolves each remaining entry against the configured base directory
    ///    and checks it with `existence_problem` — or, when
    ///    `entries_are_globs` is set, requires the pattern to match at least
    ///    one indexed path and then checks every match;
    /// 4. runs the orphan check over the paths the entries covered.
    ///
    /// The code in this method never returns `Err`; every problem is reported
    /// as a `Violation`.
    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
        let mut violations = Vec::new();

        // The directory set is only consulted when a directory can satisfy the
        // rule (`dir` / `any`) or when `must_contain` is configured; skip
        // building it otherwise.
        let needs_dirs =
            matches!(self.expect, Expect::Dir | Expect::Any) || self.must_contain.is_some();
        let dir_set: HashSet<&Path> = if needs_dirs {
            ctx.index.dirs().map(|e| &*e.path).collect()
        } else {
            HashSet::new()
        };

        for registry_rel in self.registry_files(ctx) {
            let abs = ctx.root.join(&registry_rel);
            let text = match crate::io::read_capped(&abs) {
                Ok(b) => String::from_utf8_lossy(&b).into_owned(),
                Err(e) => {
                    let why = match e {
                        crate::io::ReadCapError::TooLarge(n) => {
                            format!("is too large to analyze ({n} bytes; 256 MiB cap)")
                        }
                        crate::io::ReadCapError::Io(e) => {
                            format!("could not be read: {e}")
                        }
                    };
                    violations.push(
                        Violation::new(format!("registry file {} {why}", registry_rel.display()))
                            .with_path(registry_rel.clone()),
                    );
                    continue;
                }
            };

            // The count of skipped non-literal entries is not reported here.
            let (entries, _skipped) = match self.extract_entries(&text) {
                Ok(v) => v,
                Err(e) => {
                    violations.push(
                        Violation::new(format!(
                            "registry file {} could not be parsed for `extract`: {e}",
                            registry_rel.display()
                        ))
                        .with_path(registry_rel.clone()),
                    );
                    continue;
                }
            };

            let excluded = self.excluded_entries(&text);
            let base_dir = self.base_dir(&registry_rel);
            // Paths the registry accounts for; fed to the orphan check below.
            let mut covered: Vec<PathBuf> = Vec::new();

            for entry in &entries {
                if excluded.contains(entry) {
                    continue;
                }
                let resolved = normalise(&base_dir.join(entry));
                if self.entries_are_globs {
                    let matches = Self::glob_matches(ctx, &resolved);
                    if matches.is_empty() {
                        violations.push(self.violation(
                            &registry_rel,
                            entry,
                            "matched no path on disk",
                        ));
                    } else {
                        covered.extend(matches);
                    }
                    continue;
                }
                covered.push(resolved.clone());
                if let Some(reason) = self.existence_problem(ctx, &resolved, &dir_set) {
                    violations.push(self.violation(&registry_rel, entry, &reason));
                }
            }

            // In glob mode the individual matches (not the patterns) are what
            // must satisfy `expect` / `must_contain`.
            if self.entries_are_globs {
                for p in &covered {
                    if let Some(reason) = self.existence_problem(ctx, p, &dir_set) {
                        violations.push(self.violation(
                            &registry_rel,
                            &p.display().to_string(),
                            &reason,
                        ));
                    }
                }
            }

            self.check_orphans(ctx, &registry_rel, &covered, &mut violations);
        }
        Ok(violations)
    }
}
impl RegistryPathsResolveRule {
/// Lists the registry files to check, relative to the lint root.
///
/// A plain `source` yields exactly that path (whether or not it is indexed);
/// a glob `source` yields every indexed file the compiled scope matches.
fn registry_files(&self, ctx: &Context<'_>) -> Vec<PathBuf> {
    let Some(scope) = &self.registry_scope else {
        return vec![PathBuf::from(&self.source)];
    };
    let mut found = Vec::new();
    for file in ctx.index.files() {
        if scope.matches(&file.path, ctx.index) {
            found.push(file.path.to_path_buf());
        }
    }
    found
}
/// Returns the directory that entries of `registry_rel` are joined onto.
///
/// The lint root is represented by the empty path; a registry file without a
/// parent component also falls back to the empty path.
fn base_dir(&self, registry_rel: &Path) -> PathBuf {
    match &self.base {
        Base::Explicit(dir) => dir.clone(),
        Base::LintRoot => PathBuf::new(),
        Base::RegistryDir => match registry_rel.parent() {
            Some(parent) => parent.to_path_buf(),
            None => PathBuf::default(),
        },
    }
}
/// Extracts the registry entries from `text`.
///
/// Returns the entries that are literal paths together with the number of
/// entries dropped because `is_non_literal` flagged them. Extraction errors
/// are passed through unchanged.
fn extract_entries(&self, text: &str) -> std::result::Result<(Vec<String>, usize), String> {
    let mut literals = Vec::new();
    let mut skipped = 0usize;
    for value in extract_values(&self.extract, text)? {
        if is_non_literal(&value) {
            skipped += 1;
        } else {
            literals.push(value);
        }
    }
    Ok((literals, skipped))
}
/// Collects the values selected by `exclude_query` from `text`.
///
/// The query is evaluated as JSON or YAML when the rule's own `extract` is of
/// that kind, and as TOML for every other kind. Without a query, or when the
/// query fails to evaluate, the result is an empty set.
fn excluded_entries(&self, text: &str) -> HashSet<String> {
    let Some(query) = self.exclude_query.as_ref() else {
        return HashSet::new();
    };
    let extractor = match &self.extract {
        Extract::Yaml(_) => Extract::Yaml(query.clone()),
        Extract::Json(_) => Extract::Json(query.clone()),
        _ => Extract::Toml(query.clone()),
    };
    match extract_values(&extractor, text) {
        Ok(values) => values.into_iter().collect(),
        Err(_) => HashSet::new(),
    }
}
/// Reports indexed files inside the orphan `space` that no registry entry covers.
///
/// Does nothing when no `orphans` option is configured, when it is set to
/// `off`, or when the `space` pattern cannot be compiled.
///
/// A file counts as covered when the file itself or any of its ancestor
/// directories is in `covered`. Registries commonly list directories (e.g.
/// workspace members) while `space` selects files beneath them, so an
/// exact-path comparison alone would flag files of listed directories.
///
/// * `registry_rel` — registry file named in the violation message.
/// * `covered` — paths the registry's entries resolved to.
/// * `out` — receives one violation per orphaned file, attached to that file.
fn check_orphans(
    &self,
    ctx: &Context<'_>,
    registry_rel: &Path,
    covered: &[PathBuf],
    out: &mut Vec<Violation>,
) {
    let Some(orph) = &self.orphans else {
        return;
    };
    if orph.unreferenced == Severity::Off {
        return;
    }
    let Ok(space) = Scope::from_patterns(std::slice::from_ref(&orph.space)) else {
        return;
    };
    let covered_set: HashSet<&Path> = covered.iter().map(PathBuf::as_path).collect();
    for e in ctx.index.files() {
        if !space.matches(&e.path, ctx.index) {
            continue;
        }
        // `ancestors()` starts with the path itself, so this also handles the
        // exact-match case.
        if e.path.ancestors().any(|a| covered_set.contains(a)) {
            continue;
        }
        out.push(
            Violation::new(format!(
                "{} is under `{}` but no entry in {} references it",
                e.path.display(),
                orph.space,
                registry_rel.display(),
            ))
            .with_path(e.path.clone()),
        );
    }
}
/// Returns every indexed path matched by `pattern`, files first and then
/// directories, each group in index order.
///
/// A pattern that cannot be compiled matches nothing.
fn glob_matches(ctx: &Context<'_>, pattern: &Path) -> Vec<PathBuf> {
    let pat = pattern.to_string_lossy().into_owned();
    let scope = match Scope::from_patterns(&[pat]) {
        Ok(scope) => scope,
        Err(_) => return Vec::new(),
    };
    let mut hits = Vec::new();
    for file in ctx.index.files() {
        if scope.matches(&file.path, ctx.index) {
            hits.push(file.path.to_path_buf());
        }
    }
    for dir in ctx.index.dirs() {
        if scope.matches(&dir.path, ctx.index) {
            hits.push(dir.path.to_path_buf());
        }
    }
    hits
}
/// Explains why `path` fails the rule's expectations, or returns `None` when
/// it satisfies them.
///
/// The `expect` kind is checked first; after that, a path that is a directory
/// must contain the `must_contain` file when one is configured.
///
/// * `dir_set` — the directories known to the index, as built by the caller.
fn existence_problem(
    &self,
    ctx: &Context<'_>,
    path: &Path,
    dir_set: &HashSet<&Path>,
) -> Option<String> {
    let is_file = ctx.index.contains_file(path);
    let is_dir = dir_set.contains(path);

    let kind_problem = match self.expect {
        Expect::File if !is_file => Some("does not resolve to a file on disk"),
        Expect::Dir if !is_dir => Some("does not resolve to a directory on disk"),
        Expect::Any if !is_file && !is_dir => Some("does not resolve to any path on disk"),
        Expect::File | Expect::Dir | Expect::Any => None,
    };
    if let Some(problem) = kind_problem {
        return Some(problem.to_owned());
    }

    match &self.must_contain {
        Some(mc) if is_dir && !ctx.index.contains_file(&path.join(mc)) => {
            Some(format!("resolves to a directory missing `{mc}`"))
        }
        _ => None,
    }
}
/// Builds a violation attached to the registry file.
///
/// The configured custom `message` is used verbatim when present; otherwise
/// the text names the registry, the offending entry and the reason.
fn violation(&self, registry: &Path, entry: &str, reason: &str) -> Violation {
    let text = match &self.message {
        Some(custom) => custom.clone(),
        None => format!("{}: entry {entry:?} {reason}", registry.display()),
    };
    Violation::new(text).with_path(registry.to_path_buf())
}
}
/// Lexically normalises `p`: drops `.` components and resolves `..` against
/// the components that precede it. The filesystem is never consulted.
///
/// A `..` that has nothing left to cancel is kept rather than discarded, so a
/// path that climbs above its starting point (`../x`, `a/../../b`) stays
/// distinguishable from one that does not. Dropping it would rewrite `../x` to
/// `x` and let an entry pointing outside the tree match an unrelated path
/// inside it. A `..` directly after a root or prefix is a no-op, as the root
/// has no parent.
fn normalise(p: &Path) -> PathBuf {
    use std::path::Component::{CurDir, Normal, ParentDir, Prefix, RootDir};

    let mut out = PathBuf::new();
    for comp in p.components() {
        match comp {
            CurDir => {}
            ParentDir => {
                let last = out.components().next_back();
                let can_pop = matches!(last, Some(Normal(_)));
                let at_root = matches!(last, Some(RootDir | Prefix(_)));
                if can_pop {
                    out.pop();
                } else if !at_root {
                    // Empty so far, or already climbing: keep the `..`.
                    out.push(comp.as_os_str());
                }
            }
            Normal(c) => out.push(c),
            RootDir | Prefix(_) => out.push(comp.as_os_str()),
        }
    }
    out
}
pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
alint_core::reject_scope_filter_on_cross_file(spec, "registry_paths_resolve")?;
let opts: Options = spec
.deserialize_options()
.map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
if opts.source.trim().is_empty() {
return Err(Error::rule_config(
&spec.id,
"registry_paths_resolve `source` must not be empty",
));
}
let is_glob = opts
.source
.chars()
.any(|c| matches!(c, '*' | '?' | '[' | ']' | '{' | '}'));
let registry_scope = if is_glob {
Some(
Scope::from_patterns(std::slice::from_ref(&opts.source))
.map_err(|e| Error::rule_config(&spec.id, format!("invalid `source` glob: {e}")))?,
)
} else {
None
};
let extract = opts
.extract
.resolve()
.map_err(|e| Error::rule_config(&spec.id, format!("invalid `extract`: {e}")))?;
if let Extract::Regex(p) = &extract {
Regex::new(p)
.map_err(|e| Error::rule_config(&spec.id, format!("invalid `extract.regex`: {e}")))?;
}
Ok(Box::new(RegistryPathsResolveRule {
id: spec.id.clone(),
level: spec.level,
policy_url: spec.policy_url.clone(),
message: spec.message.clone(),
source: opts.source,
registry_scope,
extract,
base: Base::parse(opts.base.as_deref()),
entries_are_globs: opts.entries_are_globs,
expect: opts.expect,
must_contain: opts.must_contain,
exclude_query: opts.exclude_query,
orphans: opts.orphans,
}))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::extract::LinesOpts;
use alint_core::{FileEntry, FileIndex};
/// Builds an in-memory index from relative file and directory paths.
/// Files get size 1, directories size 0.
fn index(files: &[&str], dirs: &[&str]) -> FileIndex {
    let file_entries = files.iter().map(|name| FileEntry {
        path: Path::new(name).into(),
        is_dir: false,
        size: 1,
    });
    let dir_entries = dirs.iter().map(|name| FileEntry {
        path: Path::new(name).into(),
        is_dir: true,
        size: 0,
    });
    let entries: Vec<FileEntry> = file_entries.chain(dir_entries).collect();
    FileIndex::from_entries(entries)
}
fn rule(opts: Options) -> RegistryPathsResolveRule {
RegistryPathsResolveRule {
id: "t".into(),
level: Level::Error,
policy_url: None,
message: None,
source: opts.source,
registry_scope: None,
extract: opts.extract.resolve().expect("test extract valid"),
base: Base::parse(opts.base.as_deref()),
entries_are_globs: opts.entries_are_globs,
expect: opts.expect,
must_contain: opts.must_contain,
exclude_query: opts.exclude_query,
orphans: opts.orphans,
}
}
/// Default options for tests: only `source` and `extract` are set, everything
/// else is at its neutral value.
fn opts(source: &str, extract: Extract) -> Options {
    let source = source.to_owned();
    let extract = extract.into();
    Options {
        source,
        extract,
        base: None,
        entries_are_globs: false,
        expect: Expect::Any,
        must_contain: None,
        exclude_query: None,
        orphans: None,
    }
}
/// Evaluates `r` against `idx` with `root` as the lint root and every optional
/// context facility absent; panics if evaluation returns an error.
fn eval(r: &RegistryPathsResolveRule, root: &Path, idx: &FileIndex) -> Vec<Violation> {
    r.evaluate(&Context {
        root,
        index: idx,
        registry: None,
        facts: None,
        vars: None,
        git_tracked: None,
        git_blame: None,
    })
    .unwrap()
}
/// Line-based entries: all listed files present passes; one missing file
/// yields exactly one violation naming it.
#[test]
fn lines_entries_resolve_pass_and_fail() {
    let tmp = tempfile::tempdir().unwrap();
    let manifest = tmp.path().join("MANIFEST");
    std::fs::write(&manifest, "src/a.rs\nsrc/b.rs\n# a comment\n").unwrap();
    let checker = rule(opts("MANIFEST", Extract::Lines(LinesOpts::default())));

    let complete = index(&["src/a.rs", "src/b.rs", "MANIFEST"], &[]);
    let found = eval(&checker, tmp.path(), &complete);
    assert!(found.is_empty(), "{found:?}");

    let missing_b = index(&["src/a.rs", "MANIFEST"], &[]);
    let found = eval(&checker, tmp.path(), &missing_b);
    assert_eq!(found.len(), 1);
    assert!(found[0].message.contains("src/b.rs"));
}
/// Workspace members must be directories containing a `Cargo.toml`; a member
/// directory without one is reported.
#[test]
fn toml_workspace_members_expect_dir_must_contain() {
    let tmp = tempfile::tempdir().unwrap();
    let workspace_toml = "[workspace]\nmembers = [\"crates/core\", \"crates/cli\"]\n";
    std::fs::write(tmp.path().join("Cargo.toml"), workspace_toml).unwrap();

    let mut options = opts("Cargo.toml", Extract::Toml("$.workspace.members[*]".into()));
    options.expect = Expect::Dir;
    options.must_contain = Some("Cargo.toml".into());
    let checker = rule(options);

    let member_dirs = ["crates/core", "crates/cli"];
    let complete = index(
        &[
            "crates/core/Cargo.toml",
            "crates/cli/Cargo.toml",
            "Cargo.toml",
        ],
        &member_dirs,
    );
    assert!(eval(&checker, tmp.path(), &complete).is_empty());

    let cli_manifest_missing = index(&["crates/core/Cargo.toml", "Cargo.toml"], &member_dirs);
    let found = eval(&checker, tmp.path(), &cli_manifest_missing);
    assert_eq!(found.len(), 1, "{found:?}");
    assert!(found[0].message.contains("crates/cli"));
}
/// Entries containing interpolation are skipped instead of being reported as
/// unresolved.
#[test]
fn non_literal_entries_are_skipped_not_failed() {
    let tmp = tempfile::tempdir().unwrap();
    let nix_source = "callPackage ./pkgs/real {}\ncallPackage ${pkgs.x}/lib {}\n";
    std::fs::write(tmp.path().join("pkgs.nix"), nix_source).unwrap();

    let pattern = Extract::Regex(r"callPackage\s+(\S+)".into());
    let checker = rule(opts("pkgs.nix", pattern));
    let tree = index(&["pkgs.nix"], &["pkgs/real"]);

    let found = eval(&checker, tmp.path(), &tree);
    assert!(
        found.is_empty(),
        "non-literal must be skipped, got {found:?}"
    );
}
/// In glob mode a pattern that matches nothing in the index is a violation.
#[test]
fn entries_are_globs_zero_match_is_a_violation() {
    let tmp = tempfile::tempdir().unwrap();
    let workspace_toml = "[workspace]\nmembers = [\"crates/*\"]\n";
    std::fs::write(tmp.path().join("Cargo.toml"), workspace_toml).unwrap();

    let mut options = opts("Cargo.toml", Extract::Toml("$.workspace.members[*]".into()));
    options.entries_are_globs = true;
    let checker = rule(options);

    let only_manifest = index(&["Cargo.toml"], &[]);
    let found = eval(&checker, tmp.path(), &only_manifest);
    assert_eq!(found.len(), 1, "{found:?}");
    assert!(found[0].message.contains("no path"));
}
/// A crate manifest inside the orphan space whose crate is not listed as a
/// workspace member is reported.
#[test]
fn orphans_flags_unreferenced_dir() {
    let tmp = tempfile::tempdir().unwrap();
    let workspace_toml = "[workspace]\nmembers = [\"crates/a\"]\n";
    std::fs::write(tmp.path().join("Cargo.toml"), workspace_toml).unwrap();

    let mut options = opts("Cargo.toml", Extract::Toml("$.workspace.members[*]".into()));
    options.orphans = Some(OrphansSpec {
        space: "crates/*/Cargo.toml".into(),
        unreferenced: Severity::Error,
    });
    let checker = rule(options);

    let tree = index(
        &["crates/a/Cargo.toml", "crates/b/Cargo.toml", "Cargo.toml"],
        &["crates/a", "crates/b"],
    );
    let found = eval(&checker, tmp.path(), &tree);
    let b_flagged = found
        .iter()
        .any(|violation| violation.message.contains("crates/b/Cargo.toml"));
    assert!(
        b_flagged,
        "expected crates/b flagged as orphan, got {found:?}"
    );
}
/// An entry that also appears in the `exclude_query` result is not checked,
/// so its absence from the index is not a violation.
#[test]
fn exclude_query_subtracts_before_checking() {
    let tmp = tempfile::tempdir().unwrap();
    let workspace_toml = "[workspace]\nmembers = [\"a\", \"b\"]\nexclude = [\"b\"]\n";
    std::fs::write(tmp.path().join("Cargo.toml"), workspace_toml).unwrap();

    let mut options = opts("Cargo.toml", Extract::Toml("$.workspace.members[*]".into()));
    options.exclude_query = Some("$.workspace.exclude[*]".into());
    options.expect = Expect::Dir;
    let checker = rule(options);

    let tree = index(&["Cargo.toml"], &["a"]);
    assert!(eval(&checker, tmp.path(), &tree).is_empty());
}
}