1use std::collections::HashSet;
26use std::path::{Path, PathBuf};
27
28use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
29use regex::Regex;
30use serde::Deserialize;
31
32use crate::extract::{Extract, ExtractSpec, extract_values, is_non_literal};
33
/// Kind of filesystem entry a registry entry is required to resolve to.
///
/// Deserialized from the lowercase variant name; defaults to [`Expect::Any`].
#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum Expect {
    /// Either an indexed file or an indexed directory is acceptable.
    #[default]
    Any,
    /// The entry must be an indexed file.
    File,
    /// The entry must be an indexed directory.
    Dir,
}
42
/// Value of the orphan check's `unreferenced` option.
///
/// Deserialized from the lowercase variant name; defaults to
/// [`Severity::Warn`]. Within this file only [`Severity::Off`] is inspected
/// (it disables the orphan check); `Warn` and `Error` are not told apart here.
#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum Severity {
    /// Default when the option is omitted.
    #[default]
    Warn,
    Error,
    /// Disables the orphan check.
    Off,
}
51
/// Configuration of the optional orphan check (the `orphans` option).
///
/// Unknown keys are rejected during deserialization.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
struct OrphansSpec {
    /// Glob pattern selecting the indexed files that registry entries are
    /// expected to cover.
    space: String,
    /// `off` disables the check; defaults to `warn` when omitted.
    #[serde(default)]
    unreferenced: Severity,
}
60
/// User-facing options of the `registry_paths_resolve` rule.
///
/// Unknown keys are rejected during deserialization.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// Registry file to read. Treated as a glob over the file index when it
    /// contains any of `* ? [ ] { }`, otherwise as a single literal path.
    source: String,
    /// How entries are pulled out of the registry file's text.
    extract: ExtractSpec,
    /// What entries are resolved against: `registry_dir` (also the default
    /// when omitted), `lint_root`, or any other string taken as a path.
    #[serde(default)]
    base: Option<String>,
    /// When true, each entry is matched as a glob against the index instead
    /// of being looked up as a single path.
    #[serde(default)]
    entries_are_globs: bool,
    /// Kind of filesystem entry each entry must resolve to.
    #[serde(default)]
    expect: Expect,
    /// Relative path that must exist as an indexed file inside every entry
    /// that resolves to a directory.
    #[serde(default)]
    must_contain: Option<String>,
    /// Query whose extracted values are removed from the entry list before
    /// any check runs.
    #[serde(default)]
    exclude_query: Option<String>,
    /// Optional orphan check configuration.
    #[serde(default)]
    orphans: Option<OrphansSpec>,
}
79
/// Directory that registry entries are joined onto before being looked up.
#[derive(Debug, Clone)]
enum Base {
    /// The directory containing the registry file itself.
    RegistryDir,
    /// The lint root (entries are used as root-relative paths).
    LintRoot,
    /// A caller-supplied path.
    Explicit(PathBuf),
}

impl Base {
    /// Interprets the raw `base` option.
    ///
    /// A missing value and the keyword `registry_dir` both select
    /// [`Base::RegistryDir`], `lint_root` selects [`Base::LintRoot`], and any
    /// other string is taken verbatim as an explicit path.
    fn parse(raw: Option<&str>) -> Self {
        let Some(text) = raw else {
            return Self::RegistryDir;
        };
        if text == "registry_dir" {
            Self::RegistryDir
        } else if text == "lint_root" {
            Self::LintRoot
        } else {
            Self::Explicit(PathBuf::from(text))
        }
    }
}
101
/// Rule that checks that the path entries listed in a registry file resolve
/// to entries of the file index.
#[derive(Debug)]
pub struct RegistryPathsResolveRule {
    /// Rule id, copied from the rule spec.
    id: String,
    /// Level, copied from the rule spec.
    level: Level,
    /// Policy URL, copied from the rule spec.
    policy_url: Option<String>,
    /// When set, replaces the generated text of every per-entry violation.
    message: Option<String>,
    /// The `source` option as written in the configuration.
    source: String,
    /// Compiled form of `source` when it is a glob; `None` for a literal path.
    registry_scope: Option<Scope>,
    /// Resolved extraction strategy for pulling entries out of the registry.
    extract: Extract,
    /// Directory that entries are resolved against.
    base: Base,
    /// Whether entries are globs rather than single paths.
    entries_are_globs: bool,
    /// Kind of filesystem entry each entry must resolve to.
    expect: Expect,
    /// File that must exist inside each entry that resolves to a directory.
    must_contain: Option<String>,
    /// Query whose results are subtracted from the entries before checking.
    exclude_query: Option<String>,
    /// Optional orphan check configuration.
    orphans: Option<OrphansSpec>,
}
118
119impl Rule for RegistryPathsResolveRule {
120 alint_core::rule_common_impl!();
121
122 fn requires_full_index(&self) -> bool {
123 true
127 }
128
129 fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
130 let mut violations = Vec::new();
131
132 let dir_set: HashSet<&Path> = if self.expect == Expect::Dir
136 || self.expect == Expect::Any
137 || self.must_contain.is_some()
138 {
139 ctx.index.dirs().map(|e| &*e.path).collect()
140 } else {
141 HashSet::new()
142 };
143
144 for registry_rel in self.registry_files(ctx) {
145 let abs = ctx.root.join(®istry_rel);
146 let text = match crate::io::read_capped(&abs) {
147 Ok(b) => String::from_utf8_lossy(&b).into_owned(),
148 Err(e) => {
149 let why = match e {
150 crate::io::ReadCapError::TooLarge(n) => {
151 format!("is too large to analyze ({n} bytes; 256 MiB cap)")
152 }
153 crate::io::ReadCapError::Io(e) => {
154 format!("could not be read: {e}")
155 }
156 };
157 violations.push(
158 Violation::new(format!("registry file {} {why}", registry_rel.display()))
159 .with_path(registry_rel.clone()),
160 );
161 continue;
162 }
163 };
164
165 let (entries, skipped) = match self.extract_entries(&text) {
166 Ok(v) => v,
167 Err(e) => {
168 violations.push(
169 Violation::new(format!(
170 "registry file {} could not be parsed for `extract`: {e}",
171 registry_rel.display()
172 ))
173 .with_path(registry_rel.clone()),
174 );
175 continue;
176 }
177 };
178 let _ = skipped;
185
186 let excluded = self.excluded_entries(&text);
187 let base_dir = self.base_dir(®istry_rel);
188
189 let mut covered: Vec<PathBuf> = Vec::new();
190 for entry in &entries {
191 if excluded.contains(entry) {
192 continue;
193 }
194 let resolved = normalise(&base_dir.join(entry));
195 if self.entries_are_globs {
196 let matches = Self::glob_matches(ctx, &resolved);
197 if matches.is_empty() {
198 violations.push(self.violation(
199 ®istry_rel,
200 entry,
201 "matched no path on disk",
202 ));
203 } else {
204 covered.extend(matches);
205 }
206 continue;
207 }
208 covered.push(resolved.clone());
209 if let Some(reason) = self.existence_problem(ctx, &resolved, &dir_set) {
210 violations.push(self.violation(®istry_rel, entry, &reason));
211 }
212 }
213
214 if self.entries_are_globs {
218 for p in &covered {
219 if let Some(reason) = self.existence_problem(ctx, p, &dir_set) {
220 violations.push(self.violation(
221 ®istry_rel,
222 &p.display().to_string(),
223 &reason,
224 ));
225 }
226 }
227 }
228
229 self.check_orphans(ctx, ®istry_rel, &covered, &mut violations);
230 }
231
232 Ok(violations)
233 }
234}
235
236impl RegistryPathsResolveRule {
237 fn registry_files(&self, ctx: &Context<'_>) -> Vec<PathBuf> {
240 match &self.registry_scope {
241 None => vec![PathBuf::from(&self.source)],
242 Some(scope) => ctx
243 .index
244 .files()
245 .filter(|e| scope.matches(&e.path, ctx.index))
246 .map(|e| e.path.to_path_buf())
247 .collect(),
248 }
249 }
250
251 fn base_dir(&self, registry_rel: &Path) -> PathBuf {
252 match &self.base {
253 Base::RegistryDir => registry_rel
254 .parent()
255 .map(Path::to_path_buf)
256 .unwrap_or_default(),
257 Base::LintRoot => PathBuf::new(),
258 Base::Explicit(p) => p.clone(),
259 }
260 }
261
262 fn extract_entries(&self, text: &str) -> std::result::Result<(Vec<String>, usize), String> {
263 let raw = extract_values(&self.extract, text)?;
264 let before = raw.len();
265 let kept: Vec<String> = raw.into_iter().filter(|e| !is_non_literal(e)).collect();
266 let skipped = before - kept.len();
267 Ok((kept, skipped))
268 }
269
270 fn excluded_entries(&self, text: &str) -> HashSet<String> {
271 let Some(q) = &self.exclude_query else {
272 return HashSet::new();
273 };
274 let ex = match &self.extract {
278 Extract::Json(_) => Extract::Json(q.clone()),
279 Extract::Yaml(_) => Extract::Yaml(q.clone()),
280 _ => Extract::Toml(q.clone()),
281 };
282 extract_values(&ex, text)
283 .map(|v| v.into_iter().collect())
284 .unwrap_or_default()
285 }
286
287 fn check_orphans(
290 &self,
291 ctx: &Context<'_>,
292 registry_rel: &Path,
293 covered: &[PathBuf],
294 out: &mut Vec<Violation>,
295 ) {
296 let Some(orph) = &self.orphans else {
297 return;
298 };
299 if orph.unreferenced == Severity::Off {
300 return;
301 }
302 let covered_set: HashSet<&Path> = covered.iter().map(PathBuf::as_path).collect();
303 let Ok(space) = Scope::from_patterns(std::slice::from_ref(&orph.space)) else {
304 return;
305 };
306 for e in ctx.index.files() {
307 if space.matches(&e.path, ctx.index) && !covered_set.contains(&*e.path) {
308 out.push(
309 Violation::new(format!(
310 "{} is under `{}` but no entry in {} references it",
311 e.path.display(),
312 orph.space,
313 registry_rel.display(),
314 ))
315 .with_path(e.path.clone()),
316 );
317 }
318 }
319 }
320
321 fn glob_matches(ctx: &Context<'_>, pattern: &Path) -> Vec<PathBuf> {
322 let pat = pattern.to_string_lossy().into_owned();
323 let Ok(scope) = Scope::from_patterns(&[pat]) else {
324 return Vec::new();
325 };
326 ctx.index
327 .files()
328 .filter(|e| scope.matches(&e.path, ctx.index))
329 .map(|e| e.path.to_path_buf())
330 .chain(
331 ctx.index
332 .dirs()
333 .filter(|e| scope.matches(&e.path, ctx.index))
334 .map(|e| e.path.to_path_buf()),
335 )
336 .collect()
337 }
338
339 fn existence_problem(
342 &self,
343 ctx: &Context<'_>,
344 path: &Path,
345 dir_set: &HashSet<&Path>,
346 ) -> Option<String> {
347 let is_file = ctx.index.contains_file(path);
348 let is_dir = dir_set.contains(path);
349 match self.expect {
350 Expect::File => {
351 if !is_file {
352 return Some("does not resolve to a file on disk".into());
353 }
354 }
355 Expect::Dir => {
356 if !is_dir {
357 return Some("does not resolve to a directory on disk".into());
358 }
359 }
360 Expect::Any => {
361 if !is_file && !is_dir {
362 return Some("does not resolve to any path on disk".into());
363 }
364 }
365 }
366 if let Some(mc) = &self.must_contain {
367 if is_dir && !ctx.index.contains_file(&path.join(mc)) {
369 return Some(format!("resolves to a directory missing `{mc}`"));
370 }
371 }
372 None
373 }
374
375 fn violation(&self, registry: &Path, entry: &str, reason: &str) -> Violation {
376 let msg = self
377 .message
378 .clone()
379 .unwrap_or_else(|| format!("{}: entry {entry:?} {reason}", registry.display()));
380 Violation::new(msg).with_path(registry.to_path_buf())
381 }
382}
383
/// Lexically normalises `p` without touching the filesystem.
///
/// `.` components are dropped and each `..` cancels the normal component
/// before it. A `..` with nothing to cancel is kept on a relative path, so
/// `../x` stays `../x` instead of collapsing to `x` and aliasing an unrelated
/// path. Directly after a root or prefix a `..` is dropped.
fn normalise(p: &Path) -> PathBuf {
    use std::path::Component;

    let mut out = PathBuf::new();
    for comp in p.components() {
        match comp {
            Component::CurDir => {}
            Component::ParentDir => {
                let can_pop = matches!(
                    out.components().next_back(),
                    Some(Component::Normal(_))
                );
                let at_root = matches!(
                    out.components().next_back(),
                    Some(Component::RootDir | Component::Prefix(_))
                );
                if can_pop {
                    out.pop();
                } else if !at_root {
                    // Empty so far, or already a run of `..`: keep climbing.
                    out.push("..");
                }
            }
            Component::Normal(c) => out.push(c),
            Component::RootDir | Component::Prefix(_) => out.push(comp.as_os_str()),
        }
    }
    out
}
402
403pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
404 alint_core::reject_scope_filter_on_cross_file(spec, "registry_paths_resolve")?;
405 let opts: Options = spec
406 .deserialize_options()
407 .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
408
409 if opts.source.trim().is_empty() {
410 return Err(Error::rule_config(
411 &spec.id,
412 "registry_paths_resolve `source` must not be empty",
413 ));
414 }
415 let is_glob = opts
419 .source
420 .chars()
421 .any(|c| matches!(c, '*' | '?' | '[' | ']' | '{' | '}'));
422 let registry_scope = if is_glob {
423 Some(
424 Scope::from_patterns(std::slice::from_ref(&opts.source))
425 .map_err(|e| Error::rule_config(&spec.id, format!("invalid `source` glob: {e}")))?,
426 )
427 } else {
428 None
429 };
430 let extract = opts
431 .extract
432 .resolve()
433 .map_err(|e| Error::rule_config(&spec.id, format!("invalid `extract`: {e}")))?;
434 if let Extract::Regex(p) = &extract {
435 Regex::new(p)
436 .map_err(|e| Error::rule_config(&spec.id, format!("invalid `extract.regex`: {e}")))?;
437 }
438
439 Ok(Box::new(RegistryPathsResolveRule {
440 id: spec.id.clone(),
441 level: spec.level,
442 policy_url: spec.policy_url.clone(),
443 message: spec.message.clone(),
444 source: opts.source,
445 registry_scope,
446 extract,
447 base: Base::parse(opts.base.as_deref()),
448 entries_are_globs: opts.entries_are_globs,
449 expect: opts.expect,
450 must_contain: opts.must_contain,
451 exclude_query: opts.exclude_query,
452 orphans: opts.orphans,
453 }))
454}
455
#[cfg(test)]
mod tests {
    use super::*;
    use crate::extract::LinesOpts;
    use alint_core::{FileEntry, FileIndex};

    /// Builds an index holding `files` (size 1) followed by `dirs` (size 0).
    fn index(files: &[&str], dirs: &[&str]) -> FileIndex {
        let entry = |p: &&str, is_dir: bool| FileEntry {
            path: Path::new(p).into(),
            is_dir,
            size: if is_dir { 0 } else { 1 },
        };
        let entries: Vec<FileEntry> = files
            .iter()
            .map(|p| entry(p, false))
            .chain(dirs.iter().map(|p| entry(p, true)))
            .collect();
        FileIndex::from_entries(entries)
    }

    /// Assembles a rule straight from `Options`, always with a literal
    /// (non-glob) `source`.
    fn rule(opts: Options) -> RegistryPathsResolveRule {
        let Options {
            source,
            extract,
            base,
            entries_are_globs,
            expect,
            must_contain,
            exclude_query,
            orphans,
        } = opts;
        RegistryPathsResolveRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: None,
            source,
            registry_scope: None,
            extract: extract.resolve().expect("test extract valid"),
            base: Base::parse(base.as_deref()),
            entries_are_globs,
            expect,
            must_contain,
            exclude_query,
            orphans,
        }
    }

    /// Options with every optional setting left at its default.
    fn opts(source: &str, extract: Extract) -> Options {
        Options {
            source: source.into(),
            extract: extract.into(),
            base: None,
            entries_are_globs: false,
            expect: Expect::Any,
            must_contain: None,
            exclude_query: None,
            orphans: None,
        }
    }

    /// Evaluates `r` against `idx` with `root` as the lint root.
    fn eval(r: &RegistryPathsResolveRule, root: &Path, idx: &FileIndex) -> Vec<Violation> {
        let ctx = Context {
            root,
            index: idx,
            registry: None,
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        r.evaluate(&ctx).unwrap()
    }

    #[test]
    fn lines_entries_resolve_pass_and_fail() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        std::fs::write(root.join("MANIFEST"), "src/a.rs\nsrc/b.rs\n# a comment\n").unwrap();
        let checker = rule(opts("MANIFEST", Extract::Lines(LinesOpts::default())));

        // Both listed files are indexed.
        let complete = index(&["src/a.rs", "src/b.rs", "MANIFEST"], &[]);
        let v = eval(&checker, root, &complete);
        assert!(v.is_empty(), "{v:?}");

        // `src/b.rs` is listed but not indexed.
        let without_b = index(&["src/a.rs", "MANIFEST"], &[]);
        let v = eval(&checker, root, &without_b);
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains("src/b.rs"));
    }

    #[test]
    fn toml_workspace_members_expect_dir_must_contain() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        std::fs::write(
            root.join("Cargo.toml"),
            "[workspace]\nmembers = [\"crates/core\", \"crates/cli\"]\n",
        )
        .unwrap();
        let mut settings = opts("Cargo.toml", Extract::Toml("$.workspace.members[*]".into()));
        settings.expect = Expect::Dir;
        settings.must_contain = Some("Cargo.toml".into());
        let checker = rule(settings);
        let member_dirs = ["crates/core", "crates/cli"];

        // Every member directory has its manifest.
        let complete = index(
            &[
                "crates/core/Cargo.toml",
                "crates/cli/Cargo.toml",
                "Cargo.toml",
            ],
            &member_dirs,
        );
        assert!(eval(&checker, root, &complete).is_empty());

        // `crates/cli` exists but has no manifest.
        let cli_without_manifest = index(&["crates/core/Cargo.toml", "Cargo.toml"], &member_dirs);
        let v = eval(&checker, root, &cli_without_manifest);
        assert_eq!(v.len(), 1, "{v:?}");
        assert!(v[0].message.contains("crates/cli"));
    }

    #[test]
    fn non_literal_entries_are_skipped_not_failed() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        std::fs::write(
            root.join("pkgs.nix"),
            "callPackage ./pkgs/real {}\ncallPackage ${pkgs.x}/lib {}\n",
        )
        .unwrap();
        let extract = Extract::Regex(r"callPackage\s+(\S+)".into());
        let checker = rule(opts("pkgs.nix", extract));
        let idx = index(&["pkgs.nix"], &["pkgs/real"]);

        let v = eval(&checker, root, &idx);
        assert!(v.is_empty(), "non-literal must be skipped, got {v:?}");
    }

    #[test]
    fn entries_are_globs_zero_match_is_a_violation() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        std::fs::write(
            root.join("Cargo.toml"),
            "[workspace]\nmembers = [\"crates/*\"]\n",
        )
        .unwrap();
        let mut settings = opts("Cargo.toml", Extract::Toml("$.workspace.members[*]".into()));
        settings.entries_are_globs = true;
        let checker = rule(settings);

        let only_manifest = index(&["Cargo.toml"], &[]);
        let v = eval(&checker, root, &only_manifest);
        assert_eq!(v.len(), 1, "{v:?}");
        assert!(v[0].message.contains("no path"));
    }

    #[test]
    fn orphans_flags_unreferenced_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        std::fs::write(
            root.join("Cargo.toml"),
            "[workspace]\nmembers = [\"crates/a\"]\n",
        )
        .unwrap();
        let mut settings = opts("Cargo.toml", Extract::Toml("$.workspace.members[*]".into()));
        settings.orphans = Some(OrphansSpec {
            space: "crates/*/Cargo.toml".into(),
            unreferenced: Severity::Error,
        });
        let checker = rule(settings);

        // `crates/b` is on disk but not listed as a member.
        let idx = index(
            &["crates/a/Cargo.toml", "crates/b/Cargo.toml", "Cargo.toml"],
            &["crates/a", "crates/b"],
        );
        let v = eval(&checker, root, &idx);
        let flagged_b = v.iter().any(|x| x.message.contains("crates/b/Cargo.toml"));
        assert!(flagged_b, "expected crates/b flagged as orphan, got {v:?}");
    }

    #[test]
    fn exclude_query_subtracts_before_checking() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        std::fs::write(
            root.join("Cargo.toml"),
            "[workspace]\nmembers = [\"a\", \"b\"]\nexclude = [\"b\"]\n",
        )
        .unwrap();
        let mut settings = opts("Cargo.toml", Extract::Toml("$.workspace.members[*]".into()));
        settings.exclude_query = Some("$.workspace.exclude[*]".into());
        settings.expect = Expect::Dir;
        let checker = rule(settings);

        // `b` is excluded, so its missing directory must not be reported.
        let idx = index(&["Cargo.toml"], &["a"]);
        assert!(eval(&checker, root, &idx).is_empty());
    }
}