skill_veil_core/scanner/mod.rs
1//! Scanner module for orchestrating skill analysis.
2//!
3//! This module is the **composition root** for the hexagonal layout: it is
4//! the only place in the core domain that legitimately imports concrete
5//! adapter types. Everything else in the domain depends on `ports` traits
6//! and gets adapters injected through `Scanner::with_custom_adapters`. The
7//! `with_std_adapters` / `new` constructors below wire the standard
8//! defaults (`StdFileSystemProvider`, `RegexPatternMatcher`,
9//! `PulldownMarkdownParser`); CLI code re-uses those defaults rather than
10//! reaching into `adapters/` itself. See `CLAUDE.md` → "Clean architecture"
11//! for the rule and `patterns.rs` for the façade pattern used elsewhere
12//! when an adapter import would otherwise smear across the boundary.
13
14use crate::adapters::{PulldownMarkdownParser, RegexPatternMatcher, StdFileSystemProvider};
15use crate::analyzer::SkillDocument;
16use crate::artifact_graph::ArtifactGraph;
17use crate::policy::{BaselineFile, DispositionOverlay, PolicyFile, WaiverFile};
18use crate::ports::{FileSystemProvider, MarkdownParser};
19use crate::rules::{default_external_rule_dirs, RuleEngine};
20use crate::scanner_support::{
21 load_optional_baseline, load_optional_disposition, load_optional_policy, load_optional_waivers,
22};
23pub use crate::scanner_types::{
24 ArtifactMetadata, PackageScanResult, ScanError, ScanErrorEntry, ScanOptions, ScanResult,
25 ScanTargetMode,
26};
27use crate::services::{ArtifactOrchestratorService, FileDiscoveryService, ScanFilterService};
28use crate::{scanner_execution, scanner_graph};
29use std::path::Path;
30use std::sync::Arc;
31
32type EngineAndPolicy = (
33 RuleEngine<RegexPatternMatcher>,
34 Option<BaselineFile>,
35 Option<WaiverFile>,
36 Option<PolicyFile>,
37 Option<DispositionOverlay>,
38);
39
40/// Build the rule engine and load optional policy files from scan options.
41///
42/// Shared by `with_std_adapters` and `with_custom_adapters` to avoid duplicating
43/// the engine + policy loading logic. The `fs` provider is required so policy
44/// file I/O passes through the same `FileSystemProvider` port the rest of the
45/// scanner uses, preserving the hexagonal contract documented in `CLAUDE.md`.
46fn build_engine_and_policy<F: FileSystemProvider>(
47 fs: &F,
48 options: &ScanOptions,
49) -> Result<EngineAndPolicy, ScanError> {
50 let runtime_overlay_dirs = default_external_rule_dirs();
51 let mut engine = RuleEngine::with_defaults_and_matcher(
52 Arc::new(RegexPatternMatcher::new()),
53 fs,
54 &runtime_overlay_dirs,
55 )?;
56 // Strict mode is only meaningful for external rule packs; built-ins
57 // already fail-fast on internal duplicates. Enable before loading the
58 // user-supplied rules_dir so collisions there are promoted to errors.
59 engine.set_strict_mode(options.strict_rules);
60 if let Some(ref rules_dir) = options.rules_dir {
61 engine.load_from_dir(fs, rules_dir)?;
62 }
63 let baseline = load_optional_baseline(fs, options.baseline_path.as_deref())?;
64 let waivers = load_optional_waivers(fs, options.waivers_path.as_deref())?;
65 let policy = load_optional_policy(fs, options.policy_path.as_deref())?;
66 let disposition = load_optional_disposition(fs, options.disposition_path.as_deref())?;
67 Ok((engine, baseline, waivers, policy, disposition))
68}
69
70/// Scanner for analyzing skills and related agent-extension packages.
71pub struct Scanner<
72 F: FileSystemProvider = StdFileSystemProvider,
73 P: MarkdownParser = PulldownMarkdownParser,
74> {
75 engine: RuleEngine<RegexPatternMatcher>,
76 artifact_orchestration: ArtifactOrchestratorService,
77 file_discovery: FileDiscoveryService<F>,
78 filter_service: ScanFilterService,
79 parser: P,
80}
81
82/// Scanner using the default standard-library filesystem and Pulldown Markdown adapters.
83/// Use this in most application code. For injectable adapters, use [`Scanner`] directly.
84pub type DefaultScanner = Scanner<StdFileSystemProvider, PulldownMarkdownParser>;
85
86impl Scanner<StdFileSystemProvider, PulldownMarkdownParser> {
87 #[must_use = "Scanner::new() returns a Result that should be used"]
88 pub fn new() -> Result<Self, ScanError> {
89 Self::with_std_adapters(ScanOptions::default())
90 }
91
92 #[must_use = "Scanner::with_std_adapters() returns a Result that should be used"]
93 pub fn with_std_adapters(options: ScanOptions) -> Result<Self, ScanError> {
94 // One `StdFileSystemProvider` shared between rule loading and file
95 // discovery: the TOCTOU rationale in `scanner_execution.rs` requires
96 // existence checks and reads to go through the same provider, and
97 // future stateful adapter implementations (mocks, in-memory overlays,
98 // chroot wrappers) would silently disagree if two instances were
99 // wired in side-by-side. `with_custom_adapters` already shares a
100 // single instance — keep the std path symmetric.
101 let fs = StdFileSystemProvider::new();
102 let (engine, baseline, waivers, policy, disposition) =
103 build_engine_and_policy(&fs, &options)?;
104 Ok(Self {
105 engine,
106 artifact_orchestration: ArtifactOrchestratorService::new(),
107 file_discovery: FileDiscoveryService::with_fs_provider(options.recursive, fs),
108 filter_service: ScanFilterService::with_policy_state(
109 options,
110 baseline,
111 waivers,
112 policy,
113 disposition,
114 ),
115 parser: PulldownMarkdownParser::new(),
116 })
117 }
118}
119
120impl<F: FileSystemProvider, P: MarkdownParser> Scanner<F, P> {
121 #[must_use = "Scanner::with_custom_adapters() returns a Result that should be used"]
122 pub fn with_custom_adapters(
123 options: ScanOptions,
124 fs_provider: F,
125 parser: P,
126 ) -> Result<Self, ScanError> {
127 let (engine, baseline, waivers, policy, disposition) =
128 build_engine_and_policy(&fs_provider, &options)?;
129 Ok(Self {
130 engine,
131 artifact_orchestration: ArtifactOrchestratorService::new(),
132 file_discovery: FileDiscoveryService::with_fs_provider(options.recursive, fs_provider),
133 filter_service: ScanFilterService::with_policy_state(
134 options,
135 baseline,
136 waivers,
137 policy,
138 disposition,
139 ),
140 parser,
141 })
142 }
143
144 pub(crate) fn engine(&self) -> &RuleEngine<RegexPatternMatcher> {
145 &self.engine
146 }
147
148 pub(crate) fn artifact_orchestration(&self) -> &ArtifactOrchestratorService {
149 &self.artifact_orchestration
150 }
151
152 pub(crate) fn file_discovery(&self) -> &FileDiscoveryService<F> {
153 &self.file_discovery
154 }
155
156 pub(crate) fn filter_service(&self) -> &ScanFilterService {
157 &self.filter_service
158 }
159
160 pub(crate) fn parser(&self) -> &P {
161 &self.parser
162 }
163
164 pub(crate) fn build_artifact_graph(&self, doc: &SkillDocument) -> ArtifactGraph {
165 scanner_graph::build_artifact_graph::<F>(
166 &self.artifact_orchestration,
167 self.file_discovery.fs_provider(),
168 doc,
169 )
170 }
171
172 /// Scan a single document file and return its [`ScanResult`].
173 ///
174 /// Accepts any path that resolves to a readable file through the
175 /// scanner's `FileSystemProvider`. The file does not need to be a
176 /// canonical skill entrypoint — use [`scan_skill_file`] when callers
177 /// want that stricter precondition. Use [`scan_package`] or [`scan`]
178 /// to scan a whole package and aggregate results.
179 ///
180 /// [`scan_skill_file`]: Scanner::scan_skill_file
181 /// [`scan_package`]: Scanner::scan_package
182 /// [`scan`]: Scanner::scan
183 ///
184 /// # Errors
185 ///
186 /// - [`ScanError::PathNotFound`] if `path` does not exist through `fs`.
187 /// - Errors propagated from the analyzer / rule engine pipeline
188 /// (parse failures, rule evaluation errors, …) surface as
189 /// [`ScanError`] variants.
190 pub fn scan_file(&self, path: impl AsRef<Path>) -> Result<ScanResult, ScanError> {
191 let path = path.as_ref();
192 if !self.file_discovery.fs_provider().exists(path) {
193 return Err(ScanError::PathNotFound(path.to_path_buf()));
194 }
195 scanner_execution::scan_document_path(self, path)
196 }
197
198 /// Scan a path that MUST be a canonical skill entrypoint
199 /// (`SKILL.md`, `agent.md`, manifest, etc.). Use this when the
200 /// caller already enforces "this is the skill" semantics — `scan` /
201 /// `scan_package` discover entrypoints automatically and should be
202 /// preferred for general use.
203 ///
204 /// # Errors
205 ///
206 /// - [`ScanError::PathNotFound`] if `path` does not exist.
207 /// - [`ScanError::InvalidSkillEntrypoint`] if `path` exists but is
208 /// not recognised as a skill entrypoint by
209 /// `FileDiscoveryService::is_explicit_skill_file`.
210 /// - Errors propagated from the analyzer / rule pipeline.
211 pub fn scan_skill_file(&self, path: impl AsRef<Path>) -> Result<ScanResult, ScanError> {
212 let path = path.as_ref();
213 if !self.file_discovery.fs_provider().exists(path) {
214 return Err(ScanError::PathNotFound(path.to_path_buf()));
215 }
216 if !FileDiscoveryService::<F>::is_explicit_skill_file(path) {
217 return Err(ScanError::InvalidSkillEntrypoint(path.to_path_buf()));
218 }
219 scanner_execution::scan_document_path(self, path)
220 }
221
222 /// Scan an entire package directory (or a single file treated as a
223 /// degenerate one-target package). Discovers every target via
224 /// `discover_package_targets` and aggregates per-target results
225 /// into a [`PackageScanResult`]. Per-target failures are recorded in
226 /// `pkg_result.errors` instead of aborting the whole scan, so a
227 /// partially malformed package still produces verdicts for the
228 /// readable subset.
229 ///
230 /// # Errors
231 ///
232 /// - [`ScanError::PathNotFound`] if `path` does not exist.
233 /// - Errors from package discovery (only the *initial* discovery
234 /// step bubbles up; per-file errors are captured into
235 /// `PackageScanResult::errors`).
236 pub fn scan_package(&self, path: impl AsRef<Path>) -> Result<PackageScanResult, ScanError> {
237 let path = path.as_ref();
238 let fs = self.file_discovery.fs_provider();
239 if !fs.exists(path) {
240 return Err(ScanError::PathNotFound(path.to_path_buf()));
241 }
242 if fs.is_file(path) {
243 return Ok(match self.scan_file(path) {
244 Ok(result) => PackageScanResult {
245 results: vec![result],
246 errors: Vec::new(),
247 },
248 Err(err) => PackageScanResult {
249 results: Vec::new(),
250 errors: vec![crate::scanner_types::ScanErrorEntry {
251 path: path.to_path_buf(),
252 error: err.to_string(),
253 }],
254 },
255 });
256 }
257
258 let targets = scanner_execution::discover_package_targets(self, path)?;
259 let mut pkg_result = PackageScanResult::new();
260 for target in targets {
261 match self.scan_file(&target) {
262 Ok(result) => pkg_result.results.push(result),
263 Err(err) => {
264 pkg_result
265 .errors
266 .push(crate::scanner_types::ScanErrorEntry {
267 path: target.clone(),
268 error: err.to_string(),
269 });
270 tracing::warn!("Failed to scan {}: {}", target.display(), err);
271 }
272 }
273 }
274 Ok(pkg_result)
275 }
276
277 /// Top-level entry point. Honours the configured `ScanTargetMode`:
278 ///
279 /// - `Auto` (default) — file paths route to [`scan_file`], directory
280 /// paths to [`scan_package`].
281 /// - `File` — always treated as a single document; directories
282 /// produce `PathNotFound`-equivalent errors via the analyzer.
283 /// - `Package` — always treated as a package, even when the path
284 /// is a single file. Useful when callers want package-level
285 /// aggregation over a synthetic one-file package.
286 ///
287 /// [`scan_file`]: Scanner::scan_file
288 /// [`scan_package`]: Scanner::scan_package
289 ///
290 /// # Errors
291 ///
292 /// - [`ScanError::PathNotFound`] if `path` is missing in `Auto` mode.
293 /// - Errors from the underlying `scan_file` / `scan_package` paths.
294 pub fn scan(&self, path: impl AsRef<Path>) -> Result<PackageScanResult, ScanError> {
295 let path = path.as_ref();
296 match self.filter_service.target_mode() {
297 ScanTargetMode::Auto => {
298 let fs = self.file_discovery.fs_provider();
299 if fs.is_file(path) {
300 let result = self.scan_file(path)?;
301 Ok(PackageScanResult {
302 results: vec![result],
303 errors: Vec::new(),
304 })
305 } else if fs.is_dir(path) {
306 self.scan_package(path)
307 } else {
308 Err(ScanError::PathNotFound(path.to_path_buf()))
309 }
310 }
311 ScanTargetMode::File => {
312 let result = self.scan_file(path)?;
313 Ok(PackageScanResult {
314 results: vec![result],
315 errors: Vec::new(),
316 })
317 }
318 ScanTargetMode::Package => self.scan_package(path),
319 }
320 }
321
322 /// Number of compiled rules currently loaded into the underlying
323 /// `RuleEngine`. Combines built-in rules with any external packs
324 /// loaded via `--rules-dir`. Useful for diagnostics and CLI
325 /// `rules count` summaries.
326 pub fn rule_count(&self) -> usize {
327 self.engine.rule_count()
328 }
329
330 /// Borrow every loaded rule as a slice of references. Order matches
331 /// the `RuleEngine`'s internal `Vec<CompiledRule>`: built-ins first,
332 /// then external packs in load order. Intended for read-only
333 /// inspection (CLI `rules list`, snapshot tests); the engine
334 /// retains ownership.
335 pub fn rules(&self) -> Vec<&crate::rules::Rule> {
336 self.engine.rules()
337 }
338}
339
340#[cfg(test)]
341mod basic_tests;
342#[cfg(test)]
343mod capabilities_tests;
344#[cfg(test)]
345mod manifest_tests;