Skip to main content

skill_veil_core/scanner/
mod.rs

1//! Scanner module for orchestrating skill analysis.
2//!
3//! This module is the **composition root** for the hexagonal layout: it is
4//! the only place in the core domain that legitimately imports concrete
5//! adapter types. Everything else in the domain depends on `ports` traits
6//! and gets adapters injected through `Scanner::with_custom_adapters`. The
7//! `with_std_adapters` / `new` constructors below wire the standard
8//! defaults (`StdFileSystemProvider`, `RegexPatternMatcher`,
9//! `PulldownMarkdownParser`); CLI code re-uses those defaults rather than
10//! reaching into `adapters/` itself. See `CLAUDE.md` → "Clean architecture"
11//! for the rule and `patterns.rs` for the façade pattern used elsewhere
12//! when an adapter import would otherwise smear across the boundary.
13
14use crate::adapters::{PulldownMarkdownParser, RegexPatternMatcher, StdFileSystemProvider};
15use crate::analyzer::SkillDocument;
16use crate::artifact_graph::ArtifactGraph;
17use crate::policy::{BaselineFile, DispositionOverlay, PolicyFile, WaiverFile};
18use crate::ports::{FileSystemProvider, MarkdownParser};
19use crate::rules::{default_external_rule_dirs, RuleEngine};
20use crate::scanner_support::{
21    load_optional_baseline, load_optional_disposition, load_optional_policy, load_optional_waivers,
22};
23pub use crate::scanner_types::{
24    ArtifactMetadata, PackageScanResult, ScanError, ScanErrorEntry, ScanOptions, ScanResult,
25    ScanTargetMode,
26};
27use crate::services::{ArtifactOrchestratorService, FileDiscoveryService, ScanFilterService};
28use crate::{scanner_execution, scanner_graph};
29use std::path::Path;
30use std::sync::Arc;
31
32type EngineAndPolicy = (
33    RuleEngine<RegexPatternMatcher>,
34    Option<BaselineFile>,
35    Option<WaiverFile>,
36    Option<PolicyFile>,
37    Option<DispositionOverlay>,
38);
39
40/// Build the rule engine and load optional policy files from scan options.
41///
42/// Shared by `with_std_adapters` and `with_custom_adapters` to avoid duplicating
43/// the engine + policy loading logic. The `fs` provider is required so policy
44/// file I/O passes through the same `FileSystemProvider` port the rest of the
45/// scanner uses, preserving the hexagonal contract documented in `CLAUDE.md`.
46fn build_engine_and_policy<F: FileSystemProvider>(
47    fs: &F,
48    options: &ScanOptions,
49) -> Result<EngineAndPolicy, ScanError> {
50    let runtime_overlay_dirs = default_external_rule_dirs();
51    let mut engine = RuleEngine::with_defaults_and_matcher(
52        Arc::new(RegexPatternMatcher::new()),
53        fs,
54        &runtime_overlay_dirs,
55    )?;
56    // Strict mode is only meaningful for external rule packs; built-ins
57    // already fail-fast on internal duplicates. Enable before loading the
58    // user-supplied rules_dir so collisions there are promoted to errors.
59    engine.set_strict_mode(options.strict_rules);
60    if let Some(ref rules_dir) = options.rules_dir {
61        engine.load_from_dir(fs, rules_dir)?;
62    }
63    let baseline = load_optional_baseline(fs, options.baseline_path.as_deref())?;
64    let waivers = load_optional_waivers(fs, options.waivers_path.as_deref())?;
65    let policy = load_optional_policy(fs, options.policy_path.as_deref())?;
66    let disposition = load_optional_disposition(fs, options.disposition_path.as_deref())?;
67    Ok((engine, baseline, waivers, policy, disposition))
68}
69
70/// Scanner for analyzing skills and related agent-extension packages.
71pub struct Scanner<
72    F: FileSystemProvider = StdFileSystemProvider,
73    P: MarkdownParser = PulldownMarkdownParser,
74> {
75    engine: RuleEngine<RegexPatternMatcher>,
76    artifact_orchestration: ArtifactOrchestratorService,
77    file_discovery: FileDiscoveryService<F>,
78    filter_service: ScanFilterService,
79    parser: P,
80}
81
82/// Scanner using the default standard-library filesystem and Pulldown Markdown adapters.
83/// Use this in most application code. For injectable adapters, use [`Scanner`] directly.
84pub type DefaultScanner = Scanner<StdFileSystemProvider, PulldownMarkdownParser>;
85
86impl Scanner<StdFileSystemProvider, PulldownMarkdownParser> {
87    #[must_use = "Scanner::new() returns a Result that should be used"]
88    pub fn new() -> Result<Self, ScanError> {
89        Self::with_std_adapters(ScanOptions::default())
90    }
91
92    #[must_use = "Scanner::with_std_adapters() returns a Result that should be used"]
93    pub fn with_std_adapters(options: ScanOptions) -> Result<Self, ScanError> {
94        // One `StdFileSystemProvider` shared between rule loading and file
95        // discovery: the TOCTOU rationale in `scanner_execution.rs` requires
96        // existence checks and reads to go through the same provider, and
97        // future stateful adapter implementations (mocks, in-memory overlays,
98        // chroot wrappers) would silently disagree if two instances were
99        // wired in side-by-side. `with_custom_adapters` already shares a
100        // single instance — keep the std path symmetric.
101        let fs = StdFileSystemProvider::new();
102        let (engine, baseline, waivers, policy, disposition) =
103            build_engine_and_policy(&fs, &options)?;
104        Ok(Self {
105            engine,
106            artifact_orchestration: ArtifactOrchestratorService::new(),
107            file_discovery: FileDiscoveryService::with_fs_provider(options.recursive, fs),
108            filter_service: ScanFilterService::with_policy_state(
109                options,
110                baseline,
111                waivers,
112                policy,
113                disposition,
114            ),
115            parser: PulldownMarkdownParser::new(),
116        })
117    }
118}
119
120impl<F: FileSystemProvider, P: MarkdownParser> Scanner<F, P> {
121    #[must_use = "Scanner::with_custom_adapters() returns a Result that should be used"]
122    pub fn with_custom_adapters(
123        options: ScanOptions,
124        fs_provider: F,
125        parser: P,
126    ) -> Result<Self, ScanError> {
127        let (engine, baseline, waivers, policy, disposition) =
128            build_engine_and_policy(&fs_provider, &options)?;
129        Ok(Self {
130            engine,
131            artifact_orchestration: ArtifactOrchestratorService::new(),
132            file_discovery: FileDiscoveryService::with_fs_provider(options.recursive, fs_provider),
133            filter_service: ScanFilterService::with_policy_state(
134                options,
135                baseline,
136                waivers,
137                policy,
138                disposition,
139            ),
140            parser,
141        })
142    }
143
144    pub(crate) fn engine(&self) -> &RuleEngine<RegexPatternMatcher> {
145        &self.engine
146    }
147
148    pub(crate) fn artifact_orchestration(&self) -> &ArtifactOrchestratorService {
149        &self.artifact_orchestration
150    }
151
152    pub(crate) fn file_discovery(&self) -> &FileDiscoveryService<F> {
153        &self.file_discovery
154    }
155
156    pub(crate) fn filter_service(&self) -> &ScanFilterService {
157        &self.filter_service
158    }
159
160    pub(crate) fn parser(&self) -> &P {
161        &self.parser
162    }
163
164    pub(crate) fn build_artifact_graph(&self, doc: &SkillDocument) -> ArtifactGraph {
165        scanner_graph::build_artifact_graph::<F>(
166            &self.artifact_orchestration,
167            self.file_discovery.fs_provider(),
168            doc,
169        )
170    }
171
172    /// Scan a single document file and return its [`ScanResult`].
173    ///
174    /// Accepts any path that resolves to a readable file through the
175    /// scanner's `FileSystemProvider`. The file does not need to be a
176    /// canonical skill entrypoint — use [`scan_skill_file`] when callers
177    /// want that stricter precondition. Use [`scan_package`] or [`scan`]
178    /// to scan a whole package and aggregate results.
179    ///
180    /// [`scan_skill_file`]: Scanner::scan_skill_file
181    /// [`scan_package`]: Scanner::scan_package
182    /// [`scan`]: Scanner::scan
183    ///
184    /// # Errors
185    ///
186    /// - [`ScanError::PathNotFound`] if `path` does not exist through `fs`.
187    /// - Errors propagated from the analyzer / rule engine pipeline
188    ///   (parse failures, rule evaluation errors, …) surface as
189    ///   [`ScanError`] variants.
190    pub fn scan_file(&self, path: impl AsRef<Path>) -> Result<ScanResult, ScanError> {
191        let path = path.as_ref();
192        if !self.file_discovery.fs_provider().exists(path) {
193            return Err(ScanError::PathNotFound(path.to_path_buf()));
194        }
195        scanner_execution::scan_document_path(self, path)
196    }
197
198    /// Scan a path that MUST be a canonical skill entrypoint
199    /// (`SKILL.md`, `agent.md`, manifest, etc.). Use this when the
200    /// caller already enforces "this is the skill" semantics — `scan` /
201    /// `scan_package` discover entrypoints automatically and should be
202    /// preferred for general use.
203    ///
204    /// # Errors
205    ///
206    /// - [`ScanError::PathNotFound`] if `path` does not exist.
207    /// - [`ScanError::InvalidSkillEntrypoint`] if `path` exists but is
208    ///   not recognised as a skill entrypoint by
209    ///   `FileDiscoveryService::is_explicit_skill_file`.
210    /// - Errors propagated from the analyzer / rule pipeline.
211    pub fn scan_skill_file(&self, path: impl AsRef<Path>) -> Result<ScanResult, ScanError> {
212        let path = path.as_ref();
213        if !self.file_discovery.fs_provider().exists(path) {
214            return Err(ScanError::PathNotFound(path.to_path_buf()));
215        }
216        if !FileDiscoveryService::<F>::is_explicit_skill_file(path) {
217            return Err(ScanError::InvalidSkillEntrypoint(path.to_path_buf()));
218        }
219        scanner_execution::scan_document_path(self, path)
220    }
221
222    /// Scan an entire package directory (or a single file treated as a
223    /// degenerate one-target package). Discovers every target via
224    /// `discover_package_targets` and aggregates per-target results
225    /// into a [`PackageScanResult`]. Per-target failures are recorded in
226    /// `pkg_result.errors` instead of aborting the whole scan, so a
227    /// partially malformed package still produces verdicts for the
228    /// readable subset.
229    ///
230    /// # Errors
231    ///
232    /// - [`ScanError::PathNotFound`] if `path` does not exist.
233    /// - Errors from package discovery (only the *initial* discovery
234    ///   step bubbles up; per-file errors are captured into
235    ///   `PackageScanResult::errors`).
236    pub fn scan_package(&self, path: impl AsRef<Path>) -> Result<PackageScanResult, ScanError> {
237        let path = path.as_ref();
238        let fs = self.file_discovery.fs_provider();
239        if !fs.exists(path) {
240            return Err(ScanError::PathNotFound(path.to_path_buf()));
241        }
242        if fs.is_file(path) {
243            return Ok(match self.scan_file(path) {
244                Ok(result) => PackageScanResult {
245                    results: vec![result],
246                    errors: Vec::new(),
247                },
248                Err(err) => PackageScanResult {
249                    results: Vec::new(),
250                    errors: vec![crate::scanner_types::ScanErrorEntry {
251                        path: path.to_path_buf(),
252                        error: err.to_string(),
253                    }],
254                },
255            });
256        }
257
258        let targets = scanner_execution::discover_package_targets(self, path)?;
259        let mut pkg_result = PackageScanResult::new();
260        for target in targets {
261            match self.scan_file(&target) {
262                Ok(result) => pkg_result.results.push(result),
263                Err(err) => {
264                    pkg_result
265                        .errors
266                        .push(crate::scanner_types::ScanErrorEntry {
267                            path: target.clone(),
268                            error: err.to_string(),
269                        });
270                    tracing::warn!("Failed to scan {}: {}", target.display(), err);
271                }
272            }
273        }
274        Ok(pkg_result)
275    }
276
277    /// Top-level entry point. Honours the configured `ScanTargetMode`:
278    ///
279    /// - `Auto` (default) — file paths route to [`scan_file`], directory
280    ///   paths to [`scan_package`].
281    /// - `File` — always treated as a single document; directories
282    ///   produce `PathNotFound`-equivalent errors via the analyzer.
283    /// - `Package` — always treated as a package, even when the path
284    ///   is a single file. Useful when callers want package-level
285    ///   aggregation over a synthetic one-file package.
286    ///
287    /// [`scan_file`]: Scanner::scan_file
288    /// [`scan_package`]: Scanner::scan_package
289    ///
290    /// # Errors
291    ///
292    /// - [`ScanError::PathNotFound`] if `path` is missing in `Auto` mode.
293    /// - Errors from the underlying `scan_file` / `scan_package` paths.
294    pub fn scan(&self, path: impl AsRef<Path>) -> Result<PackageScanResult, ScanError> {
295        let path = path.as_ref();
296        match self.filter_service.target_mode() {
297            ScanTargetMode::Auto => {
298                let fs = self.file_discovery.fs_provider();
299                if fs.is_file(path) {
300                    let result = self.scan_file(path)?;
301                    Ok(PackageScanResult {
302                        results: vec![result],
303                        errors: Vec::new(),
304                    })
305                } else if fs.is_dir(path) {
306                    self.scan_package(path)
307                } else {
308                    Err(ScanError::PathNotFound(path.to_path_buf()))
309                }
310            }
311            ScanTargetMode::File => {
312                let result = self.scan_file(path)?;
313                Ok(PackageScanResult {
314                    results: vec![result],
315                    errors: Vec::new(),
316                })
317            }
318            ScanTargetMode::Package => self.scan_package(path),
319        }
320    }
321
322    /// Number of compiled rules currently loaded into the underlying
323    /// `RuleEngine`. Combines built-in rules with any external packs
324    /// loaded via `--rules-dir`. Useful for diagnostics and CLI
325    /// `rules count` summaries.
326    pub fn rule_count(&self) -> usize {
327        self.engine.rule_count()
328    }
329
330    /// Borrow every loaded rule as a slice of references. Order matches
331    /// the `RuleEngine`'s internal `Vec<CompiledRule>`: built-ins first,
332    /// then external packs in load order. Intended for read-only
333    /// inspection (CLI `rules list`, snapshot tests); the engine
334    /// retains ownership.
335    pub fn rules(&self) -> Vec<&crate::rules::Rule> {
336        self.engine.rules()
337    }
338}
339
340#[cfg(test)]
341mod basic_tests;
342#[cfg(test)]
343mod capabilities_tests;
344#[cfg(test)]
345mod manifest_tests;