Skip to main content

skill_veil_core/rules/
mod.rs

1//! Rule engine for detecting security signals in skills
2//!
3//! Provides declarative rule definitions and evaluation logic for analyzing
4//! skill documents. Rules are defined declaratively in YAML and can detect
5//! patterns using regex, section content matching, or code block language detection.
6//!
7//! # Example
8//!
9//! ```
10//! use skill_veil_core::rules::{default_external_rule_dirs, RuleEngine};
11//! use skill_veil_core::analyzer::SkillDocument;
12//! use skill_veil_core::adapters::{
13//!     PulldownMarkdownParser, RegexPatternMatcher, StdFileSystemProvider,
14//! };
15//! use std::path::PathBuf;
16//! use std::sync::Arc;
17//!
18//! // Compose adapters at the application boundary, then hand them to the
19//! // domain layer through the injected ports.
20//! let fs = StdFileSystemProvider::new();
21//! let runtime_dirs = default_external_rule_dirs();
22//! let engine = RuleEngine::with_defaults_and_matcher(
23//!     Arc::new(RegexPatternMatcher::new()),
24//!     &fs,
25//!     &runtime_dirs,
26//! )
27//! .unwrap();
28//! assert!(engine.rule_count() > 0);
29//!
30//! // Parse a skill document
31//! let parser = PulldownMarkdownParser::new();
32//! let doc = SkillDocument::parse_with_parser(
33//!     PathBuf::from("test.md"),
34//!     "# My Skill\n\n## Setup\n```bash\necho hello\n```".to_string(),
35//!     &parser,
36//! ).unwrap();
37//!
38//! // Evaluate rules against the document
39//! let findings = engine.evaluate(&doc);
40//! ```
41
42mod builtin;
43mod compiled;
44mod condition;
45mod ioc;
46mod parser;
47mod schema;
48
49use crate::ports::{FileSystemError, FileSystemProvider, MarkdownParser, PatternMatcher};
50use sha2::{Digest, Sha256};
51use std::path::Path;
52use std::sync::Arc;
53use thiserror::Error;
54use tracing::warn;
55
56pub use compiled::CompiledRule;
57pub use condition::RuleCondition;
58pub use parser::{default_external_rule_dirs, is_supported_rule_pack_schema, parse_rules_file};
59pub use schema::{IocFeedFile, Rule, RulePackFile, RulePackKind, RulePackMetadata, ShieldHint};
60
/// Schema identifier (including its version segment) that external rule
/// pack files are written against.
pub const RULE_PACK_SCHEMA_VERSION: &str = "skill-veil.dev/rules/v1alpha1";

/// Confidence score used for rules by default. Scores live on a
/// 0.0 - 1.0 scale.
pub const DEFAULT_RULE_CONFIDENCE: f32 = 0.9;
66
67/// Error type for rule operations
68///
69/// Encapsulates errors that can occur during rule loading, compilation,
70/// and evaluation.
71#[derive(Error, Debug)]
72pub enum RuleError {
73    /// Failed to load rules from a file or directory
74    #[error("Failed to load rules: {0}")]
75    LoadError(String),
76    /// Rule configuration is invalid
77    #[error("Invalid rule configuration: {0}")]
78    InvalidRule(String),
79    /// Failed to compile a pattern through the matcher port
80    #[error("Pattern compilation failed: {0}")]
81    PatternError(#[from] crate::ports::PatternError),
82    /// Failed to parse YAML rule file
83    #[error("YAML parsing error: {0}")]
84    YamlError(#[from] serde_yaml::Error),
85    /// I/O error during file operations
86    #[error("IO error: {0}")]
87    IoError(#[from] std::io::Error),
88    /// Two embedded built-in rule packs define the same rule id with
89    /// divergent content. This is always a developer bug in the source YAML
90    /// and must not be silently deduplicated at runtime.
91    #[error(
92        "Duplicate built-in rule id `{id}` in `{first}` and `{second}` — \
93         remove or rename one of the definitions"
94    )]
95    DuplicateBuiltinRule {
96        id: String,
97        first: String,
98        second: String,
99    },
100    /// A user-supplied rule pack declared a rule id that collides with an
101    /// already-loaded rule. Only surfaced when strict mode is enabled.
102    #[error(
103        "Duplicate external rule id `{id}` in `{path}` — \
104         already loaded; rename or remove the duplicate (strict mode)"
105    )]
106    DuplicateUserRule { id: String, path: String },
107    /// External rule pack body's SHA-256 digest does not match the value
108    /// recorded in the `<pack>.sha256` sidecar. The pack is rejected to
109    /// prevent silently loading tampered rules.
110    #[error(
111        "Rule pack `{path}` failed integrity check: \
112         expected sha256 `{expected}`, computed `{actual}` — \
113         the pack body changed since the sidecar was issued; \
114         re-issue the sidecar or revert the body"
115    )]
116    ChecksumMismatch {
117        path: String,
118        expected: String,
119        actual: String,
120    },
121    /// External rule pack has no `<pack>.sha256` sidecar and the engine is
122    /// running with `ChecksumPolicy::Required`. Operators who want to load
123    /// unsigned packs (development, ad-hoc tooling) can opt out via
124    /// `set_checksum_policy(ChecksumPolicy::Lenient)` or
125    /// `ChecksumPolicy::WarnOnMissing`.
126    #[error(
127        "Rule pack `{path}` has no sha256 sidecar and ChecksumPolicy::Required \
128         is in effect — generate `{path}.sha256` containing the hex digest \
129         of the pack body"
130    )]
131    MissingChecksum { path: String },
132}
133
/// Text appended to a rule pack's full path to find its SHA-256 sidecar,
/// so `<pack>.yaml` pairs with `<pack>.yaml.sha256`. This follows the
/// `sha256sum` naming habit, which lets operators create sidecars with
/// stock tooling: `sha256sum pack.yaml > pack.yaml.sha256`.
const RULE_PACK_CHECKSUM_SUFFIX: &str = ".sha256";
139
140/// Compute the SHA-256 hex digest of `bytes`. Used for both the
141/// integrity verification and the regression tests that pin the sidecar
142/// format. Pure; no allocation beyond the returned string.
143fn sha256_hex_of(bytes: &[u8]) -> String {
144    let mut hasher = Sha256::new();
145    hasher.update(bytes);
146    format!("{:x}", hasher.finalize())
147}
148
/// Extract the digest from the text of a `.sha256` sidecar.
///
/// Only the first whitespace-separated token is inspected, which covers
/// both the bare form (`<hex>\n`) and the `sha256sum` output form
/// (`<hex>  <name>\n`). The token must be exactly 64 ASCII hex digits;
/// it is returned lowercased so it can be compared against a lowercase
/// computed digest. Anything else — including an empty or
/// whitespace-only body — yields `None`.
fn parse_checksum_sidecar(body: &str) -> Option<String> {
    body.split_whitespace()
        .next()
        // Non-ASCII text can never pass: its bytes are all >= 0x80.
        .filter(|token| token.len() == 64 && token.bytes().all(|b| b.is_ascii_hexdigit()))
        .map(str::to_ascii_lowercase)
}
161
162/// Verify a rule pack body against its sidecar according to `policy`.
163///
164/// - [`ChecksumPolicy::Lenient`]: never reads the sidecar, never fails.
165/// - [`ChecksumPolicy::WarnOnMissing`]: if the sidecar exists, verify;
166///   if it is missing, emit a `tracing::warn!` and continue.
167/// - [`ChecksumPolicy::Required`]: the sidecar MUST exist and match;
168///   any other state surfaces as `RuleError::MissingChecksum` or
169///   `RuleError::ChecksumMismatch`.
170fn verify_pack_checksum<F: FileSystemProvider>(
171    fs: &F,
172    pack_path: &Path,
173    body: &[u8],
174    policy: ChecksumPolicy,
175) -> Result<(), RuleError> {
176    if matches!(policy, ChecksumPolicy::Lenient) {
177        return Ok(());
178    }
179    let sidecar_path = {
180        let mut buf = pack_path.as_os_str().to_os_string();
181        buf.push(RULE_PACK_CHECKSUM_SUFFIX);
182        std::path::PathBuf::from(buf)
183    };
184    let sidecar_bytes = match fs.read_file_bytes(&sidecar_path) {
185        Ok(bytes) => bytes,
186        Err(FileSystemError::PathNotFound(_)) => match policy {
187            ChecksumPolicy::Required => {
188                return Err(RuleError::MissingChecksum {
189                    path: pack_path.display().to_string(),
190                });
191            }
192            ChecksumPolicy::WarnOnMissing => {
193                warn!(
194                    pack = %pack_path.display(),
195                    sidecar = %sidecar_path.display(),
196                    "rule pack loaded without integrity verification — \
197                     issue a `<pack>.sha256` sidecar to silence this warning"
198                );
199                return Ok(());
200            }
201            ChecksumPolicy::Lenient => unreachable!("handled above"),
202        },
203        Err(FileSystemError::IoError(io)) => return Err(RuleError::IoError(io)),
204    };
205    let sidecar_text = String::from_utf8(sidecar_bytes.as_bytes().to_vec()).map_err(|err| {
206        RuleError::IoError(std::io::Error::new(std::io::ErrorKind::InvalidData, err))
207    })?;
208    let expected = parse_checksum_sidecar(&sidecar_text).ok_or_else(|| {
209        RuleError::IoError(std::io::Error::new(
210            std::io::ErrorKind::InvalidData,
211            format!(
212                "rule pack sidecar `{}` does not contain a 64-char hex SHA-256 digest",
213                sidecar_path.display()
214            ),
215        ))
216    })?;
217    let actual = sha256_hex_of(body);
218    if expected != actual {
219        return Err(RuleError::ChecksumMismatch {
220            path: pack_path.display().to_string(),
221            expected,
222            actual,
223        });
224    }
225    Ok(())
226}
227
/// How strictly `load_rules_file` checks external rule pack bodies
/// against their `<path>.sha256` sidecars.
///
/// The engine starts with [`ChecksumPolicy::WarnOnMissing`]: a pack
/// without a sidecar still loads, but a `tracing::warn!` is emitted.
/// Operators scanning in production against rule directories they do not
/// fully trust should switch to [`ChecksumPolicy::Required`] so that
/// integrity is enforced at the boundary.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChecksumPolicy {
    /// No verification at all, and no warning when a sidecar is absent.
    /// Intended only for built-in / embedded packs shipped inside the
    /// binary itself.
    Lenient,
    /// Verify when a sidecar exists; emit `tracing::warn!` when it does
    /// not. This is the default for runtime overlays, letting operators
    /// adopt signed packs gradually without breaking existing
    /// deployments.
    WarnOnMissing,
    /// Verify when a sidecar exists; refuse the pack when it does not.
    /// Recommended for production scans against rule directories that
    /// any user can write to.
    Required,
}
249
250/// Rule engine for loading and evaluating rules
251///
252/// The engine is generic over the pattern matcher implementation, allowing
253/// different matching strategies to be used (regex, literal, etc.).
254///
255/// # Example
256///
257/// ```
258/// use skill_veil_core::rules::{default_external_rule_dirs, RuleEngine};
259/// use skill_veil_core::adapters::{RegexPatternMatcher, StdFileSystemProvider};
260/// use std::sync::Arc;
261///
262/// // Compose adapters at the application boundary; the engine receives
263/// // them through the injected ports.
264/// let fs = StdFileSystemProvider::new();
265/// let runtime_dirs = default_external_rule_dirs();
266/// let engine = RuleEngine::with_defaults_and_matcher(
267///     Arc::new(RegexPatternMatcher::new()),
268///     &fs,
269///     &runtime_dirs,
270/// )
271/// .unwrap();
272/// assert!(engine.rule_count() > 0);
273/// ```
274pub struct RuleEngine<M: PatternMatcher> {
275    rules: Vec<CompiledRule>,
276    rules_dir: Option<std::path::PathBuf>,
277    matcher: Arc<M>,
278    /// When true, `load_rules_file` / `add_rule` return
279    /// `RuleError::DuplicateUserRule` on an id collision instead of logging
280    /// a `warn!()` and skipping. Default: **true** as of round-5 hardening.
281    ///
282    /// # Why strict by default
283    ///
284    /// The previous lenient default meant that an external pack with an ID
285    /// colliding with a built-in (or with another loaded pack) was silently
286    /// dropped with only a `tracing::warn!()` line. Maintainers writing
287    /// override packs in `rules/official/` would have no visible signal
288    /// that their rule was discarded — they had to grep logs at runtime.
289    /// Strict-by-default surfaces the collision at load time as a hard
290    /// error with file path context, matching how `cargo` treats duplicate
291    /// crate names and how `eslint` treats duplicate rule definitions.
292    ///
293    /// Pre-flight: `comm` of `rules/official/*.yaml` IDs against
294    /// `builtin_rules.yaml` IDs at the time of the flip showed 0
295    /// collisions, so flipping the default does not break the canonical
296    /// distribution.
297    ///
298    /// # Opt-out
299    ///
300    /// Callers who *intentionally* want the silent-skip behaviour (e.g.
301    /// experimental tooling that loads many overlapping packs) must call
302    /// `set_strict_mode(false)` explicitly. The opt-out is preserved so
303    /// no consumer is forced to rename rules unilaterally.
304    strict_mode: bool,
305    /// Integrity verification policy for external rule pack bodies. See
306    /// [`ChecksumPolicy`] for the three modes. Default is
307    /// `ChecksumPolicy::WarnOnMissing` so operators are informed about
308    /// unverified packs without breaking existing deployments that have
309    /// not yet shipped sidecars.
310    checksum_policy: ChecksumPolicy,
311}
312
313impl<M: PatternMatcher> RuleEngine<M> {
314    /// Create a new rule engine with a custom pattern matcher.
315    #[must_use]
316    pub fn with_matcher(matcher: Arc<M>) -> Self {
317        Self {
318            rules: Vec::new(),
319            rules_dir: None,
320            matcher,
321            strict_mode: true,
322            checksum_policy: ChecksumPolicy::WarnOnMissing,
323        }
324    }
325
326    /// Override the integrity verification policy for external rule
327    /// pack bodies. See [`ChecksumPolicy`] for the three modes. Default
328    /// is `WarnOnMissing`.
329    pub fn set_checksum_policy(&mut self, policy: ChecksumPolicy) {
330        self.checksum_policy = policy;
331    }
332
333    /// Toggle strict mode. When enabled, loading an external pack with a
334    /// duplicate rule id returns `RuleError::DuplicateUserRule` instead of
335    /// emitting a `tracing::warn!()` and skipping.
336    pub fn set_strict_mode(&mut self, strict: bool) {
337        self.strict_mode = strict;
338    }
339
340    /// Create a rule engine with built-in rules plus an optional runtime
341    /// overlay loaded through the injected `FileSystemProvider`.
342    ///
343    /// # Load order contract
344    ///
345    /// Built-in rules are loaded first, runtime overrides second. The
346    /// non-strict duplicate-skip means inverting the order would silently
347    /// discard canonical detections.
348    ///
349    /// # Hexagonal boundary
350    ///
351    /// `runtime_overlay_fs` and `runtime_overlay_dirs` are injected so the
352    /// domain layer never instantiates a concrete adapter. Production
353    /// callers compose them in the application layer (typically
354    /// `Scanner::with_std_adapters`) by pairing `StdFileSystemProvider`
355    /// with `default_external_rule_dirs()`.
356    #[must_use = "RuleEngine::with_defaults_and_matcher() returns a Result that should be used"]
357    pub fn with_defaults_and_matcher<F: FileSystemProvider>(
358        matcher: Arc<M>,
359        runtime_overlay_fs: &F,
360        runtime_overlay_dirs: &[std::path::PathBuf],
361    ) -> Result<Self, RuleError> {
362        let mut engine = Self::with_matcher(matcher);
363        engine.load_builtin_rules()?;
364        engine.load_runtime_default_rules(runtime_overlay_fs, runtime_overlay_dirs)?;
365        Ok(engine)
366    }
367
368    fn load_builtin_rules(&mut self) -> Result<(), RuleError> {
369        for rule in builtin::get_builtin_rules()? {
370            self.add_rule(rule)?;
371        }
372        Ok(())
373    }
374
375    /// Load rules from a directory through a `FileSystemProvider`. Going
376    /// through the port preserves the hexagonal contract: this loader
377    /// reads YAML rule packs from disk, but the domain layer never
378    /// reaches `std::fs` directly.
379    pub fn load_from_dir<F: FileSystemProvider>(
380        &mut self,
381        fs: &F,
382        dir: impl AsRef<Path>,
383    ) -> Result<(), RuleError> {
384        let dir = dir.as_ref();
385        self.rules_dir = Some(dir.to_path_buf());
386
387        for pattern in &["*.yaml", "*.yml"] {
388            let paths = fs.list_files(dir, pattern, true).map_err(|err| match err {
389                FileSystemError::IoError(io) => RuleError::IoError(io),
390                FileSystemError::PathNotFound(missing) => RuleError::IoError(std::io::Error::new(
391                    std::io::ErrorKind::NotFound,
392                    format!("path not found: {}", missing.display()),
393                )),
394            })?;
395            for path in paths {
396                self.load_rules_file(fs, &path)?;
397            }
398        }
399
400        Ok(())
401    }
402
403    /// Load rules from a YAML file.
404    ///
405    /// In **strict mode** (default — see `RuleEngine.strict_mode` doc-comment
406    /// for rationale), an ID that collides with an already-loaded rule
407    /// (built-in or earlier-loaded external) returns
408    /// `RuleError::DuplicateUserRule { id, path }`. The pre-flight at the
409    /// time of the round-5 strict-mode flip showed 0 collisions between
410    /// the embedded `builtin_rules.yaml` and the `rules/official/` packs.
411    ///
412    /// Callers that intentionally want the legacy "warn-and-skip" behaviour
413    /// (e.g. tooling that loads many overlapping experimental packs) must
414    /// opt out via `set_strict_mode(false)`.
415    pub fn load_rules_file<F: FileSystemProvider>(
416        &mut self,
417        fs: &F,
418        path: impl AsRef<Path>,
419    ) -> Result<(), RuleError> {
420        let bytes = fs.read_file_bytes(path.as_ref()).map_err(|err| match err {
421            FileSystemError::IoError(io) => RuleError::IoError(io),
422            FileSystemError::PathNotFound(missing) => RuleError::IoError(std::io::Error::new(
423                std::io::ErrorKind::NotFound,
424                format!("path not found: {}", missing.display()),
425            )),
426        })?;
427        verify_pack_checksum(fs, path.as_ref(), bytes.as_bytes(), self.checksum_policy)?;
428        let content = String::from_utf8(bytes.as_bytes().to_vec()).map_err(|err| {
429            RuleError::IoError(std::io::Error::new(std::io::ErrorKind::InvalidData, err))
430        })?;
431        for rule in parse_rules_file(&content)? {
432            let compiled = CompiledRule::compile(rule)?;
433            if self
434                .rules
435                .iter()
436                .any(|existing| existing.rule.id == compiled.rule.id)
437            {
438                if self.strict_mode {
439                    return Err(RuleError::DuplicateUserRule {
440                        id: compiled.rule.id.clone(),
441                        path: path.as_ref().display().to_string(),
442                    });
443                }
444                warn!(
445                    rule_id = %compiled.rule.id,
446                    path = %path.as_ref().display(),
447                    "skipping duplicate rule ID (existing rule takes priority)"
448                );
449            } else {
450                self.rules.push(compiled);
451            }
452        }
453
454        Ok(())
455    }
456
457    /// Add a single rule.
458    ///
459    /// Skips the rule if one with the same ID already exists.
460    pub fn add_rule(&mut self, rule: Rule) -> Result<(), RuleError> {
461        let compiled = CompiledRule::compile(rule)?;
462        if self
463            .rules
464            .iter()
465            .any(|existing| existing.rule.id == compiled.rule.id)
466        {
467            if self.strict_mode {
468                return Err(RuleError::DuplicateUserRule {
469                    id: compiled.rule.id.clone(),
470                    path: "<programmatic add_rule>".to_string(),
471                });
472            }
473            warn!(
474                rule_id = %compiled.rule.id,
475                "skipping duplicate rule ID (existing rule takes priority)"
476            );
477        } else {
478            self.rules.push(compiled);
479        }
480        Ok(())
481    }
482
483    /// Get all loaded rules.
484    pub fn rules(&self) -> Vec<&Rule> {
485        self.rules.iter().map(|cr| &cr.rule).collect()
486    }
487
488    /// Evaluate all rules against a document.
489    pub fn evaluate(&self, doc: &crate::analyzer::SkillDocument) -> Vec<crate::findings::Finding> {
490        let mut all_findings = Vec::new();
491
492        for compiled_rule in &self.rules {
493            let findings = compiled_rule.matches(doc, self.matcher.as_ref());
494            all_findings.extend(findings);
495        }
496
497        all_findings
498    }
499
500    /// Get rule count.
501    pub fn rule_count(&self) -> usize {
502        self.rules.len()
503    }
504
505    /// Test a rule against sample content.
506    ///
507    /// The caller injects the `MarkdownParser` adapter so the domain layer
508    /// stays free of concrete adapter dependencies. Production callers in
509    /// the CLI pass `&PulldownMarkdownParser::new()`; tests pass whichever
510    /// parser their fixture exercises.
511    pub fn test_rule(
512        &self,
513        rule_id: &str,
514        content: &str,
515        parser: &dyn MarkdownParser,
516    ) -> Result<Vec<crate::findings::Finding>, RuleError> {
517        let doc = crate::analyzer::SkillDocument::parse_with_parser(
518            std::path::PathBuf::from("test.md"),
519            content.to_string(),
520            parser,
521        )
522        .map_err(|e| RuleError::InvalidRule(e.to_string()))?;
523
524        let findings = self
525            .rules
526            .iter()
527            .filter(|cr| cr.rule.id == rule_id)
528            .flat_map(|cr| cr.matches(&doc, self.matcher.as_ref()))
529            .collect();
530
531        Ok(findings)
532    }
533
534    /// Load runtime overlay rule directories through the injected
535    /// `FileSystemProvider`. Each directory is loaded only if it exists;
536    /// non-existent paths are skipped silently so callers can pass a
537    /// canonical list (`default_external_rule_dirs()`) regardless of
538    /// whether the overlay is present in the current working directory.
539    ///
540    /// # Why strict mode is forced off
541    ///
542    /// The runtime overlay is a *development* copy of the embedded packs
543    /// at `crates/skill-veil-core/resources/official/`. When the binary
544    /// runs from the repo root (CI, `cargo run`, local dev) the overlay
545    /// paths happen to resolve and re-introduce IDs already loaded from
546    /// the embedded packs. Strict mode would surface those overlaps as
547    /// `DuplicateUserRule` and abort startup. The intent of the overlay
548    /// is "skip duplicates; the embedded canonical version wins", so we
549    /// run this stage with strict mode forced off and restore the
550    /// caller's preference afterwards. Callers passing `--rules-dir` go
551    /// through `load_from_dir` directly and keep whatever strict setting
552    /// `set_strict_mode` last applied.
553    fn load_runtime_default_rules<F: FileSystemProvider>(
554        &mut self,
555        fs: &F,
556        dirs: &[std::path::PathBuf],
557    ) -> Result<bool, RuleError> {
558        self.with_strict_mode(false, |engine| {
559            let mut loaded = false;
560            for dir in dirs {
561                if fs.exists(dir) {
562                    engine.load_from_dir(fs, dir)?;
563                    loaded = true;
564                }
565            }
566            Ok(loaded)
567        })
568    }
569
570    /// Run `f` with `self.strict_mode` temporarily set to `temporary`,
571    /// restoring the previous value before returning. The closure receives
572    /// `&mut self` so it can call existing `&mut self` methods that consult
573    /// `strict_mode` (e.g. `load_from_dir` → `add_rule`) and observe the
574    /// override.
575    ///
576    /// # Why a helper instead of inline mutation
577    ///
578    /// The previous implementation inlined `std::mem::replace` plus a
579    /// post-loop restore in the caller. Co-locating the override window
580    /// here makes the contract a named operation ("run this block with
581    /// `strict=false`") instead of an open-coded mutation pattern, in
582    /// keeping with the CLAUDE.md guidance to prefer explicit inputs
583    /// over hidden state. The restore happens on both success and error
584    /// paths, mirroring the previous behaviour.
585    fn with_strict_mode<R>(
586        &mut self,
587        temporary: bool,
588        f: impl FnOnce(&mut Self) -> Result<R, RuleError>,
589    ) -> Result<R, RuleError> {
590        let previous = std::mem::replace(&mut self.strict_mode, temporary);
591        let result = f(self);
592        self.strict_mode = previous;
593        result
594    }
595}
596
597#[cfg(test)]
598mod tests;