skill_veil_core/rules/mod.rs
1//! Rule engine for detecting security signals in skills
2//!
3//! Provides declarative rule definitions and evaluation logic for analyzing
4//! skill documents. Rules are defined declaratively in YAML and can detect
5//! patterns using regex, section content matching, or code block language detection.
6//!
7//! # Example
8//!
9//! ```
10//! use skill_veil_core::rules::{default_external_rule_dirs, RuleEngine};
11//! use skill_veil_core::analyzer::SkillDocument;
12//! use skill_veil_core::adapters::{
13//! PulldownMarkdownParser, RegexPatternMatcher, StdFileSystemProvider,
14//! };
15//! use std::path::PathBuf;
16//! use std::sync::Arc;
17//!
18//! // Compose adapters at the application boundary, then hand them to the
19//! // domain layer through the injected ports.
20//! let fs = StdFileSystemProvider::new();
21//! let runtime_dirs = default_external_rule_dirs();
22//! let engine = RuleEngine::with_defaults_and_matcher(
23//! Arc::new(RegexPatternMatcher::new()),
24//! &fs,
25//! &runtime_dirs,
26//! )
27//! .unwrap();
28//! assert!(engine.rule_count() > 0);
29//!
30//! // Parse a skill document
31//! let parser = PulldownMarkdownParser::new();
32//! let doc = SkillDocument::parse_with_parser(
33//! PathBuf::from("test.md"),
34//! "# My Skill\n\n## Setup\n```bash\necho hello\n```".to_string(),
35//! &parser,
36//! ).unwrap();
37//!
38//! // Evaluate rules against the document
39//! let findings = engine.evaluate(&doc);
40//! ```
41
42mod builtin;
43mod compiled;
44mod condition;
45mod ioc;
46mod parser;
47mod schema;
48
49use crate::ports::{FileSystemError, FileSystemProvider, MarkdownParser, PatternMatcher};
50use sha2::{Digest, Sha256};
51use std::path::Path;
52use std::sync::Arc;
53use thiserror::Error;
54use tracing::warn;
55
56pub use compiled::CompiledRule;
57pub use condition::RuleCondition;
58pub use parser::{default_external_rule_dirs, is_supported_rule_pack_schema, parse_rules_file};
59pub use schema::{IocFeedFile, Rule, RulePackFile, RulePackKind, RulePackMetadata, ShieldHint};
60
/// Schema identifier that versions the external rule pack format.
///
/// Exposed so callers can compare a pack's declared schema against the
/// version this build understands (see [`is_supported_rule_pack_schema`]).
pub const RULE_PACK_SCHEMA_VERSION: &str = "skill-veil.dev/rules/v1alpha1";

/// Default confidence score for rules, on a `0.0..=1.0` scale.
///
/// NOTE(review): presumably applied when a rule definition omits its own
/// confidence value — confirm against the schema module.
pub const DEFAULT_RULE_CONFIDENCE: f32 = 0.9;
66
67/// Error type for rule operations
68///
69/// Encapsulates errors that can occur during rule loading, compilation,
70/// and evaluation.
71#[derive(Error, Debug)]
72pub enum RuleError {
73 /// Failed to load rules from a file or directory
74 #[error("Failed to load rules: {0}")]
75 LoadError(String),
76 /// Rule configuration is invalid
77 #[error("Invalid rule configuration: {0}")]
78 InvalidRule(String),
79 /// Failed to compile a pattern through the matcher port
80 #[error("Pattern compilation failed: {0}")]
81 PatternError(#[from] crate::ports::PatternError),
82 /// Failed to parse YAML rule file
83 #[error("YAML parsing error: {0}")]
84 YamlError(#[from] serde_yaml::Error),
85 /// I/O error during file operations
86 #[error("IO error: {0}")]
87 IoError(#[from] std::io::Error),
88 /// Two embedded built-in rule packs define the same rule id with
89 /// divergent content. This is always a developer bug in the source YAML
90 /// and must not be silently deduplicated at runtime.
91 #[error(
92 "Duplicate built-in rule id `{id}` in `{first}` and `{second}` — \
93 remove or rename one of the definitions"
94 )]
95 DuplicateBuiltinRule {
96 id: String,
97 first: String,
98 second: String,
99 },
100 /// A user-supplied rule pack declared a rule id that collides with an
101 /// already-loaded rule. Only surfaced when strict mode is enabled.
102 #[error(
103 "Duplicate external rule id `{id}` in `{path}` — \
104 already loaded; rename or remove the duplicate (strict mode)"
105 )]
106 DuplicateUserRule { id: String, path: String },
107 /// External rule pack body's SHA-256 digest does not match the value
108 /// recorded in the `<pack>.sha256` sidecar. The pack is rejected to
109 /// prevent silently loading tampered rules.
110 #[error(
111 "Rule pack `{path}` failed integrity check: \
112 expected sha256 `{expected}`, computed `{actual}` — \
113 the pack body changed since the sidecar was issued; \
114 re-issue the sidecar or revert the body"
115 )]
116 ChecksumMismatch {
117 path: String,
118 expected: String,
119 actual: String,
120 },
121 /// External rule pack has no `<pack>.sha256` sidecar and the engine is
122 /// running with `ChecksumPolicy::Required`. Operators who want to load
123 /// unsigned packs (development, ad-hoc tooling) can opt out via
124 /// `set_checksum_policy(ChecksumPolicy::Lenient)` or
125 /// `ChecksumPolicy::WarnOnMissing`.
126 #[error(
127 "Rule pack `{path}` has no sha256 sidecar and ChecksumPolicy::Required \
128 is in effect — generate `{path}.sha256` containing the hex digest \
129 of the pack body"
130 )]
131 MissingChecksum { path: String },
132}
133
/// Suffix appended to a rule pack's full path to find its SHA-256 sidecar,
/// so `<pack>.yaml` is paired with `<pack>.yaml.sha256`.
///
/// This follows the `sha256sum` convention, which lets operators issue and
/// verify sidecars with stock tooling:
/// `sha256sum pack.yaml > pack.yaml.sha256`.
const RULE_PACK_CHECKSUM_SUFFIX: &str = ".sha256";
139
140/// Compute the SHA-256 hex digest of `bytes`. Used for both the
141/// integrity verification and the regression tests that pin the sidecar
142/// format. Pure; no allocation beyond the returned string.
143fn sha256_hex_of(bytes: &[u8]) -> String {
144 let mut hasher = Sha256::new();
145 hasher.update(bytes);
146 format!("{:x}", hasher.finalize())
147}
148
/// Extract the SHA-256 digest from the body of a `.sha256` sidecar.
///
/// Only the first whitespace-delimited token is inspected. Accepted forms:
///
/// - the bare digest: `<hex>\n`;
/// - the canonical `sha256sum` line: `<hex>  <name>\n`;
/// - the escaped `sha256sum` line: `\<hex>  <escaped name>\n`, which GNU
///   `sha256sum` emits when the file name contains a backslash, newline or
///   carriage return — the digest itself is not escaped;
/// - any of the above preceded by a UTF-8 byte-order mark, as written by
///   some editors.
///
/// Returns the digest normalised to lowercase, or `None` when the first
/// token is not exactly 64 ASCII hex digits.
fn parse_checksum_sidecar(body: &str) -> Option<String> {
    // A BOM is not Unicode whitespace, so it would otherwise stay glued to
    // the digest and break the length check.
    let first_token = body
        .trim_start_matches('\u{feff}')
        .split_whitespace()
        .next()?;
    // Strip at most one escape marker; anything beyond that is malformed.
    let digest = first_token.strip_prefix('\\').unwrap_or(first_token);
    if digest.len() == 64 && digest.bytes().all(|byte| byte.is_ascii_hexdigit()) {
        Some(digest.to_ascii_lowercase())
    } else {
        None
    }
}
161
162/// Verify a rule pack body against its sidecar according to `policy`.
163///
164/// - [`ChecksumPolicy::Lenient`]: never reads the sidecar, never fails.
165/// - [`ChecksumPolicy::WarnOnMissing`]: if the sidecar exists, verify;
166/// if it is missing, emit a `tracing::warn!` and continue.
167/// - [`ChecksumPolicy::Required`]: the sidecar MUST exist and match;
168/// any other state surfaces as `RuleError::MissingChecksum` or
169/// `RuleError::ChecksumMismatch`.
170fn verify_pack_checksum<F: FileSystemProvider>(
171 fs: &F,
172 pack_path: &Path,
173 body: &[u8],
174 policy: ChecksumPolicy,
175) -> Result<(), RuleError> {
176 if matches!(policy, ChecksumPolicy::Lenient) {
177 return Ok(());
178 }
179 let sidecar_path = {
180 let mut buf = pack_path.as_os_str().to_os_string();
181 buf.push(RULE_PACK_CHECKSUM_SUFFIX);
182 std::path::PathBuf::from(buf)
183 };
184 let sidecar_bytes = match fs.read_file_bytes(&sidecar_path) {
185 Ok(bytes) => bytes,
186 Err(FileSystemError::PathNotFound(_)) => match policy {
187 ChecksumPolicy::Required => {
188 return Err(RuleError::MissingChecksum {
189 path: pack_path.display().to_string(),
190 });
191 }
192 ChecksumPolicy::WarnOnMissing => {
193 warn!(
194 pack = %pack_path.display(),
195 sidecar = %sidecar_path.display(),
196 "rule pack loaded without integrity verification — \
197 issue a `<pack>.sha256` sidecar to silence this warning"
198 );
199 return Ok(());
200 }
201 ChecksumPolicy::Lenient => unreachable!("handled above"),
202 },
203 Err(FileSystemError::IoError(io)) => return Err(RuleError::IoError(io)),
204 };
205 let sidecar_text = String::from_utf8(sidecar_bytes.as_bytes().to_vec()).map_err(|err| {
206 RuleError::IoError(std::io::Error::new(std::io::ErrorKind::InvalidData, err))
207 })?;
208 let expected = parse_checksum_sidecar(&sidecar_text).ok_or_else(|| {
209 RuleError::IoError(std::io::Error::new(
210 std::io::ErrorKind::InvalidData,
211 format!(
212 "rule pack sidecar `{}` does not contain a 64-char hex SHA-256 digest",
213 sidecar_path.display()
214 ),
215 ))
216 })?;
217 let actual = sha256_hex_of(body);
218 if expected != actual {
219 return Err(RuleError::ChecksumMismatch {
220 path: pack_path.display().to_string(),
221 expected,
222 actual,
223 });
224 }
225 Ok(())
226}
227
/// Verification policy applied to external rule pack bodies during
/// `load_rules_file`.
///
/// The default — [`ChecksumPolicy::WarnOnMissing`] — emits a
/// `tracing::warn!` when a pack ships without a `<path>.sha256` sidecar
/// but does not block the load. Operators running production scans against
/// untrusted rule directories should switch to
/// [`ChecksumPolicy::Required`] to enforce integrity verification at the
/// boundary.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChecksumPolicy {
    /// Skip integrity verification entirely; do not warn on missing
    /// sidecars. Use only for built-in / embedded packs that the binary
    /// itself ships.
    Lenient,
    /// Verify the sidecar when present; emit `tracing::warn!` when absent.
    /// Default for runtime overlays so operators can incrementally adopt
    /// signed packs without breaking existing deployments.
    WarnOnMissing,
    /// Verify the sidecar when present; reject the pack if the sidecar is
    /// missing. Recommended for production scans against rule directories
    /// that any user can write to.
    Required,
}

impl Default for ChecksumPolicy {
    /// Returns [`ChecksumPolicy::WarnOnMissing`], the policy documented as
    /// the default for external rule packs.
    fn default() -> Self {
        Self::WarnOnMissing
    }
}
249
250/// Rule engine for loading and evaluating rules
251///
252/// The engine is generic over the pattern matcher implementation, allowing
253/// different matching strategies to be used (regex, literal, etc.).
254///
255/// # Example
256///
257/// ```
258/// use skill_veil_core::rules::{default_external_rule_dirs, RuleEngine};
259/// use skill_veil_core::adapters::{RegexPatternMatcher, StdFileSystemProvider};
260/// use std::sync::Arc;
261///
262/// // Compose adapters at the application boundary; the engine receives
263/// // them through the injected ports.
264/// let fs = StdFileSystemProvider::new();
265/// let runtime_dirs = default_external_rule_dirs();
266/// let engine = RuleEngine::with_defaults_and_matcher(
267/// Arc::new(RegexPatternMatcher::new()),
268/// &fs,
269/// &runtime_dirs,
270/// )
271/// .unwrap();
272/// assert!(engine.rule_count() > 0);
273/// ```
274pub struct RuleEngine<M: PatternMatcher> {
275 rules: Vec<CompiledRule>,
276 rules_dir: Option<std::path::PathBuf>,
277 matcher: Arc<M>,
278 /// When true, `load_rules_file` / `add_rule` return
279 /// `RuleError::DuplicateUserRule` on an id collision instead of logging
280 /// a `warn!()` and skipping. Default: **true** as of round-5 hardening.
281 ///
282 /// # Why strict by default
283 ///
284 /// The previous lenient default meant that an external pack with an ID
285 /// colliding with a built-in (or with another loaded pack) was silently
286 /// dropped with only a `tracing::warn!()` line. Maintainers writing
287 /// override packs in `rules/official/` would have no visible signal
288 /// that their rule was discarded — they had to grep logs at runtime.
289 /// Strict-by-default surfaces the collision at load time as a hard
290 /// error with file path context, matching how `cargo` treats duplicate
291 /// crate names and how `eslint` treats duplicate rule definitions.
292 ///
293 /// Pre-flight: `comm` of `rules/official/*.yaml` IDs against
294 /// `builtin_rules.yaml` IDs at the time of the flip showed 0
295 /// collisions, so flipping the default does not break the canonical
296 /// distribution.
297 ///
298 /// # Opt-out
299 ///
300 /// Callers who *intentionally* want the silent-skip behaviour (e.g.
301 /// experimental tooling that loads many overlapping packs) must call
302 /// `set_strict_mode(false)` explicitly. The opt-out is preserved so
303 /// no consumer is forced to rename rules unilaterally.
304 strict_mode: bool,
305 /// Integrity verification policy for external rule pack bodies. See
306 /// [`ChecksumPolicy`] for the three modes. Default is
307 /// `ChecksumPolicy::WarnOnMissing` so operators are informed about
308 /// unverified packs without breaking existing deployments that have
309 /// not yet shipped sidecars.
310 checksum_policy: ChecksumPolicy,
311}
312
313impl<M: PatternMatcher> RuleEngine<M> {
314 /// Create a new rule engine with a custom pattern matcher.
315 #[must_use]
316 pub fn with_matcher(matcher: Arc<M>) -> Self {
317 Self {
318 rules: Vec::new(),
319 rules_dir: None,
320 matcher,
321 strict_mode: true,
322 checksum_policy: ChecksumPolicy::WarnOnMissing,
323 }
324 }
325
326 /// Override the integrity verification policy for external rule
327 /// pack bodies. See [`ChecksumPolicy`] for the three modes. Default
328 /// is `WarnOnMissing`.
329 pub fn set_checksum_policy(&mut self, policy: ChecksumPolicy) {
330 self.checksum_policy = policy;
331 }
332
333 /// Toggle strict mode. When enabled, loading an external pack with a
334 /// duplicate rule id returns `RuleError::DuplicateUserRule` instead of
335 /// emitting a `tracing::warn!()` and skipping.
336 pub fn set_strict_mode(&mut self, strict: bool) {
337 self.strict_mode = strict;
338 }
339
340 /// Create a rule engine with built-in rules plus an optional runtime
341 /// overlay loaded through the injected `FileSystemProvider`.
342 ///
343 /// # Load order contract
344 ///
345 /// Built-in rules are loaded first, runtime overrides second. The
346 /// non-strict duplicate-skip means inverting the order would silently
347 /// discard canonical detections.
348 ///
349 /// # Hexagonal boundary
350 ///
351 /// `runtime_overlay_fs` and `runtime_overlay_dirs` are injected so the
352 /// domain layer never instantiates a concrete adapter. Production
353 /// callers compose them in the application layer (typically
354 /// `Scanner::with_std_adapters`) by pairing `StdFileSystemProvider`
355 /// with `default_external_rule_dirs()`.
356 #[must_use = "RuleEngine::with_defaults_and_matcher() returns a Result that should be used"]
357 pub fn with_defaults_and_matcher<F: FileSystemProvider>(
358 matcher: Arc<M>,
359 runtime_overlay_fs: &F,
360 runtime_overlay_dirs: &[std::path::PathBuf],
361 ) -> Result<Self, RuleError> {
362 let mut engine = Self::with_matcher(matcher);
363 engine.load_builtin_rules()?;
364 engine.load_runtime_default_rules(runtime_overlay_fs, runtime_overlay_dirs)?;
365 Ok(engine)
366 }
367
368 fn load_builtin_rules(&mut self) -> Result<(), RuleError> {
369 for rule in builtin::get_builtin_rules()? {
370 self.add_rule(rule)?;
371 }
372 Ok(())
373 }
374
375 /// Load rules from a directory through a `FileSystemProvider`. Going
376 /// through the port preserves the hexagonal contract: this loader
377 /// reads YAML rule packs from disk, but the domain layer never
378 /// reaches `std::fs` directly.
379 pub fn load_from_dir<F: FileSystemProvider>(
380 &mut self,
381 fs: &F,
382 dir: impl AsRef<Path>,
383 ) -> Result<(), RuleError> {
384 let dir = dir.as_ref();
385 self.rules_dir = Some(dir.to_path_buf());
386
387 for pattern in &["*.yaml", "*.yml"] {
388 let paths = fs.list_files(dir, pattern, true).map_err(|err| match err {
389 FileSystemError::IoError(io) => RuleError::IoError(io),
390 FileSystemError::PathNotFound(missing) => RuleError::IoError(std::io::Error::new(
391 std::io::ErrorKind::NotFound,
392 format!("path not found: {}", missing.display()),
393 )),
394 })?;
395 for path in paths {
396 self.load_rules_file(fs, &path)?;
397 }
398 }
399
400 Ok(())
401 }
402
403 /// Load rules from a YAML file.
404 ///
405 /// In **strict mode** (default — see `RuleEngine.strict_mode` doc-comment
406 /// for rationale), an ID that collides with an already-loaded rule
407 /// (built-in or earlier-loaded external) returns
408 /// `RuleError::DuplicateUserRule { id, path }`. The pre-flight at the
409 /// time of the round-5 strict-mode flip showed 0 collisions between
410 /// the embedded `builtin_rules.yaml` and the `rules/official/` packs.
411 ///
412 /// Callers that intentionally want the legacy "warn-and-skip" behaviour
413 /// (e.g. tooling that loads many overlapping experimental packs) must
414 /// opt out via `set_strict_mode(false)`.
415 pub fn load_rules_file<F: FileSystemProvider>(
416 &mut self,
417 fs: &F,
418 path: impl AsRef<Path>,
419 ) -> Result<(), RuleError> {
420 let bytes = fs.read_file_bytes(path.as_ref()).map_err(|err| match err {
421 FileSystemError::IoError(io) => RuleError::IoError(io),
422 FileSystemError::PathNotFound(missing) => RuleError::IoError(std::io::Error::new(
423 std::io::ErrorKind::NotFound,
424 format!("path not found: {}", missing.display()),
425 )),
426 })?;
427 verify_pack_checksum(fs, path.as_ref(), bytes.as_bytes(), self.checksum_policy)?;
428 let content = String::from_utf8(bytes.as_bytes().to_vec()).map_err(|err| {
429 RuleError::IoError(std::io::Error::new(std::io::ErrorKind::InvalidData, err))
430 })?;
431 for rule in parse_rules_file(&content)? {
432 let compiled = CompiledRule::compile(rule)?;
433 if self
434 .rules
435 .iter()
436 .any(|existing| existing.rule.id == compiled.rule.id)
437 {
438 if self.strict_mode {
439 return Err(RuleError::DuplicateUserRule {
440 id: compiled.rule.id.clone(),
441 path: path.as_ref().display().to_string(),
442 });
443 }
444 warn!(
445 rule_id = %compiled.rule.id,
446 path = %path.as_ref().display(),
447 "skipping duplicate rule ID (existing rule takes priority)"
448 );
449 } else {
450 self.rules.push(compiled);
451 }
452 }
453
454 Ok(())
455 }
456
457 /// Add a single rule.
458 ///
459 /// Skips the rule if one with the same ID already exists.
460 pub fn add_rule(&mut self, rule: Rule) -> Result<(), RuleError> {
461 let compiled = CompiledRule::compile(rule)?;
462 if self
463 .rules
464 .iter()
465 .any(|existing| existing.rule.id == compiled.rule.id)
466 {
467 if self.strict_mode {
468 return Err(RuleError::DuplicateUserRule {
469 id: compiled.rule.id.clone(),
470 path: "<programmatic add_rule>".to_string(),
471 });
472 }
473 warn!(
474 rule_id = %compiled.rule.id,
475 "skipping duplicate rule ID (existing rule takes priority)"
476 );
477 } else {
478 self.rules.push(compiled);
479 }
480 Ok(())
481 }
482
483 /// Get all loaded rules.
484 pub fn rules(&self) -> Vec<&Rule> {
485 self.rules.iter().map(|cr| &cr.rule).collect()
486 }
487
488 /// Evaluate all rules against a document.
489 pub fn evaluate(&self, doc: &crate::analyzer::SkillDocument) -> Vec<crate::findings::Finding> {
490 let mut all_findings = Vec::new();
491
492 for compiled_rule in &self.rules {
493 let findings = compiled_rule.matches(doc, self.matcher.as_ref());
494 all_findings.extend(findings);
495 }
496
497 all_findings
498 }
499
500 /// Get rule count.
501 pub fn rule_count(&self) -> usize {
502 self.rules.len()
503 }
504
505 /// Test a rule against sample content.
506 ///
507 /// The caller injects the `MarkdownParser` adapter so the domain layer
508 /// stays free of concrete adapter dependencies. Production callers in
509 /// the CLI pass `&PulldownMarkdownParser::new()`; tests pass whichever
510 /// parser their fixture exercises.
511 pub fn test_rule(
512 &self,
513 rule_id: &str,
514 content: &str,
515 parser: &dyn MarkdownParser,
516 ) -> Result<Vec<crate::findings::Finding>, RuleError> {
517 let doc = crate::analyzer::SkillDocument::parse_with_parser(
518 std::path::PathBuf::from("test.md"),
519 content.to_string(),
520 parser,
521 )
522 .map_err(|e| RuleError::InvalidRule(e.to_string()))?;
523
524 let findings = self
525 .rules
526 .iter()
527 .filter(|cr| cr.rule.id == rule_id)
528 .flat_map(|cr| cr.matches(&doc, self.matcher.as_ref()))
529 .collect();
530
531 Ok(findings)
532 }
533
534 /// Load runtime overlay rule directories through the injected
535 /// `FileSystemProvider`. Each directory is loaded only if it exists;
536 /// non-existent paths are skipped silently so callers can pass a
537 /// canonical list (`default_external_rule_dirs()`) regardless of
538 /// whether the overlay is present in the current working directory.
539 ///
540 /// # Why strict mode is forced off
541 ///
542 /// The runtime overlay is a *development* copy of the embedded packs
543 /// at `crates/skill-veil-core/resources/official/`. When the binary
544 /// runs from the repo root (CI, `cargo run`, local dev) the overlay
545 /// paths happen to resolve and re-introduce IDs already loaded from
546 /// the embedded packs. Strict mode would surface those overlaps as
547 /// `DuplicateUserRule` and abort startup. The intent of the overlay
548 /// is "skip duplicates; the embedded canonical version wins", so we
549 /// run this stage with strict mode forced off and restore the
550 /// caller's preference afterwards. Callers passing `--rules-dir` go
551 /// through `load_from_dir` directly and keep whatever strict setting
552 /// `set_strict_mode` last applied.
553 fn load_runtime_default_rules<F: FileSystemProvider>(
554 &mut self,
555 fs: &F,
556 dirs: &[std::path::PathBuf],
557 ) -> Result<bool, RuleError> {
558 self.with_strict_mode(false, |engine| {
559 let mut loaded = false;
560 for dir in dirs {
561 if fs.exists(dir) {
562 engine.load_from_dir(fs, dir)?;
563 loaded = true;
564 }
565 }
566 Ok(loaded)
567 })
568 }
569
570 /// Run `f` with `self.strict_mode` temporarily set to `temporary`,
571 /// restoring the previous value before returning. The closure receives
572 /// `&mut self` so it can call existing `&mut self` methods that consult
573 /// `strict_mode` (e.g. `load_from_dir` → `add_rule`) and observe the
574 /// override.
575 ///
576 /// # Why a helper instead of inline mutation
577 ///
578 /// The previous implementation inlined `std::mem::replace` plus a
579 /// post-loop restore in the caller. Co-locating the override window
580 /// here makes the contract a named operation ("run this block with
581 /// `strict=false`") instead of an open-coded mutation pattern, in
582 /// keeping with the CLAUDE.md guidance to prefer explicit inputs
583 /// over hidden state. The restore happens on both success and error
584 /// paths, mirroring the previous behaviour.
585 fn with_strict_mode<R>(
586 &mut self,
587 temporary: bool,
588 f: impl FnOnce(&mut Self) -> Result<R, RuleError>,
589 ) -> Result<R, RuleError> {
590 let previous = std::mem::replace(&mut self.strict_mode, temporary);
591 let result = f(self);
592 self.strict_mode = previous;
593 result
594 }
595}
596
597#[cfg(test)]
598mod tests;