mod builtin;
mod compiled;
mod condition;
mod ioc;
mod parser;
mod schema;
use crate::ports::{FileSystemError, FileSystemProvider, MarkdownParser, PatternMatcher};
use sha2::{Digest, Sha256};
use std::path::Path;
use std::sync::Arc;
use thiserror::Error;
use tracing::warn;
pub use compiled::CompiledRule;
pub use condition::RuleCondition;
pub use parser::{default_external_rule_dirs, is_supported_rule_pack_schema, parse_rules_file};
pub use schema::{IocFeedFile, Rule, RulePackFile, RulePackKind, RulePackMetadata, ShieldHint};
/// Schema identifier string for rule packs.
///
/// NOTE(review): presumably compared against the schema declared by a pack
/// (see `is_supported_rule_pack_schema` re-exported from `parser`) — confirm there.
pub const RULE_PACK_SCHEMA_VERSION: &str = "skill-veil.dev/rules/v1alpha1";
/// Default confidence value (`0.9`).
///
/// NOTE(review): presumably applied to rules that do not declare their own
/// confidence; it is not referenced in this file — confirm against `schema`/`compiled`.
pub const DEFAULT_RULE_CONFIDENCE: f32 = 0.9;
/// Errors produced while loading, verifying, compiling, or testing rules.
#[derive(Error, Debug)]
pub enum RuleError {
/// Generic rule-loading failure carrying a free-form message.
/// Not constructed anywhere in this file's visible code.
#[error("Failed to load rules: {0}")]
LoadError(String),
/// A rule (or, in `RuleEngine::test_rule`, the test document) was rejected;
/// the payload is the underlying error rendered as text.
#[error("Invalid rule configuration: {0}")]
InvalidRule(String),
/// Converted automatically (via `#[from]`) from `crate::ports::PatternError`.
#[error("Pattern compilation failed: {0}")]
PatternError(#[from] crate::ports::PatternError),
/// Converted automatically (via `#[from]`) from `serde_yaml::Error`.
#[error("YAML parsing error: {0}")]
YamlError(#[from] serde_yaml::Error),
/// Converted automatically (via `#[from]`) from `std::io::Error`.
/// This file also uses it for missing paths, non-UTF-8 file contents, and
/// malformed checksum sidecars (wrapped as `NotFound` / `InvalidData`).
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
/// The same rule id appears in two built-in definitions (`first` and `second`).
/// NOTE(review): not constructed in this file — presumably raised by `builtin`; confirm.
#[error(
"Duplicate built-in rule id `{id}` in `{first}` and `{second}` — \
remove or rename one of the definitions"
)]
DuplicateBuiltinRule {
id: String,
first: String,
second: String,
},
/// A rule id that is already loaded was encountered while strict mode is on.
/// `path` is the rule file, or `<programmatic add_rule>` when raised by `add_rule`.
#[error(
"Duplicate external rule id `{id}` in `{path}` — \
already loaded; rename or remove the duplicate (strict mode)"
)]
DuplicateUserRule { id: String, path: String },
/// The SHA-256 of the pack body (`actual`) differs from the digest stored in
/// its sidecar (`expected`). Both are lowercase hex strings.
#[error(
"Rule pack `{path}` failed integrity check: \
expected sha256 `{expected}`, computed `{actual}` — \
the pack body changed since the sidecar was issued; \
re-issue the sidecar or revert the body"
)]
ChecksumMismatch {
path: String,
expected: String,
actual: String,
},
/// No sidecar file exists for the pack while `ChecksumPolicy::Required` is active.
#[error(
"Rule pack `{path}` has no sha256 sidecar and ChecksumPolicy::Required \
is in effect — generate `{path}.sha256` containing the hex digest \
of the pack body"
)]
MissingChecksum { path: String },
}
/// Suffix appended to a rule pack's full path to locate its checksum sidecar
/// (e.g. `pack.yaml` -> `pack.yaml.sha256`); see `verify_pack_checksum`.
const RULE_PACK_CHECKSUM_SUFFIX: &str = ".sha256";
/// Returns the SHA-256 digest of `bytes` rendered as lowercase hexadecimal.
fn sha256_hex_of(bytes: &[u8]) -> String {
    // One-shot hashing: equivalent to `new()` + `update()` + `finalize()`.
    let digest = Sha256::digest(bytes);
    format!("{:x}", digest)
}
/// Extracts the SHA-256 digest from the text of a checksum sidecar file.
///
/// Only the first whitespace-separated token is inspected, so both a bare
/// digest and a `<digest>  <file name>` line are accepted. A leading UTF-8
/// byte-order mark is ignored: some editors and Windows tooling prepend one,
/// and because U+FEFF is not whitespace it would otherwise be glued onto the
/// digest and make a valid sidecar look malformed.
///
/// Returns the digest normalised to lowercase, or `None` when the first
/// token is not exactly 64 ASCII hex digits (or the text has no token at all).
fn parse_checksum_sidecar(body: &str) -> Option<String> {
    let body = body.strip_prefix('\u{feff}').unwrap_or(body);
    let digest = body.split_whitespace().next()?;
    // 64 hex digits == 32 bytes == one SHA-256 digest. Checking the byte
    // length first also rejects any token containing multi-byte characters
    // of the "wrong" size before the per-byte hex test runs.
    let looks_like_sha256 = digest.len() == 64 && digest.bytes().all(|b| b.is_ascii_hexdigit());
    if !looks_like_sha256 {
        return None;
    }
    Some(digest.to_ascii_lowercase())
}
/// Verifies a rule pack body against its checksum sidecar according to `policy`.
///
/// The sidecar path is the pack path with `RULE_PACK_CHECKSUM_SUFFIX` appended
/// to the whole file name. `body` must be the exact bytes that will be parsed,
/// so the digest covers what is actually loaded.
///
/// # Errors
///
/// * [`RuleError::MissingChecksum`] — the sidecar does not exist and the
///   policy is [`ChecksumPolicy::Required`].
/// * [`RuleError::ChecksumMismatch`] — the sidecar digest differs from the
///   SHA-256 of `body`.
/// * [`RuleError::IoError`] — the sidecar could not be read, is not UTF-8, or
///   does not start with a 64-character hex digest.
///
/// Under [`ChecksumPolicy::Lenient`] nothing is read and `Ok(())` is returned;
/// under [`ChecksumPolicy::WarnOnMissing`] a missing sidecar only logs a warning.
fn verify_pack_checksum<F: FileSystemProvider>(
    fs: &F,
    pack_path: &Path,
    body: &[u8],
    policy: ChecksumPolicy,
) -> Result<(), RuleError> {
    // Resolve the policy once, exhaustively, so the "sidecar missing" branch
    // below needs no impossible (`unreachable!`) arm.
    let sidecar_required = match policy {
        ChecksumPolicy::Lenient => return Ok(()),
        ChecksumPolicy::WarnOnMissing => false,
        ChecksumPolicy::Required => true,
    };

    // Append to the raw OS string rather than using `with_extension`, which
    // would replace the pack's own extension instead of adding a second one.
    let sidecar_path = {
        let mut raw = pack_path.as_os_str().to_os_string();
        raw.push(RULE_PACK_CHECKSUM_SUFFIX);
        std::path::PathBuf::from(raw)
    };

    let sidecar_bytes = match fs.read_file_bytes(&sidecar_path) {
        Ok(bytes) => bytes,
        Err(FileSystemError::PathNotFound(_)) => {
            if sidecar_required {
                return Err(RuleError::MissingChecksum {
                    path: pack_path.display().to_string(),
                });
            }
            warn!(
                pack = %pack_path.display(),
                sidecar = %sidecar_path.display(),
                "rule pack loaded without integrity verification — \
                 issue a `<pack>.sha256` sidecar to silence this warning"
            );
            return Ok(());
        }
        Err(FileSystemError::IoError(io)) => return Err(RuleError::IoError(io)),
    };

    // Validate in place; there is no need to copy the bytes into a `String`.
    let sidecar_text = std::str::from_utf8(sidecar_bytes.as_bytes()).map_err(|err| {
        RuleError::IoError(std::io::Error::new(std::io::ErrorKind::InvalidData, err))
    })?;
    let expected = parse_checksum_sidecar(sidecar_text).ok_or_else(|| {
        RuleError::IoError(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!(
                "rule pack sidecar `{}` does not contain a 64-char hex SHA-256 digest",
                sidecar_path.display()
            ),
        ))
    })?;

    // Both sides are lowercase hex, so plain string equality is sufficient.
    let actual = sha256_hex_of(body);
    if expected == actual {
        Ok(())
    } else {
        Err(RuleError::ChecksumMismatch {
            path: pack_path.display().to_string(),
            expected,
            actual,
        })
    }
}
/// Controls how `verify_pack_checksum` treats a rule pack's `.sha256` sidecar.
///
/// Whenever a sidecar is read (any policy except `Lenient`), a digest that
/// does not match the pack body is an error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChecksumPolicy {
/// Skip verification entirely; the sidecar is never read.
Lenient,
/// Verify when a sidecar exists; when it is missing, log a warning and
/// accept the pack. This is the policy set by `RuleEngine::with_matcher`.
WarnOnMissing,
/// Verify, and fail with `RuleError::MissingChecksum` when the sidecar is missing.
Required,
}
/// Holds a set of compiled rules and evaluates them against skill documents
/// using a shared pattern matcher `M`.
pub struct RuleEngine<M: PatternMatcher> {
// Compiled rules in the order they were accepted; ids are kept unique by the
// duplicate checks in `add_rule` / `load_rules_file`.
rules: Vec<CompiledRule>,
// Directory most recently passed to `load_from_dir`. Written there but not
// read anywhere in this file's visible code.
rules_dir: Option<std::path::PathBuf>,
// Matcher handed to `CompiledRule::matches` during evaluation.
matcher: Arc<M>,
// When true, a duplicate rule id is an error; when false, the duplicate is
// skipped with a warning and the already-loaded rule is kept.
strict_mode: bool,
// Integrity policy passed to `verify_pack_checksum` for every loaded rule file.
checksum_policy: ChecksumPolicy,
}
impl<M: PatternMatcher> RuleEngine<M> {
/// Creates an engine with no rules that will use `matcher` for evaluation.
///
/// The new engine starts in strict mode (duplicate rule ids are errors) and
/// with [`ChecksumPolicy::WarnOnMissing`].
#[must_use]
pub fn with_matcher(matcher: Arc<M>) -> Self {
    let rules = Vec::new();
    let rules_dir = None;
    Self {
        rules,
        rules_dir,
        matcher,
        strict_mode: true,
        checksum_policy: ChecksumPolicy::WarnOnMissing,
    }
}
/// Replaces the integrity policy applied to rule files loaded from now on.
///
/// Rules that are already loaded are not re-verified.
pub fn set_checksum_policy(&mut self, policy: ChecksumPolicy) {
    self.checksum_policy = policy;
}
/// Turns strict duplicate handling on or off for subsequent loads.
///
/// With `strict == true` a duplicate rule id is reported as
/// [`RuleError::DuplicateUserRule`]; otherwise the duplicate is skipped with
/// a warning and the existing rule is kept.
pub fn set_strict_mode(&mut self, strict: bool) {
    self.strict_mode = strict;
}
/// Builds an engine pre-populated with the built-in rules, then overlays any
/// rule packs found in `runtime_overlay_dirs`.
///
/// Built-in rules are added first (in strict mode, as set by
/// `with_matcher`). Overlay directories that do not exist are skipped, and
/// overlay rules whose id is already present are dropped with a warning
/// rather than treated as an error.
///
/// # Errors
///
/// Propagates any [`RuleError`] raised while compiling built-in rules or
/// while reading, verifying, parsing, or compiling an overlay rule file.
#[must_use = "RuleEngine::with_defaults_and_matcher() returns a Result that should be used"]
pub fn with_defaults_and_matcher<F: FileSystemProvider>(
    matcher: Arc<M>,
    runtime_overlay_fs: &F,
    runtime_overlay_dirs: &[std::path::PathBuf],
) -> Result<Self, RuleError> {
    let mut this = Self::with_matcher(matcher);
    // Built-ins first so that overlay duplicates lose to them.
    this.load_builtin_rules()
        .and_then(|()| this.load_runtime_default_rules(runtime_overlay_fs, runtime_overlay_dirs))?;
    Ok(this)
}
/// Adds every rule returned by `builtin::get_builtin_rules` via `add_rule`,
/// stopping at the first error.
fn load_builtin_rules(&mut self) -> Result<(), RuleError> {
    builtin::get_builtin_rules()?
        .into_iter()
        .try_for_each(|builtin_rule| self.add_rule(builtin_rule))
}
pub fn load_from_dir<F: FileSystemProvider>(
&mut self,
fs: &F,
dir: impl AsRef<Path>,
) -> Result<(), RuleError> {
let dir = dir.as_ref();
self.rules_dir = Some(dir.to_path_buf());
for pattern in &["*.yaml", "*.yml"] {
let paths = fs.list_files(dir, pattern, true).map_err(|err| match err {
FileSystemError::IoError(io) => RuleError::IoError(io),
FileSystemError::PathNotFound(missing) => RuleError::IoError(std::io::Error::new(
std::io::ErrorKind::NotFound,
format!("path not found: {}", missing.display()),
)),
})?;
for path in paths {
self.load_rules_file(fs, &path)?;
}
}
Ok(())
}
pub fn load_rules_file<F: FileSystemProvider>(
&mut self,
fs: &F,
path: impl AsRef<Path>,
) -> Result<(), RuleError> {
let bytes = fs.read_file_bytes(path.as_ref()).map_err(|err| match err {
FileSystemError::IoError(io) => RuleError::IoError(io),
FileSystemError::PathNotFound(missing) => RuleError::IoError(std::io::Error::new(
std::io::ErrorKind::NotFound,
format!("path not found: {}", missing.display()),
)),
})?;
verify_pack_checksum(fs, path.as_ref(), bytes.as_bytes(), self.checksum_policy)?;
let content = String::from_utf8(bytes.as_bytes().to_vec()).map_err(|err| {
RuleError::IoError(std::io::Error::new(std::io::ErrorKind::InvalidData, err))
})?;
for rule in parse_rules_file(&content)? {
let compiled = CompiledRule::compile(rule)?;
if self
.rules
.iter()
.any(|existing| existing.rule.id == compiled.rule.id)
{
if self.strict_mode {
return Err(RuleError::DuplicateUserRule {
id: compiled.rule.id.clone(),
path: path.as_ref().display().to_string(),
});
}
warn!(
rule_id = %compiled.rule.id,
path = %path.as_ref().display(),
"skipping duplicate rule ID (existing rule takes priority)"
);
} else {
self.rules.push(compiled);
}
}
Ok(())
}
/// Compiles `rule` and adds it to the engine.
///
/// If a rule with the same id is already present, strict mode turns that
/// into an error; otherwise the new rule is discarded with a warning and the
/// existing one is kept.
///
/// # Errors
///
/// * Whatever `CompiledRule::compile` reports for an invalid rule.
/// * [`RuleError::DuplicateUserRule`] (with path `<programmatic add_rule>`)
///   for a duplicate id while strict mode is on.
pub fn add_rule(&mut self, rule: Rule) -> Result<(), RuleError> {
    let compiled = CompiledRule::compile(rule)?;

    let already_present = self
        .rules
        .iter()
        .any(|known| known.rule.id == compiled.rule.id);
    if !already_present {
        self.rules.push(compiled);
        return Ok(());
    }

    if self.strict_mode {
        return Err(RuleError::DuplicateUserRule {
            id: compiled.rule.id.clone(),
            path: "<programmatic add_rule>".to_string(),
        });
    }

    warn!(
        rule_id = %compiled.rule.id,
        "skipping duplicate rule ID (existing rule takes priority)"
    );
    Ok(())
}
/// Returns references to the source definitions of all loaded rules, in
/// load order.
pub fn rules(&self) -> Vec<&Rule> {
    let mut definitions = Vec::with_capacity(self.rules.len());
    for compiled in &self.rules {
        definitions.push(&compiled.rule);
    }
    definitions
}
/// Runs every loaded rule against `doc` and returns all findings,
/// concatenated in rule load order.
pub fn evaluate(&self, doc: &crate::analyzer::SkillDocument) -> Vec<crate::findings::Finding> {
    self.rules
        .iter()
        .flat_map(|compiled| compiled.matches(doc, self.matcher.as_ref()))
        .collect()
}
/// Returns how many compiled rules the engine currently holds.
pub fn rule_count(&self) -> usize {
    let loaded = &self.rules;
    loaded.len()
}
/// Evaluates only the rule(s) whose id equals `rule_id` against ad-hoc
/// markdown `content`.
///
/// The content is parsed with `parser` as a document at the placeholder path
/// `test.md`. When no loaded rule has the given id, the result is an empty
/// list rather than an error.
///
/// # Errors
///
/// Returns [`RuleError::InvalidRule`] carrying the parser's message when the
/// content cannot be parsed into a document.
pub fn test_rule(
    &self,
    rule_id: &str,
    content: &str,
    parser: &dyn MarkdownParser,
) -> Result<Vec<crate::findings::Finding>, RuleError> {
    let parsed = crate::analyzer::SkillDocument::parse_with_parser(
        std::path::PathBuf::from("test.md"),
        content.to_string(),
        parser,
    );
    let doc = match parsed {
        Ok(doc) => doc,
        Err(e) => return Err(RuleError::InvalidRule(e.to_string())),
    };

    let mut findings = Vec::new();
    for candidate in &self.rules {
        if candidate.rule.id == rule_id {
            findings.extend(candidate.matches(&doc, self.matcher.as_ref()));
        }
    }
    Ok(findings)
}
/// Loads rule packs from each directory in `dirs` that exists, with strict
/// mode temporarily disabled so duplicate ids are skipped instead of failing.
///
/// Returns `Ok(true)` when at least one directory existed and was loaded,
/// `Ok(false)` when none did. The engine's previous strict-mode setting is
/// restored afterwards, whether or not loading succeeded.
fn load_runtime_default_rules<F: FileSystemProvider>(
    &mut self,
    fs: &F,
    dirs: &[std::path::PathBuf],
) -> Result<bool, RuleError> {
    self.with_strict_mode(false, |engine| {
        let mut any_loaded = false;
        for overlay_dir in dirs {
            if !fs.exists(overlay_dir) {
                continue;
            }
            engine.load_from_dir(fs, overlay_dir)?;
            any_loaded = true;
        }
        Ok(any_loaded)
    })
}
/// Runs `f` with strict mode set to `temporary`, then restores the previous
/// setting and returns `f`'s result.
///
/// The previous value is restored on both `Ok` and `Err`; it is not restored
/// if `f` panics.
fn with_strict_mode<R>(
    &mut self,
    temporary: bool,
    f: impl FnOnce(&mut Self) -> Result<R, RuleError>,
) -> Result<R, RuleError> {
    let saved = self.strict_mode;
    self.strict_mode = temporary;
    let outcome = f(self);
    self.strict_mode = saved;
    outcome
}
}
#[cfg(test)]
mod tests;