// vyre 0.3.0
//
// GPU bytecode condition engine
// Documentation
//! Convert rulekit TOML rule definitions to vyre's compiled rule index.
//!
//! This bridges community-contributed TOML detection rules (Tier B) directly
//! into the GPU rule condition engine. Users drop `.toml` files in a rules
//! directory; vyre compiles them into GPU-evaluable bytecode.
//!
//! # Example
//!
//! ```ignore
//! use vyre::toml_rules::compile_toml_rules;
//! use rulekit::RuleFile;
//!
//! let rules = RuleFile::parse_path("rules/malware.toml")?;
//! let index = compile_toml_rules(&rules)?;
//! let matches = index.scan_cpu(file_bytes)?;
//! ```

#[cfg(feature = "toml-rules")]
use crate::core::index::CompiledRuleIndex;
#[cfg(feature = "toml-rules")]
use crate::core::types::{CompiledPattern, PatternMapping, RuleEntry};
#[cfg(feature = "toml-rules")]
use crate::error::{Error, Result};
#[cfg(feature = "toml-rules")]
use crate::vm::bytecode::{Instruction, Program};

/// Compile a set of rulekit TOML rules into a vyre `CompiledRuleIndex`.
///
/// Every rule in `rule_file` becomes one vyre rule that owns exactly one
/// pattern. Rules whose pattern type is `rulekit::PatternType::Regex` are
/// registered with the warpstate builder as regexes; every other pattern type
/// is registered as a literal. Each rule's condition program fires when its
/// single string matched at least once.
///
/// # Errors
///
/// Returns [`Error::Pattern`] if:
///
/// - `rule_file` contains no rules,
/// - a rule index does not fit in the `u32` ids used by the index, or
/// - warpstate fails to build the pattern set.
#[cfg(feature = "toml-rules")]
pub fn compile_toml_rules(rule_file: &rulekit::RuleFile) -> Result<CompiledRuleIndex> {
    let rules = rule_file.rules();
    if rules.is_empty() {
        return Err(Error::Pattern {
            message: "no rules to compile".to_string(),
        });
    }

    // Ids stored in the index are `u32`. Convert with a check instead of an
    // `as` cast so an oversized rule set is reported rather than silently
    // wrapping onto ids that already belong to other rules.
    let checked_id = |n: usize| -> Result<u32> {
        <u32 as std::convert::TryFrom<usize>>::try_from(n).map_err(|_| Error::Pattern {
            message: format!("too many rules to compile: index {n} does not fit in u32"),
        })
    };

    let rule_count = rules.len();
    let mut rule_entries = Vec::with_capacity(rule_count);
    let mut compiled_patterns = Vec::with_capacity(rule_count);
    let mut programs = Vec::with_capacity(rule_count);
    let mut pattern_to_rules = Vec::with_capacity(rule_count);
    let mut rule_list = Vec::with_capacity(rule_count);
    let mut string_local_ids = Vec::with_capacity(rule_count);
    let mut builder = warpstate::PatternSet::builder();

    for (rule_idx, rule) in rules.iter().enumerate() {
        // One pattern per rule, so the rule id and the pattern id coincide.
        let rule_id = checked_id(rule_idx)?;
        let pattern_id = rule_id;

        // Register the pattern with the warpstate PatternSet builder.
        let is_regex = matches!(rule.pattern_type, rulekit::PatternType::Regex);
        builder = if is_regex {
            builder.regex(&rule.value)
        } else {
            builder.literal(&rule.value)
        };

        // Build the vyre entries. The string identifier is shared between the
        // rule entry and the compiled pattern.
        let string_id = format!("$s{rule_idx}");
        rule_entries.push(RuleEntry {
            name: rule.id.clone(),
            tags: Vec::new(),
            strings: vec![string_id.clone()],
        });

        compiled_patterns.push(CompiledPattern {
            pattern_id,
            rule_id,
            // Each rule has a single string, so its rule-local id is always 0.
            string_id: 0,
            identifier: string_id,
            source: rule.value.clone(),
            is_regex,
        });

        // Simple condition: rule fires if its string matches at least once.
        // Bytecode: PushStringCount(0) → PushImmediate(1) → Gte → Halt
        programs.push(Program {
            instructions: vec![
                Instruction::new(crate::vm::bytecode::Opcode::PushStringCount, 0),
                Instruction::new(crate::vm::bytecode::Opcode::PushImmediate, 1),
                Instruction::new(crate::vm::bytecode::Opcode::Gte, 0),
                Instruction::new(crate::vm::bytecode::Opcode::Halt, 0),
            ],
        });

        // Mapping: pattern_id → [start offset into rule_list, number of rules].
        let rule_list_start = checked_id(rule_list.len())?;
        pattern_to_rules.push([rule_list_start, 1]);
        rule_list.push(rule_id);
        string_local_ids.push(0u32);
    }

    let pattern_set = builder.build().map_err(|e| Error::Pattern {
        message: format!("failed to compile pattern set: {e}"),
    })?;

    let mapping = PatternMapping {
        pattern_to_rules,
        rule_list,
        string_local_ids,
    };

    Ok(CompiledRuleIndex::build(
        rule_entries,
        compiled_patterns,
        mapping,
        programs,
        pattern_set,
    ))
}

/// Load all `.toml` rule files from a directory and compile them.
///
/// This is the production entry point: point at a `rules/` directory,
/// get back a GPU-ready compiled index.
///
/// Only direct children of `dir` whose extension is `toml` are considered;
/// sub-directories are not descended into, and a directory that happens to be
/// named `*.toml` is skipped. Files are processed in sorted path order so that
/// the rule and pattern ids assigned to each rule are the same on every run,
/// regardless of the order in which the filesystem lists the directory.
///
/// Every rule becomes one vyre rule owning exactly one pattern, with a
/// condition program that fires when that pattern matched at least once.
///
/// # Errors
///
/// Returns [`Error::Pattern`] if:
///
/// - the directory or one of its entries cannot be read,
/// - no `.toml` file is found,
/// - a rule file fails to parse,
/// - the rule files contain no rules at all,
/// - a rule index does not fit in the `u32` ids used by the index, or
/// - warpstate fails to build the pattern set.
#[cfg(feature = "toml-rules")]
pub fn compile_rules_dir(dir: impl AsRef<std::path::Path>) -> Result<CompiledRuleIndex> {
    let dir = dir.as_ref();

    let entries = std::fs::read_dir(dir).map_err(|e| Error::Pattern {
        message: format!("failed to read rules directory {}: {e}", dir.display()),
    })?;

    // Collect candidate paths first so they can be ordered before compiling.
    let mut toml_paths = Vec::new();
    for entry in entries {
        let entry = entry.map_err(|e| Error::Pattern {
            message: format!("failed to read directory entry: {e}"),
        })?;
        let path = entry.path();
        // `!is_dir()` (rather than `is_file()`) keeps unreadable or dangling
        // entries in the list so they surface as parse errors instead of
        // being silently ignored.
        if path.extension().is_some_and(|ext| ext == "toml") && !path.is_dir() {
            toml_paths.push(path);
        }
    }

    if toml_paths.is_empty() {
        return Err(Error::Pattern {
            message: format!("no .toml rule files found in {}", dir.display()),
        });
    }

    // `read_dir` yields entries in a platform- and filesystem-dependent order.
    // Sorting makes the id assigned to each rule deterministic.
    toml_paths.sort();

    // Ids stored in the index are `u32`. Convert with a check so an oversized
    // rule set is reported instead of overflowing the counter.
    let checked_id = |n: usize| -> Result<u32> {
        <u32 as std::convert::TryFrom<usize>>::try_from(n).map_err(|_| Error::Pattern {
            message: format!("too many rules to compile: index {n} does not fit in u32"),
        })
    };

    // Accumulate patterns and rules across every rule file.
    let mut all_rule_entries = Vec::new();
    let mut all_compiled = Vec::new();
    let mut all_programs = Vec::new();
    let mut all_p2r = Vec::new();
    let mut all_rule_list = Vec::new();
    let mut all_string_local_ids = Vec::new();
    let mut builder = warpstate::PatternSet::builder();
    let mut next_index = 0usize;

    for path in &toml_paths {
        let rule_file = rulekit::RuleFile::parse_path(path).map_err(|e| Error::Pattern {
            message: format!("failed to parse {}: {e}", path.display()),
        })?;

        for rule in rule_file.rules() {
            // One pattern per rule, so a single id serves as rule id, pattern
            // id and offset into the rule list.
            let idx = checked_id(next_index)?;

            let is_regex = matches!(rule.pattern_type, rulekit::PatternType::Regex);
            builder = if is_regex {
                builder.regex(&rule.value)
            } else {
                builder.literal(&rule.value)
            };

            let string_id = format!("$s{idx}");
            all_rule_entries.push(RuleEntry {
                name: rule.id.clone(),
                tags: Vec::new(),
                strings: vec![string_id.clone()],
            });

            all_compiled.push(CompiledPattern {
                pattern_id: idx,
                rule_id: idx,
                // Each rule has a single string, so its rule-local id is 0.
                string_id: 0,
                identifier: string_id,
                source: rule.value.clone(),
                is_regex,
            });

            // Condition: the rule fires if its string matched at least once.
            // Bytecode: PushStringCount(0) → PushImmediate(1) → Gte → Halt
            all_programs.push(Program {
                instructions: vec![
                    Instruction::new(crate::vm::bytecode::Opcode::PushStringCount, 0),
                    Instruction::new(crate::vm::bytecode::Opcode::PushImmediate, 1),
                    Instruction::new(crate::vm::bytecode::Opcode::Gte, 0),
                    Instruction::new(crate::vm::bytecode::Opcode::Halt, 0),
                ],
            });

            // Mapping: pattern id → [start offset into rule list, rule count].
            all_p2r.push([idx, 1]);
            all_rule_list.push(idx);
            all_string_local_ids.push(0u32);
            next_index += 1;
        }
    }

    // Mirror `compile_toml_rules`: an index with nothing to match is rejected
    // explicitly rather than left to whatever the pattern-set builder does
    // with an empty set.
    if next_index == 0 {
        return Err(Error::Pattern {
            message: format!("no rules to compile in {}", dir.display()),
        });
    }

    let pattern_set = builder.build().map_err(|e| Error::Pattern {
        message: format!("failed to compile pattern set: {e}"),
    })?;

    Ok(CompiledRuleIndex::build(
        all_rule_entries,
        all_compiled,
        PatternMapping {
            pattern_to_rules: all_p2r,
            rule_list: all_rule_list,
            string_local_ids: all_string_local_ids,
        },
        all_programs,
        pattern_set,
    ))
}

/// Tests for compiling rulekit TOML rules into a vyre index and scanning
/// with the CPU path.
#[cfg(all(test, feature = "toml-rules"))]
mod tests {
    use super::*;

    /// Two literal rules: one matching `password`, one matching `eval(`.
    fn sample_toml() -> &'static str {
        r#"
[[rules]]
id = "cred.literal"
name = "Credential literal"
severity = "High"
pattern_type = "Literal"
value = "password"
[rules.metadata]
description = "Detects a literal password"

[[rules]]
id = "eval.call"
name = "Eval call"
severity = "Critical"
pattern_type = "Literal"
value = "eval("
"#
    }

    #[test]
    fn compile_toml_rules_produces_valid_index() {
        let rule_file = rulekit::RuleFile::parse_str(sample_toml()).unwrap();
        let index = compile_toml_rules(&rule_file).unwrap();
        assert_eq!(index.rule_count(), 2);
    }

    #[test]
    fn compiled_index_scans_cpu_correctly() {
        let rule_file = rulekit::RuleFile::parse_str(sample_toml()).unwrap();
        let index = compile_toml_rules(&rule_file).unwrap();

        let matches = index.scan_cpu(b"the password is eval(secret)").unwrap();
        assert_eq!(matches.len(), 2, "should match both rules");

        let names: Vec<&str> = matches.iter().map(|m| m.rule_name.as_str()).collect();
        assert!(names.contains(&"cred.literal"));
        assert!(names.contains(&"eval.call"));
    }

    #[test]
    fn compiled_index_no_false_positives() {
        let rule_file = rulekit::RuleFile::parse_str(sample_toml()).unwrap();
        let index = compile_toml_rules(&rule_file).unwrap();

        let matches = index.scan_cpu(b"nothing suspicious here").unwrap();
        assert!(matches.is_empty(), "should find no matches in clean input");
    }

    #[test]
    fn compiled_index_partial_match() {
        let rule_file = rulekit::RuleFile::parse_str(sample_toml()).unwrap();
        let index = compile_toml_rules(&rule_file).unwrap();

        let matches = index.scan_cpu(b"my password is safe").unwrap();
        assert_eq!(matches.len(), 1, "should match only password rule");
        assert_eq!(matches[0].rule_name, "cred.literal");
    }

    // Previously named `empty_rules_rejected`, although it checks the
    // opposite: a minimal one-rule file (no `severity`) compiles successfully.
    #[test]
    fn single_rule_compiles() {
        let rule_file = rulekit::RuleFile::parse_str("[[rules]]\nid = \"x\"\nname = \"X\"\npattern_type = \"Literal\"\nvalue = \"abc\"\n").unwrap();
        let index = compile_toml_rules(&rule_file);
        assert!(index.is_ok(), "single rule should compile");
    }

    // Exercises the actual empty-input path. Whether rulekit accepts an empty
    // document is decided by rulekit, so the assertion only runs when parsing
    // yields a rule file.
    #[test]
    fn empty_rule_file_rejected() {
        if let Ok(rule_file) = rulekit::RuleFile::parse_str("") {
            assert!(
                compile_toml_rules(&rule_file).is_err(),
                "a rule file without rules should be rejected"
            );
        }
    }

    #[test]
    fn regex_rules_compile_and_match() {
        let toml = r#"
[[rules]]
id = "hex.detect"
name = "Hex string"
severity = "Medium"
pattern_type = "Regex"
value = "[0-9a-f]{32}"
"#;
        let rule_file = rulekit::RuleFile::parse_str(toml).unwrap();
        let index = compile_toml_rules(&rule_file).unwrap();

        let matches = index.scan_cpu(b"hash: 0123456789abcdef0123456789abcdef done").unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].rule_name, "hex.detect");
    }

    #[test]
    fn compile_rules_dir_finds_toml_files() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(
            dir.path().join("test.toml"),
            sample_toml(),
        ).unwrap();

        let index = compile_rules_dir(dir.path()).unwrap();
        assert_eq!(index.rule_count(), 2);

        let matches = index.scan_cpu(b"eval(password)").unwrap();
        assert_eq!(matches.len(), 2);
    }

    #[test]
    fn compile_rules_dir_empty_directory_errors() {
        let dir = tempfile::tempdir().unwrap();
        let result = compile_rules_dir(dir.path());
        assert!(result.is_err());
    }
}