// vyre 0.2.0
//
// GPU bytecode condition engine
// Documentation
use matchkit::Match;

use rulefire::CompiledPattern;
use rulefire::CompiledRuleIndex;
use rulefire::PatternMapping;
use rulefire::RuleEntry;
use rulefire::vm::bytecode::{Instruction, Program};
use rulefire::yara::ast::{Rule, RuleModifiers, StringDecl, StringModifiers, StringPattern};
use rulefire::yara::{compiler, parser::parse_source};
use rulefire::{self, FileContext};

/// Parse YARA `source` and assemble a ready-to-scan [`CompiledRuleIndex`].
///
/// The source is parsed under the label `"<test_case>"`, the pattern mapping
/// is derived from the parsed rules, every rule is compiled to a bytecode
/// program, and each compiled pattern is registered with a `warpstate`
/// pattern set as either a regex or a byte literal.
///
/// # Panics
///
/// Panics if parsing, mapping construction, rule compilation, or building
/// the pattern set fails.
pub fn build_index_from_source(source: &str) -> CompiledRuleIndex {
    let parsed = parse_source(source, "<test_case>").expect("parse YARA source");
    let (patterns, mapping) =
        rulefire::gpu::mapping::build_mapping(&parsed).expect("build mapping");

    // One bytecode program per rule, in rule order.
    let mut programs = Vec::new();
    for rule in parsed.iter() {
        programs.push(compiler::compile_rule(rule).expect("compile rule"));
    }

    // Per-rule metadata: name, tags, and the identifiers of its strings.
    let mut rules_meta = Vec::new();
    for rule in parsed.iter() {
        rules_meta.push(RuleEntry {
            name: rule.name.clone(),
            tags: rule.tags.clone(),
            strings: rule
                .strings
                .iter()
                .map(|decl| decl.identifier.clone())
                .collect(),
        });
    }

    // Register every pattern with the matcher, regex or literal as flagged.
    let mut set_builder = warpstate::PatternSet::builder();
    for compiled in &patterns {
        set_builder = match compiled.is_regex {
            true => set_builder.regex(&compiled.source),
            false => set_builder.literal_bytes(compiled.source.clone().into_bytes()),
        };
    }
    let pattern_set = set_builder.build().expect("build pattern set");

    CompiledRuleIndex::build(rules_meta, patterns, mapping, programs, pattern_set)
}

/// Compile the first rule found in YARA `source` into a bytecode [`Program`].
///
/// Only the first parsed rule is compiled; any further rules in `source`
/// are ignored.
///
/// # Panics
///
/// Panics if `source` fails to parse, contains no rules, or the first rule
/// fails to compile.
pub fn compile_source_program(source: &str) -> Program {
    let parsed = parse_source(source, "<test_case>").expect("parse YARA source");
    let first_rule = parsed.first().expect("at least one rule");
    let program = compiler::compile_rule(first_rule);
    program.expect("compile rule")
}

/// Scan `input` on the CPU with the rules in `source` and return the name of
/// the rule behind every reported match, in the order the scan yields them.
///
/// # Panics
///
/// Panics if the index cannot be built from `source` or the CPU scan fails.
pub fn scan_cpu_from_source(source: &str, input: &[u8]) -> Vec<String> {
    let index = build_index_from_source(source);
    let hits = index.scan_cpu(input).expect("cpu scan");

    let mut rule_names = Vec::new();
    for hit in hits.into_iter() {
        rule_names.push(hit.rule_name);
    }
    rule_names
}

/// Evaluate `program` on the CPU against `input` using a synthetic index
/// whose two string literals are `"alpha"` and `"beta"`.
///
/// Returns `true` when the CPU scan reports at least one match.
///
/// # Panics
///
/// Panics if the CPU scan fails.
pub fn run_program_cpu(program: Program, input: &[u8]) -> bool {
    let synthetic = build_index_with_patterns(program, &["alpha", "beta"]);
    let hits = synthetic.scan_cpu(input).expect("cpu scan");
    !hits.is_empty()
}

/// Run bytecode against explicit matches for direct interpreter validation.
pub fn run_evaluate_index(program: Program, file_bytes: &[u8], matches: &[Match]) -> Vec<bool> {
    let index = build_index_with_patterns(program, &["alpha", "beta"]);
    let file_ctx = FileContext {
        file_size: file_bytes.len() as u32,
        ..Default::default()
    };
    rulefire::vm::interpreter::evaluate_index(&index, file_bytes, matches, file_ctx)
        .expect("interpret")
}

/// Evaluate `program` with the interpreter against caller-supplied `matches`
/// and a caller-supplied `file_ctx`.
///
/// The program is wrapped in a synthetic index whose two string literals are
/// `"alpha"` and `"beta"`; the interpreter's result vector is returned as-is.
///
/// # Panics
///
/// Panics if interpretation fails.
pub fn run_evaluate_index_with_ctx(
    program: Program,
    file_bytes: &[u8],
    matches: &[Match],
    file_ctx: FileContext,
) -> Vec<bool> {
    let synthetic = build_index_with_patterns(program, &["alpha", "beta"]);
    let outcome =
        rulefire::vm::interpreter::evaluate_index(&synthetic, file_bytes, matches, file_ctx);
    outcome.expect("interpret")
}

/// Produce the synthetic string identifier for the `idx`-th literal.
///
/// Indices 0 through 25 map to `$a` through `$z`. Larger indices cycle
/// through the alphabet again with a numeric suffix (`$a1`, `$b1`, ...), so
/// every index gets a distinct identifier and no byte arithmetic can
/// overflow or truncate.
fn synthetic_identifier(idx: usize) -> String {
    const LETTERS: &[u8; 26] = b"abcdefghijklmnopqrstuvwxyz";
    let letter = char::from(LETTERS[idx % LETTERS.len()]);
    match idx / LETTERS.len() {
        0 => format!("${}", letter),
        cycle => format!("${}{}", letter, cycle),
    }
}

/// Build a single-rule index around `program` with one literal pattern per
/// entry of `literals`.
///
/// The index contains one rule named `"test_rule"` (rule id 0). The literal
/// at position `i` becomes pattern `i` / string `i` of that rule, is
/// registered as a non-regex byte literal, and is mapped to exactly that one
/// rule. String identifiers come from [`synthetic_identifier`].
///
/// # Panics
///
/// Panics if the number of literals does not fit in `u32` or the pattern set
/// cannot be built.
fn build_index_with_patterns(program: Program, literals: &[&str]) -> CompiledRuleIndex {
    // Every literal belongs to the one synthetic rule.
    let rule_id = 0u32;
    // Compute each identifier once; both the patterns and the rule metadata
    // must agree on them.
    let identifiers: Vec<String> = (0..literals.len()).map(synthetic_identifier).collect();

    let mut patterns = Vec::with_capacity(literals.len());
    let mut pattern_to_rules = Vec::with_capacity(literals.len());
    let mut rule_list = Vec::with_capacity(literals.len());
    let mut string_local_ids = Vec::with_capacity(literals.len());

    for (idx, (literal, identifier)) in literals.iter().zip(&identifiers).enumerate() {
        let pattern_id = u32::try_from(idx).expect("pattern id fits");
        // Pattern ids and rule-local string ids coincide in this fixture.
        let string_id = pattern_id;
        patterns.push(CompiledPattern {
            pattern_id,
            rule_id,
            string_id,
            identifier: identifier.clone(),
            source: (*literal).to_string(),
            is_regex: false,
        });
        // Each pattern owns a one-element slice of `rule_list`: [start, len].
        let start = u32::try_from(rule_list.len()).expect("mapping index");
        pattern_to_rules.push([start, 1]);
        rule_list.push(rule_id);
        string_local_ids.push(string_id);
    }

    let mapping = PatternMapping {
        pattern_to_rules,
        rule_list,
        string_local_ids,
    };

    let rules_meta = vec![RuleEntry {
        name: "test_rule".to_string(),
        tags: Vec::new(),
        strings: identifiers.into_iter().collect(),
    }];

    let mut builder = warpstate::PatternSet::builder();
    for literal in literals {
        builder = builder.literal_bytes(literal.as_bytes().to_vec());
    }
    let pattern_set = builder.build().expect("pattern set");

    CompiledRuleIndex::build(rules_meta, patterns, mapping, vec![program], pattern_set)
}

/// Try to obtain a `wgpu` device and queue using default descriptors.
///
/// Blocks the calling thread while the adapter and device requests resolve.
/// Returns `None` when no adapter is returned or the device request fails.
#[cfg(feature = "gpu")]
pub fn acquire_device() -> Option<(wgpu::Device, wgpu::Queue)> {
    let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::default());

    // No adapter means no usable GPU; bail out with `None`.
    let adapter = pollster::block_on(
        instance.request_adapter(&wgpu::RequestAdapterOptions::default()),
    )?;

    let opened =
        pollster::block_on(adapter.request_device(&wgpu::DeviceDescriptor::default(), None));
    opened.ok()
}

/// Check that scanning `input` with the rules in `source` gives equal
/// results on the CPU path and on the GPU path.
///
/// # Panics
///
/// Panics if the index cannot be built, either scan fails, or no GPU device
/// can be acquired.
#[cfg(feature = "gpu")]
pub fn cpu_gpu_parity_ok(source: &str, input: &[u8]) -> bool {
    let index = build_index_from_source(source);
    let cpu_hits = index.scan_cpu(input).expect("cpu");

    let (device, queue) = acquire_device().expect("gpu available");
    let gpu_hits = index
        .scan_with_device(input, &device, &queue)
        .expect("gpu");

    cpu_hits == gpu_hits
}

/// Report whether a GPU device can currently be acquired.
///
/// Performs a real acquisition attempt via `acquire_device` and discards the
/// resulting device and queue.
#[cfg(feature = "gpu")]
pub fn is_gpu_available() -> bool {
    matches!(acquire_device(), Some(_))
}

/// Report whether a GPU device can currently be acquired.
///
/// This variant is compiled when the `gpu` feature is disabled, so the
/// answer is always `false`.
#[cfg(not(feature = "gpu"))]
pub fn is_gpu_available() -> bool {
    false
}