// vyre 0.3.0
//
// GPU bytecode condition engine
// Documentation
//! Pattern-to-rule mapping tests.
#![cfg(feature = "yara")]

use rulefire::gpu::mapping::build_mapping;
use rulefire::yara::ast::{
    Condition, RegexModifiers, Rule, RuleModifiers, StringDecl, StringModifiers, StringPattern,
};

fn make_rule(name: &str, strings: Vec<(&str, &str)>, condition: Condition) -> Rule {
    Rule {
        name: name.to_string(),
        tags: vec![],
        meta: vec![],
        strings: strings
            .into_iter()
            .map(|(id, pattern)| StringDecl {
                identifier: id.to_string(),
                pattern: StringPattern::Text(pattern.as_bytes().to_vec()),
                modifiers: StringModifiers::default(),
            })
            .collect(),
        condition,
        source: "test".to_string(),
        modifiers: Default::default(),
    }
}

/// A single rule with a single text string produces exactly one pattern whose
/// pattern, rule and string ids are all zero, and each mapping table holds one entry.
#[test]
fn mapping_single_rule_single_string() {
    let condition = Condition::StringMatch(String::from("$a"));
    let rules = vec![make_rule("test", vec![("$a", "test")], condition)];

    let (patterns, mapping) = build_mapping(&rules).unwrap();

    // Pattern side: one entry, every id is the first slot.
    assert_eq!(patterns.len(), 1);
    let only = &patterns[0];
    assert_eq!(only.pattern_id, 0);
    assert_eq!(only.rule_id, 0);
    assert_eq!(only.string_id, 0);

    // Mapping side: each lookup table has exactly one element.
    assert_eq!(mapping.pattern_to_rules.len(), 1);
    assert_eq!(mapping.rule_list.len(), 1);
    assert_eq!(mapping.string_local_ids.len(), 1);
}

/// Three strings in one rule yield three patterns; pattern ids and string ids both
/// count up from zero in declaration order while the rule id stays zero.
#[test]
fn mapping_single_rule_multiple_strings() {
    let strings = vec![("$a", "alpha"), ("$b", "beta"), ("$c", "gamma")];
    let rules = vec![make_rule("test", strings, Condition::Bool(true))];

    let (patterns, _mapping) = build_mapping(&rules).unwrap();

    assert_eq!(patterns.len(), 3);
    // Pair each pattern with its expected position, counted directly as u32.
    for (expected, pattern) in (0u32..).zip(patterns.iter()) {
        assert_eq!(pattern.pattern_id, expected);
        assert_eq!(pattern.rule_id, 0);
        assert_eq!(pattern.string_id, expected);
    }
}

/// Two rules with one string each yield two patterns, each tagged with the index
/// of the rule it came from.
#[test]
fn mapping_multiple_rules() {
    let first_rule = make_rule("rule1", vec![("$a", "pattern1")], Condition::Bool(true));
    let second_rule = make_rule("rule2", vec![("$b", "pattern2")], Condition::Bool(true));
    let rules = vec![first_rule, second_rule];

    let (patterns, _mapping) = build_mapping(&rules).unwrap();

    assert_eq!(patterns.len(), 2);
    let from_first = &patterns[0];
    assert_eq!(from_first.rule_id, 0);
    let from_second = &patterns[1];
    assert_eq!(from_second.rule_id, 1);
}

/// The identifier of each declared string is carried over unchanged onto the
/// pattern at the same position.
#[test]
fn mapping_preserves_string_identifiers() {
    let strings = vec![("$magic", "magic_bytes"), ("$header", "header_bytes")];
    let rules = vec![make_rule("test", strings, Condition::Bool(true))];

    let (patterns, _) = build_mapping(&rules).unwrap();

    // Index directly (rather than zip) so a short result still fails loudly.
    let expected = ["$magic", "$header"];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(patterns[idx].identifier, *want);
    }
}

/// A text string and a regex string in the same rule are told apart through the
/// `is_regex` flag on the resulting patterns.
#[test]
fn mapping_distinguishes_regex() {
    let text_decl = StringDecl {
        identifier: String::from("$text"),
        pattern: StringPattern::Text(b"literal".to_vec()),
        modifiers: StringModifiers::default(),
    };
    let regex_decl = StringDecl {
        identifier: String::from("$regex"),
        pattern: StringPattern::Regex(String::from("[a-z]+"), RegexModifiers::default()),
        modifiers: StringModifiers::default(),
    };
    let rule = Rule {
        name: String::from("test"),
        tags: Vec::new(),
        meta: Vec::new(),
        strings: vec![text_decl, regex_decl],
        condition: Condition::Bool(true),
        source: String::from("test"),
        modifiers: RuleModifiers::default(),
    };
    let rules = vec![rule];

    let (patterns, _) = build_mapping(&rules).unwrap();

    let from_text = &patterns[0];
    assert!(!from_text.is_regex);
    let from_regex = &patterns[1];
    assert!(from_regex.is_regex);
}

/// A hex string made of the bytes DE AD BE EF is emitted as a byte-oriented regex
/// source and flagged as a regex.
#[test]
fn mapping_hex_pattern_converted() {
    use rulefire::yara::ast::HexToken;

    let tokens: Vec<HexToken> = [0xDE, 0xAD, 0xBE, 0xEF]
        .iter()
        .copied()
        .map(HexToken::Byte)
        .collect();
    let hex_decl = StringDecl {
        identifier: String::from("$hex"),
        pattern: StringPattern::Hex(tokens),
        modifiers: StringModifiers::default(),
    };
    let rules = vec![Rule {
        name: String::from("test"),
        tags: Vec::new(),
        meta: Vec::new(),
        strings: vec![hex_decl],
        condition: Condition::Bool(true),
        source: String::from("test"),
        modifiers: RuleModifiers::default(),
    }];

    let (patterns, _) = build_mapping(&rules).unwrap();
    let converted = &patterns[0];

    // Hex bytes that are not valid UTF-8 are kept as a regex built from \xNN
    // escapes so that warpstate can match real binary content. The earlier
    // representation (the literal text "DE AD BE EF") could never match actual
    // binary data and was a production bug.
    assert_eq!(converted.source, "(?-u)\\xde\\xad\\xbe\\xef");
    assert!(converted.is_regex, "non-ASCII hex patterns must be regex for correct binary matching");
}

/// The text of a plain-text string is exposed verbatim as the pattern's `source`.
#[test]
fn mapping_pattern_sources_preserved() {
    let rule = make_rule("test", vec![("$a", "test_pattern")], Condition::Bool(true));
    let rules = vec![rule];

    let (patterns, _) = build_mapping(&rules).unwrap();

    let first = &patterns[0];
    assert_eq!(first.source, "test_pattern");
}

/// Pattern ids keep counting across rule boundaries: two strings in the first rule
/// and one in the second produce ids 0, 1 and 2 in order.
#[test]
fn mapping_unique_pattern_ids() {
    let two_strings = make_rule(
        "rule1",
        vec![("$a", "pat1"), ("$b", "pat2")],
        Condition::Bool(true),
    );
    let one_string = make_rule("rule2", vec![("$c", "pat3")], Condition::Bool(true));
    let rules = vec![two_strings, one_string];

    let (patterns, _) = build_mapping(&rules).unwrap();

    let mut ids = Vec::with_capacity(patterns.len());
    for pattern in patterns.iter() {
        ids.push(pattern.pattern_id);
    }
    assert_eq!(ids, vec![0, 1, 2]);
}