vyre 0.3.0

GPU bytecode condition engine
Documentation
#[cfg(feature = "gpu")]
use bytemuck::{Pod, Zeroable};

use crate::error::{Error, Result};

/// Operation codes understood by the VM.
///
/// The enum is `#[repr(u32)]` and every variant carries an explicit
/// discriminant. The discriminants run contiguously from `1` through `63`;
/// no variant maps to `0`. Casting a variant with `as u32` yields that number.
#[repr(u32)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Opcode {
    /// Pushes `true` onto the evaluation stack.
    PushTrue = 1,
    /// Pushes `false` onto the evaluation stack.
    PushFalse = 2,
    /// Pushes whether the current string matched.
    PushStringMatched = 3,
    /// Pushes the current string's match count.
    PushStringCount = 4,
    /// Pushes the offset of the current string match.
    PushStringOffset = 5,
    /// Pushes the length of the current string match.
    PushStringLength = 6,
    /// Pushes the size of the file in bytes.
    PushFileSize = 7,
    /// Pushes the number of rule entries.
    PushEntryCount = 8,
    /// Pushes an immediate operand value.
    PushImmediate = 9,
    /// Pushes the total number of strings.
    PushNumStrings = 10,
    /// Boolean AND over the two topmost stack values.
    And = 11,
    /// Boolean OR over the two topmost stack values.
    Or = 12,
    /// Boolean NOT of the topmost stack value.
    Not = 13,
    /// Compares for equality.
    Eq = 14,
    /// Compares for inequality.
    Neq = 15,
    /// Compares for less-than.
    Lt = 16,
    /// Compares for greater-than.
    Gt = 17,
    /// Compares for less-than-or-equal.
    Lte = 18,
    /// Compares for greater-than-or-equal.
    Gte = 19,
    /// Arithmetic addition.
    Add = 20,
    /// Arithmetic subtraction.
    Sub = 21,
    /// Counts the number of matching items.
    CountOf = 22,
    /// True when every item matches.
    AllOf = 23,
    /// True when at least one item matches.
    AnyOf = 24,
    /// Checks whether a string exists at a specific offset.
    StringAt = 25,
    /// Checks whether a string exists in a range.
    StringIn = 26,
    /// Begins a `for any` iteration block.
    ForAny = 27,
    /// Begins a `for all` iteration block.
    ForAll = 28,
    /// Ends the current `for` block.
    EndFor = 29,
    /// Stops execution.
    Halt = 30,
    /// Arithmetic multiplication.
    Mul = 31,
    /// Arithmetic division.
    Div = 32,
    /// Arithmetic modulo.
    Mod = 33,
    /// Bitwise AND.
    BitAnd = 34,
    /// Bitwise OR.
    BitOr = 35,
    /// Bitwise XOR.
    BitXor = 36,
    /// Bitwise left shift.
    Shl = 37,
    /// Bitwise right shift.
    Shr = 38,
    /// Reads a little-endian integer at the given offset.
    ReadIntAt = 39,
    /// Begins a `for N` iteration block.
    ForN = 40,
    /// Pushes the file entropy bucket.
    PushEntropy = 41,
    /// Pushes whether the file is a PE.
    PushIsPe = 42,
    /// Pushes whether the PE is a DLL.
    PushIsDll = 43,
    /// Pushes the number of PE sections.
    PushNumSections = 44,
    /// Pushes the number of PE imports.
    PushNumImports = 45,
    /// Pushes the PE entry point RVA.
    PushEntryPoint = 46,
    /// Pushes whether the PE has a signature.
    PushHasSignature = 47,
    /// Pushes the first four bytes of the file as a `u32`.
    PushMagicU32 = 48,
    /// Pushes whether the PE is 64-bit.
    PushIs64bit = 49,
    /// Pops `pattern_a`, `pattern_b`; pushes `1` when the first match of
    /// `pattern_a` appears before the first match of `pattern_b`, otherwise `0`.
    MatchOrder = 50,
    /// Pops `pattern_a`, `pattern_b`; pushes the smallest byte gap between any
    /// cached matches of the two patterns, or `0` when either pattern is absent.
    MatchDistance = 51,
    /// Pops `pattern_a`, `pattern_b`, `pattern_c`; pushes `1` when any match of
    /// `pattern_c` starts between the first matches of `pattern_a` and
    /// `pattern_b`.
    MatchBetween = 52,
    /// Pops `pattern_a`, `pattern_b`; pushes `1` when any cached matches of both
    /// patterns share the same approximate brace nesting level.
    MatchSameScope = 53,
    /// Pops `pattern_a`, `pattern_b`, `max_distance`; pushes `1` when any match
    /// of `pattern_b` starts within `max_distance` bytes after a match of
    /// `pattern_a`.
    MatchAfter = 54,
    /// Pops `pattern_id`; pushes the entropy bucket (`0..=255`) of that
    /// pattern's first cached matched region, or `0` when no cached region
    /// exists.
    MatchedRegionEntropy = 55,
    /// Pops `pattern_id`, `index`; pushes the length of the indexed cached
    /// match. Mirrors [`Opcode::PushStringLength`], but with explicit stack
    /// operands.
    MatchedRegionLength = 56,
    /// Pops `pattern_id`, `window_size`; pushes the largest number of cached
    /// matches whose start offsets fall within any `window_size`-byte window.
    PatternDensity = 57,
    /// Pushes the number of distinct global pattern ids that matched this file.
    UniquePatternCount = 58,
    /// Pushes the total number of matches observed for this file across all
    /// patterns.
    TotalMatchCount = 59,
    /// Pops `offset`; pushes the byte at that file offset, or `0` when the
    /// offset is out of bounds.
    ReadByteAt = 60,
    /// Pops `offset`, `len`, `expected_hash`; pushes `1` when the FNV-1a hash of
    /// the bytes at `offset..offset+len` equals `expected_hash`, otherwise `0`.
    ReadBytesEqual = 61,
    /// Pops `offset`; alias for [`Opcode::ReadByteAt`].
    ByteAt = 62,
    /// Pops `value`, `key`; pushes `value ^ key`.
    XorByte = 63,
}

impl Opcode {
    /// Decode an opcode from its serialized `u32` form.
    ///
    /// # Examples
    /// ```
    /// use rulefire::Opcode;
    ///
    /// assert_eq!(Opcode::from_u32(1).unwrap(), Opcode::PushTrue);
    /// ```
    pub fn from_u32(value: u32) -> Result<Self> {
        const ALL: &[(u32, Opcode)] = &[
            (1, Opcode::PushTrue), (2, Opcode::PushFalse), (3, Opcode::PushStringMatched),
            (4, Opcode::PushStringCount), (5, Opcode::PushStringOffset),
            (6, Opcode::PushStringLength), (7, Opcode::PushFileSize), (8, Opcode::PushEntryCount),
            (9, Opcode::PushImmediate), (10, Opcode::PushNumStrings), (11, Opcode::And),
            (12, Opcode::Or), (13, Opcode::Not), (14, Opcode::Eq), (15, Opcode::Neq),
            (16, Opcode::Lt), (17, Opcode::Gt), (18, Opcode::Lte), (19, Opcode::Gte),
            (20, Opcode::Add), (21, Opcode::Sub), (22, Opcode::CountOf), (23, Opcode::AllOf),
            (24, Opcode::AnyOf), (25, Opcode::StringAt), (26, Opcode::StringIn),
            (27, Opcode::ForAny), (28, Opcode::ForAll), (29, Opcode::EndFor), (30, Opcode::Halt),
            (31, Opcode::Mul), (32, Opcode::Div), (33, Opcode::Mod), (34, Opcode::BitAnd),
            (35, Opcode::BitOr), (36, Opcode::BitXor), (37, Opcode::Shl), (38, Opcode::Shr),
            (39, Opcode::ReadIntAt), (40, Opcode::ForN), (41, Opcode::PushEntropy),
            (42, Opcode::PushIsPe), (43, Opcode::PushIsDll), (44, Opcode::PushNumSections),
            (45, Opcode::PushNumImports), (46, Opcode::PushEntryPoint),
            (47, Opcode::PushHasSignature), (48, Opcode::PushMagicU32),
            (49, Opcode::PushIs64bit), (50, Opcode::MatchOrder),
            (51, Opcode::MatchDistance), (52, Opcode::MatchBetween),
            (53, Opcode::MatchSameScope), (54, Opcode::MatchAfter),
            (55, Opcode::MatchedRegionEntropy), (56, Opcode::MatchedRegionLength),
            (57, Opcode::PatternDensity), (58, Opcode::UniquePatternCount),
            (59, Opcode::TotalMatchCount), (60, Opcode::ReadByteAt),
            (61, Opcode::ReadBytesEqual), (62, Opcode::ByteAt), (63, Opcode::XorByte),
        ];
        ALL.iter()
            .find_map(|(raw, opcode)| (*raw == value).then_some(*opcode))
            .ok_or_else(|| Error::BytecodeValidation {
                message: format!("unknown opcode {value}"),
            })
    }
}

/// One VM instruction: a raw opcode word followed by an operand word.
///
/// The struct is `#[repr(C)]` with two `u32` fields, so it occupies 8 bytes.
/// When the `gpu` feature is enabled it additionally derives `Pod` and
/// `Zeroable`.
#[repr(C)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "gpu", derive(Pod, Zeroable))]
pub struct Instruction {
    /// Raw opcode value, stored as a `u32`.
    pub opcode: u32,
    /// Operand that accompanies the opcode.
    pub operand: u32,
}

impl Instruction {
    /// Builds an instruction from a typed opcode and a raw operand.
    ///
    /// The opcode is stored as its `u32` discriminant; the operand is stored
    /// unchanged.
    ///
    /// # Examples
    /// ```
    /// use rulefire::{Instruction, Opcode};
    ///
    /// let instruction = Instruction::new(Opcode::PushImmediate, 7);
    /// assert_eq!(instruction.operand, 7);
    /// ```
    pub const fn new(opcode: Opcode, operand: u32) -> Self {
        let opcode = opcode as u32;
        Self { opcode, operand }
    }

    /// Decodes the stored raw opcode into an [`Opcode`].
    ///
    /// # Errors
    /// Returns the error produced by [`Opcode::from_u32`] when the stored
    /// `opcode` field does not correspond to any known opcode.
    ///
    /// # Examples
    /// ```
    /// use rulefire::{Instruction, Opcode};
    ///
    /// let instruction = Instruction::new(Opcode::PushTrue, 0);
    /// assert_eq!(instruction.kind().unwrap(), Opcode::PushTrue);
    /// ```
    pub fn kind(self) -> Result<Opcode> {
        let Self { opcode: raw, .. } = self;
        Opcode::from_u32(raw)
    }
}