Skip to main content

bytecode_filter/
opcode.rs

//! Bytecode instruction set for filter evaluation.
//!
//! Each opcode is 1 byte, with operands following inline in the bytecode stream.

/// Bytecode opcodes for the filter VM.
///
/// # Encoding
///
/// Every instruction starts with a 1-byte opcode (the enum discriminant),
/// followed inline by zero or more operand bytes whose layout depends on the
/// opcode. 16-bit operands are stored low byte first (little-endian), which
/// is why the per-variant layouts below list `*_lo` before `*_hi`.
///
/// The discriminants are the on-the-wire byte values, so they are spelled out
/// explicitly and listed in ascending order.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Opcode {
    // ============ Stack Operations ============
    // No operands.
    /// Push `true` onto the stack.
    /// Bytecode: `[0x01]`
    PushTrue = 0x01,

    /// Push `false` onto the stack.
    /// Bytecode: `[0x02]`
    PushFalse = 0x02,

    // ============ Payload-wide String Operations ============
    // Operand: u16 string_index (little-endian)
    /// Check if the payload contains the string at index.
    /// Bytecode: `[0x10, idx_lo, idx_hi]`
    Contains = 0x10,

    /// Check if the payload starts with the string at index.
    /// Bytecode: `[0x11, idx_lo, idx_hi]`
    StartsWith = 0x11,

    /// Check if the payload ends with the string at index.
    /// Bytecode: `[0x12, idx_lo, idx_hi]`
    EndsWith = 0x12,

    /// Check if the payload equals the string at index.
    /// Bytecode: `[0x13, idx_lo, idx_hi]`
    Equals = 0x13,

    /// Check if the payload matches the regex at index.
    ///
    /// Same operand shape as the string operations above, but the index
    /// refers to a regex rather than a string.
    /// Bytecode: `[0x20, idx_lo, idx_hi]`
    Matches = 0x20,

    // ============ Boolean Logic ============
    // No operands; these work purely on the stack.
    /// Pop 2 booleans, push (a AND b).
    /// Bytecode: `[0x30]`
    And = 0x30,

    /// Pop 2 booleans, push (a OR b).
    /// Bytecode: `[0x31]`
    Or = 0x31,

    /// Pop 1 boolean, push (NOT a).
    /// Bytecode: `[0x32]`
    Not = 0x32,

    // ============ Part-specific Operations ============
    // Operand: u8 part_index, then (for most opcodes) a u16 index
    /// Check if `parts[part_idx]` contains the string at index.
    /// Bytecode: `[0x40, part_idx, str_idx_lo, str_idx_hi]`
    PartContains = 0x40,

    /// Check if `parts[part_idx]` starts with the string at index.
    /// Bytecode: `[0x41, part_idx, str_idx_lo, str_idx_hi]`
    PartStartsWith = 0x41,

    /// Check if `parts[part_idx]` ends with the string at index.
    /// Bytecode: `[0x42, part_idx, str_idx_lo, str_idx_hi]`
    PartEndsWith = 0x42,

    /// Check if `parts[part_idx]` equals the string at index.
    /// Bytecode: `[0x43, part_idx, str_idx_lo, str_idx_hi]`
    PartEquals = 0x43,

    /// Check if `parts[part_idx]` matches the regex at index.
    /// Bytecode: `[0x44, part_idx, regex_idx_lo, regex_idx_hi]`
    PartMatches = 0x44,

    /// Check if `parts[part_idx]` is empty.
    /// Bytecode: `[0x45, part_idx]`
    PartIsEmpty = 0x45,

    /// Check if `parts[part_idx]` is not empty.
    /// Bytecode: `[0x46, part_idx]`
    PartNotEmpty = 0x46,

    /// Check if `parts[part_idx]` equals any string in a set.
    /// Bytecode: `[0x47, part_idx, set_idx_lo, set_idx_hi]`
    PartInSet = 0x47,

    // ============ Case-insensitive Part Operations ============
    /// Case-insensitive equality check for `parts[part_idx]`.
    /// Bytecode: `[0x48, part_idx, str_idx_lo, str_idx_hi]`
    PartIEquals = 0x48,

    /// Case-insensitive contains check for `parts[part_idx]`.
    /// Bytecode: `[0x49, part_idx, str_idx_lo, str_idx_hi]`
    PartIContains = 0x49,

    // ============ Header Extraction Operations ============
    // Operand: u8 part_idx, u16 header_name_idx, u16 expected_value_idx
    /// Extract header from `parts[part_idx]`, check exact equality.
    /// Bytecode: `[0x50, part_idx, hdr_idx_lo, hdr_idx_hi, val_idx_lo, val_idx_hi]`
    HeaderEquals = 0x50,

    /// Extract header from `parts[part_idx]`, check case-insensitive equality.
    /// Bytecode: `[0x51, part_idx, hdr_idx_lo, hdr_idx_hi, val_idx_lo, val_idx_hi]`
    HeaderIEquals = 0x51,

    /// Extract header from `parts[part_idx]`, check if its value contains the string.
    /// Bytecode: `[0x52, part_idx, hdr_idx_lo, hdr_idx_hi, val_idx_lo, val_idx_hi]`
    HeaderContains = 0x52,

    /// Check if the header exists in `parts[part_idx]`.
    ///
    /// Takes no expected-value operand, so it is shorter than the other
    /// header opcodes.
    /// Bytecode: `[0x53, part_idx, hdr_idx_lo, hdr_idx_hi]`
    HeaderExists = 0x53,

    // ============ Random Sampling ============
    /// Returns true with probability 1/N.
    /// Bytecode: `[0x60, n_lo, n_hi]`
    Rand = 0x60,

    // ============ Short-circuit Jumps ============
    // Operand: i16 offset (little-endian)
    /// If top of stack is false, jump by i16 offset (leave false on stack).
    /// If true, pop and continue to evaluate the right operand.
    /// Bytecode: `[0x70, offset_lo, offset_hi]`
    JumpIfFalse = 0x70,

    /// If top of stack is true, jump by i16 offset (leave true on stack).
    /// If false, pop and continue to evaluate the right operand.
    /// Bytecode: `[0x71, offset_lo, offset_hi]`
    JumpIfTrue = 0x71,

    // ============ Control ============
    /// Return the top of the stack as the filter result.
    /// Bytecode: `[0xFF]`
    Return = 0xFF,
}
137
138impl Opcode {
139    /// Decode an opcode from a byte.
140    #[inline]
141    pub fn from_byte(byte: u8) -> Option<Self> {
142        match byte {
143            0x01 => Some(Opcode::PushTrue),
144            0x02 => Some(Opcode::PushFalse),
145            0x10 => Some(Opcode::Contains),
146            0x11 => Some(Opcode::StartsWith),
147            0x12 => Some(Opcode::EndsWith),
148            0x13 => Some(Opcode::Equals),
149            0x20 => Some(Opcode::Matches),
150            0x30 => Some(Opcode::And),
151            0x31 => Some(Opcode::Or),
152            0x32 => Some(Opcode::Not),
153            0x40 => Some(Opcode::PartContains),
154            0x41 => Some(Opcode::PartStartsWith),
155            0x42 => Some(Opcode::PartEndsWith),
156            0x43 => Some(Opcode::PartEquals),
157            0x44 => Some(Opcode::PartMatches),
158            0x45 => Some(Opcode::PartIsEmpty),
159            0x46 => Some(Opcode::PartNotEmpty),
160            0x47 => Some(Opcode::PartInSet),
161            0x48 => Some(Opcode::PartIEquals),
162            0x49 => Some(Opcode::PartIContains),
163            0x50 => Some(Opcode::HeaderEquals),
164            0x51 => Some(Opcode::HeaderIEquals),
165            0x52 => Some(Opcode::HeaderContains),
166            0x53 => Some(Opcode::HeaderExists),
167            0x60 => Some(Opcode::Rand),
168            0x70 => Some(Opcode::JumpIfFalse),
169            0x71 => Some(Opcode::JumpIfTrue),
170            0xFF => Some(Opcode::Return),
171            _ => None,
172        }
173    }
174
175    /// Get the size of this instruction in bytes (opcode + operands).
176    #[inline]
177    pub fn instruction_size(&self) -> usize {
178        match self {
179            // No operands
180            Opcode::PushTrue
181            | Opcode::PushFalse
182            | Opcode::And
183            | Opcode::Or
184            | Opcode::Not
185            | Opcode::Return => 1,
186
187            // u16 operand
188            Opcode::Contains
189            | Opcode::StartsWith
190            | Opcode::EndsWith
191            | Opcode::Equals
192            | Opcode::Matches
193            | Opcode::Rand
194            | Opcode::JumpIfFalse
195            | Opcode::JumpIfTrue => 3,
196
197            // u8 part_idx only
198            Opcode::PartIsEmpty | Opcode::PartNotEmpty => 2,
199
200            // u8 part_idx + u16 string_idx
201            Opcode::PartContains
202            | Opcode::PartStartsWith
203            | Opcode::PartEndsWith
204            | Opcode::PartEquals
205            | Opcode::PartMatches
206            | Opcode::PartInSet
207            | Opcode::PartIEquals
208            | Opcode::PartIContains
209            | Opcode::HeaderExists => 4,
210
211            // u8 part_idx + u16 header_idx + u16 value_idx
212            Opcode::HeaderEquals | Opcode::HeaderIEquals | Opcode::HeaderContains => 6,
213        }
214    }
215}