1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
//! Bytecode instruction set for filter evaluation.
//!
//! Each opcode is 1 byte, with operands following inline in the bytecode stream.
/// Instruction opcodes understood by the filter VM.
///
/// Every instruction begins with a single opcode byte. Any operands follow
/// inline; how many there are, and how wide, depends on the opcode. Two-byte
/// operands are stored low byte first, as spelled out in each variant's
/// `Bytecode:` line.
///
/// Variants are listed in ascending opcode order.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Opcode {
    // ---------------- 0x0_: stack constants ----------------
    /// Pushes `true` onto the stack.
    /// Bytecode: `[0x01]`
    PushTrue = 0x01,
    /// Pushes `false` onto the stack.
    /// Bytecode: `[0x02]`
    PushFalse = 0x02,

    // ---------------- 0x1_: whole-payload string tests ----------------
    // Operand: u16 string_index (little-endian).
    /// Tests whether the payload contains the string at the given index.
    /// Bytecode: `[0x10, idx_lo, idx_hi]`
    Contains = 0x10,
    /// Tests whether the payload starts with the string at the given index.
    /// Bytecode: `[0x11, idx_lo, idx_hi]`
    StartsWith = 0x11,
    /// Tests whether the payload ends with the string at the given index.
    /// Bytecode: `[0x12, idx_lo, idx_hi]`
    EndsWith = 0x12,
    /// Tests whether the payload equals the string at the given index.
    /// Bytecode: `[0x13, idx_lo, idx_hi]`
    Equals = 0x13,

    // ---------------- 0x2_: whole-payload regex test ----------------
    /// Tests whether the payload matches the regex at the given index.
    /// Bytecode: `[0x20, idx_lo, idx_hi]`
    Matches = 0x20,

    // ---------------- 0x3_: boolean logic ----------------
    /// Pops two booleans and pushes `a AND b`.
    /// Bytecode: `[0x30]`
    And = 0x30,
    /// Pops two booleans and pushes `a OR b`.
    /// Bytecode: `[0x31]`
    Or = 0x31,
    /// Pops one boolean and pushes `NOT a`.
    /// Bytecode: `[0x32]`
    Not = 0x32,

    // ---------------- 0x40-0x47: per-part tests ----------------
    // Operands: u8 part_index, then (where present) a u16 index.
    /// Tests whether `parts[part_idx]` contains the string at the given index.
    /// Bytecode: `[0x40, part_idx, str_idx_lo, str_idx_hi]`
    PartContains = 0x40,
    /// Tests whether `parts[part_idx]` starts with the string at the given index.
    /// Bytecode: `[0x41, part_idx, str_idx_lo, str_idx_hi]`
    PartStartsWith = 0x41,
    /// Tests whether `parts[part_idx]` ends with the string at the given index.
    /// Bytecode: `[0x42, part_idx, str_idx_lo, str_idx_hi]`
    PartEndsWith = 0x42,
    /// Tests whether `parts[part_idx]` equals the string at the given index.
    /// Bytecode: `[0x43, part_idx, str_idx_lo, str_idx_hi]`
    PartEquals = 0x43,
    /// Tests whether `parts[part_idx]` matches the regex at the given index.
    /// Bytecode: `[0x44, part_idx, regex_idx_lo, regex_idx_hi]`
    PartMatches = 0x44,
    /// Tests whether `parts[part_idx]` is empty.
    /// Bytecode: `[0x45, part_idx]`
    PartIsEmpty = 0x45,
    /// Tests whether `parts[part_idx]` is not empty.
    /// Bytecode: `[0x46, part_idx]`
    PartNotEmpty = 0x46,
    /// Tests whether `parts[part_idx]` equals any string in the set at the given index.
    /// Bytecode: `[0x47, part_idx, set_idx_lo, set_idx_hi]`
    PartInSet = 0x47,

    // ---------------- 0x48-0x49: case-insensitive per-part tests ----------------
    /// Case-insensitive equality test on `parts[part_idx]`.
    /// Bytecode: `[0x48, part_idx, str_idx_lo, str_idx_hi]`
    PartIEquals = 0x48,
    /// Case-insensitive contains test on `parts[part_idx]`.
    /// Bytecode: `[0x49, part_idx, str_idx_lo, str_idx_hi]`
    PartIContains = 0x49,

    // ---------------- 0x5_: header extraction ----------------
    // Operands: u8 part_idx, u16 header_name_idx, and (except for
    // `HeaderExists`) u16 expected_value_idx.
    /// Extracts a header from `parts[part_idx]` and tests it for exact equality.
    /// Bytecode: `[0x50, part_idx, hdr_idx_lo, hdr_idx_hi, val_idx_lo, val_idx_hi]`
    HeaderEquals = 0x50,
    /// Extracts a header from `parts[part_idx]` and tests it for
    /// case-insensitive equality.
    /// Bytecode: `[0x51, part_idx, hdr_idx_lo, hdr_idx_hi, val_idx_lo, val_idx_hi]`
    HeaderIEquals = 0x51,
    /// Extracts a header from `parts[part_idx]` and tests whether its value
    /// contains the given string.
    /// Bytecode: `[0x52, part_idx, hdr_idx_lo, hdr_idx_hi, val_idx_lo, val_idx_hi]`
    HeaderContains = 0x52,
    /// Tests whether the header exists in `parts[part_idx]`.
    /// Bytecode: `[0x53, part_idx, hdr_idx_lo, hdr_idx_hi]`
    HeaderExists = 0x53,

    // ---------------- 0x6_: random sampling ----------------
    /// Yields `true` with probability 1/N.
    /// Bytecode: `[0x60, n_lo, n_hi]`
    Rand = 0x60,

    // ---------------- 0x7_: short-circuit jumps ----------------
    /// When the top of the stack is `false`, jumps by the i16 offset and
    /// leaves that `false` on the stack. When it is `true`, pops it and
    /// continues so the right operand gets evaluated.
    /// Bytecode: `[0x70, offset_lo, offset_hi]`
    JumpIfFalse = 0x70,
    /// When the top of the stack is `true`, jumps by the i16 offset and
    /// leaves that `true` on the stack. When it is `false`, pops it and
    /// continues so the right operand gets evaluated.
    /// Bytecode: `[0x71, offset_lo, offset_hi]`
    JumpIfTrue = 0x71,

    // ---------------- 0xFF: control ----------------
    /// Returns the top of the stack as the filter result.
    /// Bytecode: `[0xFF]`
    Return = 0xFF,
}
impl Opcode {
/// Decode an opcode from a byte.
#[inline]
pub fn from_byte(byte: u8) -> Option<Self> {
match byte {
0x01 => Some(Opcode::PushTrue),
0x02 => Some(Opcode::PushFalse),
0x10 => Some(Opcode::Contains),
0x11 => Some(Opcode::StartsWith),
0x12 => Some(Opcode::EndsWith),
0x13 => Some(Opcode::Equals),
0x20 => Some(Opcode::Matches),
0x30 => Some(Opcode::And),
0x31 => Some(Opcode::Or),
0x32 => Some(Opcode::Not),
0x40 => Some(Opcode::PartContains),
0x41 => Some(Opcode::PartStartsWith),
0x42 => Some(Opcode::PartEndsWith),
0x43 => Some(Opcode::PartEquals),
0x44 => Some(Opcode::PartMatches),
0x45 => Some(Opcode::PartIsEmpty),
0x46 => Some(Opcode::PartNotEmpty),
0x47 => Some(Opcode::PartInSet),
0x48 => Some(Opcode::PartIEquals),
0x49 => Some(Opcode::PartIContains),
0x50 => Some(Opcode::HeaderEquals),
0x51 => Some(Opcode::HeaderIEquals),
0x52 => Some(Opcode::HeaderContains),
0x53 => Some(Opcode::HeaderExists),
0x60 => Some(Opcode::Rand),
0x70 => Some(Opcode::JumpIfFalse),
0x71 => Some(Opcode::JumpIfTrue),
0xFF => Some(Opcode::Return),
_ => None,
}
}
/// Get the size of this instruction in bytes (opcode + operands).
#[inline]
pub fn instruction_size(&self) -> usize {
match self {
// No operands
Opcode::PushTrue
| Opcode::PushFalse
| Opcode::And
| Opcode::Or
| Opcode::Not
| Opcode::Return => 1,
// u16 operand
Opcode::Contains
| Opcode::StartsWith
| Opcode::EndsWith
| Opcode::Equals
| Opcode::Matches
| Opcode::Rand
| Opcode::JumpIfFalse
| Opcode::JumpIfTrue => 3,
// u8 part_idx only
Opcode::PartIsEmpty | Opcode::PartNotEmpty => 2,
// u8 part_idx + u16 string_idx
Opcode::PartContains
| Opcode::PartStartsWith
| Opcode::PartEndsWith
| Opcode::PartEquals
| Opcode::PartMatches
| Opcode::PartInSet
| Opcode::PartIEquals
| Opcode::PartIContains
| Opcode::HeaderExists => 4,
// u8 part_idx + u16 header_idx + u16 value_idx
Opcode::HeaderEquals | Opcode::HeaderIEquals | Opcode::HeaderContains => 6,
}
}
}