Skip to main content

bytecode_filter/
vm.rs

//! Bytecode virtual machine for filter evaluation.
//!
//! The VM executes compiled filter bytecode against a payload.
4
5use std::sync::atomic::{AtomicU64, Ordering};
6
7use bytes::Bytes;
8use memchr::memmem::Finder;
9use regex::bytes::Regex;
10
11use crate::split::{extract_header_value, PayloadParts};
12
/// Process-wide counter used for deterministic random sampling.
///
/// Starts at zero. NOTE(review): presumably advanced by the sampling helper
/// behind the `Rand` opcode, which is defined outside this section — confirm.
static RAND_COUNTER: AtomicU64 = AtomicU64::new(0);
15
16/// A compiled filter ready for evaluation.
17///
18/// This struct contains the bytecode and all pre-compiled resources
19/// needed for fast evaluation. Create one at startup and reuse it
20/// for all payload evaluations.
21#[derive(Debug)]
22pub struct CompiledFilter {
23    /// Raw bytecode instructions.
24    bytecode: Box<[u8]>,
25
26    /// Pre-built string searchers for SIMD-accelerated matching.
27    /// Each Finder contains the needle bytes.
28    searchers: Box<[Finder<'static>]>,
29
30    /// The raw string literals (for equality checks).
31    strings: Box<[Box<[u8]>]>,
32
33    /// Pre-compiled regex patterns.
34    regexes: Box<[Regex]>,
35
36    /// String sets for IN operations.
37    /// Each set is a Vec of string indices.
38    string_sets: Box<[Box<[u16]>]>,
39
40    /// Delimiter for payload splitting.
41    delimiter: Box<[u8]>,
42
43    /// Pre-built SIMD-accelerated delimiter finder.
44    delimiter_finder: Finder<'static>,
45
46    /// Original filter source (for debugging).
47    source: Box<str>,
48}
49
50impl CompiledFilter {
51    /// Create a new compiled filter from components.
52    ///
53    /// This is typically called by the compiler, not directly.
54    pub fn new(
55        bytecode: Vec<u8>,
56        strings: Vec<Vec<u8>>,
57        regexes: Vec<Regex>,
58        string_sets: Vec<Vec<u16>>,
59        delimiter: Vec<u8>,
60        source: String,
61    ) -> Self {
62        // Build SIMD searchers from strings
63        let searchers: Vec<Finder<'static>> = strings
64            .iter()
65            .map(|s| {
66                let bytes: &'static [u8] = Box::leak(s.clone().into_boxed_slice());
67                Finder::new(bytes)
68            })
69            .collect();
70
71        let strings: Vec<Box<[u8]>> = strings.into_iter().map(|s| s.into_boxed_slice()).collect();
72
73        let string_sets: Vec<Box<[u16]>> = string_sets
74            .into_iter()
75            .map(|s| s.into_boxed_slice())
76            .collect();
77
78        let delimiter = delimiter.into_boxed_slice();
79        let delim_bytes: &'static [u8] = Box::leak(delimiter.clone());
80        let delimiter_finder = Finder::new(delim_bytes);
81
82        Self {
83            bytecode: bytecode.into_boxed_slice(),
84            searchers: searchers.into_boxed_slice(),
85            strings: strings.into_boxed_slice(),
86            regexes: regexes.into_boxed_slice(),
87            string_sets: string_sets.into_boxed_slice(),
88            delimiter,
89            delimiter_finder,
90            source: source.into_boxed_str(),
91        }
92    }
93
94    /// Evaluate the filter against a record.
95    ///
96    /// # Arguments
97    /// * `payload` - The record payload to evaluate
98    ///
99    /// # Returns
100    /// `true` if the filter matches, `false` otherwise.
101    ///
102    /// # Performance
103    /// - Zero allocations during evaluation
104    /// - SIMD-accelerated string matching
105    /// - Fixed-size stack (no heap)
106    ///
107    /// # Panics
108    ///
109    /// In debug builds only, panics if the bytecode is malformed (invalid opcode
110    /// or stack overflow). In release builds, returns `false` for invalid bytecode.
111    #[inline]
112    pub fn evaluate(&self, payload: Bytes) -> bool {
113        // Demand-driven lazy splitting — delimiters are scanned only as needed
114        let mut parts = PayloadParts::new_lazy(payload);
115        let delim_len = self.delimiter.len();
116
117        // Fixed-size evaluation stack
118        let mut stack = [false; 32];
119        let mut sp: usize = 0;
120        let mut pc: usize = 0;
121
122        let payload_bytes = parts.payload().as_ref() as *const [u8];
123        // SAFETY: payload_bytes points to the Bytes buffer which lives as long as `parts`.
124        // We only use it for read-only payload-wide operations. `parts` is not dropped
125        // or reallocated during the loop, so the pointer remains valid.
126        let payload_bytes: &[u8] = unsafe { &*payload_bytes };
127
128        loop {
129            debug_assert!(pc < self.bytecode.len(), "PC out of bounds");
130            debug_assert!(sp < 32, "Stack overflow");
131
132            match self.bytecode[pc] {
133                // ============ Stack Operations ============
134                0x01 => {
135                    // PushTrue
136                    stack[sp] = true;
137                    sp += 1;
138                    pc += 1;
139                }
140                0x02 => {
141                    // PushFalse
142                    stack[sp] = false;
143                    sp += 1;
144                    pc += 1;
145                }
146
147                // ============ Payload-wide Operations ============
148                0x10 => {
149                    // Contains
150                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
151                    stack[sp] = self.searchers[idx].find(payload_bytes).is_some();
152                    sp += 1;
153                    pc += 3;
154                }
155                0x11 => {
156                    // StartsWith
157                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
158                    stack[sp] = payload_bytes.starts_with(&self.strings[idx]);
159                    sp += 1;
160                    pc += 3;
161                }
162                0x12 => {
163                    // EndsWith
164                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
165                    stack[sp] = payload_bytes.ends_with(&self.strings[idx]);
166                    sp += 1;
167                    pc += 3;
168                }
169                0x13 => {
170                    // Equals
171                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
172                    stack[sp] = payload_bytes == &self.strings[idx][..];
173                    sp += 1;
174                    pc += 3;
175                }
176                0x20 => {
177                    // Matches (regex)
178                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
179                    stack[sp] = self.regexes[idx].is_match(payload_bytes);
180                    sp += 1;
181                    pc += 3;
182                }
183
184                // ============ Boolean Logic ============
185                0x30 => {
186                    // And
187                    debug_assert!(sp >= 2, "Stack underflow on AND");
188                    sp -= 1;
189                    stack[sp - 1] = stack[sp - 1] && stack[sp];
190                    pc += 1;
191                }
192                0x31 => {
193                    // Or
194                    debug_assert!(sp >= 2, "Stack underflow on OR");
195                    sp -= 1;
196                    stack[sp - 1] = stack[sp - 1] || stack[sp];
197                    pc += 1;
198                }
199                0x32 => {
200                    // Not
201                    debug_assert!(sp >= 1, "Stack underflow on NOT");
202                    stack[sp - 1] = !stack[sp - 1];
203                    pc += 1;
204                }
205
206                // ============ Part Operations ============
207                0x40 => {
208                    // PartContains
209                    let part_idx = self.bytecode[pc + 1] as usize;
210                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
211                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
212                    let part = parts.get(part_idx);
213                    stack[sp] = self.searchers[str_idx].find(part).is_some();
214                    sp += 1;
215                    pc += 4;
216                }
217                0x41 => {
218                    // PartStartsWith
219                    let part_idx = self.bytecode[pc + 1] as usize;
220                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
221                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
222                    let part = parts.get(part_idx);
223                    stack[sp] = part.starts_with(&self.strings[str_idx]);
224                    sp += 1;
225                    pc += 4;
226                }
227                0x42 => {
228                    // PartEndsWith
229                    let part_idx = self.bytecode[pc + 1] as usize;
230                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
231                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
232                    let part = parts.get(part_idx);
233                    stack[sp] = part.ends_with(&self.strings[str_idx]);
234                    sp += 1;
235                    pc += 4;
236                }
237                0x43 => {
238                    // PartEquals
239                    let part_idx = self.bytecode[pc + 1] as usize;
240                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
241                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
242                    let part = parts.get(part_idx);
243                    stack[sp] = part == &self.strings[str_idx][..];
244                    sp += 1;
245                    pc += 4;
246                }
247                0x44 => {
248                    // PartMatches
249                    let part_idx = self.bytecode[pc + 1] as usize;
250                    let regex_idx = read_u16(&self.bytecode, pc + 2) as usize;
251                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
252                    let part = parts.get(part_idx);
253                    stack[sp] = self.regexes[regex_idx].is_match(part);
254                    sp += 1;
255                    pc += 4;
256                }
257                0x45 => {
258                    // PartIsEmpty
259                    let part_idx = self.bytecode[pc + 1] as usize;
260                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
261                    stack[sp] = parts.get(part_idx).is_empty();
262                    sp += 1;
263                    pc += 2;
264                }
265                0x46 => {
266                    // PartNotEmpty
267                    let part_idx = self.bytecode[pc + 1] as usize;
268                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
269                    stack[sp] = !parts.get(part_idx).is_empty();
270                    sp += 1;
271                    pc += 2;
272                }
273                0x47 => {
274                    // PartInSet
275                    let part_idx = self.bytecode[pc + 1] as usize;
276                    let set_idx = read_u16(&self.bytecode, pc + 2) as usize;
277                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
278                    let part = parts.get(part_idx);
279                    let set = &self.string_sets[set_idx];
280                    stack[sp] = set
281                        .iter()
282                        .any(|&str_idx| part == &self.strings[str_idx as usize][..]);
283                    sp += 1;
284                    pc += 4;
285                }
286                0x48 => {
287                    // PartIEquals (case-insensitive)
288                    let part_idx = self.bytecode[pc + 1] as usize;
289                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
290                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
291                    let part = parts.get(part_idx);
292                    stack[sp] = part.eq_ignore_ascii_case(&self.strings[str_idx]);
293                    sp += 1;
294                    pc += 4;
295                }
296                0x49 => {
297                    // PartIContains (case-insensitive)
298                    let part_idx = self.bytecode[pc + 1] as usize;
299                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
300                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
301                    let part = parts.get(part_idx);
302                    let needle = &self.strings[str_idx];
303                    stack[sp] = icontains(part, needle);
304                    sp += 1;
305                    pc += 4;
306                }
307
308                // ============ Header Operations ============
309                0x50 => {
310                    // HeaderEquals
311                    let part_idx = self.bytecode[pc + 1] as usize;
312                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
313                    let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
314                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
315                    let headers = parts.get(part_idx);
316                    let header_name = &self.strings[hdr_idx];
317                    let expected = &self.strings[val_idx];
318                    stack[sp] = extract_header_value(headers, header_name)
319                        .map(|v| v == &expected[..])
320                        .unwrap_or(false);
321                    sp += 1;
322                    pc += 6;
323                }
324                0x51 => {
325                    // HeaderIEquals (case-insensitive)
326                    let part_idx = self.bytecode[pc + 1] as usize;
327                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
328                    let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
329                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
330                    let headers = parts.get(part_idx);
331                    let header_name = &self.strings[hdr_idx];
332                    let expected = &self.strings[val_idx];
333                    stack[sp] = extract_header_value(headers, header_name)
334                        .map(|v| v.eq_ignore_ascii_case(expected))
335                        .unwrap_or(false);
336                    sp += 1;
337                    pc += 6;
338                }
339                0x52 => {
340                    // HeaderContains
341                    let part_idx = self.bytecode[pc + 1] as usize;
342                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
343                    let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
344                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
345                    let headers = parts.get(part_idx);
346                    let header_name = &self.strings[hdr_idx];
347                    stack[sp] = extract_header_value(headers, header_name)
348                        .map(|v| self.searchers[val_idx].find(v).is_some())
349                        .unwrap_or(false);
350                    sp += 1;
351                    pc += 6;
352                }
353                0x53 => {
354                    // HeaderExists
355                    let part_idx = self.bytecode[pc + 1] as usize;
356                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
357                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
358                    let headers = parts.get(part_idx);
359                    let header_name = &self.strings[hdr_idx];
360                    stack[sp] = extract_header_value(headers, header_name).is_some();
361                    sp += 1;
362                    pc += 4;
363                }
364
365                // ============ Short-circuit Jumps ============
366                0x70 => {
367                    // JumpIfFalse — short-circuit AND
368                    debug_assert!(sp >= 1, "Stack underflow on JumpIfFalse");
369                    if !stack[sp - 1] {
370                        // Left side is false → result is false, skip right operand
371                        let offset = read_i16(&self.bytecode, pc + 1);
372                        pc = (pc as isize + offset as isize) as usize;
373                    } else {
374                        // Left side is true → pop it, evaluate right operand
375                        sp -= 1;
376                        pc += 3;
377                    }
378                }
379                0x71 => {
380                    // JumpIfTrue — short-circuit OR
381                    debug_assert!(sp >= 1, "Stack underflow on JumpIfTrue");
382                    if stack[sp - 1] {
383                        // Left side is true → result is true, skip right operand
384                        let offset = read_i16(&self.bytecode, pc + 1);
385                        pc = (pc as isize + offset as isize) as usize;
386                    } else {
387                        // Left side is false → pop it, evaluate right operand
388                        sp -= 1;
389                        pc += 3;
390                    }
391                }
392
393                // ============ Random ============
394                0x60 => {
395                    // Rand
396                    let n = read_u16(&self.bytecode, pc + 1);
397                    stack[sp] = rand_1_in_n(n);
398                    sp += 1;
399                    pc += 3;
400                }
401
402                // ============ Control ============
403                0xFF => {
404                    // Return
405                    debug_assert!(sp >= 1, "Stack underflow on RETURN");
406                    return stack[sp - 1];
407                }
408
409                _ => {
410                    // Unknown opcode - should never happen with valid bytecode
411                    #[cfg(debug_assertions)]
412                    panic!("Unknown opcode: 0x{:02X} at pc={}", self.bytecode[pc], pc);
413                    #[cfg(not(debug_assertions))]
414                    return false;
415                }
416            }
417        }
418    }
419
420    /// Like `evaluate`, but prints a step-by-step trace to stderr when the
421    /// filter returns `true`. Useful for debugging false-positive promotions.
422    pub fn evaluate_debug(&self, payload: Bytes) -> bool {
423        let mut parts = PayloadParts::new_lazy(payload);
424        let delim_len = self.delimiter.len();
425
426        let mut stack = [false; 32];
427        let mut sp: usize = 0;
428        let mut pc: usize = 0;
429
430        let payload_bytes = parts.payload().as_ref() as *const [u8];
431        let payload_bytes: &[u8] = unsafe { &*payload_bytes };
432
433        let mut trace_lines: Vec<String> = Vec::new();
434
435        loop {
436            debug_assert!(pc < self.bytecode.len());
437            debug_assert!(sp < 32);
438
439            match self.bytecode[pc] {
440                0x01 => {
441                    stack[sp] = true;
442                    trace_lines.push(format!("  pc={pc:3} PushTrue → stack[{sp}]=true"));
443                    sp += 1; pc += 1;
444                }
445                0x02 => {
446                    stack[sp] = false;
447                    trace_lines.push(format!("  pc={pc:3} PushFalse → stack[{sp}]=false"));
448                    sp += 1; pc += 1;
449                }
450                0x10 => {
451                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
452                    let result = self.searchers[idx].find(payload_bytes).is_some();
453                    stack[sp] = result;
454                    trace_lines.push(format!(
455                        "  pc={pc:3} Contains str[{idx}]={:?} → {result}",
456                        String::from_utf8_lossy(&self.strings[idx])
457                    ));
458                    sp += 1; pc += 3;
459                }
460                0x11 => {
461                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
462                    let result = payload_bytes.starts_with(&self.strings[idx]);
463                    stack[sp] = result;
464                    trace_lines.push(format!(
465                        "  pc={pc:3} StartsWith str[{idx}]={:?} → {result}",
466                        String::from_utf8_lossy(&self.strings[idx])
467                    ));
468                    sp += 1; pc += 3;
469                }
470                0x12 => {
471                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
472                    let result = payload_bytes.ends_with(&self.strings[idx]);
473                    stack[sp] = result;
474                    trace_lines.push(format!(
475                        "  pc={pc:3} EndsWith str[{idx}]={:?} → {result}",
476                        String::from_utf8_lossy(&self.strings[idx])
477                    ));
478                    sp += 1; pc += 3;
479                }
480                0x13 => {
481                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
482                    let result = payload_bytes == &self.strings[idx][..];
483                    stack[sp] = result;
484                    trace_lines.push(format!(
485                        "  pc={pc:3} Equals str[{idx}]={:?} → {result}",
486                        String::from_utf8_lossy(&self.strings[idx])
487                    ));
488                    sp += 1; pc += 3;
489                }
490                0x20 => {
491                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
492                    let result = self.regexes[idx].is_match(payload_bytes);
493                    stack[sp] = result;
494                    trace_lines.push(format!("  pc={pc:3} Matches regex[{idx}] → {result}"));
495                    sp += 1; pc += 3;
496                }
497                0x30 => {
498                    sp -= 1;
499                    let result = stack[sp - 1] && stack[sp];
500                    stack[sp - 1] = result;
501                    trace_lines.push(format!("  pc={pc:3} And → {result}"));
502                    pc += 1;
503                }
504                0x31 => {
505                    sp -= 1;
506                    let result = stack[sp - 1] || stack[sp];
507                    stack[sp - 1] = result;
508                    trace_lines.push(format!("  pc={pc:3} Or → {result}"));
509                    pc += 1;
510                }
511                0x32 => {
512                    stack[sp - 1] = !stack[sp - 1];
513                    trace_lines.push(format!("  pc={pc:3} Not → {}", stack[sp - 1]));
514                    pc += 1;
515                }
516                0x40 => {
517                    let part_idx = self.bytecode[pc + 1] as usize;
518                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
519                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
520                    let part = parts.get(part_idx);
521                    let result = self.searchers[str_idx].find(part).is_some();
522                    stack[sp] = result;
523                    trace_lines.push(format!(
524                        "  pc={pc:3} PartContains part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
525                        String::from_utf8_lossy(part),
526                        String::from_utf8_lossy(&self.strings[str_idx])
527                    ));
528                    sp += 1; pc += 4;
529                }
530                0x41 => {
531                    let part_idx = self.bytecode[pc + 1] as usize;
532                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
533                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
534                    let part = parts.get(part_idx);
535                    let result = part.starts_with(&self.strings[str_idx]);
536                    stack[sp] = result;
537                    trace_lines.push(format!(
538                        "  pc={pc:3} PartStartsWith part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
539                        String::from_utf8_lossy(part),
540                        String::from_utf8_lossy(&self.strings[str_idx])
541                    ));
542                    sp += 1; pc += 4;
543                }
544                0x42 => {
545                    let part_idx = self.bytecode[pc + 1] as usize;
546                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
547                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
548                    let part = parts.get(part_idx);
549                    let result = part.ends_with(&self.strings[str_idx]);
550                    stack[sp] = result;
551                    trace_lines.push(format!(
552                        "  pc={pc:3} PartEndsWith part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
553                        String::from_utf8_lossy(part),
554                        String::from_utf8_lossy(&self.strings[str_idx])
555                    ));
556                    sp += 1; pc += 4;
557                }
558                0x43 => {
559                    let part_idx = self.bytecode[pc + 1] as usize;
560                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
561                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
562                    let part = parts.get(part_idx);
563                    let result = part == &self.strings[str_idx][..];
564                    stack[sp] = result;
565                    trace_lines.push(format!(
566                        "  pc={pc:3} PartEquals part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
567                        String::from_utf8_lossy(part),
568                        String::from_utf8_lossy(&self.strings[str_idx])
569                    ));
570                    sp += 1; pc += 4;
571                }
572                0x44 => {
573                    let part_idx = self.bytecode[pc + 1] as usize;
574                    let regex_idx = read_u16(&self.bytecode, pc + 2) as usize;
575                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
576                    let part = parts.get(part_idx);
577                    let result = self.regexes[regex_idx].is_match(part);
578                    stack[sp] = result;
579                    trace_lines.push(format!(
580                        "  pc={pc:3} PartMatches part[{part_idx}]={:?} regex[{regex_idx}] → {result}",
581                        String::from_utf8_lossy(part)
582                    ));
583                    sp += 1; pc += 4;
584                }
585                0x45 => {
586                    let part_idx = self.bytecode[pc + 1] as usize;
587                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
588                    let result = parts.get(part_idx).is_empty();
589                    stack[sp] = result;
590                    trace_lines.push(format!("  pc={pc:3} PartIsEmpty part[{part_idx}] → {result}"));
591                    sp += 1; pc += 2;
592                }
593                0x46 => {
594                    let part_idx = self.bytecode[pc + 1] as usize;
595                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
596                    let result = !parts.get(part_idx).is_empty();
597                    stack[sp] = result;
598                    trace_lines.push(format!("  pc={pc:3} PartNotEmpty part[{part_idx}] → {result}"));
599                    sp += 1; pc += 2;
600                }
601                0x47 => {
602                    let part_idx = self.bytecode[pc + 1] as usize;
603                    let set_idx = read_u16(&self.bytecode, pc + 2) as usize;
604                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
605                    let part = parts.get(part_idx);
606                    let set = &self.string_sets[set_idx];
607                    let result = set.iter().any(|&si| part == &self.strings[si as usize][..]);
608                    stack[sp] = result;
609                    trace_lines.push(format!(
610                        "  pc={pc:3} PartInSet part[{part_idx}]={:?} set[{set_idx}] → {result}",
611                        String::from_utf8_lossy(part)
612                    ));
613                    sp += 1; pc += 4;
614                }
615                0x48 => {
616                    let part_idx = self.bytecode[pc + 1] as usize;
617                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
618                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
619                    let part = parts.get(part_idx);
620                    let result = part.eq_ignore_ascii_case(&self.strings[str_idx]);
621                    stack[sp] = result;
622                    trace_lines.push(format!(
623                        "  pc={pc:3} PartIEquals part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
624                        String::from_utf8_lossy(part),
625                        String::from_utf8_lossy(&self.strings[str_idx])
626                    ));
627                    sp += 1; pc += 4;
628                }
629                0x49 => {
630                    let part_idx = self.bytecode[pc + 1] as usize;
631                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
632                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
633                    let part = parts.get(part_idx);
634                    let needle = &self.strings[str_idx];
635                    let result = icontains(part, needle);
636                    stack[sp] = result;
637                    trace_lines.push(format!(
638                        "  pc={pc:3} PartIContains part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
639                        String::from_utf8_lossy(part),
640                        String::from_utf8_lossy(&self.strings[str_idx])
641                    ));
642                    sp += 1; pc += 4;
643                }
644                0x50 => {
645                    let part_idx = self.bytecode[pc + 1] as usize;
646                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
647                    let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
648                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
649                    let headers = parts.get(part_idx);
650                    let header_name = &self.strings[hdr_idx];
651                    let expected = &self.strings[val_idx];
652                    let extracted = extract_header_value(headers, header_name);
653                    let result = extracted.map(|v| v == &expected[..]).unwrap_or(false);
654                    stack[sp] = result;
655                    trace_lines.push(format!(
656                        "  pc={pc:3} HeaderEquals part[{part_idx}] hdr={:?} expected={:?} got={:?} → {result}",
657                        String::from_utf8_lossy(header_name),
658                        String::from_utf8_lossy(expected),
659                        extracted.map(|v| String::from_utf8_lossy(v).to_string())
660                    ));
661                    sp += 1; pc += 6;
662                }
663                0x51 => {
664                    let part_idx = self.bytecode[pc + 1] as usize;
665                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
666                    let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
667                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
668                    let headers = parts.get(part_idx);
669                    let header_name = &self.strings[hdr_idx];
670                    let expected = &self.strings[val_idx];
671                    let extracted = extract_header_value(headers, header_name);
672                    let result = extracted.map(|v| v.eq_ignore_ascii_case(expected)).unwrap_or(false);
673                    stack[sp] = result;
674                    trace_lines.push(format!(
675                        "  pc={pc:3} HeaderIEquals part[{part_idx}] hdr={:?} expected={:?} got={:?} → {result}",
676                        String::from_utf8_lossy(header_name),
677                        String::from_utf8_lossy(expected),
678                        extracted.map(|v| String::from_utf8_lossy(v).to_string())
679                    ));
680                    sp += 1; pc += 6;
681                }
682                0x52 => {
683                    let part_idx = self.bytecode[pc + 1] as usize;
684                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
685                    let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
686                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
687                    let headers = parts.get(part_idx);
688                    let header_name = &self.strings[hdr_idx];
689                    let extracted = extract_header_value(headers, header_name);
690                    let result = extracted.map(|v| self.searchers[val_idx].find(v).is_some()).unwrap_or(false);
691                    stack[sp] = result;
692                    trace_lines.push(format!(
693                        "  pc={pc:3} HeaderContains part[{part_idx}] hdr={:?} needle={:?} got={:?} → {result}",
694                        String::from_utf8_lossy(header_name),
695                        String::from_utf8_lossy(&self.strings[val_idx]),
696                        extracted.map(|v| String::from_utf8_lossy(v).to_string())
697                    ));
698                    sp += 1; pc += 6;
699                }
700                0x53 => {
701                    let part_idx = self.bytecode[pc + 1] as usize;
702                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
703                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
704                    let headers = parts.get(part_idx);
705                    let header_name = &self.strings[hdr_idx];
706                    let result = extract_header_value(headers, header_name).is_some();
707                    stack[sp] = result;
708                    trace_lines.push(format!(
709                        "  pc={pc:3} HeaderExists part[{part_idx}] hdr={:?} → {result}",
710                        String::from_utf8_lossy(header_name)
711                    ));
712                    sp += 1; pc += 4;
713                }
714                0x70 => {
715                    if !stack[sp - 1] {
716                        let offset = read_i16(&self.bytecode, pc + 1);
717                        trace_lines.push(format!("  pc={pc:3} JumpIfFalse → false, jump by {offset}"));
718                        pc = (pc as isize + offset as isize) as usize;
719                    } else {
720                        trace_lines.push(format!("  pc={pc:3} JumpIfFalse → true, pop & continue"));
721                        sp -= 1;
722                        pc += 3;
723                    }
724                }
725                0x71 => {
726                    if stack[sp - 1] {
727                        let offset = read_i16(&self.bytecode, pc + 1);
728                        trace_lines.push(format!("  pc={pc:3} JumpIfTrue → true, jump by {offset}"));
729                        pc = (pc as isize + offset as isize) as usize;
730                    } else {
731                        trace_lines.push(format!("  pc={pc:3} JumpIfTrue → false, pop & continue"));
732                        sp -= 1;
733                        pc += 3;
734                    }
735                }
736                0x60 => {
737                    let n = read_u16(&self.bytecode, pc + 1);
738                    let result = rand_1_in_n(n);
739                    stack[sp] = result;
740                    trace_lines.push(format!("  pc={pc:3} Rand(1/{n}) → {result}"));
741                    sp += 1; pc += 3;
742                }
743                0xFF => {
744                    let result = stack[sp - 1];
745                    if result {
746                        eprintln!("=== FILTER DEBUG (result=true) filter={:?} ===", self.source);
747                        for line in &trace_lines {
748                            eprintln!("{line}");
749                        }
750                        eprintln!("=== END FILTER DEBUG ===");
751                    }
752                    return result;
753                }
754                _ => {
755                    #[cfg(debug_assertions)]
756                    panic!("Unknown opcode: 0x{:02X} at pc={}", self.bytecode[pc], pc);
757                    #[cfg(not(debug_assertions))]
758                    return false;
759                }
760            }
761        }
762    }
763
764    /// Get the original filter source.
765    pub fn source(&self) -> &str {
766        &self.source
767    }
768
769    /// Get the bytecode length.
770    pub fn bytecode_len(&self) -> usize {
771        self.bytecode.len()
772    }
773
774    /// Get the number of string literals.
775    pub fn string_count(&self) -> usize {
776        self.strings.len()
777    }
778
779    /// Get the number of regex patterns.
780    pub fn regex_count(&self) -> usize {
781        self.regexes.len()
782    }
783
784    /// Get the delimiter used for splitting.
785    pub fn delimiter(&self) -> &[u8] {
786        &self.delimiter
787    }
788}
789
/// Decodes the bytes at `offset` and `offset + 1` as a little-endian `u16`.
///
/// # Panics
///
/// Panics if either byte lies outside `bytecode`.
#[inline(always)]
fn read_u16(bytecode: &[u8], offset: usize) -> u16 {
    // Little-endian: the first byte is the least significant one.
    let low = u16::from(bytecode[offset]);
    let high = u16::from(bytecode[offset + 1]);
    (high << 8) | low
}
795
/// Decodes the bytes at `offset` and `offset + 1` as a little-endian `i16`.
///
/// # Panics
///
/// Panics if either byte lies outside `bytecode`.
#[inline(always)]
fn read_i16(bytecode: &[u8], offset: usize) -> i16 {
    let low = u16::from(bytecode[offset]);
    let high = u16::from(bytecode[offset + 1]);
    // Assemble the 16-bit pattern, then reinterpret it as two's complement.
    ((high << 8) | low) as i16
}
801
/// Reports whether `needle` occurs in `haystack`, ignoring ASCII case.
///
/// An empty `needle` always matches; a `needle` longer than `haystack`
/// never does.
#[inline]
fn icontains(haystack: &[u8], needle: &[u8]) -> bool {
    match needle.len() {
        // `windows(0)` would panic, and an empty needle matches anything.
        0 => true,
        // A haystack shorter than `width` yields no windows, so `any` is false.
        width => haystack
            .windows(width)
            .any(|candidate| candidate.eq_ignore_ascii_case(needle)),
    }
}
820
821/// Returns true with probability 1/N.
822///
823/// Uses a deterministic counter for reproducible sampling.
824#[inline]
825fn rand_1_in_n(n: u16) -> bool {
826    if n <= 1 {
827        return true;
828    }
829    let count = RAND_COUNTER.fetch_add(1, Ordering::Relaxed);
830    count.is_multiple_of(n as u64)
831}
832
833/// Reset the random counter (for testing).
834pub fn reset_rand_counter() {
835    RAND_COUNTER.store(0, Ordering::Relaxed);
836}
837
#[cfg(test)]
mod tests {
    use std::sync::{Mutex, MutexGuard, PoisonError};

    use super::*;

    /// Delimiter used by every filter built in these tests.
    const DELIMITER: &[u8] = b";;;";

    /// Serializes the tests that touch the process-wide `RAND_COUNTER`.
    ///
    /// The test harness runs tests on several threads by default, so a
    /// `reset_rand_counter()` issued by one test could otherwise land in the
    /// middle of another test's `rand(2)` sequence and make it fail
    /// intermittently.
    static RAND_COUNTER_LOCK: Mutex<()> = Mutex::new(());

    /// Acquires `RAND_COUNTER_LOCK`.
    ///
    /// Poisoning is ignored: the mutex guards no data of its own, so a panic
    /// in one counter test must not cascade into the others.
    fn lock_rand_counter() -> MutexGuard<'static, ()> {
        RAND_COUNTER_LOCK
            .lock()
            .unwrap_or_else(PoisonError::into_inner)
    }

    /// Builds a filter with no regexes, no string sets and the `;;;` delimiter.
    fn make_filter(bytecode: Vec<u8>, strings: Vec<Vec<u8>>, source: &str) -> CompiledFilter {
        CompiledFilter::new(
            bytecode,
            strings,
            vec![],
            vec![],
            DELIMITER.to_vec(),
            source.into(),
        )
    }

    /// Builds `<opcode> <str_idx as LE u16> Return` with `needle` as string 0.
    fn make_simple_filter(opcode: u8, str_idx: u16, needle: &str) -> CompiledFilter {
        let mut bytecode = vec![opcode];
        bytecode.extend_from_slice(&str_idx.to_le_bytes());
        bytecode.push(0xFF); // Return

        make_filter(bytecode, vec![needle.as_bytes().to_vec()], "test filter")
    }

    #[test]
    fn test_contains() {
        let filter = make_simple_filter(0x10, 0, "hello");
        assert!(filter.evaluate(Bytes::from("say hello world")));
        assert!(!filter.evaluate(Bytes::from("say goodbye")));
    }

    #[test]
    fn test_starts_with() {
        let filter = make_simple_filter(0x11, 0, "hello");
        assert!(filter.evaluate(Bytes::from("hello world")));
        assert!(!filter.evaluate(Bytes::from("say hello")));
    }

    #[test]
    fn test_ends_with() {
        let filter = make_simple_filter(0x12, 0, "world");
        assert!(filter.evaluate(Bytes::from("hello world")));
        assert!(!filter.evaluate(Bytes::from("world hello")));
    }

    #[test]
    fn test_equals() {
        let filter = make_simple_filter(0x13, 0, "hello");
        assert!(filter.evaluate(Bytes::from("hello")));
        assert!(!filter.evaluate(Bytes::from("hello world")));
    }

    #[test]
    fn test_push_true() {
        // PushTrue, Return
        let filter = make_filter(vec![0x01, 0xFF], vec![], "true");
        assert!(filter.evaluate(Bytes::from("anything")));
    }

    #[test]
    fn test_push_false() {
        // PushFalse, Return
        let filter = make_filter(vec![0x02, 0xFF], vec![], "false");
        assert!(!filter.evaluate(Bytes::from("anything")));
    }

    #[test]
    fn test_and() {
        // true AND true = true (PushTrue, PushTrue, And, Return)
        let filter = make_filter(vec![0x01, 0x01, 0x30, 0xFF], vec![], "true AND true");
        assert!(filter.evaluate(Bytes::from("")));

        // true AND false = false (PushTrue, PushFalse, And, Return)
        let filter = make_filter(vec![0x01, 0x02, 0x30, 0xFF], vec![], "true AND false");
        assert!(!filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_or() {
        // false OR true = true (PushFalse, PushTrue, Or, Return)
        let filter = make_filter(vec![0x02, 0x01, 0x31, 0xFF], vec![], "false OR true");
        assert!(filter.evaluate(Bytes::from("")));

        // false OR false = false (PushFalse, PushFalse, Or, Return)
        let filter = make_filter(vec![0x02, 0x02, 0x31, 0xFF], vec![], "false OR false");
        assert!(!filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_not() {
        // NOT true = false (PushTrue, Not, Return)
        let filter = make_filter(vec![0x01, 0x32, 0xFF], vec![], "NOT true");
        assert!(!filter.evaluate(Bytes::from("")));

        // NOT false = true (PushFalse, Not, Return)
        let filter = make_filter(vec![0x02, 0x32, 0xFF], vec![], "NOT false");
        assert!(filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_part_equals() {
        // PartEquals(part=1, str=0) -> parts[1] == "2"
        let filter = make_filter(
            vec![0x43, 0x01, 0x00, 0x00, 0xFF],
            vec![b"2".to_vec()],
            "field[1] == \"2\"",
        );

        assert!(filter.evaluate(Bytes::from("v1;;;2;;;subtype")));
        assert!(!filter.evaluate(Bytes::from("v1;;;1;;;subtype")));
    }

    #[test]
    fn test_part_in_set() {
        // PartInSet(part=1, set=0) -> parts[1] in {"1", "2", "3"}
        let filter = CompiledFilter::new(
            vec![0x47, 0x01, 0x00, 0x00, 0xFF],
            vec![b"1".to_vec(), b"2".to_vec(), b"3".to_vec()],
            vec![],
            vec![vec![0, 1, 2]], // Set 0 contains string indices 0, 1, 2
            DELIMITER.to_vec(),
            "field[1] in {\"1\", \"2\", \"3\"}".into(),
        );

        assert!(filter.evaluate(Bytes::from("v1;;;1;;;sub")));
        assert!(filter.evaluate(Bytes::from("v1;;;2;;;sub")));
        assert!(filter.evaluate(Bytes::from("v1;;;3;;;sub")));
        assert!(!filter.evaluate(Bytes::from("v1;;;4;;;sub")));
    }

    #[test]
    fn test_rand() {
        // Hold the lock for the whole test so no other test can reset the
        // counter between evaluations.
        let _guard = lock_rand_counter();
        reset_rand_counter();

        // rand(2) should return true, false, true, false, ...
        let filter = make_filter(vec![0x60, 0x02, 0x00, 0xFF], vec![], "rand(2)");

        let results: Vec<bool> = (0..10).map(|_| filter.evaluate(Bytes::from(""))).collect();
        assert_eq!(
            results,
            vec![true, false, true, false, true, false, true, false, true, false]
        );
    }

    #[test]
    fn test_rand_always_true() {
        // This test resets the shared counter, so it must not overlap with
        // `test_rand`.
        let _guard = lock_rand_counter();
        reset_rand_counter();

        // Rand(1), Return
        let filter = make_filter(vec![0x60, 0x01, 0x00, 0xFF], vec![], "rand(1)");

        for _ in 0..10 {
            assert!(filter.evaluate(Bytes::from("")));
        }
    }

    #[test]
    fn test_regex_match() {
        let filter = CompiledFilter::new(
            vec![0x20, 0x00, 0x00, 0xFF], // Matches(regex=0), Return
            vec![],
            vec![Regex::new(r"error_[0-9]+").unwrap()],
            vec![],
            DELIMITER.to_vec(),
            "payload matches \"error_[0-9]+\"".into(),
        );

        assert!(filter.evaluate(Bytes::from("found error_123 in log")));
        assert!(filter.evaluate(Bytes::from("error_0")));
        assert!(!filter.evaluate(Bytes::from("error_abc")));
        assert!(!filter.evaluate(Bytes::from("no errors")));
    }

    #[test]
    fn test_header_iequals() {
        // HeaderIEquals(part=0, header="x-custom", value="expected")
        let filter = make_filter(
            vec![0x51, 0x00, 0x00, 0x00, 0x01, 0x00, 0xFF],
            vec![b"x-custom".to_vec(), b"expected".to_vec()],
            "headers.header(\"x-custom\") iequals \"expected\"",
        );

        assert!(filter.evaluate(Bytes::from("X-Custom: expected\r\n")));
        assert!(filter.evaluate(Bytes::from("x-custom: EXPECTED\r\n")));
        assert!(filter.evaluate(Bytes::from("X-CUSTOM: Expected\r\n")));
        assert!(!filter.evaluate(Bytes::from("X-Custom: other\r\n")));
        assert!(!filter.evaluate(Bytes::from("X-Other: expected\r\n")));
    }

    #[test]
    fn test_complex_multi_clause_filter() {
        // field[1] == "error" AND field[2] == "500" AND header check
        // Bytecode:
        //   PartEquals(1, 0)    -> field[1] == "error"
        //   PartEquals(2, 1)    -> field[2] == "500"
        //   And
        //   HeaderIEquals(4, 2, 3) -> header check
        //   And
        //   Return
        let filter = make_filter(
            vec![
                0x43, 0x01, 0x00, 0x00, // PartEquals(part=1, str=0)
                0x43, 0x02, 0x01, 0x00, // PartEquals(part=2, str=1)
                0x30, // And
                0x51, 0x04, 0x02, 0x00, 0x03, 0x00, // HeaderIEquals(part=4, hdr=2, val=3)
                0x30, // And
                0xFF, // Return
            ],
            vec![
                b"error".to_vec(),
                b"500".to_vec(),
                b"content-type".to_vec(),
                b"application/json".to_vec(),
            ],
            "multi-clause filter",
        );

        // Build a matching record: [ignored, "error", "500", ignored, headers, ...]
        let mut fields: Vec<&str> = vec![""; 6];
        fields[1] = "error";
        fields[2] = "500";
        fields[4] = "Content-Type: application/json\r\n";

        let payload = fields.join(";;;");
        assert!(filter.evaluate(Bytes::from(payload)));

        // Non-matching: wrong field[1]
        fields[1] = "info";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));

        // Non-matching: wrong field[2]
        fields[1] = "error";
        fields[2] = "200";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));

        // Non-matching: wrong header value
        fields[2] = "500";
        fields[4] = "Content-Type: text/html\r\n";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));
    }
}