Skip to main content

bytecode_filter/
vm.rs

1//! Bytecode virtual machine for filter evaluation.
2//!
3//! The VM executes compiled filter bytecode against a payload.
4
5use std::sync::atomic::{AtomicU64, Ordering};
6
7use bytes::Bytes;
8use memchr::memmem::Finder;
9use regex::bytes::Regex;
10
11use crate::split::{extract_header_value, PayloadParts};
12
/// Process-wide counter behind the deterministic sampling of the `Rand` opcode.
///
/// The counter is a single static, so every `CompiledFilter` in the process
/// shares it: `rand_1_in_n` increments it and `reset_rand_counter` stores zero.
static RAND_COUNTER: AtomicU64 = AtomicU64::new(0);
15
/// A compiled filter ready for evaluation.
///
/// This struct contains the bytecode and all pre-compiled resources
/// needed for fast evaluation. Create one at startup and reuse it
/// for all payload evaluations.
///
/// Bytecode operands refer to the tables below by index: string and regex
/// operands are little-endian `u16` indices, part operands are a single byte.
#[derive(Debug)]
pub struct CompiledFilter {
    /// Raw bytecode instructions (one opcode byte followed by its operands).
    bytecode: Box<[u8]>,

    /// Pre-built substring searchers, one per entry of `strings` and stored
    /// at the same index. Each Finder carries its own needle bytes.
    searchers: Box<[Finder<'static>]>,

    /// The raw string literals (used for equality, prefix/suffix checks,
    /// set membership, and header names/values).
    strings: Box<[Box<[u8]>]>,

    /// Pre-compiled regex patterns, addressed by regex index.
    regexes: Box<[Regex]>,

    /// String sets for IN operations.
    /// Each set is a boxed slice of indices into `strings`.
    string_sets: Box<[Box<[u16]>]>,

    /// Delimiter for payload splitting.
    delimiter: Box<[u8]>,

    /// Pre-built delimiter finder, constructed from `delimiter`.
    delimiter_finder: Finder<'static>,

    /// Original filter source (for debugging).
    source: Box<str>,
}
49
50impl CompiledFilter {
51    /// Create a new compiled filter from components.
52    ///
53    /// This is typically called by the compiler, not directly.
54    pub fn new(
55        bytecode: Vec<u8>,
56        strings: Vec<Vec<u8>>,
57        regexes: Vec<Regex>,
58        string_sets: Vec<Vec<u16>>,
59        delimiter: Vec<u8>,
60        source: String,
61    ) -> Self {
62        // Build SIMD searchers from strings
63        let searchers: Vec<Finder<'static>> = strings
64            .iter()
65            .map(|s| {
66                let bytes: &'static [u8] = Box::leak(s.clone().into_boxed_slice());
67                Finder::new(bytes)
68            })
69            .collect();
70
71        let strings: Vec<Box<[u8]>> = strings.into_iter().map(|s| s.into_boxed_slice()).collect();
72
73        let string_sets: Vec<Box<[u16]>> = string_sets
74            .into_iter()
75            .map(|s| s.into_boxed_slice())
76            .collect();
77
78        let delimiter = delimiter.into_boxed_slice();
79        let delim_bytes: &'static [u8] = Box::leak(delimiter.clone());
80        let delimiter_finder = Finder::new(delim_bytes);
81
82        Self {
83            bytecode: bytecode.into_boxed_slice(),
84            searchers: searchers.into_boxed_slice(),
85            strings: strings.into_boxed_slice(),
86            regexes: regexes.into_boxed_slice(),
87            string_sets: string_sets.into_boxed_slice(),
88            delimiter,
89            delimiter_finder,
90            source: source.into_boxed_str(),
91        }
92    }
93
    /// Evaluate the filter against a record.
    ///
    /// The bytecode is run as a small stack machine: each test opcode pushes
    /// one boolean, the boolean opcodes combine the top of the stack, and the
    /// `Return` opcode (`0xFF`) yields the value on top of the stack.
    ///
    /// # Arguments
    /// * `payload` - The record payload to evaluate
    ///
    /// # Returns
    /// `true` if the filter matches, `false` otherwise.
    ///
    /// # Performance
    /// - Zero allocations during evaluation
    /// - SIMD-accelerated string matching
    /// - Fixed-size stack (no heap)
    ///
    /// # Panics
    ///
    /// In debug builds, panics if the bytecode is malformed: unknown opcode,
    /// program counter out of bounds, or stack overflow/underflow
    /// (`debug_assert!`).
    ///
    /// In release builds only an unknown opcode is turned into a `false`
    /// result. Other malformed bytecode is not: for example a program counter,
    /// stack slot, string/regex/set index that is out of range still hits an
    /// ordinary slice or array bounds check and panics.
    #[inline]
    pub fn evaluate(&self, payload: Bytes) -> bool {
        // Demand-driven lazy splitting — delimiters are scanned only as needed
        let mut parts = PayloadParts::new_lazy(payload);
        let delim_len = self.delimiter.len();

        // Fixed-size evaluation stack: `sp` is the index of the next free slot,
        // `pc` is the offset of the opcode currently being executed.
        let mut stack = [false; 32];
        let mut sp: usize = 0;
        let mut pc: usize = 0;

        // The whole-payload slice is detached from the borrow of `parts` through
        // a raw pointer so that `parts` can still be borrowed mutably by
        // `parts.ensure(..)` inside the loop.
        let payload_bytes = parts.payload().as_ref() as *const [u8];
        // SAFETY: payload_bytes points to the Bytes buffer which lives as long as `parts`.
        // We only use it for read-only payload-wide operations. `parts` is not dropped
        // or reallocated during the loop, so the pointer remains valid.
        // NOTE(review): this relies on `PayloadParts` never replacing or releasing
        // its payload buffer in `ensure` — confirm against `crate::split`.
        let payload_bytes: &[u8] = unsafe { &*payload_bytes };

        loop {
            debug_assert!(pc < self.bytecode.len(), "PC out of bounds");
            debug_assert!(sp < 32, "Stack overflow");

            match self.bytecode[pc] {
                // ============ Stack Operations ============
                0x01 => {
                    // PushTrue
                    stack[sp] = true;
                    sp += 1;
                    pc += 1;
                }
                0x02 => {
                    // PushFalse
                    stack[sp] = false;
                    sp += 1;
                    pc += 1;
                }

                // ============ Payload-wide Operations ============
                // Layout: opcode, u16 table index (little-endian) — 3 bytes.
                0x10 => {
                    // Contains
                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
                    stack[sp] = self.searchers[idx].find(payload_bytes).is_some();
                    sp += 1;
                    pc += 3;
                }
                0x11 => {
                    // StartsWith
                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
                    stack[sp] = payload_bytes.starts_with(&self.strings[idx]);
                    sp += 1;
                    pc += 3;
                }
                0x12 => {
                    // EndsWith
                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
                    stack[sp] = payload_bytes.ends_with(&self.strings[idx]);
                    sp += 1;
                    pc += 3;
                }
                0x13 => {
                    // Equals
                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
                    stack[sp] = payload_bytes == &self.strings[idx][..];
                    sp += 1;
                    pc += 3;
                }
                0x20 => {
                    // Matches (regex)
                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
                    stack[sp] = self.regexes[idx].is_match(payload_bytes);
                    sp += 1;
                    pc += 3;
                }

                // ============ Boolean Logic ============
                // And/Or pop two values and push one; Not rewrites the top in place.
                0x30 => {
                    // And
                    debug_assert!(sp >= 2, "Stack underflow on AND");
                    sp -= 1;
                    stack[sp - 1] = stack[sp - 1] && stack[sp];
                    pc += 1;
                }
                0x31 => {
                    // Or
                    debug_assert!(sp >= 2, "Stack underflow on OR");
                    sp -= 1;
                    stack[sp - 1] = stack[sp - 1] || stack[sp];
                    pc += 1;
                }
                0x32 => {
                    // Not
                    debug_assert!(sp >= 1, "Stack underflow on NOT");
                    stack[sp - 1] = !stack[sp - 1];
                    pc += 1;
                }

                // ============ Part Operations ============
                // Layout: opcode, u8 part index, then (where present) a u16 table
                // index. `parts.ensure` is called before every `parts.get` so the
                // lazy splitter has advanced far enough for the requested part.
                0x40 => {
                    // PartContains
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    stack[sp] = self.searchers[str_idx].find(part).is_some();
                    sp += 1;
                    pc += 4;
                }
                0x41 => {
                    // PartStartsWith
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    stack[sp] = part.starts_with(&self.strings[str_idx]);
                    sp += 1;
                    pc += 4;
                }
                0x42 => {
                    // PartEndsWith
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    stack[sp] = part.ends_with(&self.strings[str_idx]);
                    sp += 1;
                    pc += 4;
                }
                0x43 => {
                    // PartEquals
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    stack[sp] = part == &self.strings[str_idx][..];
                    sp += 1;
                    pc += 4;
                }
                0x44 => {
                    // PartMatches
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let regex_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    stack[sp] = self.regexes[regex_idx].is_match(part);
                    sp += 1;
                    pc += 4;
                }
                0x45 => {
                    // PartIsEmpty
                    let part_idx = self.bytecode[pc + 1] as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    stack[sp] = parts.get(part_idx).is_empty();
                    sp += 1;
                    pc += 2;
                }
                0x46 => {
                    // PartNotEmpty
                    let part_idx = self.bytecode[pc + 1] as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    stack[sp] = !parts.get(part_idx).is_empty();
                    sp += 1;
                    pc += 2;
                }
                0x47 => {
                    // PartInSet — true if the part equals any string in the set
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let set_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    let set = &self.string_sets[set_idx];
                    stack[sp] = set
                        .iter()
                        .any(|&str_idx| part == &self.strings[str_idx as usize][..]);
                    sp += 1;
                    pc += 4;
                }
                0x48 => {
                    // PartIEquals (case-insensitive)
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    stack[sp] = part.eq_ignore_ascii_case(&self.strings[str_idx]);
                    sp += 1;
                    pc += 4;
                }
                0x49 => {
                    // PartIContains (case-insensitive)
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    let needle = &self.strings[str_idx];
                    stack[sp] = icontains(part, needle);
                    sp += 1;
                    pc += 4;
                }

                // ============ Header Operations ============
                // Layout: opcode, u8 part index, u16 header-name index and (except
                // HeaderExists) u16 value index. A header that cannot be extracted
                // evaluates to false.
                0x50 => {
                    // HeaderEquals
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let headers = parts.get(part_idx);
                    let header_name = &self.strings[hdr_idx];
                    let expected = &self.strings[val_idx];
                    stack[sp] = extract_header_value(headers, header_name)
                        .map(|v| v == &expected[..])
                        .unwrap_or(false);
                    sp += 1;
                    pc += 6;
                }
                0x51 => {
                    // HeaderIEquals (case-insensitive)
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let headers = parts.get(part_idx);
                    let header_name = &self.strings[hdr_idx];
                    let expected = &self.strings[val_idx];
                    stack[sp] = extract_header_value(headers, header_name)
                        .map(|v| v.eq_ignore_ascii_case(expected))
                        .unwrap_or(false);
                    sp += 1;
                    pc += 6;
                }
                0x52 => {
                    // HeaderContains
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let headers = parts.get(part_idx);
                    let header_name = &self.strings[hdr_idx];
                    stack[sp] = extract_header_value(headers, header_name)
                        .map(|v| self.searchers[val_idx].find(v).is_some())
                        .unwrap_or(false);
                    sp += 1;
                    pc += 6;
                }
                0x53 => {
                    // HeaderExists
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let headers = parts.get(part_idx);
                    let header_name = &self.strings[hdr_idx];
                    stack[sp] = extract_header_value(headers, header_name).is_some();
                    sp += 1;
                    pc += 4;
                }

                // ============ Short-circuit Jumps ============
                // Layout: opcode, i16 offset (little-endian). The offset is added to
                // the address of the jump opcode itself. When the jump is taken the
                // tested value stays on the stack as the result; otherwise it is
                // popped and execution continues with the next instruction.
                0x70 => {
                    // JumpIfFalse — short-circuit AND
                    debug_assert!(sp >= 1, "Stack underflow on JumpIfFalse");
                    if !stack[sp - 1] {
                        // Left side is false → result is false, skip right operand
                        let offset = read_i16(&self.bytecode, pc + 1);
                        pc = (pc as isize + offset as isize) as usize;
                    } else {
                        // Left side is true → pop it, evaluate right operand
                        sp -= 1;
                        pc += 3;
                    }
                }
                0x71 => {
                    // JumpIfTrue — short-circuit OR
                    debug_assert!(sp >= 1, "Stack underflow on JumpIfTrue");
                    if stack[sp - 1] {
                        // Left side is true → result is true, skip right operand
                        let offset = read_i16(&self.bytecode, pc + 1);
                        pc = (pc as isize + offset as isize) as usize;
                    } else {
                        // Left side is false → pop it, evaluate right operand
                        sp -= 1;
                        pc += 3;
                    }
                }

                // ============ Random ============
                0x60 => {
                    // Rand — operand is the u16 `n` passed to `rand_1_in_n`
                    let n = read_u16(&self.bytecode, pc + 1);
                    stack[sp] = rand_1_in_n(n);
                    sp += 1;
                    pc += 3;
                }

                // ============ Control ============
                0xFF => {
                    // Return — the top of the stack is the filter result
                    debug_assert!(sp >= 1, "Stack underflow on RETURN");
                    return stack[sp - 1];
                }

                _ => {
                    // Unknown opcode - should never happen with valid bytecode
                    #[cfg(debug_assertions)]
                    panic!("Unknown opcode: 0x{:02X} at pc={}", self.bytecode[pc], pc);
                    #[cfg(not(debug_assertions))]
                    return false;
                }
            }
        }
    }
419
420    /// Get the original filter source.
421    pub fn source(&self) -> &str {
422        &self.source
423    }
424
425    /// Get the bytecode length.
426    pub fn bytecode_len(&self) -> usize {
427        self.bytecode.len()
428    }
429
430    /// Get the number of string literals.
431    pub fn string_count(&self) -> usize {
432        self.strings.len()
433    }
434
435    /// Get the number of regex patterns.
436    pub fn regex_count(&self) -> usize {
437        self.regexes.len()
438    }
439
440    /// Get the delimiter used for splitting.
441    pub fn delimiter(&self) -> &[u8] {
442        &self.delimiter
443    }
444}
445
/// Decode the two bytes at `offset` and `offset + 1` as a little-endian `u16`.
///
/// Panics if either byte lies outside `bytecode`.
#[inline(always)]
fn read_u16(bytecode: &[u8], offset: usize) -> u16 {
    // Low byte comes first in the stream, high byte second.
    let low = u16::from(bytecode[offset]);
    let high = u16::from(bytecode[offset + 1]);
    low | (high << 8)
}
451
/// Decode the two bytes at `offset` and `offset + 1` as a little-endian `i16`.
///
/// Panics if either byte lies outside `bytecode`.
#[inline(always)]
fn read_i16(bytecode: &[u8], offset: usize) -> i16 {
    let low = bytecode[offset];
    let high = bytecode[offset + 1];
    i16::from_le_bytes([low, high])
}
457
/// Reports whether `needle` occurs in `haystack`, ignoring ASCII case.
///
/// An empty needle matches every haystack, including an empty one.
#[inline]
fn icontains(haystack: &[u8], needle: &[u8]) -> bool {
    match needle.len() {
        // Also keeps `windows` from being called with a zero width.
        0 => true,
        width if width > haystack.len() => false,
        // Compare every needle-sized window of the haystack.
        width => haystack
            .windows(width)
            .any(|candidate| candidate.eq_ignore_ascii_case(needle)),
    }
}
476
477/// Returns true with probability 1/N.
478///
479/// Uses a deterministic counter for reproducible sampling.
480#[inline]
481fn rand_1_in_n(n: u16) -> bool {
482    if n <= 1 {
483        return true;
484    }
485    let count = RAND_COUNTER.fetch_add(1, Ordering::Relaxed);
486    count.is_multiple_of(n as u64)
487}
488
489/// Reset the random counter (for testing).
490pub fn reset_rand_counter() {
491    RAND_COUNTER.store(0, Ordering::Relaxed);
492}
493
#[cfg(test)]
mod tests {
    use std::sync::{Mutex, MutexGuard, PoisonError};

    use super::*;

    /// Serializes the tests that touch the process-wide `RAND_COUNTER`.
    ///
    /// The test harness runs tests on several threads; without this lock a
    /// reset issued by one test could land in the middle of another test's
    /// sampling sequence and make it fail intermittently.
    static RAND_COUNTER_GUARD: Mutex<()> = Mutex::new(());

    /// Acquire the counter lock, ignoring poisoning from an earlier failed test.
    fn lock_rand_counter() -> MutexGuard<'static, ()> {
        RAND_COUNTER_GUARD
            .lock()
            .unwrap_or_else(PoisonError::into_inner)
    }

    /// Build a filter of the form `<opcode> <str_idx> Return` whose string
    /// table contains only `needle`.
    fn make_simple_filter(opcode: u8, str_idx: u16, needle: &str) -> CompiledFilter {
        let mut bytecode = vec![opcode];
        bytecode.extend_from_slice(&str_idx.to_le_bytes());
        bytecode.push(0xFF); // Return

        CompiledFilter::new(
            bytecode,
            vec![needle.as_bytes().to_vec()],
            vec![],
            vec![],
            b";;;".to_vec(),
            "test filter".into(),
        )
    }

    #[test]
    fn test_contains() {
        let filter = make_simple_filter(0x10, 0, "hello");
        assert!(filter.evaluate(Bytes::from("say hello world")));
        assert!(!filter.evaluate(Bytes::from("say goodbye")));
    }

    #[test]
    fn test_starts_with() {
        let filter = make_simple_filter(0x11, 0, "hello");
        assert!(filter.evaluate(Bytes::from("hello world")));
        assert!(!filter.evaluate(Bytes::from("say hello")));
    }

    #[test]
    fn test_ends_with() {
        let filter = make_simple_filter(0x12, 0, "world");
        assert!(filter.evaluate(Bytes::from("hello world")));
        assert!(!filter.evaluate(Bytes::from("world hello")));
    }

    #[test]
    fn test_equals() {
        let filter = make_simple_filter(0x13, 0, "hello");
        assert!(filter.evaluate(Bytes::from("hello")));
        assert!(!filter.evaluate(Bytes::from("hello world")));
    }

    #[test]
    fn test_push_true() {
        let filter = CompiledFilter::new(
            vec![0x01, 0xFF], // PushTrue, Return
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "true".into(),
        );
        assert!(filter.evaluate(Bytes::from("anything")));
    }

    #[test]
    fn test_push_false() {
        let filter = CompiledFilter::new(
            vec![0x02, 0xFF], // PushFalse, Return
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "false".into(),
        );
        assert!(!filter.evaluate(Bytes::from("anything")));
    }

    #[test]
    fn test_and() {
        // true AND true = true
        let filter = CompiledFilter::new(
            vec![0x01, 0x01, 0x30, 0xFF], // PushTrue, PushTrue, And, Return
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "true AND true".into(),
        );
        assert!(filter.evaluate(Bytes::from("")));

        // true AND false = false
        let filter = CompiledFilter::new(
            vec![0x01, 0x02, 0x30, 0xFF], // PushTrue, PushFalse, And, Return
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "true AND false".into(),
        );
        assert!(!filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_or() {
        // false OR true = true
        let filter = CompiledFilter::new(
            vec![0x02, 0x01, 0x31, 0xFF], // PushFalse, PushTrue, Or, Return
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "false OR true".into(),
        );
        assert!(filter.evaluate(Bytes::from("")));

        // false OR false = false
        let filter = CompiledFilter::new(
            vec![0x02, 0x02, 0x31, 0xFF], // PushFalse, PushFalse, Or, Return
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "false OR false".into(),
        );
        assert!(!filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_not() {
        // NOT true = false
        let filter = CompiledFilter::new(
            vec![0x01, 0x32, 0xFF], // PushTrue, Not, Return
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "NOT true".into(),
        );
        assert!(!filter.evaluate(Bytes::from("")));

        // NOT false = true
        let filter = CompiledFilter::new(
            vec![0x02, 0x32, 0xFF], // PushFalse, Not, Return
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "NOT false".into(),
        );
        assert!(filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_part_equals() {
        // PartEquals(part=1, str=0) -> parts[1] == "2"
        let filter = CompiledFilter::new(
            vec![0x43, 0x01, 0x00, 0x00, 0xFF],
            vec![b"2".to_vec()],
            vec![],
            vec![],
            b";;;".to_vec(),
            "field[1] == \"2\"".into(),
        );

        assert!(filter.evaluate(Bytes::from("v1;;;2;;;subtype")));
        assert!(!filter.evaluate(Bytes::from("v1;;;1;;;subtype")));
    }

    #[test]
    fn test_part_in_set() {
        // PartInSet(part=1, set=0) -> parts[1] in {"1", "2", "3"}
        let filter = CompiledFilter::new(
            vec![0x47, 0x01, 0x00, 0x00, 0xFF],
            vec![b"1".to_vec(), b"2".to_vec(), b"3".to_vec()],
            vec![],
            vec![vec![0, 1, 2]], // Set 0 contains string indices 0, 1, 2
            b";;;".to_vec(),
            "field[1] in {\"1\", \"2\", \"3\"}".into(),
        );

        assert!(filter.evaluate(Bytes::from("v1;;;1;;;sub")));
        assert!(filter.evaluate(Bytes::from("v1;;;2;;;sub")));
        assert!(filter.evaluate(Bytes::from("v1;;;3;;;sub")));
        assert!(!filter.evaluate(Bytes::from("v1;;;4;;;sub")));
    }

    #[test]
    fn test_rand() {
        // Hold the lock for the whole test so no other test can reset or
        // advance the shared counter while the sequence is being checked.
        let _guard = lock_rand_counter();
        reset_rand_counter();

        // rand(2) should return true, false, true, false, ...
        let filter = CompiledFilter::new(
            vec![0x60, 0x02, 0x00, 0xFF], // Rand(2), Return
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "rand(2)".into(),
        );

        let results: Vec<bool> = (0..10).map(|_| filter.evaluate(Bytes::from(""))).collect();
        assert_eq!(
            results,
            vec![true, false, true, false, true, false, true, false, true, false]
        );
    }

    #[test]
    fn test_rand_always_true() {
        // The reset below writes to the shared counter, so it must not run
        // concurrently with `test_rand`.
        let _guard = lock_rand_counter();
        reset_rand_counter();

        let filter = CompiledFilter::new(
            vec![0x60, 0x01, 0x00, 0xFF], // Rand(1), Return
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "rand(1)".into(),
        );

        for _ in 0..10 {
            assert!(filter.evaluate(Bytes::from("")));
        }
    }

    #[test]
    fn test_regex_match() {
        let filter = CompiledFilter::new(
            vec![0x20, 0x00, 0x00, 0xFF], // Matches(regex=0), Return
            vec![],
            vec![Regex::new(r"error_[0-9]+").unwrap()],
            vec![],
            b";;;".to_vec(),
            "payload matches \"error_[0-9]+\"".into(),
        );

        assert!(filter.evaluate(Bytes::from("found error_123 in log")));
        assert!(filter.evaluate(Bytes::from("error_0")));
        assert!(!filter.evaluate(Bytes::from("error_abc")));
        assert!(!filter.evaluate(Bytes::from("no errors")));
    }

    #[test]
    fn test_header_iequals() {
        // HeaderIEquals(part=0, header="x-custom", value="expected")
        let filter = CompiledFilter::new(
            vec![0x51, 0x00, 0x00, 0x00, 0x01, 0x00, 0xFF],
            vec![b"x-custom".to_vec(), b"expected".to_vec()],
            vec![],
            vec![],
            b";;;".to_vec(),
            "headers.header(\"x-custom\") iequals \"expected\"".into(),
        );

        assert!(filter.evaluate(Bytes::from("X-Custom: expected\r\n")));
        assert!(filter.evaluate(Bytes::from("x-custom: EXPECTED\r\n")));
        assert!(filter.evaluate(Bytes::from("X-CUSTOM: Expected\r\n")));
        assert!(!filter.evaluate(Bytes::from("X-Custom: other\r\n")));
        assert!(!filter.evaluate(Bytes::from("X-Other: expected\r\n")));
    }

    #[test]
    fn test_complex_multi_clause_filter() {
        // field[1] == "error" AND field[2] == "500" AND header check
        // Bytecode:
        //   PartEquals(1, 0)    -> field[1] == "error"
        //   PartEquals(2, 1)    -> field[2] == "500"
        //   And
        //   HeaderIEquals(4, 2, 3) -> header check
        //   And
        //   Return
        let filter = CompiledFilter::new(
            vec![
                0x43, 0x01, 0x00, 0x00, // PartEquals(part=1, str=0)
                0x43, 0x02, 0x01, 0x00, // PartEquals(part=2, str=1)
                0x30, // And
                0x51, 0x04, 0x02, 0x00, 0x03, 0x00, // HeaderIEquals(part=4, hdr=2, val=3)
                0x30, // And
                0xFF, // Return
            ],
            vec![
                b"error".to_vec(),
                b"500".to_vec(),
                b"content-type".to_vec(),
                b"application/json".to_vec(),
            ],
            vec![],
            vec![],
            b";;;".to_vec(),
            "multi-clause filter".into(),
        );

        // Build a matching record: [ignored, "error", "500", ignored, headers, ...]
        let mut fields: Vec<&str> = vec![""; 6];
        fields[1] = "error";
        fields[2] = "500";
        fields[4] = "Content-Type: application/json\r\n";

        let payload = fields.join(";;;");
        assert!(filter.evaluate(Bytes::from(payload)));

        // Non-matching: wrong field[1]
        fields[1] = "info";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));

        // Non-matching: wrong field[2]
        fields[1] = "error";
        fields[2] = "200";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));

        // Non-matching: wrong header value
        fields[2] = "500";
        fields[4] = "Content-Type: text/html\r\n";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));
    }
}
808}