1use std::sync::atomic::{AtomicU64, Ordering};
6
7use bytes::Bytes;
8use memchr::memmem::Finder;
9use regex::bytes::Regex;
10
11use crate::split::{extract_header_value, PayloadParts};
12
/// Process-wide call counter behind the `Rand` opcode.
///
/// `rand_1_in_n` advances it by one per sampled evaluation and
/// `reset_rand_counter` sets it back to zero.
static RAND_COUNTER: AtomicU64 = AtomicU64::new(0);
15
/// A filter compiled to bytecode, together with the constant pools the
/// bytecode refers to by index.
#[derive(Debug)]
pub struct CompiledFilter {
    /// The program interpreted by `evaluate` / `evaluate_debug`.
    bytecode: Box<[u8]>,

    /// Substring searchers, one per entry of `strings` and in the same order
    /// (`searchers[i]` searches for `strings[i]`).
    searchers: Box<[Finder<'static>]>,

    /// String constant pool; operands of string opcodes index into it.
    strings: Box<[Box<[u8]>]>,

    /// Regex constant pool; operands of the match opcodes index into it.
    regexes: Box<[Regex]>,

    /// Sets of string constants; every element is an index into `strings`.
    string_sets: Box<[Box<[u16]>]>,

    /// Byte sequence that separates the parts of a payload.
    delimiter: Box<[u8]>,

    /// Searcher for `delimiter`, handed to `PayloadParts::ensure`.
    delimiter_finder: Finder<'static>,

    /// Source text of the filter, returned by `source()` and printed in the
    /// debug trace.
    source: Box<str>,
}
49
50impl CompiledFilter {
51 pub fn new(
55 bytecode: Vec<u8>,
56 strings: Vec<Vec<u8>>,
57 regexes: Vec<Regex>,
58 string_sets: Vec<Vec<u16>>,
59 delimiter: Vec<u8>,
60 source: String,
61 ) -> Self {
62 let searchers: Vec<Finder<'static>> = strings
64 .iter()
65 .map(|s| {
66 let bytes: &'static [u8] = Box::leak(s.clone().into_boxed_slice());
67 Finder::new(bytes)
68 })
69 .collect();
70
71 let strings: Vec<Box<[u8]>> = strings.into_iter().map(|s| s.into_boxed_slice()).collect();
72
73 let string_sets: Vec<Box<[u16]>> = string_sets
74 .into_iter()
75 .map(|s| s.into_boxed_slice())
76 .collect();
77
78 let delimiter = delimiter.into_boxed_slice();
79 let delim_bytes: &'static [u8] = Box::leak(delimiter.clone());
80 let delimiter_finder = Finder::new(delim_bytes);
81
82 Self {
83 bytecode: bytecode.into_boxed_slice(),
84 searchers: searchers.into_boxed_slice(),
85 strings: strings.into_boxed_slice(),
86 regexes: regexes.into_boxed_slice(),
87 string_sets: string_sets.into_boxed_slice(),
88 delimiter,
89 delimiter_finder,
90 source: source.into_boxed_str(),
91 }
92 }
93
94 #[inline]
112 pub fn evaluate(&self, payload: Bytes) -> bool {
113 let mut parts = PayloadParts::new_lazy(payload);
115 let delim_len = self.delimiter.len();
116
117 let mut stack = [false; 32];
119 let mut sp: usize = 0;
120 let mut pc: usize = 0;
121
122 let payload_bytes = parts.payload().as_ref() as *const [u8];
123 let payload_bytes: &[u8] = unsafe { &*payload_bytes };
127
128 loop {
129 debug_assert!(pc < self.bytecode.len(), "PC out of bounds");
130 debug_assert!(sp < 32, "Stack overflow");
131
132 match self.bytecode[pc] {
133 0x01 => {
135 stack[sp] = true;
137 sp += 1;
138 pc += 1;
139 }
140 0x02 => {
141 stack[sp] = false;
143 sp += 1;
144 pc += 1;
145 }
146
147 0x10 => {
149 let idx = read_u16(&self.bytecode, pc + 1) as usize;
151 stack[sp] = self.searchers[idx].find(payload_bytes).is_some();
152 sp += 1;
153 pc += 3;
154 }
155 0x11 => {
156 let idx = read_u16(&self.bytecode, pc + 1) as usize;
158 stack[sp] = payload_bytes.starts_with(&self.strings[idx]);
159 sp += 1;
160 pc += 3;
161 }
162 0x12 => {
163 let idx = read_u16(&self.bytecode, pc + 1) as usize;
165 stack[sp] = payload_bytes.ends_with(&self.strings[idx]);
166 sp += 1;
167 pc += 3;
168 }
169 0x13 => {
170 let idx = read_u16(&self.bytecode, pc + 1) as usize;
172 stack[sp] = payload_bytes == &self.strings[idx][..];
173 sp += 1;
174 pc += 3;
175 }
176 0x20 => {
177 let idx = read_u16(&self.bytecode, pc + 1) as usize;
179 stack[sp] = self.regexes[idx].is_match(payload_bytes);
180 sp += 1;
181 pc += 3;
182 }
183
184 0x30 => {
186 debug_assert!(sp >= 2, "Stack underflow on AND");
188 sp -= 1;
189 stack[sp - 1] = stack[sp - 1] && stack[sp];
190 pc += 1;
191 }
192 0x31 => {
193 debug_assert!(sp >= 2, "Stack underflow on OR");
195 sp -= 1;
196 stack[sp - 1] = stack[sp - 1] || stack[sp];
197 pc += 1;
198 }
199 0x32 => {
200 debug_assert!(sp >= 1, "Stack underflow on NOT");
202 stack[sp - 1] = !stack[sp - 1];
203 pc += 1;
204 }
205
206 0x40 => {
208 let part_idx = self.bytecode[pc + 1] as usize;
210 let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
211 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
212 let part = parts.get(part_idx);
213 stack[sp] = self.searchers[str_idx].find(part).is_some();
214 sp += 1;
215 pc += 4;
216 }
217 0x41 => {
218 let part_idx = self.bytecode[pc + 1] as usize;
220 let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
221 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
222 let part = parts.get(part_idx);
223 stack[sp] = part.starts_with(&self.strings[str_idx]);
224 sp += 1;
225 pc += 4;
226 }
227 0x42 => {
228 let part_idx = self.bytecode[pc + 1] as usize;
230 let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
231 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
232 let part = parts.get(part_idx);
233 stack[sp] = part.ends_with(&self.strings[str_idx]);
234 sp += 1;
235 pc += 4;
236 }
237 0x43 => {
238 let part_idx = self.bytecode[pc + 1] as usize;
240 let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
241 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
242 let part = parts.get(part_idx);
243 stack[sp] = part == &self.strings[str_idx][..];
244 sp += 1;
245 pc += 4;
246 }
247 0x44 => {
248 let part_idx = self.bytecode[pc + 1] as usize;
250 let regex_idx = read_u16(&self.bytecode, pc + 2) as usize;
251 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
252 let part = parts.get(part_idx);
253 stack[sp] = self.regexes[regex_idx].is_match(part);
254 sp += 1;
255 pc += 4;
256 }
257 0x45 => {
258 let part_idx = self.bytecode[pc + 1] as usize;
260 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
261 stack[sp] = parts.get(part_idx).is_empty();
262 sp += 1;
263 pc += 2;
264 }
265 0x46 => {
266 let part_idx = self.bytecode[pc + 1] as usize;
268 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
269 stack[sp] = !parts.get(part_idx).is_empty();
270 sp += 1;
271 pc += 2;
272 }
273 0x47 => {
274 let part_idx = self.bytecode[pc + 1] as usize;
276 let set_idx = read_u16(&self.bytecode, pc + 2) as usize;
277 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
278 let part = parts.get(part_idx);
279 let set = &self.string_sets[set_idx];
280 stack[sp] = set
281 .iter()
282 .any(|&str_idx| part == &self.strings[str_idx as usize][..]);
283 sp += 1;
284 pc += 4;
285 }
286 0x48 => {
287 let part_idx = self.bytecode[pc + 1] as usize;
289 let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
290 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
291 let part = parts.get(part_idx);
292 stack[sp] = part.eq_ignore_ascii_case(&self.strings[str_idx]);
293 sp += 1;
294 pc += 4;
295 }
296 0x49 => {
297 let part_idx = self.bytecode[pc + 1] as usize;
299 let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
300 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
301 let part = parts.get(part_idx);
302 let needle = &self.strings[str_idx];
303 stack[sp] = icontains(part, needle);
304 sp += 1;
305 pc += 4;
306 }
307
308 0x50 => {
310 let part_idx = self.bytecode[pc + 1] as usize;
312 let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
313 let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
314 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
315 let headers = parts.get(part_idx);
316 let header_name = &self.strings[hdr_idx];
317 let expected = &self.strings[val_idx];
318 stack[sp] = extract_header_value(headers, header_name)
319 .map(|v| v == &expected[..])
320 .unwrap_or(false);
321 sp += 1;
322 pc += 6;
323 }
324 0x51 => {
325 let part_idx = self.bytecode[pc + 1] as usize;
327 let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
328 let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
329 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
330 let headers = parts.get(part_idx);
331 let header_name = &self.strings[hdr_idx];
332 let expected = &self.strings[val_idx];
333 stack[sp] = extract_header_value(headers, header_name)
334 .map(|v| v.eq_ignore_ascii_case(expected))
335 .unwrap_or(false);
336 sp += 1;
337 pc += 6;
338 }
339 0x52 => {
340 let part_idx = self.bytecode[pc + 1] as usize;
342 let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
343 let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
344 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
345 let headers = parts.get(part_idx);
346 let header_name = &self.strings[hdr_idx];
347 stack[sp] = extract_header_value(headers, header_name)
348 .map(|v| self.searchers[val_idx].find(v).is_some())
349 .unwrap_or(false);
350 sp += 1;
351 pc += 6;
352 }
353 0x53 => {
354 let part_idx = self.bytecode[pc + 1] as usize;
356 let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
357 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
358 let headers = parts.get(part_idx);
359 let header_name = &self.strings[hdr_idx];
360 stack[sp] = extract_header_value(headers, header_name).is_some();
361 sp += 1;
362 pc += 4;
363 }
364
365 0x70 => {
367 debug_assert!(sp >= 1, "Stack underflow on JumpIfFalse");
369 if !stack[sp - 1] {
370 let offset = read_i16(&self.bytecode, pc + 1);
372 pc = (pc as isize + offset as isize) as usize;
373 } else {
374 sp -= 1;
376 pc += 3;
377 }
378 }
379 0x71 => {
380 debug_assert!(sp >= 1, "Stack underflow on JumpIfTrue");
382 if stack[sp - 1] {
383 let offset = read_i16(&self.bytecode, pc + 1);
385 pc = (pc as isize + offset as isize) as usize;
386 } else {
387 sp -= 1;
389 pc += 3;
390 }
391 }
392
393 0x60 => {
395 let n = read_u16(&self.bytecode, pc + 1);
397 stack[sp] = rand_1_in_n(n);
398 sp += 1;
399 pc += 3;
400 }
401
402 0xFF => {
404 debug_assert!(sp >= 1, "Stack underflow on RETURN");
406 return stack[sp - 1];
407 }
408
409 _ => {
410 #[cfg(debug_assertions)]
412 panic!("Unknown opcode: 0x{:02X} at pc={}", self.bytecode[pc], pc);
413 #[cfg(not(debug_assertions))]
414 return false;
415 }
416 }
417 }
418 }
419
420 pub fn evaluate_debug(&self, payload: Bytes) -> bool {
423 let mut parts = PayloadParts::new_lazy(payload);
424 let delim_len = self.delimiter.len();
425
426 let mut stack = [false; 32];
427 let mut sp: usize = 0;
428 let mut pc: usize = 0;
429
430 let payload_bytes = parts.payload().as_ref() as *const [u8];
431 let payload_bytes: &[u8] = unsafe { &*payload_bytes };
432
433 let mut trace_lines: Vec<String> = Vec::new();
434
435 loop {
436 debug_assert!(pc < self.bytecode.len());
437 debug_assert!(sp < 32);
438
439 match self.bytecode[pc] {
440 0x01 => {
441 stack[sp] = true;
442 trace_lines.push(format!(" pc={pc:3} PushTrue → stack[{sp}]=true"));
443 sp += 1; pc += 1;
444 }
445 0x02 => {
446 stack[sp] = false;
447 trace_lines.push(format!(" pc={pc:3} PushFalse → stack[{sp}]=false"));
448 sp += 1; pc += 1;
449 }
450 0x10 => {
451 let idx = read_u16(&self.bytecode, pc + 1) as usize;
452 let result = self.searchers[idx].find(payload_bytes).is_some();
453 stack[sp] = result;
454 trace_lines.push(format!(
455 " pc={pc:3} Contains str[{idx}]={:?} → {result}",
456 String::from_utf8_lossy(&self.strings[idx])
457 ));
458 sp += 1; pc += 3;
459 }
460 0x11 => {
461 let idx = read_u16(&self.bytecode, pc + 1) as usize;
462 let result = payload_bytes.starts_with(&self.strings[idx]);
463 stack[sp] = result;
464 trace_lines.push(format!(
465 " pc={pc:3} StartsWith str[{idx}]={:?} → {result}",
466 String::from_utf8_lossy(&self.strings[idx])
467 ));
468 sp += 1; pc += 3;
469 }
470 0x12 => {
471 let idx = read_u16(&self.bytecode, pc + 1) as usize;
472 let result = payload_bytes.ends_with(&self.strings[idx]);
473 stack[sp] = result;
474 trace_lines.push(format!(
475 " pc={pc:3} EndsWith str[{idx}]={:?} → {result}",
476 String::from_utf8_lossy(&self.strings[idx])
477 ));
478 sp += 1; pc += 3;
479 }
480 0x13 => {
481 let idx = read_u16(&self.bytecode, pc + 1) as usize;
482 let result = payload_bytes == &self.strings[idx][..];
483 stack[sp] = result;
484 trace_lines.push(format!(
485 " pc={pc:3} Equals str[{idx}]={:?} → {result}",
486 String::from_utf8_lossy(&self.strings[idx])
487 ));
488 sp += 1; pc += 3;
489 }
490 0x20 => {
491 let idx = read_u16(&self.bytecode, pc + 1) as usize;
492 let result = self.regexes[idx].is_match(payload_bytes);
493 stack[sp] = result;
494 trace_lines.push(format!(" pc={pc:3} Matches regex[{idx}] → {result}"));
495 sp += 1; pc += 3;
496 }
497 0x30 => {
498 sp -= 1;
499 let result = stack[sp - 1] && stack[sp];
500 stack[sp - 1] = result;
501 trace_lines.push(format!(" pc={pc:3} And → {result}"));
502 pc += 1;
503 }
504 0x31 => {
505 sp -= 1;
506 let result = stack[sp - 1] || stack[sp];
507 stack[sp - 1] = result;
508 trace_lines.push(format!(" pc={pc:3} Or → {result}"));
509 pc += 1;
510 }
511 0x32 => {
512 stack[sp - 1] = !stack[sp - 1];
513 trace_lines.push(format!(" pc={pc:3} Not → {}", stack[sp - 1]));
514 pc += 1;
515 }
516 0x40 => {
517 let part_idx = self.bytecode[pc + 1] as usize;
518 let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
519 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
520 let part = parts.get(part_idx);
521 let result = self.searchers[str_idx].find(part).is_some();
522 stack[sp] = result;
523 trace_lines.push(format!(
524 " pc={pc:3} PartContains part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
525 String::from_utf8_lossy(part),
526 String::from_utf8_lossy(&self.strings[str_idx])
527 ));
528 sp += 1; pc += 4;
529 }
530 0x41 => {
531 let part_idx = self.bytecode[pc + 1] as usize;
532 let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
533 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
534 let part = parts.get(part_idx);
535 let result = part.starts_with(&self.strings[str_idx]);
536 stack[sp] = result;
537 trace_lines.push(format!(
538 " pc={pc:3} PartStartsWith part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
539 String::from_utf8_lossy(part),
540 String::from_utf8_lossy(&self.strings[str_idx])
541 ));
542 sp += 1; pc += 4;
543 }
544 0x42 => {
545 let part_idx = self.bytecode[pc + 1] as usize;
546 let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
547 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
548 let part = parts.get(part_idx);
549 let result = part.ends_with(&self.strings[str_idx]);
550 stack[sp] = result;
551 trace_lines.push(format!(
552 " pc={pc:3} PartEndsWith part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
553 String::from_utf8_lossy(part),
554 String::from_utf8_lossy(&self.strings[str_idx])
555 ));
556 sp += 1; pc += 4;
557 }
558 0x43 => {
559 let part_idx = self.bytecode[pc + 1] as usize;
560 let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
561 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
562 let part = parts.get(part_idx);
563 let result = part == &self.strings[str_idx][..];
564 stack[sp] = result;
565 trace_lines.push(format!(
566 " pc={pc:3} PartEquals part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
567 String::from_utf8_lossy(part),
568 String::from_utf8_lossy(&self.strings[str_idx])
569 ));
570 sp += 1; pc += 4;
571 }
572 0x44 => {
573 let part_idx = self.bytecode[pc + 1] as usize;
574 let regex_idx = read_u16(&self.bytecode, pc + 2) as usize;
575 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
576 let part = parts.get(part_idx);
577 let result = self.regexes[regex_idx].is_match(part);
578 stack[sp] = result;
579 trace_lines.push(format!(
580 " pc={pc:3} PartMatches part[{part_idx}]={:?} regex[{regex_idx}] → {result}",
581 String::from_utf8_lossy(part)
582 ));
583 sp += 1; pc += 4;
584 }
585 0x45 => {
586 let part_idx = self.bytecode[pc + 1] as usize;
587 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
588 let result = parts.get(part_idx).is_empty();
589 stack[sp] = result;
590 trace_lines.push(format!(" pc={pc:3} PartIsEmpty part[{part_idx}] → {result}"));
591 sp += 1; pc += 2;
592 }
593 0x46 => {
594 let part_idx = self.bytecode[pc + 1] as usize;
595 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
596 let result = !parts.get(part_idx).is_empty();
597 stack[sp] = result;
598 trace_lines.push(format!(" pc={pc:3} PartNotEmpty part[{part_idx}] → {result}"));
599 sp += 1; pc += 2;
600 }
601 0x47 => {
602 let part_idx = self.bytecode[pc + 1] as usize;
603 let set_idx = read_u16(&self.bytecode, pc + 2) as usize;
604 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
605 let part = parts.get(part_idx);
606 let set = &self.string_sets[set_idx];
607 let result = set.iter().any(|&si| part == &self.strings[si as usize][..]);
608 stack[sp] = result;
609 trace_lines.push(format!(
610 " pc={pc:3} PartInSet part[{part_idx}]={:?} set[{set_idx}] → {result}",
611 String::from_utf8_lossy(part)
612 ));
613 sp += 1; pc += 4;
614 }
615 0x48 => {
616 let part_idx = self.bytecode[pc + 1] as usize;
617 let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
618 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
619 let part = parts.get(part_idx);
620 let result = part.eq_ignore_ascii_case(&self.strings[str_idx]);
621 stack[sp] = result;
622 trace_lines.push(format!(
623 " pc={pc:3} PartIEquals part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
624 String::from_utf8_lossy(part),
625 String::from_utf8_lossy(&self.strings[str_idx])
626 ));
627 sp += 1; pc += 4;
628 }
629 0x49 => {
630 let part_idx = self.bytecode[pc + 1] as usize;
631 let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
632 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
633 let part = parts.get(part_idx);
634 let needle = &self.strings[str_idx];
635 let result = icontains(part, needle);
636 stack[sp] = result;
637 trace_lines.push(format!(
638 " pc={pc:3} PartIContains part[{part_idx}]={:?} str[{str_idx}]={:?} → {result}",
639 String::from_utf8_lossy(part),
640 String::from_utf8_lossy(&self.strings[str_idx])
641 ));
642 sp += 1; pc += 4;
643 }
644 0x50 => {
645 let part_idx = self.bytecode[pc + 1] as usize;
646 let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
647 let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
648 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
649 let headers = parts.get(part_idx);
650 let header_name = &self.strings[hdr_idx];
651 let expected = &self.strings[val_idx];
652 let extracted = extract_header_value(headers, header_name);
653 let result = extracted.map(|v| v == &expected[..]).unwrap_or(false);
654 stack[sp] = result;
655 trace_lines.push(format!(
656 " pc={pc:3} HeaderEquals part[{part_idx}] hdr={:?} expected={:?} got={:?} → {result}",
657 String::from_utf8_lossy(header_name),
658 String::from_utf8_lossy(expected),
659 extracted.map(|v| String::from_utf8_lossy(v).to_string())
660 ));
661 sp += 1; pc += 6;
662 }
663 0x51 => {
664 let part_idx = self.bytecode[pc + 1] as usize;
665 let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
666 let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
667 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
668 let headers = parts.get(part_idx);
669 let header_name = &self.strings[hdr_idx];
670 let expected = &self.strings[val_idx];
671 let extracted = extract_header_value(headers, header_name);
672 let result = extracted.map(|v| v.eq_ignore_ascii_case(expected)).unwrap_or(false);
673 stack[sp] = result;
674 trace_lines.push(format!(
675 " pc={pc:3} HeaderIEquals part[{part_idx}] hdr={:?} expected={:?} got={:?} → {result}",
676 String::from_utf8_lossy(header_name),
677 String::from_utf8_lossy(expected),
678 extracted.map(|v| String::from_utf8_lossy(v).to_string())
679 ));
680 sp += 1; pc += 6;
681 }
682 0x52 => {
683 let part_idx = self.bytecode[pc + 1] as usize;
684 let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
685 let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
686 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
687 let headers = parts.get(part_idx);
688 let header_name = &self.strings[hdr_idx];
689 let extracted = extract_header_value(headers, header_name);
690 let result = extracted.map(|v| self.searchers[val_idx].find(v).is_some()).unwrap_or(false);
691 stack[sp] = result;
692 trace_lines.push(format!(
693 " pc={pc:3} HeaderContains part[{part_idx}] hdr={:?} needle={:?} got={:?} → {result}",
694 String::from_utf8_lossy(header_name),
695 String::from_utf8_lossy(&self.strings[val_idx]),
696 extracted.map(|v| String::from_utf8_lossy(v).to_string())
697 ));
698 sp += 1; pc += 6;
699 }
700 0x53 => {
701 let part_idx = self.bytecode[pc + 1] as usize;
702 let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
703 parts.ensure(part_idx, &self.delimiter_finder, delim_len);
704 let headers = parts.get(part_idx);
705 let header_name = &self.strings[hdr_idx];
706 let result = extract_header_value(headers, header_name).is_some();
707 stack[sp] = result;
708 trace_lines.push(format!(
709 " pc={pc:3} HeaderExists part[{part_idx}] hdr={:?} → {result}",
710 String::from_utf8_lossy(header_name)
711 ));
712 sp += 1; pc += 4;
713 }
714 0x70 => {
715 if !stack[sp - 1] {
716 let offset = read_i16(&self.bytecode, pc + 1);
717 trace_lines.push(format!(" pc={pc:3} JumpIfFalse → false, jump by {offset}"));
718 pc = (pc as isize + offset as isize) as usize;
719 } else {
720 trace_lines.push(format!(" pc={pc:3} JumpIfFalse → true, pop & continue"));
721 sp -= 1;
722 pc += 3;
723 }
724 }
725 0x71 => {
726 if stack[sp - 1] {
727 let offset = read_i16(&self.bytecode, pc + 1);
728 trace_lines.push(format!(" pc={pc:3} JumpIfTrue → true, jump by {offset}"));
729 pc = (pc as isize + offset as isize) as usize;
730 } else {
731 trace_lines.push(format!(" pc={pc:3} JumpIfTrue → false, pop & continue"));
732 sp -= 1;
733 pc += 3;
734 }
735 }
736 0x60 => {
737 let n = read_u16(&self.bytecode, pc + 1);
738 let result = rand_1_in_n(n);
739 stack[sp] = result;
740 trace_lines.push(format!(" pc={pc:3} Rand(1/{n}) → {result}"));
741 sp += 1; pc += 3;
742 }
743 0xFF => {
744 let result = stack[sp - 1];
745 if result {
746 eprintln!("=== FILTER DEBUG (result=true) filter={:?} ===", self.source);
747 for line in &trace_lines {
748 eprintln!("{line}");
749 }
750 eprintln!("=== END FILTER DEBUG ===");
751 }
752 return result;
753 }
754 _ => {
755 #[cfg(debug_assertions)]
756 panic!("Unknown opcode: 0x{:02X} at pc={}", self.bytecode[pc], pc);
757 #[cfg(not(debug_assertions))]
758 return false;
759 }
760 }
761 }
762 }
763
764 pub fn source(&self) -> &str {
766 &self.source
767 }
768
769 pub fn bytecode_len(&self) -> usize {
771 self.bytecode.len()
772 }
773
774 pub fn string_count(&self) -> usize {
776 self.strings.len()
777 }
778
779 pub fn regex_count(&self) -> usize {
781 self.regexes.len()
782 }
783
784 pub fn delimiter(&self) -> &[u8] {
786 &self.delimiter
787 }
788}
789
/// Decodes the little-endian `u16` stored at `bytecode[offset..offset + 2]`.
///
/// # Panics
///
/// Panics if either byte lies outside `bytecode`.
#[inline(always)]
fn read_u16(bytecode: &[u8], offset: usize) -> u16 {
    let low = u16::from(bytecode[offset]);
    let high = u16::from(bytecode[offset + 1]);
    (high << 8) | low
}
795
/// Decodes the little-endian `i16` stored at `bytecode[offset..offset + 2]`.
///
/// # Panics
///
/// Panics if either byte lies outside `bytecode`.
#[inline(always)]
fn read_i16(bytecode: &[u8], offset: usize) -> i16 {
    let low = bytecode[offset];
    let high = bytecode[offset + 1];
    let raw = (u16::from(high) << 8) | u16::from(low);
    // Same-width cast: reinterprets the bit pattern as two's complement.
    raw as i16
}
801
/// Reports whether `needle` occurs in `haystack`, ignoring ASCII case.
///
/// An empty `needle` is contained in every haystack, including an empty one.
#[inline]
fn icontains(haystack: &[u8], needle: &[u8]) -> bool {
    // The emptiness check must come first: `windows(0)` would panic. A
    // haystack shorter than the needle simply produces no windows.
    needle.is_empty()
        || haystack
            .windows(needle.len())
            .any(|candidate| candidate.eq_ignore_ascii_case(needle))
}
820
821#[inline]
825fn rand_1_in_n(n: u16) -> bool {
826 if n <= 1 {
827 return true;
828 }
829 let count = RAND_COUNTER.fetch_add(1, Ordering::Relaxed);
830 count.is_multiple_of(n as u64)
831}
832
833pub fn reset_rand_counter() {
835 RAND_COUNTER.store(0, Ordering::Relaxed);
836}
837
#[cfg(test)]
mod tests {
    use super::*;

    use std::sync::{Mutex, MutexGuard, PoisonError};

    /// Serialises the tests that touch the global `RAND_COUNTER`.
    ///
    /// The test harness runs tests on several threads; without this lock a
    /// `reset_rand_counter()` in one test could land in the middle of the
    /// exact sequence another test asserts on.
    static RAND_COUNTER_GUARD: Mutex<()> = Mutex::new(());

    /// Takes the counter lock, ignoring poisoning left by a failed test.
    fn lock_rand_counter() -> MutexGuard<'static, ()> {
        RAND_COUNTER_GUARD
            .lock()
            .unwrap_or_else(PoisonError::into_inner)
    }

    /// Builds `<opcode> <str_idx: u16 LE> RETURN` with `needle` as the only
    /// string constant.
    fn make_simple_filter(opcode: u8, str_idx: u16, needle: &str) -> CompiledFilter {
        let mut bytecode = vec![opcode];
        bytecode.extend_from_slice(&str_idx.to_le_bytes());
        bytecode.push(0xFF);

        CompiledFilter::new(
            bytecode,
            vec![needle.as_bytes().to_vec()],
            vec![],
            vec![],
            b";;;".to_vec(),
            String::from("test filter"),
        )
    }

    #[test]
    fn test_contains() {
        let filter = make_simple_filter(0x10, 0, "hello");
        assert!(filter.evaluate(Bytes::from("say hello world")));
        assert!(!filter.evaluate(Bytes::from("say goodbye")));
    }

    #[test]
    fn test_starts_with() {
        let filter = make_simple_filter(0x11, 0, "hello");
        assert!(filter.evaluate(Bytes::from("hello world")));
        assert!(!filter.evaluate(Bytes::from("say hello")));
    }

    #[test]
    fn test_ends_with() {
        let filter = make_simple_filter(0x12, 0, "world");
        assert!(filter.evaluate(Bytes::from("hello world")));
        assert!(!filter.evaluate(Bytes::from("world hello")));
    }

    #[test]
    fn test_equals() {
        let filter = make_simple_filter(0x13, 0, "hello");
        assert!(filter.evaluate(Bytes::from("hello")));
        assert!(!filter.evaluate(Bytes::from("hello world")));
    }

    #[test]
    fn test_push_true() {
        let filter = CompiledFilter::new(
            vec![0x01, 0xFF],
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "true".into(),
        );
        assert!(filter.evaluate(Bytes::from("anything")));
    }

    #[test]
    fn test_push_false() {
        let filter = CompiledFilter::new(
            vec![0x02, 0xFF],
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "false".into(),
        );
        assert!(!filter.evaluate(Bytes::from("anything")));
    }

    #[test]
    fn test_and() {
        let filter = CompiledFilter::new(
            vec![0x01, 0x01, 0x30, 0xFF],
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "true AND true".into(),
        );
        assert!(filter.evaluate(Bytes::from("")));

        let filter = CompiledFilter::new(
            vec![0x01, 0x02, 0x30, 0xFF],
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "true AND false".into(),
        );
        assert!(!filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_or() {
        let filter = CompiledFilter::new(
            vec![0x02, 0x01, 0x31, 0xFF],
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "false OR true".into(),
        );
        assert!(filter.evaluate(Bytes::from("")));

        let filter = CompiledFilter::new(
            vec![0x02, 0x02, 0x31, 0xFF],
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "false OR false".into(),
        );
        assert!(!filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_not() {
        let filter = CompiledFilter::new(
            vec![0x01, 0x32, 0xFF],
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "NOT true".into(),
        );
        assert!(!filter.evaluate(Bytes::from("")));

        let filter = CompiledFilter::new(
            vec![0x02, 0x32, 0xFF],
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "NOT false".into(),
        );
        assert!(filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_part_equals() {
        // PartEquals part=1 str=0, RETURN
        let filter = CompiledFilter::new(
            vec![0x43, 0x01, 0x00, 0x00, 0xFF],
            vec![b"2".to_vec()],
            vec![],
            vec![],
            b";;;".to_vec(),
            "field[1] == \"2\"".into(),
        );

        assert!(filter.evaluate(Bytes::from("v1;;;2;;;subtype")));
        assert!(!filter.evaluate(Bytes::from("v1;;;1;;;subtype")));
    }

    #[test]
    fn test_part_in_set() {
        // PartInSet part=1 set=0, RETURN
        let filter = CompiledFilter::new(
            vec![0x47, 0x01, 0x00, 0x00, 0xFF],
            vec![b"1".to_vec(), b"2".to_vec(), b"3".to_vec()],
            vec![],
            vec![vec![0, 1, 2]],
            b";;;".to_vec(),
            "field[1] in {\"1\", \"2\", \"3\"}".into(),
        );

        assert!(filter.evaluate(Bytes::from("v1;;;1;;;sub")));
        assert!(filter.evaluate(Bytes::from("v1;;;2;;;sub")));
        assert!(filter.evaluate(Bytes::from("v1;;;3;;;sub")));
        assert!(!filter.evaluate(Bytes::from("v1;;;4;;;sub")));
    }

    #[test]
    fn test_rand() {
        // Hold the lock for the whole test: the assertion below depends on
        // nobody else touching the counter between the reset and the last call.
        let _counter = lock_rand_counter();
        reset_rand_counter();

        // Rand n=2, RETURN
        let filter = CompiledFilter::new(
            vec![0x60, 0x02, 0x00, 0xFF],
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "rand(2)".into(),
        );

        let results: Vec<bool> = (0..10).map(|_| filter.evaluate(Bytes::from(""))).collect();
        assert_eq!(
            results,
            vec![true, false, true, false, true, false, true, false, true, false]
        );
    }

    #[test]
    fn test_rand_always_true() {
        // This test resets the shared counter, so it must not overlap with
        // `test_rand`.
        let _counter = lock_rand_counter();
        reset_rand_counter();

        // Rand n=1, RETURN
        let filter = CompiledFilter::new(
            vec![0x60, 0x01, 0x00, 0xFF],
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "rand(1)".into(),
        );

        for _ in 0..10 {
            assert!(filter.evaluate(Bytes::from("")));
        }
    }

    #[test]
    fn test_regex_match() {
        // Matches regex=0, RETURN
        let filter = CompiledFilter::new(
            vec![0x20, 0x00, 0x00, 0xFF],
            vec![],
            vec![Regex::new(r"error_[0-9]+").unwrap()],
            vec![],
            b";;;".to_vec(),
            "payload matches \"error_[0-9]+\"".into(),
        );

        assert!(filter.evaluate(Bytes::from("found error_123 in log")));
        assert!(filter.evaluate(Bytes::from("error_0")));
        assert!(!filter.evaluate(Bytes::from("error_abc")));
        assert!(!filter.evaluate(Bytes::from("no errors")));
    }

    #[test]
    fn test_header_iequals() {
        // HeaderIEquals part=0 name=0 value=1, RETURN
        let filter = CompiledFilter::new(
            vec![0x51, 0x00, 0x00, 0x00, 0x01, 0x00, 0xFF],
            vec![b"x-custom".to_vec(), b"expected".to_vec()],
            vec![],
            vec![],
            b";;;".to_vec(),
            "headers.header(\"x-custom\") iequals \"expected\"".into(),
        );

        assert!(filter.evaluate(Bytes::from("X-Custom: expected\r\n")));
        assert!(filter.evaluate(Bytes::from("x-custom: EXPECTED\r\n")));
        assert!(filter.evaluate(Bytes::from("X-CUSTOM: Expected\r\n")));
        assert!(!filter.evaluate(Bytes::from("X-Custom: other\r\n")));
        assert!(!filter.evaluate(Bytes::from("X-Other: expected\r\n")));
    }

    #[test]
    fn test_complex_multi_clause_filter() {
        let filter = CompiledFilter::new(
            vec![
                0x43, 0x01, 0x00, 0x00, // PartEquals part=1 str=0
                0x43, 0x02, 0x01, 0x00, // PartEquals part=2 str=1
                0x30, // And
                0x51, 0x04, 0x02, 0x00, 0x03, 0x00, // HeaderIEquals part=4 name=2 value=3
                0x30, // And
                0xFF, // Return
            ],
            vec![
                b"error".to_vec(),
                b"500".to_vec(),
                b"content-type".to_vec(),
                b"application/json".to_vec(),
            ],
            vec![],
            vec![],
            b";;;".to_vec(),
            "multi-clause filter".into(),
        );

        let mut fields: Vec<&str> = vec![""; 6];
        fields[1] = "error";
        fields[2] = "500";
        fields[4] = "Content-Type: application/json\r\n";

        // All three clauses hold.
        let payload = fields.join(";;;");
        assert!(filter.evaluate(Bytes::from(payload)));

        // First clause fails.
        fields[1] = "info";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));

        // Second clause fails.
        fields[1] = "error";
        fields[2] = "200";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));

        // Header clause fails.
        fields[2] = "500";
        fields[4] = "Content-Type: text/html\r\n";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));
    }
}