1use std::sync::atomic::{AtomicU64, Ordering};
6
7use bytes::Bytes;
8use memchr::memmem::Finder;
9use regex::bytes::Regex;
10
11use crate::split::{extract_header_value, PayloadParts};
12
/// Shared call counter behind `rand_1_in_n`.
///
/// It is incremented once for every `rand_1_in_n` call with `n > 1` and set
/// back to zero by `reset_rand_counter`. Being a `static`, it is shared by
/// every filter and every thread in the process.
static RAND_COUNTER: AtomicU64 = AtomicU64::new(0);
15
/// A filter compiled to bytecode, bundled with the lookup tables that the
/// bytecode's operands index into.
///
/// Instances are built with `CompiledFilter::new` and executed with
/// `CompiledFilter::evaluate`.
#[derive(Debug)]
pub struct CompiledFilter {
    /// Instruction stream interpreted by `evaluate`.
    bytecode: Box<[u8]>,

    /// Substring searchers built in `new`, one per entry of `strings` and
    /// stored at the same index.
    searchers: Box<[Finder<'static>]>,

    /// String table; bytecode operands and `string_sets` entries index into it.
    strings: Box<[Box<[u8]>]>,

    /// Regex table; bytecode operands index into it.
    regexes: Box<[Regex]>,

    /// Sets of string-table indices used by the "part in set" opcode (`0x47`).
    string_sets: Box<[Box<[u16]>]>,

    /// Byte sequence handed (via its length and `delimiter_finder`) to the
    /// payload splitter when a part is requested.
    delimiter: Box<[u8]>,

    /// Searcher for `delimiter`, built once in `new`.
    delimiter_finder: Finder<'static>,

    /// Text supplied by the caller as the filter's source; returned by `source()`.
    source: Box<str>,
}
49
50impl CompiledFilter {
51 pub fn new(
55 bytecode: Vec<u8>,
56 strings: Vec<Vec<u8>>,
57 regexes: Vec<Regex>,
58 string_sets: Vec<Vec<u16>>,
59 delimiter: Vec<u8>,
60 source: String,
61 ) -> Self {
62 let searchers: Vec<Finder<'static>> = strings
64 .iter()
65 .map(|s| {
66 let bytes: &'static [u8] = Box::leak(s.clone().into_boxed_slice());
67 Finder::new(bytes)
68 })
69 .collect();
70
71 let strings: Vec<Box<[u8]>> = strings.into_iter().map(|s| s.into_boxed_slice()).collect();
72
73 let string_sets: Vec<Box<[u16]>> = string_sets
74 .into_iter()
75 .map(|s| s.into_boxed_slice())
76 .collect();
77
78 let delimiter = delimiter.into_boxed_slice();
79 let delim_bytes: &'static [u8] = Box::leak(delimiter.clone());
80 let delimiter_finder = Finder::new(delim_bytes);
81
82 Self {
83 bytecode: bytecode.into_boxed_slice(),
84 searchers: searchers.into_boxed_slice(),
85 strings: strings.into_boxed_slice(),
86 regexes: regexes.into_boxed_slice(),
87 string_sets: string_sets.into_boxed_slice(),
88 delimiter,
89 delimiter_finder,
90 source: source.into_boxed_str(),
91 }
92 }
93
    /// Runs the compiled bytecode against `payload` and returns the resulting boolean.
    ///
    /// The interpreter is a stack machine over `bool`s with a fixed 32-slot
    /// stack. Every instruction is a one-byte opcode followed by its operands;
    /// 16-bit operands are little-endian (see `read_u16` / `read_i16`).
    /// Execution starts at offset 0 and normally ends at opcode `0xFF`, which
    /// returns the value on top of the stack.
    ///
    /// Opcode groups:
    ///
    /// * `0x01` / `0x02` - push `true` / `false`
    /// * `0x10`-`0x13`, `0x20` - tests against the whole payload
    /// * `0x30`-`0x32` - AND / OR / NOT
    /// * `0x40`-`0x49` - tests against a single part of the payload
    /// * `0x50`-`0x53` - header lookups inside a part
    /// * `0x60` - counter-based sampling through `rand_1_in_n`
    /// * `0x70` / `0x71` - conditional relative jumps
    /// * `0xFF` - return
    ///
    /// # Panics
    ///
    /// Program-counter and stack-depth invariants are only checked with
    /// `debug_assert!`. Malformed bytecode can still panic in any build through
    /// ordinary bounds checks (bytecode offset, stack slot, or table index out
    /// of range). An unknown opcode panics in debug builds and makes the call
    /// return `false` otherwise.
    #[inline]
    pub fn evaluate(&self, payload: Bytes) -> bool {
        let mut parts = PayloadParts::new_lazy(payload);
        let delim_len = self.delimiter.len();

        // Operand stack, stack pointer (index of the next free slot) and
        // program counter (offset of the current opcode in `self.bytecode`).
        let mut stack = [false; 32];
        let mut sp: usize = 0;
        let mut pc: usize = 0;

        // Detach the whole-payload slice from the borrow of `parts`, so the
        // slice stays usable while `parts` is passed to `ensure` further down.
        // NOTE(review): soundness depends on `PayloadParts` keeping the
        // underlying buffer alive and at the same address until this function
        // returns - confirm against `PayloadParts`.
        let payload_bytes = parts.payload().as_ref() as *const [u8];
        let payload_bytes: &[u8] = unsafe { &*payload_bytes };

        loop {
            debug_assert!(pc < self.bytecode.len(), "PC out of bounds");
            debug_assert!(sp < 32, "Stack overflow");

            match self.bytecode[pc] {
                // --- Constants: [op] ---
                0x01 => {
                    stack[sp] = true;
                    sp += 1;
                    pc += 1;
                }
                0x02 => {
                    stack[sp] = false;
                    sp += 1;
                    pc += 1;
                }

                // --- Whole-payload tests: [op, idx:u16] ---
                // Payload contains strings[idx] (uses the pre-built searcher).
                0x10 => {
                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
                    stack[sp] = self.searchers[idx].find(payload_bytes).is_some();
                    sp += 1;
                    pc += 3;
                }
                // Payload starts with strings[idx].
                0x11 => {
                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
                    stack[sp] = payload_bytes.starts_with(&self.strings[idx]);
                    sp += 1;
                    pc += 3;
                }
                // Payload ends with strings[idx].
                0x12 => {
                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
                    stack[sp] = payload_bytes.ends_with(&self.strings[idx]);
                    sp += 1;
                    pc += 3;
                }
                // Payload equals strings[idx] exactly.
                0x13 => {
                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
                    stack[sp] = payload_bytes == &self.strings[idx][..];
                    sp += 1;
                    pc += 3;
                }
                // Payload matches regexes[idx].
                0x20 => {
                    let idx = read_u16(&self.bytecode, pc + 1) as usize;
                    stack[sp] = self.regexes[idx].is_match(payload_bytes);
                    sp += 1;
                    pc += 3;
                }

                // --- Boolean operators: [op] ---
                // AND: pop two values, push their conjunction.
                0x30 => {
                    debug_assert!(sp >= 2, "Stack underflow on AND");
                    sp -= 1;
                    stack[sp - 1] = stack[sp - 1] && stack[sp];
                    pc += 1;
                }
                // OR: pop two values, push their disjunction.
                0x31 => {
                    debug_assert!(sp >= 2, "Stack underflow on OR");
                    sp -= 1;
                    stack[sp - 1] = stack[sp - 1] || stack[sp];
                    pc += 1;
                }
                // NOT: negate the top of the stack in place.
                0x32 => {
                    debug_assert!(sp >= 1, "Stack underflow on NOT");
                    stack[sp - 1] = !stack[sp - 1];
                    pc += 1;
                }

                // --- Part tests: [op, part:u8, idx:u16] unless noted ---
                // Each handler first asks `parts` (created with `new_lazy`) to
                // make the requested part available, then reads it.
                // Part contains strings[idx].
                0x40 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    stack[sp] = self.searchers[str_idx].find(part).is_some();
                    sp += 1;
                    pc += 4;
                }
                // Part starts with strings[idx].
                0x41 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    stack[sp] = part.starts_with(&self.strings[str_idx]);
                    sp += 1;
                    pc += 4;
                }
                // Part ends with strings[idx].
                0x42 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    stack[sp] = part.ends_with(&self.strings[str_idx]);
                    sp += 1;
                    pc += 4;
                }
                // Part equals strings[idx] exactly.
                0x43 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    stack[sp] = part == &self.strings[str_idx][..];
                    sp += 1;
                    pc += 4;
                }
                // Part matches regexes[idx].
                0x44 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let regex_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    stack[sp] = self.regexes[regex_idx].is_match(part);
                    sp += 1;
                    pc += 4;
                }
                // Part is empty: [op, part:u8].
                0x45 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    stack[sp] = parts.get(part_idx).is_empty();
                    sp += 1;
                    pc += 2;
                }
                // Part is non-empty: [op, part:u8].
                0x46 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    stack[sp] = !parts.get(part_idx).is_empty();
                    sp += 1;
                    pc += 2;
                }
                // Part equals any string listed in string_sets[idx]
                // (linear scan over the set's string indices).
                0x47 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let set_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    let set = &self.string_sets[set_idx];
                    stack[sp] = set
                        .iter()
                        .any(|&str_idx| part == &self.strings[str_idx as usize][..]);
                    sp += 1;
                    pc += 4;
                }
                // Part equals strings[idx], ignoring ASCII case.
                0x48 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    stack[sp] = part.eq_ignore_ascii_case(&self.strings[str_idx]);
                    sp += 1;
                    pc += 4;
                }
                // Part contains strings[idx], ignoring ASCII case.
                0x49 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let str_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let part = parts.get(part_idx);
                    let needle = &self.strings[str_idx];
                    stack[sp] = icontains(part, needle);
                    sp += 1;
                    pc += 4;
                }

                // --- Header tests: [op, part:u8, hdr:u16, val:u16] unless noted ---
                // The part is passed to `extract_header_value` together with
                // the header name strings[hdr]; a missing header yields `false`.
                // Header value equals strings[val] exactly.
                0x50 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let headers = parts.get(part_idx);
                    let header_name = &self.strings[hdr_idx];
                    let expected = &self.strings[val_idx];
                    stack[sp] = extract_header_value(headers, header_name)
                        .map(|v| v == &expected[..])
                        .unwrap_or(false);
                    sp += 1;
                    pc += 6;
                }
                // Header value equals strings[val], ignoring ASCII case.
                0x51 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let headers = parts.get(part_idx);
                    let header_name = &self.strings[hdr_idx];
                    let expected = &self.strings[val_idx];
                    stack[sp] = extract_header_value(headers, header_name)
                        .map(|v| v.eq_ignore_ascii_case(expected))
                        .unwrap_or(false);
                    sp += 1;
                    pc += 6;
                }
                // Header value contains strings[val] (uses the pre-built searcher).
                0x52 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    let val_idx = read_u16(&self.bytecode, pc + 4) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let headers = parts.get(part_idx);
                    let header_name = &self.strings[hdr_idx];
                    stack[sp] = extract_header_value(headers, header_name)
                        .map(|v| self.searchers[val_idx].find(v).is_some())
                        .unwrap_or(false);
                    sp += 1;
                    pc += 6;
                }
                // Header is present: [op, part:u8, hdr:u16].
                0x53 => {
                    let part_idx = self.bytecode[pc + 1] as usize;
                    let hdr_idx = read_u16(&self.bytecode, pc + 2) as usize;
                    parts.ensure(part_idx, &self.delimiter_finder, delim_len);
                    let headers = parts.get(part_idx);
                    let header_name = &self.strings[hdr_idx];
                    stack[sp] = extract_header_value(headers, header_name).is_some();
                    sp += 1;
                    pc += 4;
                }

                // --- Conditional jumps: [op, offset:i16] ---
                // The offset is relative to the jump opcode's own position.
                // When the jump is taken the tested value stays on the stack;
                // otherwise it is popped and execution falls through.
                // JumpIfFalse.
                0x70 => {
                    debug_assert!(sp >= 1, "Stack underflow on JumpIfFalse");
                    if !stack[sp - 1] {
                        let offset = read_i16(&self.bytecode, pc + 1);
                        pc = (pc as isize + offset as isize) as usize;
                    } else {
                        sp -= 1;
                        pc += 3;
                    }
                }
                // JumpIfTrue.
                0x71 => {
                    debug_assert!(sp >= 1, "Stack underflow on JumpIfTrue");
                    if stack[sp - 1] {
                        let offset = read_i16(&self.bytecode, pc + 1);
                        pc = (pc as isize + offset as isize) as usize;
                    } else {
                        sp -= 1;
                        pc += 3;
                    }
                }

                // --- Sampling: [op, n:u16] ---
                // Pushes the result of `rand_1_in_n(n)`.
                0x60 => {
                    let n = read_u16(&self.bytecode, pc + 1);
                    stack[sp] = rand_1_in_n(n);
                    sp += 1;
                    pc += 3;
                }

                // --- Return: [op] ---
                // The top of the stack is the filter's result.
                0xFF => {
                    debug_assert!(sp >= 1, "Stack underflow on RETURN");
                    return stack[sp - 1];
                }

                // Unknown opcode: loud in debug builds, rejects the payload
                // (returns `false`) otherwise.
                _ => {
                    #[cfg(debug_assertions)]
                    panic!("Unknown opcode: 0x{:02X} at pc={}", self.bytecode[pc], pc);
                    #[cfg(not(debug_assertions))]
                    return false;
                }
            }
        }
    }
419
420 pub fn source(&self) -> &str {
422 &self.source
423 }
424
425 pub fn bytecode_len(&self) -> usize {
427 self.bytecode.len()
428 }
429
430 pub fn string_count(&self) -> usize {
432 self.strings.len()
433 }
434
435 pub fn regex_count(&self) -> usize {
437 self.regexes.len()
438 }
439
440 pub fn delimiter(&self) -> &[u8] {
442 &self.delimiter
443 }
444}
445
/// Decodes the little-endian `u16` stored in `bytecode[offset]` and
/// `bytecode[offset + 1]`.
///
/// Panics if either byte lies outside `bytecode`.
#[inline(always)]
fn read_u16(bytecode: &[u8], offset: usize) -> u16 {
    let low = u16::from(bytecode[offset]);
    let high = u16::from(bytecode[offset + 1]);
    (high << 8) | low
}
451
/// Decodes the little-endian two's-complement `i16` stored in
/// `bytecode[offset]` and `bytecode[offset + 1]`.
///
/// Panics if either byte lies outside `bytecode`.
#[inline(always)]
fn read_i16(bytecode: &[u8], offset: usize) -> i16 {
    let low = u16::from(bytecode[offset]);
    let high = u16::from(bytecode[offset + 1]);
    // Same-width cast: a bit-for-bit reinterpretation, nothing is truncated.
    ((high << 8) | low) as i16
}
457
/// Reports whether `needle` occurs somewhere in `haystack`, comparing bytes
/// without regard to ASCII case.
///
/// An empty `needle` is found in every haystack, including an empty one.
#[inline]
fn icontains(haystack: &[u8], needle: &[u8]) -> bool {
    match needle.len() {
        // Must be handled before `windows`, which panics on a zero width.
        0 => true,
        width if width > haystack.len() => false,
        width => haystack
            .windows(width)
            .any(|candidate| candidate.eq_ignore_ascii_case(needle)),
    }
}
476
477#[inline]
481fn rand_1_in_n(n: u16) -> bool {
482 if n <= 1 {
483 return true;
484 }
485 let count = RAND_COUNTER.fetch_add(1, Ordering::Relaxed);
486 count.is_multiple_of(n as u64)
487}
488
489pub fn reset_rand_counter() {
491 RAND_COUNTER.store(0, Ordering::Relaxed);
492}
493
#[cfg(test)]
mod tests {
    use super::*;

    use std::sync::{Mutex, MutexGuard};

    /// Serializes the tests that touch the global sampling counter.
    ///
    /// The default test harness runs tests on several threads; without this
    /// lock a `reset_rand_counter()` from one test can land in the middle of
    /// another test's expected sequence and make it fail intermittently.
    static RAND_TEST_LOCK: Mutex<()> = Mutex::new(());

    /// Takes `RAND_TEST_LOCK`, ignoring poisoning so that one failing test
    /// does not cascade into the other.
    fn lock_rand_counter() -> MutexGuard<'static, ()> {
        RAND_TEST_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Builds `[opcode, str_idx (u16 LE), RETURN]` with a one-entry string table.
    fn make_simple_filter(opcode: u8, str_idx: u16, needle: &str) -> CompiledFilter {
        let mut bytecode = vec![opcode];
        bytecode.extend_from_slice(&str_idx.to_le_bytes());
        bytecode.push(0xFF); // RETURN

        CompiledFilter::new(
            bytecode,
            vec![needle.as_bytes().to_vec()],
            vec![],
            vec![],
            b";;;".to_vec(),
            "test filter".to_string(),
        )
    }

    #[test]
    fn test_contains() {
        let filter = make_simple_filter(0x10, 0, "hello");
        assert!(filter.evaluate(Bytes::from("say hello world")));
        assert!(!filter.evaluate(Bytes::from("say goodbye")));
    }

    #[test]
    fn test_starts_with() {
        let filter = make_simple_filter(0x11, 0, "hello");
        assert!(filter.evaluate(Bytes::from("hello world")));
        assert!(!filter.evaluate(Bytes::from("say hello")));
    }

    #[test]
    fn test_ends_with() {
        let filter = make_simple_filter(0x12, 0, "world");
        assert!(filter.evaluate(Bytes::from("hello world")));
        assert!(!filter.evaluate(Bytes::from("world hello")));
    }

    #[test]
    fn test_equals() {
        let filter = make_simple_filter(0x13, 0, "hello");
        assert!(filter.evaluate(Bytes::from("hello")));
        assert!(!filter.evaluate(Bytes::from("hello world")));
    }

    #[test]
    fn test_push_true() {
        let filter = CompiledFilter::new(
            vec![0x01, 0xFF], // PUSH_TRUE, RETURN
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "true".into(),
        );
        assert!(filter.evaluate(Bytes::from("anything")));
    }

    #[test]
    fn test_push_false() {
        let filter = CompiledFilter::new(
            vec![0x02, 0xFF], // PUSH_FALSE, RETURN
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "false".into(),
        );
        assert!(!filter.evaluate(Bytes::from("anything")));
    }

    #[test]
    fn test_and() {
        let filter = CompiledFilter::new(
            vec![0x01, 0x01, 0x30, 0xFF], // true, true, AND, RETURN
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "true AND true".into(),
        );
        assert!(filter.evaluate(Bytes::from("")));

        let filter = CompiledFilter::new(
            vec![0x01, 0x02, 0x30, 0xFF], // true, false, AND, RETURN
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "true AND false".into(),
        );
        assert!(!filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_or() {
        let filter = CompiledFilter::new(
            vec![0x02, 0x01, 0x31, 0xFF], // false, true, OR, RETURN
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "false OR true".into(),
        );
        assert!(filter.evaluate(Bytes::from("")));

        let filter = CompiledFilter::new(
            vec![0x02, 0x02, 0x31, 0xFF], // false, false, OR, RETURN
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "false OR false".into(),
        );
        assert!(!filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_not() {
        let filter = CompiledFilter::new(
            vec![0x01, 0x32, 0xFF], // true, NOT, RETURN
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "NOT true".into(),
        );
        assert!(!filter.evaluate(Bytes::from("")));

        let filter = CompiledFilter::new(
            vec![0x02, 0x32, 0xFF], // false, NOT, RETURN
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "NOT false".into(),
        );
        assert!(filter.evaluate(Bytes::from("")));
    }

    #[test]
    fn test_part_equals() {
        let filter = CompiledFilter::new(
            // part[1] == strings[0], RETURN
            vec![0x43, 0x01, 0x00, 0x00, 0xFF],
            vec![b"2".to_vec()],
            vec![],
            vec![],
            b";;;".to_vec(),
            "field[1] == \"2\"".into(),
        );

        assert!(filter.evaluate(Bytes::from("v1;;;2;;;subtype")));
        assert!(!filter.evaluate(Bytes::from("v1;;;1;;;subtype")));
    }

    #[test]
    fn test_part_in_set() {
        let filter = CompiledFilter::new(
            // part[1] in string_sets[0], RETURN
            vec![0x47, 0x01, 0x00, 0x00, 0xFF],
            vec![b"1".to_vec(), b"2".to_vec(), b"3".to_vec()],
            vec![],
            vec![vec![0, 1, 2]], // set 0 = strings 0, 1 and 2
            b";;;".to_vec(),
            "field[1] in {\"1\", \"2\", \"3\"}".into(),
        );

        assert!(filter.evaluate(Bytes::from("v1;;;1;;;sub")));
        assert!(filter.evaluate(Bytes::from("v1;;;2;;;sub")));
        assert!(filter.evaluate(Bytes::from("v1;;;3;;;sub")));
        assert!(!filter.evaluate(Bytes::from("v1;;;4;;;sub")));
    }

    #[test]
    fn test_rand() {
        // Hold the lock for the whole sequence: the expected pattern depends
        // on nobody else resetting or advancing the counter in between.
        let _guard = lock_rand_counter();
        reset_rand_counter();

        let filter = CompiledFilter::new(
            vec![0x60, 0x02, 0x00, 0xFF], // rand(2), RETURN
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "rand(2)".into(),
        );

        let results: Vec<bool> = (0..10).map(|_| filter.evaluate(Bytes::from(""))).collect();
        assert_eq!(
            results,
            vec![true, false, true, false, true, false, true, false, true, false]
        );
    }

    #[test]
    fn test_rand_always_true() {
        // This test resets the shared counter, so it must not overlap with
        // `test_rand`.
        let _guard = lock_rand_counter();
        reset_rand_counter();

        let filter = CompiledFilter::new(
            vec![0x60, 0x01, 0x00, 0xFF], // rand(1), RETURN
            vec![],
            vec![],
            vec![],
            b";;;".to_vec(),
            "rand(1)".into(),
        );

        for _ in 0..10 {
            assert!(filter.evaluate(Bytes::from("")));
        }
    }

    #[test]
    fn test_regex_match() {
        let filter = CompiledFilter::new(
            vec![0x20, 0x00, 0x00, 0xFF], // payload matches regexes[0], RETURN
            vec![],
            vec![Regex::new(r"error_[0-9]+").unwrap()],
            vec![],
            b";;;".to_vec(),
            "payload matches \"error_[0-9]+\"".into(),
        );

        assert!(filter.evaluate(Bytes::from("found error_123 in log")));
        assert!(filter.evaluate(Bytes::from("error_0")));
        assert!(!filter.evaluate(Bytes::from("error_abc")));
        assert!(!filter.evaluate(Bytes::from("no errors")));
    }

    #[test]
    fn test_header_iequals() {
        let filter = CompiledFilter::new(
            // header strings[0] of part[0] iequals strings[1], RETURN
            vec![0x51, 0x00, 0x00, 0x00, 0x01, 0x00, 0xFF],
            vec![b"x-custom".to_vec(), b"expected".to_vec()],
            vec![],
            vec![],
            b";;;".to_vec(),
            "headers.header(\"x-custom\") iequals \"expected\"".into(),
        );

        assert!(filter.evaluate(Bytes::from("X-Custom: expected\r\n")));
        assert!(filter.evaluate(Bytes::from("x-custom: EXPECTED\r\n")));
        assert!(filter.evaluate(Bytes::from("X-CUSTOM: Expected\r\n")));
        assert!(!filter.evaluate(Bytes::from("X-Custom: other\r\n")));
        assert!(!filter.evaluate(Bytes::from("X-Other: expected\r\n")));
    }

    #[test]
    fn test_complex_multi_clause_filter() {
        let filter = CompiledFilter::new(
            vec![
                0x43, 0x01, 0x00, 0x00, // part[1] == strings[0]
                0x43, 0x02, 0x01, 0x00, // part[2] == strings[1]
                0x30, // AND
                0x51, 0x04, 0x02, 0x00, 0x03, 0x00, // header strings[2] of part[4] iequals strings[3]
                0x30, // AND
                0xFF, // RETURN
            ],
            vec![
                b"error".to_vec(),
                b"500".to_vec(),
                b"content-type".to_vec(),
                b"application/json".to_vec(),
            ],
            vec![],
            vec![],
            b";;;".to_vec(),
            "multi-clause filter".into(),
        );

        // Six parts; only the ones the filter inspects are filled in.
        let mut fields: Vec<&str> = vec![""; 6];
        fields[1] = "error";
        fields[2] = "500";
        fields[4] = "Content-Type: application/json\r\n";

        let payload = fields.join(";;;");
        assert!(filter.evaluate(Bytes::from(payload)));

        // First clause fails.
        fields[1] = "info";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));

        // Second clause fails.
        fields[1] = "error";
        fields[2] = "200";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));

        // Header clause fails.
        fields[2] = "500";
        fields[4] = "Content-Type: text/html\r\n";
        let payload = fields.join(";;;");
        assert!(!filter.evaluate(Bytes::from(payload)));
    }
}