mod codegen;
mod emit;
mod verify;
pub use codegen::{
compile_alternation, compile_char_class, compile_character_class, compile_literal_search,
};
pub use verify::{compile_with_limit, CharRange, CompileError, MAX_BPF_PATTERN_LEN};
#[cfg(test)]
mod tests {
use super::*;
use crate::assembler::{
format_program, BpfInsn, BPF_B, BPF_CALL, BPF_EXIT, BPF_JGT, BPF_JMP, BPF_JNE, BPF_K,
BPF_LDX, BPF_MEM, BPF_X,
};
fn assert_jump_offsets_are_valid(program: &[BpfInsn]) {
for (idx, insn) in program.iter().enumerate() {
let is_jump = (insn.code & 0x07) == BPF_JMP;
if is_jump && (insn.code & 0xF0) != BPF_EXIT && (insn.code & 0xF0) != BPF_CALL {
let target = idx as isize + 1 + insn.off as isize;
assert!(
target >= 0 && (target as usize) < program.len(),
"instruction {} has invalid jump target: offset {} -> {}",
idx,
insn.off,
target
);
}
}
}
#[test]
fn empty_pattern_returns_immediate_true() -> Result<(), CompileError> {
let prog = compile_literal_search(b"")?;
assert_eq!(prog.len(), 4);
assert_eq!(prog[2].imm, 1); Ok(())
}
#[test]
fn single_byte_pattern_generates_valid_program() -> Result<(), CompileError> {
let prog = compile_literal_search(b"A")?;
assert!(prog.len() >= 8, "program too short: {} insns", prog.len());
assert_eq!(prog[prog.len() - 1].code & 0xF0, BPF_EXIT);
Ok(())
}
#[test]
fn multi_byte_pattern_unrolls_correctly() -> Result<(), CompileError> {
let prog = compile_literal_search(b"abc")?;
let byte_loads = prog
.iter()
.filter(|i| i.code == (BPF_LDX | BPF_MEM | BPF_B))
.count();
assert_eq!(byte_loads, 3, "expected 3 byte loads for 3-byte pattern");
Ok(())
}
#[test]
fn jump_offsets_are_valid() -> Result<(), CompileError> {
let prog = compile_literal_search(b"XY")?;
assert_jump_offsets_are_valid(&prog);
Ok(())
}
#[test]
fn program_terminates_with_exit() -> Result<(), CompileError> {
for pattern in [b"a".as_slice(), b"ab", b"hello world", b""] {
let prog = compile_literal_search(pattern)?;
let last = &prog[prog.len() - 1];
assert_eq!(
last.code & 0xF0,
BPF_EXIT,
"pattern {pattern:?}: last insn is not EXIT"
);
}
Ok(())
}
#[test]
fn backpatch_bounds_check_points_to_fail() -> Result<(), CompileError> {
let prog = compile_literal_search(b"test")?;
let bounds_check = prog
.iter()
.enumerate()
.find(|(_, i)| i.code == (BPF_JMP | BPF_JGT | BPF_X));
assert!(bounds_check.is_some(), "no bounds check found");
if let Some((idx, insn)) = bounds_check {
let target = idx + 1 + insn.off as usize;
assert_eq!(prog[target].imm, 0, "bounds check doesn't jump to fail");
}
Ok(())
}
#[test]
fn long_pattern_compiles() -> Result<(), CompileError> {
let pattern = b"this is a longer pattern for testing unroll depth";
let prog = compile_literal_search(pattern)?;
let byte_loads = prog
.iter()
.filter(|i| i.code == (BPF_LDX | BPF_MEM | BPF_B))
.count();
assert_eq!(byte_loads, pattern.len());
Ok(())
}
#[test]
fn max_length_pattern_compiles() -> Result<(), CompileError> {
let pattern = vec![b'A'; MAX_BPF_PATTERN_LEN];
let prog = compile_literal_search(&pattern)?;
assert!(prog.len() > pattern.len()); Ok(())
}
#[test]
fn oversized_pattern_returns_error() {
let pattern = vec![b'A'; MAX_BPF_PATTERN_LEN + 1];
let message = match compile_literal_search(&pattern) {
Ok(_) => String::new(),
Err(error) => error.to_string(),
};
assert!(message.contains("exceeds BPF verifier limit"));
}
#[test]
fn character_class_with_ranges_generates_match_and_fail_paths() -> Result<(), CompileError> {
let prog = compile_character_class(b"a-z0-9")?;
assert!(prog.len() > 4);
assert_eq!(prog[prog.len() - 4].imm, 0);
assert_eq!(prog[prog.len() - 2].imm, 1);
assert_eq!(prog[prog.len() - 1].code & 0xF0, BPF_EXIT);
assert_jump_offsets_are_valid(&prog);
Ok(())
}
#[test]
fn empty_character_class_is_impossible_match() -> Result<(), CompileError> {
let prog = compile_character_class(b"")?;
assert_eq!(prog.len(), 2);
assert_eq!(prog[0].imm, 0);
assert_eq!(prog[1].code & 0xF0, BPF_EXIT);
Ok(())
}
#[test]
fn alternation_compiles_sequence_with_match_and_fail_paths() -> Result<(), CompileError> {
let prog = compile_alternation(&[b"ab", b"xy", b"z"])?;
assert!(prog.len() > 4);
assert_eq!(prog[prog.len() - 4].imm, 0);
assert_eq!(prog[prog.len() - 2].imm, 1);
assert_jump_offsets_are_valid(&prog);
Ok(())
}
#[test]
fn alternation_with_empty_alternative_matches_immediately() -> Result<(), CompileError> {
let prog = compile_alternation(&[b"ab", b"", b"cd"])?;
assert_eq!(prog.len(), 2);
assert_eq!(prog[0].imm, 1);
assert_eq!(prog[1].code & 0xF0, BPF_EXIT);
Ok(())
}
#[test]
fn all_nul_pattern_compiles_and_compares_zero_bytes() -> Result<(), CompileError> {
let pattern = [0_u8, 0, 0, 0];
let prog = compile_literal_search(&pattern)?;
let zero_comparisons = prog
.iter()
.filter(|insn| insn.code == (BPF_JMP | BPF_JNE | BPF_K) && insn.imm == 0)
.count();
assert_eq!(zero_comparisons, pattern.len());
assert_jump_offsets_are_valid(&prog);
Ok(())
}
#[test]
fn pattern_with_1025_bytes_is_rejected() {
let pattern = vec![0_u8; MAX_BPF_PATTERN_LEN + 1];
assert!(matches!(
compile_literal_search(&pattern),
Err(CompileError::PatternTooLong { len: 1025, max }) if max == MAX_BPF_PATTERN_LEN
));
}
#[test]
fn character_class_rejects_reversed_ranges() {
assert!(matches!(
compile_character_class(b"z-a"),
Err(CompileError::InvalidPattern {
reason: "character class range endpoints are reversed"
})
));
}
#[test]
fn pretty_printer_formats_compiled_program() -> Result<(), CompileError> {
let prog = compile_literal_search(b"AZ")?;
let rendered = format_program(&prog);
assert!(rendered.contains("0000: r2 = *(u32 *)(r1 + 0)"));
assert!(rendered.contains("if r7 != 65 goto"));
assert!(rendered.contains("exit"));
Ok(())
}
}