#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum CompileError {
#[error("pattern length {len} exceeds BPF verifier limit of {max} bytes. Fix: use userspace matching for patterns longer than {max} bytes.")]
PatternTooLong {
len: usize,
max: usize,
},
#[error("invalid pattern: {reason}. Fix: use a supported subset (literal bytes, alternation, and ranges with start <= end).")]
InvalidPattern {
reason: &'static str,
},
}
pub const MAX_BPF_PATTERN_LEN: usize = 1024;
const MAX_BPF_INSTRUCTION_LEN: usize = 4096;
#[derive(Clone, Copy)]
pub(super) enum PatternRange {
Single(u8),
Span(u8, u8),
}
pub(crate) fn parse_character_class(class: &[u8]) -> Result<Vec<PatternRange>, CompileError> {
let mut ranges = Vec::new();
let mut idx = 0;
while idx < class.len() {
let start = match class[idx] {
b'\\' => {
if idx + 1 >= class.len() {
return Err(CompileError::InvalidPattern {
reason: "unterminated character-class escape",
});
}
let escaped = class[idx + 1];
idx += 2;
escaped
}
b => {
idx += 1;
b
}
};
let is_range = idx + 1 < class.len() && class[idx] == b'-';
if is_range {
let end = match class[idx + 1] {
b'\\' => {
if idx + 2 >= class.len() {
return Err(CompileError::InvalidPattern {
reason: "unterminated character-class range escape",
});
}
let escaped = class[idx + 2];
idx += 3;
escaped
}
b => {
idx += 2;
b
}
};
if start > end {
return Err(CompileError::InvalidPattern {
reason: "character class range endpoints are reversed",
});
}
ranges.push(PatternRange::Span(start, end));
continue;
}
ranges.push(PatternRange::Single(start));
}
Ok(ranges)
}
pub fn compile_with_limit(expected_instruction_count: usize) -> Result<(), CompileError> {
if expected_instruction_count > MAX_BPF_INSTRUCTION_LEN {
return Err(CompileError::PatternTooLong {
len: expected_instruction_count,
max: MAX_BPF_INSTRUCTION_LEN,
});
}
Ok(())
}
pub(crate) fn literal_search_instruction_count(pattern_len: usize) -> usize {
if pattern_len == 0 {
return 4;
}
10 + pattern_len * 2
}
pub(crate) fn estimate_character_class_instructions(ranges: &[PatternRange]) -> usize {
if ranges.is_empty() {
return 2;
}
let compare_count = ranges.iter().fold(0usize, |acc, range| match range {
PatternRange::Single(_) => acc + 1,
PatternRange::Span(_, _) => acc + 3,
});
2 + compare_count
}
pub(crate) fn estimate_alternation_instructions(alternatives: &[&[u8]]) -> usize {
if alternatives.is_empty() || alternatives.iter().any(|alt| alt.is_empty()) {
return 2;
}
let alternative_count = alternatives.iter().map(|alt| {
if alt.is_empty() {
0
} else {
1 + (alt.len() * 2) + 1
}
});
2 + alternative_count.sum::<usize>()
}
#[derive(Debug, Clone, Copy)]
pub struct CharRange {
pub lo: u8,
pub hi: u8,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_rejects_empty_class_escapes() {
assert!(matches!(
parse_character_class(b"\\"),
Err(CompileError::InvalidPattern {
reason: "unterminated character-class escape"
})
));
assert!(matches!(
parse_character_class(b"a-\\"),
Err(CompileError::InvalidPattern {
reason: "unterminated character-class range escape"
})
));
}
#[test]
fn parse_range_requires_ordered_endpoints() {
assert!(matches!(
parse_character_class(b"z-a"),
Err(CompileError::InvalidPattern {
reason: "character class range endpoints are reversed"
})
));
}
#[test]
fn compile_limit_safeguards_instruction_cap() {
assert!(matches!(
compile_with_limit(4097),
Err(CompileError::PatternTooLong {
len: 4097,
max: 4096
})
));
}
#[test]
fn literal_count_for_empty_pattern_is_fixed() {
assert_eq!(literal_search_instruction_count(0), 4);
assert_eq!(literal_search_instruction_count(3), 16);
}
#[test]
fn range_estimate_is_non_empty_for_singleton() {
let ranges = vec![PatternRange::Single(0x41)];
assert_eq!(estimate_character_class_instructions(&ranges), 3);
}
#[test]
fn alternation_estimate_counts_bound_checks() {
let alternates: Vec<&[u8]> = vec![b"aa", b"b"];
assert_eq!(
estimate_alternation_instructions(&alternates),
2 + ((1 + 4 + 1) + (1 + 2 + 1))
);
}
}