use regex::{Regex, RegexBuilder};
use crate::format::LogFormat;
/// Comparison operator of a single filter expression.
///
/// The symbol noted on each variant is the spelling accepted by
/// `FilterSpec::parse`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilterOp {
/// `=`: the captured field is exactly equal to the value (string equality).
Eq,
/// `!=`: the captured field differs from the value (string inequality).
Ne,
/// `~`: the value is a regular expression that must match the captured field.
Re,
/// `!~`: the value is a regular expression that must NOT match the captured field.
NotRe,
/// `<`: the captured field orders strictly before the value.
Lt,
/// `<=`: the captured field orders before, or equal to, the value.
Le,
/// `>`: the captured field orders strictly after the value.
Gt,
/// `>=`: the captured field orders after, or equal to, the value.
Ge,
}
/// One filter expression split into its three parts.
///
/// Produced by `FilterSpec::parse`; the field name is only checked against a
/// concrete log format later, in `CompiledFilter::compile`.
#[derive(Debug, Clone)]
pub struct FilterSpec {
/// Name of the field (a named capture group of the log format) to test.
pub field: String,
/// Operator relating the captured field to `value`.
pub op: FilterOp,
/// Right-hand side as written: a literal for the equality/ordering operators,
/// a regular-expression pattern for `Re` / `NotRe`.
pub value: String,
}
impl FilterSpec {
pub fn parse(input: &str) -> Result<Self, String> {
for (op, sep) in &[
(FilterOp::NotRe, "!~"),
(FilterOp::Ne, "!="),
(FilterOp::Le, "<="),
(FilterOp::Ge, ">="),
(FilterOp::Re, "~"),
(FilterOp::Eq, "="),
(FilterOp::Lt, "<"),
(FilterOp::Gt, ">"),
] {
if let Some((field, value)) = input.split_once(sep) {
if field.is_empty() {
return Err(format!("filter `{input}`: empty field name"));
}
return Ok(FilterSpec {
field: field.to_string(),
op: op.clone(),
value: value.to_string(),
});
}
}
Err(format!(
"filter `{input}`: missing operator (expected =, !=, ~, !~, <, <=, >, or >=)"
))
}
}
/// A single filter condition after validation, ready to be evaluated.
///
/// As built by `CompiledFilter::compile`, exactly one of `literal` / `regex`
/// is populated, depending on `op`.
#[derive(Debug)]
struct CompiledPredicate {
/// Name of the capture group whose text is tested.
field: String,
/// Operator applied to the captured text.
op: FilterOp,
/// Right-hand side for `Eq`, `Ne`, `Lt`, `Le`, `Gt` and `Ge`; `None` for the regex operators.
literal: Option<String>,
/// Compiled pattern for `Re` and `NotRe`; `None` for the literal operators.
regex: Option<Regex>,
}
/// A set of filter predicates bound to one log format.
///
/// All predicates must hold for an input to match (logical AND).
#[derive(Debug)]
pub struct CompiledFilter {
/// Name of the log format this filter was compiled against.
pub format_name: String,
/// The format's own regex, used by `evaluate` for single lines.
format_regex: Regex,
/// The same pattern rebuilt with "dot matches newline" and multi-line mode
/// enabled, used by `evaluate_record`.
format_regex_record: Regex,
/// Conditions checked against the capture groups, in the order given to `compile`.
predicates: Vec<CompiledPredicate>,
}
/// Outcome of evaluating a `CompiledFilter` against one input.
#[derive(Debug, PartialEq, Eq)]
pub enum FilterMatch {
/// The input matched the format and every predicate held.
Matched,
/// The input matched the format, but a predicate failed or its capture
/// group did not take part in the match.
NotMatched,
/// The input was not valid UTF-8 or did not match the format regex at all.
NotParsed,
}
impl CompiledFilter {
pub fn compile(format: &LogFormat, specs: Vec<FilterSpec>) -> Result<Self, String> {
let mut predicates = Vec::with_capacity(specs.len());
for spec in specs {
if !format.field_names.iter().any(|n| n == &spec.field) {
return Err(format!(
"filter `{}{:?}{}`: field `{}` is not in format `{}` (available: {})",
spec.field,
spec.op,
spec.value,
spec.field,
format.name,
format.field_names.join(", "),
));
}
let (literal, regex) = match spec.op {
FilterOp::Eq
| FilterOp::Ne
| FilterOp::Lt
| FilterOp::Le
| FilterOp::Gt
| FilterOp::Ge => (Some(spec.value.clone()), None),
FilterOp::Re | FilterOp::NotRe => {
let r = Regex::new(&spec.value)
.map_err(|e| format!("filter `{}`: invalid regex `{}`: {e}", spec.field, spec.value))?;
(None, Some(r))
}
};
predicates.push(CompiledPredicate {
field: spec.field,
op: spec.op,
literal,
regex,
});
}
let format_regex_record = RegexBuilder::new(format.regex.as_str())
.dot_matches_new_line(true)
.multi_line(true)
.build()
.map_err(|e| {
format!("format `{}`: rebuilding regex for records mode: {e}", format.name)
})?;
Ok(Self {
format_name: format.name.clone(),
format_regex: format.regex.clone(),
format_regex_record,
predicates,
})
}
pub fn evaluate(&self, line: &[u8]) -> FilterMatch {
self.evaluate_with(&self.format_regex, line)
}
pub fn evaluate_record(&self, record: &[u8]) -> FilterMatch {
self.evaluate_with(&self.format_regex_record, record)
}
fn evaluate_with(&self, regex: &Regex, bytes: &[u8]) -> FilterMatch {
let line_str = match std::str::from_utf8(bytes) {
Ok(s) => s,
Err(_) => return FilterMatch::NotParsed,
};
let Some(caps) = regex.captures(line_str) else {
return FilterMatch::NotParsed;
};
for p in &self.predicates {
let Some(m) = caps.name(&p.field) else {
return FilterMatch::NotMatched;
};
let captured = m.as_str();
let ok = match p.op {
FilterOp::Eq => p.literal.as_deref() == Some(captured),
FilterOp::Ne => p.literal.as_deref() != Some(captured),
FilterOp::Re => p.regex.as_ref().is_some_and(|r| r.is_match(captured)),
FilterOp::NotRe => p.regex.as_ref().is_some_and(|r| !r.is_match(captured)),
FilterOp::Lt | FilterOp::Le | FilterOp::Gt | FilterOp::Ge => {
let rhs = p.literal.as_deref().unwrap_or("");
compare(&p.op, captured, rhs)
}
};
if !ok {
return FilterMatch::NotMatched;
}
}
FilterMatch::Matched
}
}
/// Applies an ordering operator (`Lt`, `Le`, `Gt`, `Ge`) to two field values.
///
/// When both sides parse as `f64` they are compared numerically, so `"9"` is
/// less than `"10"`. Otherwise they are compared lexicographically as strings.
/// Text that parses to NaN has no numeric order, so it is treated like any
/// other non-numeric text and also compared lexicographically; this keeps
/// `>=` / `<=` true for two identical strings.
///
/// Returns `false` for operators that are not ordering operators.
fn compare(op: &FilterOp, lhs: &str, rhs: &str) -> bool {
    use std::cmp::Ordering::{Equal, Greater, Less};

    let order = match (lhs.parse::<f64>(), rhs.parse::<f64>()) {
        // `partial_cmp` is `None` only when a NaN is involved.
        (Ok(a), Ok(b)) => a.partial_cmp(&b).unwrap_or_else(|| lhs.cmp(rhs)),
        _ => lhs.cmp(rhs),
    };
    matches!(
        (op, order),
        (FilterOp::Lt, Less)
            | (FilterOp::Le, Less | Equal)
            | (FilterOp::Gt, Greater)
            | (FilterOp::Ge, Greater | Equal)
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- fixtures -------------------------------------------------------

    /// Apache combined-format line with status 200.
    const SAMPLE_200: &[u8] = br#"127.0.0.1 - alice [10/Oct/2023:13:55:36 +0000] "GET /index.html HTTP/1.1" 200 2326 "-" "Mozilla/5.0""#;
    /// Apache combined-format line with status 500 and an `/api/` URL.
    const SAMPLE_500: &[u8] = br#"127.0.0.1 - alice [10/Oct/2023:13:55:36 +0000] "GET /api/data HTTP/1.1" 500 512 "-" "curl/7.0""#;
    /// A line the format regex cannot match.
    const NON_PARSING: &[u8] = b"this line does not match the format at all";

    /// Builds the log format every evaluation test runs against.
    fn apache_combined() -> LogFormat {
        let pattern = r#"^(?P<ip>\S+) \S+ (?P<user>\S+) \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<url>\S+) (?P<protocol>[^"]+)" (?P<status>\d+) (?P<size>\S+) "(?P<referer>[^"]*)" "(?P<agent>[^"]*)"$"#;
        LogFormat::compile("apache-combined", pattern).unwrap()
    }

    /// Parses one expression, panicking if it is rejected.
    fn spec(expr: &str) -> FilterSpec {
        FilterSpec::parse(expr).unwrap()
    }

    /// Compiles the given expressions against the Apache format.
    fn filter(exprs: &[&str]) -> CompiledFilter {
        let specs: Vec<FilterSpec> = exprs.iter().copied().map(spec).collect();
        CompiledFilter::compile(&apache_combined(), specs).unwrap()
    }

    /// Verdicts for the 500 sample and the 200 sample, in that order.
    fn verdicts(f: &CompiledFilter) -> (FilterMatch, FilterMatch) {
        (f.evaluate(SAMPLE_500), f.evaluate(SAMPLE_200))
    }

    // ---- parsing --------------------------------------------------------

    #[test]
    fn parse_eq() {
        let FilterSpec { field, op, value } = spec("status=500");
        assert_eq!(field, "status");
        assert_eq!(op, FilterOp::Eq);
        assert_eq!(value, "500");
    }

    #[test]
    fn parse_ne_before_eq() {
        let FilterSpec { op, value, .. } = spec("status!=200");
        assert_eq!(op, FilterOp::Ne);
        assert_eq!(value, "200");
    }

    #[test]
    fn parse_re() {
        let FilterSpec { op, value, .. } = spec(r"ip~^10\.");
        assert_eq!(op, FilterOp::Re);
        assert_eq!(value, r"^10\.");
    }

    #[test]
    fn parse_not_re_before_re() {
        let FilterSpec { op, value, .. } = spec("agent!~bot");
        assert_eq!(op, FilterOp::NotRe);
        assert_eq!(value, "bot");
    }

    #[test]
    fn parse_rejects_no_operator() {
        let err = FilterSpec::parse("status").unwrap_err();
        assert!(err.contains("missing operator"), "{err}");
    }

    #[test]
    fn parse_rejects_empty_field() {
        let err = FilterSpec::parse("=500").unwrap_err();
        assert!(err.contains("empty field"), "{err}");
    }

    // ---- compilation and evaluation -------------------------------------

    #[test]
    fn compile_rejects_unknown_field() {
        let err =
            CompiledFilter::compile(&apache_combined(), vec![spec("notafield=x")]).unwrap_err();
        assert!(err.contains("not in format"), "{err}");
    }

    #[test]
    fn evaluate_eq_matches() {
        let f = filter(&["status=500"]);
        assert_eq!(verdicts(&f), (FilterMatch::Matched, FilterMatch::NotMatched));
    }

    #[test]
    fn evaluate_re_matches_5xx() {
        let f = filter(&["status~^5"]);
        assert_eq!(verdicts(&f), (FilterMatch::Matched, FilterMatch::NotMatched));
    }

    #[test]
    fn evaluate_ne_excludes_200() {
        let f = filter(&["status!=200"]);
        assert_eq!(verdicts(&f), (FilterMatch::Matched, FilterMatch::NotMatched));
    }

    #[test]
    fn evaluate_multiple_filters_and() {
        let f = filter(&["status~^5", r"url~/api/"]);
        assert_eq!(verdicts(&f), (FilterMatch::Matched, FilterMatch::NotMatched));
    }

    #[test]
    fn evaluate_unparseable_line_is_not_parsed() {
        let f = filter(&["status=200"]);
        assert_eq!(f.evaluate(NON_PARSING), FilterMatch::NotParsed);
    }

    // ---- ordering operators ---------------------------------------------

    #[test]
    fn parse_le_before_lt() {
        let FilterSpec { op, value, .. } = spec("status<=200");
        assert_eq!(op, FilterOp::Le);
        assert_eq!(value, "200");
    }

    #[test]
    fn parse_ge_before_gt() {
        let FilterSpec { op, value, .. } = spec("status>=500");
        assert_eq!(op, FilterOp::Ge);
        assert_eq!(value, "500");
    }

    #[test]
    fn parse_lt() {
        let FilterSpec { op, value, .. } = spec("size<1000");
        assert_eq!(op, FilterOp::Lt);
        assert_eq!(value, "1000");
    }

    #[test]
    fn parse_gt() {
        let FilterSpec { op, value, .. } = spec("size>0");
        assert_eq!(op, FilterOp::Gt);
        assert_eq!(value, "0");
    }

    #[test]
    fn evaluate_ge_numeric() {
        let f = filter(&["status>=500"]);
        assert_eq!(verdicts(&f), (FilterMatch::Matched, FilterMatch::NotMatched));
    }

    #[test]
    fn evaluate_lt_numeric() {
        let f = filter(&["status<400"]);
        assert_eq!(verdicts(&f), (FilterMatch::NotMatched, FilterMatch::Matched));
    }

    #[test]
    fn evaluate_lex_fallback() {
        // "-" is not a number, so the comparison is lexicographic.
        let (lhs, rhs) = ("-", "100");
        assert!(compare(&FilterOp::Lt, lhs, rhs));
        assert!(!compare(&FilterOp::Gt, lhs, rhs));
    }

    #[test]
    fn evaluate_lex_string_compare() {
        assert!(compare(&FilterOp::Gt, "warning", "warn"));
        assert!(!compare(&FilterOp::Gt, "info", "warn"));
        for op in [FilterOp::Ge, FilterOp::Le].iter() {
            assert!(compare(op, "warn", "warn"));
        }
    }

    #[test]
    fn parse_rejects_no_op_mentions_new_ops() {
        let err = FilterSpec::parse("status").unwrap_err();
        assert!(err.contains(">=") && err.contains("<="), "{err}");
    }
}