1use regex::Regex;
2
3use crate::format::LogFormat;
4
/// Operator of a filter expression.
///
/// The symbol listed for each variant is the one `FilterSpec::parse`
/// recognises in the textual form `<field><op><value>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilterOp {
    /// `=`: the captured field is exactly equal to the filter value.
    Eq,
    /// `!=`: the captured field differs from the filter value.
    Ne,
    /// `~`: the filter value, compiled as a regex, matches the captured field.
    Re,
    /// `!~`: the filter value, compiled as a regex, does not match the captured field.
    NotRe,
    /// `<`: the captured field orders before the filter value.
    Lt,
    /// `<=`: the captured field orders before or equal to the filter value.
    Le,
    /// `>`: the captured field orders after the filter value.
    Gt,
    /// `>=`: the captured field orders after or equal to the filter value.
    Ge,
}
25
/// A single, not yet validated filter expression as produced by
/// `FilterSpec::parse`.
#[derive(Debug, Clone)]
pub struct FilterSpec {
    /// Text to the left of the operator: the name of the field to test.
    pub field: String,
    /// Operator that separated `field` from `value`.
    pub op: FilterOp,
    /// Text to the right of the operator, kept verbatim (literal or regex source).
    pub value: String,
}
33
34impl FilterSpec {
35 pub fn parse(input: &str) -> Result<Self, String> {
40 for (op, sep) in &[
41 (FilterOp::NotRe, "!~"),
42 (FilterOp::Ne, "!="),
43 (FilterOp::Le, "<="),
44 (FilterOp::Ge, ">="),
45 (FilterOp::Re, "~"),
46 (FilterOp::Eq, "="),
47 (FilterOp::Lt, "<"),
48 (FilterOp::Gt, ">"),
49 ] {
50 if let Some((field, value)) = input.split_once(sep) {
51 if field.is_empty() {
52 return Err(format!("filter `{input}`: empty field name"));
53 }
54 return Ok(FilterSpec {
55 field: field.to_string(),
56 op: op.clone(),
57 value: value.to_string(),
58 });
59 }
60 }
61 Err(format!(
62 "filter `{input}`: missing operator (expected =, !=, ~, !~, <, <=, >, or >=)"
63 ))
64 }
65}
66
/// One filter predicate after validation, ready to be tested against the
/// named captures of a parsed line.
#[derive(Debug)]
struct CompiledPredicate {
    /// Name of the capture group whose text is tested.
    field: String,
    /// Operator applied between the captured text and the operand.
    op: FilterOp,
    /// Right-hand operand; `Some` for `Eq`, `Ne`, `Lt`, `Le`, `Gt` and `Ge`.
    literal: Option<String>,
    /// Pre-compiled pattern; `Some` for `Re` and `NotRe`.
    regex: Option<Regex>,
}
78
/// A set of predicates bound to one log format; built by
/// `CompiledFilter::compile` and applied with `CompiledFilter::evaluate`.
#[derive(Debug)]
pub struct CompiledFilter {
    /// Name of the log format the filter was compiled against.
    pub format_name: String,
    /// Copy of the format's regex, used to split a line into named captures.
    format_regex: Regex,
    /// Predicates that must all hold for a line to match.
    predicates: Vec<CompiledPredicate>,
}
88
/// Outcome of evaluating a `CompiledFilter` against one line.
#[derive(Debug, PartialEq, Eq)]
pub enum FilterMatch {
    /// The line parsed and every predicate held.
    Matched,
    /// The line parsed, but a predicate failed or its field was not captured.
    NotMatched,
    /// The line is not valid UTF-8 or does not match the format regex.
    NotParsed,
}
98
99impl CompiledFilter {
100 pub fn compile(format: &LogFormat, specs: Vec<FilterSpec>) -> Result<Self, String> {
103 let mut predicates = Vec::with_capacity(specs.len());
104 for spec in specs {
105 if !format.field_names.iter().any(|n| n == &spec.field) {
106 return Err(format!(
107 "filter `{}{:?}{}`: field `{}` is not in format `{}` (available: {})",
108 spec.field,
109 spec.op,
110 spec.value,
111 spec.field,
112 format.name,
113 format.field_names.join(", "),
114 ));
115 }
116 let (literal, regex) = match spec.op {
117 FilterOp::Eq
118 | FilterOp::Ne
119 | FilterOp::Lt
120 | FilterOp::Le
121 | FilterOp::Gt
122 | FilterOp::Ge => (Some(spec.value.clone()), None),
123 FilterOp::Re | FilterOp::NotRe => {
124 let r = Regex::new(&spec.value)
125 .map_err(|e| format!("filter `{}`: invalid regex `{}`: {e}", spec.field, spec.value))?;
126 (None, Some(r))
127 }
128 };
129 predicates.push(CompiledPredicate {
130 field: spec.field,
131 op: spec.op,
132 literal,
133 regex,
134 });
135 }
136 Ok(Self {
137 format_name: format.name.clone(),
138 format_regex: format.regex.clone(),
139 predicates,
140 })
141 }
142
143 pub fn evaluate(&self, line: &[u8]) -> FilterMatch {
147 let line_str = match std::str::from_utf8(line) {
148 Ok(s) => s,
149 Err(_) => return FilterMatch::NotParsed,
150 };
151 let Some(caps) = self.format_regex.captures(line_str) else {
152 return FilterMatch::NotParsed;
153 };
154 for p in &self.predicates {
155 let Some(m) = caps.name(&p.field) else {
156 return FilterMatch::NotMatched;
157 };
158 let captured = m.as_str();
159 let ok = match p.op {
160 FilterOp::Eq => p.literal.as_deref() == Some(captured),
161 FilterOp::Ne => p.literal.as_deref() != Some(captured),
162 FilterOp::Re => p.regex.as_ref().is_some_and(|r| r.is_match(captured)),
163 FilterOp::NotRe => p.regex.as_ref().is_some_and(|r| !r.is_match(captured)),
164 FilterOp::Lt | FilterOp::Le | FilterOp::Gt | FilterOp::Ge => {
165 let rhs = p.literal.as_deref().unwrap_or("");
166 compare(&p.op, captured, rhs)
167 }
168 };
169 if !ok {
170 return FilterMatch::NotMatched;
171 }
172 }
173 FilterMatch::Matched
174 }
175}
176
177fn compare(op: &FilterOp, lhs: &str, rhs: &str) -> bool {
184 let order = match (lhs.parse::<f64>(), rhs.parse::<f64>()) {
185 (Ok(a), Ok(b)) => a.partial_cmp(&b),
186 _ => Some(lhs.cmp(rhs)),
187 };
188 let Some(order) = order else { return false; };
189 use std::cmp::Ordering::{Equal, Greater, Less};
190 matches!(
191 (op, order),
192 (FilterOp::Lt, Less)
193 | (FilterOp::Le, Less | Equal)
194 | (FilterOp::Gt, Greater)
195 | (FilterOp::Ge, Greater | Equal)
196 )
197}
198
// Unit tests for filter parsing, compilation and evaluation.
#[cfg(test)]
mod tests {
    use super::*;

    // Builds a format named "apache-combined" whose regex exposes the named
    // captures ip, user, time, method, url, protocol, status, size, referer
    // and agent.
    fn apache_combined() -> LogFormat {
        LogFormat::compile(
            "apache-combined",
            r#"^(?P<ip>\S+) \S+ (?P<user>\S+) \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<url>\S+) (?P<protocol>[^"]+)" (?P<status>\d+) (?P<size>\S+) "(?P<referer>[^"]*)" "(?P<agent>[^"]*)"$"#,
        )
        .unwrap()
    }

    // Fixture lines: a status-200 request for /index.html, a status-500
    // request for /api/data, and a line that does not fit the format.
    const SAMPLE_200: &[u8] = br#"127.0.0.1 - alice [10/Oct/2023:13:55:36 +0000] "GET /index.html HTTP/1.1" 200 2326 "-" "Mozilla/5.0""#;
    const SAMPLE_500: &[u8] = br#"127.0.0.1 - alice [10/Oct/2023:13:55:36 +0000] "GET /api/data HTTP/1.1" 500 512 "-" "curl/7.0""#;
    const NON_PARSING: &[u8] = b"this line does not match the format at all";

    // --- FilterSpec::parse: equality and regex operators ---

    #[test]
    fn parse_eq() {
        let s = FilterSpec::parse("status=500").unwrap();
        assert_eq!(s.field, "status");
        assert_eq!(s.op, FilterOp::Eq);
        assert_eq!(s.value, "500");
    }

    // `!=` must not be read as `=` with a field ending in `!`.
    #[test]
    fn parse_ne_before_eq() {
        let s = FilterSpec::parse("status!=200").unwrap();
        assert_eq!(s.op, FilterOp::Ne);
        assert_eq!(s.value, "200");
    }

    #[test]
    fn parse_re() {
        let s = FilterSpec::parse(r"ip~^10\.").unwrap();
        assert_eq!(s.op, FilterOp::Re);
        assert_eq!(s.value, r"^10\.");
    }

    // `!~` must not be read as `~` with a field ending in `!`.
    #[test]
    fn parse_not_re_before_re() {
        let s = FilterSpec::parse("agent!~bot").unwrap();
        assert_eq!(s.op, FilterOp::NotRe);
        assert_eq!(s.value, "bot");
    }

    #[test]
    fn parse_rejects_no_operator() {
        let err = FilterSpec::parse("status").unwrap_err();
        assert!(err.contains("missing operator"), "{err}");
    }

    #[test]
    fn parse_rejects_empty_field() {
        let err = FilterSpec::parse("=500").unwrap_err();
        assert!(err.contains("empty field"), "{err}");
    }

    // --- CompiledFilter::compile / evaluate ---

    #[test]
    fn compile_rejects_unknown_field() {
        let fmt = apache_combined();
        let specs = vec![FilterSpec::parse("notafield=x").unwrap()];
        let err = CompiledFilter::compile(&fmt, specs).unwrap_err();
        assert!(err.contains("not in format"), "{err}");
    }

    #[test]
    fn evaluate_eq_matches() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(&fmt, vec![FilterSpec::parse("status=500").unwrap()]).unwrap();
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    #[test]
    fn evaluate_re_matches_5xx() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(&fmt, vec![FilterSpec::parse("status~^5").unwrap()]).unwrap();
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    #[test]
    fn evaluate_ne_excludes_200() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(&fmt, vec![FilterSpec::parse("status!=200").unwrap()]).unwrap();
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    // Several predicates are combined with AND: SAMPLE_200 fails both.
    #[test]
    fn evaluate_multiple_filters_and() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(
            &fmt,
            vec![
                FilterSpec::parse("status~^5").unwrap(),
                FilterSpec::parse(r"url~/api/").unwrap(),
            ],
        )
        .unwrap();
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    // A line the format regex rejects is reported as NotParsed, not NotMatched.
    #[test]
    fn evaluate_unparseable_line_is_not_parsed() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(&fmt, vec![FilterSpec::parse("status=200").unwrap()]).unwrap();
        assert_eq!(f.evaluate(NON_PARSING), FilterMatch::NotParsed);
    }

    // --- Ordering operators ---

    // `<=` must not be read as `<` followed by a value starting with `=`.
    #[test]
    fn parse_le_before_lt() {
        let s = FilterSpec::parse("status<=200").unwrap();
        assert_eq!(s.op, FilterOp::Le);
        assert_eq!(s.value, "200");
    }

    // `>=` must not be read as `>` followed by a value starting with `=`.
    #[test]
    fn parse_ge_before_gt() {
        let s = FilterSpec::parse("status>=500").unwrap();
        assert_eq!(s.op, FilterOp::Ge);
        assert_eq!(s.value, "500");
    }

    #[test]
    fn parse_lt() {
        let s = FilterSpec::parse("size<1000").unwrap();
        assert_eq!(s.op, FilterOp::Lt);
        assert_eq!(s.value, "1000");
    }

    #[test]
    fn parse_gt() {
        let s = FilterSpec::parse("size>0").unwrap();
        assert_eq!(s.op, FilterOp::Gt);
        assert_eq!(s.value, "0");
    }

    #[test]
    fn evaluate_ge_numeric() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(&fmt, vec![FilterSpec::parse("status>=500").unwrap()]).unwrap();
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    #[test]
    fn evaluate_lt_numeric() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(&fmt, vec![FilterSpec::parse("status<400").unwrap()]).unwrap();
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::NotMatched);
    }

    // "-" does not parse as a number, so `compare` falls back to string
    // ordering, where "-" sorts before "100".
    #[test]
    fn evaluate_lex_fallback() {
        assert!(compare(&FilterOp::Lt, "-", "100"));
        assert!(!compare(&FilterOp::Gt, "-", "100"));
    }

    // Two non-numeric operands are ordered as plain strings.
    #[test]
    fn evaluate_lex_string_compare() {
        assert!(compare(&FilterOp::Gt, "warning", "warn"));
        assert!(!compare(&FilterOp::Gt, "info", "warn"));
        assert!(compare(&FilterOp::Ge, "warn", "warn"));
        assert!(compare(&FilterOp::Le, "warn", "warn"));
    }

    // The "missing operator" message must list the ordering operators too.
    #[test]
    fn parse_rejects_no_op_mentions_new_ops() {
        let err = FilterSpec::parse("status").unwrap_err();
        assert!(err.contains(">=") && err.contains("<="), "{err}");
    }
}