1use regex::{Regex, RegexBuilder};
2
3use crate::format::LogFormat;
4
/// Comparison operator of a single filter expression.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilterOp {
    /// `=`: the captured field text equals the filter value exactly.
    Eq,
    /// `!=`: the captured field text differs from the filter value.
    Ne,
    /// `~`: the filter value, compiled as a regex, matches the captured text.
    Re,
    /// `!~`: the filter value, compiled as a regex, does not match the captured text.
    NotRe,
    /// `<`: ordered comparison; numeric when both sides parse as `f64`,
    /// lexicographic otherwise.
    Lt,
    /// `<=`: same comparison rules as `<`.
    Le,
    /// `>`: same comparison rules as `<`.
    Gt,
    /// `>=`: same comparison rules as `<`.
    Ge,
}
25
/// One parsed, not yet validated, filter expression of the form `FIELD<op>VALUE`.
#[derive(Debug, Clone)]
pub struct FilterSpec {
    /// Text before the operator; names the capture group the filter applies to.
    pub field: String,
    /// Operator found in the expression.
    pub op: FilterOp,
    /// Text after the operator, kept verbatim (a literal or a regex source,
    /// depending on `op`).
    pub value: String,
}
33
34impl FilterSpec {
35 pub fn parse(input: &str) -> Result<Self, String> {
40 for (op, sep) in &[
41 (FilterOp::NotRe, "!~"),
42 (FilterOp::Ne, "!="),
43 (FilterOp::Le, "<="),
44 (FilterOp::Ge, ">="),
45 (FilterOp::Re, "~"),
46 (FilterOp::Eq, "="),
47 (FilterOp::Lt, "<"),
48 (FilterOp::Gt, ">"),
49 ] {
50 if let Some((field, value)) = input.split_once(sep) {
51 if field.is_empty() {
52 return Err(format!("filter `{input}`: empty field name"));
53 }
54 return Ok(FilterSpec {
55 field: field.to_string(),
56 op: op.clone(),
57 value: value.to_string(),
58 });
59 }
60 }
61 Err(format!(
62 "filter `{input}`: missing operator (expected =, !=, ~, !~, <, <=, >, or >=)"
63 ))
64 }
65}
66
/// A validated filter ready for evaluation against a captured field.
///
/// As built by `CompiledFilter::compile`, exactly one of `literal` and
/// `regex` is `Some`, depending on `op`.
#[derive(Debug)]
struct CompiledPredicate {
    /// Name of the capture group whose text is tested.
    field: String,
    /// Operator applied to the captured text.
    op: FilterOp,
    /// Right-hand side for `Eq`, `Ne`, `Lt`, `Le`, `Gt` and `Ge`; `None` for
    /// the regex operators.
    literal: Option<String>,
    /// Compiled pattern for `Re` and `NotRe`; `None` for the other operators.
    regex: Option<Regex>,
}
78
/// A log format's regex bundled with the predicates to apply to its captures.
#[derive(Debug)]
pub struct CompiledFilter {
    /// Name of the format this filter was compiled against.
    pub format_name: String,
    /// Clone of the format's own regex; used by `evaluate`.
    format_regex: Regex,
    /// The same pattern rebuilt with `dot_matches_new_line` and `multi_line`
    /// enabled; used by `evaluate_record`.
    format_regex_record: Regex,
    /// Predicates that must all hold for an input to match.
    predicates: Vec<CompiledPredicate>,
}
95
/// Outcome of evaluating a `CompiledFilter` against one input.
#[derive(Debug, PartialEq, Eq)]
pub enum FilterMatch {
    /// The input parsed and every predicate held.
    Matched,
    /// The input parsed, but a predicate failed or its capture group did not
    /// participate in the match.
    NotMatched,
    /// The input was not valid UTF-8 or the format regex did not match it.
    NotParsed,
}
105
106impl CompiledFilter {
107 pub fn compile(format: &LogFormat, specs: Vec<FilterSpec>) -> Result<Self, String> {
110 let mut predicates = Vec::with_capacity(specs.len());
111 for spec in specs {
112 if !format.field_names.iter().any(|n| n == &spec.field) {
113 return Err(format!(
114 "filter `{}{:?}{}`: field `{}` is not in format `{}` (available: {})",
115 spec.field,
116 spec.op,
117 spec.value,
118 spec.field,
119 format.name,
120 format.field_names.join(", "),
121 ));
122 }
123 let (literal, regex) = match spec.op {
124 FilterOp::Eq
125 | FilterOp::Ne
126 | FilterOp::Lt
127 | FilterOp::Le
128 | FilterOp::Gt
129 | FilterOp::Ge => (Some(spec.value.clone()), None),
130 FilterOp::Re | FilterOp::NotRe => {
131 let r = Regex::new(&spec.value)
132 .map_err(|e| format!("filter `{}`: invalid regex `{}`: {e}", spec.field, spec.value))?;
133 (None, Some(r))
134 }
135 };
136 predicates.push(CompiledPredicate {
137 field: spec.field,
138 op: spec.op,
139 literal,
140 regex,
141 });
142 }
143 let format_regex_record = RegexBuilder::new(format.regex.as_str())
144 .dot_matches_new_line(true)
145 .multi_line(true)
146 .build()
147 .map_err(|e| {
148 format!("format `{}`: rebuilding regex for records mode: {e}", format.name)
149 })?;
150
151 Ok(Self {
152 format_name: format.name.clone(),
153 format_regex: format.regex.clone(),
154 format_regex_record,
155 predicates,
156 })
157 }
158
159 pub fn evaluate(&self, line: &[u8]) -> FilterMatch {
163 self.evaluate_with(&self.format_regex, line)
164 }
165
166 pub fn evaluate_record(&self, record: &[u8]) -> FilterMatch {
171 self.evaluate_with(&self.format_regex_record, record)
172 }
173
174 fn evaluate_with(&self, regex: &Regex, bytes: &[u8]) -> FilterMatch {
175 let line_str = match std::str::from_utf8(bytes) {
176 Ok(s) => s,
177 Err(_) => return FilterMatch::NotParsed,
178 };
179 let Some(caps) = regex.captures(line_str) else {
180 return FilterMatch::NotParsed;
181 };
182 for p in &self.predicates {
183 let Some(m) = caps.name(&p.field) else {
184 return FilterMatch::NotMatched;
185 };
186 let captured = m.as_str();
187 let ok = match p.op {
188 FilterOp::Eq => p.literal.as_deref() == Some(captured),
189 FilterOp::Ne => p.literal.as_deref() != Some(captured),
190 FilterOp::Re => p.regex.as_ref().is_some_and(|r| r.is_match(captured)),
191 FilterOp::NotRe => p.regex.as_ref().is_some_and(|r| !r.is_match(captured)),
192 FilterOp::Lt | FilterOp::Le | FilterOp::Gt | FilterOp::Ge => {
193 let rhs = p.literal.as_deref().unwrap_or("");
194 compare(&p.op, captured, rhs)
195 }
196 };
197 if !ok {
198 return FilterMatch::NotMatched;
199 }
200 }
201 FilterMatch::Matched
202 }
203}
204
205fn compare(op: &FilterOp, lhs: &str, rhs: &str) -> bool {
212 let order = match (lhs.parse::<f64>(), rhs.parse::<f64>()) {
213 (Ok(a), Ok(b)) => a.partial_cmp(&b),
214 _ => Some(lhs.cmp(rhs)),
215 };
216 let Some(order) = order else { return false; };
217 use std::cmp::Ordering::{Equal, Greater, Less};
218 matches!(
219 (op, order),
220 (FilterOp::Lt, Less)
221 | (FilterOp::Le, Less | Equal)
222 | (FilterOp::Gt, Greater)
223 | (FilterOp::Ge, Greater | Equal)
224 )
225}
226
// Unit tests for filter parsing, compilation and evaluation.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `apache-combined` format used by the evaluation tests; its
    /// pattern names the groups `ip`, `user`, `time`, `method`, `url`,
    /// `protocol`, `status`, `size`, `referer` and `agent`.
    fn apache_combined() -> LogFormat {
        LogFormat::compile(
            "apache-combined",
            r#"^(?P<ip>\S+) \S+ (?P<user>\S+) \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<url>\S+) (?P<protocol>[^"]+)" (?P<status>\d+) (?P<size>\S+) "(?P<referer>[^"]*)" "(?P<agent>[^"]*)"$"#,
        )
        .unwrap()
    }

    // Line with status 200 and URL `/index.html`.
    const SAMPLE_200: &[u8] = br#"127.0.0.1 - alice [10/Oct/2023:13:55:36 +0000] "GET /index.html HTTP/1.1" 200 2326 "-" "Mozilla/5.0""#;
    // Line with status 500 and URL `/api/data`.
    const SAMPLE_500: &[u8] = br#"127.0.0.1 - alice [10/Oct/2023:13:55:36 +0000] "GET /api/data HTTP/1.1" 500 512 "-" "curl/7.0""#;
    // Input the format regex cannot match.
    const NON_PARSING: &[u8] = b"this line does not match the format at all";

    // `=` splits the expression into field, operator and value.
    #[test]
    fn parse_eq() {
        let s = FilterSpec::parse("status=500").unwrap();
        assert_eq!(s.field, "status");
        assert_eq!(s.op, FilterOp::Eq);
        assert_eq!(s.value, "500");
    }

    // `!=` must not be read as `=` with a trailing `!` on the field.
    #[test]
    fn parse_ne_before_eq() {
        let s = FilterSpec::parse("status!=200").unwrap();
        assert_eq!(s.op, FilterOp::Ne);
        assert_eq!(s.value, "200");
    }

    // The regex source after `~` is kept verbatim, backslashes included.
    #[test]
    fn parse_re() {
        let s = FilterSpec::parse(r"ip~^10\.").unwrap();
        assert_eq!(s.op, FilterOp::Re);
        assert_eq!(s.value, r"^10\.");
    }

    // `!~` must not be read as `~` with a trailing `!` on the field.
    #[test]
    fn parse_not_re_before_re() {
        let s = FilterSpec::parse("agent!~bot").unwrap();
        assert_eq!(s.op, FilterOp::NotRe);
        assert_eq!(s.value, "bot");
    }

    // An expression without any operator is an error.
    #[test]
    fn parse_rejects_no_operator() {
        let err = FilterSpec::parse("status").unwrap_err();
        assert!(err.contains("missing operator"), "{err}");
    }

    // An operator with nothing before it is an error.
    #[test]
    fn parse_rejects_empty_field() {
        let err = FilterSpec::parse("=500").unwrap_err();
        assert!(err.contains("empty field"), "{err}");
    }

    // Fields that the format does not define are rejected at compile time.
    #[test]
    fn compile_rejects_unknown_field() {
        let fmt = apache_combined();
        let specs = vec![FilterSpec::parse("notafield=x").unwrap()];
        let err = CompiledFilter::compile(&fmt, specs).unwrap_err();
        assert!(err.contains("not in format"), "{err}");
    }

    // `=` matches only the line whose captured status is exactly `500`.
    #[test]
    fn evaluate_eq_matches() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(&fmt, vec![FilterSpec::parse("status=500").unwrap()]).unwrap();
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    // `~^5` matches a status starting with `5`.
    #[test]
    fn evaluate_re_matches_5xx() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(&fmt, vec![FilterSpec::parse("status~^5").unwrap()]).unwrap();
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    // `!=200` keeps the 500 line and drops the 200 line.
    #[test]
    fn evaluate_ne_excludes_200() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(&fmt, vec![FilterSpec::parse("status!=200").unwrap()]).unwrap();
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    // Multiple filters are combined with AND: both must hold.
    #[test]
    fn evaluate_multiple_filters_and() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(
            &fmt,
            vec![
                FilterSpec::parse("status~^5").unwrap(),
                FilterSpec::parse(r"url~/api/").unwrap(),
            ],
        )
        .unwrap();
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    // Input the format regex does not match is reported as `NotParsed`,
    // not `NotMatched`.
    #[test]
    fn evaluate_unparseable_line_is_not_parsed() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(&fmt, vec![FilterSpec::parse("status=200").unwrap()]).unwrap();
        assert_eq!(f.evaluate(NON_PARSING), FilterMatch::NotParsed);
    }

    // `<=` must not be read as `<` with a value starting with `=`.
    #[test]
    fn parse_le_before_lt() {
        let s = FilterSpec::parse("status<=200").unwrap();
        assert_eq!(s.op, FilterOp::Le);
        assert_eq!(s.value, "200");
    }

    // `>=` must not be read as `>` with a value starting with `=`.
    #[test]
    fn parse_ge_before_gt() {
        let s = FilterSpec::parse("status>=500").unwrap();
        assert_eq!(s.op, FilterOp::Ge);
        assert_eq!(s.value, "500");
    }

    // Plain `<` is recognised.
    #[test]
    fn parse_lt() {
        let s = FilterSpec::parse("size<1000").unwrap();
        assert_eq!(s.op, FilterOp::Lt);
        assert_eq!(s.value, "1000");
    }

    // Plain `>` is recognised.
    #[test]
    fn parse_gt() {
        let s = FilterSpec::parse("size>0").unwrap();
        assert_eq!(s.op, FilterOp::Gt);
        assert_eq!(s.value, "0");
    }

    // `>=` on two numeric strings compares them as numbers.
    #[test]
    fn evaluate_ge_numeric() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(&fmt, vec![FilterSpec::parse("status>=500").unwrap()]).unwrap();
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    // `<` on two numeric strings compares them as numbers.
    #[test]
    fn evaluate_lt_numeric() {
        let fmt = apache_combined();
        let f = CompiledFilter::compile(&fmt, vec![FilterSpec::parse("status<400").unwrap()]).unwrap();
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::NotMatched);
    }

    // `-` does not parse as `f64`, so the comparison falls back to
    // lexicographic order, where `-` sorts before `1`.
    #[test]
    fn evaluate_lex_fallback() {
        assert!(compare(&FilterOp::Lt, "-", "100"));
        assert!(!compare(&FilterOp::Gt, "-", "100"));
    }

    // Non-numeric strings are ordered lexicographically; equal strings
    // satisfy both `>=` and `<=`.
    #[test]
    fn evaluate_lex_string_compare() {
        assert!(compare(&FilterOp::Gt, "warning", "warn"));
        assert!(!compare(&FilterOp::Gt, "info", "warn"));
        assert!(compare(&FilterOp::Ge, "warn", "warn"));
        assert!(compare(&FilterOp::Le, "warn", "warn"));
    }

    // The missing-operator message lists the ordered-comparison operators too.
    #[test]
    fn parse_rejects_no_op_mentions_new_ops() {
        let err = FilterSpec::parse("status").unwrap_err();
        assert!(err.contains(">=") && err.contains("<="), "{err}");
    }
}