1use regex::{Regex, RegexBuilder};
2
3use crate::format::LogFormat;
4
/// Operator of a single filter expression.
///
/// The textual symbol noted on each variant is the one recognised by
/// [`FilterSpec::parse`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum FilterOp {
    /// `=`: the captured field text equals the literal.
    Eq,
    /// `!=`: the captured field text differs from the literal.
    Ne,
    /// `~`: the captured field text matches the regex.
    Re,
    /// `!~`: the captured field text does not match the regex.
    NotRe,
    /// `<`: the field orders strictly before the literal.
    Lt,
    /// `<=`: the field orders before or equal to the literal.
    Le,
    /// `>`: the field orders strictly after the literal.
    Gt,
    /// `>=`: the field orders after or equal to the literal.
    Ge,
}
25
26#[derive(Debug, Clone)]
28pub struct FilterSpec {
29 pub field: String,
30 pub op: FilterOp,
31 pub value: String,
32}
33
34impl FilterSpec {
35 pub fn parse(input: &str) -> Result<Self, String> {
40 for (op, sep) in &[
41 (FilterOp::NotRe, "!~"),
42 (FilterOp::Ne, "!="),
43 (FilterOp::Le, "<="),
44 (FilterOp::Ge, ">="),
45 (FilterOp::Re, "~"),
46 (FilterOp::Eq, "="),
47 (FilterOp::Lt, "<"),
48 (FilterOp::Gt, ">"),
49 ] {
50 if let Some((field, value)) = input.split_once(sep) {
51 if field.is_empty() {
52 return Err(format!("filter `{input}`: empty field name"));
53 }
54 return Ok(FilterSpec {
55 field: field.to_string(),
56 op: op.clone(),
57 value: value.to_string(),
58 });
59 }
60 }
61 Err(format!(
62 "filter `{input}`: missing operator (expected =, !=, ~, !~, <, <=, >, or >=)"
63 ))
64 }
65}
66
67#[derive(Debug)]
70struct CompiledPredicate {
71 field: String,
72 op: FilterOp,
73 literal: Option<String>,
75 regex: Option<Regex>,
77}
78
79#[derive(Debug)]
89pub struct CompiledFilter {
90 pub format_name: String,
91 format_regex: Regex,
92 format_regex_record: Regex,
93 predicates: Vec<CompiledPredicate>,
94}
95
/// Outcome of evaluating a [`CompiledFilter`] against one line or record.
#[derive(Debug, Eq, PartialEq)]
pub enum FilterMatch {
    /// The input parsed with the format regex and every predicate held.
    Matched,
    /// The input parsed, but a predicate failed or its capture group did not
    /// participate in the match.
    NotMatched,
    /// The input was not valid UTF-8 or did not match the format regex.
    NotParsed,
}
105
106impl CompiledFilter {
107 pub fn compile(
112 format: &LogFormat,
113 specs: Vec<FilterSpec>,
114 case_mode: crate::viewport::CaseMode,
115 ) -> Result<Self, String> {
116 let mut predicates = Vec::with_capacity(specs.len());
117 for spec in specs {
118 if !format.field_names.iter().any(|n| n == &spec.field) {
119 return Err(format!(
120 "filter `{}{:?}{}`: field `{}` is not in format `{}` (available: {})",
121 spec.field,
122 spec.op,
123 spec.value,
124 spec.field,
125 format.name,
126 format.field_names.join(", "),
127 ));
128 }
129 let (literal, regex) = match spec.op {
130 FilterOp::Eq
131 | FilterOp::Ne
132 | FilterOp::Lt
133 | FilterOp::Le
134 | FilterOp::Gt
135 | FilterOp::Ge => (Some(spec.value.clone()), None),
136 FilterOp::Re | FilterOp::NotRe => {
137 let compiled = case_mode.apply_to_pattern(&spec.value);
138 let r = Regex::new(&compiled)
139 .map_err(|e| format!("filter `{}`: invalid regex `{}`: {e}", spec.field, spec.value))?;
140 (None, Some(r))
141 }
142 };
143 predicates.push(CompiledPredicate {
144 field: spec.field,
145 op: spec.op,
146 literal,
147 regex,
148 });
149 }
150 let format_regex_record = RegexBuilder::new(format.regex.as_str())
151 .dot_matches_new_line(true)
152 .multi_line(true)
153 .build()
154 .map_err(|e| {
155 format!("format `{}`: rebuilding regex for records mode: {e}", format.name)
156 })?;
157
158 Ok(Self {
159 format_name: format.name.clone(),
160 format_regex: format.regex.clone(),
161 format_regex_record,
162 predicates,
163 })
164 }
165
166 pub fn evaluate(&self, line: &[u8]) -> FilterMatch {
170 self.evaluate_with(&self.format_regex, line)
171 }
172
173 pub fn evaluate_record(&self, record: &[u8]) -> FilterMatch {
178 self.evaluate_with(&self.format_regex_record, record)
179 }
180
181 fn evaluate_with(&self, regex: &Regex, bytes: &[u8]) -> FilterMatch {
182 let line_str = match std::str::from_utf8(bytes) {
183 Ok(s) => s,
184 Err(_) => return FilterMatch::NotParsed,
185 };
186 let Some(caps) = regex.captures(line_str) else {
187 return FilterMatch::NotParsed;
188 };
189 for p in &self.predicates {
190 let Some(m) = caps.name(&p.field) else {
191 return FilterMatch::NotMatched;
192 };
193 let captured = m.as_str();
194 let ok = match p.op {
195 FilterOp::Eq => p.literal.as_deref() == Some(captured),
196 FilterOp::Ne => p.literal.as_deref() != Some(captured),
197 FilterOp::Re => p.regex.as_ref().is_some_and(|r| r.is_match(captured)),
198 FilterOp::NotRe => p.regex.as_ref().is_some_and(|r| !r.is_match(captured)),
199 FilterOp::Lt | FilterOp::Le | FilterOp::Gt | FilterOp::Ge => {
200 let rhs = p.literal.as_deref().unwrap_or("");
201 compare(&p.op, captured, rhs)
202 }
203 };
204 if !ok {
205 return FilterMatch::NotMatched;
206 }
207 }
208 FilterMatch::Matched
209 }
210}
211
212fn compare(op: &FilterOp, lhs: &str, rhs: &str) -> bool {
219 let order = match (lhs.parse::<f64>(), rhs.parse::<f64>()) {
220 (Ok(a), Ok(b)) => a.partial_cmp(&b),
221 _ => Some(lhs.cmp(rhs)),
222 };
223 let Some(order) = order else { return false; };
224 use std::cmp::Ordering::{Equal, Greater, Less};
225 matches!(
226 (op, order),
227 (FilterOp::Lt, Less)
228 | (FilterOp::Le, Less | Equal)
229 | (FilterOp::Gt, Greater)
230 | (FilterOp::Ge, Greater | Equal)
231 )
232}
233
#[cfg(test)]
mod tests {
    use super::*;

    const SAMPLE_200: &[u8] = br#"127.0.0.1 - alice [10/Oct/2023:13:55:36 +0000] "GET /index.html HTTP/1.1" 200 2326 "-" "Mozilla/5.0""#;
    const SAMPLE_500: &[u8] = br#"127.0.0.1 - alice [10/Oct/2023:13:55:36 +0000] "GET /api/data HTTP/1.1" 500 512 "-" "curl/7.0""#;
    const NON_PARSING: &[u8] = b"this line does not match the format at all";

    /// The `apache-combined` format fixture shared by the evaluation tests.
    fn apache_combined() -> LogFormat {
        LogFormat::compile(
            "apache-combined",
            r#"^(?P<ip>\S+) \S+ (?P<user>\S+) \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<url>\S+) (?P<protocol>[^"]+)" (?P<status>\d+) (?P<size>\S+) "(?P<referer>[^"]*)" "(?P<agent>[^"]*)"$"#,
        )
        .unwrap()
    }

    /// Parses `expr`, panicking when the parser rejects it.
    fn spec(expr: &str) -> FilterSpec {
        FilterSpec::parse(expr).unwrap()
    }

    /// Compiles `exprs` against the Apache fixture with case-sensitive matching.
    fn filter(exprs: &[&str]) -> CompiledFilter {
        let fmt = apache_combined();
        let specs: Vec<FilterSpec> = exprs.iter().copied().map(spec).collect();
        CompiledFilter::compile(&fmt, specs, crate::viewport::CaseMode::Sensitive).unwrap()
    }

    #[test]
    fn parse_eq() {
        let parsed = spec("status=500");
        assert_eq!(parsed.field, "status");
        assert_eq!(parsed.op, FilterOp::Eq);
        assert_eq!(parsed.value, "500");
    }

    #[test]
    fn parse_ne_before_eq() {
        let parsed = spec("status!=200");
        assert_eq!(parsed.op, FilterOp::Ne);
        assert_eq!(parsed.value, "200");
    }

    #[test]
    fn parse_re() {
        let parsed = spec(r"ip~^10\.");
        assert_eq!(parsed.op, FilterOp::Re);
        assert_eq!(parsed.value, r"^10\.");
    }

    #[test]
    fn parse_not_re_before_re() {
        let parsed = spec("agent!~bot");
        assert_eq!(parsed.op, FilterOp::NotRe);
        assert_eq!(parsed.value, "bot");
    }

    #[test]
    fn parse_rejects_no_operator() {
        let err = FilterSpec::parse("status").unwrap_err();
        assert!(err.contains("missing operator"), "{err}");
    }

    #[test]
    fn parse_rejects_empty_field() {
        let err = FilterSpec::parse("=500").unwrap_err();
        assert!(err.contains("empty field"), "{err}");
    }

    #[test]
    fn compile_rejects_unknown_field() {
        let fmt = apache_combined();
        let err = CompiledFilter::compile(
            &fmt,
            vec![spec("notafield=x")],
            crate::viewport::CaseMode::Sensitive,
        )
        .unwrap_err();
        assert!(err.contains("not in format"), "{err}");
    }

    #[test]
    fn evaluate_eq_matches() {
        let f = filter(&["status=500"]);
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    #[test]
    fn evaluate_re_matches_5xx() {
        let f = filter(&["status~^5"]);
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    #[test]
    fn evaluate_ne_excludes_200() {
        let f = filter(&["status!=200"]);
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    #[test]
    fn evaluate_multiple_filters_and() {
        let f = filter(&["status~^5", r"url~/api/"]);
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    #[test]
    fn evaluate_unparseable_line_is_not_parsed() {
        let f = filter(&["status=200"]);
        assert_eq!(f.evaluate(NON_PARSING), FilterMatch::NotParsed);
    }

    #[test]
    fn parse_le_before_lt() {
        let parsed = spec("status<=200");
        assert_eq!(parsed.op, FilterOp::Le);
        assert_eq!(parsed.value, "200");
    }

    #[test]
    fn parse_ge_before_gt() {
        let parsed = spec("status>=500");
        assert_eq!(parsed.op, FilterOp::Ge);
        assert_eq!(parsed.value, "500");
    }

    #[test]
    fn parse_lt() {
        let parsed = spec("size<1000");
        assert_eq!(parsed.op, FilterOp::Lt);
        assert_eq!(parsed.value, "1000");
    }

    #[test]
    fn parse_gt() {
        let parsed = spec("size>0");
        assert_eq!(parsed.op, FilterOp::Gt);
        assert_eq!(parsed.value, "0");
    }

    #[test]
    fn evaluate_ge_numeric() {
        let f = filter(&["status>=500"]);
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::NotMatched);
    }

    #[test]
    fn evaluate_lt_numeric() {
        let f = filter(&["status<400"]);
        assert_eq!(f.evaluate(SAMPLE_200), FilterMatch::Matched);
        assert_eq!(f.evaluate(SAMPLE_500), FilterMatch::NotMatched);
    }

    #[test]
    fn evaluate_lex_fallback() {
        // "-" does not parse as a number, so the comparison is on raw strings.
        assert!(compare(&FilterOp::Lt, "-", "100"));
        assert!(!compare(&FilterOp::Gt, "-", "100"));
    }

    #[test]
    fn evaluate_lex_string_compare() {
        assert!(compare(&FilterOp::Gt, "warning", "warn"));
        assert!(!compare(&FilterOp::Gt, "info", "warn"));
        assert!(compare(&FilterOp::Ge, "warn", "warn"));
        assert!(compare(&FilterOp::Le, "warn", "warn"));
    }

    #[test]
    fn parse_rejects_no_op_mentions_new_ops() {
        let err = FilterSpec::parse("status").unwrap_err();
        assert!(err.contains(">=") && err.contains("<="), "{err}");
    }
}