Skip to main content

fakecloud_logs/
filter_pattern.rs

1//! CloudWatch Logs filter pattern evaluator used by metric filters.
2//!
3//! Supports the documented variants:
4//! - Empty pattern matches everything.
5//! - Quoted phrase: substring match of the literal text inside the quotes.
6//! - Plain string (single token): substring match.
7//! - Space-separated terms: AND match (all terms must appear in the message).
8//! - JSON pattern `{ <expr> }`: parses the message as JSON, evaluates a
9//!   boolean expression with `=`, `!=`, `>`, `<`, `>=`, `<=`, `&&`, `||`.
10//!
11//! Independent from the simpler `matches_filter_pattern` helper in
12//! `service/mod.rs`; metric filters need `||` plus JSON-path value
13//! extraction for `MetricValue = $.field`, which the older helper
14//! doesn't expose.
15//!
16//! Array-style patterns (`[a, b, c]`) match positionally against
17//! whitespace-separated tokens in the message. Each comma-separated
18//! field is either a bare name (always matches that slot), a quoted
19//! string, a literal value for equality, or a comparison `=v`,
20//! `!=v`, `>=v`, `<=v`, `>v`, `<v`. `...` is a wildcard that skips any
21//! number of tokens.
22//!
23//! `IS NULL` and free-form regex are out of scope and fail closed.
24
25use serde_json::Value;
26
27/// Returns true when `message` matches the given filter pattern.
28pub fn matches(pattern: &str, message: &str) -> bool {
29    let pattern = pattern.trim();
30    if pattern.is_empty() {
31        return true;
32    }
33
34    if pattern.starts_with('{') && pattern.ends_with('}') {
35        return matches_json(pattern, message);
36    }
37
38    if pattern.starts_with('[') && pattern.ends_with(']') {
39        return matches_array(pattern, message);
40    }
41    if pattern.starts_with('[') {
42        return false;
43    }
44
45    if pattern.starts_with('"') && pattern.ends_with('"') && pattern.len() >= 2 {
46        let inner = &pattern[1..pattern.len() - 1];
47        let unescaped = inner.replace("\\\"", "\"");
48        return message.contains(&unescaped);
49    }
50
51    let terms = tokenize(pattern);
52    if terms.is_empty() {
53        return true;
54    }
55    terms.iter().all(|t| message.contains(t.as_str()))
56}
57
58/// Resolve the literal `MetricValue` from a metric filter transformation.
59///
60/// `metric_value` is either:
61/// - a number literal (e.g. `"1"`, `"42.5"`),
62/// - a JSON path reference (e.g. `"$.bytes"`) extracted from the matched
63///   message,
64/// - empty/missing, falling back to `default_value` or `1.0`.
65pub fn resolve_metric_value(metric_value: &str, default_value: Option<f64>, message: &str) -> f64 {
66    let trimmed = metric_value.trim();
67    if trimmed.is_empty() {
68        return default_value.unwrap_or(1.0);
69    }
70
71    if let Some(path) = trimmed.strip_prefix("$.") {
72        if let Ok(json) = serde_json::from_str::<Value>(message) {
73            if let Some(v) = resolve_path(&json, path) {
74                if let Some(n) = v.as_f64() {
75                    return n;
76                }
77                if let Some(s) = v.as_str() {
78                    if let Ok(n) = s.parse::<f64>() {
79                        return n;
80                    }
81                }
82            }
83        }
84        return default_value.unwrap_or(1.0);
85    }
86
87    trimmed
88        .parse::<f64>()
89        .unwrap_or_else(|_| default_value.unwrap_or(1.0))
90}
91
92fn matches_json(pattern: &str, message: &str) -> bool {
93    let inner = pattern
94        .strip_prefix('{')
95        .and_then(|s| s.strip_suffix('}'))
96        .unwrap_or("")
97        .trim();
98    if inner.is_empty() {
99        return true;
100    }
101
102    let json: Value = match serde_json::from_str(message) {
103        Ok(v) => v,
104        Err(_) => return false,
105    };
106
107    eval_or(inner, &json)
108}
109
110/// Top-level disjunction: `a || b || c` -> any of `a`, `b`, `c`.
111fn eval_or(expr: &str, json: &Value) -> bool {
112    split_top_level(expr, "||")
113        .into_iter()
114        .any(|chunk| eval_and(chunk.trim(), json))
115}
116
117/// Conjunction below `||`: `a && b` -> all of `a`, `b`.
118fn eval_and(expr: &str, json: &Value) -> bool {
119    split_top_level(expr, "&&")
120        .into_iter()
121        .all(|chunk| eval_atom(chunk.trim(), json))
122}
123
/// Splits `expr` at every occurrence of `sep` that lies outside a
/// double-quoted string.
///
/// A backslash skips the byte that follows it, so an escaped quote does not
/// open or close a quoted region. The pieces are returned untrimmed and in
/// order; the result always holds at least one element (the whole input
/// when `sep` never occurs).
fn split_top_level(expr: &str, sep: &str) -> Vec<String> {
    let haystack = expr.as_bytes();
    let needle = sep.as_bytes();

    let mut pieces = Vec::new();
    let mut piece_start = 0usize;
    let mut quoted = false;
    let mut pos = 0usize;

    while let Some(&byte) = haystack.get(pos) {
        // Each arm yields how many bytes to advance.
        pos += match byte {
            // Escape: step over the backslash and the byte it protects.
            b'\\' if pos + 1 < haystack.len() => 2,
            b'"' => {
                quoted = !quoted;
                1
            }
            _ if !quoted && haystack[pos..].starts_with(needle) => {
                pieces.push(expr[piece_start..pos].to_string());
                piece_start = pos + needle.len();
                needle.len()
            }
            _ => 1,
        };
    }

    pieces.push(expr[piece_start..].to_string());
    pieces
}
154
155fn eval_atom(condition: &str, json: &Value) -> bool {
156    let condition = condition.trim();
157    let condition = condition
158        .strip_prefix('(')
159        .and_then(|s| s.strip_suffix(')'))
160        .map(|s| s.trim())
161        .unwrap_or(condition);
162
163    let ops = ["!=", ">=", "<=", "=", ">", "<"];
164    let mut found: Option<(&str, usize)> = None;
165    let bytes = condition.as_bytes();
166    let mut in_quotes = false;
167    let mut i = 0usize;
168    while i < bytes.len() {
169        let c = bytes[i];
170        if c == b'\\' && i + 1 < bytes.len() {
171            i += 2;
172            continue;
173        }
174        if c == b'"' {
175            in_quotes = !in_quotes;
176            i += 1;
177            continue;
178        }
179        if !in_quotes {
180            if let Some(op) = ops
181                .iter()
182                .find(|op| condition[i..].starts_with(*op))
183                .copied()
184            {
185                found = Some((op, i));
186                break;
187            }
188        }
189        i += 1;
190    }
191
192    let Some((op, pos)) = found else {
193        // No comparison: `{ $.field }` matches when field exists.
194        if let Some(path) = condition.strip_prefix("$.") {
195            return resolve_path(json, path).is_some();
196        }
197        return false;
198    };
199
200    let field = condition[..pos].trim();
201    let value = condition[pos + op.len()..].trim();
202
203    let path = match field.strip_prefix("$.") {
204        Some(p) => p,
205        None => return false,
206    };
207
208    let actual = match resolve_path(json, path) {
209        Some(v) => v,
210        // Missing field: only `!=` semantically holds.
211        None => return op == "!=",
212    };
213
214    if value.starts_with('"') && value.ends_with('"') && value.len() >= 2 {
215        let s = &value[1..value.len() - 1];
216        let unescaped = s.replace("\\\"", "\"");
217        return match op {
218            "=" => actual.as_str() == Some(unescaped.as_str()),
219            "!=" => actual.as_str() != Some(unescaped.as_str()),
220            _ => false,
221        };
222    }
223
224    if let Ok(num) = value.parse::<f64>() {
225        let actual_num = actual.as_f64();
226        return match (op, actual_num) {
227            ("=", Some(n)) => (n - num).abs() < f64::EPSILON,
228            ("!=", Some(n)) => (n - num).abs() >= f64::EPSILON,
229            (">", Some(n)) => n > num,
230            ("<", Some(n)) => n < num,
231            (">=", Some(n)) => n >= num,
232            ("<=", Some(n)) => n <= num,
233            _ => false,
234        };
235    }
236
237    if value == "true" || value == "false" {
238        let expected = value == "true";
239        return match op {
240            "=" => actual.as_bool() == Some(expected),
241            "!=" => actual.as_bool() != Some(expected),
242            _ => false,
243        };
244    }
245
246    false
247}
248
249fn matches_array(pattern: &str, message: &str) -> bool {
250    let inner = pattern
251        .strip_prefix('[')
252        .and_then(|s| s.strip_suffix(']'))
253        .unwrap_or("")
254        .trim();
255    if inner.is_empty() {
256        return true;
257    }
258    let fields: Vec<&str> = inner.split(',').map(|s| s.trim()).collect();
259    let tokens: Vec<&str> = message.split_whitespace().collect();
260    array_match(&fields, &tokens)
261}
262
263fn array_match(fields: &[&str], tokens: &[&str]) -> bool {
264    if fields.is_empty() {
265        return tokens.is_empty();
266    }
267    let head = fields[0];
268    if head == "..." {
269        let rest = &fields[1..];
270        if rest.is_empty() {
271            return true;
272        }
273        for i in 0..=tokens.len() {
274            if array_match(rest, &tokens[i..]) {
275                return true;
276            }
277        }
278        return false;
279    }
280    if tokens.is_empty() {
281        return false;
282    }
283    if !array_field_matches(head, tokens[0]) {
284        return false;
285    }
286    array_match(&fields[1..], &tokens[1..])
287}
288
289fn array_field_matches(field: &str, token: &str) -> bool {
290    let f = field.trim();
291    if f == "*" || f.is_empty() {
292        return true;
293    }
294    // Quoted literal -> equality.
295    if f.starts_with('"') && f.ends_with('"') && f.len() >= 2 {
296        return token == &f[1..f.len() - 1];
297    }
298    // Comparison ops with no LHS (`=200`, `>=400`, etc.) compare against the token.
299    for op in ["!=", ">=", "<=", "=", ">", "<"] {
300        if let Some(rhs) = f.strip_prefix(op) {
301            return cmp_field(op, token, rhs.trim());
302        }
303    }
304    // `name=value` style (used in named-array patterns).
305    for op in ["!=", ">=", "<=", "=", ">", "<"] {
306        if let Some(idx) = f.find(op) {
307            let rhs = &f[idx + op.len()..].trim();
308            return cmp_field(op, token, rhs);
309        }
310    }
311    // Bare identifier -> name placeholder, matches anything.
312    if f.chars()
313        .next()
314        .is_some_and(|c| c.is_alphabetic() || c == '_' || c == '$')
315    {
316        return true;
317    }
318    // Fallback: treat as literal.
319    token == f
320}
321
/// Compares a message token against the right-hand side of an array-pattern
/// predicate.
///
/// `rhs` is trimmed and one pair of enclosing double quotes is removed.
/// When both `token` and `rhs` parse as finite numbers they are compared
/// numerically (equality uses an `f64::EPSILON` tolerance) and all six
/// operators apply. Otherwise they are compared as text, where only `=` and
/// `!=` are defined and every other operator yields `false`.
fn cmp_field(op: &str, token: &str, rhs: &str) -> bool {
    let rhs = rhs.trim();
    let rhs = rhs
        .strip_prefix('"')
        .and_then(|s| s.strip_suffix('"'))
        .unwrap_or(rhs);

    // `f64::from_str` also accepts words such as `nan`, `inf` and
    // `infinity`. Arithmetic on those yields NaN, which would make `=` fail
    // for identical text, so non-finite parses are compared as text below.
    let as_finite = |text: &str| text.parse::<f64>().ok().filter(|n| n.is_finite());

    if let (Some(a), Some(b)) = (as_finite(token), as_finite(rhs)) {
        return match op {
            "=" => (a - b).abs() < f64::EPSILON,
            "!=" => (a - b).abs() >= f64::EPSILON,
            ">" => a > b,
            "<" => a < b,
            ">=" => a >= b,
            "<=" => a <= b,
            _ => false,
        };
    }

    match op {
        "=" => token == rhs,
        "!=" => token != rhs,
        _ => false,
    }
}
346
347fn resolve_path<'a>(json: &'a Value, path: &str) -> Option<&'a Value> {
348    let mut current = json;
349    for part in path.split('.') {
350        current = current.get(part)?;
351    }
352    if current.is_null() {
353        None
354    } else {
355        Some(current)
356    }
357}
358
/// Splits a plain-text pattern into search terms.
///
/// Terms are separated by whitespace. A term that begins with `"` runs to
/// the next unescaped `"` (or to the end of the pattern if the quote is
/// never closed) and may contain whitespace; inside it a backslash is
/// dropped and the character after it is kept verbatim. An empty quoted
/// term (`""`) is kept as an empty string. Bare terms are taken literally,
/// including any backslashes or quotes that are not at their start.
fn tokenize(pattern: &str) -> Vec<String> {
    let mut terms = Vec::new();
    let mut rest = pattern.chars();

    while let Some(first) = rest.next() {
        if first.is_whitespace() {
            continue;
        }

        let mut term = String::new();
        if first == '"' {
            while let Some(ch) = rest.next() {
                match ch {
                    '"' => break,
                    // Keep the escaped character; a trailing lone backslash
                    // contributes nothing.
                    '\\' => term.extend(rest.next()),
                    other => term.push(other),
                }
            }
        } else {
            term.push(first);
            // `take_while` also consumes the whitespace that ends the term,
            // which the outer loop would have skipped anyway.
            term.extend(rest.by_ref().take_while(|ch| !ch.is_whitespace()));
        }
        terms.push(term);
    }

    terms
}
399
// Unit tests for the public entry points `matches` and
// `resolve_metric_value`, grouped by pattern family: plain text, JSON
// patterns, array patterns, then metric value resolution.
#[cfg(test)]
mod tests {
    use super::*;

    // --- Plain-text patterns -------------------------------------------

    // A single bare term is a substring search.
    #[test]
    fn plain_string_pattern_matches_substring() {
        assert!(matches("ERROR", "service ERROR: timeout"));
        assert!(!matches("ERROR", "service INFO: ok"));
    }

    // A quoted phrase must appear contiguously, whitespace included.
    #[test]
    fn quoted_phrase_pattern_matches_exact() {
        assert!(matches(
            "\"connection refused\"",
            "tcp: connection refused on :8080"
        ));
        assert!(!matches(
            "\"connection refused\"",
            "tcp: connection was refused"
        ));
    }

    // Several bare terms are ANDed: each one must occur somewhere.
    #[test]
    fn space_separated_terms_require_all_to_match() {
        assert!(matches("ERROR DATABASE", "ERROR: DATABASE down"));
        assert!(!matches("ERROR DATABASE", "ERROR: cache miss"));
        assert!(!matches("ERROR DATABASE", "INFO: DATABASE healthy"));
    }

    // --- JSON patterns -------------------------------------------------

    // `=` against a numeric field.
    #[test]
    fn json_pattern_equals_predicate() {
        assert!(matches("{ $.statusCode = 500 }", r#"{"statusCode": 500}"#));
        assert!(!matches("{ $.statusCode = 500 }", r#"{"statusCode": 200}"#));
    }

    // `!=` against a numeric field.
    #[test]
    fn json_pattern_inequality_predicate() {
        assert!(matches("{ $.statusCode != 200 }", r#"{"statusCode": 500}"#));
        assert!(!matches(
            "{ $.statusCode != 200 }",
            r#"{"statusCode": 200}"#
        ));
    }

    // `&&` requires both the numeric and the string condition to hold.
    #[test]
    fn json_pattern_and_predicate() {
        let p = "{ $.statusCode = 500 && $.method = \"GET\" }";
        assert!(matches(p, r#"{"statusCode": 500, "method": "GET"}"#));
        assert!(!matches(p, r#"{"statusCode": 500, "method": "POST"}"#));
        assert!(!matches(p, r#"{"statusCode": 200, "method": "GET"}"#));
    }

    // `||` succeeds when either alternative holds.
    #[test]
    fn json_pattern_or_predicate() {
        let p = "{ $.statusCode = 500 || $.statusCode = 503 }";
        assert!(matches(p, r#"{"statusCode": 500}"#));
        assert!(matches(p, r#"{"statusCode": 503}"#));
        assert!(!matches(p, r#"{"statusCode": 200}"#));
    }

    // Ordering operators, including the inclusive boundary of `<=`.
    #[test]
    fn json_pattern_numeric_comparisons() {
        assert!(matches("{ $.latency > 100 }", r#"{"latency": 250}"#));
        assert!(!matches("{ $.latency > 100 }", r#"{"latency": 50}"#));
        assert!(matches("{ $.latency <= 100 }", r#"{"latency": 100}"#));
    }

    // A JSON pattern never matches a message that is not valid JSON.
    #[test]
    fn json_pattern_against_non_json_message_fails() {
        assert!(!matches("{ $.statusCode = 500 }", "plain text, not JSON"));
    }

    // Empty and whitespace-only patterns match every message.
    #[test]
    fn empty_pattern_matches_anything() {
        assert!(matches("", "anything"));
        assert!(matches("   ", "anything"));
    }

    // --- Array patterns ------------------------------------------------

    // Bare names are placeholders; the field count must equal the token count.
    #[test]
    fn array_pattern_positional_match() {
        assert!(matches("[a, b]", "a b"));
        assert!(matches("[host, status]", "192.168.1.1 200"));
        assert!(!matches("[a, b, c]", "a b"));
    }

    // Operator-only fields compare against the token in that position.
    #[test]
    fn array_pattern_comparison_ops() {
        assert!(matches("[ip, =200]", "1.2.3.4 200"));
        assert!(!matches("[ip, =200]", "1.2.3.4 500"));
        assert!(matches("[ip, >=400]", "1.2.3.4 500"));
        assert!(!matches("[ip, >=400]", "1.2.3.4 200"));
        assert!(matches("[ip, !=200]", "1.2.3.4 500"));
    }

    // `name=value` applies the predicate to the token in that position.
    #[test]
    fn array_pattern_named_field_with_predicate() {
        assert!(matches("[ip, status=200]", "1.2.3.4 200"));
        assert!(!matches("[ip, status=200]", "1.2.3.4 404"));
    }

    // `...` skips any number of tokens before the remaining fields.
    #[test]
    fn array_pattern_ellipsis_skips_tokens() {
        assert!(matches("[ip, ..., status]", "1.2.3.4 a b c 200"));
        assert!(matches("[..., =200]", "any tokens 200"));
        assert!(!matches("[..., =500]", "any tokens 200"));
    }

    // A quoted field must equal the token exactly.
    #[test]
    fn array_pattern_quoted_literal() {
        assert!(matches("[\"GET\", path]", "GET /index.html"));
        assert!(!matches("[\"POST\", path]", "GET /index.html"));
    }

    // A pattern that opens with `[` but never closes matches nothing.
    #[test]
    fn array_pattern_unbalanced_fails_closed() {
        assert!(!matches("[a, b", "a b"));
    }

    // --- Metric value resolution ---------------------------------------

    // Numeric literals are returned as-is, regardless of the message.
    #[test]
    fn resolve_metric_value_literal_number() {
        assert_eq!(resolve_metric_value("1", None, "msg"), 1.0);
        assert_eq!(resolve_metric_value("42.5", None, "msg"), 42.5);
    }

    // `$.field` reads the value from the JSON message.
    #[test]
    fn resolve_metric_value_json_path_extracts_field() {
        let v = resolve_metric_value("$.bytes", None, r#"{"bytes": 1024}"#);
        assert_eq!(v, 1024.0);
    }

    // A missing path uses the supplied default; an empty spec with no
    // default yields 1.0.
    #[test]
    fn resolve_metric_value_falls_back_when_missing() {
        let v = resolve_metric_value("$.bytes", Some(7.0), r#"{"other": 1}"#);
        assert_eq!(v, 7.0);
        let v = resolve_metric_value("", None, "msg");
        assert_eq!(v, 1.0);
    }
}