use super::*;
/// Loads every pattern from `patterns_with_matches` into one matcher (using the
/// pattern text itself as its id) and checks the expected outcomes.
///
/// * `patterns_with_matches` - pairs of (pattern, values); each value is wrapped
///   in an `{"x":"<value>"}` event whose match set must contain that pattern.
/// * `should_not_match_any` - values whose events must produce no matches at all.
///
/// Panics if a pattern cannot be added or any expectation fails.
fn exercise_multi_patterns(
    should_not_match_any: &[&str],
    patterns_with_matches: &[(&str, &[&str])],
) {
    /// Builds the single-field event used for every probe.
    fn event_for(val: &str) -> String {
        format!(r#"{{"x":"{val}"}}"#)
    }

    let mut matcher = Quamina::new();
    for (pattern, _) in patterns_with_matches {
        if let Err(e) = matcher.add_pattern(*pattern, pattern) {
            panic!("Failed to add pattern {pattern}: {e:?}");
        }
    }

    // Positive expectations: every listed value must hit its own pattern.
    for (pattern, expected_hits) in patterns_with_matches {
        for val in *expected_hits {
            let payload = event_for(val);
            let pattern_ids = matcher.matches_for_event(payload.as_bytes()).unwrap();
            assert!(
                pattern_ids.contains(pattern),
                "Pattern '{pattern}' should match '{val}', got {pattern_ids:?}"
            );
        }
    }

    // Negative expectations: these values must hit nothing.
    for val in should_not_match_any {
        let payload = event_for(val);
        let pattern_ids = matcher.matches_for_event(payload.as_bytes()).unwrap();
        assert!(
            pattern_ids.is_empty(),
            "'{val}' should not match any pattern, got {pattern_ids:?}"
        );
    }
}
/// Expects the `prefix` pattern `prod-` to yield `p1` for `prod-server-1`
/// and nothing for `dev-server-1`.
#[test]
fn test_prefix_match() {
    let pattern = r#"{"name": [{"prefix": "prod-"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"name": "prod-server-1"}"#,
        vec!["p1"],
        "Should match prefix"
    );
    assert_no_match!(
        matcher,
        r#"{"name": "dev-server-1"}"#,
        "Should not match different prefix"
    );
}
/// Expects the `suffix` pattern `.jpg` to yield `p1` for `photo.jpg` and nothing for `photo.png`.
#[test]
fn test_suffix() {
    let pattern = r#"{"file": [{"suffix": ".jpg"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(matcher, r#"{"file": "photo.jpg"}"#, vec!["p1"]);
    assert_no_match!(matcher, r#"{"file": "photo.png"}"#);
}
/// Expects the wildcard `*.txt` to yield `p1` for `document.txt` and nothing for `document.pdf`.
#[test]
fn test_wildcard_suffix() {
    let pattern = r#"{"file": [{"wildcard": "*.txt"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"file": "document.txt"}"#,
        vec!["p1"],
        "Should match *.txt"
    );
    assert_no_match!(
        matcher,
        r#"{"file": "document.pdf"}"#,
        "Should not match .pdf"
    );
}
/// Expects the wildcard `prod-*` to yield `p1` for `prod-server`.
#[test]
fn test_wildcard_prefix() {
    let pattern = r#"{"name": [{"wildcard": "prod-*"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"name": "prod-server"}"#,
        vec!["p1"],
        "Should match prod-*"
    );
}
/// Expects the wildcard `*error*` to yield `p1` for a message containing
/// `error` and nothing for `all good`.
#[test]
fn test_wildcard_contains() {
    let pattern = r#"{"msg": [{"wildcard": "*error*"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"msg": "an error occurred"}"#,
        vec!["p1"],
        "Should match *error*"
    );
    assert_no_match!(matcher, r#"{"msg": "all good"}"#);
}
/// Expects the bare wildcard `*` to yield `p1` for both an empty and a non-empty string value.
#[test]
fn test_wildcard_matches_empty_string() {
    let matcher = q!("p1" => r#"{"x": [{"wildcard": "*"}]}"#);
    let cases = [
        (r#"{"x": ""}"#, "* should match empty string"),
        (r#"{"x": "hello"}"#, "* should match any string"),
    ];
    for (payload, why) in cases {
        assert_matches!(matcher, payload, vec!["p1"], why);
    }
}
/// Expects the wildcard `a\*b` (escaped star) to yield `p1` for the literal
/// value `a*b` and nothing for `aXb`.
#[test]
fn test_wildcard_escape_star() {
    let pattern = r#"{"val": [{"wildcard": "a\\*b"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"val": "a*b"}"#,
        vec!["p1"],
        "\\* should match literal *"
    );
    assert_no_match!(
        matcher,
        r#"{"val": "aXb"}"#,
        "Escaped * should not be wildcard"
    );
}
/// Expects a wildcard with an escaped backslash to yield `p1` for a value
/// containing a single literal backslash.
#[test]
fn test_wildcard_escape_backslash() {
    let pattern = r#"{"path": [{"wildcard": "a\\\\b"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"path": "a\\b"}"#,
        vec!["p1"],
        "\\\\ should match literal \\"
    );
}
/// Expects `add_pattern` to return an error for three malformed wildcards:
/// adjacent `**`, an unknown escape, and a trailing backslash.
#[test]
fn test_wildcard_invalid_patterns() {
    let rejected = [
        (
            "p1",
            r#"{"x": [{"wildcard": "foo**bar"}]}"#,
            "Adjacent ** should be rejected",
        ),
        (
            "p2",
            r#"{"x": [{"wildcard": "he\\llo"}]}"#,
            "Invalid escape \\l should be rejected",
        ),
        (
            "p3",
            r#"{"x": [{"wildcard": "x\\"}]}"#,
            "Trailing backslash should be rejected",
        ),
    ];
    for (id, pattern, why) in rejected {
        // Each case gets its own fresh matcher.
        let mut matcher = Quamina::new();
        let outcome = matcher.add_pattern(id, pattern);
        assert!(outcome.is_err(), "{why}");
    }
}
/// Expects shellstyle `*bc` to yield `p1` for `bc` and `abc`, and nothing for `xyz`.
#[test]
fn test_shellstyle_suffix() {
    let pattern = r#"{"a": [{"shellstyle": "*bc"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(matcher, r#"{"a": "bc"}"#, vec!["p1"]);
    assert_matches!(matcher, r#"{"a": "abc"}"#, vec!["p1"]);
    assert_no_match!(matcher, r#"{"a": "xyz"}"#);
}
/// Expects shellstyle `xy*` to yield `p1` for `xyzzz` and for the bare prefix `xy`.
#[test]
fn test_shellstyle_prefix() {
    let pattern = r#"{"c": [{"shellstyle": "xy*"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(matcher, r#"{"c": "xyzzz"}"#, vec!["p1"]);
    assert_matches!(matcher, r#"{"c": "xy"}"#, vec!["p1"]);
}
/// Expects shellstyle `d*f` to yield `p1` for `dexef` and for `df`.
#[test]
fn test_shellstyle_infix() {
    let pattern = r#"{"b": [{"shellstyle": "d*f"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(matcher, r#"{"b": "dexef"}"#, vec!["p1"]);
    assert_matches!(matcher, r#"{"b": "df"}"#, vec!["p1"]);
}
/// Expects shellstyle `12*4*` to yield `p1` for `12345` and `1244`, and nothing for `1235`.
#[test]
fn test_shellstyle_multiple_wildcards() {
    let pattern = r#"{"d": [{"shellstyle": "12*4*"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(matcher, r#"{"d": "12345"}"#, vec!["p1"]);
    assert_matches!(matcher, r#"{"d": "1244"}"#, vec!["p1"]);
    assert_no_match!(matcher, r#"{"d": "1235"}"#);
}
/// Expects shellstyle `*foo*` to yield `p1` whether `foo` sits in the middle,
/// at the start, at the end, or is the whole value; `bar` must not match.
#[test]
fn test_shellstyle_contains() {
    let matcher = q!("p1" => r#"{"x": [{"shellstyle": "*foo*"}]}"#);
    let containing_foo = ["xxfooyy", "fooyy", "xxfoo", "foo"];
    for text in containing_foo {
        let event = format!(r#"{{"x": "{text}"}}"#);
        assert_matches!(matcher, event, vec!["p1"]);
    }
    assert_no_match!(matcher, r#"{"x": "bar"}"#);
}
/// Registers suffix, infix and prefix shellstyle patterns on three different
/// fields and expects each event to include its own pattern id.
#[test]
fn test_multiple_overlapping_shellstyle_patterns() {
    let matcher = q!(
        "suffix_bc" => r#"{"a": [{"shellstyle": "*bc"}]}"#,
        "infix_ef" => r#"{"b": [{"shellstyle": "d*f"}]}"#,
        "prefix_xy" => r#"{"c": [{"shellstyle": "xy*"}]}"#
    );
    let expectations = [
        (r#"{"a": "abc"}"#, "suffix_bc"),
        (r#"{"b": "dexef"}"#, "infix_ef"),
        (r#"{"c": "xyzzz"}"#, "prefix_xy"),
    ];
    for (payload, id) in expectations {
        assert_has_match!(matcher, payload, id);
    }
}
/// Registers three two-field shellstyle patterns that share prefixes and checks
/// that each event hits only the pattern ids the assertions name.
/// Skipped under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_nested_transmap_safety() {
    let matcher = q!(
        "P0" => r#"{"a": [{"shellstyle": "foo*"}], "b": [{"shellstyle": "bar*"}]}"#,
        "P1" => r#"{"a": [{"shellstyle": "foo*"}], "b": [{"shellstyle": "baz*"}]}"#,
        "P2" => r#"{"a": [{"shellstyle": "fox*"}], "b": [{"shellstyle": "bar*"}]}"#
    );

    // Positive hits, checked in the same order as before.
    let positive = [
        (r#"{"a": "fooXYZ", "b": "barXYZ"}"#, "P0"),
        (r#"{"a": "fooABC", "b": "bazABC"}"#, "P1"),
        (r#"{"a": "foxDEF", "b": "barDEF"}"#, "P2"),
        (r#"{"a": "fooXYZ", "b": "bar"}"#, "P0"),
    ];
    for (payload, id) in positive {
        assert_has_match!(matcher, payload, id);
    }

    // Cross-checks: sibling patterns sharing the `a` prefix must not leak.
    assert_no_has_match!(matcher, r#"{"a": "fooXYZ", "b": "bar"}"#, "P1");
    assert_has_match!(matcher, r#"{"a": "fooXYZ", "b": "baz"}"#, "P1");
    assert_no_has_match!(matcher, r#"{"a": "fooXYZ", "b": "baz"}"#, "P0");
    assert_no_match!(matcher, r#"{"a": "nomatch", "b": "nomatch"}"#);
}
/// Registers four overlapping two-field shellstyle patterns and expects a single
/// event to hit all four (and exactly four). Skipped under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_overlapping_shellstyle_nesting() {
    let matcher = q!(
        "P1" => r#"{"a": [{"shellstyle": "*"}], "b": [{"shellstyle": "*"}]}"#,
        "P2" => r#"{"a": [{"shellstyle": "*"}], "b": [{"shellstyle": "bar*"}]}"#,
        "P3" => r#"{"a": [{"shellstyle": "foo*"}], "b": [{"shellstyle": "*"}]}"#,
        "P4" => r#"{"a": [{"shellstyle": "foo*"}], "b": [{"shellstyle": "bar*"}]}"#
    );
    let payload = r#"{"a": "fooX", "b": "barY"}"#;
    for id in ["P1", "P2", "P3", "P4"] {
        assert_has_match!(matcher, payload, id);
    }
    assert_match_count!(matcher, payload, 4);
}
/// Matches one four-field event 100 times against three patterns and expects,
/// on every iteration, `deep-1` and `side` to be present and `deep-2` absent.
/// Skipped under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_three_level_nesting() {
    let matcher = q!(
        "deep-1" => r#"{"a": [{"shellstyle": "*"}], "b": [{"shellstyle": "*"}], "c": [{"shellstyle": "cat*"}]}"#,
        "deep-2" => r#"{"a": [{"shellstyle": "*"}], "b": [{"shellstyle": "bar*"}], "c": [{"shellstyle": "cow*"}]}"#,
        "side" => r#"{"a": [{"shellstyle": "foo*"}], "d": [{"shellstyle": "dog*"}]}"#
    );
    let payload = r#"{"a": "fooX", "b": "barY", "c": "catZ", "d": "dogW"}"#.as_bytes();

    // Repeated to surface any run-to-run variation in the result set.
    for i in 0..100 {
        let pattern_ids = matcher.matches_for_event(payload).unwrap();
        let has_deep_1 = pattern_ids.contains(&"deep-1");
        let has_side = pattern_ids.contains(&"side");
        let has_deep_2 = pattern_ids.contains(&"deep-2");
        assert!(
            has_deep_1,
            "iter {i}: missing deep-1, got {pattern_ids:?}"
        );
        assert!(has_side, "iter {i}: missing side, got {pattern_ids:?}");
        assert!(
            !has_deep_2,
            "iter {i}: unexpected deep-2 (c=catZ should not match cow*)"
        );
    }
}
/// Two-pattern variant of the nested shellstyle check that carries no Miri
/// exclusion: each event must hit its own pattern, and an unrelated event nothing.
#[test]
fn test_nested_transmap_safety_miri_friendly() {
    let matcher = q!(
        "P0" => r#"{"a": [{"shellstyle": "foo*"}], "b": [{"shellstyle": "bar*"}]}"#,
        "P1" => r#"{"a": [{"shellstyle": "foo*"}], "b": [{"shellstyle": "baz*"}]}"#
    );
    let positive = [
        (r#"{"a": "fooX", "b": "barX"}"#, "P0"),
        (r#"{"a": "fooX", "b": "bazX"}"#, "P1"),
    ];
    for (payload, id) in positive {
        assert_has_match!(matcher, payload, id);
    }
    assert_no_match!(matcher, r#"{"a": "nomatch", "b": "nomatch"}"#);
}
/// Two-pattern variant of the overlapping shellstyle check that carries no Miri
/// exclusion: one event must hit both patterns, for a match count of two.
#[test]
fn test_overlapping_shellstyle_nesting_miri_friendly() {
    let matcher = q!(
        "P1" => r#"{"a": [{"shellstyle": "*"}], "b": [{"shellstyle": "bar*"}]}"#,
        "P2" => r#"{"a": [{"shellstyle": "foo*"}], "b": [{"shellstyle": "bar*"}]}"#
    );
    let payload = r#"{"a": "fooX", "b": "barY"}"#;
    for id in ["P1", "P2"] {
        assert_has_match!(matcher, payload, id);
    }
    assert_match_count!(matcher, payload, 2);
}
/// Single-pass, two-pattern variant of the three-level nesting check that
/// carries no Miri exclusion: the event must hit both `deep-1` and `side`.
#[test]
fn test_three_level_nesting_miri_friendly() {
    let matcher = q!(
        "deep-1" => r#"{"a": [{"shellstyle": "*"}], "b": [{"shellstyle": "*"}], "c": [{"shellstyle": "cat*"}]}"#,
        "side" => r#"{"a": [{"shellstyle": "foo*"}], "d": [{"shellstyle": "dog*"}]}"#
    );
    let payload = r#"{"a": "fooX", "b": "barY", "c": "catZ", "d": "dogW"}"#.as_bytes();
    let pattern_ids = matcher.matches_for_event(payload).unwrap();

    let has_deep_1 = pattern_ids.contains(&"deep-1");
    assert!(has_deep_1, "missing deep-1, got {pattern_ids:?}");

    let has_side = pattern_ids.contains(&"side");
    assert!(has_side, "missing side, got {pattern_ids:?}");
}
/// Expects `anything-but ["deleted", "archived"]` to yield `p1` for `active`
/// and nothing for `deleted`.
#[test]
fn test_anything_but() {
    let pattern = r#"{"status": [{"anything-but": ["deleted", "archived"]}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"status": "active"}"#,
        vec!["p1"],
        "Should match non-excluded value"
    );
    assert_no_match!(
        matcher,
        r#"{"status": "deleted"}"#,
        "Should not match excluded value"
    );
}
/// Expects `add_pattern` to return an error for three invalid `anything-but`
/// payloads: an empty array, only booleans/nulls, and mixed strings and numbers.
#[test]
fn test_anything_but_validation() {
    let rejected = [
        (
            "p1",
            r#"{"status": [{"anything-but": []}]}"#,
            "Empty anything-but array should be rejected",
        ),
        (
            "p2",
            r#"{"x": [{"anything-but": [true, null]}]}"#,
            "anything-but with only booleans/nulls should be rejected",
        ),
        (
            "p3",
            r#"{"x": [{"anything-but": ["a", 1]}]}"#,
            "anything-but with mixed strings and numbers should be rejected",
        ),
    ];
    for (id, pattern, why) in rejected {
        // Each case gets its own fresh matcher.
        let mut matcher = Quamina::new();
        let outcome = matcher.add_pattern(id, pattern);
        assert!(outcome.is_err(), "{why}");
    }
}
/// Expects `anything-but` with a bare string (not an array) to yield `p1` for
/// `active` and nothing for `deleted`.
#[test]
fn test_anything_but_single_string() {
    let pattern = r#"{"status": [{"anything-but": "deleted"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(matcher, r#"{"status": "active"}"#, vec!["p1"]);
    assert_no_match!(matcher, r#"{"status": "deleted"}"#);
}
/// Expects `anything-but 404` to yield `p1` for the number 200 and nothing for 404.
#[test]
fn test_anything_but_numeric() {
    let pattern = r#"{"code": [{"anything-but": 404}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"code": 200}"#,
        vec!["p1"],
        "Should match non-excluded number"
    );
    assert_no_match!(
        matcher,
        r#"{"code": 404}"#,
        "Should not match excluded number"
    );
}
/// Expects `anything-but ["foo"]` to yield `not_foo` for `foot` (which merely
/// starts with the excluded value) and nothing for `foo` itself.
#[test]
fn test_anything_but_prefix_relationship() {
    let pattern = r#"{"z": [{"anything-but": ["foo"]}]}"#;
    let matcher = q!("not_foo" => pattern);
    assert_matches!(
        matcher,
        r#"{"z": "foot"}"#,
        vec!["not_foo"],
        "anything-but ['foo'] should match 'foot'"
    );
    assert_no_match!(
        matcher,
        r#"{"z": "foo"}"#,
        "anything-but ['foo'] should not match 'foo'"
    );
}
/// Combines an exact `foo` pattern with `anything-but ["foot"]` on the same
/// field: `foo` must produce two matches and `foot` none.
#[test]
fn test_anything_but_with_exact_match() {
    let matcher = q!(
        "pFoo" => r#"{"z": ["foo"]}"#,
        "pAbFoot" => r#"{"z": [{"anything-but": ["foot"]}]}"#
    );
    assert_match_count!(
        matcher,
        r#"{"z": "foo"}"#,
        2,
        "foo should match both patterns"
    );
    assert_no_match!(matcher, r#"{"z": "foot"}"#, "foot should match nothing");
}
/// Expects `equals-ignore-case "Test"` to yield `p1` for lowercase, uppercase
/// and mixed-case spellings, and nothing for `other`.
#[test]
fn test_equals_ignore_case() {
    let matcher = q!("p1" => r#"{"name": [{"equals-ignore-case": "Test"}]}"#);
    let spellings = [
        (r#"{"name": "test"}"#, "lowercase"),
        (r#"{"name": "TEST"}"#, "uppercase"),
        (r#"{"name": "TeSt"}"#, "mixed case"),
    ];
    for (payload, label) in spellings {
        assert_matches!(matcher, payload, vec!["p1"], label);
    }
    assert_no_match!(matcher, r#"{"name": "other"}"#);
}
/// Registers three `equals-ignore-case` patterns; `abc` must yield only `r1`,
/// and `XYZ` must produce two matches (the two patterns on field `b`).
#[test]
fn test_equals_ignore_case_multiple_patterns() {
    let matcher = q!(
        "r1" => r#"{"a": [{"equals-ignore-case": "aBc"}]}"#,
        "r2" => r#"{"b": [{"equals-ignore-case": "XyZ"}]}"#,
        "r3" => r#"{"b": [{"equals-ignore-case": "xyZ"}]}"#
    );
    assert_matches!(matcher, r#"{"a": "abc"}"#, vec!["r1"]);
    assert_match_count!(
        matcher,
        r#"{"b": "XYZ"}"#,
        2,
        "Both r2 and r3 should match XYZ"
    );
}
/// Expects `equals-ignore-case` on a Greek word to yield `p1` for its
/// lowercase and uppercase spellings.
#[test]
fn test_equals_ignore_case_unicode() {
    let matcher = q!("p1" => r#"{"word": [{"equals-ignore-case": "Σοφία"}]}"#);
    let spellings = [
        (r#"{"word": "σοφία"}"#, "Greek sigma case folding"),
        (r#"{"word": "ΣΟΦΊΑ"}"#, "Greek uppercase"),
    ];
    for (payload, label) in spellings {
        assert_matches!(matcher, payload, vec!["p1"], label);
    }
}
/// Expects `numeric [">", 18]` to yield `p1` for 25 and nothing for 18 or 15.
#[test]
fn test_numeric_greater_than() {
    let pattern = r#"{"age": [{"numeric": [">", 18]}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(matcher, r#"{"age": 25}"#, vec!["p1"]);
    assert_no_match!(matcher, r#"{"age": 18}"#);
    assert_no_match!(matcher, r#"{"age": 15}"#);
}
/// Expects the inclusive range `>= 0, <= 100` to yield `p1` for 50 and both
/// endpoints, and nothing for 101.
#[test]
fn test_numeric_range() {
    let pattern = r#"{"score": [{"numeric": [">=", 0, "<=", 100]}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(matcher, r#"{"score": 50}"#, vec!["p1"]);
    assert_matches!(matcher, r#"{"score": 0}"#, vec!["p1"]);
    assert_matches!(matcher, r#"{"score": 100}"#, vec!["p1"]);
    assert_no_match!(matcher, r#"{"score": 101}"#);
}
/// Expects `numeric ["=", 42]` to yield `p1` for 42 and nothing for 43.
#[test]
fn test_numeric_equals() {
    let pattern = r#"{"count": [{"numeric": ["=", 42]}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(matcher, r#"{"count": 42}"#, vec!["p1"]);
    assert_no_match!(matcher, r#"{"count": 43}"#);
}
/// Expects the range `>= 300, <= 400` to yield `p1` for 350 written both as an
/// integer and in scientific notation (`3.5e2`).
#[test]
fn test_numeric_scientific_notation() {
    let matcher = q!("p1" => r#"{"value": [{"numeric": [">=", 300, "<=", 400]}]}"#);
    let spellings = [
        (r#"{"value": 350}"#, "Integer 350"),
        (r#"{"value": 3.5e2}"#, "Scientific 3.5e2"),
    ];
    for (payload, label) in spellings {
        assert_matches!(matcher, payload, vec!["p1"], label);
    }
}
/// Expects the regex `[A-Z]{3}-[0-9]{3}` to yield `p1` for `ABC-123` and nothing for `invalid`.
#[test]
fn test_regex_match() {
    let pattern = r#"{"code": [{"regex": "[A-Z]{3}-[0-9]{3}"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(matcher, r#"{"code": "ABC-123"}"#, vec!["p1"]);
    assert_no_match!(matcher, r#"{"code": "invalid"}"#);
}
/// Expects the regex `[a-z]+@example~.com` to yield `p1` for
/// `alice@example.com` and nothing when the dot is replaced by `X`.
#[test]
fn test_regex_with_escape() {
    let pattern = r#"{"email": [{"regex": "[a-z]+@example~.com"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(matcher, r#"{"email": "alice@example.com"}"#, vec!["p1"]);
    assert_no_match!(matcher, r#"{"email": "alice@exampleXcom"}"#);
}
/// Expects the regex `a|b` to include `p1` for `a` and `b`, and to match nothing for `c`.
#[test]
fn test_regex_alternation() {
    let pattern = r#"{"a": [{"regex": "a|b"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_has_match!(matcher, r#"{"a": "a"}"#, "p1");
    assert_has_match!(matcher, r#"{"a": "b"}"#, "p1");
    assert_no_match!(matcher, r#"{"a": "c"}"#);
}
/// Expects the regex `[hij]` to include `p1` for `h` and `i`, and to match nothing for `x`.
#[test]
fn test_regex_character_class() {
    let pattern = r#"{"a": [{"regex": "[hij]"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_has_match!(matcher, r#"{"a": "h"}"#, "p1");
    assert_has_match!(matcher, r#"{"a": "i"}"#, "p1");
    assert_no_match!(matcher, r#"{"a": "x"}"#);
}
/// Expects the regexp `a?b` to include `test` for `ab` and `b`, and to match nothing for `aab`.
#[test]
fn test_regexp_simple_optional() {
    let pattern = r#"{"a": [{"regexp": "a?b"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(
        matcher,
        r#"{"a": "ab"}"#,
        "test",
        "'a?b' should match 'ab'"
    );
    assert_has_match!(
        matcher,
        r#"{"a": "b"}"#,
        "test",
        "'a?b' should match 'b'"
    );
    assert_no_match!(
        matcher,
        r#"{"a": "aab"}"#,
        "'a?b' should NOT match 'aab'"
    );
}
/// Expects an empty regex to yield `a` for an empty string value and nothing for `hello`.
#[test]
fn test_empty_regex_matches_empty_string() {
    let pattern = r#"{"a": [{"regex": ""}]}"#;
    let matcher = q!("a" => pattern);
    assert_matches!(
        matcher,
        r#"{"a": ""}"#,
        vec!["a"],
        "empty regex should match empty string"
    );
    assert_no_match!(
        matcher,
        r#"{"a": "hello"}"#,
        "empty regex should NOT match non-empty string"
    );
}
/// Expects the CIDR `10.0.0.0/24` to yield `p1` for the first and last host
/// addresses shown and nothing for `10.0.1.1`. Skipped under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv4_basic() {
    let matcher = q!("p1" => r#"{"sourceIP": [{"cidr": "10.0.0.0/24"}]}"#);
    let inside = [
        (r#"{"sourceIP": "10.0.0.1"}"#, "10.0.0.1 in /24"),
        (r#"{"sourceIP": "10.0.0.255"}"#, "10.0.0.255 in /24"),
    ];
    for (payload, why) in inside {
        assert_matches!(matcher, payload, vec!["p1"], why);
    }
    assert_no_match!(
        matcher,
        r#"{"sourceIP": "10.0.1.1"}"#,
        "10.0.1.1 NOT in /24"
    );
}
/// Small CIDR smoke test: `10.0.0.0/8` must yield `p1` for `10.1.2.3` and
/// nothing for `192.168.1.1`.
///
/// This test is deliberately NOT marked `#[cfg_attr(miri, ignore)]`. The other
/// CIDR matching tests in this file are skipped under Miri, so this lightweight
/// case is what keeps CIDR matching covered there; ignoring it too would leave
/// Miri with no CIDR match coverage at all.
// NOTE(review): assumes a /8 pattern is cheap enough for Miri, as the test name
// implies — confirm against the Miri CI budget.
#[test]
fn test_cidr_miri_lightweight() {
    let q = q!("p1" => r#"{"sourceIP": [{"cidr": "10.0.0.0/8"}]}"#);
    assert_matches!(
        q,
        r#"{"sourceIP": "10.1.2.3"}"#,
        vec!["p1"],
        "10.1.2.3 in /8"
    );
    assert_no_match!(q, r#"{"sourceIP": "192.168.1.1"}"#, "192.168.1.1 NOT in /8");
}
/// Expects `add_pattern` to return an error for a non-IP CIDR base and for an
/// IPv4 prefix length of 33, both added to the same matcher instance.
#[test]
fn test_cidr_invalid_patterns() {
    let mut matcher = Quamina::new();
    let rejected = [
        (
            "p1",
            r#"{"ip": [{"cidr": "not-an-ip/24"}]}"#,
            "Invalid IP should be rejected",
        ),
        (
            "p2",
            r#"{"ip": [{"cidr": "10.0.0.0/33"}]}"#,
            "Invalid prefix length should be rejected",
        ),
    ];
    for (id, pattern, why) in rejected {
        let outcome = matcher.add_pattern(id, pattern);
        assert!(outcome.is_err(), "{why}");
    }
}
/// Expects the /32 CIDR `10.0.0.1/32` to yield `p32` only for `10.0.0.1`;
/// the neighbouring addresses `.0` and `.2` must match nothing.
#[test]
fn test_cidr_ipv4_prefix_mask_boundary() {
    let pattern = r#"{"ip": [{"cidr": "10.0.0.1/32"}]}"#;
    let matcher = q!("p32" => pattern);
    assert_matches!(
        matcher,
        r#"{"ip": "10.0.0.1"}"#,
        vec!["p32"],
        "10.0.0.1 should match /32 with 10.0.0.1"
    );
    // One address below and one above the single host.
    assert_no_match!(
        matcher,
        r#"{"ip": "10.0.0.0"}"#,
        "10.0.0.0 should NOT match /32 with 10.0.0.1"
    );
    assert_no_match!(
        matcher,
        r#"{"ip": "10.0.0.2"}"#,
        "10.0.0.2 should NOT match /32 with 10.0.0.1"
    );
}
/// Table-driven check of /16, /25 and /30 IPv4 CIDRs: for each row a fresh
/// matcher must yield `p1` for the in-range address and nothing for the
/// out-of-range one. Skipped under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv4_prefix_various_lengths() {
    // (label, CIDR pattern, address expected inside, address expected outside)
    let table = [
        ("/16", "172.16.0.0/16", "172.16.255.255", "172.17.0.0"),
        ("/25", "10.0.0.128/25", "10.0.0.255", "10.0.1.0"),
        ("/30", "192.168.1.0/30", "192.168.1.3", "192.168.1.4"),
    ];
    for (name, pattern_cidr, ip_match, ip_nomatch) in table {
        let pattern = format!(r#"{{"ip": [{{"cidr": "{pattern_cidr}"}}]}}"#);
        let matcher = q!("p1" => pattern.as_str());

        let inside_event = format!(r#"{{"ip": "{ip_match}"}}"#);
        let inside_msg = format!("{name}: {ip_match} should match {pattern_cidr}");
        assert_matches!(matcher, &inside_event, vec!["p1"], &inside_msg);

        let outside_event = format!(r#"{{"ip": "{ip_nomatch}"}}"#);
        let outside_msg = format!("{name}: {ip_nomatch} should NOT match {pattern_cidr}");
        assert_no_match!(matcher, &outside_event, &outside_msg);
    }
}
/// Table-driven check of IPv6 CIDRs written with `::` compression: for each row
/// a fresh matcher must yield `p1` for the fully expanded in-range address and
/// nothing for the out-of-range one. Skipped under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv6_double_colon_variations() {
    // (CIDR pattern, address expected inside, address expected outside)
    let table = [
        (
            "2001:db8::1/128",
            "2001:db8:0:0:0:0:0:1",
            "2001:db8:0:0:0:0:0:2",
        ),
        ("::1/128", "0:0:0:0:0:0:0:1", "0:0:0:0:0:0:0:2"),
        (
            "2001:db8::/32",
            "2001:db8:0:0:0:0:0:1",
            "2001:db9:0:0:0:0:0:1",
        ),
    ];
    for (pattern_cidr, ip_match, ip_nomatch) in table {
        let pattern = format!(r#"{{"ip": [{{"cidr": "{pattern_cidr}"}}]}}"#);
        let matcher = q!("p1" => pattern.as_str());

        let inside_event = format!(r#"{{"ip": "{ip_match}"}}"#);
        let inside_msg = format!("{ip_match} should match {pattern_cidr}");
        assert_matches!(matcher, &inside_event, vec!["p1"], &inside_msg);

        let outside_event = format!(r#"{{"ip": "{ip_nomatch}"}}"#);
        let outside_msg = format!("{ip_nomatch} should NOT match {pattern_cidr}");
        assert_no_match!(matcher, &outside_event, &outside_msg);
    }
}
/// Expects a fully spelled-out eight-group IPv6 /128 to yield `p1` for the
/// exact host and nothing for an address differing in the last group.
/// Skipped under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv6_group_limit() {
    let pattern = r#"{"ip": [{"cidr": "2001:db8:0:0:0:0:0:1/128"}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"ip": "2001:db8:0:0:0:0:0:1"}"#,
        vec!["p1"],
        "exact host must match /128"
    );
    assert_no_match!(
        matcher,
        r#"{"ip": "2001:db8:0:0:0:0:0:2"}"#,
        "address differing in the last group must not match /128"
    );
}
/// Expects an IPv6 /60 (a prefix ending mid-group) to yield `p` for `ef0a`,
/// which shares the /60 network with `ef01`, and nothing for `ef10`.
/// Skipped under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv6_partial_boundary_byte_masking() {
    let pattern = r#"{"ip": [{"cidr": "2001:db8:abcd:ef01:0:0:0:0/60"}]}"#;
    let matcher = q!("p" => pattern);
    assert_matches!(
        matcher,
        r#"{"ip": "2001:db8:abcd:ef0a:0:0:0:0"}"#,
        vec!["p"],
        "/60 host 0xef0a must match the same /60 network as 0xef01"
    );
    assert_no_match!(
        matcher,
        r#"{"ip": "2001:db8:abcd:ef10:0:0:0:0"}"#,
        "/60 host 0xef10 is in a different /60 block (top-nibble boundary)"
    );
}
/// Expects `::/60` to yield `p` when the fourth group is `1` and nothing when
/// it is `ff`, i.e. the partially masked group is not treated as a wildcard.
/// Skipped under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv6_partial_range_not_treated_as_full_wildcard() {
    let pattern = r#"{"ip": [{"cidr": "::/60"}]}"#;
    let matcher = q!("p" => pattern);
    assert_matches!(
        matcher,
        r#"{"ip": "0:0:0:1:0:0:0:0"}"#,
        vec!["p"],
        "0x0001 is within /60"
    );
    assert_no_match!(
        matcher,
        r#"{"ip": "0:0:0:ff:0:0:0:0"}"#,
        "0x00ff is outside /60 [0,15]"
    );
}
/// Expects `add_pattern` to return an error for three malformed IPv6 CIDRs,
/// all added to the same matcher instance.
#[test]
fn test_cidr_ipv6_invalid_formats() {
    let mut matcher = Quamina::new();
    let rejected = [
        (
            "p1",
            r#"{"ip": [{"cidr": "2001:db8:::1/64"}]}"#,
            "Multiple :: should be rejected",
        ),
        (
            "p2",
            r#"{"ip": [{"cidr": "2001:db8::/129"}]}"#,
            "IPv6 prefix > 128 should be rejected",
        ),
        (
            "p3",
            r#"{"ip": [{"cidr": "gggg::1/64"}]}"#,
            "Invalid hex should be rejected",
        ),
    ];
    for (id, pattern, why) in rejected {
        let outcome = matcher.add_pattern(id, pattern);
        assert!(outcome.is_err(), "{why}");
    }
}
/// Expects a regexp containing a lookahead (`foo(?=bar)`) to be accepted by `add_pattern`.
#[test]
fn test_lookaround_pattern_add_to_quamina() {
    let mut engine = Quamina::<String>::new();
    let outcome = engine.add_pattern(
        String::from("test"),
        r#"{"status": [{"regexp": "foo(?=bar)"}]}"#,
    );
    assert!(
        outcome.is_ok(),
        "Lookahead pattern should be accepted: {:?}",
        outcome.err()
    );
}
/// Expects `foo(?=bar)bar` to include `test` for `foobar` and not for `foobaz`.
#[test]
fn test_positive_lookahead_match() {
    let id = String::from("test");
    let mut engine = Quamina::<String>::new();
    engine
        .add_pattern(id.clone(), r#"{"status": [{"regexp": "foo(?=bar)bar"}]}"#)
        .unwrap();

    let on_foobar = engine
        .matches_for_event(r#"{"status": "foobar"}"#.as_bytes())
        .unwrap();
    assert!(
        on_foobar.contains(&id),
        "foo(?=bar)bar should match 'foobar'"
    );

    let on_foobaz = engine
        .matches_for_event(r#"{"status": "foobaz"}"#.as_bytes())
        .unwrap();
    assert!(
        !on_foobaz.contains(&id),
        "foo(?=bar)bar should NOT match 'foobaz'"
    );
}
/// Expects `foo(?!bar)...` to include `test` for `foobaz` and not for `foobar`.
#[test]
fn test_negative_lookahead_match() {
    let id = String::from("test");
    let mut engine = Quamina::<String>::new();
    engine
        .add_pattern(id.clone(), r#"{"status": [{"regexp": "foo(?!bar)..."}]}"#)
        .unwrap();

    let on_foobaz = engine
        .matches_for_event(r#"{"status": "foobaz"}"#.as_bytes())
        .unwrap();
    assert!(
        on_foobaz.contains(&id),
        "foo(?!bar)... should match 'foobaz'"
    );

    let on_foobar = engine
        .matches_for_event(r#"{"status": "foobar"}"#.as_bytes())
        .unwrap();
    assert!(
        !on_foobar.contains(&id),
        "foo(?!bar)... should NOT match 'foobar'"
    );
}
/// Expects `(?<=foo)bar` to include `test` for `foobar` and not for `xxxbar`.
#[test]
fn test_lookbehind_match() {
    let id = String::from("test");
    let mut engine = Quamina::<String>::new();
    engine
        .add_pattern(id.clone(), r#"{"status": [{"regexp": "(?<=foo)bar"}]}"#)
        .unwrap();

    let on_foobar = engine
        .matches_for_event(r#"{"status": "foobar"}"#.as_bytes())
        .unwrap();
    assert!(on_foobar.contains(&id), "(?<=foo)bar should match 'foobar'");

    let on_xxxbar = engine
        .matches_for_event(r#"{"status": "xxxbar"}"#.as_bytes())
        .unwrap();
    assert!(
        !on_xxxbar.contains(&id),
        "(?<=foo)bar should NOT match 'xxxbar'"
    );
}
/// Expects `(?<!foo)bar` to include `test` for `xxxbar` and not for `foobar`.
#[test]
fn test_negative_lookbehind_match() {
    let id = String::from("test");
    let mut engine = Quamina::<String>::new();
    engine
        .add_pattern(id.clone(), r#"{"status": [{"regexp": "(?<!foo)bar"}]}"#)
        .unwrap();

    let on_xxxbar = engine
        .matches_for_event(r#"{"status": "xxxbar"}"#.as_bytes())
        .unwrap();
    assert!(on_xxxbar.contains(&id), "(?<!foo)bar should match 'xxxbar'");

    let on_foobar = engine
        .matches_for_event(r#"{"status": "foobar"}"#.as_bytes())
        .unwrap();
    assert!(
        !on_foobar.contains(&id),
        "(?<!foo)bar should NOT match 'foobar'"
    );
}
/// Registers a lookahead regexp and then an exact-value pattern on the same
/// field, and checks that each event hits only the pattern it should
/// (and that an unrelated value hits nothing).
#[test]
fn test_lookahead_with_exact_on_same_field() {
    let look = String::from("look");
    let exact = String::from("exact");

    let mut engine = Quamina::<String>::new();
    engine
        .add_pattern(look.clone(), r#"{"v": [{"regexp": "foo(?=bar)bar"}]}"#)
        .unwrap();
    engine
        .add_pattern(exact.clone(), r#"{"v": ["hello"]}"#)
        .unwrap();

    let on_foobar = engine.matches_for_event(br#"{"v": "foobar"}"#).unwrap();
    assert!(
        on_foobar.contains(&look),
        "lookahead pattern should match 'foobar' even with exact pattern on same field"
    );
    assert!(
        !on_foobar.contains(&exact),
        "exact pattern 'hello' should NOT match 'foobar'"
    );

    let on_hello = engine.matches_for_event(br#"{"v": "hello"}"#).unwrap();
    assert!(
        on_hello.contains(&exact),
        "exact pattern should match 'hello'"
    );
    assert!(
        !on_hello.contains(&look),
        "lookahead pattern should NOT match 'hello'"
    );

    let on_other = engine.matches_for_event(br#"{"v": "other"}"#).unwrap();
    assert!(on_other.is_empty(), "no pattern should match 'other'");
}
/// Same field, reverse insertion order: the exact pattern is added before the
/// lookahead regexp, and each must still hit its own event.
#[test]
fn test_exact_added_before_lookahead_on_same_field() {
    let look = String::from("look");
    let exact = String::from("exact");

    let mut engine = Quamina::<String>::new();
    engine
        .add_pattern(exact.clone(), r#"{"v": ["hello"]}"#)
        .unwrap();
    engine
        .add_pattern(look.clone(), r#"{"v": [{"regexp": "foo(?=bar)bar"}]}"#)
        .unwrap();

    let on_foobar = engine.matches_for_event(br#"{"v": "foobar"}"#).unwrap();
    assert!(
        on_foobar.contains(&look),
        "lookahead pattern should match 'foobar' when exact was added first"
    );

    let on_hello = engine.matches_for_event(br#"{"v": "hello"}"#).unwrap();
    assert!(
        on_hello.contains(&exact),
        "exact pattern should match 'hello'"
    );
}
/// Registers a lookahead regexp and an exact pattern that both accept `foobar`
/// and expects that single event to return exactly those two ids.
#[test]
fn test_singleton_hit_and_multi_condition_hit_same_value() {
    let look = String::from("look");
    let exact = String::from("exact");

    let mut engine = Quamina::<String>::new();
    engine
        .add_pattern(look.clone(), r#"{"v": [{"regexp": "foo(?=bar)bar"}]}"#)
        .unwrap();
    engine
        .add_pattern(exact.clone(), r#"{"v": ["foobar"]}"#)
        .unwrap();

    let hits = engine.matches_for_event(br#"{"v": "foobar"}"#).unwrap();
    assert!(hits.contains(&look), "lookahead should match 'foobar'");
    assert!(hits.contains(&exact), "exact should also match 'foobar'");
    assert_eq!(hits.len(), 2, "both patterns should match");
}
/// Expects the regexp `~bhello` to include `test` for the value `hello`.
#[test]
fn test_wb_start_word_char() {
    let pattern = r#"{"name": [{"regexp": "~bhello"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "hello"}"#, "test");
}
/// Expects the regexp `~bhello` NOT to include `test` for a value with a leading space.
#[test]
fn test_wb_start_non_word_char() {
    let pattern = r#"{"name": [{"regexp": "~bhello"}]}"#;
    let matcher = q!("test" => pattern);
    assert_no_has_match!(matcher, r#"{"name": " hello"}"#, "test");
}
/// Expects the regexp `hello~b` to include `test` for the value `hello`.
#[test]
fn test_wb_end_word_char() {
    let pattern = r#"{"name": [{"regexp": "hello~b"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "hello"}"#, "test");
}
/// Expects the regexp `hello~b` NOT to include `test` for a value with a trailing space.
#[test]
fn test_wb_end_non_word_char() {
    let pattern = r#"{"name": [{"regexp": "hello~b"}]}"#;
    let matcher = q!("test" => pattern);
    assert_no_has_match!(matcher, r#"{"name": "hello "}"#, "test");
}
/// Expects the regexp `hello~b world` to include `test` for `hello world`.
#[test]
fn test_wb_middle_word_to_nonword() {
    let pattern = r#"{"name": [{"regexp": "hello~b world"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "hello world"}"#, "test");
}
/// Expects the regexp `hello ~bworld` to include `test` for `hello world`.
#[test]
fn test_wb_middle_nonword_to_word() {
    let pattern = r#"{"name": [{"regexp": "hello ~bworld"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "hello world"}"#, "test");
}
/// Expects (via `assert_add_err!`) that adding the regexp `hello~bworld` fails.
#[test]
fn test_wb_middle_word_to_word_err() {
    let mut matcher = Quamina::new();
    assert_add_err!(
        matcher,
        "test",
        r#"{"name": [{"regexp": "hello~bworld"}]}"#
    );
}
/// Expects the regexp `hello~Bworld` to include `test` for `helloworld`.
#[test]
fn test_nwb_word_to_word() {
    let pattern = r#"{"name": [{"regexp": "hello~Bworld"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "helloworld"}"#, "test");
}
/// Expects (via `assert_add_err!`) that adding the regexp `hello~B world` fails.
#[test]
fn test_nwb_word_to_nonword_err() {
    let mut matcher = Quamina::new();
    assert_add_err!(
        matcher,
        "test",
        r#"{"name": [{"regexp": "hello~B world"}]}"#
    );
}
/// Expects the regexp `~B hello` to include `test` for a value with a leading space.
#[test]
fn test_nwb_start_nonword() {
    let pattern = r#"{"name": [{"regexp": "~B hello"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": " hello"}"#, "test");
}
/// Expects (via `assert_add_err!`) that adding the regexp `~Bhello` fails.
#[test]
fn test_nwb_start_word_err() {
    let mut matcher = Quamina::new();
    assert_add_err!(
        matcher,
        "test",
        r#"{"name": [{"regexp": "~Bhello"}]}"#
    );
}
/// Expects `.*~bcat~b.*` to include `test` when `cat` appears as a separate word mid-sentence.
#[test]
fn test_wb_whole_word_match() {
    let pattern = r#"{"name": [{"regexp": ".*~bcat~b.*"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "the cat sat"}"#, "test");
}
/// Expects `.*~bcat~b.*` NOT to include `test` for `concatenate`, where `cat` is embedded.
#[test]
fn test_wb_whole_word_no_match() {
    let pattern = r#"{"name": [{"regexp": ".*~bcat~b.*"}]}"#;
    let matcher = q!("test" => pattern);
    assert_no_has_match!(matcher, r#"{"name": "concatenate"}"#, "test");
}
/// Expects `.*~bcat~b.*` to include `test` when `cat` is the first word of the value.
#[test]
fn test_wb_whole_word_at_start() {
    let pattern = r#"{"name": [{"regexp": ".*~bcat~b.*"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "cat is here"}"#, "test");
}
/// Expects `.*~bcat~b.*` to include `test` when `cat` is the last word of the value.
#[test]
fn test_wb_whole_word_at_end() {
    let pattern = r#"{"name": [{"regexp": ".*~bcat~b.*"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "the cat"}"#, "test");
}
/// Expects `.*~bcat~b.*` to include `test` when the value is exactly `cat`.
#[test]
fn test_wb_whole_word_only() {
    let pattern = r#"{"name": [{"regexp": ".*~bcat~b.*"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "cat"}"#, "test");
}
/// Expects (via `assert_add_err!`) that adding the regexp `a~b_` fails.
#[test]
fn test_wb_underscore_is_word_char() {
    let mut matcher = Quamina::new();
    assert_add_err!(
        matcher,
        "test",
        r#"{"name": [{"regexp": "a~b_"}]}"#
    );
}
/// Expects the regexp `a~B_` to include `test` for the value `a_`.
#[test]
fn test_nwb_underscore_is_word_char() {
    let pattern = r#"{"name": [{"regexp": "a~B_"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "a_"}"#, "test");
}
/// Expects the regexp `abc3~b end` to include `test` for `abc3 end`.
#[test]
fn test_wb_digit_to_space() {
    let pattern = r#"{"name": [{"regexp": "abc3~b end"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "abc3 end"}"#, "test");
}
/// Expects the regexp `[0-9]~b ` (digit class, `~b`, space) to include `test` for `5 `.
#[test]
fn test_wb_with_char_class() {
    let pattern = r#"{"name": [{"regexp": "[0-9]~b "}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "5 "}"#, "test");
}
/// Expects the regexp `.~b.` to include `test` for `a ` and not for `ab`.
#[test]
fn test_wb_with_dot() {
    let pattern = r#"{"name": [{"regexp": ".~b."}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"name": "a "}"#, "test");
    assert_no_has_match!(matcher, r#"{"name": "ab"}"#, "test");
}
/// Expects the regexp `a+~b ` to include `test` for `aaa ` and `a `, and not for `aab`.
#[test]
fn test_wb_plus_quantifier() {
    let pattern = r#"{"v": [{"regexp": "a+~b "}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"v": "aaa "}"#, "test");
    assert_has_match!(matcher, r#"{"v": "a "}"#, "test");
    assert_no_has_match!(matcher, r#"{"v": "aab"}"#, "test");
}
/// Expects the regexp `xa?~b ` to include `test` for `xa ` and not for `xab`.
#[test]
fn test_wb_optional_quantifier() {
    let pattern = r#"{"v": [{"regexp": "xa?~b "}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"v": "xa "}"#, "test");
    assert_no_has_match!(matcher, r#"{"v": "xab"}"#, "test");
}
/// Expects the regexp `a{2,4}~b ` to include `test` for two and four `a`s
/// followed by a space, and not for a single `a`.
#[test]
fn test_wb_range_quantifier() {
    let pattern = r#"{"v": [{"regexp": "a{2,4}~b "}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"v": "aa "}"#, "test");
    assert_has_match!(matcher, r#"{"v": "aaaa "}"#, "test");
    assert_no_has_match!(matcher, r#"{"v": "a "}"#, "test");
}
/// Expects the regexp `caf~bé` to include `test` for the value `café`.
#[test]
fn test_wb_utf8_multibyte() {
    let pattern = r#"{"v": [{"regexp": "caf~bé"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, r#"{"v": "café"}"#, "test");
}
/// Expects the regexp `.~b.` to include `test` for `a` followed by an emoji, and not for `ab`.
#[test]
fn test_wb_utf8_emoji_boundary() {
    let pattern = r#"{"v": [{"regexp": ".~b."}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, "{\"v\": \"a😀\"}", "test");
    assert_no_has_match!(matcher, r#"{"v": "ab"}"#, "test");
}
/// Expects the regexp `.~bcat` to include `test` for `écat` and not for `acat`.
#[test]
fn test_wb_utf8_nonword_to_word() {
    let pattern = r#"{"v": [{"regexp": ".~bcat"}]}"#;
    let matcher = q!("test" => pattern);
    assert_has_match!(matcher, "{\"v\": \"écat\"}", "test");
    assert_no_has_match!(matcher, r#"{"v": "acat"}"#, "test");
}
/// Expects range quantifiers up to 100 (`x{98,100}`, `x{100}`) and the
/// open-ended `x{2,}` to be accepted and to match as asserted below.
/// Skipped under Miri.
#[test]
#[cfg_attr(miri, ignore)]
fn test_range_quantifier_at_max_accepted() {
    // Bounded range: 99 repetitions fall inside 98..=100, a single one does not.
    let bounded = q!("p" => r#"{"v": [{"regexp": "x{98,100}"}]}"#);
    let ninety_nine = format!(r#"{{"v": "{}"}}"#, "x".repeat(99));
    assert_has_match!(bounded, &ninety_nine, "p");
    assert_no_has_match!(bounded, r#"{"v": "x"}"#, "p");

    // Exact count of 100.
    let exact = q!("p" => r#"{"v": [{"regexp": "x{100}"}]}"#);
    let hundred = format!(r#"{{"v": "{}"}}"#, "x".repeat(100));
    assert_has_match!(exact, &hundred, "p");

    // Open-ended lower bound.
    let open_ended = q!("p" => r#"{"v": [{"regexp": "x{2,}"}]}"#);
    assert_has_match!(open_ended, r#"{"v": "xxx"}"#, "p");
    assert_no_has_match!(open_ended, r#"{"v": "x"}"#, "p");
}
/// Miri-only variant: checks just that `parse_regexp` accepts quantifiers at
/// the 100 limit, without building a matcher.
#[test]
#[cfg(miri)]
fn test_range_quantifier_at_max_accepted_miri() {
    use crate::regexp::parse_regexp;

    let accepted = ["x{98,100}", "x{100}", "x{100,}"];
    for rx in accepted {
        if let Err(e) = parse_regexp(rx) {
            panic!("{rx} should be accepted, got: {e}");
        }
    }
}
/// Expects every range quantifier with a bound above 100 to be rejected by
/// `add_pattern` with an error whose text mentions "quantifier".
#[test]
fn test_range_quantifier_over_max_rejected() {
    let over_limit = [
        "x{1,101}",
        "x{101}",
        "x{101,}",
        "x{101,200}",
        "x{1,65535}",
        "x{1,65536}",
    ];
    let mut engine = Quamina::<String>::new();
    for rx in over_limit {
        let pattern = format!(r#"{{"v": [{{"regexp": "{rx}"}}]}}"#);
        let outcome = engine.add_pattern("p".to_string(), &pattern);
        let err = outcome.expect_err(&format!("{rx} should be rejected"));
        let rendered = err.to_string();
        assert!(
            rendered.contains("quantifier"),
            "{rx} should fail in the quantifier parser, got: {err}"
        );
    }
}
/// Expects a pattern value containing the JSON escape `\n` to yield `p1` for an
/// event value containing the same escape.
#[test]
fn test_json_escape_sequences() {
    let pattern = r#"{"msg": ["line1\nline2"]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"msg": "line1\nline2"}"#,
        vec!["p1"],
        "Should match \\n escape sequence"
    );
}
/// Expects an event value written entirely as `\uXXXX` escapes to yield `p1`
/// for the plain-text pattern `Hello`.
#[test]
fn test_unicode_escape_in_event() {
    let pattern = r#"{"greeting": ["Hello"]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"greeting": "\u0048\u0065\u006c\u006c\u006f"}"#,
        vec!["p1"],
        "Unicode escape should decode to 'Hello'"
    );
}
/// Expects an event value written as a UTF-16 surrogate pair escape to yield
/// `p1` for a pattern containing the emoji itself.
#[test]
fn test_unicode_escape_emoji() {
    let pattern = r#"{"emoji": ["💋"]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"emoji": "\ud83d\udc8b"}"#,
        vec!["p1"],
        "UTF-16 surrogate pair should decode to emoji"
    );
}
/// Places `{"exists": false}` on the middle, first and last field in turn; since
/// the event carries all three fields, none of the patterns may match.
#[test]
fn test_exists_false_ordering() {
    let payload = r#"{"aField": "a", "bField": "b", "cField": "c"}"#.as_bytes();
    let never_matching = [
        r#"{"aField": ["a"], "bField": [{"exists": false}], "cField": ["c"]}"#,
        r#"{"aField": [{"exists": false}], "bField": ["b"], "cField": ["c"]}"#,
        r#"{"aField": ["a"], "bField": ["b"], "cField": [{"exists": false}]}"#,
    ];
    for (i, pattern) in never_matching.iter().enumerate() {
        // A fresh matcher per pattern keeps the cases independent.
        let mut matcher = Quamina::new();
        matcher.add_pattern(format!("p{i}"), pattern).unwrap();
        let hits = matcher.matches_for_event(payload).unwrap();
        assert!(hits.is_empty(), "Pattern {i} should NOT match: {pattern}");
    }
}
/// Registers three exact values where each is a prefix of the next and expects
/// every event to yield only its own pattern; `foot` must match nothing.
#[test]
fn test_overlapping_exact_match_patterns() {
    let matcher = q!(
        "p1" => r#"{"a": ["foo"]}"#,
        "p2" => r#"{"a": ["football"]}"#,
        "p3" => r#"{"a": ["footballer"]}"#
    );
    assert_matches!(
        matcher,
        r#"{"x": 3, "a": "foo"}"#,
        vec!["p1"],
        "foo should only match p1"
    );
    assert_matches!(
        matcher,
        r#"{"x": 3, "a": "football"}"#,
        vec!["p2"],
        "football should only match p2"
    );
    assert_matches!(
        matcher,
        r#"{"x": 3, "a": "footballer"}"#,
        vec!["p3"],
        "footballer should only match p3"
    );
    // A value between two registered ones must not match either.
    assert_no_match!(
        matcher,
        r#"{"a": "foot"}"#,
        "foot should not match any pattern"
    );
}
/// Expects `anything-but 404` to yield `p1` for 200, nothing for 404, and `p1`
/// for a non-numeric string value.
#[test]
fn test_anything_but_numeric_single() {
    let pattern = r#"{"code": [{"anything-but": 404}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"code": 200}"#,
        vec!["p1"],
        "Should match non-excluded number"
    );
    assert_no_match!(
        matcher,
        r#"{"code": 404}"#,
        "Should not match excluded number"
    );
    assert_matches!(
        matcher,
        r#"{"code": "not-a-number"}"#,
        vec!["p1"],
        "Non-numeric value passes numeric anything-but"
    );
}
/// Expects `anything-but [400, 404, 500]` to yield `p1` for 200 and nothing for 404 or 500.
#[test]
fn test_anything_but_numeric_array() {
    let pattern = r#"{"code": [{"anything-but": [400, 404, 500]}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"code": 200}"#,
        vec!["p1"],
        "Should match non-excluded number"
    );
    assert_no_match!(
        matcher,
        r#"{"code": 404}"#,
        "Should not match excluded number"
    );
    assert_no_match!(
        matcher,
        r#"{"code": 500}"#,
        "Should not match another excluded number"
    );
}
/// Expects `anything-but [9.99, 19.99]` to yield `p1` for 14.99 and nothing for 9.99.
#[test]
fn test_anything_but_numeric_float() {
    let pattern = r#"{"price": [{"anything-but": [9.99, 19.99]}]}"#;
    let matcher = q!("p1" => pattern);
    assert_matches!(
        matcher,
        r#"{"price": 14.99}"#,
        vec!["p1"],
        "Should match non-excluded float"
    );
    assert_no_match!(
        matcher,
        r#"{"price": 9.99}"#,
        "Should not match excluded float"
    );
}
/// Combines an exact pattern and an `equals-ignore-case` pattern on the same
/// field: each event must yield only its own pattern, and `bar` nothing.
#[test]
fn test_equals_ignore_case_with_exact_match() {
    let matcher = q!(
        "singleton" => r#"{"x": ["singleton"]}"#,
        "mono" => r#"{"x": [{"equals-ignore-case": "foo"}]}"#
    );
    let expectations = [
        (
            r#"{"x": "singleton"}"#,
            "singleton",
            "Exact match should work",
        ),
        (
            r#"{"x": "FoO"}"#,
            "mono",
            "Case-insensitive match should work",
        ),
    ];
    for (payload, id, why) in expectations {
        assert_matches!(matcher, payload, vec![id], why);
    }
    assert_no_match!(
        matcher,
        r#"{"x": "bar"}"#,
        "Unrelated value should not match"
    );
}
/// Runs a handful of regexes, each in a fresh matcher, against values that
/// must and must not match.
#[test]
fn test_regex_various_patterns() {
    // (pattern id, regex, values that must match, values that must not match)
    let table: &[(&str, &str, &[&str], &[&str])] = &[
        ("p1", "a|b", &["a", "b"], &["c"]),
        ("p2", "[hij]", &["h", "i", "j"], &["x"]),
        ("p3", "a[e-g]x", &["aex", "afx", "agx"], &["ax"]),
        (
            "p4",
            "[0-9][0-9][rtn][dh]",
            &["11th", "23rd", "22nd"],
            &["first"],
        ),
    ];

    for &(id, regex, accepted, rejected) in table {
        let mut q = Quamina::new();
        let pattern = format!(r#"{{"a": [{{"regex": "{regex}"}}]}}"#);
        q.add_pattern(id, &pattern).unwrap();

        for m in accepted {
            let event = format!(r#"{{"a": "{m}"}}"#);
            assert_has_match!(q, &event, id, &format!("'{regex}' should match '{m}'"));
        }
        for m in rejected {
            let event = format!(r#"{{"a": "{m}"}}"#);
            assert_no_match!(q, &event, &format!("'{regex}' should NOT match '{m}'"));
        }
    }
}
/// Regexp samples driven through the full matcher by `test_regexp_end2end`.
///
/// For each entry, `regex` is embedded in a `{"regexp": ...}` pattern; every
/// string in `pattern_ids` is a field value that must match that pattern
/// (despite the field name, these are values, not pattern ids), and every
/// string in `nomatches` is a value that must not. All entries here are
/// flagged `valid: true`.
const END2END_REGEXP_SAMPLES: &[crate::regexp_samples::RegexpSample] = &[
crate::regexp_samples::RegexpSample {
regex: "(xyz)?a?b",
pattern_ids: &["xyzb", "xyzab", "ab", "b"],
nomatches: &["xyzc", "c", "xyza"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "a|b",
pattern_ids: &["a", "b"],
nomatches: &["x"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "a",
pattern_ids: &["a"],
nomatches: &["b", ""],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "a.b",
pattern_ids: &["axb", "a.b"],
nomatches: &["ab", "axxb"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "abc|def",
pattern_ids: &["abc", "def"],
nomatches: &["x"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "[hij]",
pattern_ids: &["h", "i", "j"],
nomatches: &["x"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "a[e-g]x",
pattern_ids: &["aex", "afx", "agx"],
nomatches: &["ax", "axx"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "[ae-gx]",
pattern_ids: &["a", "e", "f", "g", "x"],
nomatches: &["b"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "[-ab]",
pattern_ids: &["-", "a", "b"],
nomatches: &["c"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "[ab-]",
pattern_ids: &["-", "a", "b"],
nomatches: &["c"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "[~[~]]",
pattern_ids: &["[", "]"],
nomatches: &["a"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "[a-c]|[xz]",
pattern_ids: &["a", "b", "c", "x", "z"],
nomatches: &["w"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "[ac-e]h|p[xy]",
pattern_ids: &["ah", "ch", "dh", "eh", "px", "py"],
nomatches: &["xp"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "[0-9][0-9][rtn][dh]",
pattern_ids: &["11th", "23rd", "22nd"],
nomatches: &["first", "9th"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "a(h|i)z",
pattern_ids: &["ahz", "aiz"],
nomatches: &["a.z"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "a([1-3]|ac)z",
pattern_ids: &["a1z", "a2z", "a3z", "aacz"],
nomatches: &["a.z", "a0z"],
valid: true,
},
crate::regexp_samples::RegexpSample {
regex: "a(h|([x-z]|(1|2)))z",
pattern_ids: &["ahz", "axz", "a1z", "a2z"],
nomatches: &["a.z"],
valid: true,
},
];
/// Drives every `END2END_REGEXP_SAMPLES` entry through the public matcher:
/// first each regexp alone in its own matcher (matches and non-matches), then
/// all regexps merged into one matcher (matches only).
#[test]
#[cfg_attr(miri, ignore)]
fn test_regexp_end2end() {
    // Phase 1: one matcher per sample.
    for sample in END2END_REGEXP_SAMPLES {
        let mut q = Quamina::new();
        let pattern = format!(r#"{{"a": [{{"regexp": "{}"}}]}}"#, sample.regex);
        if let Err(e) = q.add_pattern("test", &pattern) {
            panic!("Failed to add pattern '{}': {}", sample.regex, e);
        }

        for m in sample.pattern_ids {
            let event = format!(r#"{{"a": "{m}"}}"#);
            let found = q.matches_for_event(event.as_bytes()).unwrap();
            assert!(
                found.contains(&"test"),
                "Pattern '{}' should match '{}', but didn't",
                sample.regex,
                m
            );
        }

        for m in sample.nomatches {
            let event = format!(r#"{{"a": "{m}"}}"#);
            let found = q.matches_for_event(event.as_bytes()).unwrap();
            assert!(
                found.is_empty(),
                "Pattern '{}' should NOT match '{}', but did",
                sample.regex,
                m
            );
        }
    }

    // Phase 2: every sample registered in a single matcher under id "p<index>".
    let mut merged = Quamina::new();
    for (i, sample) in END2END_REGEXP_SAMPLES.iter().enumerate() {
        let pattern = format!(r#"{{"a": [{{"regexp": "{}"}}]}}"#, sample.regex);
        if let Err(e) = merged.add_pattern(format!("p{i}"), &pattern) {
            panic!("Failed to add pattern '{}': {}", sample.regex, e);
        }
    }

    for (i, sample) in END2END_REGEXP_SAMPLES.iter().enumerate() {
        let own_id = format!("p{i}");
        for m in sample.pattern_ids {
            let event = format!(r#"{{"a": "{m}"}}"#);
            let found = merged.matches_for_event(event.as_bytes()).unwrap();
            assert!(
                found.contains(&own_id),
                "Merged FA: Pattern '{}' should match '{}', but didn't",
                sample.regex,
                m
            );
        }
    }
}
/// `*abab` must match values that end in "abab" even when the suffix overlaps
/// earlier partial occurrences.
#[test]
fn test_shellstyle_long_case() {
    let q = q!("p1" => r#"{"x": [{"shellstyle": "*abab"}]}"#);
    let inputs = ["abaabab", "ababab", "ababaabab", "abab"];
    inputs.into_iter().for_each(|text| {
        let event = format!(r#"{{"x": "{text}"}}"#);
        assert_matches!(q, &event, vec!["p1"], &format!("Should match {text}"));
    });
}
/// Several shellstyle patterns on the same field: each event must report the
/// listed pattern id, and an unrelated value matches nothing.
#[test]
fn test_multiple_shellstyle_same_field() {
    let q = q!(
        "suffix_bc" => r#"{"x": [{"shellstyle": "*bc"}]}"#,
        "suffix_xc" => r#"{"x": [{"shellstyle": "*xc"}]}"#,
        "prefix_ab" => r#"{"x": [{"shellstyle": "ab*"}]}"#
    );

    // (event, a pattern id that must be among the matches)
    let expected = [
        (r#"{"x": "abc"}"#, "suffix_bc"),
        (r#"{"x": "abc"}"#, "prefix_ab"),
        (r#"{"x": "axc"}"#, "suffix_xc"),
        (r#"{"x": "abcdef"}"#, "prefix_ab"),
    ];
    for (event, id) in expected {
        assert_has_match!(q, event, id);
    }

    assert_no_match!(q, r#"{"x": "xyz"}"#);
}
/// A shellstyle prefix pattern and an `anything-but` pattern on the same
/// field: checks both the number of matches and which ids are reported.
#[test]
fn test_anything_but_with_shellstyle() {
    let q = q!(
        "pFooStar" => r#"{"z": [{"shellstyle": "foo*"}]}"#,
        "pAbFoot" => r#"{"z": [{"anything-but": ["foot"]}]}"#
    );

    let foo = r#"{"z": "foo"}"#;
    let foot = r#"{"z": "foot"}"#;
    let bar = r#"{"z": "bar"}"#;

    // "foo": satisfies the prefix and is not the excluded word.
    assert_match_count!(q, foo, 2);
    assert_has_match!(q, foo, "pFooStar");
    assert_has_match!(q, foo, "pAbFoot");

    // "foot": satisfies the prefix but is the excluded word.
    assert_match_count!(q, foot, 1);
    assert_has_match!(q, foot, "pFooStar");

    // "bar": only the anything-but pattern applies.
    assert_match_count!(q, bar, 1);
    assert_has_match!(q, bar, "pAbFoot");
}
/// `anything-but` where the excluded words are prefixes of each other: the
/// excluded words match nothing, while shorter and longer neighbours match once.
#[test]
fn test_anything_but_with_overlapping_exclusions() {
    let q = q!("notTTT" => r#"{"x": [{"anything-but": ["tim", "time", "timed"]}]}"#);

    for event in ["tim", "time", "timed"].map(|val| format!(r#"{{"x": "{val}"}}"#)) {
        assert_no_match!(q, event);
    }

    let included = ["t", "ti", "timer", "timely", "timekeeper"];
    for event in included.map(|val| format!(r#"{{"x": "{val}"}}"#)) {
        assert_match_count!(q, event, 1);
    }
}
/// Builds an `anything-but` pattern from five "problem" words, checks that
/// each of them is excluded, and, when `testdata/wwords.txt` is present,
/// checks that every non-blank line of that file matches exactly once.
#[test]
#[cfg_attr(miri, ignore)]
fn test_anything_but_wordle_words() {
    use std::fs;
    use std::path::Path;

    let problem_words = ["bloo", "aper", "fnord", "doubts", "astern"];

    // JSON array body: each word quoted, comma-separated.
    let quoted = problem_words
        .iter()
        .map(|w| format!("\"{w}\""))
        .collect::<Vec<String>>()
        .join(",");
    let pattern = format!(r#"{{"a": [{{"anything-but": [{}]}}]}}"#, quoted);

    let mut q = Quamina::new();
    q.add_pattern("not_problems", &pattern).unwrap();

    for word in &problem_words {
        let event = format!(r#"{{"a": "{word}"}}"#);
        let found = q.matches_for_event(event.as_bytes()).unwrap();
        assert!(found.is_empty(), "Problem word '{word}' should be excluded");
    }

    // The word list is optional; without it only the checks above run.
    let wwords_path = Path::new("testdata/wwords.txt");
    if !wwords_path.exists() {
        return;
    }
    let contents = fs::read_to_string(wwords_path).unwrap();
    for word in contents.lines().map(str::trim).filter(|w| !w.is_empty()) {
        let event = format!(r#"{{"a": "{word}"}}"#);
        let found = q.matches_for_event(event.as_bytes()).unwrap();
        assert_eq!(
            found.len(),
            1,
            "Wordle word '{word}' should match anything-but pattern"
        );
    }
}
/// `*abab` against values with repeated or partial "ab" runs: positives end
/// in "abab", negatives do not.
#[test]
fn test_shellstyle_repeated_sequences() {
    let q = q!("p1" => r#"{"x": [{"shellstyle": "*abab"}]}"#);
    let event_for = |val: &str| format!(r#"{{"x": "{val}"}}"#);

    for val in ["abab", "abaabab", "ababab", "ababaabab", "xxabab"] {
        let event = event_for(val);
        assert_matches!(q, event, vec!["p1"]);
    }

    for val in ["abab_", "aba", "ab", "xaba"] {
        let event = event_for(val);
        assert_no_match!(q, event);
    }
}
/// `*9` and `x*9` must each match "xy9" on their own, and both must still
/// match once they are added to the same matcher.
#[test]
fn test_shellstyle_suffix_merged_bug() {
    let j = r#"{"Url": "xy9"}"#;
    let patterns = [
        (r#"{"Url": [{"shellstyle": "*9"}]}"#, "p0"),
        (r#"{"Url": [{"shellstyle": "x*9"}]}"#, "p1"),
    ];

    // Each pattern in isolation.
    for (pattern, name) in patterns {
        let q = q!(name => pattern);
        assert_match_count!(q, j, 1);
        assert_has_match!(q, j, name);
    }

    // Both patterns merged into one matcher.
    let mut q = Quamina::new();
    for (pattern, name) in patterns {
        q.add_pattern(name, pattern).unwrap();
    }
    assert_match_count!(q, j, 2);
    assert_has_match!(q, j, "p0");
    assert_has_match!(q, j, "p1");
}
/// Shellstyle patterns with several `*`: each pattern is checked alone
/// against values that must and must not match.
#[test]
fn test_shellstyle_complex_wildcards() {
    // (pattern, values that must match, values that must not match)
    let table: &[(&str, &[&str], &[&str])] = &[
        (
            r#"{"x": [{"shellstyle": "xx*yy*zz"}]}"#,
            &["xxabyycdzz", "xxyyzz", "xxyyzzzzz"],
            &["xyzyxzy yy zz", "zz yy xx"],
        ),
        (
            r#"{"x": [{"shellstyle": "*xx*yy*"}]}"#,
            &["xxyy", "xxyyef", "abxxyy", "abxxcdyy"],
            &["ayybyyzxx", "xyzzy"],
        ),
    ];

    for &(pattern, accepted, rejected) in table {
        let q = q!("p1" => pattern);
        for val in accepted {
            let event = format!(r#"{{"x": "{val}"}}"#);
            assert_matches!(q, event, vec!["p1"]);
        }
        for val in rejected {
            let event = format!(r#"{{"x": "{val}"}}"#);
            assert_no_match!(q, event);
        }
    }
}
/// Feeds a table of wildcard patterns to `exercise_wildcard`, each with the
/// values listed as matching and as not matching.
#[test]
fn test_wildcard_comprehensive() {
    use crate::test_helpers::exercise_wildcard;

    // (wildcard, second argument to exercise_wildcard, third argument)
    let table: &[(&str, &[&str], &[&str])] = &[
        ("*", &["", "*", "h", "hello"], &[]),
        (
            "*hello",
            &["hello", "hhello", "xxxhello", "*hello"],
            &["", "ello", "hellx", "xhellx"],
        ),
        (
            "h*llo",
            &["hllo", "hello", "hxxxllo"],
            &["", "hlo", "hll", "hellol"],
        ),
        (
            "hel*o",
            &["helo", "hello", "helxxxo"],
            &["", "hell", "helox", "hellox"],
        ),
        (
            "hello*",
            &["hello", "hellox", "hellooo", "hello*"],
            &["", "hell", "hellx", "hellxo"],
        ),
        (
            "h*l*o",
            &["hlo", "helo", "hllo", "hloo", "hello", "hxxxlxxxo", "h*l*o"],
            &["", "ho", "heeo", "helx", "llo"],
        ),
        (
            "he*l*",
            &["hel", "hexl", "helx", "helxx", "helxl", "helxlx", "helxxl"],
            &["", "he", "hex", "hexxx"],
        ),
        (
            "*l*",
            &["l", "xl", "lx", "xlx", "xxl", "lxx", "xxlxx", "xlxlxlxlxl"],
            &["", "x", "xx", "xtx"],
        ),
        (
            "*.*",
            &["a.b", "file.txt", "a.b.c", ".x", "x."],
            &["", "noperiod", "abc"],
        ),
        (
            "*a*b*",
            &["ab", "xab", "abx", "xabx", "xaxbx", "aXXbYY"],
            &["", "a", "b", "ba", "x"],
        ),
    ];

    for &(wildcard, accepted, rejected) in table {
        exercise_wildcard(wildcard, accepted, rejected);
    }
}
/// Wildcard patterns containing backslash escapes, run through
/// `exercise_wildcard`. The Rust literals here carry doubled backslashes;
/// NOTE(review): presumably they are spliced into JSON text by the helper,
/// which is not visible in this chunk — confirm there.
#[test]
fn test_wildcard_escape_sequences_comprehensive() {
    use crate::test_helpers::exercise_wildcard;

    // (wildcard, second argument to exercise_wildcard, third argument)
    let table: &[(&str, &[&str], &[&str])] = &[
        ("hel\\\\*o", &["hel*o"], &["helo", "hello"]),
        (
            "he\\\\**o",
            &["he*o", "he*llo", "he*hello"],
            &["heo", "helo"],
        ),
        ("he\\\\\\\\llo", &["he\\\\llo"], &["hello"]),
    ];

    for &(wildcard, accepted, rejected) in table {
        exercise_wildcard(wildcard, accepted, rejected);
    }
}
/// One wildcard with an escaped star between two real stars must be accepted;
/// bad escapes, adjacent stars and a trailing backslash must be rejected by
/// `add_pattern`.
#[test]
fn test_wildcard_invalid_escape_sequences() {
    // Accepted form: star, escaped star, star.
    let valid_result =
        Quamina::new().add_pattern("valid", r#"{"x": [{"wildcard": "he*\\**"}]}"#);
    assert!(
        valid_result.is_ok(),
        "he*\\** should be valid: {valid_result:?}"
    );

    // (pattern, description used in the failure message)
    let invalid_patterns = [
        (r#"{"x": [{"wildcard": "he\\llo"}]}"#, "invalid escape \\l"),
        (r#"{"x": [{"wildcard": "foo**bar"}]}"#, "adjacent **"),
        (r#"{"x": [{"wildcard": "**f"}]}"#, "leading **"),
        (r#"{"x": [{"wildcard": "x**"}]}"#, "trailing **"),
        (r#"{"x": [{"wildcard": "x\\"}]}"#, "trailing backslash"),
    ];
    for (pattern, desc) in invalid_patterns {
        let outcome = Quamina::new().add_pattern("p", pattern);
        assert!(outcome.is_err(), "{desc} should be rejected: {pattern}");
    }
}
/// Malformed or wrongly-typed `wildcard` values must make `add_pattern` fail.
#[test]
fn test_wildcard_syntax_errors() {
    let invalid_patterns = [
        // bare `.` where a value is expected
        r#"{"x": [{"wildcard": . }]}"#,
        // number instead of a string
        r#"{"x": [{"wildcard": 3}]}"#,
        // object closed with `]`
        r#"{"x": [{"wildcard": "x" ]}"#,
        // boolean instead of a string
        r#"{"x": [{"wildcard": true}]}"#,
        // null instead of a string
        r#"{"x": [{"wildcard": null}]}"#,
        // array instead of a string
        r#"{"x": [{"wildcard": ["a"]}]}"#,
    ];

    for pattern in invalid_patterns {
        let outcome = Quamina::new().add_pattern("p", pattern);
        assert!(outcome.is_err(), "Should reject invalid pattern: {pattern}");
    }
}
/// `*`, `h*o` and exact "hello" in one matcher: every listed value must be
/// reported for its pattern. No value is expected to match nothing.
#[test]
fn test_wildcard_multi_patterns_basic() {
    let rejected: &[&str] = &[];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "*"}]}"#,
            &["", "*", "h", "ho", "hello"],
        ),
        (r#"{"x":[{"wildcard": "h*o"}]}"#, &["ho", "hello"]),
        (r#"{"x":["hello"]}"#, &["hello"]),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `*hello` together with exact "abc": listed values must be reported for
/// their pattern and the rejected values must match nothing.
#[test]
fn test_wildcard_multi_patterns_suffix_exact() {
    let rejected: &[&str] = &["", "hellox", "blahabc"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "*hello"}]}"#,
            &["hello", "xhello", "hehello"],
        ),
        (r#"{"x":["abc"]}"#, &["abc"]),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `*hello` together with `h*llo` in one matcher.
#[test]
fn test_wildcard_multi_patterns_suffix_infix() {
    let rejected: &[&str] = &["", "h", "ello", "hel", "hlo", "hell"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "*hello"}]}"#,
            &["hello", "xhello", "hehello"],
        ),
        (
            r#"{"x":[{"wildcard": "h*llo"}]}"#,
            &["hllo", "hello", "hehello"],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `*hello` together with `he*lo` in one matcher.
#[test]
fn test_wildcard_multi_patterns_suffix_infix2() {
    let rejected: &[&str] = &["", "h", "ello", "hel", "heo", "hell"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "*hello"}]}"#,
            &["hello", "xhello", "hehello"],
        ),
        (
            r#"{"x":[{"wildcard": "he*lo"}]}"#,
            &["helo", "hello", "hehello"],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `*elo` together with the two-star `e*l*` in one matcher.
#[test]
fn test_wildcard_multi_patterns_suffix_double() {
    let rejected: &[&str] = &["", "e", "l", "lo", "hel"];
    let expectations: &[(&str, &[&str])] = &[
        (r#"{"x":[{"wildcard": "*elo"}]}"#, &["elo", "helo", "xhelo"]),
        (
            r#"{"x":[{"wildcard": "e*l*"}]}"#,
            &["el", "elo", "exl", "elx", "exlx", "exxl", "elxx", "exxlxx"],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `*hello` together with the two-star `he*l*` in one matcher.
#[test]
fn test_wildcard_multi_patterns_suffix_double2() {
    let rejected: &[&str] = &["", "he", "hexxo", "ello"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "*hello"}]}"#,
            &["hello", "xhello", "xxhello"],
        ),
        (
            r#"{"x":[{"wildcard": "he*l*"}]}"#,
            &[
                "hel", "hello", "helo", "hexl", "hexlx", "hexxl", "helxx", "hexxlxx",
            ],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// Two infix wildcards, `h*llo` and `he*lo`, in one matcher.
#[test]
fn test_wildcard_multi_patterns_infix_pair() {
    let rejected: &[&str] = &["", "hlo", "heo", "hllol", "helol"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "h*llo"}]}"#,
            &["hllo", "hello", "hxxxllo", "hexxxllo"],
        ),
        (
            r#"{"x":[{"wildcard": "he*lo"}]}"#,
            &["helo", "hello", "hexxxlo", "hexxxllo"],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `h*llox` together with `hel*ox` in one matcher.
#[test]
fn test_wildcard_multi_patterns_suffix_pair() {
    let rejected: &[&str] = &[
        "", "hlox", "hllo", "helo", "heox", "helx", "hellx", "helloxx", "heloxx",
    ];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "h*llox"}]}"#,
            &["hllox", "hellox", "hxxxllox", "helhllox", "hheloxllox"],
        ),
        (
            r#"{"x":[{"wildcard": "hel*ox"}]}"#,
            &["helox", "hellox", "helxxxox", "helhllox", "helhlloxox"],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `h*llo` together with the two-star `he*l*` in one matcher.
#[test]
fn test_wildcard_multi_patterns_complex1() {
    let rejected: &[&str] = &[
        "", "h", "he", "hl", "el", "hlo", "llo", "hllol", "hxll", "hexxx",
    ];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "h*llo"}]}"#,
            &["hllo", "hello", "hxxxllo", "hexxxllo", "hexxxlllo"],
        ),
        (
            r#"{"x":[{"wildcard": "he*l*"}]}"#,
            &[
                "hel",
                "helo",
                "hexl",
                "hello",
                "helol",
                "hexxxlo",
                "hexxxllo",
                "hexxxlllo",
            ],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `h*xllo` together with the two-star `hex*l*` in one matcher.
#[test]
fn test_wildcard_multi_patterns_complex2() {
    let rejected: &[&str] = &[
        "", "h", "hex", "hl", "exl", "hxlo", "xllo", "hxllol", "hxxll", "hexxx",
    ];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "h*xllo"}]}"#,
            &["hxllo", "hexllo", "hxxxllo", "hexxxllo"],
        ),
        (
            r#"{"x":[{"wildcard": "hex*l*"}]}"#,
            &[
                "hexl",
                "hexlo",
                "hexxl",
                "hexllo",
                "hexlol",
                "hexxxlo",
                "hexxxllo",
                "hexxxlllo",
            ],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `he*lo` together with `hel*o` in one matcher.
#[test]
fn test_wildcard_multi_patterns_overlap1() {
    let rejected: &[&str] = &["", "hel", "heo", "hlo", "hellxox"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "he*lo"}]}"#,
            &["helo", "hello", "hexxxlo", "helxxxlo"],
        ),
        (
            r#"{"x":[{"wildcard": "hel*o"}]}"#,
            &["helo", "hello", "hellxo", "helxxxo", "helxxxlo"],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `h*llo` together with `hel*o` in one matcher.
#[test]
fn test_wildcard_multi_patterns_overlap2() {
    let rejected: &[&str] = &["", "hlo", "hll", "hel", "helox"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "h*llo"}]}"#,
            &["hllo", "hello", "hxxxllo", "helllo"],
        ),
        (
            r#"{"x":[{"wildcard": "hel*o"}]}"#,
            &["helo", "hello", "helxo", "helllo"],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `he*lo` together with the prefix wildcard `hell*` in one matcher.
#[test]
fn test_wildcard_multi_patterns_prefix_suffix() {
    let rejected: &[&str] = &["", "he", "hel", "helox", "helx", "hxlo"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "he*lo"}]}"#,
            &["helo", "hello", "helllo", "helxlo"],
        ),
        (
            r#"{"x":[{"wildcard": "hell*"}]}"#,
            &["hell", "hello", "helllo", "hellx", "hellxxx"],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `hel*o` together with the prefix wildcard `hell*` in one matcher.
#[test]
fn test_wildcard_multi_patterns_prefix_suffix2() {
    let rejected: &[&str] = &["", "hel", "helox", "helxox", "hexo"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "hel*o"}]}"#,
            &["helo", "hello", "helllo", "hellloo", "helloo", "heloo"],
        ),
        (
            r#"{"x":[{"wildcard": "hell*"}]}"#,
            &["hell", "hello", "helllo", "hellloo", "helloo", "hellox"],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// Two prefix wildcards, `hel*` and the longer `hello*`, in one matcher.
#[test]
fn test_wildcard_multi_patterns_prefix_pair() {
    let rejected: &[&str] = &["", "he", "hex", "hexlo"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "hel*"}]}"#,
            &["hel", "helx", "hello", "hellox"],
        ),
        (r#"{"x":[{"wildcard": "hello*"}]}"#, &["hello", "hellox"]),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `*hello` together with exact "hello" in one matcher.
#[test]
fn test_wildcard_multi_patterns_suffix_exact2() {
    let rejected: &[&str] = &["", "he", "hex", "hexlo"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "*hello"}]}"#,
            &["hello", "hhello", "hhhello"],
        ),
        (r#"{"x":["hello"]}"#, &["hello"]),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `he*lo` together with exact "helox" in one matcher.
#[test]
fn test_wildcard_multi_patterns_infix_exact() {
    let rejected: &[&str] = &["", "he", "hel", "heo", "heloz", "hellox", "heloxo"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "he*lo"}]}"#,
            &["helo", "hello", "helllo"],
        ),
        (r#"{"x":["helox"]}"#, &["helox"]),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `he*l` together with exact "helox" in one matcher.
#[test]
fn test_wildcard_multi_patterns_infix_exact2() {
    let rejected: &[&str] = &["", "he", "helx", "helo", "hexlx", "hellox", "heloxx"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "he*l"}]}"#,
            &["hel", "hexl", "hexxxl"],
        ),
        (r#"{"x":["helox"]}"#, &["helox"]),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// Prefix wildcard `he*` together with exact "helox" in one matcher.
#[test]
fn test_wildcard_multi_patterns_prefix_exact() {
    let rejected: &[&str] = &["", "h", "hxlox", "hxelox"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "he*"}]}"#,
            &["he", "helo", "helox", "heloxx"],
        ),
        (r#"{"x":["helox"]}"#, &["helox"]),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// Two-star `h*l*o` together with exact "hellohello" in one matcher.
#[test]
fn test_wildcard_multi_patterns_double_exact() {
    let rejected: &[&str] = &["", "h", "he", "hel", "hexxo", "hexxohexxo"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "h*l*o"}]}"#,
            &[
                "hlo",
                "helo",
                "hllo",
                "hello",
                "hexloo",
                "hellohello",
                "hellohellxo",
            ],
        ),
        (r#"{"x":["hellohello"]}"#, &["hellohello"]),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// Two-star `he*l*` together with exact "hellohello" in one matcher.
#[test]
fn test_wildcard_multi_patterns_double_exact2() {
    let rejected: &[&str] = &["", "h", "he", "hlo", "hexxo", "hexxohexxo"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "he*l*"}]}"#,
            &[
                "hel",
                "helo",
                "hexl",
                "hello",
                "hexloo",
                "hellohellx",
                "hellohello",
            ],
        ),
        (r#"{"x":["hellohello"]}"#, &["hellohello"]),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// `abc*def` together with `abc*`, including the case where the star covers
/// zero characters ("abcdef", "abc").
#[test]
fn test_wildcard_multi_patterns_zero_expansion() {
    let rejected: &[&str] = &["ab", "abXYZ"];
    let expectations: &[(&str, &[&str])] = &[
        (
            r#"{"x":[{"wildcard": "abc*def"}]}"#,
            &["abcdef", "abcXdef", "abcXXXdef"],
        ),
        (
            r#"{"x":[{"wildcard": "abc*"}]}"#,
            &["abc", "abcdef", "abcXdef", "abcXXXdef", "abcxyz"],
        ),
    ];
    exercise_multi_patterns(rejected, expectations);
}
/// Wildcard whose JSON text is `he\\\\\\*llo` (six backslashes before the
/// star): it must match only the event whose JSON value is `he\\*llo`, and
/// none of the other backslash variants listed.
#[test]
fn test_wildcard_escape_backslash_star() {
    let q = q!("p1" => r#"{"x": [{"wildcard": "he\\\\\\*llo"}]}"#);

    let matching = r#"{"x": "he\\*llo"}"#;
    assert_matches!(q, matching, vec!["p1"]);

    let no_match_events = [
        r#"{"x": "hello"}"#,
        r#"{"x": "he\\\\llo"}"#,
        r#"{"x": "he\\llo"}"#,
        r#"{"x": "he\\xxllo"}"#,
    ];
    for &event in &no_match_events {
        assert_no_match!(q, event);
    }
}
/// Wildcard whose JSON text is `he\\\\*llo` (four backslashes before the
/// star): the listed events containing a backslash after "he" and ending in
/// "llo" must match; "hello" and a truncated ending must not.
#[test]
fn test_wildcard_escape_backslash_wildcard() {
    let q = q!("p1" => r#"{"x": [{"wildcard": "he\\\\*llo"}]}"#);

    let match_events = [
        r#"{"x": "he\\llo"}"#,
        r#"{"x": "he\\*llo"}"#,
        r#"{"x": "he\\\\llo"}"#,
        r#"{"x": "he\\xxllo"}"#,
    ];
    for &event in &match_events {
        assert_matches!(q, event, vec!["p1"]);
    }

    let no_match_events = [r#"{"x": "hello"}"#, r#"{"x": "he\\ll"}"#];
    for &event in &no_match_events {
        assert_no_match!(q, event);
    }
}
/// The same shellstyle pattern registered under two ids must report both ids.
#[test]
fn test_shellstyle_duplicate_pattern() {
    let q = q!(
        "r4" => r#"{"c": [{"shellstyle": "xy*"}]}"#,
        "r5" => r#"{"c": [{"shellstyle": "xy*"}]}"#
    );

    let event = r#"{"c": "xyzzz"}"#;
    assert_match_count!(q, event, 2);
    for id in ["r4", "r5"] {
        assert_has_match!(q, event, id);
    }
}
/// Shellstyle `12*4*`: "12345" matches, "1235" (no 4) does not.
#[test]
fn test_shellstyle_double_wildcard() {
    let q = q!("r6" => r#"{"d": [{"shellstyle": "12*4*"}]}"#);

    let with_four = r#"{"d": "12345"}"#;
    let without_four = r#"{"d": "1235"}"#;

    assert_matches!(q, with_four, vec!["r6"], "12*4* should match 12345");
    assert_no_match!(q, without_four, "12*4* should not match 1235");
}
/// A leading `*` may stand for zero characters: `*bc` matches "bc".
#[test]
fn test_shellstyle_zero_length_prefix() {
    let q = q!("r1" => r#"{"a": [{"shellstyle": "*bc"}]}"#);
    let event = r#"{"a": "bc"}"#;
    assert_matches!(q, event, vec!["r1"], "*bc should match bc (zero-length prefix)");
}
/// Events that must match neither `d*f` on field "b" nor `xy*` on field "c".
#[test]
fn test_shellstyle_ruler_negative_cases() {
    let q = q!(
        "r2" => r#"{"b": [{"shellstyle": "d*f"}]}"#,
        "r4" => r#"{"c": [{"shellstyle": "xy*"}]}"#
    );

    assert_no_match!(q, r#"{"c": "abc"}"#, "xy* should not match abc");
    assert_no_match!(q, r#"{"c": "abcxyz"}"#, "xy* should not match abcxyz");
    assert_no_match!(q, r#"{"b": "de"}"#, "d*f should not match de");
}
/// Wildcards must match values in which the star has to cover a non-ASCII
/// character ("Ő"). Each pattern is tested in its own matcher.
#[test]
fn test_wildcard_unicode_strings() {
    // (pattern id, pattern, event, assertion message)
    let cases = [
        (
            "p1",
            r#"{"x": [{"wildcard": "*hello"}]}"#,
            r#"{"x": "23Őzhello"}"#,
            "*hello should match 23Őzhello",
        ),
        (
            "p2",
            r#"{"x": [{"wildcard": "h*llo"}]}"#,
            r#"{"x": "hel23Őzlllo"}"#,
            "h*llo should match hel23Őzlllo",
        ),
        (
            "p3",
            r#"{"x": [{"wildcard": "hello*"}]}"#,
            r#"{"x": "hello23Őzlllo"}"#,
            "hello* should match hello23Őzlllo",
        ),
        (
            "p4",
            r#"{"x": [{"wildcard": "h*l*o"}]}"#,
            r#"{"x": "hel23Őzlllo"}"#,
            "h*l*o should match hel23Őzlllo",
        ),
    ];

    for (id, pattern, event, msg) in cases {
        let q = q!(id => pattern);
        assert_matches!(q, event, vec![id], msg);
    }
}
/// `*ST` against values that contain spaces: only values ending exactly in
/// "ST" match.
#[test]
fn test_shellstyle_suffix_with_space() {
    let q = q!("p1" => r#"{"x": [{"shellstyle": "*ST"}]}"#);

    // (event, whether p1 is expected, assertion message) in evaluation order
    let cases = [
        (r#"{"x": "STA ST"}"#, true, "*ST should match 'STA ST'"),
        (r#"{"x": "1ST"}"#, true, "*ST should match '1ST'"),
        (r#"{"x": "STA"}"#, false, "*ST should not match 'STA'"),
        (
            r#"{"x": "STAST "}"#,
            false,
            "*ST should not match 'STAST ' (trailing space)",
        ),
    ];
    for (event, expect_match, msg) in cases {
        if expect_match {
            assert_matches!(q, event, vec!["p1"], msg);
        } else {
            assert_no_match!(q, event, msg);
        }
    }
}
/// `foo*` must not match values that merely contain or resemble "foo".
#[test]
fn test_shellstyle_prefix_negative() {
    let q = q!("p1" => r#"{"x": [{"shellstyle": "foo*"}]}"#);
    let rejected = [
        (r#"{"x": "afoo"}"#, "foo* should not match 'afoo'"),
        (r#"{"x": "fofo"}"#, "foo* should not match 'fofo'"),
    ];
    for (event, msg) in rejected {
        assert_no_match!(q, event, msg);
    }
}
/// `*foo` must not match values with extra characters after "foo".
#[test]
fn test_shellstyle_suffix_negative() {
    let q = q!("p1" => r#"{"x": [{"shellstyle": "*foo"}]}"#);
    let rejected = [
        (r#"{"x": "foox"}"#, "*foo should not match 'foox'"),
        (r#"{"x": "afooo"}"#, "*foo should not match 'afooo'"),
    ];
    for (event, msg) in rejected {
        assert_no_match!(q, event, msg);
    }
}
/// `*foo*` must not match values that never contain the full "foo".
#[test]
fn test_shellstyle_contains_negative() {
    let q = q!("p1" => r#"{"x": [{"shellstyle": "*foo*"}]}"#);
    let rejected = [
        (r#"{"x": "afoa"}"#, "*foo* should not match 'afoa'"),
        (
            r#"{"x": "fofofoxooxoo"}"#,
            "*foo* should not match 'fofofoxooxoo'",
        ),
    ];
    for (event, msg) in rejected {
        assert_no_match!(q, event, msg);
    }
}
/// Positive cases for `xx*yy*zz` and `*xx*yy*`, including stars that cover
/// nothing.
#[test]
fn test_shellstyle_double_wildcard_variations() {
    let event_for = |val: &str| format!(r#"{{"x": "{val}"}}"#);

    let q = q!("p1" => r#"{"x": [{"shellstyle": "xx*yy*zz"}]}"#);
    for val in ["xxyycdzz", "xxabyyzz"] {
        let event = event_for(val);
        assert_matches!(q, event, vec!["p1"]);
    }

    let q2 = q!("p2" => r#"{"x": [{"shellstyle": "*xx*yy*"}]}"#);
    for val in ["abxxcdyyef", "xxcdyyef", "abxxyyef", "xxcdyy", "xxyyef"] {
        let event = event_for(val);
        assert_matches!(q2, event, vec!["p2"]);
    }
}
/// Sanity check on the shared sample table: it is non-empty and holds exactly
/// 992 entries.
#[test]
fn test_regexp_samples_exist() {
    use crate::regexp_samples::REGEXP_SAMPLES;

    assert!(!REGEXP_SAMPLES.is_empty(), "No regexp samples found");
    assert_eq!(REGEXP_SAMPLES.len(), 992, "Expected 992 samples");
}
/// Returns `true` when `regex` is, character for character, one of a fixed
/// list of sample expressions (going by the function name, the star-style
/// samples that also match the empty string).
#[cfg(test)]
fn regexp_star_samples_matching_empty(regex: &str) -> bool {
    const LISTED: &[&str] = &[
        "(([~.~~~?~*~+~{~}~[~]~(~)~|]?)*)+",
        "[~~~|~.~?~*~+~(~)~{~}~-~[~]~^]*",
        "[~*a]*",
        "[a-]*",
        "[~n~r~t~~~|~.~-~^~?~*~+~{~}~[~]~(~)]*",
        "[a~*]*",
        "[0-9]*",
        "(([a-d]*)|([a-z]*))",
        "(([d-f]*)|([c-e]*))",
        "(([c-e]*)|([d-f]*))",
        "(([a-d]*)|(.*))",
        "(([d-f]*)|(.*))",
        "(([c-e]*)|(.*))",
        "(.*)",
        "([^~?])*",
    ];
    LISTED.contains(&regex)
}
/// Returns `true` when `re` contains a `~` immediately followed by `b` or `B`
/// anywhere in the text.
#[cfg(test)]
fn regexp_should_skip(re: &str) -> bool {
    re.contains("~b") || re.contains("~B")
}
/// Returns `true` when `re` contains any of these character sequences:
/// `~` followed by one of `d D w W s S p P i I c C`; one of `* + ? }`
/// followed by `?`; or the three characters `(?:`.
#[cfg(test)]
fn regexp_is_known_extension(re: &str) -> bool {
    let chars: Vec<char> = re.chars().collect();

    let pair_hit = chars.windows(2).any(|w| {
        matches!(
            (w[0], w[1]),
            (
                '~',
                'd' | 'D' | 'w' | 'W' | 's' | 'S' | 'p' | 'P' | 'i' | 'I' | 'c' | 'C'
            ) | ('*' | '+' | '?' | '}', '?')
        )
    });

    pair_hit || re.contains("(?:")
}
/// Checks one regexp sample against the parser and the arena NFA and returns
/// the number of problems found.
///
/// * Sample flagged `valid`: if parsing fails the sample is not counted
///   (returns 0). Otherwise an NFA is built and every non-empty string in
///   `pattern_ids` that fails to match, and every non-empty string in
///   `nomatches` that does match, counts as one problem.
/// * Sample flagged invalid: counts one problem when parsing nevertheless
///   succeeds and `regexp_is_known_extension` does not recognise the regex.
#[cfg(test)]
fn evaluate_regexp_sample(sample: &crate::regexp_samples::RegexpSample) -> usize {
use crate::automaton::arena::{ARENA_VALUE_TERMINATOR, NfaBuffers, traverse_arena_nfa};
use crate::regexp::{make_regexp_nfa_arena, parse_regexp};
use std::sync::Arc;
let parse_result = parse_regexp(sample.regex);
let mut problems = 0;
if sample.valid {
// A valid sample that does not parse is silently skipped, not counted.
let Ok(tree) = parse_result else {
return 0;
};
let (arena, start, field_matcher) = make_regexp_nfa_arena(tree);
let mut bufs = NfaBuffers::new();
// A match is detected by looking for this address among the recorded transitions.
let fm_ptr = Arc::as_ptr(&field_matcher) as usize;
// Runs the NFA over `s` wrapped in double quotes and followed by the
// arena value terminator; returns whether the field matcher was reached.
let mut traverse = |s: &str| -> bool {
let mut value: Vec<u8> = Vec::with_capacity(s.len() + 3);
value.push(b'"');
value.extend_from_slice(s.as_bytes());
value.push(b'"');
value.push(ARENA_VALUE_TERMINATOR);
bufs.clear();
traverse_arena_nfa(&arena, start, &value, &mut bufs);
bufs.transitions.contains(&fm_ptr)
};
for should_match in sample.pattern_ids {
// Empty should-match strings are traversed but never counted.
if !traverse(should_match) && !should_match.is_empty() {
problems += 1;
}
}
for should_not_match in sample.nomatches {
let matched = traverse(should_not_match);
// NOTE(review): the trailing `!should_not_match.is_empty()` already
// excludes every empty string, so the allow-list clause before it can
// never change the outcome; the condition reduces to
// `matched && !should_not_match.is_empty()`.
if matched
&& !(should_not_match.is_empty()
&& regexp_star_samples_matching_empty(sample.regex))
&& !should_not_match.is_empty()
{
problems += 1;
}
}
} else if parse_result.is_ok() && !regexp_is_known_extension(sample.regex) {
problems += 1;
}
problems
}
/// Runs `evaluate_regexp_sample` over the shared sample table and tolerates
/// at most four problems.
///
/// Samples are skipped when `regexp_should_skip` flags the regex or when any
/// of their test strings is longer than 50 bytes. Evaluation stops early once
/// ten problems have accumulated.
#[test]
#[cfg_attr(miri, ignore)]
fn test_regexp_validity() {
    use crate::regexp_samples::REGEXP_SAMPLES;

    let mut problems = 0;
    for sample in REGEXP_SAMPLES {
        let has_long_string = sample
            .pattern_ids
            .iter()
            .chain(sample.nomatches.iter())
            .any(|s| s.len() > 50);
        if regexp_should_skip(sample.regex) || has_long_string {
            continue;
        }

        problems += evaluate_regexp_sample(sample);
        if problems >= 10 {
            break;
        }
    }

    assert!(
        problems <= 4,
        "Found {problems} regexp validation problems (expected <= 4)"
    );
}
/// Small miri-only check of the arena NFA: a few regexps are compiled and
/// traversed directly with hand-built, quoted and terminated byte values.
#[test]
#[cfg(miri)]
fn test_regexp_validity_miri_minimal() {
    use crate::automaton::arena::{ARENA_VALUE_TERMINATOR, NfaBuffers, traverse_arena_nfa};
    use crate::regexp::{make_regexp_nfa_arena, parse_regexp};
    use std::sync::Arc;

    let mut bufs = NfaBuffers::new();

    // "a|b": accepts "a", rejects "x".
    let (arena, start, fm) = make_regexp_nfa_arena(parse_regexp("a|b").unwrap());
    let fm_ptr = Arc::as_ptr(&fm) as usize;
    bufs.clear();
    traverse_arena_nfa(
        &arena,
        start,
        &[b'"', b'a', b'"', ARENA_VALUE_TERMINATOR],
        &mut bufs,
    );
    assert!(bufs.transitions.contains(&fm_ptr));
    bufs.clear();
    traverse_arena_nfa(
        &arena,
        start,
        &[b'"', b'x', b'"', ARENA_VALUE_TERMINATOR],
        &mut bufs,
    );
    assert!(!bufs.transitions.contains(&fm_ptr));

    // "a(h|i)z": accepts "ahz".
    let (arena, start, fm) = make_regexp_nfa_arena(parse_regexp("a(h|i)z").unwrap());
    let fm_ptr = Arc::as_ptr(&fm) as usize;
    bufs.clear();
    traverse_arena_nfa(
        &arena,
        start,
        &[b'"', b'a', b'h', b'z', b'"', ARENA_VALUE_TERMINATOR],
        &mut bufs,
    );
    assert!(bufs.transitions.contains(&fm_ptr));

    // "[a-c]": accepts "b", rejects "z".
    let (arena, start, fm) = make_regexp_nfa_arena(parse_regexp("[a-c]").unwrap());
    let fm_ptr = Arc::as_ptr(&fm) as usize;
    bufs.clear();
    traverse_arena_nfa(
        &arena,
        start,
        &[b'"', b'b', b'"', ARENA_VALUE_TERMINATOR],
        &mut bufs,
    );
    assert!(bufs.transitions.contains(&fm_ptr));
    bufs.clear();
    traverse_arena_nfa(
        &arena,
        start,
        &[b'"', b'z', b'"', ARENA_VALUE_TERMINATOR],
        &mut bufs,
    );
    assert!(!bufs.transitions.contains(&fm_ptr));

    // "a.b": accepts "axb".
    let (arena, start, fm) = make_regexp_nfa_arena(parse_regexp("a.b").unwrap());
    let fm_ptr = Arc::as_ptr(&fm) as usize;
    bufs.clear();
    traverse_arena_nfa(
        &arena,
        start,
        &[b'"', b'a', b'x', b'b', b'"', ARENA_VALUE_TERMINATOR],
        &mut bufs,
    );
    assert!(bufs.transitions.contains(&fm_ptr));
}
/// Small miri-only end-to-end check: three regexp patterns are added one
/// after another to a single matcher and probed after each addition.
#[test]
#[cfg(miri)]
fn test_regexp_end2end_miri_minimal() {
    let mut q = Quamina::new();

    q.add_pattern("p0", r#"{"a": [{"regexp": "abc|def"}]}"#)
        .unwrap();
    let hits = q.matches_for_event(br#"{"a": "abc"}"#).unwrap();
    assert!(hits.contains(&"p0"));
    let hits = q.matches_for_event(br#"{"a": "xyz"}"#).unwrap();
    assert!(!hits.contains(&"p0"));

    q.add_pattern("p1", r#"{"a": [{"regexp": "a(h|i)z"}]}"#)
        .unwrap();
    let hits = q.matches_for_event(br#"{"a": "ahz"}"#).unwrap();
    assert!(hits.contains(&"p1"));

    q.add_pattern("p2", r#"{"a": [{"regexp": "[a-c]"}]}"#)
        .unwrap();
    let hits = q.matches_for_event(br#"{"a": "b"}"#).unwrap();
    assert!(hits.contains(&"p2"));
}
/// JSON escape sequences in event values (`\"`, `\/`, `\b`, `\f`, `\r`) must
/// match the corresponding patterns. For backspace and form feed the pattern
/// text carries the raw control character rather than an escape.
#[test]
fn test_json_all_escape_sequences() {
    // Pattern text with one raw character placed between "a" and "b".
    let pattern_around = |c: char| format!(r#"{{"x": ["a{}b"]}}"#, c);

    let q1 = q!("p1" => r#"{"x": ["hello\"world"]}"#);
    assert_matches!(
        q1,
        r#"{"x": "hello\"world"}"#,
        vec!["p1"],
        "Quote escape should match"
    );

    let q2 = q!("p2" => r#"{"x": ["a/b"]}"#);
    assert_matches!(
        q2,
        r#"{"x": "a\/b"}"#,
        vec!["p2"],
        "Forward slash escape should match"
    );

    let pattern_with_backspace = pattern_around('\x08');
    let q3 = q!("p3" => &pattern_with_backspace);
    assert_matches!(
        q3,
        r#"{"x": "a\bb"}"#,
        vec!["p3"],
        "Backspace escape should match"
    );

    let pattern_with_formfeed = pattern_around('\x0c');
    let q4 = q!("p4" => &pattern_with_formfeed);
    assert_matches!(
        q4,
        r#"{"x": "a\fb"}"#,
        vec!["p4"],
        "Form feed escape should match"
    );

    let q5 = q!("p5" => r#"{"x": ["a\rb"]}"#);
    assert_matches!(
        q5,
        r#"{"x": "a\rb"}"#,
        vec!["p5"],
        "Carriage return escape should match"
    );
}
/// IPv4 CIDR patterns with /8, /16, /24 and /32 prefixes: an address inside
/// each block must report that block's id, and a neighbour of the /32 address
/// must not report it.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv4_various_prefixes() {
    let q = q!(
        "class_a" => r#"{"ip": [{"cidr": "10.0.0.0/8"}]}"#,
        "class_b" => r#"{"ip": [{"cidr": "172.16.0.0/16"}]}"#,
        "class_c" => r#"{"ip": [{"cidr": "192.168.1.0/24"}]}"#,
        "single" => r#"{"ip": [{"cidr": "8.8.8.8/32"}]}"#
    );

    // (event, a pattern id that must be among the matches)
    let inside = [
        (r#"{"ip": "10.255.255.255"}"#, "class_a"),
        (r#"{"ip": "172.16.255.255"}"#, "class_b"),
        (r#"{"ip": "192.168.1.100"}"#, "class_c"),
        (r#"{"ip": "8.8.8.8"}"#, "single"),
    ];
    for (event, id) in inside {
        assert_has_match!(q, event, id);
    }

    assert_no_has_match!(q, r#"{"ip": "8.8.8.9"}"#, "single");
}
/// IPv6 /32 CIDR: the first and last listed addresses of the block match, an
/// address in the next block does not.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv6_basic() {
    let q = q!("p1" => r#"{"sourceIP": [{"cidr": "2001:db8::/32"}]}"#);

    let inside = [
        (r#"{"sourceIP": "2001:db8:0:0:0:0:0:1"}"#, "IPv6 in range"),
        (
            r#"{"sourceIP": "2001:db8:ffff:ffff:ffff:ffff:ffff:ffff"}"#,
            "IPv6 at end of range",
        ),
    ];
    for (event, msg) in inside {
        assert_matches!(q, event, vec!["p1"], msg);
    }

    let outside = r#"{"sourceIP": "2001:db9:0:0:0:0:0:1"}"#;
    assert_no_match!(q, outside, "IPv6 outside range");
}
/// A CIDR written with `::` shorthand (`::1/128`) must match the same address
/// written out in full, and nothing else.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv6_shorthand() {
    let q = q!("loopback" => r#"{"ip": [{"cidr": "::1/128"}]}"#);

    let loopback = r#"{"ip": "0:0:0:0:0:0:0:1"}"#;
    let neighbour = r#"{"ip": "0:0:0:0:0:0:0:2"}"#;

    assert_matches!(q, loopback, vec!["loopback"], "Loopback should match");
    assert_no_match!(q, neighbour, "Non-loopback should not match /128");
}
/// Values that are not IP addresses must never match a CIDR pattern.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_non_ip_values() {
    let q = q!("p1" => r#"{"ip": [{"cidr": "10.0.0.0/8"}]}"#);

    let rejected = [
        (
            r#"{"ip": "not-an-ip"}"#,
            "Non-IP string should not match CIDR",
        ),
        (r#"{"ip": ""}"#, "Empty string should not match CIDR"),
        (r#"{"ip": 12345}"#, "Number should not match CIDR"),
    ];
    for (event, msg) in rejected {
        assert_no_match!(q, event, msg);
    }
}
/// Miri-only variant of the non-IP check, using a /32 pattern.
#[test]
#[cfg(miri)]
fn test_cidr_non_ip_values_miri_friendly() {
    let q = q!("p1" => r#"{"ip": [{"cidr": "127.0.0.1/32"}]}"#);

    let rejected = [
        (r#"{"ip": "not-an-ip"}"#, "Non-IP string"),
        (r#"{"ip": 12345}"#, "Number"),
    ];
    for (event, msg) in rejected {
        assert_no_match!(q, event, msg);
    }
}
/// A CIDR pattern alongside exact-match patterns on the same and on another
/// field: all applicable ids must be reported together.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_with_other_matchers() {
    let q = q!(
        "internal" => r#"{"sourceIP": [{"cidr": "10.0.0.0/8"}]}"#,
        "specific" => r#"{"sourceIP": ["10.0.0.1"]}"#,
        "status" => r#"{"status": ["active"]}"#
    );

    // One event that satisfies all three patterns.
    let full_event = r#"{"sourceIP": "10.0.0.1", "status": "active"}"#;
    for id in ["internal", "specific", "status"] {
        assert_has_match!(q, full_event, id);
    }

    // Same block, different address: the CIDR applies, the exact match does not.
    let other_address = r#"{"sourceIP": "10.0.0.2"}"#;
    assert_has_match!(q, other_address, "internal");
    assert_no_has_match!(q, other_address, "specific");
}
/// The four lookaround forms must be accepted by `parse_regexp`.
#[test]
fn test_lookaround_pattern_parsing() {
    use crate::regexp::parse_regexp;

    // (regexp, assertion message)
    let accepted = [
        ("foo(?=bar)", "Positive lookahead should parse"),
        ("foo(?!bar)", "Negative lookahead should parse"),
        ("(?<=foo)bar", "Positive lookbehind should parse"),
        ("(?<!foo)bar", "Negative lookbehind should parse"),
    ];
    for (re, msg) in accepted {
        assert!(parse_regexp(re).is_ok(), "{}", msg);
    }
}
// A positive lookahead followed by the same literal text must match an event
// value made of exactly that text.
#[test]
fn test_lookaround_transformation() {
    let q = q!("la" => r#"{"x": [{"regexp": "foo(?=bar)bar"}]}"#);
    let event = r#"{"x": "foobar"}"#;
    assert_has_match!(q, event, "la");
}
// Lookaround forms that `parse_regexp` is expected to refuse.
#[test]
fn test_lookaround_rejected_patterns() {
    use crate::regexp::parse_regexp;
    // (regexp source, failure message) pairs, checked in the listed order.
    let refused = [
        ("(?=foo(?=bar))", "Nested lookahead should be rejected"),
        (
            "(?<=foo*)bar",
            "Variable-length lookbehind should be rejected",
        ),
        (
            "(?<=foo+)bar",
            "Variable-length lookbehind with + should be rejected",
        ),
    ];
    for (regex, why) in refused {
        assert!(parse_regexp(regex).is_err(), "{why}");
    }
}
// A trailing positive lookahead must hold for the pattern to be reported:
// "foobar" satisfies `foo(?=bar)`, a bare "foo" does not.
#[test]
fn test_lookaround_primary_match() {
    let id = String::from("lookahead");
    let mut q = Quamina::<String>::new();
    q.add_pattern(id.clone(), r#"{"status": [{"regexp": "foo(?=bar)"}]}"#)
        .unwrap();

    let hits = q.matches_for_event(br#"{"status": "foobar"}"#).unwrap();
    assert!(hits.contains(&id), "foo(?=bar) should match 'foobar'");

    let hits = q.matches_for_event(br#"{"status": "foo"}"#).unwrap();
    assert!(
        !hits.contains(&id),
        "foo(?=bar) should NOT match 'foo' (lookahead fails)"
    );
}
// Runs the same three events through one matcher 200 times and checks that
// every round gives the same answers, i.e. a previous event leaves nothing
// behind that changes a later result.
#[test]
#[cfg_attr(miri, ignore)]
fn test_lookaround_buffer_reuse_no_stale_state() {
    // Pattern ids are built once; the loop only borrows them for `contains`.
    let look = "look".to_string();
    let exact = "exact".to_string();

    let mut q = Quamina::<String>::new();
    q.add_pattern(look.clone(), r#"{"v": [{"regexp": "foo(?=bar)bar"}]}"#)
        .unwrap();
    q.add_pattern(exact.clone(), r#"{"w": ["hello"]}"#)
        .unwrap();

    for _ in 0..200 {
        // Lookahead pattern hits.
        let m = q.matches_for_event(br#"{"v": "foobar"}"#).unwrap();
        assert!(m.contains(&look), "foobar should match lookahead");

        // Plain exact-match pattern on a different field hits.
        let m = q.matches_for_event(br#"{"w": "hello"}"#).unwrap();
        assert!(m.contains(&exact), "hello should match exact");

        // An event that satisfies neither pattern yields no ids at all.
        let m = q.matches_for_event(br#"{"v": "nomatch"}"#).unwrap();
        assert!(m.is_empty(), "nomatch should match nothing");
    }
}
// A positive-lookahead pattern and a negative-lookahead pattern share a
// field; over 200 rounds each event must be attributed to exactly the
// pattern it satisfies and never to the other one.
#[test]
#[cfg_attr(miri, ignore)]
fn test_lookaround_multiple_conditions_no_cross_contamination() {
    // Pattern ids are built once; the loop only borrows them for `contains`.
    let pos = "pos".to_string();
    let neg = "neg".to_string();

    let mut q = Quamina::<String>::new();
    q.add_pattern(pos.clone(), r#"{"v": [{"regexp": "foo(?=bar)bar"}]}"#)
        .unwrap();
    q.add_pattern(neg.clone(), r#"{"v": [{"regexp": "foo(?!bar)baz"}]}"#)
        .unwrap();

    for _ in 0..200 {
        let m = q.matches_for_event(br#"{"v": "foobar"}"#).unwrap();
        assert!(m.contains(&pos), "foobar should match pos");
        assert!(!m.contains(&neg), "foobar should not match neg");

        let m = q.matches_for_event(br#"{"v": "foobaz"}"#).unwrap();
        assert!(!m.contains(&pos), "foobaz should not match pos");
        assert!(m.contains(&neg), "foobaz should match neg");
    }
}
// Single-pass version of the buffer-reuse check: three events in sequence
// through one matcher, each with its own expected result.
#[test]
fn test_lookaround_buffer_reuse_no_stale_state_miri_friendly() {
    let look_id = String::from("look");
    let exact_id = String::from("exact");

    let mut q = Quamina::<String>::new();
    q.add_pattern(look_id.clone(), r#"{"v": [{"regexp": "foo(?=bar)bar"}]}"#)
        .unwrap();
    q.add_pattern(exact_id.clone(), r#"{"w": ["hello"]}"#)
        .unwrap();

    let hits = q.matches_for_event(br#"{"v": "foobar"}"#).unwrap();
    assert!(hits.contains(&look_id), "foobar should match lookahead");

    let hits = q.matches_for_event(br#"{"w": "hello"}"#).unwrap();
    assert!(hits.contains(&exact_id), "hello should match exact");

    let hits = q.matches_for_event(br#"{"v": "nomatch"}"#).unwrap();
    assert!(hits.is_empty(), "nomatch should match nothing");
}
// Single-pass version of the cross-contamination check: one event per
// lookahead polarity, each attributed to exactly one of the two patterns.
#[test]
fn test_lookaround_multiple_conditions_no_cross_contamination_miri_friendly() {
    let pos_id = String::from("pos");
    let neg_id = String::from("neg");

    let mut q = Quamina::<String>::new();
    q.add_pattern(pos_id.clone(), r#"{"v": [{"regexp": "foo(?=bar)bar"}]}"#)
        .unwrap();
    q.add_pattern(neg_id.clone(), r#"{"v": [{"regexp": "foo(?!bar)baz"}]}"#)
        .unwrap();

    let hits = q.matches_for_event(br#"{"v": "foobar"}"#).unwrap();
    assert!(hits.contains(&pos_id), "foobar should match pos");
    assert!(!hits.contains(&neg_id), "foobar should not match neg");

    let hits = q.matches_for_event(br#"{"v": "foobaz"}"#).unwrap();
    assert!(!hits.contains(&pos_id), "foobaz should not match pos");
    assert!(hits.contains(&neg_id), "foobaz should match neg");
}
// Two shellstyle patterns on one field where everything r2 ("d*ff") accepts
// is also accepted by r1 ("d*f"), but not the other way round.
#[test]
fn test_shellstyle_subset_overlap_same_field() {
    let q = q!(
        "r1" => r#"{"b": [{"shellstyle": "d*f"}]}"#,
        "r2" => r#"{"b": [{"shellstyle": "d*ff"}]}"#
    );

    // Ends in "ff": both patterns report, and nothing else does.
    let long_double_f = r#"{"b": "dexeff"}"#;
    assert_has_match!(q, long_double_f, "r1");
    assert_has_match!(q, long_double_f, "r2");
    assert_match_count!(q, r#"{"b": "dexeff"}"#, 2);

    // Ends in a single "f": only r1 reports.
    for event in [r#"{"b": "def"}"#, r#"{"b": "df"}"#] {
        assert_has_match!(q, event, "r1");
        assert_no_has_match!(q, event, "r2");
    }

    // Shortest value ending in "ff": both report again.
    let short_double_f = r#"{"b": "dff"}"#;
    assert_has_match!(q, short_double_f, "r1");
    assert_has_match!(q, short_double_f, "r2");

    assert_no_match!(q, r#"{"b": "hello"}"#);
}
// `equals-ignore-case` must compare the whole value: case may differ, but
// longer, shorter, embedded, unrelated and empty values are all rejected.
#[test]
fn test_equals_ignore_case_length_boundaries() {
    let q = q!("p1" => r#"{"name": [{"equals-ignore-case": "XyZ"}]}"#);

    // Same letters, different case.
    assert_matches!(q, r#"{"name": "xyz"}"#, vec!["p1"], "Lowercase");
    assert_matches!(q, r#"{"name": "XYZ"}"#, vec!["p1"], "Uppercase");

    // Near misses, one assertion per case.
    assert_no_match!(q, r#"{"name": "xyzz"}"#, "extra trailing char");
    assert_no_match!(q, r#"{"name": "xy"}"#, "shorter");
    assert_no_match!(q, r#"{"name": "ABCXYZ"}"#, "contains but not equal");
    assert_no_match!(q, r#"{"name": "abc"}"#, "different");
    assert_no_match!(q, r#"{"name": ""}"#, "empty");
}
// Three regex patterns on the same field: a two-letter `~p{L}~p{L}` pattern,
// a one-letter `~p{L}` pattern and a plain `[abc]` class. Each must accept
// and reject independently of the others.
#[test]
#[cfg_attr(miri, ignore)]
fn test_unicode_category_epsilon_closure() {
    let mut q = Quamina::new();
    // Every `expect` names its pattern so a failed registration is unambiguous.
    q.add_pattern("p1", r#"{"text": [{"regex": "~p{L}~p{L}"}]}"#)
        .expect("Failed to add pattern p1");
    q.add_pattern("p2", r#"{"text": [{"regex": "~p{L}"}]}"#)
        .expect("Failed to add pattern p2");
    q.add_pattern("p3", r#"{"text": [{"regex": "[abc]"}]}"#)
        .expect("Failed to add pattern p3");

    // p1: two letters in either case; a letter followed by a digit is rejected.
    assert_has_match!(q, r#"{"text": "ab"}"#, "p1");
    assert_has_match!(q, r#"{"text": "AB"}"#, "p1");
    assert_no_has_match!(q, r#"{"text": "a1"}"#, "p1");

    // p2: a single letter; a single digit is rejected.
    assert_has_match!(q, r#"{"text": "a"}"#, "p2");
    assert_has_match!(q, r#"{"text": "Z"}"#, "p2");
    assert_no_has_match!(q, r#"{"text": "1"}"#, "p2");

    // p3: only the listed characters.
    assert_has_match!(q, r#"{"text": "a"}"#, "p3");
    assert_has_match!(q, r#"{"text": "b"}"#, "p3");
    assert_no_has_match!(q, r#"{"text": "d"}"#, "p3");
}
// Calls `clear_fa_shell_cache` between adding two category-based patterns
// and checks that both the earlier and the later pattern still match correctly.
#[test]
#[cfg_attr(miri, ignore)]
fn test_fa_shell_cache_clearing() {
    use crate::regexp::clear_fa_shell_cache;
    let upper_event = r#"{"text": "HELLO"}"#;
    let lower_event = r#"{"text": "hello"}"#;

    let mut q = Quamina::new();
    q.add_pattern("p1", r#"{"text": [{"regex": "~p{Lu}+"}]}"#)
        .expect("Failed to add pattern 1");
    assert_has_match!(q, upper_event, "p1");

    clear_fa_shell_cache();

    q.add_pattern("p2", r#"{"text": [{"regex": "~p{Ll}+"}]}"#)
        .expect("Failed to add pattern 2");
    // The pattern added before the clear keeps working...
    assert_has_match!(q, upper_event, "p1");
    // ...and the one added after it works too, with no overlap between them.
    assert_has_match!(q, lower_event, "p2");
    assert_no_has_match!(q, lower_event, "p1");
    assert_no_has_match!(q, upper_event, "p2");
}
// Same sequence as the cache-clearing test, but with plain ASCII character
// classes in place of the `~p{..}` categories.
#[test]
fn test_fa_shell_cache_clearing_miri_friendly() {
    use crate::regexp::clear_fa_shell_cache;
    let upper_event = r#"{"text": "HELLO"}"#;
    let lower_event = r#"{"text": "hello"}"#;

    let mut q = Quamina::new();
    q.add_pattern("p1", r#"{"text": [{"regex": "[A-Z]+"}]}"#)
        .expect("Failed to add pattern 1");
    assert_has_match!(q, upper_event, "p1");

    clear_fa_shell_cache();

    q.add_pattern("p2", r#"{"text": [{"regex": "[a-z]+"}]}"#)
        .expect("Failed to add pattern 2");
    assert_has_match!(q, upper_event, "p1");
    assert_has_match!(q, lower_event, "p2");
    assert_no_has_match!(q, lower_event, "p1");
    assert_no_has_match!(q, upper_event, "p2");
}
// Single-character classes for the code points on either side of the
// surrogate block: U+D7FF (last before it) and U+E000 (first after it).
#[test]
fn test_surrogate_boundary_before() {
    // U+D7FF, in its own matcher.
    {
        let below = '\u{D7FF}';
        let below_pattern = format!(r#"{{"text": [{{"regex": "[{below}]"}}]}}"#);
        let mut q = Quamina::new();
        q.add_pattern("p1", &below_pattern)
            .expect("Failed to add pattern with U+D7FF");
        let below_event = format!(r#"{{"text": "{below}"}}"#);
        assert_has_match!(q, &below_event, "p1");
        assert_no_has_match!(q, r#"{"text": "a"}"#, "p1");
    }

    // U+E000, in a second, independent matcher.
    {
        let above = '\u{E000}';
        let above_pattern = format!(r#"{{"text": [{{"regex": "[{above}]"}}]}}"#);
        let mut q2 = Quamina::new();
        q2.add_pattern("p2", &above_pattern)
            .expect("Failed to add pattern with U+E000");
        let above_event = format!(r#"{{"text": "{above}"}}"#);
        assert_has_match!(q2, &above_event, "p2");
        assert_no_has_match!(q2, r#"{"text": "a"}"#, "p2");
    }
}
// A character range whose endpoints sit on opposite sides of the surrogate
// block (U+D7FC..=U+E003) must accept both ends and a point after the gap,
// and reject values outside the range.
#[allow(clippy::similar_names)]
#[test]
fn test_surrogate_boundary_range() {
    let (range_start, range_end) = ('\u{D7FC}', '\u{E003}');
    let pattern = format!(r#"{{"text": [{{"regex": "[{range_start}-{range_end}]"}}]}}"#);
    let mut q = Quamina::new();
    q.add_pattern("p1", &pattern)
        .expect("Failed to add pattern with range spanning surrogate");

    // Lower endpoint, first code point after the surrogates, upper endpoint.
    for inside in [range_start, '\u{E000}', range_end] {
        let event = format!(r#"{{"text": "{inside}"}}"#);
        assert_has_match!(q, &event, "p1");
    }

    assert_no_has_match!(q, r#"{"text": "a"}"#, "p1");
    // One code point below the lower endpoint.
    let just_below = format!(r#"{{"text": "{}"}}"#, '\u{D7FB}');
    assert_no_has_match!(q, &just_below, "p1");
}
// An ASCII range pattern and a range spanning the surrogate block live in
// the same matcher; each must accept only its own inputs.
#[test]
fn test_surrogate_boundary_multiple_ranges() {
    let (low, high) = ('\u{D7FE}', '\u{E002}');

    let mut q = Quamina::new();
    q.add_pattern("p1", r#"{"text": [{"regex": "[a-z]"}]}"#)
        .expect("Failed to add pattern p1");
    let wide_pattern = format!(r#"{{"text": [{{"regex": "[{low}-{high}]"}}]}}"#);
    q.add_pattern("p2", &wide_pattern)
        .expect("Failed to add pattern p2");

    // p1 accepts a lowercase ASCII letter and rejects the uppercase one.
    let ascii_event = r#"{"text": "m"}"#;
    assert_has_match!(q, ascii_event, "p1");
    assert_no_has_match!(q, r#"{"text": "M"}"#, "p1");

    // p2 accepts both endpoints of its range.
    let low_event = format!(r#"{{"text": "{low}"}}"#);
    assert_has_match!(q, &low_event, "p2");
    let high_event = format!(r#"{{"text": "{high}"}}"#);
    assert_has_match!(q, &high_event, "p2");

    // Neither pattern accepts the other's input.
    assert_no_has_match!(q, &low_event, "p1");
    assert_no_has_match!(q, ascii_event, "p2");
}
// A two-element range from U+07FF (last code point encoded in two UTF-8
// bytes) to U+0800 (first encoded in three) must accept both endpoints.
#[allow(clippy::similar_names)]
#[test]
fn test_rune_range_utf8_length_boundary() {
    let (last_two_byte, first_three_byte) = ('\u{07FF}', '\u{0800}');
    let q = q!("p1" => &format!(
        r#"{{"text": [{{"regexp": "[{last_two_byte}-{first_three_byte}]"}}]}}"#
    ));

    for inside in [last_two_byte, first_three_byte] {
        let event = format!(r#"{{"text": "{inside}"}}"#);
        assert_has_match!(q, &event, "p1");
    }

    // One code point below the range, then an unrelated ASCII letter.
    let just_below = format!(r#"{{"text": "{}"}}"#, '\u{07FE}');
    assert_no_has_match!(q, &just_below, "p1");
    assert_no_has_match!(q, r#"{"text": "a"}"#, "p1");
}
// Range U+00C1..=U+0150: both endpoints are two-byte UTF-8 sequences but
// start with different lead bytes (0xC3 and 0xC5). Endpoints and an interior
// point must match; the code points just outside must not.
#[allow(clippy::similar_names)]
#[test]
fn test_rune_range_split_lead_byte() {
    let (lo, hi) = ('\u{00C1}', '\u{0150}');
    let q = q!("p1" => &format!(
        r#"{{"text": [{{"regexp": "[{lo}-{hi}]"}}]}}"#
    ));

    for inside in [lo, '\u{0100}', hi] {
        let event = format!(r#"{{"text": "{inside}"}}"#);
        assert_has_match!(q, &event, "p1");
    }

    let just_below = format!(r#"{{"text": "{}"}}"#, '\u{00C0}');
    assert_no_has_match!(q, &just_below, "p1");
    let just_above = format!(r#"{{"text": "{}"}}"#, '\u{0151}');
    assert_no_has_match!(q, &just_above, "p1");
}
// A trailing-star and a leading-star shellstyle pattern on the same field
// must each report only for the values they individually accept.
#[test]
fn test_shellstyle_merged_prefix_and_suffix() {
    let q = q!(
        "star_prefix" => r#"{"x": [{"shellstyle": "abc*"}]}"#,
        "star_suffix" => r#"{"x": [{"shellstyle": "*xyz"}]}"#
    );

    // Starts with "abc" and ends with "xyz": both.
    let both = r#"{"x": "abcxyz"}"#;
    assert_has_match!(q, both, "star_prefix");
    assert_has_match!(q, both, "star_suffix");

    // Only the "abc*" side.
    let head_only = r#"{"x": "abcdef"}"#;
    assert_has_match!(q, head_only, "star_prefix");
    assert_no_has_match!(q, head_only, "star_suffix");

    // Only the "*xyz" side.
    let tail_only = r#"{"x": "123xyz"}"#;
    assert_has_match!(q, tail_only, "star_suffix");
    assert_no_has_match!(q, tail_only, "star_prefix");

    assert_no_match!(q, r#"{"x": "hello"}"#);
}
// A pattern over three fields needs all three to be present with the right
// values; extra fields in the event do not prevent a match.
#[test]
fn test_multi_field_exact_match() {
    let q = q!("multi" => r#"{"a": ["1"], "b": ["2"], "c": ["3"]}"#);

    let complete = r#"{"a": "1", "b": "2", "c": "3"}"#;
    assert_has_match!(q, complete, "multi");

    // Missing field, then wrong value for one field.
    assert_no_match!(q, r#"{"a": "1", "b": "2"}"#);
    assert_no_match!(q, r#"{"a": "1", "b": "2", "c": "X"}"#);

    let with_extra_field = r#"{"a": "1", "b": "2", "c": "3", "d": "4"}"#;
    assert_has_match!(q, with_extra_field, "multi");
}
// Positive and negative lookbehind on the same literal are complementary:
// every tested value is reported by exactly one of the two patterns.
#[test]
fn test_lookbehind_positive_and_negative() {
    let q = q!(
        "pos_lb" => r#"{"v": [{"regexp": "(?<=pre)fix"}]}"#,
        "neg_lb" => r#"{"v": [{"regexp": "(?<!pre)fix"}]}"#
    );

    // "fix" preceded by "pre": positive side only.
    let preceded = r#"{"v": "prefix"}"#;
    assert_has_match!(q, preceded, "pos_lb");
    assert_no_has_match!(q, preceded, "neg_lb");

    // "fix" preceded by something else, or by nothing: negative side only.
    for event in [r#"{"v": "suffix"}"#, r#"{"v": "fix"}"#] {
        assert_has_match!(q, event, "neg_lb");
        assert_no_has_match!(q, event, "pos_lb");
    }
}
// `equals-ignore-case` accepts any casing of the full word and rejects
// values that are one character shorter or longer.
#[test]
fn test_equals_ignore_case_all_variants() {
    let q = q!("ic" => r#"{"name": [{"equals-ignore-case": "Hello"}]}"#);

    let casings = [
        r#"{"name": "Hello"}"#,
        r#"{"name": "hello"}"#,
        r#"{"name": "HELLO"}"#,
        r#"{"name": "hElLo"}"#,
    ];
    for event in casings {
        assert_has_match!(q, event, "ic");
    }

    assert_no_match!(q, r#"{"name": "Hell"}"#);
    assert_no_match!(q, r#"{"name": "Helloo"}"#);
}
// Checks the first and last address of a /48 network and the first address
// of the next network, then a /128 that must match one address only.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv6_prefix_boundaries() {
    let q = q!("p48" => r#"{"ip": [{"cidr": "2001:db8:abcd::/48"}]}"#);
    // Lowest and highest addresses inside the /48.
    let inside = [
        r#"{"ip": "2001:db8:abcd:0:0:0:0:0"}"#,
        r#"{"ip": "2001:db8:abcd:ffff:ffff:ffff:ffff:ffff"}"#,
    ];
    for event in inside {
        assert_has_match!(q, event, "p48");
    }
    // The third group differs by one: outside the network.
    assert_no_match!(q, r#"{"ip": "2001:db8:abce:0:0:0:0:0"}"#);

    let q128 = q!("exact" => r#"{"ip": [{"cidr": "fe80:0:0:0:0:0:0:1/128"}]}"#);
    let only_address = r#"{"ip": "fe80:0:0:0:0:0:0:1"}"#;
    assert_has_match!(q128, only_address, "exact");
    assert_no_match!(q128, r#"{"ip": "fe80:0:0:0:0:0:0:2"}"#);
}
// With a /16 prefix only the first group is fixed: any remaining groups
// match, while a different first group does not.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv6_wide_prefix() {
    let q = q!("p16" => r#"{"ip": [{"cidr": "ff00::/16"}]}"#);

    let inside = [
        r#"{"ip": "ff00:0:0:0:0:0:0:0"}"#,
        r#"{"ip": "ff00:1234:abcd:ef01:2345:6789:abcd:ef01"}"#,
    ];
    for event in inside {
        assert_has_match!(q, event, "p16");
    }

    assert_no_match!(q, r#"{"ip": "ff01:0:0:0:0:0:0:0"}"#);
}
// `anything-but` with excluded strings of lengths 1, 2 and 3: the excluded
// values are rejected, while prefixes, extensions and unrelated values pass.
#[test]
fn test_anything_but_mixed_lengths() {
    let q = q!("ab" => r#"{"x": [{"anything-but": ["a", "bb", "ccc"]}]}"#);

    // Exactly the excluded values.
    let excluded = [r#"{"x": "a"}"#, r#"{"x": "bb"}"#, r#"{"x": "ccc"}"#];
    for event in excluded {
        assert_no_has_match!(q, event, "ab");
    }

    // Shorter, longer and unrelated values.
    let allowed = [
        r#"{"x": "b"}"#,
        r#"{"x": "cc"}"#,
        r#"{"x": "aa"}"#,
        r#"{"x": "bbb"}"#,
        r#"{"x": "cccc"}"#,
        r#"{"x": "xyz"}"#,
    ];
    for event in allowed {
        assert_has_match!(q, event, "ab");
    }
}
// Three shellstyle patterns with disjoint first/last characters share a
// field; each value must be reported for its own pattern only.
#[test]
#[cfg_attr(miri, ignore)]
fn test_shellstyle_three_pattern_merge() {
    let q = q!(
        "p1" => r#"{"x": [{"shellstyle": "a*b"}]}"#,
        "p2" => r#"{"x": [{"shellstyle": "c*d"}]}"#,
        "p3" => r#"{"x": [{"shellstyle": "e*f"}]}"#
    );
    let for_p1 = r#"{"x": "a123b"}"#;
    let for_p2 = r#"{"x": "c456d"}"#;
    let for_p3 = r#"{"x": "e789f"}"#;

    // Each value hits its own pattern.
    assert_has_match!(q, for_p1, "p1");
    assert_has_match!(q, for_p2, "p2");
    assert_has_match!(q, for_p3, "p3");

    // No value leaks into another pattern.
    assert_no_has_match!(q, for_p1, "p2");
    assert_no_has_match!(q, for_p1, "p3");
    assert_no_has_match!(q, for_p2, "p1");

    assert_no_match!(q, r#"{"x": "hello"}"#);
}
// Adding a CIDR pattern whose address contains `::` twice must fail.
#[test]
fn test_cidr_ipv6_double_colon_must_appear_at_most_once() {
    let mut q = Quamina::new();
    assert!(
        q.add_pattern("p", r#"{"ip": [{"cidr": "1::2::3/128"}]}"#)
            .is_err(),
        "Address with two :: should be rejected"
    );
}
// A CIDR written with eight explicit groups plus a `::` is expected to be
// accepted here and to behave like the same eight groups without the `::`.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv6_eight_explicit_groups_with_shorthand() {
    let q = q!("p1" => r#"{"ip": [{"cidr": "1:2:3:4::5:6:7:8/128"}]}"#);

    let same_groups = r#"{"ip": "1:2:3:4:5:6:7:8"}"#;
    assert_has_match!(q, same_groups, "p1");

    // Last group differs by one.
    assert_no_match!(q, r#"{"ip": "1:2:3:4:5:6:7:9"}"#);
}
// Groups written after a leading `::` must land in the last two groups of
// the address, with every hex digit significant under /128.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv6_right_side_high_bytes() {
    let q = q!("p1" => r#"{"ip": [{"cidr": "::abcd:ef01/128"}]}"#);

    let expanded = r#"{"ip": "0:0:0:0:0:0:abcd:ef01"}"#;
    assert_has_match!(q, expanded, "p1");

    // Off by one in the last group, then in the second-to-last group.
    assert_no_match!(q, r#"{"ip": "0:0:0:0:0:0:abcd:ef02"}"#);
    assert_no_match!(q, r#"{"ip": "0:0:0:0:0:0:abce:ef01"}"#);
}
// Parses IPv6 CIDRs directly and inspects the stored network bytes to check
// that bits beyond the prefix length are cleared in the byte the prefix ends in.
#[test]
fn test_cidr_ipv6_parse_masks_boundary_byte() {
    use crate::json::CidrPattern;

    // /60 ends halfway through byte 7: 0x01 from the input becomes 0x00.
    let CidrPattern::V6 {
        network: masked,
        prefix_len: bits,
    } = CidrPattern::parse("2001:db8:abcd:ef01::/60").expect("valid /60")
    else {
        panic!("expected V6");
    };
    assert_eq!(bits, 60);
    let expected = [
        0x20, 0x01, 0x0d, 0xb8, 0xab, 0xcd, 0xef, 0x00, 0, 0, 0, 0, 0, 0, 0, 0,
    ];
    assert_eq!(masked, expected);

    // /124 keeps only the high nibble of the final byte.
    let CidrPattern::V6 {
        network: masked, ..
    } = CidrPattern::parse("::ff/124").expect("valid /124")
    else {
        panic!("expected V6");
    };
    assert_eq!(masked[15], 0xf0, "0xff & 0xf0 == 0xf0 at byte 15");

    // /128 keeps every bit.
    let CidrPattern::V6 {
        network: masked, ..
    } = CidrPattern::parse("::ff/128").expect("valid /128")
    else {
        panic!("expected V6");
    };
    assert_eq!(masked[15], 0xff, "/128 leaves the last byte untouched");
}
// A /121 prefix fixes only the top bit of the final byte: last-group values
// 0x80..=0xff are inside the network, 0x00..=0x7f are outside.
#[test]
#[cfg_attr(miri, ignore)]
fn test_cidr_ipv6_non_byte_aligned_prefix() {
    let q = q!("p121" => r#"{"ip": [{"cidr": "2001:db8:0:0:0:0:0:80/121"}]}"#);

    // Lowest and highest addresses of the network.
    let inside = [
        r#"{"ip": "2001:db8:0:0:0:0:0:80"}"#,
        r#"{"ip": "2001:db8:0:0:0:0:0:ff"}"#,
    ];
    for event in inside {
        assert_has_match!(q, event, "p121");
    }

    // Top bit of the last byte clear: outside.
    assert_no_match!(q, r#"{"ip": "2001:db8:0:0:0:0:0:0"}"#);
    assert_no_match!(q, r#"{"ip": "2001:db8:0:0:0:0:0:7f"}"#);
}
// The JSON escapes `\b` and `\f` inside a pattern value must match the same
// escapes in an event and must not be confused with the plain letters b and f.
#[test]
fn test_pattern_json_backspace_and_formfeed_escapes() {
    let q = q!(
        "bs" => r#"{"x": ["a\bb"]}"#,
        "ff" => r#"{"x": ["a\fb"]}"#
    );

    // Events carrying the escape sequences themselves.
    let backspace_event = r#"{"x": "a\bb"}"#;
    let formfeed_event = r#"{"x": "a\fb"}"#;
    assert_has_match!(q, backspace_event, "bs");
    assert_has_match!(q, formfeed_event, "ff");

    // Events where the escape is replaced by the literal letter.
    let literal_b_event = r#"{"x": "abb"}"#;
    let literal_f_event = r#"{"x": "afb"}"#;
    assert_no_has_match!(q, literal_b_event, "bs");
    assert_no_has_match!(q, literal_f_event, "ff");
}
// `\uXXXX` escapes in a pattern value must be decoded: the escaped form of
// "ABC" matches the plain text "ABC" and not its lowercase form.
#[test]
fn test_pattern_json_unicode_escape_bmp() {
    let q = q!("u" => r#"{"x": ["\u0041\u0042\u0043"]}"#);

    let decoded = r#"{"x": "ABC"}"#;
    assert_has_match!(q, decoded, "u");

    assert_no_match!(q, r#"{"x": "abc"}"#);
}
// A `\uD83D\uDE00` surrogate-pair escape in a pattern must match the single
// emoji it encodes and not a neighbouring emoji.
#[test]
fn test_pattern_json_unicode_surrogate_pair() {
    let q = q!("emoji" => r#"{"x": ["\uD83D\uDE00"]}"#);

    let same_emoji = r#"{"x": "😀"}"#;
    assert_has_match!(q, same_emoji, "emoji");

    assert_no_match!(q, r#"{"x": "😁"}"#);
}
// A shellstyle pattern with two stars ("a*b*c"): the three literals must
// appear in order, with the value starting with "a" and ending with "c".
#[test]
fn test_shellstyle_multi_wildcard() {
    let q = q!("p1" => r#"{"x": [{"shellstyle": "a*b*c"}]}"#);

    // Stars matching nothing, one character each, and several characters each.
    let accepted = [
        r#"{"x": "abc"}"#,
        r#"{"x": "aXbYc"}"#,
        r#"{"x": "aXXXbYYYc"}"#,
    ];
    for event in accepted {
        assert_has_match!(q, event, "p1");
    }

    assert_no_match!(q, r#"{"x": "ab"}"#);
    assert_no_match!(q, r#"{"x": "axb"}"#);
    assert_no_match!(q, r#"{"x": "bac"}"#);
}
// A lookbehind containing an alternation accepts either alternative before
// the primary character and nothing else.
#[test]
fn test_lookbehind_with_alternation() {
    let q = q!("p1" => r#"{"v": [{"regexp": "(?<=ab|cd)x"}]}"#);

    for event in [r#"{"v": "abx"}"#, r#"{"v": "cdx"}"#] {
        assert_has_match!(q, event, "p1");
    }

    // Wrong text before "x", then right text before a wrong final character.
    assert_no_match!(q, r#"{"v": "efx"}"#);
    assert_no_match!(q, r#"{"v": "abz"}"#);
}
// Alternation in both the lookbehind and the primary part: all four
// combinations match, and changing either side breaks the match.
#[test]
fn test_lookbehind_alternation_with_primary_alternation() {
    let q = q!("p1" => r#"{"v": [{"regexp": "(?<=a|b)(x|y)"}]}"#);

    let combinations = [
        r#"{"v": "ax"}"#,
        r#"{"v": "ay"}"#,
        r#"{"v": "bx"}"#,
        r#"{"v": "by"}"#,
    ];
    for event in combinations {
        assert_has_match!(q, event, "p1");
    }

    assert_no_match!(q, r#"{"v": "cx"}"#);
    assert_no_match!(q, r#"{"v": "az"}"#);
}
// A numeric equality pattern must match the JSON number 42 but not the JSON
// string "42".
#[test]
fn test_mut_numeric_pattern_rejects_string_value() {
let q = q!("n" => r#"{"x": [{"numeric": ["=", 42]}]}"#);
// Unquoted number in the event: expected to match.
assert_matches!(q, r#"{"x": 42}"#, vec!["n"], "numeric 42 must match");
// Same digits, but quoted as a string: expected to be rejected.
assert_no_match!(
q,
r#"{"x": "42"}"#,
"string \"42\" must NOT match a numeric pattern"
);
}
// Pins the second component of `get_memory_budget()` to an exact figure for
// a matcher holding one suffix pattern and one lookaround regexp pattern.
// NOTE(review): 5280 is tied to the current internal layout; presumably it
// needs updating whenever those structures change size — confirm.
#[test]
fn test_mut_memory_usage_accumulates_suffix_and_lookaround() {
    let q = q!(
        "suf" => r#"{"x": [{"suffix": "lo"}]}"#,
        "la" => r#"{"y": [{"regexp": "foo(?=bar)bar"}]}"#
    );
    // Only the second tuple element is checked; the first is ignored.
    let used = q.get_memory_budget().1;
    assert_eq!(
        used, 5280,
        "memory usage must equal exact sum of arena byte sizes, got {used}"
    );
}