sochdb_query/like.rs
1//! Canonical SQL `LIKE` pattern matching.
2//!
3//! This is the single source of truth for `LIKE` semantics across every query
4//! path (storage scan, filter pushdown, expression evaluation, virtual/plugin
5//! tables). Historically the engine carried four divergent implementations that
6//! disagreed on case sensitivity and on whether regex metacharacters in the
7//! subject/pattern were treated literally. They have all been routed through
8//! [`like_match`] so that `LIKE` behaves identically regardless of the path a
9//! query takes.
10//!
11//! # Semantics
12//! - Case **sensitive** (SQL-92 standard; collation-aware folding is out of scope).
13//! - `%` matches zero or more characters.
14//! - `_` matches exactly one character.
15//! - Every other character — including regex metacharacters such as `.`, `(`,
16//! `*`, `[` — matches itself literally. The matcher does not use a regex
17//! engine, so no escaping pitfalls exist.
18
19/// Returns `true` if `s` matches the SQL `LIKE` `pattern`.
20///
21/// `%` matches any run of characters (including empty), `_` matches exactly one
22/// character, and all other characters are matched literally and
23/// case-sensitively.
24pub fn like_match(s: &str, pattern: &str) -> bool {
25 let s_chars: Vec<char> = s.chars().collect();
26 let p_chars: Vec<char> = pattern.chars().collect();
27 like_match_chars(&s_chars, &p_chars)
28}
29
30/// Iterative LIKE matcher with greedy `%` backtracking.
31///
32/// Runs in O(len(s) * len(pattern)) time and O(1) extra space by tracking the
33/// most recent `%` position to backtrack to, avoiding the exponential blowup of
34/// naive recursive backtracking on patterns with many `%` wildcards.
35fn like_match_chars(s: &[char], p: &[char]) -> bool {
36 let (mut si, mut pi) = (0usize, 0usize);
37 // Backtrack anchors: where to resume if a tentative `%` match fails.
38 let mut star_pi: Option<usize> = None;
39 let mut star_si = 0usize;
40
41 while si < s.len() {
42 if pi < p.len() && p[pi] == '%' {
43 // Record the `%` and assume it matches the empty string for now.
44 star_pi = Some(pi);
45 star_si = si;
46 pi += 1;
47 } else if pi < p.len() && (p[pi] == '_' || p[pi] == s[si]) {
48 si += 1;
49 pi += 1;
50 } else if let Some(spi) = star_pi {
51 // Mismatch: let the last `%` consume one more character.
52 star_si += 1;
53 si = star_si;
54 pi = spi + 1;
55 } else {
56 return false;
57 }
58 }
59
60 // Consume any trailing `%` wildcards (they can match the empty string).
61 while pi < p.len() && p[pi] == '%' {
62 pi += 1;
63 }
64 pi == p.len()
65}
66
67#[cfg(test)]
68mod tests {
69 use super::like_match;
70
71 #[test]
72 fn test_basic_wildcards() {
73 assert!(like_match("hello", "hello"));
74 assert!(like_match("hello", "%llo"));
75 assert!(like_match("hello", "h%o"));
76 assert!(like_match("hello", "h_llo"));
77 assert!(like_match("hello", "%"));
78 assert!(like_match("", "%"));
79 assert!(like_match("hello", "h%l%o"));
80 assert!(!like_match("hello", "world"));
81 assert!(!like_match("hello", "hell"));
82 assert!(!like_match("hello", "_ello_"));
83 }
84
85 #[test]
86 fn test_literal_metacharacters() {
87 // Regex metacharacters must be matched literally, not as regex.
88 assert!(like_match("file.txt", "file%"));
89 assert!(like_match("file.txt", "file.txt"));
90 assert!(!like_match("fileXtxt", "file.txt")); // '.' is literal, not "any char"
91 assert!(like_match("test(1)", "%(%"));
92 assert!(like_match("a+b", "a+b"));
93 assert!(like_match("a[b]", "a[b]"));
94 }
95
96 #[test]
97 fn test_case_sensitive() {
98 assert!(like_match("Hello", "Hello"));
99 assert!(!like_match("Hello", "hello"));
100 assert!(!like_match("HELLO", "%llo"));
101 }
102
103 #[test]
104 fn test_many_percent_no_blowup() {
105 // Pathological pattern that would blow up under naive recursion.
106 let s = "a".repeat(50);
107 assert!(like_match(&s, "%a%a%a%a%a%a%a%a%a%a%"));
108 let s2 = "a".repeat(49) + "b";
109 assert!(!like_match(&s2, "%a%a%a%a%a%a%a%a%a%a%a%c"));
110 }
111}