Skip to main content

sochdb_query/
like.rs

1//! Canonical SQL `LIKE` pattern matching.
2//!
3//! This is the single source of truth for `LIKE` semantics across every query
4//! path (storage scan, filter pushdown, expression evaluation, virtual/plugin
5//! tables). Historically the engine carried four divergent implementations that
6//! disagreed on case sensitivity and on whether regex metacharacters in the
7//! subject/pattern were treated literally. They have all been routed through
8//! [`like_match`] so that `LIKE` behaves identically regardless of the path a
9//! query takes.
10//!
11//! # Semantics
12//! - Case **sensitive** (SQL-92 standard; collation-aware folding is out of scope).
13//! - `%` matches zero or more characters.
14//! - `_` matches exactly one character.
15//! - Every other character — including regex metacharacters such as `.`, `(`,
16//!   `*`, `[` — matches itself literally. The matcher does not use a regex
17//!   engine, so no escaping pitfalls exist.
18
19/// Returns `true` if `s` matches the SQL `LIKE` `pattern`.
20///
21/// `%` matches any run of characters (including empty), `_` matches exactly one
22/// character, and all other characters are matched literally and
23/// case-sensitively.
24pub fn like_match(s: &str, pattern: &str) -> bool {
25    let s_chars: Vec<char> = s.chars().collect();
26    let p_chars: Vec<char> = pattern.chars().collect();
27    like_match_chars(&s_chars, &p_chars)
28}
29
30/// Iterative LIKE matcher with greedy `%` backtracking.
31///
32/// Runs in O(len(s) * len(pattern)) time and O(1) extra space by tracking the
33/// most recent `%` position to backtrack to, avoiding the exponential blowup of
34/// naive recursive backtracking on patterns with many `%` wildcards.
35fn like_match_chars(s: &[char], p: &[char]) -> bool {
36    let (mut si, mut pi) = (0usize, 0usize);
37    // Backtrack anchors: where to resume if a tentative `%` match fails.
38    let mut star_pi: Option<usize> = None;
39    let mut star_si = 0usize;
40
41    while si < s.len() {
42        if pi < p.len() && p[pi] == '%' {
43            // Record the `%` and assume it matches the empty string for now.
44            star_pi = Some(pi);
45            star_si = si;
46            pi += 1;
47        } else if pi < p.len() && (p[pi] == '_' || p[pi] == s[si]) {
48            si += 1;
49            pi += 1;
50        } else if let Some(spi) = star_pi {
51            // Mismatch: let the last `%` consume one more character.
52            star_si += 1;
53            si = star_si;
54            pi = spi + 1;
55        } else {
56            return false;
57        }
58    }
59
60    // Consume any trailing `%` wildcards (they can match the empty string).
61    while pi < p.len() && p[pi] == '%' {
62        pi += 1;
63    }
64    pi == p.len()
65}
66
67#[cfg(test)]
68mod tests {
69    use super::like_match;
70
71    #[test]
72    fn test_basic_wildcards() {
73        assert!(like_match("hello", "hello"));
74        assert!(like_match("hello", "%llo"));
75        assert!(like_match("hello", "h%o"));
76        assert!(like_match("hello", "h_llo"));
77        assert!(like_match("hello", "%"));
78        assert!(like_match("", "%"));
79        assert!(like_match("hello", "h%l%o"));
80        assert!(!like_match("hello", "world"));
81        assert!(!like_match("hello", "hell"));
82        assert!(!like_match("hello", "_ello_"));
83    }
84
85    #[test]
86    fn test_literal_metacharacters() {
87        // Regex metacharacters must be matched literally, not as regex.
88        assert!(like_match("file.txt", "file%"));
89        assert!(like_match("file.txt", "file.txt"));
90        assert!(!like_match("fileXtxt", "file.txt")); // '.' is literal, not "any char"
91        assert!(like_match("test(1)", "%(%"));
92        assert!(like_match("a+b", "a+b"));
93        assert!(like_match("a[b]", "a[b]"));
94    }
95
96    #[test]
97    fn test_case_sensitive() {
98        assert!(like_match("Hello", "Hello"));
99        assert!(!like_match("Hello", "hello"));
100        assert!(!like_match("HELLO", "%llo"));
101    }
102
103    #[test]
104    fn test_many_percent_no_blowup() {
105        // Pathological pattern that would blow up under naive recursion.
106        let s = "a".repeat(50);
107        assert!(like_match(&s, "%a%a%a%a%a%a%a%a%a%a%"));
108        let s2 = "a".repeat(49) + "b";
109        assert!(!like_match(&s2, "%a%a%a%a%a%a%a%a%a%a%a%c"));
110    }
111}