Skip to main content

sift_core/
verify.rs

1//! Regex compilation — Rust regex syntax (ERE-like), with grep-style `-F`/`-w`/`-x` shaping.
2
3use regex_automata::meta::Regex;
4use regex_syntax::escape;
5
6use crate::search::SearchOptions;
7
8pub fn pattern_branch(p: &str, opts: &SearchOptions) -> String {
9    let mut s = if opts.fixed_strings() {
10        escape(p)
11    } else {
12        p.to_string()
13    };
14    if opts.line_regexp() {
15        s = format!("^(?:{s})$");
16    } else if opts.word_regexp() {
17        s = format!(r"\b(?:{s})\b");
18    }
19    s
20}
21
22/// Build a combined `Regex` from one or more patterns.
23///
24/// # Errors
25///
26/// Returns [`regex_automata::meta::BuildError`] if the combined pattern is invalid.
27pub fn compile_search_pattern(
28    patterns: &[String],
29    opts: &SearchOptions,
30) -> Result<Regex, Box<regex_automata::meta::BuildError>> {
31    debug_assert!(!patterns.is_empty());
32    let branches: Vec<String> = patterns.iter().map(|p| pattern_branch(p, opts)).collect();
33    let combined = if branches.len() == 1 {
34        branches[0].clone()
35    } else {
36        branches
37            .into_iter()
38            .map(|b| format!("(?:{b})"))
39            .collect::<Vec<_>>()
40            .join("|")
41    };
42    let mut builder = Regex::builder();
43    if opts.case_insensitive() {
44        builder.syntax(regex_automata::util::syntax::Config::new().case_insensitive(true));
45    }
46    builder.build(&combined).map_err(Box::new)
47}
48
49/// Build a `Regex` for a single pattern.
50///
51/// # Errors
52///
53/// Returns [`regex_automata::meta::BuildError`] if `pattern` is invalid.
54pub fn compile_pattern(
55    pattern: &str,
56    case_insensitive: bool,
57) -> Result<Regex, Box<regex_automata::meta::BuildError>> {
58    use crate::search::SearchMatchFlags;
59
60    let flags = if case_insensitive {
61        SearchMatchFlags::CASE_INSENSITIVE
62    } else {
63        SearchMatchFlags::empty()
64    };
65    let opts = SearchOptions {
66        flags,
67        max_results: None,
68    };
69    compile_search_pattern(&[pattern.to_string()], &opts)
70}
71
#[cfg(test)]
mod tests {
    use super::*;
    use crate::search::{SearchMatchFlags, SearchOptions};

    /// Options carrying only the given match flags and no result limit.
    fn opts(flags: SearchMatchFlags) -> SearchOptions {
        SearchOptions {
            flags,
            max_results: None,
        }
    }

    /// Compile `patterns` under `flags`, panicking if compilation fails.
    fn build(patterns: &[&str], flags: SearchMatchFlags) -> regex_automata::meta::Regex {
        let owned: Vec<String> = patterns.iter().map(|p| (*p).to_string()).collect();
        compile_search_pattern(&owned, &opts(flags)).unwrap()
    }

    /// Whether `re` finds a match anywhere in `haystack`.
    fn hits(re: &regex_automata::meta::Regex, haystack: &[u8]) -> bool {
        let mut cache = regex_automata::meta::Cache::new(re);
        re.search_with(&mut cache, &regex_automata::Input::new(haystack))
            .is_some()
    }

    #[test]
    fn alternation_matches_either_pattern() {
        let re = build(&["foo", "bar"], SearchMatchFlags::empty());
        assert!(hits(&re, b"foo"));
        assert!(hits(&re, b"bar"));
        assert!(!hits(&re, b"baz"));
    }

    #[test]
    fn fixed_strings_escape_metacharacters() {
        let re = build(&[r"a.c"], SearchMatchFlags::FIXED_STRINGS);
        assert!(hits(&re, b"a.c"));
        assert!(!hits(&re, b"abc"));
    }

    #[test]
    fn case_insensitive() {
        let re = build(&["Hello"], SearchMatchFlags::CASE_INSENSITIVE);
        assert!(hits(&re, b"hello"));
        assert!(hits(&re, b"HELLO"));
    }

    #[test]
    fn word_regexp() {
        let re = build(&["cat"], SearchMatchFlags::WORD_REGEXP);
        assert!(hits(&re, b"a cat here"));
        assert!(!hits(&re, b"concat"));
    }

    #[test]
    fn line_regexp() {
        let re = build(&["yes"], SearchMatchFlags::LINE_REGEXP);
        assert!(hits(&re, b"yes"));
        assert!(!hits(&re, b"oh yes sir"));
    }

    #[test]
    fn invalid_regex_returns_err() {
        let unbalanced = ["(".to_string()];
        let result = compile_search_pattern(&unbalanced, &opts(SearchMatchFlags::empty()));
        assert!(result.is_err());
    }
}