Skip to main content

sift_core/
verify.rs

1//! Regex compilation — Rust regex syntax (ERE-like), with grep-style `-F`/`-w`/`-x` shaping.
2
3use regex_automata::meta::Regex;
4use regex_syntax::escape;
5
6use crate::search::{CaseMode, SearchMatchFlags, SearchOptions};
7
8pub fn pattern_branch(p: &str, opts: &SearchOptions) -> String {
9    let mut s = if opts.fixed_strings() {
10        escape(p)
11    } else {
12        p.to_string()
13    };
14    if opts.line_regexp() {
15        s = format!("^(?:{s})$");
16    } else if opts.word_regexp() {
17        s = format!(r"\b(?:{s})\b");
18    }
19    s
20}
21
22/// Build a combined `Regex` from one or more patterns.
23///
24/// # Errors
25///
26/// Returns [`regex_automata::meta::BuildError`] if the combined pattern is invalid.
27pub fn compile_search_pattern(
28    patterns: &[String],
29    opts: &SearchOptions,
30) -> Result<Regex, Box<regex_automata::meta::BuildError>> {
31    debug_assert!(!patterns.is_empty());
32    let branches: Vec<String> = patterns.iter().map(|p| pattern_branch(p, opts)).collect();
33    let combined = if branches.len() == 1 {
34        branches[0].clone()
35    } else {
36        branches
37            .into_iter()
38            .map(|b| format!("(?:{b})"))
39            .collect::<Vec<_>>()
40            .join("|")
41    };
42    let mut builder = Regex::builder();
43    if opts.case_insensitive() {
44        builder.syntax(regex_automata::util::syntax::Config::new().case_insensitive(true));
45    }
46    builder.build(&combined).map_err(Box::new)
47}
48
49/// Build a `Regex` for a single pattern.
50///
51/// # Errors
52///
53/// Returns [`regex_automata::meta::BuildError`] if `pattern` is invalid.
54pub fn compile_pattern(
55    pattern: &str,
56    case_insensitive: bool,
57) -> Result<Regex, Box<regex_automata::meta::BuildError>> {
58    let case_mode = if case_insensitive {
59        CaseMode::Insensitive
60    } else {
61        CaseMode::Sensitive
62    };
63    let opts = SearchOptions {
64        flags: SearchMatchFlags::default(),
65        case_mode,
66        max_results: None,
67    };
68    compile_search_pattern(&[pattern.to_string()], &opts)
69}
70
#[cfg(test)]
mod tests {
    use super::*;
    use crate::search::{CaseMode, SearchMatchFlags, SearchOptions};

    /// Assemble `SearchOptions` with the given flags and case mode and no result cap.
    fn opts(flags: SearchMatchFlags, case_mode: CaseMode) -> SearchOptions {
        SearchOptions {
            flags,
            case_mode,
            max_results: None,
        }
    }

    /// Compile `patterns` under the given options, panicking if the build fails.
    fn build(patterns: &[&str], flags: SearchMatchFlags, case_mode: CaseMode) -> Regex {
        let owned: Vec<String> = patterns.iter().map(|p| (*p).to_string()).collect();
        compile_search_pattern(&owned, &opts(flags, case_mode)).unwrap()
    }

    /// Report whether `re` finds a match anywhere in `haystack`.
    fn hits(re: &Regex, haystack: &[u8]) -> bool {
        let mut cache = regex_automata::meta::Cache::new(re);
        re.search_with(&mut cache, &regex_automata::Input::new(haystack))
            .is_some()
    }

    #[test]
    fn alternation_matches_either_pattern() {
        let re = build(
            &["foo", "bar"],
            SearchMatchFlags::default(),
            CaseMode::Sensitive,
        );
        assert!(hits(&re, b"foo"));
        assert!(hits(&re, b"bar"));
        assert!(!hits(&re, b"baz"));
    }

    #[test]
    fn fixed_strings_escape_metacharacters() {
        let re = build(
            &[r"a.c"],
            SearchMatchFlags::FIXED_STRINGS,
            CaseMode::Sensitive,
        );
        assert!(hits(&re, b"a.c"));
        assert!(!hits(&re, b"abc"));
    }

    #[test]
    fn case_insensitive() {
        let re = build(
            &["Hello"],
            SearchMatchFlags::default(),
            CaseMode::Insensitive,
        );
        assert!(hits(&re, b"hello"));
        assert!(hits(&re, b"HELLO"));
    }

    #[test]
    fn word_regexp() {
        let re = build(&["cat"], SearchMatchFlags::WORD_REGEXP, CaseMode::Sensitive);
        assert!(hits(&re, b"a cat here"));
        assert!(!hits(&re, b"concat"));
    }

    #[test]
    fn line_regexp() {
        let re = build(&["yes"], SearchMatchFlags::LINE_REGEXP, CaseMode::Sensitive);
        assert!(hits(&re, b"yes"));
        assert!(!hits(&re, b"oh yes sir"));
    }

    #[test]
    fn invalid_regex_returns_err() {
        let outcome = compile_search_pattern(
            &["(".to_string()],
            &opts(SearchMatchFlags::default(), CaseMode::Sensitive),
        );
        assert!(outcome.is_err());
    }
}