1use crate::regex_pool::RegexPool;
8use crate::trigram::{Extractor, Trigram};
9use regex::Regex;
10use regex_syntax::hir::{Hir, HirKind};
11
12#[derive(Debug)]
14pub enum QueryPlan {
15 Literal {
17 pattern: Vec<u8>,
19 trigrams: Vec<Trigram>,
21 regex: Regex,
23 },
24
25 RegexWithLiterals {
27 regex: Regex,
29 required_trigram_sets: Vec<Vec<Trigram>>,
31 },
32
33 CaseInsensitive {
36 regex: Regex,
38 trigram_groups: Vec<Vec<Trigram>>,
40 },
41
42 FullScan {
44 regex: Regex,
46 },
47}
48
49impl QueryPlan {
50 #[must_use]
54 pub fn pattern_str(&self) -> &str {
55 match self {
56 Self::Literal { regex, .. }
57 | Self::RegexWithLiterals { regex, .. }
58 | Self::CaseInsensitive { regex, .. }
59 | Self::FullScan { regex } => regex.as_str(),
60 }
61 }
62}
63
/// Flags controlling how a search pattern is interpreted by the planner.
///
/// All flags default to `false`.
// The four flags are independent user-facing switches, so plain bools are
// kept instead of folding them into an enum.
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy, Debug, Default)]
pub struct QueryOptions {
    /// Treat the pattern as a regular expression instead of a literal string.
    pub is_regex: bool,
    /// Match without regard to case (the planner adds the `(?i)` flag).
    pub ignore_case: bool,
    /// For regex patterns, prefix the pattern with `(?s)`; has no effect on
    /// literal patterns.
    pub multiline: bool,
    /// For literal patterns, wrap the escaped pattern in `\b ... \b`; has no
    /// effect on regex patterns.
    pub word_boundary: bool,
}
78
79pub struct Planner;
81
82impl Planner {
83 #[must_use]
87 pub fn plan(pattern: &str, is_regex: bool) -> QueryPlan {
88 Self::plan_with_options(
89 pattern,
90 QueryOptions {
91 is_regex,
92 ..Default::default()
93 },
94 )
95 }
96
97 #[must_use]
102 pub fn plan_with_pool(pattern: &str, options: QueryOptions, pool: &RegexPool) -> QueryPlan {
103 Self::plan_impl(pattern, options, Some(pool))
104 }
105
106 #[must_use]
113 pub fn plan_with_options(pattern: &str, options: QueryOptions) -> QueryPlan {
114 Self::plan_impl(pattern, options, None)
115 }
116
117 fn compile_regex(pat: &str, pool: Option<&RegexPool>) -> Regex {
118 if let Some(p) = pool
119 && let Ok(re) = p.get_or_compile(pat)
120 {
121 return re;
122 }
123 Regex::new(pat).unwrap_or_else(|_| {
124 pool.map_or_else(
125 || Regex::new("").expect("empty regex should always compile"),
126 |p| p.get_or_compile("").expect("empty regex always compiles"),
127 )
128 })
129 }
130
131 fn plan_impl(pattern: &str, options: QueryOptions, pool: Option<&RegexPool>) -> QueryPlan {
132 let mut final_pattern = pattern.to_string();
133 let mut use_regex = options.is_regex;
134 if options.multiline && use_regex {
135 final_pattern = format!("(?s){final_pattern}");
136 }
137
138 if options.word_boundary && !options.is_regex {
142 if options.ignore_case {
143 final_pattern = format!("(?i)\\b{}\\b", regex::escape(&final_pattern));
144 } else {
145 final_pattern = format!("\\b{}\\b", regex::escape(&final_pattern));
146 }
147 use_regex = true;
148 }
149
150 if !use_regex && !options.ignore_case {
151 let bytes = final_pattern.as_bytes().to_vec();
152 let trigrams = Extractor::extract_set(&bytes);
153
154 let escaped = regex::escape(&final_pattern);
155 let regex = Self::compile_regex(&escaped, pool);
156
157 if trigrams.is_empty() {
158 return QueryPlan::FullScan { regex };
159 }
160
161 return QueryPlan::Literal {
162 pattern: bytes,
163 trigrams,
164 regex,
165 };
166 }
167
168 if !use_regex && options.ignore_case {
171 let bytes = final_pattern.as_bytes();
172 let groups = Extractor::extract_groups_case_insensitive(bytes);
173 let regex_pat = format!("(?i){}", regex::escape(&final_pattern));
174 let regex = Self::compile_regex(®ex_pat, pool);
175
176 if groups.is_empty() {
177 return QueryPlan::FullScan { regex };
178 }
179
180 return QueryPlan::CaseInsensitive {
181 regex,
182 trigram_groups: groups,
183 };
184 }
185
186 let regex_pat = if options.ignore_case && !final_pattern.starts_with("(?i)") {
187 format!("(?i){final_pattern}")
188 } else {
189 final_pattern.clone()
190 };
191
192 let regex = Self::compile_regex(®ex_pat, pool);
193
194 let Ok(hir) = regex_syntax::parse(&final_pattern) else {
195 return QueryPlan::FullScan { regex };
196 };
197
198 let mut literals = Vec::new();
199 Self::walk_hir(&hir, &mut literals);
200
201 if options.ignore_case {
205 return QueryPlan::FullScan { regex };
206 }
207
208 let required_trigram_sets: Vec<Vec<Trigram>> = literals
209 .iter()
210 .map(|lit| Extractor::extract_set(lit.as_bytes()))
211 .filter(|t| !t.is_empty())
212 .collect();
213
214 if required_trigram_sets.is_empty() {
215 QueryPlan::FullScan { regex }
216 } else {
217 QueryPlan::RegexWithLiterals {
218 regex,
219 required_trigram_sets,
220 }
221 }
222 }
223
224 fn walk_hir(hir: &Hir, out: &mut Vec<String>) {
225 match hir.kind() {
226 HirKind::Literal(lit) => {
227 out.push(String::from_utf8_lossy(&lit.0).to_string());
228 }
229 HirKind::Concat(children) => {
230 let mut current = String::new();
231 for child in children {
232 if let HirKind::Literal(lit) = child.kind() {
233 current.push_str(&String::from_utf8_lossy(&lit.0));
234 } else {
235 if current.len() >= 3 {
236 out.push(current.clone());
237 }
238 current.clear();
239 Self::walk_hir(child, out);
240 }
241 }
242 if current.len() >= 3 {
243 out.push(current);
244 }
245 }
246 HirKind::Repetition(rep) if rep.min >= 1 => {
247 Self::walk_hir(&rep.sub, out);
248 }
249 _ => {}
251 }
252 }
253}