1use crate::trigram::{Extractor, Trigram};
6use regex::Regex;
7use regex_syntax::hir::{Hir, HirKind};
8
9#[derive(Debug)]
11pub enum QueryPlan {
12 Literal {
14 pattern: Vec<u8>,
16 trigrams: Vec<Trigram>,
18 regex: Regex,
20 },
21
22 RegexWithLiterals {
24 regex: Regex,
26 required_trigram_sets: Vec<Vec<Trigram>>,
28 },
29
30 CaseInsensitive {
33 regex: Regex,
35 trigram_groups: Vec<Vec<Trigram>>,
37 },
38
39 FullScan {
41 regex: Regex,
43 },
44}
45
46pub struct Planner;
48
49impl Planner {
50 #[must_use]
54 pub fn plan(pattern: &str, is_regex: bool) -> QueryPlan {
55 Self::plan_with_options(pattern, is_regex, false, false)
56 }
57
58 #[must_use]
65 pub fn plan_with_options(
66 pattern: &str,
67 is_regex: bool,
68 ignore_case: bool,
69 multiline: bool,
70 ) -> QueryPlan {
71 let mut final_pattern = pattern.to_string();
72 if multiline && is_regex {
73 final_pattern = format!("(?s){final_pattern}");
74 }
75
76 if !is_regex && !ignore_case {
77 let bytes = final_pattern.as_bytes().to_vec();
78 let trigrams = Extractor::extract_set(&bytes);
79
80 let regex = Regex::new(®ex::escape(&final_pattern))
81 .unwrap_or_else(|_| Regex::new("").expect("empty regex should always compile"));
82
83 if trigrams.is_empty() {
84 return QueryPlan::FullScan { regex };
86 }
87
88 return QueryPlan::Literal {
89 pattern: bytes,
90 trigrams,
91 regex,
92 };
93 }
94
95 if !is_regex && ignore_case {
98 let bytes = final_pattern.as_bytes();
99 let groups = Extractor::extract_groups_case_insensitive(bytes);
100 let regex_pat = format!("(?i){}", regex::escape(&final_pattern));
101 let Ok(regex) = Regex::new(®ex_pat) else {
102 return QueryPlan::FullScan {
103 regex: Regex::new("").expect("empty regex should always compile"),
104 };
105 };
106
107 if groups.is_empty() {
108 return QueryPlan::FullScan { regex };
109 }
110
111 return QueryPlan::CaseInsensitive {
112 regex,
113 trigram_groups: groups,
114 };
115 }
116
117 let regex_pat = if ignore_case && !final_pattern.starts_with("(?i)") {
118 format!("(?i){final_pattern}")
119 } else {
120 final_pattern.clone()
121 };
122
123 let Ok(regex) = Regex::new(®ex_pat) else {
124 return QueryPlan::FullScan {
125 regex: Regex::new("").expect("empty regex should always compile"),
126 };
127 };
128
129 let Ok(hir) = regex_syntax::parse(&final_pattern) else {
130 return QueryPlan::FullScan { regex };
131 };
132
133 let mut literals = Vec::new();
134 Self::walk_hir(&hir, &mut literals);
135
136 if ignore_case {
140 return QueryPlan::FullScan { regex };
141 }
142
143 let required_trigram_sets: Vec<Vec<Trigram>> = literals
144 .iter()
145 .map(|lit| Extractor::extract_set(lit.as_bytes()))
146 .filter(|t| !t.is_empty())
147 .collect();
148
149 if required_trigram_sets.is_empty() {
150 QueryPlan::FullScan { regex }
151 } else {
152 QueryPlan::RegexWithLiterals {
153 regex,
154 required_trigram_sets,
155 }
156 }
157 }
158
159 fn walk_hir(hir: &Hir, out: &mut Vec<String>) {
160 match hir.kind() {
161 HirKind::Literal(lit) => {
162 out.push(String::from_utf8_lossy(&lit.0).to_string());
163 }
164 HirKind::Concat(children) => {
165 let mut current = String::new();
166 for child in children {
167 if let HirKind::Literal(lit) = child.kind() {
168 current.push_str(&String::from_utf8_lossy(&lit.0));
169 } else {
170 if current.len() >= 3 {
171 out.push(current.clone());
172 }
173 current.clear();
174 Self::walk_hir(child, out);
175 }
176 }
177 if current.len() >= 3 {
178 out.push(current);
179 }
180 }
181 HirKind::Repetition(rep)
182 if rep.min >= 1 => {
183 Self::walk_hir(&rep.sub, out);
184 }
185 _ => {}
187 }
188 }
189}