1use crate::trigram::{Extractor, Trigram};
6use regex::Regex;
7use regex_syntax::hir::{Hir, HirKind};
8
9#[derive(Debug)]
10pub enum QueryPlan {
11 Literal {
13 pattern: Vec<u8>,
14 trigrams: Vec<Trigram>,
15 },
16
17 RegexWithLiterals {
19 regex: Regex,
20 required_trigram_sets: Vec<Vec<Trigram>>,
21 },
22
23 CaseInsensitive {
26 regex: Regex,
27 trigram_groups: Vec<Vec<Trigram>>,
28 },
29
30 FullScan { regex: Regex },
32}
33
34pub struct Planner;
35
36impl Planner {
37 pub fn plan(pattern: &str, is_regex: bool) -> QueryPlan {
38 Self::plan_with_options(pattern, is_regex, false, false)
39 }
40
41 pub fn plan_with_options(
42 pattern: &str,
43 is_regex: bool,
44 ignore_case: bool,
45 multiline: bool,
46 ) -> QueryPlan {
47 let mut final_pattern = pattern.to_string();
48 if multiline && is_regex {
49 final_pattern = format!("(?s){}", final_pattern);
50 }
51
52 if !is_regex && !ignore_case {
53 let bytes = final_pattern.as_bytes().to_vec();
54 let trigrams = Extractor::extract_set(&bytes);
55
56 if trigrams.is_empty() {
57 let regex = Regex::new(®ex::escape(&final_pattern))
59 .unwrap_or_else(|_| Regex::new("").unwrap());
60 return QueryPlan::FullScan { regex };
61 }
62
63 return QueryPlan::Literal {
64 pattern: bytes,
65 trigrams,
66 };
67 }
68
69 if !is_regex && ignore_case {
72 let bytes = final_pattern.as_bytes();
73 let groups = Extractor::extract_groups_case_insensitive(bytes);
74 let regex_pat = format!("(?i){}", regex::escape(&final_pattern));
75 let regex = match Regex::new(®ex_pat) {
76 Ok(r) => r,
77 Err(_) => {
78 return QueryPlan::FullScan {
79 regex: Regex::new("").unwrap(),
80 };
81 }
82 };
83
84 if groups.is_empty() {
85 return QueryPlan::FullScan { regex };
86 }
87
88 return QueryPlan::CaseInsensitive {
89 regex,
90 trigram_groups: groups,
91 };
92 }
93
94 let regex_pat = if ignore_case && !final_pattern.starts_with("(?i)") {
95 format!("(?i){final_pattern}")
96 } else {
97 final_pattern.clone()
98 };
99
100 let regex = match Regex::new(®ex_pat) {
101 Ok(r) => r,
102 Err(_) => {
103 return QueryPlan::FullScan {
104 regex: Regex::new("").unwrap(),
105 };
106 } };
108
109 let hir = match regex_syntax::parse(&final_pattern) {
110 Ok(h) => h,
111 Err(_) => return QueryPlan::FullScan { regex },
112 };
113
114 let mut literals = Vec::new();
115 Self::walk_hir(&hir, &mut literals);
116
117 if ignore_case {
121 return QueryPlan::FullScan { regex };
122 }
123
124 let required_trigram_sets: Vec<Vec<Trigram>> = literals
125 .iter()
126 .map(|lit| Extractor::extract_set(lit.as_bytes()))
127 .filter(|t| !t.is_empty())
128 .collect();
129
130 if required_trigram_sets.is_empty() {
131 QueryPlan::FullScan { regex }
132 } else {
133 QueryPlan::RegexWithLiterals {
134 regex,
135 required_trigram_sets,
136 }
137 }
138 }
139
140 fn walk_hir(hir: &Hir, out: &mut Vec<String>) {
141 match hir.kind() {
142 HirKind::Literal(lit) => {
143 out.push(String::from_utf8_lossy(&lit.0).to_string());
144 }
145 HirKind::Concat(children) => {
146 let mut current = String::new();
147 for child in children {
148 match child.kind() {
149 HirKind::Literal(lit) => {
150 current.push_str(&String::from_utf8_lossy(&lit.0));
151 }
152 _ => {
153 if current.len() >= 3 {
154 out.push(current.clone());
155 }
156 current.clear();
157 Self::walk_hir(child, out);
158 }
159 }
160 }
161 if current.len() >= 3 {
162 out.push(current);
163 }
164 }
165 HirKind::Repetition(rep) => {
166 if rep.min >= 1 {
167 Self::walk_hir(&rep.sub, out);
168 }
169 }
170 _ => {}
172 }
173 }
174}