1use crate::trigram::{Extractor, Trigram};
6use regex::Regex;
7use regex_syntax::hir::{Hir, HirKind};
8
9#[derive(Debug)]
10pub enum QueryPlan {
11 Literal {
13 pattern: Vec<u8>,
14 trigrams: Vec<Trigram>,
15 },
16
17 RegexWithLiterals {
19 regex: Regex,
20 required_trigram_sets: Vec<Vec<Trigram>>,
21 },
22
23 CaseInsensitive {
26 regex: Regex,
27 trigram_groups: Vec<Vec<Trigram>>,
28 },
29
30 FullScan { regex: Regex },
32}
33
34pub struct Planner;
35
36impl Planner {
37 pub fn plan(pattern: &str, is_regex: bool) -> QueryPlan {
38 Self::plan_with_options(pattern, is_regex, false, false)
39 }
40
41 pub fn plan_with_options(
42 pattern: &str,
43 is_regex: bool,
44 ignore_case: bool,
45 multiline: bool,
46 ) -> QueryPlan {
47 let mut final_pattern = pattern.to_string();
48 if multiline && is_regex {
49 final_pattern = format!("(?s){}", final_pattern);
50 }
51
52 if !is_regex && !ignore_case {
53 let bytes = final_pattern.as_bytes().to_vec();
54 let trigrams = Extractor::extract_set(&bytes);
55
56 if trigrams.is_empty() {
57 let regex = Regex::new(®ex::escape(&final_pattern)).unwrap_or_else(|_| Regex::new("").unwrap());
59 return QueryPlan::FullScan { regex };
60 }
61
62 return QueryPlan::Literal {
63 pattern: bytes,
64 trigrams,
65 };
66 }
67
68 if !is_regex && ignore_case {
71 let bytes = final_pattern.as_bytes();
72 let groups = Extractor::extract_groups_case_insensitive(bytes);
73 let regex_pat = format!("(?i){}", regex::escape(&final_pattern));
74 let regex = match Regex::new(®ex_pat) {
75 Ok(r) => r,
76 Err(_) => {
77 return QueryPlan::FullScan {
78 regex: Regex::new("").unwrap(),
79 }
80 }
81 };
82
83 if groups.is_empty() {
84 return QueryPlan::FullScan { regex };
85 }
86
87 return QueryPlan::CaseInsensitive {
88 regex,
89 trigram_groups: groups,
90 };
91 }
92
93 let regex_pat = if ignore_case && !final_pattern.starts_with("(?i)") {
94 format!("(?i){final_pattern}")
95 } else {
96 final_pattern.clone()
97 };
98
99 let regex = match Regex::new(®ex_pat) {
100 Ok(r) => r,
101 Err(_) => {
102 return QueryPlan::FullScan {
103 regex: Regex::new("").unwrap(),
104 };
105 } };
107
108 let hir = match regex_syntax::parse(&final_pattern) {
109 Ok(h) => h,
110 Err(_) => return QueryPlan::FullScan { regex },
111 };
112
113 let mut literals = Vec::new();
114 Self::walk_hir(&hir, &mut literals);
115
116 if ignore_case {
120 return QueryPlan::FullScan { regex };
121 }
122
123 let required_trigram_sets: Vec<Vec<Trigram>> = literals
124 .iter()
125 .map(|lit| Extractor::extract_set(lit.as_bytes()))
126 .filter(|t| !t.is_empty())
127 .collect();
128
129 if required_trigram_sets.is_empty() {
130 QueryPlan::FullScan { regex }
131 } else {
132 QueryPlan::RegexWithLiterals {
133 regex,
134 required_trigram_sets,
135 }
136 }
137 }
138
139 fn walk_hir(hir: &Hir, out: &mut Vec<String>) {
140 match hir.kind() {
141 HirKind::Literal(lit) => {
142 out.push(String::from_utf8_lossy(&lit.0).to_string());
143 }
144 HirKind::Concat(children) => {
145 let mut current = String::new();
146 for child in children {
147 match child.kind() {
148 HirKind::Literal(lit) => {
149 current.push_str(&String::from_utf8_lossy(&lit.0));
150 }
151 _ => {
152 if current.len() >= 3 {
153 out.push(current.clone());
154 }
155 current.clear();
156 Self::walk_hir(child, out);
157 }
158 }
159 }
160 if current.len() >= 3 {
161 out.push(current);
162 }
163 }
164 HirKind::Repetition(rep) => {
165 if rep.min >= 1 {
166 Self::walk_hir(&rep.sub, out);
167 }
168 }
169 _ => {}
171 }
172 }
173}