ix/planner.rs
1//! Query planner — transforms user input into an optimal index query plan.
2//!
3//! Decomposes regex patterns into required trigram sets.
4//! When a [`RegexPool`] is available, compiled
5//! regexes are sourced from the pool to avoid redundant compilation.
6
7use crate::regex_pool::RegexPool;
8use crate::trigram::{Extractor, Trigram};
9use regex::Regex;
10use regex_syntax::hir::{Hir, HirKind};
11
12/// Optimal query execution strategy chosen by the planner.
13#[derive(Debug)]
14pub enum QueryPlan {
15 /// Fast path: literal string search with pre-computed trigrams.
16 Literal {
17 /// Raw search bytes to match against.
18 pattern: Vec<u8>,
19 /// Set of trigrams extracted from the pattern.
20 trigrams: Vec<Trigram>,
21 /// Pre-compiled regex equivalent to the literal.
22 regex: Regex,
23 },
24
25 /// Regex with extractable literal sub-strings.
26 RegexWithLiterals {
27 /// Pre-compiled user regex.
28 regex: Regex,
29 /// Per-literal required trigram sets (AND across sets).
30 required_trigram_sets: Vec<Vec<Trigram>>,
31 },
32
33 /// Case-insensitive indexed search. Each group = case variants for one
34 /// trigram position. Executor UNIONs within groups, INTERSECTs across.
35 CaseInsensitive {
36 /// Pre-compiled case-insensitive regex.
37 regex: Regex,
38 /// Per-position trigram groups (union within, intersect across).
39 trigram_groups: Vec<Vec<Trigram>>,
40 },
41
42 /// No literals extractable — full scan fallback.
43 FullScan {
44 /// Pre-compiled regex to run against every file.
45 regex: Regex,
46 },
47}
48
49impl QueryPlan {
50 /// Return the regex pattern string for this plan.
51 ///
52 /// Used for cache keying (e.g. negative-result cache fingerprint).
53 #[must_use]
54 pub fn pattern_str(&self) -> &str {
55 match self {
56 Self::Literal { regex, .. }
57 | Self::RegexWithLiterals { regex, .. }
58 | Self::CaseInsensitive { regex, .. }
59 | Self::FullScan { regex } => regex.as_str(),
60 }
61 }
62}
63
64/// Query planning options.
65#[derive(Debug, Default, Clone, Copy)]
66#[allow(clippy::struct_excessive_bools)]
67pub struct QueryOptions {
68 /// Treat pattern as a regex rather than a literal string.
69 pub is_regex: bool,
70 /// Case-insensitive matching.
71 pub ignore_case: bool,
72 /// Enable multiline mode (dot matches newline). Requires `is_regex`.
73 pub multiline: bool,
74 /// Match only at word boundaries (wraps pattern in `\b...\b`).
75 /// Implies `is_regex` internally but enforces whole-word semantics.
76 pub word_boundary: bool,
77}
78
79/// Stateless query planner that decomposes user input into a [`QueryPlan`].
80pub struct Planner;
81
82impl Planner {
83 /// Plan a literal or regex query with default options (non-unicode,
84 /// case-sensitive). See [`plan_with_options`](Self::plan_with_options)
85 /// for full control.
86 #[must_use]
87 pub fn plan(pattern: &str, is_regex: bool) -> QueryPlan {
88 Self::plan_with_options(
89 pattern,
90 QueryOptions {
91 is_regex,
92 ..Default::default()
93 },
94 )
95 }
96
97 /// Plan a query using a regex pool to avoid redundant compilation.
98 ///
99 /// Identical to [`plan_with_options`](Self::plan_with_options) but
100 /// sources compiled regexes from `pool` when available.
101 #[must_use]
102 pub fn plan_with_pool(pattern: &str, options: QueryOptions, pool: &RegexPool) -> QueryPlan {
103 Self::plan_impl(pattern, options, Some(pool))
104 }
105
106 /// Plan a query with full options.
107 ///
108 /// # Panics
109 ///
110 /// Panics if `Regex::new("")` fails (which should never happen since an empty
111 /// pattern is always valid).
112 #[must_use]
113 pub fn plan_with_options(pattern: &str, options: QueryOptions) -> QueryPlan {
114 Self::plan_impl(pattern, options, None)
115 }
116
117 /// Compile a regex, preferring the pool if available. On failure,
118 /// logs a warning and returns `"^$"` (matches nothing) instead of
119 /// panicking. Library code must never call expect/panic per AGENTS.md.
120 fn compile_regex(pat: &str, pool: Option<&RegexPool>) -> Regex {
121 if let Some(p) = pool
122 && let Ok(re) = p.get_or_compile(pat)
123 {
124 return re;
125 }
126 Regex::new(pat).unwrap_or_else(|_| {
127 tracing::warn!(
128 "ix: regex compilation failed for '{}', using '^$' fallback",
129 pat
130 );
131 // ^$ matches end-of-line only — effectively matches nothing useful.
132 // This is safer than "" which matches EVERY line.
133 Regex::new("^$").unwrap_or_else(|_| {
134 // Absolute last resort: a literal 'a' pattern.
135 // This should never fail unless regex crate is broken.
136 Regex::new("a").unwrap()
137 })
138 })
139 }
140
141 fn plan_impl(pattern: &str, options: QueryOptions, pool: Option<&RegexPool>) -> QueryPlan {
142 let mut final_pattern = pattern.to_string();
143 let mut use_regex = options.is_regex;
144 if options.multiline && use_regex {
145 final_pattern = format!("(?s){final_pattern}");
146 }
147
148 // Word-boundary mode: wrap literal in \b word boundaries.
149 // When word_boundary=true, we use regex mode internally but the semantics
150 // are "whole word match" rather than arbitrary regex.
151 if options.word_boundary && !options.is_regex {
152 if options.ignore_case {
153 final_pattern = format!("(?i)\\b{}\\b", regex::escape(&final_pattern));
154 } else {
155 final_pattern = format!("\\b{}\\b", regex::escape(&final_pattern));
156 }
157 use_regex = true;
158 }
159
160 if !use_regex && !options.ignore_case {
161 let bytes = final_pattern.as_bytes().to_vec();
162 let trigrams = Extractor::extract_set(&bytes);
163
164 let escaped = regex::escape(&final_pattern);
165 let regex = Self::compile_regex(&escaped, pool);
166
167 if trigrams.is_empty() {
168 return QueryPlan::FullScan { regex };
169 }
170
171 return QueryPlan::Literal {
172 pattern: bytes,
173 trigrams,
174 regex,
175 };
176 }
177
178 // Case-insensitive literal: per-position trigram groups.
179 // Executor UNIONs within each group, INTERSECTs across groups.
180 if !use_regex && options.ignore_case {
181 let bytes = final_pattern.as_bytes();
182 let groups = Extractor::extract_groups_case_insensitive(bytes);
183 let regex_pat = format!("(?i){}", regex::escape(&final_pattern));
184 let regex = Self::compile_regex(®ex_pat, pool);
185
186 if groups.is_empty() {
187 return QueryPlan::FullScan { regex };
188 }
189
190 return QueryPlan::CaseInsensitive {
191 regex,
192 trigram_groups: groups,
193 };
194 }
195
196 let regex_pat = if options.ignore_case && !final_pattern.starts_with("(?i)") {
197 format!("(?i){final_pattern}")
198 } else {
199 final_pattern.clone()
200 };
201
202 let regex = Self::compile_regex(®ex_pat, pool);
203
204 let Ok(hir) = regex_syntax::parse(&final_pattern) else {
205 return QueryPlan::FullScan { regex };
206 };
207
208 let mut literals = Vec::new();
209 Self::walk_hir(&hir, &mut literals);
210
211 // For case-insensitive regex, fall back to FullScan — the (?i) regex
212 // handles matching, and extracting trigram groups from regex literals
213 // adds complexity without much narrowing benefit.
214 if options.ignore_case {
215 return QueryPlan::FullScan { regex };
216 }
217
218 let required_trigram_sets: Vec<Vec<Trigram>> = literals
219 .iter()
220 .map(|lit| Extractor::extract_set(lit.as_bytes()))
221 .filter(|t| !t.is_empty())
222 .collect();
223
224 if required_trigram_sets.is_empty() {
225 QueryPlan::FullScan { regex }
226 } else {
227 QueryPlan::RegexWithLiterals {
228 regex,
229 required_trigram_sets,
230 }
231 }
232 }
233
234 fn walk_hir(hir: &Hir, out: &mut Vec<String>) {
235 match hir.kind() {
236 HirKind::Literal(lit) => {
237 out.push(String::from_utf8_lossy(&lit.0).to_string());
238 }
239 HirKind::Concat(children) => {
240 let mut current = String::new();
241 for child in children {
242 if let HirKind::Literal(lit) = child.kind() {
243 current.push_str(&String::from_utf8_lossy(&lit.0));
244 } else {
245 if current.len() >= 3 {
246 out.push(current.clone());
247 }
248 current.clear();
249 Self::walk_hir(child, out);
250 }
251 }
252 if current.len() >= 3 {
253 out.push(current);
254 }
255 }
256 HirKind::Repetition(rep) if rep.min >= 1 => {
257 Self::walk_hir(&rep.sub, out);
258 }
259 // Simplified: we don't extract from Alternation for now as per DESIGN.md
260 _ => {}
261 }
262 }
263}