Skip to main content

ix/
planner.rs

1//! Query planner — transforms user input into an optimal index query plan.
2//!
3//! Decomposes regex patterns into required trigram sets.
4//! When a [`RegexPool`] is available, compiled
5//! regexes are sourced from the pool to avoid redundant compilation.
6
7use crate::regex_pool::RegexPool;
8use crate::trigram::{Extractor, Trigram};
9use regex::Regex;
10use regex_syntax::hir::{Hir, HirKind};
11
12/// Optimal query execution strategy chosen by the planner.
13#[derive(Debug)]
14pub enum QueryPlan {
15    /// Fast path: literal string search with pre-computed trigrams.
16    Literal {
17        /// Raw search bytes to match against.
18        pattern: Vec<u8>,
19        /// Set of trigrams extracted from the pattern.
20        trigrams: Vec<Trigram>,
21        /// Pre-compiled regex equivalent to the literal.
22        regex: Regex,
23    },
24
25    /// Regex with extractable literal sub-strings.
26    RegexWithLiterals {
27        /// Pre-compiled user regex.
28        regex: Regex,
29        /// Per-literal required trigram sets (AND across sets).
30        required_trigram_sets: Vec<Vec<Trigram>>,
31    },
32
33    /// Case-insensitive indexed search. Each group = case variants for one
34    /// trigram position. Executor UNIONs within groups, INTERSECTs across.
35    CaseInsensitive {
36        /// Pre-compiled case-insensitive regex.
37        regex: Regex,
38        /// Per-position trigram groups (union within, intersect across).
39        trigram_groups: Vec<Vec<Trigram>>,
40    },
41
42    /// No literals extractable — full scan fallback.
43    FullScan {
44        /// Pre-compiled regex to run against every file.
45        regex: Regex,
46    },
47}
48
49impl QueryPlan {
50    /// Return the regex pattern string for this plan.
51    ///
52    /// Used for cache keying (e.g. negative-result cache fingerprint).
53    #[must_use]
54    pub fn pattern_str(&self) -> &str {
55        match self {
56            Self::Literal { regex, .. }
57            | Self::RegexWithLiterals { regex, .. }
58            | Self::CaseInsensitive { regex, .. }
59            | Self::FullScan { regex } => regex.as_str(),
60        }
61    }
62}
63
64/// Query planning options.
65#[derive(Debug, Default, Clone, Copy)]
66#[allow(clippy::struct_excessive_bools)]
67pub struct QueryOptions {
68    /// Treat pattern as a regex rather than a literal string.
69    pub is_regex: bool,
70    /// Case-insensitive matching.
71    pub ignore_case: bool,
72    /// Enable multiline mode (dot matches newline). Requires `is_regex`.
73    pub multiline: bool,
74    /// Match only at word boundaries (wraps pattern in `\b...\b`).
75    /// Implies `is_regex` internally but enforces whole-word semantics.
76    pub word_boundary: bool,
77}
78
79/// Stateless query planner that decomposes user input into a [`QueryPlan`].
80pub struct Planner;
81
82impl Planner {
83    /// Plan a literal or regex query with default options (non-unicode,
84    /// case-sensitive). See [`plan_with_options`](Self::plan_with_options)
85    /// for full control.
86    #[must_use]
87    pub fn plan(pattern: &str, is_regex: bool) -> QueryPlan {
88        Self::plan_with_options(
89            pattern,
90            QueryOptions {
91                is_regex,
92                ..Default::default()
93            },
94        )
95    }
96
97    /// Plan a query using a regex pool to avoid redundant compilation.
98    ///
99    /// Identical to [`plan_with_options`](Self::plan_with_options) but
100    /// sources compiled regexes from `pool` when available.
101    #[must_use]
102    pub fn plan_with_pool(pattern: &str, options: QueryOptions, pool: &RegexPool) -> QueryPlan {
103        Self::plan_impl(pattern, options, Some(pool))
104    }
105
106    /// Plan a query with full options.
107    ///
108    /// # Panics
109    ///
110    /// Panics if `Regex::new("")` fails (which should never happen since an empty
111    /// pattern is always valid).
112    #[must_use]
113    pub fn plan_with_options(pattern: &str, options: QueryOptions) -> QueryPlan {
114        Self::plan_impl(pattern, options, None)
115    }
116
117    /// Compile a regex, preferring the pool if available. On failure,
118    /// logs a warning and returns `"^$"` (matches nothing) instead of
119    /// panicking. Library code must never call expect/panic per AGENTS.md.
120    fn compile_regex(pat: &str, pool: Option<&RegexPool>) -> Regex {
121        if let Some(p) = pool
122            && let Ok(re) = p.get_or_compile(pat)
123        {
124            return re;
125        }
126        Regex::new(pat).unwrap_or_else(|_| {
127            tracing::warn!(
128                "ix: regex compilation failed for '{}', using '^$' fallback",
129                pat
130            );
131            // ^$ matches end-of-line only — effectively matches nothing useful.
132            // This is safer than "" which matches EVERY line.
133            Regex::new("^$").unwrap_or_else(|_| {
134                // Absolute last resort: a literal 'a' pattern.
135                // This should never fail unless regex crate is broken.
136                Regex::new("a").unwrap()
137            })
138        })
139    }
140
141    fn plan_impl(pattern: &str, options: QueryOptions, pool: Option<&RegexPool>) -> QueryPlan {
142        let mut final_pattern = pattern.to_string();
143        let mut use_regex = options.is_regex;
144        if options.multiline && use_regex {
145            final_pattern = format!("(?s){final_pattern}");
146        }
147
148        // Word-boundary mode: wrap literal in \b word boundaries.
149        // When word_boundary=true, we use regex mode internally but the semantics
150        // are "whole word match" rather than arbitrary regex.
151        if options.word_boundary && !options.is_regex {
152            if options.ignore_case {
153                final_pattern = format!("(?i)\\b{}\\b", regex::escape(&final_pattern));
154            } else {
155                final_pattern = format!("\\b{}\\b", regex::escape(&final_pattern));
156            }
157            use_regex = true;
158        }
159
160        if !use_regex && !options.ignore_case {
161            let bytes = final_pattern.as_bytes().to_vec();
162            let trigrams = Extractor::extract_set(&bytes);
163
164            let escaped = regex::escape(&final_pattern);
165            let regex = Self::compile_regex(&escaped, pool);
166
167            if trigrams.is_empty() {
168                return QueryPlan::FullScan { regex };
169            }
170
171            return QueryPlan::Literal {
172                pattern: bytes,
173                trigrams,
174                regex,
175            };
176        }
177
178        // Case-insensitive literal: per-position trigram groups.
179        // Executor UNIONs within each group, INTERSECTs across groups.
180        if !use_regex && options.ignore_case {
181            let bytes = final_pattern.as_bytes();
182            let groups = Extractor::extract_groups_case_insensitive(bytes);
183            let regex_pat = format!("(?i){}", regex::escape(&final_pattern));
184            let regex = Self::compile_regex(&regex_pat, pool);
185
186            if groups.is_empty() {
187                return QueryPlan::FullScan { regex };
188            }
189
190            return QueryPlan::CaseInsensitive {
191                regex,
192                trigram_groups: groups,
193            };
194        }
195
196        let regex_pat = if options.ignore_case && !final_pattern.starts_with("(?i)") {
197            format!("(?i){final_pattern}")
198        } else {
199            final_pattern.clone()
200        };
201
202        let regex = Self::compile_regex(&regex_pat, pool);
203
204        let Ok(hir) = regex_syntax::parse(&final_pattern) else {
205            return QueryPlan::FullScan { regex };
206        };
207
208        let mut literals = Vec::new();
209        Self::walk_hir(&hir, &mut literals);
210
211        // For case-insensitive regex, fall back to FullScan — the (?i) regex
212        // handles matching, and extracting trigram groups from regex literals
213        // adds complexity without much narrowing benefit.
214        if options.ignore_case {
215            return QueryPlan::FullScan { regex };
216        }
217
218        let required_trigram_sets: Vec<Vec<Trigram>> = literals
219            .iter()
220            .map(|lit| Extractor::extract_set(lit.as_bytes()))
221            .filter(|t| !t.is_empty())
222            .collect();
223
224        if required_trigram_sets.is_empty() {
225            QueryPlan::FullScan { regex }
226        } else {
227            QueryPlan::RegexWithLiterals {
228                regex,
229                required_trigram_sets,
230            }
231        }
232    }
233
234    fn walk_hir(hir: &Hir, out: &mut Vec<String>) {
235        match hir.kind() {
236            HirKind::Literal(lit) => {
237                out.push(String::from_utf8_lossy(&lit.0).to_string());
238            }
239            HirKind::Concat(children) => {
240                let mut current = String::new();
241                for child in children {
242                    if let HirKind::Literal(lit) = child.kind() {
243                        current.push_str(&String::from_utf8_lossy(&lit.0));
244                    } else {
245                        if current.len() >= 3 {
246                            out.push(current.clone());
247                        }
248                        current.clear();
249                        Self::walk_hir(child, out);
250                    }
251                }
252                if current.len() >= 3 {
253                    out.push(current);
254                }
255            }
256            HirKind::Repetition(rep) if rep.min >= 1 => {
257                Self::walk_hir(&rep.sub, out);
258            }
259            // Simplified: we don't extract from Alternation for now as per DESIGN.md
260            _ => {}
261        }
262    }
263}