Skip to main content

ix/
planner.rs

1//! Query planner — transforms user input into an optimal index query plan.
2//!
3//! Decomposes regex patterns into required trigram sets.
4
5use crate::trigram::{Extractor, Trigram};
6use regex::Regex;
7use regex_syntax::hir::{Hir, HirKind};
8
9/// Optimal query execution strategy chosen by the planner.
10#[derive(Debug)]
11pub enum QueryPlan {
12    /// Fast path: literal string search with pre-computed trigrams.
13    Literal {
14        /// Raw search bytes to match against.
15        pattern: Vec<u8>,
16        /// Set of trigrams extracted from the pattern.
17        trigrams: Vec<Trigram>,
18        /// Pre-compiled regex equivalent to the literal.
19        regex: Regex,
20    },
21
22    /// Regex with extractable literal sub-strings.
23    RegexWithLiterals {
24        /// Pre-compiled user regex.
25        regex: Regex,
26        /// Per-literal required trigram sets (AND across sets).
27        required_trigram_sets: Vec<Vec<Trigram>>,
28    },
29
30    /// Case-insensitive indexed search. Each group = case variants for one
31    /// trigram position. Executor UNIONs within groups, INTERSECTs across.
32    CaseInsensitive {
33        /// Pre-compiled case-insensitive regex.
34        regex: Regex,
35        /// Per-position trigram groups (union within, intersect across).
36        trigram_groups: Vec<Vec<Trigram>>,
37    },
38
39    /// No literals extractable — full scan fallback.
40    FullScan {
41        /// Pre-compiled regex to run against every file.
42        regex: Regex,
43    },
44}
45
46/// Stateless query planner that decomposes user input into a [`QueryPlan`].
47pub struct Planner;
48
49impl Planner {
50    /// Plan a literal or regex query with default options (non-unicode,
51    /// case-sensitive). See [`plan_with_options`](Self::plan_with_options)
52    /// for full control.
53    #[must_use]
54    pub fn plan(pattern: &str, is_regex: bool) -> QueryPlan {
55        Self::plan_with_options(pattern, is_regex, false, false)
56    }
57
58    /// Plan a query with full options.
59    ///
60    /// # Panics
61    ///
62    /// Panics if `Regex::new("")` fails (which should never happen since an empty
63    /// pattern is always valid).
64    #[must_use] 
65    pub fn plan_with_options(
66        pattern: &str,
67        is_regex: bool,
68        ignore_case: bool,
69        multiline: bool,
70    ) -> QueryPlan {
71        let mut final_pattern = pattern.to_string();
72        if multiline && is_regex {
73            final_pattern = format!("(?s){final_pattern}");
74        }
75
76        if !is_regex && !ignore_case {
77            let bytes = final_pattern.as_bytes().to_vec();
78            let trigrams = Extractor::extract_set(&bytes);
79
80            let regex = Regex::new(&regex::escape(&final_pattern))
81                .unwrap_or_else(|_| Regex::new("").expect("empty regex should always compile"));
82
83            if trigrams.is_empty() {
84                // Pattern too short for trigrams (< 3 bytes)
85                return QueryPlan::FullScan { regex };
86            }
87
88            return QueryPlan::Literal {
89                pattern: bytes,
90                trigrams,
91                regex,
92            };
93        }
94
95        // Case-insensitive literal: per-position trigram groups.
96        // Executor UNIONs within each group, INTERSECTs across groups.
97        if !is_regex && ignore_case {
98            let bytes = final_pattern.as_bytes();
99            let groups = Extractor::extract_groups_case_insensitive(bytes);
100            let regex_pat = format!("(?i){}", regex::escape(&final_pattern));
101            let Ok(regex) = Regex::new(&regex_pat) else {
102                return QueryPlan::FullScan {
103                    regex: Regex::new("").expect("empty regex should always compile"),
104                };
105            };
106
107            if groups.is_empty() {
108                return QueryPlan::FullScan { regex };
109            }
110
111            return QueryPlan::CaseInsensitive {
112                regex,
113                trigram_groups: groups,
114            };
115        }
116
117        let regex_pat = if ignore_case && !final_pattern.starts_with("(?i)") {
118            format!("(?i){final_pattern}")
119        } else {
120            final_pattern.clone()
121        };
122
123        let Ok(regex) = Regex::new(&regex_pat) else {
124            return QueryPlan::FullScan {
125                regex: Regex::new("").expect("empty regex should always compile"),
126            };
127        };
128
129        let Ok(hir) = regex_syntax::parse(&final_pattern) else {
130            return QueryPlan::FullScan { regex };
131        };
132
133        let mut literals = Vec::new();
134        Self::walk_hir(&hir, &mut literals);
135
136        // For case-insensitive regex, fall back to FullScan — the (?i) regex
137        // handles matching, and extracting trigram groups from regex literals
138        // adds complexity without much narrowing benefit.
139        if ignore_case {
140            return QueryPlan::FullScan { regex };
141        }
142
143        let required_trigram_sets: Vec<Vec<Trigram>> = literals
144            .iter()
145            .map(|lit| Extractor::extract_set(lit.as_bytes()))
146            .filter(|t| !t.is_empty())
147            .collect();
148
149        if required_trigram_sets.is_empty() {
150            QueryPlan::FullScan { regex }
151        } else {
152            QueryPlan::RegexWithLiterals {
153                regex,
154                required_trigram_sets,
155            }
156        }
157    }
158
159    fn walk_hir(hir: &Hir, out: &mut Vec<String>) {
160        match hir.kind() {
161            HirKind::Literal(lit) => {
162                out.push(String::from_utf8_lossy(&lit.0).to_string());
163            }
164            HirKind::Concat(children) => {
165                let mut current = String::new();
166                for child in children {
167                    if let HirKind::Literal(lit) = child.kind() {
168                        current.push_str(&String::from_utf8_lossy(&lit.0));
169                    } else {
170                        if current.len() >= 3 {
171                            out.push(current.clone());
172                        }
173                        current.clear();
174                        Self::walk_hir(child, out);
175                    }
176                }
177                if current.len() >= 3 {
178                    out.push(current);
179                }
180            }
181            HirKind::Repetition(rep)
182                if rep.min >= 1 => {
183                    Self::walk_hir(&rep.sub, out);
184                }
185            // Simplified: we don't extract from Alternation for now as per DESIGN.md
186            _ => {}
187        }
188    }
189}