Skip to main content

ix/
planner.rs

1//! Query planner — transforms user input into an optimal index query plan.
2//!
3//! Decomposes regex patterns into required trigram sets.
4
5use crate::trigram::{Extractor, Trigram};
6use regex::Regex;
7use regex_syntax::hir::{Hir, HirKind};
8
9#[derive(Debug)]
10pub enum QueryPlan {
11    /// Fast path: literal string search
12    Literal {
13        pattern: Vec<u8>,
14        trigrams: Vec<Trigram>,
15    },
16
17    /// Regex with extractable literals
18    RegexWithLiterals {
19        regex: Regex,
20        required_trigram_sets: Vec<Vec<Trigram>>,
21    },
22
23    /// Case-insensitive indexed search. Each group = case variants for one
24    /// trigram position. Executor UNIONs within groups, INTERSECTs across.
25    CaseInsensitive {
26        regex: Regex,
27        trigram_groups: Vec<Vec<Trigram>>,
28    },
29
30    /// No literals extractable — full scan fallback
31    FullScan { regex: Regex },
32}
33
34pub struct Planner;
35
36impl Planner {
37    pub fn plan(pattern: &str, is_regex: bool) -> QueryPlan {
38        Self::plan_with_options(pattern, is_regex, false, false)
39    }
40
41    pub fn plan_with_options(
42        pattern: &str,
43        is_regex: bool,
44        ignore_case: bool,
45        multiline: bool,
46    ) -> QueryPlan {
47        let mut final_pattern = pattern.to_string();
48        if multiline && is_regex {
49            final_pattern = format!("(?s){}", final_pattern);
50        }
51
52        if !is_regex && !ignore_case {
53            let bytes = final_pattern.as_bytes().to_vec();
54            let trigrams = Extractor::extract_set(&bytes);
55
56            if trigrams.is_empty() {
57                // Pattern too short for trigrams (< 3 bytes)
58                let regex = Regex::new(&regex::escape(&final_pattern)).unwrap_or_else(|_| Regex::new("").unwrap());
59                return QueryPlan::FullScan { regex };
60            }
61
62            return QueryPlan::Literal {
63                pattern: bytes,
64                trigrams,
65            };
66        }
67
68        // Case-insensitive literal: per-position trigram groups.
69        // Executor UNIONs within each group, INTERSECTs across groups.
70        if !is_regex && ignore_case {
71            let bytes = final_pattern.as_bytes();
72            let groups = Extractor::extract_groups_case_insensitive(bytes);
73            let regex_pat = format!("(?i){}", regex::escape(&final_pattern));
74            let regex = match Regex::new(&regex_pat) {
75                Ok(r) => r,
76                Err(_) => {
77                    return QueryPlan::FullScan {
78                        regex: Regex::new("").unwrap(),
79                    }
80                }
81            };
82
83            if groups.is_empty() {
84                return QueryPlan::FullScan { regex };
85            }
86
87            return QueryPlan::CaseInsensitive {
88                regex,
89                trigram_groups: groups,
90            };
91        }
92
93        let regex_pat = if ignore_case && !final_pattern.starts_with("(?i)") {
94            format!("(?i){final_pattern}")
95        } else {
96            final_pattern.clone()
97        };
98
99        let regex = match Regex::new(&regex_pat) {
100            Ok(r) => r,
101            Err(_) => {
102                return QueryPlan::FullScan {
103                    regex: Regex::new("").unwrap(),
104                };
105            } // Should be handled by CLI
106        };
107
108        let hir = match regex_syntax::parse(&final_pattern) {
109            Ok(h) => h,
110            Err(_) => return QueryPlan::FullScan { regex },
111        };
112
113        let mut literals = Vec::new();
114        Self::walk_hir(&hir, &mut literals);
115
116        // For case-insensitive regex, fall back to FullScan — the (?i) regex
117        // handles matching, and extracting trigram groups from regex literals
118        // adds complexity without much narrowing benefit.
119        if ignore_case {
120            return QueryPlan::FullScan { regex };
121        }
122
123        let required_trigram_sets: Vec<Vec<Trigram>> = literals
124            .iter()
125            .map(|lit| Extractor::extract_set(lit.as_bytes()))
126            .filter(|t| !t.is_empty())
127            .collect();
128
129        if required_trigram_sets.is_empty() {
130            QueryPlan::FullScan { regex }
131        } else {
132            QueryPlan::RegexWithLiterals {
133                regex,
134                required_trigram_sets,
135            }
136        }
137    }
138
139    fn walk_hir(hir: &Hir, out: &mut Vec<String>) {
140        match hir.kind() {
141            HirKind::Literal(lit) => {
142                out.push(String::from_utf8_lossy(&lit.0).to_string());
143            }
144            HirKind::Concat(children) => {
145                let mut current = String::new();
146                for child in children {
147                    match child.kind() {
148                        HirKind::Literal(lit) => {
149                            current.push_str(&String::from_utf8_lossy(&lit.0));
150                        }
151                        _ => {
152                            if current.len() >= 3 {
153                                out.push(current.clone());
154                            }
155                            current.clear();
156                            Self::walk_hir(child, out);
157                        }
158                    }
159                }
160                if current.len() >= 3 {
161                    out.push(current);
162                }
163            }
164            HirKind::Repetition(rep) => {
165                if rep.min >= 1 {
166                    Self::walk_hir(&rep.sub, out);
167                }
168            }
169            // Simplified: we don't extract from Alternation for now as per DESIGN.md
170            _ => {}
171        }
172    }
173}