Skip to main content

ix/
planner.rs

1//! Query planner — transforms user input into an optimal index query plan.
2//!
3//! Decomposes regex patterns into required trigram sets.
4
5use crate::trigram::{Extractor, Trigram};
6use regex::Regex;
7use regex_syntax::hir::{Hir, HirKind};
8
9#[derive(Debug)]
10pub enum QueryPlan {
11    /// Fast path: literal string search
12    Literal {
13        pattern: Vec<u8>,
14        trigrams: Vec<Trigram>,
15    },
16
17    /// Regex with extractable literals
18    RegexWithLiterals {
19        regex: Regex,
20        required_trigram_sets: Vec<Vec<Trigram>>,
21    },
22
23    /// Case-insensitive indexed search. Each group = case variants for one
24    /// trigram position. Executor UNIONs within groups, INTERSECTs across.
25    CaseInsensitive {
26        regex: Regex,
27        trigram_groups: Vec<Vec<Trigram>>,
28    },
29
30    /// No literals extractable — full scan fallback
31    FullScan { regex: Regex },
32}
33
34pub struct Planner;
35
36impl Planner {
37    pub fn plan(pattern: &str, is_regex: bool) -> QueryPlan {
38        Self::plan_with_options(pattern, is_regex, false, false)
39    }
40
41    pub fn plan_with_options(
42        pattern: &str,
43        is_regex: bool,
44        ignore_case: bool,
45        multiline: bool,
46    ) -> QueryPlan {
47        let mut final_pattern = pattern.to_string();
48        if multiline && is_regex {
49            final_pattern = format!("(?s){}", final_pattern);
50        }
51
52        if !is_regex && !ignore_case {
53            let bytes = final_pattern.as_bytes().to_vec();
54            let trigrams = Extractor::extract_set(&bytes);
55
56            if trigrams.is_empty() {
57                // Pattern too short for trigrams (< 3 bytes)
58                let regex = Regex::new(&regex::escape(&final_pattern))
59                    .unwrap_or_else(|_| Regex::new("").unwrap());
60                return QueryPlan::FullScan { regex };
61            }
62
63            return QueryPlan::Literal {
64                pattern: bytes,
65                trigrams,
66            };
67        }
68
69        // Case-insensitive literal: per-position trigram groups.
70        // Executor UNIONs within each group, INTERSECTs across groups.
71        if !is_regex && ignore_case {
72            let bytes = final_pattern.as_bytes();
73            let groups = Extractor::extract_groups_case_insensitive(bytes);
74            let regex_pat = format!("(?i){}", regex::escape(&final_pattern));
75            let regex = match Regex::new(&regex_pat) {
76                Ok(r) => r,
77                Err(_) => {
78                    return QueryPlan::FullScan {
79                        regex: Regex::new("").unwrap(),
80                    };
81                }
82            };
83
84            if groups.is_empty() {
85                return QueryPlan::FullScan { regex };
86            }
87
88            return QueryPlan::CaseInsensitive {
89                regex,
90                trigram_groups: groups,
91            };
92        }
93
94        let regex_pat = if ignore_case && !final_pattern.starts_with("(?i)") {
95            format!("(?i){final_pattern}")
96        } else {
97            final_pattern.clone()
98        };
99
100        let regex = match Regex::new(&regex_pat) {
101            Ok(r) => r,
102            Err(_) => {
103                return QueryPlan::FullScan {
104                    regex: Regex::new("").unwrap(),
105                };
106            } // Should be handled by CLI
107        };
108
109        let hir = match regex_syntax::parse(&final_pattern) {
110            Ok(h) => h,
111            Err(_) => return QueryPlan::FullScan { regex },
112        };
113
114        let mut literals = Vec::new();
115        Self::walk_hir(&hir, &mut literals);
116
117        // For case-insensitive regex, fall back to FullScan — the (?i) regex
118        // handles matching, and extracting trigram groups from regex literals
119        // adds complexity without much narrowing benefit.
120        if ignore_case {
121            return QueryPlan::FullScan { regex };
122        }
123
124        let required_trigram_sets: Vec<Vec<Trigram>> = literals
125            .iter()
126            .map(|lit| Extractor::extract_set(lit.as_bytes()))
127            .filter(|t| !t.is_empty())
128            .collect();
129
130        if required_trigram_sets.is_empty() {
131            QueryPlan::FullScan { regex }
132        } else {
133            QueryPlan::RegexWithLiterals {
134                regex,
135                required_trigram_sets,
136            }
137        }
138    }
139
140    fn walk_hir(hir: &Hir, out: &mut Vec<String>) {
141        match hir.kind() {
142            HirKind::Literal(lit) => {
143                out.push(String::from_utf8_lossy(&lit.0).to_string());
144            }
145            HirKind::Concat(children) => {
146                let mut current = String::new();
147                for child in children {
148                    match child.kind() {
149                        HirKind::Literal(lit) => {
150                            current.push_str(&String::from_utf8_lossy(&lit.0));
151                        }
152                        _ => {
153                            if current.len() >= 3 {
154                                out.push(current.clone());
155                            }
156                            current.clear();
157                            Self::walk_hir(child, out);
158                        }
159                    }
160                }
161                if current.len() >= 3 {
162                    out.push(current);
163                }
164            }
165            HirKind::Repetition(rep) => {
166                if rep.min >= 1 {
167                    Self::walk_hir(&rep.sub, out);
168                }
169            }
170            // Simplified: we don't extract from Alternation for now as per DESIGN.md
171            _ => {}
172        }
173    }
174}