1use std::collections::HashMap;
24
25#[derive(Debug, Default)]
27pub struct QueryOptimizer {
28 cost_model: CostModel,
29 cardinality_hints: HashMap<String, CardinalityHint>,
30 total_edges: usize,
31}
32
33#[derive(Debug, Clone)]
35pub struct CostModel {
36 pub row_read_cost: f64,
38 pub index_lookup_cost: f64,
40 pub seq_scan_cost: f64,
42 pub vector_search_cost: f64,
44}
45
46impl Default for CostModel {
47 fn default() -> Self {
48 Self {
49 row_read_cost: 1.0,
50 index_lookup_cost: 0.5,
51 seq_scan_cost: 0.1,
52 vector_search_cost: 10.0,
53 }
54 }
55}
56
57#[derive(Debug, Clone)]
59pub struct CardinalityHint {
60 pub cardinality: usize,
62 pub confidence: f64,
64 pub source: CardinalitySource,
66}
67
68#[derive(Debug, Clone, Copy, PartialEq, Eq)]
70pub enum CardinalitySource {
71 Exact,
73 HyperLogLog,
75 Histogram,
77 Default,
79}
80
81#[derive(Debug, Clone)]
83pub enum QueryPredicate {
84 Eq(String, String),
86 Range {
88 column: String,
89 min: Option<String>,
90 max: Option<String>,
91 },
92 In(String, Vec<String>),
94 Prefix(String, String),
96 TimeRange(u64, u64),
98 Project(u16),
100 Tenant(u32),
102 SpanType(String),
104}
105
106#[derive(Debug, Clone, PartialEq)]
108pub enum QueryOperation {
109 PointLookup,
111 RangeScan,
113 FullScan,
115 VectorSearch { k: usize },
117 LsmRangeScan { start_us: u64, end_us: u64 },
119 GraphTraversal {
121 direction: TraversalDirection,
122 max_depth: usize,
123 },
124}
125
126#[derive(Debug, Clone)]
128pub enum IndexSelection {
129 PrimaryKey,
131 Secondary(String),
133 TimeIndex,
135 VectorIndex,
137 FullScan,
139 LsmScan,
141 CausalIndex,
143 ProjectIndex,
145 MultiIndex(Vec<IndexSelection>),
147}
148
149#[derive(Debug, Clone)]
151pub struct QueryCost {
152 pub estimated_cost: f64,
154 pub total_cost: f64,
156 pub estimated_rows: usize,
158 pub records_returned: usize,
160 pub index: IndexSelection,
162 pub operation: QueryOperation,
164 pub breakdown: Vec<(QueryOperation, f64)>,
166}
167
168#[derive(Debug, Clone, Copy, PartialEq, Eq)]
170pub enum TraversalDirection {
171 Forward,
172 Backward,
173}
174
175#[derive(Debug, Clone)]
177pub struct QueryPlan {
178 pub cost: QueryCost,
180 pub predicates: Vec<QueryPredicate>,
182 pub direction: TraversalDirection,
184 pub limit: Option<usize>,
186 pub index_selection: IndexSelection,
188 pub operations: Vec<QueryOperation>,
190}
191
192impl QueryOptimizer {
193 pub fn new() -> Self {
195 Self::default()
196 }
197
198 pub fn with_cost_model(cost_model: CostModel) -> Self {
200 Self {
201 cost_model,
202 ..Default::default()
203 }
204 }
205
206 pub fn update_total_edges(&mut self, count: usize, _source: CardinalitySource) {
208 self.total_edges = count;
209 }
210
211 pub fn set_cardinality_hint(&mut self, column: &str, hint: CardinalityHint) {
213 self.cardinality_hints.insert(column.to_string(), hint);
214 }
215
216 pub fn plan_query(&self, predicates: &[QueryPredicate], limit: Option<usize>) -> QueryPlan {
221 if predicates.is_empty() {
222 return self.build_plan(
223 IndexSelection::FullScan,
224 QueryOperation::FullScan,
225 predicates,
226 limit,
227 );
228 }
229
230 let mut candidates: Vec<(IndexSelection, QueryOperation, f64, usize)> = Vec::new();
232
233 for pred in predicates {
234 let sel = self.estimate_selectivity(pred);
235 let est_rows = (self.total_edges as f64 * sel).max(1.0) as usize;
236
237 match pred {
238 QueryPredicate::Eq(_, _) => {
239 let cost = self.cost_model.index_lookup_cost;
240 candidates.push((IndexSelection::PrimaryKey, QueryOperation::PointLookup, cost, 1));
241 }
242 QueryPredicate::TimeRange(start, end) => {
243 let cost = est_rows as f64 * self.cost_model.row_read_cost;
244 candidates.push((
245 IndexSelection::TimeIndex,
246 QueryOperation::LsmRangeScan {
247 start_us: *start,
248 end_us: *end,
249 },
250 cost,
251 est_rows,
252 ));
253 }
254 QueryPredicate::Range { .. } | QueryPredicate::Prefix(_, _) => {
255 let cost = est_rows as f64 * self.cost_model.row_read_cost;
256 candidates.push((
257 IndexSelection::LsmScan,
258 QueryOperation::RangeScan,
259 cost,
260 est_rows,
261 ));
262 }
263 QueryPredicate::Project(_) => {
264 let cost = est_rows as f64 * self.cost_model.index_lookup_cost;
265 candidates.push((
266 IndexSelection::ProjectIndex,
267 QueryOperation::RangeScan,
268 cost,
269 est_rows,
270 ));
271 }
272 QueryPredicate::Tenant(_) | QueryPredicate::SpanType(_) | QueryPredicate::In(_, _) => {
273 let cost = est_rows as f64 * self.cost_model.row_read_cost;
274 candidates.push((
275 IndexSelection::FullScan,
276 QueryOperation::RangeScan,
277 cost,
278 est_rows,
279 ));
280 }
281 }
282 }
283
284 let full_scan_cost = self.total_edges.max(1) as f64 * self.cost_model.seq_scan_cost;
286 candidates.push((
287 IndexSelection::FullScan,
288 QueryOperation::FullScan,
289 full_scan_cost,
290 self.total_edges.max(1),
291 ));
292
293 candidates.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal));
295 let (index, operation, _cost, _rows) = candidates.into_iter().next().unwrap();
296
297 self.build_plan(index, operation, predicates, limit)
298 }
299
300 fn build_plan(
302 &self,
303 index: IndexSelection,
304 operation: QueryOperation,
305 predicates: &[QueryPredicate],
306 limit: Option<usize>,
307 ) -> QueryPlan {
308 let estimated_rows = match &index {
309 IndexSelection::PrimaryKey => 1,
310 IndexSelection::FullScan => self.total_edges.max(1),
311 _ => {
312 let sel: f64 = predicates
313 .iter()
314 .map(|p| self.estimate_selectivity(p))
315 .product();
316 (self.total_edges as f64 * sel).max(1.0) as usize
317 }
318 };
319
320 let estimated_cost = match &operation {
321 QueryOperation::PointLookup => self.cost_model.index_lookup_cost,
322 QueryOperation::RangeScan => estimated_rows as f64 * self.cost_model.row_read_cost,
323 QueryOperation::FullScan => self.total_edges as f64 * self.cost_model.seq_scan_cost,
324 QueryOperation::VectorSearch { .. } => self.cost_model.vector_search_cost,
325 QueryOperation::LsmRangeScan { .. } => estimated_rows as f64 * self.cost_model.row_read_cost,
326 QueryOperation::GraphTraversal { .. } => estimated_rows as f64 * self.cost_model.row_read_cost,
327 };
328
329 QueryPlan {
330 cost: QueryCost {
331 estimated_cost,
332 total_cost: estimated_cost,
333 estimated_rows,
334 records_returned: estimated_rows,
335 index: index.clone(),
336 operation: operation.clone(),
337 breakdown: vec![(operation.clone(), estimated_cost)],
338 },
339 predicates: predicates.to_vec(),
340 direction: TraversalDirection::Forward,
341 limit,
342 index_selection: index,
343 operations: vec![operation],
344 }
345 }
346
347 pub fn estimate_selectivity(&self, predicate: &QueryPredicate) -> f64 {
349 match predicate {
350 QueryPredicate::Eq(col, _) => {
351 if let Some(hint) = self.cardinality_hints.get(col) {
352 1.0 / hint.cardinality.max(1) as f64
353 } else {
354 0.1 }
356 }
357 QueryPredicate::Range { .. } => 0.25, QueryPredicate::In(_, values) => (values.len() as f64 * 0.1).min(0.5),
359 QueryPredicate::Prefix(_, _) => 0.15, QueryPredicate::TimeRange(_, _) => 0.2,
361 QueryPredicate::Project(_) => 0.1,
362 QueryPredicate::Tenant(_) => 0.05,
363 QueryPredicate::SpanType(_) => 0.15,
364 }
365 }
366}