1use std::collections::HashMap;
24
25#[derive(Debug, Default)]
27pub struct QueryOptimizer {
28 cost_model: CostModel,
29 cardinality_hints: HashMap<String, CardinalityHint>,
30 total_edges: usize,
31}
32
33#[derive(Debug, Clone)]
35pub struct CostModel {
36 pub row_read_cost: f64,
38 pub index_lookup_cost: f64,
40 pub seq_scan_cost: f64,
42 pub vector_search_cost: f64,
44}
45
46impl Default for CostModel {
47 fn default() -> Self {
48 Self {
49 row_read_cost: 1.0,
50 index_lookup_cost: 0.5,
51 seq_scan_cost: 0.1,
52 vector_search_cost: 10.0,
53 }
54 }
55}
56
57#[derive(Debug, Clone)]
59pub struct CardinalityHint {
60 pub cardinality: usize,
62 pub confidence: f64,
64 pub source: CardinalitySource,
66}
67
68#[derive(Debug, Clone, Copy, PartialEq, Eq)]
70pub enum CardinalitySource {
71 Exact,
73 HyperLogLog,
75 Histogram,
77 Default,
79}
80
81#[derive(Debug, Clone)]
83pub enum QueryPredicate {
84 Eq(String, String),
86 Range {
88 column: String,
89 min: Option<String>,
90 max: Option<String>,
91 },
92 In(String, Vec<String>),
94 Prefix(String, String),
96 TimeRange(u64, u64),
98 Project(u16),
100 Tenant(u32),
102 SpanType(String),
104}
105
106#[derive(Debug, Clone, PartialEq)]
108pub enum QueryOperation {
109 PointLookup,
111 RangeScan,
113 FullScan,
115 VectorSearch { k: usize },
117 LsmRangeScan { start_us: u64, end_us: u64 },
119 GraphTraversal {
121 direction: TraversalDirection,
122 max_depth: usize,
123 },
124}
125
126#[derive(Debug, Clone)]
128pub enum IndexSelection {
129 PrimaryKey,
131 Secondary(String),
133 TimeIndex,
135 VectorIndex,
137 FullScan,
139 LsmScan,
141 CausalIndex,
143 ProjectIndex,
145 MultiIndex(Vec<IndexSelection>),
147}
148
149#[derive(Debug, Clone)]
151pub struct QueryCost {
152 pub estimated_cost: f64,
154 pub total_cost: f64,
156 pub estimated_rows: usize,
158 pub records_returned: usize,
160 pub index: IndexSelection,
162 pub operation: QueryOperation,
164 pub breakdown: Vec<(QueryOperation, f64)>,
166}
167
168#[derive(Debug, Clone, Copy, PartialEq, Eq)]
170pub enum TraversalDirection {
171 Forward,
172 Backward,
173}
174
175#[derive(Debug, Clone)]
177pub struct QueryPlan {
178 pub cost: QueryCost,
180 pub predicates: Vec<QueryPredicate>,
182 pub direction: TraversalDirection,
184 pub limit: Option<usize>,
186 pub index_selection: IndexSelection,
188 pub operations: Vec<QueryOperation>,
190}
191
192impl QueryOptimizer {
193 pub fn new() -> Self {
195 Self::default()
196 }
197
198 pub fn with_cost_model(cost_model: CostModel) -> Self {
200 Self {
201 cost_model,
202 ..Default::default()
203 }
204 }
205
206 pub fn update_total_edges(&mut self, count: usize, _source: CardinalitySource) {
208 self.total_edges = count;
209 }
210
211 pub fn set_cardinality_hint(&mut self, column: &str, hint: CardinalityHint) {
213 self.cardinality_hints.insert(column.to_string(), hint);
214 }
215
216 pub fn plan_query(&self, predicates: &[QueryPredicate], limit: Option<usize>) -> QueryPlan {
221 if predicates.is_empty() {
222 return self.build_plan(
223 IndexSelection::FullScan,
224 QueryOperation::FullScan,
225 predicates,
226 limit,
227 );
228 }
229
230 let mut candidates: Vec<(IndexSelection, QueryOperation, f64, usize)> = Vec::new();
232
233 for pred in predicates {
234 let sel = self.estimate_selectivity(pred);
235 let est_rows = (self.total_edges as f64 * sel).max(1.0) as usize;
236
237 match pred {
238 QueryPredicate::Eq(_, _) => {
239 let cost = self.cost_model.index_lookup_cost;
240 candidates.push((
241 IndexSelection::PrimaryKey,
242 QueryOperation::PointLookup,
243 cost,
244 1,
245 ));
246 }
247 QueryPredicate::TimeRange(start, end) => {
248 let cost = est_rows as f64 * self.cost_model.row_read_cost;
249 candidates.push((
250 IndexSelection::TimeIndex,
251 QueryOperation::LsmRangeScan {
252 start_us: *start,
253 end_us: *end,
254 },
255 cost,
256 est_rows,
257 ));
258 }
259 QueryPredicate::Range { .. } | QueryPredicate::Prefix(_, _) => {
260 let cost = est_rows as f64 * self.cost_model.row_read_cost;
261 candidates.push((
262 IndexSelection::LsmScan,
263 QueryOperation::RangeScan,
264 cost,
265 est_rows,
266 ));
267 }
268 QueryPredicate::Project(_) => {
269 let cost = est_rows as f64 * self.cost_model.index_lookup_cost;
270 candidates.push((
271 IndexSelection::ProjectIndex,
272 QueryOperation::RangeScan,
273 cost,
274 est_rows,
275 ));
276 }
277 QueryPredicate::Tenant(_)
278 | QueryPredicate::SpanType(_)
279 | QueryPredicate::In(_, _) => {
280 let cost = est_rows as f64 * self.cost_model.row_read_cost;
281 candidates.push((
282 IndexSelection::FullScan,
283 QueryOperation::RangeScan,
284 cost,
285 est_rows,
286 ));
287 }
288 }
289 }
290
291 let full_scan_cost = self.total_edges.max(1) as f64 * self.cost_model.seq_scan_cost;
293 candidates.push((
294 IndexSelection::FullScan,
295 QueryOperation::FullScan,
296 full_scan_cost,
297 self.total_edges.max(1),
298 ));
299
300 candidates.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal));
302 let (index, operation, _cost, _rows) = candidates.into_iter().next().unwrap();
303
304 self.build_plan(index, operation, predicates, limit)
305 }
306
307 fn build_plan(
309 &self,
310 index: IndexSelection,
311 operation: QueryOperation,
312 predicates: &[QueryPredicate],
313 limit: Option<usize>,
314 ) -> QueryPlan {
315 let estimated_rows = match &index {
316 IndexSelection::PrimaryKey => 1,
317 IndexSelection::FullScan => self.total_edges.max(1),
318 _ => {
319 let sel: f64 = predicates
320 .iter()
321 .map(|p| self.estimate_selectivity(p))
322 .product();
323 (self.total_edges as f64 * sel).max(1.0) as usize
324 }
325 };
326
327 let estimated_cost = match &operation {
328 QueryOperation::PointLookup => self.cost_model.index_lookup_cost,
329 QueryOperation::RangeScan => estimated_rows as f64 * self.cost_model.row_read_cost,
330 QueryOperation::FullScan => self.total_edges as f64 * self.cost_model.seq_scan_cost,
331 QueryOperation::VectorSearch { .. } => self.cost_model.vector_search_cost,
332 QueryOperation::LsmRangeScan { .. } => {
333 estimated_rows as f64 * self.cost_model.row_read_cost
334 }
335 QueryOperation::GraphTraversal { .. } => {
336 estimated_rows as f64 * self.cost_model.row_read_cost
337 }
338 };
339
340 QueryPlan {
341 cost: QueryCost {
342 estimated_cost,
343 total_cost: estimated_cost,
344 estimated_rows,
345 records_returned: estimated_rows,
346 index: index.clone(),
347 operation: operation.clone(),
348 breakdown: vec![(operation.clone(), estimated_cost)],
349 },
350 predicates: predicates.to_vec(),
351 direction: TraversalDirection::Forward,
352 limit,
353 index_selection: index,
354 operations: vec![operation],
355 }
356 }
357
358 pub fn estimate_selectivity(&self, predicate: &QueryPredicate) -> f64 {
360 match predicate {
361 QueryPredicate::Eq(col, _) => {
362 if let Some(hint) = self.cardinality_hints.get(col) {
363 1.0 / hint.cardinality.max(1) as f64
364 } else {
365 0.1 }
367 }
368 QueryPredicate::Range { .. } => 0.25, QueryPredicate::In(_, values) => (values.len() as f64 * 0.1).min(0.5),
370 QueryPredicate::Prefix(_, _) => 0.15, QueryPredicate::TimeRange(_, _) => 0.2,
372 QueryPredicate::Project(_) => 0.1,
373 QueryPredicate::Tenant(_) => 0.05,
374 QueryPredicate::SpanType(_) => 0.15,
375 }
376 }
377}