1mod formatter;
16
17use serde::{Deserialize, Serialize};
18use std::collections::HashSet;
19
20use super::ast::{Condition, SelectStatement};
21use crate::collection::search::query::match_planner::{
22 CollectionStats, MatchExecutionStrategy, MatchQueryPlanner,
23};
24use crate::velesql::MatchClause;
25
26#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
28pub struct QueryPlan {
29 pub root: PlanNode,
31 pub estimated_cost_ms: f64,
33 pub index_used: Option<IndexType>,
35 pub filter_strategy: FilterStrategy,
37 #[serde(skip_serializing_if = "Option::is_none")]
39 pub cache_hit: Option<bool>,
40 #[serde(skip_serializing_if = "Option::is_none")]
46 pub plan_reuse_count: Option<u64>,
47}
48
49#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
51pub enum PlanNode {
52 VectorSearch(VectorSearchPlan),
54 Filter(FilterPlan),
56 Limit(LimitPlan),
58 Offset(OffsetPlan),
60 TableScan(TableScanPlan),
62 IndexLookup(IndexLookupPlan),
64 Sequence(Vec<PlanNode>),
66 MatchTraversal(MatchTraversalPlan),
68}
69
70#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
72pub struct VectorSearchPlan {
73 pub collection: String,
75 pub ef_search: u32,
77 pub candidates: u32,
79}
80
81#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
83pub struct FilterPlan {
84 pub conditions: String,
86 pub selectivity: f64,
88}
89
90#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
92pub struct LimitPlan {
93 pub count: u64,
95}
96
97#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
99pub struct OffsetPlan {
100 pub count: u64,
102}
103
104#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
106pub struct TableScanPlan {
107 pub collection: String,
109}
110
111#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
113pub struct IndexLookupPlan {
114 pub label: String,
116 pub property: String,
118 pub value: String,
120}
121
122#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
124pub struct MatchTraversalPlan {
125 pub strategy: String,
127 pub start_labels: Vec<String>,
129 pub max_depth: u32,
131 pub relationship_count: usize,
133 pub has_similarity: bool,
135 pub similarity_threshold: Option<f32>,
137}
138
139#[allow(dead_code)] #[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct ExplainOutput {
143 pub plan: QueryPlan,
145 #[serde(skip_serializing_if = "Option::is_none")]
147 pub actual_stats: Option<ActualStats>,
148}
149
150#[allow(dead_code)] #[derive(Debug, Clone, Serialize, Deserialize)]
153pub struct ActualStats {
154 pub actual_rows: u64,
156 pub actual_time_ms: f64,
158 pub loops: u64,
160 pub nodes_visited: u64,
162 pub edges_traversed: u64,
164}
165
166#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
168pub enum IndexType {
169 Hnsw,
171 Flat,
173 BinaryQuantization,
175 Property,
177}
178
179#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
181pub enum FilterStrategy {
182 #[default]
184 None,
185 PreFilter,
187 PostFilter,
189}
190
191impl QueryPlan {
192 #[must_use]
194 pub fn from_select(stmt: &SelectStatement) -> Self {
195 Self::from_select_with_indexed_fields(stmt, &HashSet::new())
196 }
197
198 #[must_use]
200 pub fn from_select_with_indexed_fields(
201 stmt: &SelectStatement,
202 indexed_fields: &HashSet<String>,
203 ) -> Self {
204 let mut has_vector_search = false;
205 let mut filter_conditions = Vec::new();
206 let mut index_lookup = None;
207
208 if let Some(ref condition) = stmt.where_clause {
209 Self::analyze_condition(condition, &mut has_vector_search, &mut filter_conditions);
210 index_lookup = Self::extract_index_lookup(condition, indexed_fields);
211 }
212
213 let (mut nodes, index_used) = Self::build_scan_node(stmt, has_vector_search, index_lookup);
214 let filter_strategy = Self::append_filter_nodes(&mut nodes, &filter_conditions, stmt);
215
216 Self::assemble_plan(nodes, index_used, filter_strategy, has_vector_search)
217 }
218
219 fn assemble_plan(
221 mut nodes: Vec<PlanNode>,
222 index_used: Option<IndexType>,
223 filter_strategy: FilterStrategy,
224 has_vector_search: bool,
225 ) -> Self {
226 let root = if nodes.len() == 1 {
227 nodes.swap_remove(0)
228 } else {
229 PlanNode::Sequence(nodes)
230 };
231 let estimated_cost_ms = Self::estimate_cost(&root, has_vector_search);
232 Self {
233 root,
234 estimated_cost_ms,
235 index_used,
236 filter_strategy,
237 cache_hit: None,
238 plan_reuse_count: None,
239 }
240 }
241
242 fn build_scan_node(
244 stmt: &SelectStatement,
245 has_vector_search: bool,
246 index_lookup: Option<(String, String)>,
247 ) -> (Vec<PlanNode>, Option<IndexType>) {
248 let mut nodes = Vec::new();
249 let index_used;
250
251 if has_vector_search {
252 index_used = Some(IndexType::Hnsw);
253 let candidates = u32::try_from(stmt.limit.unwrap_or(50)).unwrap_or(u32::MAX);
254 nodes.push(PlanNode::VectorSearch(VectorSearchPlan {
255 collection: stmt.from.clone(),
256 ef_search: 100,
257 candidates,
258 }));
259 } else if let Some((property, value)) = index_lookup {
260 index_used = Some(IndexType::Property);
261 nodes.push(PlanNode::IndexLookup(IndexLookupPlan {
262 label: stmt.from.clone(),
263 property,
264 value,
265 }));
266 } else {
267 index_used = None;
268 nodes.push(PlanNode::TableScan(TableScanPlan {
269 collection: stmt.from.clone(),
270 }));
271 }
272
273 (nodes, index_used)
274 }
275
276 fn append_filter_nodes(
278 nodes: &mut Vec<PlanNode>,
279 filter_conditions: &[String],
280 stmt: &SelectStatement,
281 ) -> FilterStrategy {
282 let mut filter_strategy = FilterStrategy::None;
283
284 if !filter_conditions.is_empty() {
285 let selectivity = Self::estimate_selectivity(filter_conditions);
286 filter_strategy = if selectivity > 0.1 {
287 FilterStrategy::PostFilter
288 } else {
289 FilterStrategy::PreFilter
290 };
291 nodes.push(PlanNode::Filter(FilterPlan {
292 conditions: filter_conditions.join(" AND "),
293 selectivity,
294 }));
295 }
296
297 if let Some(offset) = stmt.offset {
298 nodes.push(PlanNode::Offset(OffsetPlan { count: offset }));
299 }
300 if let Some(limit) = stmt.limit {
301 nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
302 }
303
304 filter_strategy
305 }
306
307 fn analyze_condition(
309 condition: &Condition,
310 has_vector_search: &mut bool,
311 filter_conditions: &mut Vec<String>,
312 ) {
313 match condition {
314 Condition::VectorSearch(_)
315 | Condition::VectorFusedSearch(_)
316 | Condition::SparseVectorSearch(_)
317 | Condition::Similarity(_) => {
318 *has_vector_search = true;
319 }
320 Condition::Comparison(cmp) => {
321 filter_conditions.push(format!("{} {} ?", cmp.column, cmp.operator.as_str()));
322 }
323 Condition::In(inc) => {
324 filter_conditions.push(format!("{} IN (...)", inc.column));
325 }
326 Condition::Between(btw) => {
327 filter_conditions.push(format!("{} BETWEEN ? AND ?", btw.column));
328 }
329 Condition::Like(lk) => {
330 filter_conditions.push(format!("{} LIKE ?", lk.column));
331 }
332 Condition::IsNull(isn) => {
333 let op = if isn.is_null {
334 "IS NULL"
335 } else {
336 "IS NOT NULL"
337 };
338 filter_conditions.push(format!("{} {op}", isn.column));
339 }
340 Condition::Match(m) => {
341 filter_conditions.push(format!("{} MATCH ?", m.column));
342 }
343 Condition::GraphMatch(_) => {
344 filter_conditions.push("MATCH (...)".to_string());
345 }
346 Condition::And(left, right) | Condition::Or(left, right) => {
347 Self::analyze_condition(left, has_vector_search, filter_conditions);
348 Self::analyze_condition(right, has_vector_search, filter_conditions);
349 }
350 Condition::Not(inner) | Condition::Group(inner) => {
351 Self::analyze_condition(inner, has_vector_search, filter_conditions);
352 }
353 }
354 }
355
356 fn extract_index_lookup(
357 condition: &Condition,
358 indexed_fields: &HashSet<String>,
359 ) -> Option<(String, String)> {
360 if let Condition::Comparison(cmp) = condition {
361 if cmp.operator == crate::velesql::CompareOp::Eq && indexed_fields.contains(&cmp.column)
362 {
363 return Some((cmp.column.clone(), format!("{:?}", cmp.value)));
364 }
365 }
366 None
367 }
368
369 pub(crate) fn estimate_selectivity(conditions: &[String]) -> f64 {
371 let base = 0.5_f64;
373 base.powi(i32::try_from(conditions.len()).unwrap_or(i32::MAX))
374 }
375
376 fn estimate_cost(root: &PlanNode, has_vector_search: bool) -> f64 {
378 let base_cost = if has_vector_search { 0.05 } else { 1.0 };
379
380 match root {
381 PlanNode::Sequence(nodes) => nodes
382 .iter()
383 .fold(base_cost, |acc, node| acc + Self::node_cost(node)),
384 _ => base_cost + Self::node_cost(root),
385 }
386 }
387
388 pub(crate) fn node_cost(node: &PlanNode) -> f64 {
389 match node {
390 PlanNode::VectorSearch(_) => 0.05,
391 PlanNode::Filter(f) => 0.01 * (1.0 - f.selectivity),
392 PlanNode::Limit(_) | PlanNode::Offset(_) => 0.001,
393 PlanNode::TableScan(_) => 1.0,
394 PlanNode::IndexLookup(_) => 0.0001, PlanNode::Sequence(nodes) => nodes.iter().map(Self::node_cost).sum(),
396 PlanNode::MatchTraversal(mt) => {
397 let base = 0.1;
399 let depth_factor = f64::from(mt.max_depth) * 0.05;
400 let similarity_factor = if mt.has_similarity { 0.05 } else { 0.0 };
401 base + depth_factor + similarity_factor
402 }
403 }
404 }
405
406 #[must_use]
408 pub fn from_match(match_clause: &MatchClause, stats: &CollectionStats) -> Self {
409 let strategy = MatchQueryPlanner::plan(match_clause, stats);
410 let strategy_explanation = MatchQueryPlanner::explain(&strategy);
411
412 let (start_labels, max_depth, has_similarity, similarity_threshold) =
413 Self::extract_strategy_info(&strategy);
414
415 let relationship_count = match_clause
416 .patterns
417 .first()
418 .map_or(0, |p| p.relationships.len());
419
420 let traversal = PlanNode::MatchTraversal(MatchTraversalPlan {
421 strategy: strategy_explanation,
422 start_labels,
423 max_depth,
424 relationship_count,
425 has_similarity,
426 similarity_threshold,
427 });
428
429 let mut nodes = vec![traversal];
430 if let Some(limit) = match_clause.return_clause.limit {
431 nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
432 }
433
434 let index_used = if has_similarity {
435 Some(IndexType::Hnsw)
436 } else {
437 None
438 };
439
440 Self::assemble_plan(nodes, index_used, FilterStrategy::None, has_similarity)
441 }
442
443 fn extract_strategy_info(
445 strategy: &MatchExecutionStrategy,
446 ) -> (Vec<String>, u32, bool, Option<f32>) {
447 match strategy {
448 MatchExecutionStrategy::GraphFirst {
449 start_labels,
450 max_depth,
451 } => (start_labels.clone(), *max_depth, false, None),
452 MatchExecutionStrategy::VectorFirst { threshold, .. } => {
453 (Vec::new(), 1, true, Some(*threshold))
454 }
455 MatchExecutionStrategy::Parallel {
456 graph_hint,
457 vector_hint,
458 } => {
459 let (labels, depth) = match graph_hint.as_ref() {
460 MatchExecutionStrategy::GraphFirst {
461 start_labels,
462 max_depth,
463 } => (start_labels.clone(), *max_depth),
464 _ => (Vec::new(), 1),
465 };
466 let threshold = match vector_hint.as_ref() {
467 MatchExecutionStrategy::VectorFirst { threshold, .. } => Some(*threshold),
468 _ => None,
469 };
470 (labels, depth, true, threshold)
471 }
472 }
473 }
474}
475
476