1mod formatter;
16
17use serde::{Deserialize, Serialize};
18use std::collections::HashSet;
19
20use super::ast::{Condition, SelectStatement};
21use crate::collection::search::query::match_planner::{
22 CollectionStats, MatchExecutionStrategy, MatchQueryPlanner,
23};
24use crate::velesql::MatchClause;
25
26#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
28pub struct QueryPlan {
29 pub root: PlanNode,
31 pub estimated_cost_ms: f64,
33 pub index_used: Option<IndexType>,
35 pub filter_strategy: FilterStrategy,
37 #[serde(skip_serializing_if = "Option::is_none")]
39 pub cache_hit: Option<bool>,
40 #[serde(skip_serializing_if = "Option::is_none")]
46 pub plan_reuse_count: Option<u64>,
47}
48
49#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
51pub enum PlanNode {
52 VectorSearch(VectorSearchPlan),
54 Filter(FilterPlan),
56 Limit(LimitPlan),
58 Offset(OffsetPlan),
60 TableScan(TableScanPlan),
62 IndexLookup(IndexLookupPlan),
64 Sequence(Vec<PlanNode>),
66 MatchTraversal(MatchTraversalPlan),
68}
69
70#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
72pub struct VectorSearchPlan {
73 pub collection: String,
75 pub ef_search: u32,
77 pub candidates: u32,
79}
80
81#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
83pub struct FilterPlan {
84 pub conditions: String,
86 pub selectivity: f64,
88}
89
90#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
92pub struct LimitPlan {
93 pub count: u64,
95}
96
97#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
99pub struct OffsetPlan {
100 pub count: u64,
102}
103
104#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
106pub struct TableScanPlan {
107 pub collection: String,
109}
110
111#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
113pub struct IndexLookupPlan {
114 pub label: String,
116 pub property: String,
118 pub value: String,
120}
121
122#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
124pub struct MatchTraversalPlan {
125 pub strategy: String,
127 pub start_labels: Vec<String>,
129 pub max_depth: u32,
131 pub relationship_count: usize,
133 pub has_similarity: bool,
135 pub similarity_threshold: Option<f32>,
137}
138
139#[allow(dead_code)] #[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct ExplainOutput {
143 pub plan: QueryPlan,
145 #[serde(skip_serializing_if = "Option::is_none")]
147 pub actual_stats: Option<ActualStats>,
148}
149
150#[allow(dead_code)] #[derive(Debug, Clone, Serialize, Deserialize)]
153pub struct ActualStats {
154 pub actual_rows: u64,
156 pub actual_time_ms: f64,
158 pub loops: u64,
160 pub nodes_visited: u64,
162 pub edges_traversed: u64,
164}
165
166#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
168pub enum IndexType {
169 Hnsw,
171 Flat,
173 BinaryQuantization,
175 Property,
177}
178
179#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
181pub enum FilterStrategy {
182 #[default]
184 None,
185 PreFilter,
187 PostFilter,
189}
190
191impl QueryPlan {
192 #[must_use]
194 pub fn from_select(stmt: &SelectStatement) -> Self {
195 Self::from_select_with_indexed_fields(stmt, &HashSet::new())
196 }
197
198 #[must_use]
200 pub fn from_select_with_indexed_fields(
201 stmt: &SelectStatement,
202 indexed_fields: &HashSet<String>,
203 ) -> Self {
204 let mut nodes = Vec::new();
205 let mut has_vector_search = false;
206 let mut filter_conditions = Vec::new();
207 let mut filter_strategy = FilterStrategy::None;
208 let mut index_used = None;
209 let mut index_lookup = None;
210
211 if let Some(ref condition) = stmt.where_clause {
213 Self::analyze_condition(condition, &mut has_vector_search, &mut filter_conditions);
214 index_lookup = Self::extract_index_lookup(condition, indexed_fields);
215 }
216
217 if has_vector_search {
219 index_used = Some(IndexType::Hnsw);
220 let candidates = u32::try_from(stmt.limit.unwrap_or(50)).unwrap_or(u32::MAX);
221 nodes.push(PlanNode::VectorSearch(VectorSearchPlan {
222 collection: stmt.from.clone(),
223 ef_search: 100, candidates,
225 }));
226 } else if let Some((property, value)) = index_lookup {
227 index_used = Some(IndexType::Property);
228 nodes.push(PlanNode::IndexLookup(IndexLookupPlan {
229 label: stmt.from.clone(),
230 property,
231 value,
232 }));
233 } else {
234 nodes.push(PlanNode::TableScan(TableScanPlan {
235 collection: stmt.from.clone(),
236 }));
237 }
238
239 if !filter_conditions.is_empty() {
241 let selectivity = Self::estimate_selectivity(&filter_conditions);
242 filter_strategy = if selectivity > 0.1 {
243 FilterStrategy::PostFilter
244 } else {
245 FilterStrategy::PreFilter
246 };
247
248 nodes.push(PlanNode::Filter(FilterPlan {
249 conditions: filter_conditions.join(" AND "),
250 selectivity,
251 }));
252 }
253
254 if let Some(offset) = stmt.offset {
256 nodes.push(PlanNode::Offset(OffsetPlan { count: offset }));
257 }
258
259 if let Some(limit) = stmt.limit {
261 nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
262 }
263
264 let root = if nodes.len() == 1 {
265 nodes.swap_remove(0)
266 } else {
267 PlanNode::Sequence(nodes)
268 };
269
270 let estimated_cost_ms = Self::estimate_cost(&root, has_vector_search);
271
272 Self {
273 root,
274 estimated_cost_ms,
275 index_used,
276 filter_strategy,
277 cache_hit: None,
278 plan_reuse_count: None,
279 }
280 }
281
282 fn analyze_condition(
284 condition: &Condition,
285 has_vector_search: &mut bool,
286 filter_conditions: &mut Vec<String>,
287 ) {
288 match condition {
289 Condition::VectorSearch(_)
290 | Condition::VectorFusedSearch(_)
291 | Condition::SparseVectorSearch(_)
292 | Condition::Similarity(_) => {
293 *has_vector_search = true;
294 }
295 Condition::Comparison(cmp) => {
296 filter_conditions.push(format!("{} {} ?", cmp.column, cmp.operator.as_str()));
297 }
298 Condition::In(inc) => {
299 filter_conditions.push(format!("{} IN (...)", inc.column));
300 }
301 Condition::Between(btw) => {
302 filter_conditions.push(format!("{} BETWEEN ? AND ?", btw.column));
303 }
304 Condition::Like(lk) => {
305 filter_conditions.push(format!("{} LIKE ?", lk.column));
306 }
307 Condition::IsNull(isn) => {
308 let op = if isn.is_null {
309 "IS NULL"
310 } else {
311 "IS NOT NULL"
312 };
313 filter_conditions.push(format!("{} {op}", isn.column));
314 }
315 Condition::Match(m) => {
316 filter_conditions.push(format!("{} MATCH ?", m.column));
317 }
318 Condition::GraphMatch(_) => {
319 filter_conditions.push("MATCH (...)".to_string());
320 }
321 Condition::And(left, right) | Condition::Or(left, right) => {
322 Self::analyze_condition(left, has_vector_search, filter_conditions);
323 Self::analyze_condition(right, has_vector_search, filter_conditions);
324 }
325 Condition::Not(inner) | Condition::Group(inner) => {
326 Self::analyze_condition(inner, has_vector_search, filter_conditions);
327 }
328 }
329 }
330
331 fn extract_index_lookup(
332 condition: &Condition,
333 indexed_fields: &HashSet<String>,
334 ) -> Option<(String, String)> {
335 if let Condition::Comparison(cmp) = condition {
336 if cmp.operator == crate::velesql::CompareOp::Eq && indexed_fields.contains(&cmp.column)
337 {
338 return Some((cmp.column.clone(), format!("{:?}", cmp.value)));
339 }
340 }
341 None
342 }
343
344 pub(crate) fn estimate_selectivity(conditions: &[String]) -> f64 {
346 let base = 0.5_f64;
348 base.powi(i32::try_from(conditions.len()).unwrap_or(i32::MAX))
349 }
350
351 fn estimate_cost(root: &PlanNode, has_vector_search: bool) -> f64 {
353 let base_cost = if has_vector_search { 0.05 } else { 1.0 };
354
355 match root {
356 PlanNode::Sequence(nodes) => nodes
357 .iter()
358 .fold(base_cost, |acc, node| acc + Self::node_cost(node)),
359 _ => base_cost + Self::node_cost(root),
360 }
361 }
362
363 pub(crate) fn node_cost(node: &PlanNode) -> f64 {
364 match node {
365 PlanNode::VectorSearch(_) => 0.05,
366 PlanNode::Filter(f) => 0.01 * (1.0 - f.selectivity),
367 PlanNode::Limit(_) | PlanNode::Offset(_) => 0.001,
368 PlanNode::TableScan(_) => 1.0,
369 PlanNode::IndexLookup(_) => 0.0001, PlanNode::Sequence(nodes) => nodes.iter().map(Self::node_cost).sum(),
371 PlanNode::MatchTraversal(mt) => {
372 let base = 0.1;
374 let depth_factor = f64::from(mt.max_depth) * 0.05;
375 let similarity_factor = if mt.has_similarity { 0.05 } else { 0.0 };
376 base + depth_factor + similarity_factor
377 }
378 }
379 }
380
381 #[must_use]
383 pub fn from_match(match_clause: &MatchClause, stats: &CollectionStats) -> Self {
384 let strategy = MatchQueryPlanner::plan(match_clause, stats);
385 let strategy_explanation = MatchQueryPlanner::explain(&strategy);
386
387 let (start_labels, max_depth, has_similarity, similarity_threshold) = match &strategy {
389 MatchExecutionStrategy::GraphFirst {
390 start_labels,
391 max_depth,
392 } => (start_labels.clone(), *max_depth, false, None),
393 MatchExecutionStrategy::VectorFirst { threshold, .. } => {
394 (Vec::new(), 1, true, Some(*threshold))
395 }
396 MatchExecutionStrategy::Parallel {
397 graph_hint,
398 vector_hint,
399 } => {
400 let (labels, depth) = match graph_hint.as_ref() {
401 MatchExecutionStrategy::GraphFirst {
402 start_labels,
403 max_depth,
404 } => (start_labels.clone(), *max_depth),
405 _ => (Vec::new(), 1),
406 };
407 let threshold = match vector_hint.as_ref() {
408 MatchExecutionStrategy::VectorFirst { threshold, .. } => Some(*threshold),
409 _ => None,
410 };
411 (labels, depth, true, threshold)
412 }
413 };
414
415 let relationship_count = match_clause
417 .patterns
418 .first()
419 .map_or(0, |p| p.relationships.len());
420
421 let mut nodes = Vec::new();
422
423 nodes.push(PlanNode::MatchTraversal(MatchTraversalPlan {
425 strategy: strategy_explanation,
426 start_labels,
427 max_depth,
428 relationship_count,
429 has_similarity,
430 similarity_threshold,
431 }));
432
433 if let Some(limit) = match_clause.return_clause.limit {
435 nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
436 }
437
438 let root = if nodes.len() == 1 {
439 nodes.swap_remove(0)
440 } else {
441 PlanNode::Sequence(nodes)
442 };
443
444 let estimated_cost_ms = Self::estimate_cost(&root, has_similarity);
445 let index_used = if has_similarity {
446 Some(IndexType::Hnsw)
447 } else {
448 None
449 };
450
451 Self {
452 root,
453 estimated_cost_ms,
454 index_used,
455 filter_strategy: FilterStrategy::None,
456 cache_hit: None,
457 plan_reuse_count: None,
458 }
459 }
460}
461
462