Skip to main content

velesdb_core/velesql/
explain.rs

1//! Query plan explanation for `VelesQL`.
2//!
3//! This module provides EXPLAIN functionality to display query execution plans.
4//!
5//! # Example
6//!
7//! ```ignore
8//! use velesdb_core::velesql::{Parser, QueryPlan};
9//!
10//! let query = Parser::parse("SELECT * FROM docs WHERE vector NEAR $v LIMIT 10")?;
11//! let plan = QueryPlan::from_select(&query.select);
12//! println!("{}", plan.to_tree());
13//! ```
14
15mod formatter;
16
17use serde::{Deserialize, Serialize};
18use std::collections::HashSet;
19
20use super::ast::{Condition, SelectStatement};
21use crate::collection::search::query::match_planner::{
22    CollectionStats, MatchExecutionStrategy, MatchQueryPlanner,
23};
24use crate::velesql::MatchClause;
25
26/// Query execution plan generated by EXPLAIN.
27#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
28pub struct QueryPlan {
29    /// Root node of the plan tree.
30    pub root: PlanNode,
31    /// Estimated execution cost in milliseconds.
32    pub estimated_cost_ms: f64,
33    /// Index type used (if any).
34    pub index_used: Option<IndexType>,
35    /// Filter strategy.
36    pub filter_strategy: FilterStrategy,
37    /// Whether this plan was served from the compiled plan cache (CACHE-02).
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub cache_hit: Option<bool>,
40    /// How many times this cached plan has been reused (CACHE-02).
41    ///
42    /// The value is the `reuse_count` atomically incremented inside the cache on every
43    /// successful `get()`. A value of 1 means the plan was found in cache and this is
44    /// its first reuse. The count includes the current call to `explain_query`.
45    #[serde(skip_serializing_if = "Option::is_none")]
46    pub plan_reuse_count: Option<u64>,
47}
48
49/// A node in the query execution plan tree.
50#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
51pub enum PlanNode {
52    /// Vector similarity search operation.
53    VectorSearch(VectorSearchPlan),
54    /// Metadata filter operation.
55    Filter(FilterPlan),
56    /// Limit results.
57    Limit(LimitPlan),
58    /// Offset skip.
59    Offset(OffsetPlan),
60    /// Table scan (no index).
61    TableScan(TableScanPlan),
62    /// Property index lookup (O(1) instead of scan).
63    IndexLookup(IndexLookupPlan),
64    /// Sequential operations.
65    Sequence(Vec<PlanNode>),
66    /// MATCH graph traversal (EPIC-046 US-004).
67    MatchTraversal(MatchTraversalPlan),
68}
69
70/// Vector search plan details.
71#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
72pub struct VectorSearchPlan {
73    /// Collection name.
74    pub collection: String,
75    /// `ef_search` parameter (for HNSW).
76    pub ef_search: u32,
77    /// Number of candidates to retrieve.
78    pub candidates: u32,
79}
80
81/// Filter plan details.
82#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
83pub struct FilterPlan {
84    /// Filter conditions as string representation.
85    pub conditions: String,
86    /// Estimated selectivity (0.0 - 1.0).
87    pub selectivity: f64,
88}
89
90/// Limit plan details.
91#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
92pub struct LimitPlan {
93    /// Maximum number of results.
94    pub count: u64,
95}
96
97/// Offset plan details.
98#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
99pub struct OffsetPlan {
100    /// Number of results to skip.
101    pub count: u64,
102}
103
104/// Table scan plan (no index used).
105#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
106pub struct TableScanPlan {
107    /// Collection name.
108    pub collection: String,
109}
110
111/// Property index lookup plan (O(1) lookup).
112#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
113pub struct IndexLookupPlan {
114    /// Label being queried.
115    pub label: String,
116    /// Property name with index.
117    pub property: String,
118    /// Value being looked up (as string representation).
119    pub value: String,
120}
121
122/// MATCH traversal plan (EPIC-046 US-004).
123#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
124pub struct MatchTraversalPlan {
125    /// Execution strategy chosen by planner.
126    pub strategy: String,
127    /// Start node labels.
128    pub start_labels: Vec<String>,
129    /// Maximum traversal depth.
130    pub max_depth: u32,
131    /// Number of relationships in pattern.
132    pub relationship_count: usize,
133    /// Has similarity condition.
134    pub has_similarity: bool,
135    /// Similarity threshold (if any).
136    pub similarity_threshold: Option<f32>,
137}
138
139/// EXPLAIN output with optional ANALYZE stats (EPIC-046 US-004).
140#[allow(dead_code)] // Used by API consumers
141#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct ExplainOutput {
143    /// The query plan.
144    pub plan: QueryPlan,
145    /// Actual execution statistics (only with ANALYZE).
146    #[serde(skip_serializing_if = "Option::is_none")]
147    pub actual_stats: Option<ActualStats>,
148}
149
150/// Actual execution statistics for EXPLAIN ANALYZE.
151#[allow(dead_code)] // Used by API consumers
152#[derive(Debug, Clone, Serialize, Deserialize)]
153pub struct ActualStats {
154    /// Actual number of rows returned.
155    pub actual_rows: u64,
156    /// Actual execution time in milliseconds.
157    pub actual_time_ms: f64,
158    /// Number of loop iterations.
159    pub loops: u64,
160    /// Number of nodes visited (for graph traversal).
161    pub nodes_visited: u64,
162    /// Number of edges traversed.
163    pub edges_traversed: u64,
164}
165
166/// Type of index used in the query.
167#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
168pub enum IndexType {
169    /// HNSW index for vector search.
170    Hnsw,
171    /// Flat index (brute force).
172    Flat,
173    /// Binary quantization index.
174    BinaryQuantization,
175    /// Property index for equality lookups.
176    Property,
177}
178
179/// Strategy for applying filters.
180#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
181pub enum FilterStrategy {
182    /// No filter.
183    #[default]
184    None,
185    /// Pre-filtering: filter before vector search (high selectivity).
186    PreFilter,
187    /// Post-filtering: filter after vector search (low selectivity).
188    PostFilter,
189}
190
191impl QueryPlan {
192    /// Creates a new query plan from a SELECT statement.
193    #[must_use]
194    pub fn from_select(stmt: &SelectStatement) -> Self {
195        Self::from_select_with_indexed_fields(stmt, &HashSet::new())
196    }
197
198    /// Creates a new query plan from SELECT with known indexed metadata fields.
199    #[must_use]
200    pub fn from_select_with_indexed_fields(
201        stmt: &SelectStatement,
202        indexed_fields: &HashSet<String>,
203    ) -> Self {
204        let mut has_vector_search = false;
205        let mut filter_conditions = Vec::new();
206        let mut index_lookup = None;
207
208        if let Some(ref condition) = stmt.where_clause {
209            Self::analyze_condition(condition, &mut has_vector_search, &mut filter_conditions);
210            index_lookup = Self::extract_index_lookup(condition, indexed_fields);
211        }
212
213        let (mut nodes, index_used) = Self::build_scan_node(stmt, has_vector_search, index_lookup);
214        let filter_strategy = Self::append_filter_nodes(&mut nodes, &filter_conditions, stmt);
215
216        Self::assemble_plan(nodes, index_used, filter_strategy, has_vector_search)
217    }
218
219    /// Collapses a `Vec<PlanNode>` into a single root, estimates cost, and builds the plan.
220    fn assemble_plan(
221        mut nodes: Vec<PlanNode>,
222        index_used: Option<IndexType>,
223        filter_strategy: FilterStrategy,
224        has_vector_search: bool,
225    ) -> Self {
226        let root = if nodes.len() == 1 {
227            nodes.swap_remove(0)
228        } else {
229            PlanNode::Sequence(nodes)
230        };
231        let estimated_cost_ms = Self::estimate_cost(&root, has_vector_search);
232        Self {
233            root,
234            estimated_cost_ms,
235            index_used,
236            filter_strategy,
237            cache_hit: None,
238            plan_reuse_count: None,
239        }
240    }
241
242    /// Builds the primary scan node based on search type.
243    fn build_scan_node(
244        stmt: &SelectStatement,
245        has_vector_search: bool,
246        index_lookup: Option<(String, String)>,
247    ) -> (Vec<PlanNode>, Option<IndexType>) {
248        let mut nodes = Vec::new();
249        let index_used;
250
251        if has_vector_search {
252            index_used = Some(IndexType::Hnsw);
253            let candidates = u32::try_from(stmt.limit.unwrap_or(50)).unwrap_or(u32::MAX);
254            nodes.push(PlanNode::VectorSearch(VectorSearchPlan {
255                collection: stmt.from.clone(),
256                ef_search: 100,
257                candidates,
258            }));
259        } else if let Some((property, value)) = index_lookup {
260            index_used = Some(IndexType::Property);
261            nodes.push(PlanNode::IndexLookup(IndexLookupPlan {
262                label: stmt.from.clone(),
263                property,
264                value,
265            }));
266        } else {
267            index_used = None;
268            nodes.push(PlanNode::TableScan(TableScanPlan {
269                collection: stmt.from.clone(),
270            }));
271        }
272
273        (nodes, index_used)
274    }
275
276    /// Appends filter, offset, and limit nodes; returns the filter strategy.
277    fn append_filter_nodes(
278        nodes: &mut Vec<PlanNode>,
279        filter_conditions: &[String],
280        stmt: &SelectStatement,
281    ) -> FilterStrategy {
282        let mut filter_strategy = FilterStrategy::None;
283
284        if !filter_conditions.is_empty() {
285            let selectivity = Self::estimate_selectivity(filter_conditions);
286            filter_strategy = if selectivity > 0.1 {
287                FilterStrategy::PostFilter
288            } else {
289                FilterStrategy::PreFilter
290            };
291            nodes.push(PlanNode::Filter(FilterPlan {
292                conditions: filter_conditions.join(" AND "),
293                selectivity,
294            }));
295        }
296
297        if let Some(offset) = stmt.offset {
298            nodes.push(PlanNode::Offset(OffsetPlan { count: offset }));
299        }
300        if let Some(limit) = stmt.limit {
301            nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
302        }
303
304        filter_strategy
305    }
306
307    /// Analyzes a condition to extract vector search and filter info.
308    fn analyze_condition(
309        condition: &Condition,
310        has_vector_search: &mut bool,
311        filter_conditions: &mut Vec<String>,
312    ) {
313        match condition {
314            Condition::VectorSearch(_)
315            | Condition::VectorFusedSearch(_)
316            | Condition::SparseVectorSearch(_)
317            | Condition::Similarity(_) => {
318                *has_vector_search = true;
319            }
320            Condition::Comparison(cmp) => {
321                filter_conditions.push(format!("{} {} ?", cmp.column, cmp.operator.as_str()));
322            }
323            Condition::In(inc) => {
324                filter_conditions.push(format!("{} IN (...)", inc.column));
325            }
326            Condition::Between(btw) => {
327                filter_conditions.push(format!("{} BETWEEN ? AND ?", btw.column));
328            }
329            Condition::Like(lk) => {
330                filter_conditions.push(format!("{} LIKE ?", lk.column));
331            }
332            Condition::IsNull(isn) => {
333                let op = if isn.is_null {
334                    "IS NULL"
335                } else {
336                    "IS NOT NULL"
337                };
338                filter_conditions.push(format!("{} {op}", isn.column));
339            }
340            Condition::Match(m) => {
341                filter_conditions.push(format!("{} MATCH ?", m.column));
342            }
343            Condition::GraphMatch(_) => {
344                filter_conditions.push("MATCH (...)".to_string());
345            }
346            Condition::And(left, right) | Condition::Or(left, right) => {
347                Self::analyze_condition(left, has_vector_search, filter_conditions);
348                Self::analyze_condition(right, has_vector_search, filter_conditions);
349            }
350            Condition::Not(inner) | Condition::Group(inner) => {
351                Self::analyze_condition(inner, has_vector_search, filter_conditions);
352            }
353        }
354    }
355
356    fn extract_index_lookup(
357        condition: &Condition,
358        indexed_fields: &HashSet<String>,
359    ) -> Option<(String, String)> {
360        if let Condition::Comparison(cmp) = condition {
361            if cmp.operator == crate::velesql::CompareOp::Eq && indexed_fields.contains(&cmp.column)
362            {
363                return Some((cmp.column.clone(), format!("{:?}", cmp.value)));
364            }
365        }
366        None
367    }
368
369    /// Estimates selectivity (placeholder - would need statistics in production).
370    pub(crate) fn estimate_selectivity(conditions: &[String]) -> f64 {
371        // Heuristic: more conditions = lower selectivity
372        let base = 0.5_f64;
373        base.powi(i32::try_from(conditions.len()).unwrap_or(i32::MAX))
374    }
375
376    /// Estimates execution cost in milliseconds.
377    fn estimate_cost(root: &PlanNode, has_vector_search: bool) -> f64 {
378        let base_cost = if has_vector_search { 0.05 } else { 1.0 };
379
380        match root {
381            PlanNode::Sequence(nodes) => nodes
382                .iter()
383                .fold(base_cost, |acc, node| acc + Self::node_cost(node)),
384            _ => base_cost + Self::node_cost(root),
385        }
386    }
387
388    pub(crate) fn node_cost(node: &PlanNode) -> f64 {
389        match node {
390            PlanNode::VectorSearch(_) => 0.05,
391            PlanNode::Filter(f) => 0.01 * (1.0 - f.selectivity),
392            PlanNode::Limit(_) | PlanNode::Offset(_) => 0.001,
393            PlanNode::TableScan(_) => 1.0,
394            PlanNode::IndexLookup(_) => 0.0001, // O(1) lookup is very fast
395            PlanNode::Sequence(nodes) => nodes.iter().map(Self::node_cost).sum(),
396            PlanNode::MatchTraversal(mt) => {
397                // Cost depends on depth and strategy
398                let base = 0.1;
399                let depth_factor = f64::from(mt.max_depth) * 0.05;
400                let similarity_factor = if mt.has_similarity { 0.05 } else { 0.0 };
401                base + depth_factor + similarity_factor
402            }
403        }
404    }
405
406    /// Creates a new query plan from a MATCH clause (EPIC-046 US-004).
407    #[must_use]
408    pub fn from_match(match_clause: &MatchClause, stats: &CollectionStats) -> Self {
409        let strategy = MatchQueryPlanner::plan(match_clause, stats);
410        let strategy_explanation = MatchQueryPlanner::explain(&strategy);
411
412        let (start_labels, max_depth, has_similarity, similarity_threshold) =
413            Self::extract_strategy_info(&strategy);
414
415        let relationship_count = match_clause
416            .patterns
417            .first()
418            .map_or(0, |p| p.relationships.len());
419
420        let traversal = PlanNode::MatchTraversal(MatchTraversalPlan {
421            strategy: strategy_explanation,
422            start_labels,
423            max_depth,
424            relationship_count,
425            has_similarity,
426            similarity_threshold,
427        });
428
429        let mut nodes = vec![traversal];
430        if let Some(limit) = match_clause.return_clause.limit {
431            nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
432        }
433
434        let index_used = if has_similarity {
435            Some(IndexType::Hnsw)
436        } else {
437            None
438        };
439
440        Self::assemble_plan(nodes, index_used, FilterStrategy::None, has_similarity)
441    }
442
443    /// Extracts traversal parameters from a `MatchExecutionStrategy`.
444    fn extract_strategy_info(
445        strategy: &MatchExecutionStrategy,
446    ) -> (Vec<String>, u32, bool, Option<f32>) {
447        match strategy {
448            MatchExecutionStrategy::GraphFirst {
449                start_labels,
450                max_depth,
451            } => (start_labels.clone(), *max_depth, false, None),
452            MatchExecutionStrategy::VectorFirst { threshold, .. } => {
453                (Vec::new(), 1, true, Some(*threshold))
454            }
455            MatchExecutionStrategy::Parallel {
456                graph_hint,
457                vector_hint,
458            } => {
459                let (labels, depth) = match graph_hint.as_ref() {
460                    MatchExecutionStrategy::GraphFirst {
461                        start_labels,
462                        max_depth,
463                    } => (start_labels.clone(), *max_depth),
464                    _ => (Vec::new(), 1),
465                };
466                let threshold = match vector_hint.as_ref() {
467                    MatchExecutionStrategy::VectorFirst { threshold, .. } => Some(*threshold),
468                    _ => None,
469                };
470                (labels, depth, true, threshold)
471            }
472        }
473    }
474}
475
476// Rendering, formatting, Display impl, and as_str() methods moved to explain/formatter.rs
477// Tests in explain_tests.rs per project rules (tests in separate files)