Skip to main content

velesdb_core/velesql/
explain.rs

1//! Query plan explanation for `VelesQL`.
2//!
3//! This module provides EXPLAIN functionality to display query execution plans.
4//!
5//! # Example
6//!
7//! ```ignore
8//! use velesdb_core::velesql::{Parser, QueryPlan};
9//!
10//! let query = Parser::parse("SELECT * FROM docs WHERE vector NEAR $v LIMIT 10")?;
11//! let plan = QueryPlan::from_select(&query.select);
12//! println!("{}", plan.to_tree());
13//! ```
14
15mod formatter;
16
17use serde::{Deserialize, Serialize};
18use std::collections::HashSet;
19
20use super::ast::{Condition, SelectStatement};
21use crate::collection::search::query::match_planner::{
22    CollectionStats, MatchExecutionStrategy, MatchQueryPlanner,
23};
24use crate::velesql::MatchClause;
25
26/// Query execution plan generated by EXPLAIN.
27#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
28pub struct QueryPlan {
29    /// Root node of the plan tree.
30    pub root: PlanNode,
31    /// Estimated execution cost in milliseconds.
32    pub estimated_cost_ms: f64,
33    /// Index type used (if any).
34    pub index_used: Option<IndexType>,
35    /// Filter strategy.
36    pub filter_strategy: FilterStrategy,
37    /// Whether this plan was served from the compiled plan cache (CACHE-02).
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub cache_hit: Option<bool>,
40    /// How many times this cached plan has been reused (CACHE-02).
41    ///
42    /// The value is the `reuse_count` atomically incremented inside the cache on every
43    /// successful `get()`. A value of 1 means the plan was found in cache and this is
44    /// its first reuse. The count includes the current call to `explain_query`.
45    #[serde(skip_serializing_if = "Option::is_none")]
46    pub plan_reuse_count: Option<u64>,
47}
48
49/// A node in the query execution plan tree.
50#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
51pub enum PlanNode {
52    /// Vector similarity search operation.
53    VectorSearch(VectorSearchPlan),
54    /// Metadata filter operation.
55    Filter(FilterPlan),
56    /// Limit results.
57    Limit(LimitPlan),
58    /// Offset skip.
59    Offset(OffsetPlan),
60    /// Table scan (no index).
61    TableScan(TableScanPlan),
62    /// Property index lookup (O(1) instead of scan).
63    IndexLookup(IndexLookupPlan),
64    /// Sequential operations.
65    Sequence(Vec<PlanNode>),
66    /// MATCH graph traversal (EPIC-046 US-004).
67    MatchTraversal(MatchTraversalPlan),
68}
69
70/// Vector search plan details.
71#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
72pub struct VectorSearchPlan {
73    /// Collection name.
74    pub collection: String,
75    /// `ef_search` parameter (for HNSW).
76    pub ef_search: u32,
77    /// Number of candidates to retrieve.
78    pub candidates: u32,
79}
80
81/// Filter plan details.
82#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
83pub struct FilterPlan {
84    /// Filter conditions as string representation.
85    pub conditions: String,
86    /// Estimated selectivity (0.0 - 1.0).
87    pub selectivity: f64,
88}
89
90/// Limit plan details.
91#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
92pub struct LimitPlan {
93    /// Maximum number of results.
94    pub count: u64,
95}
96
97/// Offset plan details.
98#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
99pub struct OffsetPlan {
100    /// Number of results to skip.
101    pub count: u64,
102}
103
104/// Table scan plan (no index used).
105#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
106pub struct TableScanPlan {
107    /// Collection name.
108    pub collection: String,
109}
110
111/// Property index lookup plan (O(1) lookup).
112#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
113pub struct IndexLookupPlan {
114    /// Label being queried.
115    pub label: String,
116    /// Property name with index.
117    pub property: String,
118    /// Value being looked up (as string representation).
119    pub value: String,
120}
121
122/// MATCH traversal plan (EPIC-046 US-004).
123#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
124pub struct MatchTraversalPlan {
125    /// Execution strategy chosen by planner.
126    pub strategy: String,
127    /// Start node labels.
128    pub start_labels: Vec<String>,
129    /// Maximum traversal depth.
130    pub max_depth: u32,
131    /// Number of relationships in pattern.
132    pub relationship_count: usize,
133    /// Has similarity condition.
134    pub has_similarity: bool,
135    /// Similarity threshold (if any).
136    pub similarity_threshold: Option<f32>,
137}
138
139/// EXPLAIN output with optional ANALYZE stats (EPIC-046 US-004).
140#[allow(dead_code)] // Used by API consumers
141#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct ExplainOutput {
143    /// The query plan.
144    pub plan: QueryPlan,
145    /// Actual execution statistics (only with ANALYZE).
146    #[serde(skip_serializing_if = "Option::is_none")]
147    pub actual_stats: Option<ActualStats>,
148}
149
150/// Actual execution statistics for EXPLAIN ANALYZE.
151#[allow(dead_code)] // Used by API consumers
152#[derive(Debug, Clone, Serialize, Deserialize)]
153pub struct ActualStats {
154    /// Actual number of rows returned.
155    pub actual_rows: u64,
156    /// Actual execution time in milliseconds.
157    pub actual_time_ms: f64,
158    /// Number of loop iterations.
159    pub loops: u64,
160    /// Number of nodes visited (for graph traversal).
161    pub nodes_visited: u64,
162    /// Number of edges traversed.
163    pub edges_traversed: u64,
164}
165
166/// Type of index used in the query.
167#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
168pub enum IndexType {
169    /// HNSW index for vector search.
170    Hnsw,
171    /// Flat index (brute force).
172    Flat,
173    /// Binary quantization index.
174    BinaryQuantization,
175    /// Property index for equality lookups.
176    Property,
177}
178
179/// Strategy for applying filters.
180#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
181pub enum FilterStrategy {
182    /// No filter.
183    #[default]
184    None,
185    /// Pre-filtering: filter before vector search (high selectivity).
186    PreFilter,
187    /// Post-filtering: filter after vector search (low selectivity).
188    PostFilter,
189}
190
191impl QueryPlan {
192    /// Creates a new query plan from a SELECT statement.
193    #[must_use]
194    pub fn from_select(stmt: &SelectStatement) -> Self {
195        Self::from_select_with_indexed_fields(stmt, &HashSet::new())
196    }
197
198    /// Creates a new query plan from SELECT with known indexed metadata fields.
199    #[must_use]
200    pub fn from_select_with_indexed_fields(
201        stmt: &SelectStatement,
202        indexed_fields: &HashSet<String>,
203    ) -> Self {
204        let mut nodes = Vec::new();
205        let mut has_vector_search = false;
206        let mut filter_conditions = Vec::new();
207        let mut filter_strategy = FilterStrategy::None;
208        let mut index_used = None;
209        let mut index_lookup = None;
210
211        // Analyze WHERE clause
212        if let Some(ref condition) = stmt.where_clause {
213            Self::analyze_condition(condition, &mut has_vector_search, &mut filter_conditions);
214            index_lookup = Self::extract_index_lookup(condition, indexed_fields);
215        }
216
217        // Build plan nodes
218        if has_vector_search {
219            index_used = Some(IndexType::Hnsw);
220            let candidates = u32::try_from(stmt.limit.unwrap_or(50)).unwrap_or(u32::MAX);
221            nodes.push(PlanNode::VectorSearch(VectorSearchPlan {
222                collection: stmt.from.clone(),
223                ef_search: 100, // Default HNSW parameter
224                candidates,
225            }));
226        } else if let Some((property, value)) = index_lookup {
227            index_used = Some(IndexType::Property);
228            nodes.push(PlanNode::IndexLookup(IndexLookupPlan {
229                label: stmt.from.clone(),
230                property,
231                value,
232            }));
233        } else {
234            nodes.push(PlanNode::TableScan(TableScanPlan {
235                collection: stmt.from.clone(),
236            }));
237        }
238
239        // Add filter if needed
240        if !filter_conditions.is_empty() {
241            let selectivity = Self::estimate_selectivity(&filter_conditions);
242            filter_strategy = if selectivity > 0.1 {
243                FilterStrategy::PostFilter
244            } else {
245                FilterStrategy::PreFilter
246            };
247
248            nodes.push(PlanNode::Filter(FilterPlan {
249                conditions: filter_conditions.join(" AND "),
250                selectivity,
251            }));
252        }
253
254        // Add offset before limit
255        if let Some(offset) = stmt.offset {
256            nodes.push(PlanNode::Offset(OffsetPlan { count: offset }));
257        }
258
259        // Add limit
260        if let Some(limit) = stmt.limit {
261            nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
262        }
263
264        let root = if nodes.len() == 1 {
265            nodes.swap_remove(0)
266        } else {
267            PlanNode::Sequence(nodes)
268        };
269
270        let estimated_cost_ms = Self::estimate_cost(&root, has_vector_search);
271
272        Self {
273            root,
274            estimated_cost_ms,
275            index_used,
276            filter_strategy,
277            cache_hit: None,
278            plan_reuse_count: None,
279        }
280    }
281
282    /// Analyzes a condition to extract vector search and filter info.
283    fn analyze_condition(
284        condition: &Condition,
285        has_vector_search: &mut bool,
286        filter_conditions: &mut Vec<String>,
287    ) {
288        match condition {
289            Condition::VectorSearch(_)
290            | Condition::VectorFusedSearch(_)
291            | Condition::SparseVectorSearch(_)
292            | Condition::Similarity(_) => {
293                *has_vector_search = true;
294            }
295            Condition::Comparison(cmp) => {
296                filter_conditions.push(format!("{} {} ?", cmp.column, cmp.operator.as_str()));
297            }
298            Condition::In(inc) => {
299                filter_conditions.push(format!("{} IN (...)", inc.column));
300            }
301            Condition::Between(btw) => {
302                filter_conditions.push(format!("{} BETWEEN ? AND ?", btw.column));
303            }
304            Condition::Like(lk) => {
305                filter_conditions.push(format!("{} LIKE ?", lk.column));
306            }
307            Condition::IsNull(isn) => {
308                let op = if isn.is_null {
309                    "IS NULL"
310                } else {
311                    "IS NOT NULL"
312                };
313                filter_conditions.push(format!("{} {op}", isn.column));
314            }
315            Condition::Match(m) => {
316                filter_conditions.push(format!("{} MATCH ?", m.column));
317            }
318            Condition::GraphMatch(_) => {
319                filter_conditions.push("MATCH (...)".to_string());
320            }
321            Condition::And(left, right) | Condition::Or(left, right) => {
322                Self::analyze_condition(left, has_vector_search, filter_conditions);
323                Self::analyze_condition(right, has_vector_search, filter_conditions);
324            }
325            Condition::Not(inner) | Condition::Group(inner) => {
326                Self::analyze_condition(inner, has_vector_search, filter_conditions);
327            }
328        }
329    }
330
331    fn extract_index_lookup(
332        condition: &Condition,
333        indexed_fields: &HashSet<String>,
334    ) -> Option<(String, String)> {
335        if let Condition::Comparison(cmp) = condition {
336            if cmp.operator == crate::velesql::CompareOp::Eq && indexed_fields.contains(&cmp.column)
337            {
338                return Some((cmp.column.clone(), format!("{:?}", cmp.value)));
339            }
340        }
341        None
342    }
343
344    /// Estimates selectivity (placeholder - would need statistics in production).
345    pub(crate) fn estimate_selectivity(conditions: &[String]) -> f64 {
346        // Heuristic: more conditions = lower selectivity
347        let base = 0.5_f64;
348        base.powi(i32::try_from(conditions.len()).unwrap_or(i32::MAX))
349    }
350
351    /// Estimates execution cost in milliseconds.
352    fn estimate_cost(root: &PlanNode, has_vector_search: bool) -> f64 {
353        let base_cost = if has_vector_search { 0.05 } else { 1.0 };
354
355        match root {
356            PlanNode::Sequence(nodes) => nodes
357                .iter()
358                .fold(base_cost, |acc, node| acc + Self::node_cost(node)),
359            _ => base_cost + Self::node_cost(root),
360        }
361    }
362
363    pub(crate) fn node_cost(node: &PlanNode) -> f64 {
364        match node {
365            PlanNode::VectorSearch(_) => 0.05,
366            PlanNode::Filter(f) => 0.01 * (1.0 - f.selectivity),
367            PlanNode::Limit(_) | PlanNode::Offset(_) => 0.001,
368            PlanNode::TableScan(_) => 1.0,
369            PlanNode::IndexLookup(_) => 0.0001, // O(1) lookup is very fast
370            PlanNode::Sequence(nodes) => nodes.iter().map(Self::node_cost).sum(),
371            PlanNode::MatchTraversal(mt) => {
372                // Cost depends on depth and strategy
373                let base = 0.1;
374                let depth_factor = f64::from(mt.max_depth) * 0.05;
375                let similarity_factor = if mt.has_similarity { 0.05 } else { 0.0 };
376                base + depth_factor + similarity_factor
377            }
378        }
379    }
380
381    /// Creates a new query plan from a MATCH clause (EPIC-046 US-004).
382    #[must_use]
383    pub fn from_match(match_clause: &MatchClause, stats: &CollectionStats) -> Self {
384        let strategy = MatchQueryPlanner::plan(match_clause, stats);
385        let strategy_explanation = MatchQueryPlanner::explain(&strategy);
386
387        // Extract info from strategy
388        let (start_labels, max_depth, has_similarity, similarity_threshold) = match &strategy {
389            MatchExecutionStrategy::GraphFirst {
390                start_labels,
391                max_depth,
392            } => (start_labels.clone(), *max_depth, false, None),
393            MatchExecutionStrategy::VectorFirst { threshold, .. } => {
394                (Vec::new(), 1, true, Some(*threshold))
395            }
396            MatchExecutionStrategy::Parallel {
397                graph_hint,
398                vector_hint,
399            } => {
400                let (labels, depth) = match graph_hint.as_ref() {
401                    MatchExecutionStrategy::GraphFirst {
402                        start_labels,
403                        max_depth,
404                    } => (start_labels.clone(), *max_depth),
405                    _ => (Vec::new(), 1),
406                };
407                let threshold = match vector_hint.as_ref() {
408                    MatchExecutionStrategy::VectorFirst { threshold, .. } => Some(*threshold),
409                    _ => None,
410                };
411                (labels, depth, true, threshold)
412            }
413        };
414
415        // Count relationships
416        let relationship_count = match_clause
417            .patterns
418            .first()
419            .map_or(0, |p| p.relationships.len());
420
421        let mut nodes = Vec::new();
422
423        // Main MATCH traversal node
424        nodes.push(PlanNode::MatchTraversal(MatchTraversalPlan {
425            strategy: strategy_explanation,
426            start_labels,
427            max_depth,
428            relationship_count,
429            has_similarity,
430            similarity_threshold,
431        }));
432
433        // Add limit if present
434        if let Some(limit) = match_clause.return_clause.limit {
435            nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
436        }
437
438        let root = if nodes.len() == 1 {
439            nodes.swap_remove(0)
440        } else {
441            PlanNode::Sequence(nodes)
442        };
443
444        let estimated_cost_ms = Self::estimate_cost(&root, has_similarity);
445        let index_used = if has_similarity {
446            Some(IndexType::Hnsw)
447        } else {
448            None
449        };
450
451        Self {
452            root,
453            estimated_cost_ms,
454            index_used,
455            filter_strategy: FilterStrategy::None,
456            cache_hit: None,
457            plan_reuse_count: None,
458        }
459    }
460}
461
462// Rendering, formatting, Display impl, and as_str() methods moved to explain/formatter.rs
463// Tests in explain_tests.rs per project rules (tests in separate files)