velesdb_core/velesql/
explain.rs

1//! Query plan explanation for `VelesQL`.
2//!
3//! This module provides EXPLAIN functionality to display query execution plans.
4//!
5//! # Example
6//!
7//! ```ignore
8//! use velesdb_core::velesql::{Parser, QueryPlan};
9//!
10//! let query = Parser::parse("SELECT * FROM docs WHERE vector NEAR $v LIMIT 10")?;
11//! let plan = QueryPlan::from_select(&query.select);
12//! println!("{}", plan.to_tree());
13//! ```
14
15use serde::{Deserialize, Serialize};
16use std::fmt::{self, Write as _};
17
18use super::ast::{Condition, SelectStatement};
19
20/// Query execution plan generated by EXPLAIN.
21#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
22pub struct QueryPlan {
23    /// Root node of the plan tree.
24    pub root: PlanNode,
25    /// Estimated execution cost in milliseconds.
26    pub estimated_cost_ms: f64,
27    /// Index type used (if any).
28    pub index_used: Option<IndexType>,
29    /// Filter strategy.
30    pub filter_strategy: FilterStrategy,
31}
32
33/// A node in the query execution plan tree.
34#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
35pub enum PlanNode {
36    /// Vector similarity search operation.
37    VectorSearch(VectorSearchPlan),
38    /// Metadata filter operation.
39    Filter(FilterPlan),
40    /// Limit results.
41    Limit(LimitPlan),
42    /// Offset skip.
43    Offset(OffsetPlan),
44    /// Table scan (no index).
45    TableScan(TableScanPlan),
46    /// Sequential operations.
47    Sequence(Vec<PlanNode>),
48}
49
50/// Vector search plan details.
51#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
52pub struct VectorSearchPlan {
53    /// Collection name.
54    pub collection: String,
55    /// `ef_search` parameter (for HNSW).
56    pub ef_search: u32,
57    /// Number of candidates to retrieve.
58    pub candidates: u32,
59}
60
61/// Filter plan details.
62#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
63pub struct FilterPlan {
64    /// Filter conditions as string representation.
65    pub conditions: String,
66    /// Estimated selectivity (0.0 - 1.0).
67    pub selectivity: f64,
68}
69
70/// Limit plan details.
71#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
72pub struct LimitPlan {
73    /// Maximum number of results.
74    pub count: u64,
75}
76
77/// Offset plan details.
78#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
79pub struct OffsetPlan {
80    /// Number of results to skip.
81    pub count: u64,
82}
83
84/// Table scan plan (no index used).
85#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
86pub struct TableScanPlan {
87    /// Collection name.
88    pub collection: String,
89}
90
91/// Type of index used in the query.
92#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
93pub enum IndexType {
94    /// HNSW index for vector search.
95    Hnsw,
96    /// Flat index (brute force).
97    Flat,
98    /// Binary quantization index.
99    BinaryQuantization,
100}
101
102/// Strategy for applying filters.
103#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
104pub enum FilterStrategy {
105    /// No filter.
106    #[default]
107    None,
108    /// Pre-filtering: filter before vector search (high selectivity).
109    PreFilter,
110    /// Post-filtering: filter after vector search (low selectivity).
111    PostFilter,
112}
113
114impl QueryPlan {
115    /// Creates a new query plan from a SELECT statement.
116    #[must_use]
117    pub fn from_select(stmt: &SelectStatement) -> Self {
118        let mut nodes = Vec::new();
119        let mut has_vector_search = false;
120        let mut filter_conditions = Vec::new();
121        let mut filter_strategy = FilterStrategy::None;
122        let mut index_used = None;
123
124        // Analyze WHERE clause
125        if let Some(ref condition) = stmt.where_clause {
126            Self::analyze_condition(condition, &mut has_vector_search, &mut filter_conditions);
127        }
128
129        // Build plan nodes
130        if has_vector_search {
131            index_used = Some(IndexType::Hnsw);
132            let candidates = u32::try_from(stmt.limit.unwrap_or(50)).unwrap_or(u32::MAX);
133            nodes.push(PlanNode::VectorSearch(VectorSearchPlan {
134                collection: stmt.from.clone(),
135                ef_search: 100, // Default HNSW parameter
136                candidates,
137            }));
138        } else {
139            nodes.push(PlanNode::TableScan(TableScanPlan {
140                collection: stmt.from.clone(),
141            }));
142        }
143
144        // Add filter if needed
145        if !filter_conditions.is_empty() {
146            let selectivity = Self::estimate_selectivity(&filter_conditions);
147            filter_strategy = if selectivity > 0.1 {
148                FilterStrategy::PostFilter
149            } else {
150                FilterStrategy::PreFilter
151            };
152
153            nodes.push(PlanNode::Filter(FilterPlan {
154                conditions: filter_conditions.join(" AND "),
155                selectivity,
156            }));
157        }
158
159        // Add offset before limit
160        if let Some(offset) = stmt.offset {
161            nodes.push(PlanNode::Offset(OffsetPlan { count: offset }));
162        }
163
164        // Add limit
165        if let Some(limit) = stmt.limit {
166            nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
167        }
168
169        let root = if nodes.len() == 1 {
170            nodes.swap_remove(0)
171        } else {
172            PlanNode::Sequence(nodes)
173        };
174
175        let estimated_cost_ms = Self::estimate_cost(&root, has_vector_search);
176
177        Self {
178            root,
179            estimated_cost_ms,
180            index_used,
181            filter_strategy,
182        }
183    }
184
185    /// Analyzes a condition to extract vector search and filter info.
186    fn analyze_condition(
187        condition: &Condition,
188        has_vector_search: &mut bool,
189        filter_conditions: &mut Vec<String>,
190    ) {
191        match condition {
192            Condition::VectorSearch(_) | Condition::VectorFusedSearch(_) => {
193                *has_vector_search = true;
194            }
195            Condition::Comparison(cmp) => {
196                filter_conditions.push(format!("{} {} ?", cmp.column, cmp.operator.as_str()));
197            }
198            Condition::In(inc) => {
199                filter_conditions.push(format!("{} IN (...)", inc.column));
200            }
201            Condition::Between(btw) => {
202                filter_conditions.push(format!("{} BETWEEN ? AND ?", btw.column));
203            }
204            Condition::Like(lk) => {
205                filter_conditions.push(format!("{} LIKE ?", lk.column));
206            }
207            Condition::IsNull(isn) => {
208                let op = if isn.is_null {
209                    "IS NULL"
210                } else {
211                    "IS NOT NULL"
212                };
213                filter_conditions.push(format!("{} {op}", isn.column));
214            }
215            Condition::Match(m) => {
216                filter_conditions.push(format!("{} MATCH ?", m.column));
217            }
218            Condition::And(left, right) | Condition::Or(left, right) => {
219                Self::analyze_condition(left, has_vector_search, filter_conditions);
220                Self::analyze_condition(right, has_vector_search, filter_conditions);
221            }
222            Condition::Not(inner) | Condition::Group(inner) => {
223                Self::analyze_condition(inner, has_vector_search, filter_conditions);
224            }
225        }
226    }
227
228    /// Estimates selectivity (placeholder - would need statistics in production).
229    fn estimate_selectivity(conditions: &[String]) -> f64 {
230        // Heuristic: more conditions = lower selectivity
231        let base = 0.5_f64;
232        base.powi(i32::try_from(conditions.len()).unwrap_or(i32::MAX))
233    }
234
235    /// Estimates execution cost in milliseconds.
236    fn estimate_cost(root: &PlanNode, has_vector_search: bool) -> f64 {
237        let base_cost = if has_vector_search { 0.05 } else { 1.0 };
238
239        match root {
240            PlanNode::Sequence(nodes) => nodes
241                .iter()
242                .fold(base_cost, |acc, node| acc + Self::node_cost(node)),
243            _ => base_cost + Self::node_cost(root),
244        }
245    }
246
247    fn node_cost(node: &PlanNode) -> f64 {
248        match node {
249            PlanNode::VectorSearch(_) => 0.05,
250            PlanNode::Filter(f) => 0.01 * (1.0 - f.selectivity),
251            PlanNode::Limit(_) | PlanNode::Offset(_) => 0.001,
252            PlanNode::TableScan(_) => 1.0,
253            PlanNode::Sequence(nodes) => nodes.iter().map(Self::node_cost).sum(),
254        }
255    }
256
257    /// Renders the plan as a tree string.
258    #[must_use]
259    pub fn to_tree(&self) -> String {
260        let mut output = String::from("Query Plan:\n");
261        Self::render_node(&self.root, &mut output, "", true);
262
263        let _ = write!(
264            output,
265            "\nEstimated cost: {:.3}ms\n",
266            self.estimated_cost_ms
267        );
268
269        if let Some(ref idx) = self.index_used {
270            let _ = writeln!(output, "Index used: {}", idx.as_str());
271        }
272
273        if self.filter_strategy != FilterStrategy::None {
274            let _ = writeln!(output, "Filter strategy: {}", self.filter_strategy.as_str());
275        }
276
277        output
278    }
279
280    fn render_node(node: &PlanNode, output: &mut String, prefix: &str, is_last: bool) {
281        let connector = if is_last { "└─ " } else { "├─ " };
282        let child_prefix = format!("{}{}", prefix, if is_last { "   " } else { "│  " });
283
284        match node {
285            PlanNode::VectorSearch(vs) => {
286                let _ = writeln!(output, "{prefix}{connector}VectorSearch");
287                let _ = writeln!(output, "{child_prefix}├─ Collection: {}", vs.collection);
288                let _ = writeln!(output, "{child_prefix}├─ ef_search: {}", vs.ef_search);
289                let _ = writeln!(output, "{child_prefix}└─ Candidates: {}", vs.candidates);
290            }
291            PlanNode::Filter(f) => {
292                let _ = writeln!(output, "{prefix}{connector}Filter");
293                let _ = writeln!(output, "{child_prefix}├─ Conditions: {}", f.conditions);
294                let _ = writeln!(
295                    output,
296                    "{child_prefix}└─ Selectivity: {:.1}%",
297                    f.selectivity * 100.0
298                );
299            }
300            PlanNode::Limit(l) => {
301                let _ = writeln!(output, "{prefix}{connector}Limit: {}", l.count);
302            }
303            PlanNode::Offset(o) => {
304                let _ = writeln!(output, "{prefix}{connector}Offset: {}", o.count);
305            }
306            PlanNode::TableScan(ts) => {
307                let _ = writeln!(output, "{prefix}{connector}TableScan: {}", ts.collection);
308            }
309            PlanNode::Sequence(nodes) => {
310                for (i, child) in nodes.iter().enumerate() {
311                    Self::render_node(child, output, prefix, i == nodes.len() - 1);
312                }
313            }
314        }
315    }
316
317    /// Renders the plan as JSON.
318    ///
319    /// # Errors
320    ///
321    /// Returns an error if serialization fails.
322    pub fn to_json(&self) -> Result<String, serde_json::Error> {
323        serde_json::to_string_pretty(self)
324    }
325}
326
327impl IndexType {
328    /// Returns the index type as a string.
329    #[must_use]
330    pub const fn as_str(&self) -> &'static str {
331        match self {
332            Self::Hnsw => "HNSW",
333            Self::Flat => "Flat",
334            Self::BinaryQuantization => "BinaryQuantization",
335        }
336    }
337}
338
339impl FilterStrategy {
340    /// Returns the filter strategy as a string.
341    #[must_use]
342    pub const fn as_str(&self) -> &'static str {
343        match self {
344            Self::None => "none",
345            Self::PreFilter => "pre-filtering (high selectivity)",
346            Self::PostFilter => "post-filtering (low selectivity)",
347        }
348    }
349}
350
351impl super::ast::CompareOp {
352    /// Returns the operator as a string.
353    #[must_use]
354    pub const fn as_str(&self) -> &'static str {
355        match self {
356            Self::Eq => "=",
357            Self::NotEq => "!=",
358            Self::Gt => ">",
359            Self::Gte => ">=",
360            Self::Lt => "<",
361            Self::Lte => "<=",
362        }
363    }
364}
365
366impl fmt::Display for QueryPlan {
367    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
368        write!(f, "{}", self.to_tree())
369    }
370}