Skip to main content

velesdb_core/velesql/explain/
plan_builder.rs

1//! Plan construction logic for `VelesQL` EXPLAIN.
2//!
3//! Contains `impl QueryPlan` methods for building plans from SELECT statements,
4//! MATCH clauses, and related query structures.
5
6use std::collections::HashSet;
7
8use super::filter_strategy::{estimate_filter_stats, resolve_filter_strategy};
9use super::formatter;
10use super::node_stats;
11use super::types::{
12    FilterPlan, FilterStrategy, FusionInfo, IndexLookupPlan, IndexType, LimitPlan,
13    MatchTraversalPlan, OffsetPlan, PlanNode, QueryPlan, TableScanPlan, VectorSearchPlan,
14};
15use crate::collection::search::query::match_planner::{
16    CollectionStats, MatchExecutionStrategy, MatchQueryPlanner,
17};
18use crate::collection::stats::CollectionStats as CoreCollectionStats;
19use crate::velesql::ast::{Condition, LetBinding, SelectStatement};
20use crate::velesql::MatchClause;
21
22impl QueryPlan {
23    /// Creates a new query plan from a SELECT statement.
24    #[must_use]
25    pub fn from_select(stmt: &SelectStatement) -> Self {
26        Self::from_select_with_stats(stmt, &HashSet::new(), None)
27    }
28
29    /// Creates a new query plan from SELECT with known indexed metadata fields.
30    #[must_use]
31    pub fn from_select_with_indexed_fields(
32        stmt: &SelectStatement,
33        indexed_fields: &HashSet<String>,
34    ) -> Self {
35        Self::from_select_with_stats(stmt, indexed_fields, None)
36    }
37
38    /// Creates a query plan with access to calibrated collection statistics.
39    ///
40    /// When `stats` is `Some`, cost and filter-strategy decisions use the
41    /// calibrated `CostEstimator` pipeline (issue #471). When `None`, falls
42    /// back bit-for-bit to the heuristic path so legacy tests and callers
43    /// without a resolved collection keep working.
44    #[must_use]
45    pub fn from_select_with_stats(
46        stmt: &SelectStatement,
47        indexed_fields: &HashSet<String>,
48        stats: Option<&CoreCollectionStats>,
49    ) -> Self {
50        let mut has_vector_search = false;
51        let mut filter_conditions = Vec::new();
52        let mut index_lookup = None;
53
54        if let Some(ref condition) = stmt.where_clause {
55            Self::analyze_condition(condition, &mut has_vector_search, &mut filter_conditions);
56            index_lookup = Self::extract_index_lookup(condition, indexed_fields);
57        }
58
59        let (mut nodes, index_used) = Self::build_scan_node(stmt, has_vector_search, index_lookup);
60        let filter_strategy = Self::append_filter_nodes_with_stats(
61            &mut nodes,
62            &filter_conditions,
63            stmt,
64            has_vector_search,
65            stats,
66        );
67
68        let mut plan = Self::assemble_plan_with_stats(
69            nodes,
70            index_used,
71            filter_strategy,
72            has_vector_search,
73            stats,
74        );
75        plan.with_options = Self::extract_with_options(stmt);
76        plan.fusion_info = Self::extract_fusion_info(stmt);
77        plan
78    }
79
80    /// Creates a full query plan from a `Query`, including LET bindings (issue #471).
81    #[must_use]
82    pub fn from_query(query: &crate::velesql::ast::Query) -> Self {
83        Self::from_query_with_stats(query, &HashSet::new(), None)
84    }
85
86    /// Creates a full query plan from a `Query`, with optional collection stats
87    /// for calibrated cost estimation (issue #471).
88    #[must_use]
89    pub fn from_query_with_stats(
90        query: &crate::velesql::ast::Query,
91        indexed_fields: &HashSet<String>,
92        stats: Option<&CoreCollectionStats>,
93    ) -> Self {
94        let mut plan = Self::from_select_with_stats(&query.select, indexed_fields, stats);
95        plan.let_bindings = Self::format_let_bindings(&query.let_bindings);
96        plan
97    }
98
99    /// Creates a new query plan from a MATCH clause (EPIC-046 US-004).
100    #[must_use]
101    pub fn from_match(match_clause: &MatchClause, stats: &CollectionStats) -> Self {
102        let strategy = MatchQueryPlanner::plan(match_clause, stats);
103        let strategy_explanation = MatchQueryPlanner::explain(&strategy);
104
105        let (start_labels, max_depth, has_similarity, similarity_threshold) =
106            Self::extract_strategy_info(&strategy);
107
108        let relationship_count = match_clause
109            .patterns
110            .first()
111            .map_or(0, |p| p.relationships.len());
112
113        let traversal = PlanNode::MatchTraversal(MatchTraversalPlan {
114            strategy: strategy_explanation,
115            start_labels,
116            max_depth,
117            relationship_count,
118            has_similarity,
119            similarity_threshold,
120        });
121
122        let mut nodes = vec![traversal];
123        if let Some(limit) = match_clause.return_clause.limit {
124            nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
125        }
126
127        let index_used = if has_similarity {
128            Some(IndexType::Hnsw)
129        } else {
130            None
131        };
132
133        Self::assemble_plan_with_stats(
134            nodes,
135            index_used,
136            FilterStrategy::None,
137            has_similarity,
138            None,
139        )
140    }
141
142    /// Variant of `assemble_plan` with optional calibrated `CollectionStats`.
143    fn assemble_plan_with_stats(
144        mut nodes: Vec<PlanNode>,
145        index_used: Option<IndexType>,
146        filter_strategy: FilterStrategy,
147        has_vector_search: bool,
148        stats: Option<&CoreCollectionStats>,
149    ) -> Self {
150        let root = if nodes.len() == 1 {
151            nodes.swap_remove(0)
152        } else {
153            PlanNode::Sequence(nodes)
154        };
155        let estimated_cost_ms = node_stats::estimate_cost(&root, has_vector_search, stats);
156        Self {
157            root,
158            estimated_cost_ms,
159            index_used,
160            filter_strategy,
161            with_options: Vec::new(),
162            let_bindings: Vec::new(),
163            fusion_info: None,
164            cache_hit: None,
165            plan_reuse_count: None,
166        }
167    }
168
169    /// Default `ef_search` when the WITH clause does not specify one.
170    const DEFAULT_EF_SEARCH: u32 = 100;
171
172    /// Builds the primary scan node based on search type.
173    fn build_scan_node(
174        stmt: &SelectStatement,
175        has_vector_search: bool,
176        index_lookup: Option<(String, String)>,
177    ) -> (Vec<PlanNode>, Option<IndexType>) {
178        let mut nodes = Vec::new();
179        let index_used;
180
181        if has_vector_search {
182            index_used = Some(IndexType::Hnsw);
183            let candidates = u32::try_from(stmt.limit.unwrap_or(50)).unwrap_or(u32::MAX);
184            let ef_search = Self::resolve_ef_search(stmt);
185            nodes.push(PlanNode::VectorSearch(VectorSearchPlan {
186                collection: stmt.from.clone(),
187                ef_search,
188                candidates,
189            }));
190        } else if let Some((property, value)) = index_lookup {
191            index_used = Some(IndexType::Property);
192            nodes.push(PlanNode::IndexLookup(IndexLookupPlan {
193                label: stmt.from.clone(),
194                property,
195                value,
196            }));
197        } else {
198            index_used = None;
199            nodes.push(PlanNode::TableScan(TableScanPlan {
200                collection: stmt.from.clone(),
201            }));
202        }
203
204        (nodes, index_used)
205    }
206
207    /// Reads `ef_search` from the WITH clause, falling back to [`Self::DEFAULT_EF_SEARCH`].
208    #[allow(clippy::cast_possible_truncation)]
209    fn resolve_ef_search(stmt: &SelectStatement) -> u32 {
210        stmt.with_clause
211            .as_ref()
212            .and_then(crate::velesql::ast::WithClause::get_ef_search)
213            .map_or(Self::DEFAULT_EF_SEARCH, |v| v as u32)
214    }
215
216    /// Extracts WITH clause options as display pairs (issue #471).
217    fn extract_with_options(stmt: &SelectStatement) -> Vec<(String, String)> {
218        let Some(ref wc) = stmt.with_clause else {
219            return Vec::new();
220        };
221        wc.options
222            .iter()
223            .map(|opt| (opt.key.clone(), formatter::format_with_value(&opt.value)))
224            .collect()
225    }
226
227    /// Extracts FUSION clause info for EXPLAIN display (issue #471).
228    fn extract_fusion_info(stmt: &SelectStatement) -> Option<FusionInfo> {
229        let fc = stmt.fusion_clause.as_ref()?;
230        let strategy = match fc.strategy {
231            crate::velesql::ast::FusionStrategyType::Rrf => "RRF",
232            crate::velesql::ast::FusionStrategyType::Weighted => "Weighted",
233            crate::velesql::ast::FusionStrategyType::Maximum => "Maximum",
234            crate::velesql::ast::FusionStrategyType::Rsf => "RSF",
235            crate::velesql::ast::FusionStrategyType::Average => "Average",
236        };
237        let weights = Self::format_fusion_weights(fc);
238        Some(FusionInfo {
239            strategy: strategy.to_string(),
240            k: fc.k,
241            weights,
242        })
243    }
244
245    /// Formats fusion weights into a human-readable string.
246    fn format_fusion_weights(fc: &crate::velesql::ast::FusionClause) -> Option<String> {
247        let mut parts = Vec::new();
248        if let Some(vw) = fc.vector_weight {
249            parts.push(format!("vector={vw}"));
250        }
251        if let Some(gw) = fc.graph_weight {
252            parts.push(format!("graph={gw}"));
253        }
254        if let Some(dw) = fc.dense_weight {
255            parts.push(format!("dense={dw}"));
256        }
257        if let Some(sw) = fc.sparse_weight {
258            parts.push(format!("sparse={sw}"));
259        }
260        if parts.is_empty() {
261            None
262        } else {
263            Some(parts.join(", "))
264        }
265    }
266
267    /// Formats LET bindings as `"name = expr"` strings (issue #471).
268    fn format_let_bindings(bindings: &[LetBinding]) -> Vec<String> {
269        bindings
270            .iter()
271            .map(|b| format!("{} = {}", b.name, b.expr))
272            .collect()
273    }
274
275    /// Variant of `append_filter_nodes` with access to the calibrated
276    /// `CostEstimator` (issue #471).
277    ///
278    /// Selectivity and filter strategy use histogram data when `stats` is
279    /// `Some`. When `None`, the historical heuristic (selectivity from
280    /// condition count, 0.1 threshold) is preserved bit-for-bit.
281    fn append_filter_nodes_with_stats(
282        nodes: &mut Vec<PlanNode>,
283        filter_conditions: &[String],
284        stmt: &SelectStatement,
285        has_vector_search: bool,
286        stats: Option<&CoreCollectionStats>,
287    ) -> FilterStrategy {
288        let mut filter_strategy = FilterStrategy::None;
289
290        if !filter_conditions.is_empty() {
291            let heuristic_fallback = Self::estimate_selectivity(filter_conditions);
292            let (selectivity, estimation_method, estimated_rows) =
293                estimate_filter_stats(stmt, heuristic_fallback, stats);
294
295            // Reason: plan_builder owns stmt so the real ef_search/candidates
296            // are the same values used in `build_scan_node` above
297            // (Devin finding 4). These values drive the pre/post-filter cost
298            // comparison so it reflects the user's actual WITH clause instead
299            // of a fixed k = 10.
300            let ef_search = Self::resolve_ef_search(stmt);
301            let candidates = u32::try_from(stmt.limit.unwrap_or(50)).unwrap_or(u32::MAX);
302
303            filter_strategy = resolve_filter_strategy(
304                selectivity,
305                has_vector_search,
306                ef_search,
307                candidates,
308                stats,
309            );
310
311            nodes.push(PlanNode::Filter(FilterPlan {
312                conditions: filter_conditions.join(" AND "),
313                selectivity,
314                estimated_rows,
315                estimation_method,
316            }));
317        }
318
319        if let Some(offset) = stmt.offset {
320            nodes.push(PlanNode::Offset(OffsetPlan { count: offset }));
321        }
322        if let Some(limit) = stmt.limit {
323            nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
324        }
325
326        filter_strategy
327    }
328
329    /// Analyzes a condition to extract vector search and filter info.
330    fn analyze_condition(
331        condition: &Condition,
332        has_vector_search: &mut bool,
333        filter_conditions: &mut Vec<String>,
334    ) {
335        match condition {
336            Condition::VectorSearch(_)
337            | Condition::VectorFusedSearch(_)
338            | Condition::SparseVectorSearch(_)
339            | Condition::Similarity(_) => {
340                *has_vector_search = true;
341            }
342            Condition::And(left, right) | Condition::Or(left, right) => {
343                Self::analyze_condition(left, has_vector_search, filter_conditions);
344                Self::analyze_condition(right, has_vector_search, filter_conditions);
345            }
346            Condition::Not(inner) | Condition::Group(inner) => {
347                Self::analyze_condition(inner, has_vector_search, filter_conditions);
348            }
349            leaf => {
350                if let Some(desc) = Self::describe_leaf_condition(leaf) {
351                    filter_conditions.push(desc);
352                }
353            }
354        }
355    }
356
357    /// Renders a non-composite condition as a short human-readable string for
358    /// the EXPLAIN plan filter list. Returns `None` for vector/composite
359    /// variants — those are handled in [`analyze_condition`] itself.
360    fn describe_leaf_condition(condition: &Condition) -> Option<String> {
361        let desc = match condition {
362            Condition::Comparison(cmp) => {
363                format!("{} {} ?", cmp.column, cmp.operator.as_str())
364            }
365            Condition::In(inc) => {
366                let op = if inc.negated { "NOT IN" } else { "IN" };
367                format!("{} {op} (...)", inc.column)
368            }
369            Condition::Between(btw) => format!("{} BETWEEN ? AND ?", btw.column),
370            Condition::Like(lk) => format!("{} LIKE ?", lk.column),
371            Condition::IsNull(isn) => {
372                let op = if isn.is_null {
373                    "IS NULL"
374                } else {
375                    "IS NOT NULL"
376                };
377                format!("{} {op}", isn.column)
378            }
379            Condition::Match(m) => format!("{} MATCH ?", m.column),
380            Condition::ContainsText(ct) => format!("{} CONTAINS_TEXT ?", ct.column),
381            Condition::GraphMatch(_) => "MATCH (...)".to_string(),
382            Condition::Contains(cc) => {
383                let mode_str = match cc.mode {
384                    crate::velesql::ContainsMode::Single => "CONTAINS",
385                    crate::velesql::ContainsMode::Any => "CONTAINS ANY",
386                    crate::velesql::ContainsMode::All => "CONTAINS ALL",
387                };
388                format!("{} {mode_str} ?", cc.column)
389            }
390            Condition::GeoDistance(gd) => format!(
391                "GEO_DISTANCE({}, {}, {}) {} ?",
392                gd.column,
393                gd.lat,
394                gd.lng,
395                gd.operator.as_str()
396            ),
397            Condition::GeoBbox(gb) => format!("GEO_BBOX({}, ...)", gb.column),
398            _ => return None,
399        };
400        Some(desc)
401    }
402
403    fn extract_index_lookup(
404        condition: &Condition,
405        indexed_fields: &HashSet<String>,
406    ) -> Option<(String, String)> {
407        if let Condition::Comparison(cmp) = condition {
408            if cmp.operator == crate::velesql::CompareOp::Eq && indexed_fields.contains(&cmp.column)
409            {
410                return Some((cmp.column.clone(), format!("{:?}", cmp.value)));
411            }
412        }
413        if let Condition::In(inc) = condition {
414            if indexed_fields.contains(&inc.column) {
415                let op = if inc.negated { "NOT IN" } else { "IN" };
416                return Some((inc.column.clone(), format!("{op} (...)")));
417            }
418        }
419        None
420    }
421
422    /// Estimates selectivity (placeholder - would need statistics in production).
423    pub(crate) fn estimate_selectivity(conditions: &[String]) -> f64 {
424        node_stats::estimate_selectivity(conditions, None)
425    }
426
427    /// Returns the heuristic cost for a single plan node.
428    #[cfg(test)]
429    pub(crate) fn node_cost(node: &PlanNode) -> f64 {
430        node_stats::node_cost(node)
431    }
432
433    /// Extracts traversal parameters from a `MatchExecutionStrategy`.
434    fn extract_strategy_info(
435        strategy: &MatchExecutionStrategy,
436    ) -> (Vec<String>, u32, bool, Option<f32>) {
437        match strategy {
438            MatchExecutionStrategy::GraphFirst {
439                start_labels,
440                max_depth,
441            } => (start_labels.clone(), *max_depth, false, None),
442            MatchExecutionStrategy::VectorFirst { threshold, .. } => {
443                (Vec::new(), 1, true, Some(*threshold))
444            }
445            MatchExecutionStrategy::Parallel {
446                graph_hint,
447                vector_hint,
448            } => {
449                let (labels, depth) = match graph_hint.as_ref() {
450                    MatchExecutionStrategy::GraphFirst {
451                        start_labels,
452                        max_depth,
453                    } => (start_labels.clone(), *max_depth),
454                    _ => (Vec::new(), 1),
455                };
456                let threshold = match vector_hint.as_ref() {
457                    MatchExecutionStrategy::VectorFirst { threshold, .. } => Some(*threshold),
458                    _ => None,
459                };
460                (labels, depth, true, threshold)
461            }
462        }
463    }
464}