Skip to main content

velesdb_core/velesql/explain/
plan_builder.rs

1//! Plan construction logic for `VelesQL` EXPLAIN.
2//!
3//! Contains `impl QueryPlan` methods for building plans from SELECT statements,
4//! MATCH clauses, and related query structures.
5
6use std::collections::HashSet;
7
8use super::filter_strategy::{estimate_filter_stats, resolve_filter_strategy};
9use super::formatter;
10use super::node_stats;
11use super::types::{
12    FilterPlan, FilterStrategy, FusionInfo, IndexLookupPlan, IndexType, LimitPlan,
13    MatchTraversalPlan, OffsetPlan, PlanNode, QueryPlan, TableScanPlan, VectorSearchPlan,
14};
15use crate::collection::search::query::match_planner::{
16    CollectionStats, MatchExecutionStrategy, MatchQueryPlanner,
17};
18use crate::collection::stats::CollectionStats as CoreCollectionStats;
19use crate::velesql::ast::{Condition, LetBinding, SelectStatement, DEFAULT_SELECT_LIMIT};
20use crate::velesql::MatchClause;
21
22impl QueryPlan {
23    /// Creates a new query plan from a SELECT statement.
24    #[must_use]
25    pub fn from_select(stmt: &SelectStatement) -> Self {
26        Self::from_select_with_stats(stmt, &HashSet::new(), None)
27    }
28
29    /// Creates a new query plan from SELECT with known indexed metadata fields.
30    #[must_use]
31    pub fn from_select_with_indexed_fields(
32        stmt: &SelectStatement,
33        indexed_fields: &HashSet<String>,
34    ) -> Self {
35        Self::from_select_with_stats(stmt, indexed_fields, None)
36    }
37
38    /// Creates a query plan with access to calibrated collection statistics.
39    ///
40    /// When `stats` is `Some`, cost and filter-strategy decisions use the
41    /// calibrated `CostEstimator` pipeline (issue #471). When `None`, falls
42    /// back bit-for-bit to the heuristic path so legacy tests and callers
43    /// without a resolved collection keep working.
44    #[must_use]
45    pub fn from_select_with_stats(
46        stmt: &SelectStatement,
47        indexed_fields: &HashSet<String>,
48        stats: Option<&CoreCollectionStats>,
49    ) -> Self {
50        Self::build_select_plan(stmt, indexed_fields, stats, true)
51    }
52
53    /// Shared SELECT plan construction.
54    ///
55    /// `implicit_limit` controls whether the engine default
56    /// [`DEFAULT_SELECT_LIMIT`] is surfaced as a Limit node when the statement
57    /// has no explicit LIMIT. Plain SELECT statements pass `true`; MATCH and
58    /// compound queries pass `false` (no implicit limit applies to them).
59    fn build_select_plan(
60        stmt: &SelectStatement,
61        indexed_fields: &HashSet<String>,
62        stats: Option<&CoreCollectionStats>,
63        implicit_limit: bool,
64    ) -> Self {
65        let mut has_vector_search = false;
66        let mut filter_conditions = Vec::new();
67        let mut index_lookup = None;
68
69        if let Some(ref condition) = stmt.where_clause {
70            Self::analyze_condition(condition, &mut has_vector_search, &mut filter_conditions);
71            index_lookup = Self::extract_index_lookup(condition, indexed_fields);
72        }
73
74        let (mut nodes, index_used) = Self::build_scan_node(stmt, has_vector_search, index_lookup);
75        let filter_strategy = Self::append_filter_nodes_with_stats(
76            &mut nodes,
77            &filter_conditions,
78            stmt,
79            has_vector_search,
80            stats,
81            implicit_limit,
82        );
83
84        let mut plan = Self::assemble_plan_with_stats(
85            nodes,
86            index_used,
87            filter_strategy,
88            has_vector_search,
89            stats,
90        );
91        plan.with_options = Self::extract_with_options(stmt);
92        plan.fusion_info = Self::extract_fusion_info(stmt);
93        plan
94    }
95
96    /// Creates a full query plan from a `Query`, including LET bindings (issue #471).
97    #[must_use]
98    pub fn from_query(query: &crate::velesql::ast::Query) -> Self {
99        Self::from_query_with_stats(query, &HashSet::new(), None)
100    }
101
102    /// Creates a full query plan from a `Query`, with optional collection stats
103    /// for calibrated cost estimation (issue #471).
104    #[must_use]
105    pub fn from_query_with_stats(
106        query: &crate::velesql::ast::Query,
107        indexed_fields: &HashSet<String>,
108        stats: Option<&CoreCollectionStats>,
109    ) -> Self {
110        // MATCH and compound queries have no implicit default LIMIT.
111        let implicit_limit = query.match_clause.is_none() && query.compound.is_none();
112        let mut plan =
113            Self::build_select_plan(&query.select, indexed_fields, stats, implicit_limit);
114        plan.let_bindings = Self::format_let_bindings(&query.let_bindings);
115        plan
116    }
117
118    /// Creates a new query plan from a MATCH clause (EPIC-046 US-004).
119    #[must_use]
120    pub fn from_match(match_clause: &MatchClause, stats: &CollectionStats) -> Self {
121        let strategy = MatchQueryPlanner::plan(match_clause, stats);
122        let strategy_explanation = MatchQueryPlanner::explain(&strategy);
123
124        let (start_labels, max_depth, has_similarity, similarity_threshold) =
125            Self::extract_strategy_info(&strategy);
126
127        let relationship_count = match_clause
128            .patterns
129            .first()
130            .map_or(0, |p| p.relationships.len());
131
132        let traversal = PlanNode::MatchTraversal(MatchTraversalPlan {
133            strategy: strategy_explanation,
134            start_labels,
135            max_depth,
136            relationship_count,
137            has_similarity,
138            similarity_threshold,
139        });
140
141        let mut nodes = vec![traversal];
142        if let Some(limit) = match_clause.return_clause.limit {
143            nodes.push(PlanNode::Limit(LimitPlan {
144                count: limit,
145                is_default: false,
146            }));
147        }
148
149        let index_used = if has_similarity {
150            Some(IndexType::Hnsw)
151        } else {
152            None
153        };
154
155        Self::assemble_plan_with_stats(
156            nodes,
157            index_used,
158            FilterStrategy::None,
159            has_similarity,
160            None,
161        )
162    }
163
164    /// Variant of `assemble_plan` with optional calibrated `CollectionStats`.
165    fn assemble_plan_with_stats(
166        mut nodes: Vec<PlanNode>,
167        index_used: Option<IndexType>,
168        filter_strategy: FilterStrategy,
169        has_vector_search: bool,
170        stats: Option<&CoreCollectionStats>,
171    ) -> Self {
172        let root = if nodes.len() == 1 {
173            nodes.swap_remove(0)
174        } else {
175            PlanNode::Sequence(nodes)
176        };
177        let estimated_cost_ms = node_stats::estimate_cost(&root, has_vector_search, stats);
178        Self {
179            root,
180            estimated_cost_ms,
181            index_used,
182            filter_strategy,
183            with_options: Vec::new(),
184            let_bindings: Vec::new(),
185            fusion_info: None,
186            cache_hit: None,
187            plan_reuse_count: None,
188        }
189    }
190
191    /// Default `ef_search` when the WITH clause does not specify one.
192    const DEFAULT_EF_SEARCH: u32 = 100;
193
194    /// Builds the primary scan node based on search type.
195    fn build_scan_node(
196        stmt: &SelectStatement,
197        has_vector_search: bool,
198        index_lookup: Option<(String, String)>,
199    ) -> (Vec<PlanNode>, Option<IndexType>) {
200        let mut nodes = Vec::new();
201        let index_used;
202
203        if has_vector_search {
204            index_used = Some(IndexType::Hnsw);
205            let candidates =
206                u32::try_from(stmt.limit.unwrap_or(DEFAULT_SELECT_LIMIT)).unwrap_or(u32::MAX);
207            let ef_search = Self::resolve_ef_search(stmt);
208            nodes.push(PlanNode::VectorSearch(VectorSearchPlan {
209                collection: stmt.from.clone(),
210                ef_search,
211                candidates,
212            }));
213        } else if let Some((property, value)) = index_lookup {
214            index_used = Some(IndexType::Property);
215            nodes.push(PlanNode::IndexLookup(IndexLookupPlan {
216                label: stmt.from.clone(),
217                property,
218                value,
219            }));
220        } else {
221            index_used = None;
222            nodes.push(PlanNode::TableScan(TableScanPlan {
223                collection: stmt.from.clone(),
224            }));
225        }
226
227        (nodes, index_used)
228    }
229
230    /// Reads `ef_search` from the WITH clause, falling back to [`Self::DEFAULT_EF_SEARCH`].
231    #[allow(clippy::cast_possible_truncation)]
232    fn resolve_ef_search(stmt: &SelectStatement) -> u32 {
233        stmt.with_clause
234            .as_ref()
235            .and_then(crate::velesql::ast::WithClause::get_ef_search)
236            .map_or(Self::DEFAULT_EF_SEARCH, |v| v as u32)
237    }
238
239    /// Extracts WITH clause options as display pairs (issue #471).
240    fn extract_with_options(stmt: &SelectStatement) -> Vec<(String, String)> {
241        let Some(ref wc) = stmt.with_clause else {
242            return Vec::new();
243        };
244        wc.options
245            .iter()
246            .map(|opt| (opt.key.clone(), formatter::format_with_value(&opt.value)))
247            .collect()
248    }
249
250    /// Extracts FUSION clause info for EXPLAIN display (issue #471).
251    fn extract_fusion_info(stmt: &SelectStatement) -> Option<FusionInfo> {
252        let fc = stmt.fusion_clause.as_ref()?;
253        let strategy = match fc.strategy {
254            crate::velesql::ast::FusionStrategyType::Rrf => "RRF",
255            crate::velesql::ast::FusionStrategyType::Weighted => "Weighted",
256            crate::velesql::ast::FusionStrategyType::Maximum => "Maximum",
257            crate::velesql::ast::FusionStrategyType::Rsf => "RSF",
258            crate::velesql::ast::FusionStrategyType::Average => "Average",
259        };
260        let weights = Self::format_fusion_weights(fc);
261        Some(FusionInfo {
262            strategy: strategy.to_string(),
263            k: fc.k,
264            weights,
265        })
266    }
267
268    /// Formats fusion weights into a human-readable string.
269    fn format_fusion_weights(fc: &crate::velesql::ast::FusionClause) -> Option<String> {
270        let mut parts = Vec::new();
271        if let Some(vw) = fc.vector_weight {
272            parts.push(format!("vector={vw}"));
273        }
274        if let Some(gw) = fc.graph_weight {
275            parts.push(format!("graph={gw}"));
276        }
277        if let Some(dw) = fc.dense_weight {
278            parts.push(format!("dense={dw}"));
279        }
280        if let Some(sw) = fc.sparse_weight {
281            parts.push(format!("sparse={sw}"));
282        }
283        if parts.is_empty() {
284            None
285        } else {
286            Some(parts.join(", "))
287        }
288    }
289
290    /// Formats LET bindings as `"name = expr"` strings (issue #471).
291    fn format_let_bindings(bindings: &[LetBinding]) -> Vec<String> {
292        bindings
293            .iter()
294            .map(|b| format!("{} = {}", b.name, b.expr))
295            .collect()
296    }
297
298    /// Variant of `append_filter_nodes` with access to the calibrated
299    /// `CostEstimator` (issue #471).
300    ///
301    /// Selectivity and filter strategy use histogram data when `stats` is
302    /// `Some`. When `None`, the historical heuristic (selectivity from
303    /// condition count, 0.1 threshold) is preserved bit-for-bit.
304    fn append_filter_nodes_with_stats(
305        nodes: &mut Vec<PlanNode>,
306        filter_conditions: &[String],
307        stmt: &SelectStatement,
308        has_vector_search: bool,
309        stats: Option<&CoreCollectionStats>,
310        implicit_limit: bool,
311    ) -> FilterStrategy {
312        let mut filter_strategy = FilterStrategy::None;
313
314        if !filter_conditions.is_empty() {
315            let heuristic_fallback = Self::estimate_selectivity(filter_conditions);
316            let (selectivity, estimation_method, estimated_rows) =
317                estimate_filter_stats(stmt, heuristic_fallback, stats);
318
319            // Reason: plan_builder owns stmt so the real ef_search/candidates
320            // are the same values used in `build_scan_node` above
321            // (Devin finding 4). These values drive the pre/post-filter cost
322            // comparison so it reflects the user's actual WITH clause instead
323            // of a fixed k = 10.
324            let ef_search = Self::resolve_ef_search(stmt);
325            let candidates =
326                u32::try_from(stmt.limit.unwrap_or(DEFAULT_SELECT_LIMIT)).unwrap_or(u32::MAX);
327
328            filter_strategy = resolve_filter_strategy(
329                selectivity,
330                has_vector_search,
331                ef_search,
332                candidates,
333                stats,
334            );
335
336            nodes.push(PlanNode::Filter(FilterPlan {
337                conditions: filter_conditions.join(" AND "),
338                selectivity,
339                estimated_rows,
340                estimation_method,
341            }));
342        }
343
344        if let Some(offset) = stmt.offset {
345            nodes.push(PlanNode::Offset(OffsetPlan { count: offset }));
346        }
347        Self::push_limit_node(nodes, stmt, implicit_limit);
348
349        filter_strategy
350    }
351
352    /// Pushes the Limit node for a statement.
353    ///
354    /// Without an explicit LIMIT, plain SELECT statements (`implicit_limit ==
355    /// true`) surface the engine default [`DEFAULT_SELECT_LIMIT`] so EXPLAIN
356    /// matches what execution actually returns. MATCH and compound queries
357    /// (`implicit_limit == false`) keep their unlimited semantics: no node.
358    fn push_limit_node(nodes: &mut Vec<PlanNode>, stmt: &SelectStatement, implicit_limit: bool) {
359        let (count, is_default) = match (stmt.limit, implicit_limit) {
360            (Some(limit), _) => (limit, false),
361            (None, true) => (DEFAULT_SELECT_LIMIT, true),
362            (None, false) => return,
363        };
364        nodes.push(PlanNode::Limit(LimitPlan { count, is_default }));
365    }
366
367    /// Analyzes a condition to extract vector search and filter info.
368    fn analyze_condition(
369        condition: &Condition,
370        has_vector_search: &mut bool,
371        filter_conditions: &mut Vec<String>,
372    ) {
373        match condition {
374            Condition::VectorSearch(_)
375            | Condition::VectorFusedSearch(_)
376            | Condition::SparseVectorSearch(_)
377            | Condition::Similarity(_) => {
378                *has_vector_search = true;
379            }
380            Condition::And(left, right) | Condition::Or(left, right) => {
381                Self::analyze_condition(left, has_vector_search, filter_conditions);
382                Self::analyze_condition(right, has_vector_search, filter_conditions);
383            }
384            Condition::Not(inner) | Condition::Group(inner) => {
385                Self::analyze_condition(inner, has_vector_search, filter_conditions);
386            }
387            leaf => {
388                if let Some(desc) = Self::describe_leaf_condition(leaf) {
389                    filter_conditions.push(desc);
390                }
391            }
392        }
393    }
394
395    /// Renders a non-composite condition as a short human-readable string for
396    /// the EXPLAIN plan filter list. Returns `None` for vector/composite
397    /// variants — those are handled in [`analyze_condition`] itself.
398    fn describe_leaf_condition(condition: &Condition) -> Option<String> {
399        let desc = match condition {
400            Condition::Comparison(cmp) => {
401                format!("{} {} ?", cmp.column, cmp.operator.as_str())
402            }
403            Condition::In(inc) => {
404                let op = if inc.negated { "NOT IN" } else { "IN" };
405                format!("{} {op} (...)", inc.column)
406            }
407            Condition::Between(btw) => format!("{} BETWEEN ? AND ?", btw.column),
408            Condition::Like(lk) => format!("{} LIKE ?", lk.column),
409            Condition::IsNull(isn) => {
410                let op = if isn.is_null {
411                    "IS NULL"
412                } else {
413                    "IS NOT NULL"
414                };
415                format!("{} {op}", isn.column)
416            }
417            Condition::Match(m) => format!("{} MATCH ?", m.column),
418            Condition::ContainsText(ct) => format!("{} CONTAINS_TEXT ?", ct.column),
419            Condition::GraphMatch(_) => "MATCH (...)".to_string(),
420            Condition::Contains(cc) => {
421                let mode_str = match cc.mode {
422                    crate::velesql::ContainsMode::Single => "CONTAINS",
423                    crate::velesql::ContainsMode::Any => "CONTAINS ANY",
424                    crate::velesql::ContainsMode::All => "CONTAINS ALL",
425                };
426                format!("{} {mode_str} ?", cc.column)
427            }
428            Condition::GeoDistance(gd) => format!(
429                "GEO_DISTANCE({}, {}, {}) {} ?",
430                gd.column,
431                gd.lat,
432                gd.lng,
433                gd.operator.as_str()
434            ),
435            Condition::GeoBbox(gb) => format!("GEO_BBOX({}, ...)", gb.column),
436            _ => return None,
437        };
438        Some(desc)
439    }
440
441    fn extract_index_lookup(
442        condition: &Condition,
443        indexed_fields: &HashSet<String>,
444    ) -> Option<(String, String)> {
445        if let Condition::Comparison(cmp) = condition {
446            if cmp.operator == crate::velesql::CompareOp::Eq && indexed_fields.contains(&cmp.column)
447            {
448                return Some((cmp.column.clone(), format!("{:?}", cmp.value)));
449            }
450        }
451        if let Condition::In(inc) = condition {
452            if indexed_fields.contains(&inc.column) {
453                let op = if inc.negated { "NOT IN" } else { "IN" };
454                return Some((inc.column.clone(), format!("{op} (...)")));
455            }
456        }
457        None
458    }
459
460    /// Estimates selectivity (placeholder - would need statistics in production).
461    pub(crate) fn estimate_selectivity(conditions: &[String]) -> f64 {
462        node_stats::estimate_selectivity(conditions, None)
463    }
464
465    /// Returns the heuristic cost for a single plan node.
466    #[cfg(test)]
467    pub(crate) fn node_cost(node: &PlanNode) -> f64 {
468        node_stats::node_cost(node)
469    }
470
471    /// Extracts traversal parameters from a `MatchExecutionStrategy`.
472    fn extract_strategy_info(
473        strategy: &MatchExecutionStrategy,
474    ) -> (Vec<String>, u32, bool, Option<f32>) {
475        match strategy {
476            MatchExecutionStrategy::GraphFirst {
477                start_labels,
478                max_depth,
479            } => (start_labels.clone(), *max_depth, false, None),
480            MatchExecutionStrategy::VectorFirst { threshold, .. } => {
481                (Vec::new(), 1, true, Some(*threshold))
482            }
483            MatchExecutionStrategy::Parallel {
484                graph_hint,
485                vector_hint,
486            } => {
487                let (labels, depth) = match graph_hint.as_ref() {
488                    MatchExecutionStrategy::GraphFirst {
489                        start_labels,
490                        max_depth,
491                    } => (start_labels.clone(), *max_depth),
492                    _ => (Vec::new(), 1),
493                };
494                let threshold = match vector_hint.as_ref() {
495                    MatchExecutionStrategy::VectorFirst { threshold, .. } => Some(*threshold),
496                    _ => None,
497                };
498                (labels, depth, true, threshold)
499            }
500        }
501    }
502}