1use std::collections::HashSet;
7
8use super::filter_strategy::{estimate_filter_stats, resolve_filter_strategy};
9use super::formatter;
10use super::node_stats;
11use super::types::{
12 FilterPlan, FilterStrategy, FusionInfo, IndexLookupPlan, IndexType, LimitPlan,
13 MatchTraversalPlan, OffsetPlan, PlanNode, QueryPlan, TableScanPlan, VectorSearchPlan,
14};
15use crate::collection::search::query::match_planner::{
16 CollectionStats, MatchExecutionStrategy, MatchQueryPlanner,
17};
18use crate::collection::stats::CollectionStats as CoreCollectionStats;
19use crate::velesql::ast::{Condition, LetBinding, SelectStatement, DEFAULT_SELECT_LIMIT};
20use crate::velesql::MatchClause;
21
22impl QueryPlan {
23 #[must_use]
25 pub fn from_select(stmt: &SelectStatement) -> Self {
26 Self::from_select_with_stats(stmt, &HashSet::new(), None)
27 }
28
29 #[must_use]
31 pub fn from_select_with_indexed_fields(
32 stmt: &SelectStatement,
33 indexed_fields: &HashSet<String>,
34 ) -> Self {
35 Self::from_select_with_stats(stmt, indexed_fields, None)
36 }
37
38 #[must_use]
45 pub fn from_select_with_stats(
46 stmt: &SelectStatement,
47 indexed_fields: &HashSet<String>,
48 stats: Option<&CoreCollectionStats>,
49 ) -> Self {
50 Self::build_select_plan(stmt, indexed_fields, stats, true)
51 }
52
53 fn build_select_plan(
60 stmt: &SelectStatement,
61 indexed_fields: &HashSet<String>,
62 stats: Option<&CoreCollectionStats>,
63 implicit_limit: bool,
64 ) -> Self {
65 let mut has_vector_search = false;
66 let mut filter_conditions = Vec::new();
67 let mut index_lookup = None;
68
69 if let Some(ref condition) = stmt.where_clause {
70 Self::analyze_condition(condition, &mut has_vector_search, &mut filter_conditions);
71 index_lookup = Self::extract_index_lookup(condition, indexed_fields);
72 }
73
74 let (mut nodes, index_used) = Self::build_scan_node(stmt, has_vector_search, index_lookup);
75 let filter_strategy = Self::append_filter_nodes_with_stats(
76 &mut nodes,
77 &filter_conditions,
78 stmt,
79 has_vector_search,
80 stats,
81 implicit_limit,
82 );
83
84 let mut plan = Self::assemble_plan_with_stats(
85 nodes,
86 index_used,
87 filter_strategy,
88 has_vector_search,
89 stats,
90 );
91 plan.with_options = Self::extract_with_options(stmt);
92 plan.fusion_info = Self::extract_fusion_info(stmt);
93 plan
94 }
95
96 #[must_use]
98 pub fn from_query(query: &crate::velesql::ast::Query) -> Self {
99 Self::from_query_with_stats(query, &HashSet::new(), None)
100 }
101
102 #[must_use]
105 pub fn from_query_with_stats(
106 query: &crate::velesql::ast::Query,
107 indexed_fields: &HashSet<String>,
108 stats: Option<&CoreCollectionStats>,
109 ) -> Self {
110 let implicit_limit = query.match_clause.is_none() && query.compound.is_none();
112 let mut plan =
113 Self::build_select_plan(&query.select, indexed_fields, stats, implicit_limit);
114 plan.let_bindings = Self::format_let_bindings(&query.let_bindings);
115 plan
116 }
117
118 #[must_use]
120 pub fn from_match(match_clause: &MatchClause, stats: &CollectionStats) -> Self {
121 let strategy = MatchQueryPlanner::plan(match_clause, stats);
122 let strategy_explanation = MatchQueryPlanner::explain(&strategy);
123
124 let (start_labels, max_depth, has_similarity, similarity_threshold) =
125 Self::extract_strategy_info(&strategy);
126
127 let relationship_count = match_clause
128 .patterns
129 .first()
130 .map_or(0, |p| p.relationships.len());
131
132 let traversal = PlanNode::MatchTraversal(MatchTraversalPlan {
133 strategy: strategy_explanation,
134 start_labels,
135 max_depth,
136 relationship_count,
137 has_similarity,
138 similarity_threshold,
139 });
140
141 let mut nodes = vec![traversal];
142 if let Some(limit) = match_clause.return_clause.limit {
143 nodes.push(PlanNode::Limit(LimitPlan {
144 count: limit,
145 is_default: false,
146 }));
147 }
148
149 let index_used = if has_similarity {
150 Some(IndexType::Hnsw)
151 } else {
152 None
153 };
154
155 Self::assemble_plan_with_stats(
156 nodes,
157 index_used,
158 FilterStrategy::None,
159 has_similarity,
160 None,
161 )
162 }
163
164 fn assemble_plan_with_stats(
166 mut nodes: Vec<PlanNode>,
167 index_used: Option<IndexType>,
168 filter_strategy: FilterStrategy,
169 has_vector_search: bool,
170 stats: Option<&CoreCollectionStats>,
171 ) -> Self {
172 let root = if nodes.len() == 1 {
173 nodes.swap_remove(0)
174 } else {
175 PlanNode::Sequence(nodes)
176 };
177 let estimated_cost_ms = node_stats::estimate_cost(&root, has_vector_search, stats);
178 Self {
179 root,
180 estimated_cost_ms,
181 index_used,
182 filter_strategy,
183 with_options: Vec::new(),
184 let_bindings: Vec::new(),
185 fusion_info: None,
186 cache_hit: None,
187 plan_reuse_count: None,
188 }
189 }
190
191 const DEFAULT_EF_SEARCH: u32 = 100;
193
194 fn build_scan_node(
196 stmt: &SelectStatement,
197 has_vector_search: bool,
198 index_lookup: Option<(String, String)>,
199 ) -> (Vec<PlanNode>, Option<IndexType>) {
200 let mut nodes = Vec::new();
201 let index_used;
202
203 if has_vector_search {
204 index_used = Some(IndexType::Hnsw);
205 let candidates =
206 u32::try_from(stmt.limit.unwrap_or(DEFAULT_SELECT_LIMIT)).unwrap_or(u32::MAX);
207 let ef_search = Self::resolve_ef_search(stmt);
208 nodes.push(PlanNode::VectorSearch(VectorSearchPlan {
209 collection: stmt.from.clone(),
210 ef_search,
211 candidates,
212 }));
213 } else if let Some((property, value)) = index_lookup {
214 index_used = Some(IndexType::Property);
215 nodes.push(PlanNode::IndexLookup(IndexLookupPlan {
216 label: stmt.from.clone(),
217 property,
218 value,
219 }));
220 } else {
221 index_used = None;
222 nodes.push(PlanNode::TableScan(TableScanPlan {
223 collection: stmt.from.clone(),
224 }));
225 }
226
227 (nodes, index_used)
228 }
229
230 #[allow(clippy::cast_possible_truncation)]
232 fn resolve_ef_search(stmt: &SelectStatement) -> u32 {
233 stmt.with_clause
234 .as_ref()
235 .and_then(crate::velesql::ast::WithClause::get_ef_search)
236 .map_or(Self::DEFAULT_EF_SEARCH, |v| v as u32)
237 }
238
239 fn extract_with_options(stmt: &SelectStatement) -> Vec<(String, String)> {
241 let Some(ref wc) = stmt.with_clause else {
242 return Vec::new();
243 };
244 wc.options
245 .iter()
246 .map(|opt| (opt.key.clone(), formatter::format_with_value(&opt.value)))
247 .collect()
248 }
249
250 fn extract_fusion_info(stmt: &SelectStatement) -> Option<FusionInfo> {
252 let fc = stmt.fusion_clause.as_ref()?;
253 let strategy = match fc.strategy {
254 crate::velesql::ast::FusionStrategyType::Rrf => "RRF",
255 crate::velesql::ast::FusionStrategyType::Weighted => "Weighted",
256 crate::velesql::ast::FusionStrategyType::Maximum => "Maximum",
257 crate::velesql::ast::FusionStrategyType::Rsf => "RSF",
258 crate::velesql::ast::FusionStrategyType::Average => "Average",
259 };
260 let weights = Self::format_fusion_weights(fc);
261 Some(FusionInfo {
262 strategy: strategy.to_string(),
263 k: fc.k,
264 weights,
265 })
266 }
267
268 fn format_fusion_weights(fc: &crate::velesql::ast::FusionClause) -> Option<String> {
270 let mut parts = Vec::new();
271 if let Some(vw) = fc.vector_weight {
272 parts.push(format!("vector={vw}"));
273 }
274 if let Some(gw) = fc.graph_weight {
275 parts.push(format!("graph={gw}"));
276 }
277 if let Some(dw) = fc.dense_weight {
278 parts.push(format!("dense={dw}"));
279 }
280 if let Some(sw) = fc.sparse_weight {
281 parts.push(format!("sparse={sw}"));
282 }
283 if parts.is_empty() {
284 None
285 } else {
286 Some(parts.join(", "))
287 }
288 }
289
290 fn format_let_bindings(bindings: &[LetBinding]) -> Vec<String> {
292 bindings
293 .iter()
294 .map(|b| format!("{} = {}", b.name, b.expr))
295 .collect()
296 }
297
298 fn append_filter_nodes_with_stats(
305 nodes: &mut Vec<PlanNode>,
306 filter_conditions: &[String],
307 stmt: &SelectStatement,
308 has_vector_search: bool,
309 stats: Option<&CoreCollectionStats>,
310 implicit_limit: bool,
311 ) -> FilterStrategy {
312 let mut filter_strategy = FilterStrategy::None;
313
314 if !filter_conditions.is_empty() {
315 let heuristic_fallback = Self::estimate_selectivity(filter_conditions);
316 let (selectivity, estimation_method, estimated_rows) =
317 estimate_filter_stats(stmt, heuristic_fallback, stats);
318
319 let ef_search = Self::resolve_ef_search(stmt);
325 let candidates =
326 u32::try_from(stmt.limit.unwrap_or(DEFAULT_SELECT_LIMIT)).unwrap_or(u32::MAX);
327
328 filter_strategy = resolve_filter_strategy(
329 selectivity,
330 has_vector_search,
331 ef_search,
332 candidates,
333 stats,
334 );
335
336 nodes.push(PlanNode::Filter(FilterPlan {
337 conditions: filter_conditions.join(" AND "),
338 selectivity,
339 estimated_rows,
340 estimation_method,
341 }));
342 }
343
344 if let Some(offset) = stmt.offset {
345 nodes.push(PlanNode::Offset(OffsetPlan { count: offset }));
346 }
347 Self::push_limit_node(nodes, stmt, implicit_limit);
348
349 filter_strategy
350 }
351
352 fn push_limit_node(nodes: &mut Vec<PlanNode>, stmt: &SelectStatement, implicit_limit: bool) {
359 let (count, is_default) = match (stmt.limit, implicit_limit) {
360 (Some(limit), _) => (limit, false),
361 (None, true) => (DEFAULT_SELECT_LIMIT, true),
362 (None, false) => return,
363 };
364 nodes.push(PlanNode::Limit(LimitPlan { count, is_default }));
365 }
366
367 fn analyze_condition(
369 condition: &Condition,
370 has_vector_search: &mut bool,
371 filter_conditions: &mut Vec<String>,
372 ) {
373 match condition {
374 Condition::VectorSearch(_)
375 | Condition::VectorFusedSearch(_)
376 | Condition::SparseVectorSearch(_)
377 | Condition::Similarity(_) => {
378 *has_vector_search = true;
379 }
380 Condition::And(left, right) | Condition::Or(left, right) => {
381 Self::analyze_condition(left, has_vector_search, filter_conditions);
382 Self::analyze_condition(right, has_vector_search, filter_conditions);
383 }
384 Condition::Not(inner) | Condition::Group(inner) => {
385 Self::analyze_condition(inner, has_vector_search, filter_conditions);
386 }
387 leaf => {
388 if let Some(desc) = Self::describe_leaf_condition(leaf) {
389 filter_conditions.push(desc);
390 }
391 }
392 }
393 }
394
395 fn describe_leaf_condition(condition: &Condition) -> Option<String> {
399 let desc = match condition {
400 Condition::Comparison(cmp) => {
401 format!("{} {} ?", cmp.column, cmp.operator.as_str())
402 }
403 Condition::In(inc) => {
404 let op = if inc.negated { "NOT IN" } else { "IN" };
405 format!("{} {op} (...)", inc.column)
406 }
407 Condition::Between(btw) => format!("{} BETWEEN ? AND ?", btw.column),
408 Condition::Like(lk) => format!("{} LIKE ?", lk.column),
409 Condition::IsNull(isn) => {
410 let op = if isn.is_null {
411 "IS NULL"
412 } else {
413 "IS NOT NULL"
414 };
415 format!("{} {op}", isn.column)
416 }
417 Condition::Match(m) => format!("{} MATCH ?", m.column),
418 Condition::ContainsText(ct) => format!("{} CONTAINS_TEXT ?", ct.column),
419 Condition::GraphMatch(_) => "MATCH (...)".to_string(),
420 Condition::Contains(cc) => {
421 let mode_str = match cc.mode {
422 crate::velesql::ContainsMode::Single => "CONTAINS",
423 crate::velesql::ContainsMode::Any => "CONTAINS ANY",
424 crate::velesql::ContainsMode::All => "CONTAINS ALL",
425 };
426 format!("{} {mode_str} ?", cc.column)
427 }
428 Condition::GeoDistance(gd) => format!(
429 "GEO_DISTANCE({}, {}, {}) {} ?",
430 gd.column,
431 gd.lat,
432 gd.lng,
433 gd.operator.as_str()
434 ),
435 Condition::GeoBbox(gb) => format!("GEO_BBOX({}, ...)", gb.column),
436 _ => return None,
437 };
438 Some(desc)
439 }
440
441 fn extract_index_lookup(
442 condition: &Condition,
443 indexed_fields: &HashSet<String>,
444 ) -> Option<(String, String)> {
445 if let Condition::Comparison(cmp) = condition {
446 if cmp.operator == crate::velesql::CompareOp::Eq && indexed_fields.contains(&cmp.column)
447 {
448 return Some((cmp.column.clone(), format!("{:?}", cmp.value)));
449 }
450 }
451 if let Condition::In(inc) = condition {
452 if indexed_fields.contains(&inc.column) {
453 let op = if inc.negated { "NOT IN" } else { "IN" };
454 return Some((inc.column.clone(), format!("{op} (...)")));
455 }
456 }
457 None
458 }
459
460 pub(crate) fn estimate_selectivity(conditions: &[String]) -> f64 {
462 node_stats::estimate_selectivity(conditions, None)
463 }
464
465 #[cfg(test)]
467 pub(crate) fn node_cost(node: &PlanNode) -> f64 {
468 node_stats::node_cost(node)
469 }
470
471 fn extract_strategy_info(
473 strategy: &MatchExecutionStrategy,
474 ) -> (Vec<String>, u32, bool, Option<f32>) {
475 match strategy {
476 MatchExecutionStrategy::GraphFirst {
477 start_labels,
478 max_depth,
479 } => (start_labels.clone(), *max_depth, false, None),
480 MatchExecutionStrategy::VectorFirst { threshold, .. } => {
481 (Vec::new(), 1, true, Some(*threshold))
482 }
483 MatchExecutionStrategy::Parallel {
484 graph_hint,
485 vector_hint,
486 } => {
487 let (labels, depth) = match graph_hint.as_ref() {
488 MatchExecutionStrategy::GraphFirst {
489 start_labels,
490 max_depth,
491 } => (start_labels.clone(), *max_depth),
492 _ => (Vec::new(), 1),
493 };
494 let threshold = match vector_hint.as_ref() {
495 MatchExecutionStrategy::VectorFirst { threshold, .. } => Some(*threshold),
496 _ => None,
497 };
498 (labels, depth, true, threshold)
499 }
500 }
501 }
502}