1use std::collections::HashSet;
7
8use super::filter_strategy::{estimate_filter_stats, resolve_filter_strategy};
9use super::formatter;
10use super::node_stats;
11use super::types::{
12 FilterPlan, FilterStrategy, FusionInfo, IndexLookupPlan, IndexType, LimitPlan,
13 MatchTraversalPlan, OffsetPlan, PlanNode, QueryPlan, TableScanPlan, VectorSearchPlan,
14};
15use crate::collection::search::query::match_planner::{
16 CollectionStats, MatchExecutionStrategy, MatchQueryPlanner,
17};
18use crate::collection::stats::CollectionStats as CoreCollectionStats;
19use crate::velesql::ast::{Condition, LetBinding, SelectStatement};
20use crate::velesql::MatchClause;
21
22impl QueryPlan {
23 #[must_use]
25 pub fn from_select(stmt: &SelectStatement) -> Self {
26 Self::from_select_with_stats(stmt, &HashSet::new(), None)
27 }
28
29 #[must_use]
31 pub fn from_select_with_indexed_fields(
32 stmt: &SelectStatement,
33 indexed_fields: &HashSet<String>,
34 ) -> Self {
35 Self::from_select_with_stats(stmt, indexed_fields, None)
36 }
37
38 #[must_use]
45 pub fn from_select_with_stats(
46 stmt: &SelectStatement,
47 indexed_fields: &HashSet<String>,
48 stats: Option<&CoreCollectionStats>,
49 ) -> Self {
50 let mut has_vector_search = false;
51 let mut filter_conditions = Vec::new();
52 let mut index_lookup = None;
53
54 if let Some(ref condition) = stmt.where_clause {
55 Self::analyze_condition(condition, &mut has_vector_search, &mut filter_conditions);
56 index_lookup = Self::extract_index_lookup(condition, indexed_fields);
57 }
58
59 let (mut nodes, index_used) = Self::build_scan_node(stmt, has_vector_search, index_lookup);
60 let filter_strategy = Self::append_filter_nodes_with_stats(
61 &mut nodes,
62 &filter_conditions,
63 stmt,
64 has_vector_search,
65 stats,
66 );
67
68 let mut plan = Self::assemble_plan_with_stats(
69 nodes,
70 index_used,
71 filter_strategy,
72 has_vector_search,
73 stats,
74 );
75 plan.with_options = Self::extract_with_options(stmt);
76 plan.fusion_info = Self::extract_fusion_info(stmt);
77 plan
78 }
79
80 #[must_use]
82 pub fn from_query(query: &crate::velesql::ast::Query) -> Self {
83 Self::from_query_with_stats(query, &HashSet::new(), None)
84 }
85
86 #[must_use]
89 pub fn from_query_with_stats(
90 query: &crate::velesql::ast::Query,
91 indexed_fields: &HashSet<String>,
92 stats: Option<&CoreCollectionStats>,
93 ) -> Self {
94 let mut plan = Self::from_select_with_stats(&query.select, indexed_fields, stats);
95 plan.let_bindings = Self::format_let_bindings(&query.let_bindings);
96 plan
97 }
98
99 #[must_use]
101 pub fn from_match(match_clause: &MatchClause, stats: &CollectionStats) -> Self {
102 let strategy = MatchQueryPlanner::plan(match_clause, stats);
103 let strategy_explanation = MatchQueryPlanner::explain(&strategy);
104
105 let (start_labels, max_depth, has_similarity, similarity_threshold) =
106 Self::extract_strategy_info(&strategy);
107
108 let relationship_count = match_clause
109 .patterns
110 .first()
111 .map_or(0, |p| p.relationships.len());
112
113 let traversal = PlanNode::MatchTraversal(MatchTraversalPlan {
114 strategy: strategy_explanation,
115 start_labels,
116 max_depth,
117 relationship_count,
118 has_similarity,
119 similarity_threshold,
120 });
121
122 let mut nodes = vec![traversal];
123 if let Some(limit) = match_clause.return_clause.limit {
124 nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
125 }
126
127 let index_used = if has_similarity {
128 Some(IndexType::Hnsw)
129 } else {
130 None
131 };
132
133 Self::assemble_plan_with_stats(
134 nodes,
135 index_used,
136 FilterStrategy::None,
137 has_similarity,
138 None,
139 )
140 }
141
142 fn assemble_plan_with_stats(
144 mut nodes: Vec<PlanNode>,
145 index_used: Option<IndexType>,
146 filter_strategy: FilterStrategy,
147 has_vector_search: bool,
148 stats: Option<&CoreCollectionStats>,
149 ) -> Self {
150 let root = if nodes.len() == 1 {
151 nodes.swap_remove(0)
152 } else {
153 PlanNode::Sequence(nodes)
154 };
155 let estimated_cost_ms = node_stats::estimate_cost(&root, has_vector_search, stats);
156 Self {
157 root,
158 estimated_cost_ms,
159 index_used,
160 filter_strategy,
161 with_options: Vec::new(),
162 let_bindings: Vec::new(),
163 fusion_info: None,
164 cache_hit: None,
165 plan_reuse_count: None,
166 }
167 }
168
169 const DEFAULT_EF_SEARCH: u32 = 100;
171
172 fn build_scan_node(
174 stmt: &SelectStatement,
175 has_vector_search: bool,
176 index_lookup: Option<(String, String)>,
177 ) -> (Vec<PlanNode>, Option<IndexType>) {
178 let mut nodes = Vec::new();
179 let index_used;
180
181 if has_vector_search {
182 index_used = Some(IndexType::Hnsw);
183 let candidates = u32::try_from(stmt.limit.unwrap_or(50)).unwrap_or(u32::MAX);
184 let ef_search = Self::resolve_ef_search(stmt);
185 nodes.push(PlanNode::VectorSearch(VectorSearchPlan {
186 collection: stmt.from.clone(),
187 ef_search,
188 candidates,
189 }));
190 } else if let Some((property, value)) = index_lookup {
191 index_used = Some(IndexType::Property);
192 nodes.push(PlanNode::IndexLookup(IndexLookupPlan {
193 label: stmt.from.clone(),
194 property,
195 value,
196 }));
197 } else {
198 index_used = None;
199 nodes.push(PlanNode::TableScan(TableScanPlan {
200 collection: stmt.from.clone(),
201 }));
202 }
203
204 (nodes, index_used)
205 }
206
207 #[allow(clippy::cast_possible_truncation)]
209 fn resolve_ef_search(stmt: &SelectStatement) -> u32 {
210 stmt.with_clause
211 .as_ref()
212 .and_then(crate::velesql::ast::WithClause::get_ef_search)
213 .map_or(Self::DEFAULT_EF_SEARCH, |v| v as u32)
214 }
215
216 fn extract_with_options(stmt: &SelectStatement) -> Vec<(String, String)> {
218 let Some(ref wc) = stmt.with_clause else {
219 return Vec::new();
220 };
221 wc.options
222 .iter()
223 .map(|opt| (opt.key.clone(), formatter::format_with_value(&opt.value)))
224 .collect()
225 }
226
227 fn extract_fusion_info(stmt: &SelectStatement) -> Option<FusionInfo> {
229 let fc = stmt.fusion_clause.as_ref()?;
230 let strategy = match fc.strategy {
231 crate::velesql::ast::FusionStrategyType::Rrf => "RRF",
232 crate::velesql::ast::FusionStrategyType::Weighted => "Weighted",
233 crate::velesql::ast::FusionStrategyType::Maximum => "Maximum",
234 crate::velesql::ast::FusionStrategyType::Rsf => "RSF",
235 crate::velesql::ast::FusionStrategyType::Average => "Average",
236 };
237 let weights = Self::format_fusion_weights(fc);
238 Some(FusionInfo {
239 strategy: strategy.to_string(),
240 k: fc.k,
241 weights,
242 })
243 }
244
245 fn format_fusion_weights(fc: &crate::velesql::ast::FusionClause) -> Option<String> {
247 let mut parts = Vec::new();
248 if let Some(vw) = fc.vector_weight {
249 parts.push(format!("vector={vw}"));
250 }
251 if let Some(gw) = fc.graph_weight {
252 parts.push(format!("graph={gw}"));
253 }
254 if let Some(dw) = fc.dense_weight {
255 parts.push(format!("dense={dw}"));
256 }
257 if let Some(sw) = fc.sparse_weight {
258 parts.push(format!("sparse={sw}"));
259 }
260 if parts.is_empty() {
261 None
262 } else {
263 Some(parts.join(", "))
264 }
265 }
266
267 fn format_let_bindings(bindings: &[LetBinding]) -> Vec<String> {
269 bindings
270 .iter()
271 .map(|b| format!("{} = {}", b.name, b.expr))
272 .collect()
273 }
274
275 fn append_filter_nodes_with_stats(
282 nodes: &mut Vec<PlanNode>,
283 filter_conditions: &[String],
284 stmt: &SelectStatement,
285 has_vector_search: bool,
286 stats: Option<&CoreCollectionStats>,
287 ) -> FilterStrategy {
288 let mut filter_strategy = FilterStrategy::None;
289
290 if !filter_conditions.is_empty() {
291 let heuristic_fallback = Self::estimate_selectivity(filter_conditions);
292 let (selectivity, estimation_method, estimated_rows) =
293 estimate_filter_stats(stmt, heuristic_fallback, stats);
294
295 let ef_search = Self::resolve_ef_search(stmt);
301 let candidates = u32::try_from(stmt.limit.unwrap_or(50)).unwrap_or(u32::MAX);
302
303 filter_strategy = resolve_filter_strategy(
304 selectivity,
305 has_vector_search,
306 ef_search,
307 candidates,
308 stats,
309 );
310
311 nodes.push(PlanNode::Filter(FilterPlan {
312 conditions: filter_conditions.join(" AND "),
313 selectivity,
314 estimated_rows,
315 estimation_method,
316 }));
317 }
318
319 if let Some(offset) = stmt.offset {
320 nodes.push(PlanNode::Offset(OffsetPlan { count: offset }));
321 }
322 if let Some(limit) = stmt.limit {
323 nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
324 }
325
326 filter_strategy
327 }
328
329 fn analyze_condition(
331 condition: &Condition,
332 has_vector_search: &mut bool,
333 filter_conditions: &mut Vec<String>,
334 ) {
335 match condition {
336 Condition::VectorSearch(_)
337 | Condition::VectorFusedSearch(_)
338 | Condition::SparseVectorSearch(_)
339 | Condition::Similarity(_) => {
340 *has_vector_search = true;
341 }
342 Condition::And(left, right) | Condition::Or(left, right) => {
343 Self::analyze_condition(left, has_vector_search, filter_conditions);
344 Self::analyze_condition(right, has_vector_search, filter_conditions);
345 }
346 Condition::Not(inner) | Condition::Group(inner) => {
347 Self::analyze_condition(inner, has_vector_search, filter_conditions);
348 }
349 leaf => {
350 if let Some(desc) = Self::describe_leaf_condition(leaf) {
351 filter_conditions.push(desc);
352 }
353 }
354 }
355 }
356
357 fn describe_leaf_condition(condition: &Condition) -> Option<String> {
361 let desc = match condition {
362 Condition::Comparison(cmp) => {
363 format!("{} {} ?", cmp.column, cmp.operator.as_str())
364 }
365 Condition::In(inc) => {
366 let op = if inc.negated { "NOT IN" } else { "IN" };
367 format!("{} {op} (...)", inc.column)
368 }
369 Condition::Between(btw) => format!("{} BETWEEN ? AND ?", btw.column),
370 Condition::Like(lk) => format!("{} LIKE ?", lk.column),
371 Condition::IsNull(isn) => {
372 let op = if isn.is_null {
373 "IS NULL"
374 } else {
375 "IS NOT NULL"
376 };
377 format!("{} {op}", isn.column)
378 }
379 Condition::Match(m) => format!("{} MATCH ?", m.column),
380 Condition::ContainsText(ct) => format!("{} CONTAINS_TEXT ?", ct.column),
381 Condition::GraphMatch(_) => "MATCH (...)".to_string(),
382 Condition::Contains(cc) => {
383 let mode_str = match cc.mode {
384 crate::velesql::ContainsMode::Single => "CONTAINS",
385 crate::velesql::ContainsMode::Any => "CONTAINS ANY",
386 crate::velesql::ContainsMode::All => "CONTAINS ALL",
387 };
388 format!("{} {mode_str} ?", cc.column)
389 }
390 Condition::GeoDistance(gd) => format!(
391 "GEO_DISTANCE({}, {}, {}) {} ?",
392 gd.column,
393 gd.lat,
394 gd.lng,
395 gd.operator.as_str()
396 ),
397 Condition::GeoBbox(gb) => format!("GEO_BBOX({}, ...)", gb.column),
398 _ => return None,
399 };
400 Some(desc)
401 }
402
403 fn extract_index_lookup(
404 condition: &Condition,
405 indexed_fields: &HashSet<String>,
406 ) -> Option<(String, String)> {
407 if let Condition::Comparison(cmp) = condition {
408 if cmp.operator == crate::velesql::CompareOp::Eq && indexed_fields.contains(&cmp.column)
409 {
410 return Some((cmp.column.clone(), format!("{:?}", cmp.value)));
411 }
412 }
413 if let Condition::In(inc) = condition {
414 if indexed_fields.contains(&inc.column) {
415 let op = if inc.negated { "NOT IN" } else { "IN" };
416 return Some((inc.column.clone(), format!("{op} (...)")));
417 }
418 }
419 None
420 }
421
422 pub(crate) fn estimate_selectivity(conditions: &[String]) -> f64 {
424 node_stats::estimate_selectivity(conditions, None)
425 }
426
427 #[cfg(test)]
429 pub(crate) fn node_cost(node: &PlanNode) -> f64 {
430 node_stats::node_cost(node)
431 }
432
433 fn extract_strategy_info(
435 strategy: &MatchExecutionStrategy,
436 ) -> (Vec<String>, u32, bool, Option<f32>) {
437 match strategy {
438 MatchExecutionStrategy::GraphFirst {
439 start_labels,
440 max_depth,
441 } => (start_labels.clone(), *max_depth, false, None),
442 MatchExecutionStrategy::VectorFirst { threshold, .. } => {
443 (Vec::new(), 1, true, Some(*threshold))
444 }
445 MatchExecutionStrategy::Parallel {
446 graph_hint,
447 vector_hint,
448 } => {
449 let (labels, depth) = match graph_hint.as_ref() {
450 MatchExecutionStrategy::GraphFirst {
451 start_labels,
452 max_depth,
453 } => (start_labels.clone(), *max_depth),
454 _ => (Vec::new(), 1),
455 };
456 let threshold = match vector_hint.as_ref() {
457 MatchExecutionStrategy::VectorFirst { threshold, .. } => Some(*threshold),
458 _ => None,
459 };
460 (labels, depth, true, threshold)
461 }
462 }
463 }
464}