1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
//! MATCH Query Planner for optimal execution strategy (EPIC-045 US-006).
//!
//! This module provides cost-based query planning for MATCH queries,
//! choosing between Graph-First, Vector-First, or Parallel execution.
// Reason: Numeric casts in query planning are intentional:
// - u64->f64 for limit calculations: precision loss acceptable for estimates
// - f32->f64 for selectivity: values bounded (0.0-1.0 range)
// - usize->f64 for estimate calculations: values bounded (limit * 100 max)
// - All casts used for cost estimation, not precise calculations
#![allow(clippy::cast_precision_loss)]
#![allow(clippy::cast_possible_truncation)]
use crate::velesql::{Condition, MatchClause};
use serde::{Deserialize, Serialize};
/// Execution strategy for MATCH queries.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[non_exhaustive]
pub enum MatchExecutionStrategy {
/// Traverse graph first, then filter by similarity.
/// Best when: graph is selective, or no similarity condition.
GraphFirst {
/// Labels to filter start nodes.
start_labels: Vec<String>,
/// Maximum traversal depth.
max_depth: u32,
},
/// Vector search first, then validate graph paths.
/// Best when: similarity condition on start node, graph is dense.
VectorFirst {
/// Alias of the node with similarity condition.
similarity_alias: String,
/// Top-k candidates to fetch from vector index.
top_k: usize,
/// Similarity threshold.
threshold: f32,
},
/// Execute both strategies in parallel, then merge.
/// Best when: both graph and vector are selective.
Parallel {
/// Graph-first hint.
graph_hint: Box<MatchExecutionStrategy>,
/// Vector-first hint.
vector_hint: Box<MatchExecutionStrategy>,
},
}
impl Default for MatchExecutionStrategy {
fn default() -> Self {
Self::GraphFirst {
start_labels: Vec::new(),
max_depth: 10,
}
}
}
/// Statistics about a collection for cost estimation.
#[derive(Debug, Clone, Default)]
pub struct CollectionStats {
/// Total number of nodes/points.
pub total_nodes: usize,
/// Total number of edges.
pub total_edges: usize,
/// Average edges per node (density).
pub avg_degree: f64,
/// Number of distinct labels.
pub label_count: usize,
/// Estimated selectivity per label (0.0-1.0).
pub label_selectivity: f64,
}
/// Query planner for MATCH queries.
#[derive(Debug, Default)]
pub struct MatchQueryPlanner;
impl MatchQueryPlanner {
/// Analyze a MATCH clause and choose optimal execution strategy.
///
/// # Arguments
///
/// * `match_clause` - The parsed MATCH clause
/// * `stats` - Collection statistics for cost estimation
///
/// # Returns
///
/// The optimal execution strategy.
#[must_use]
pub fn plan(match_clause: &MatchClause, stats: &CollectionStats) -> MatchExecutionStrategy {
let has_similarity = Self::has_similarity_condition(match_clause.where_clause.as_ref());
let similarity_info = Self::extract_similarity_info(match_clause.where_clause.as_ref());
let similarity_on_start =
Self::is_similarity_on_start(match_clause, similarity_info.as_ref());
let start_labels = Self::extract_start_labels(match_clause);
let max_depth = Self::count_hops(match_clause);
if has_similarity && similarity_on_start {
Self::plan_vector_first(match_clause, stats, similarity_info)
} else if !has_similarity {
MatchExecutionStrategy::GraphFirst {
start_labels,
max_depth,
}
} else if Self::should_use_parallel(stats, similarity_info.as_ref()) {
Self::plan_parallel(
match_clause,
stats,
similarity_info,
start_labels,
max_depth,
)
} else {
MatchExecutionStrategy::GraphFirst {
start_labels,
max_depth,
}
}
}
/// Checks if the similarity condition targets the start node.
fn is_similarity_on_start(
match_clause: &MatchClause,
similarity_info: Option<&(String, f32, String)>,
) -> bool {
let start_alias = match_clause
.patterns
.first()
.and_then(|p| p.nodes.first())
.and_then(|n| n.alias.as_ref());
similarity_info.is_some_and(|(alias, _, _)| Some(alias) == start_alias)
}
/// Extracts start labels from the first pattern node.
fn extract_start_labels(match_clause: &MatchClause) -> Vec<String> {
match_clause
.patterns
.first()
.and_then(|p| p.nodes.first())
.map(|n| n.labels.clone())
.unwrap_or_default()
}
/// Plans a vector-first strategy.
fn plan_vector_first(
match_clause: &MatchClause,
stats: &CollectionStats,
similarity_info: Option<(String, f32, String)>,
) -> MatchExecutionStrategy {
let (alias, threshold, _) = similarity_info.unwrap_or_default();
MatchExecutionStrategy::VectorFirst {
similarity_alias: alias,
top_k: Self::estimate_top_k(match_clause, stats, threshold),
threshold,
}
}
/// Plans a parallel (graph + vector) strategy.
fn plan_parallel(
match_clause: &MatchClause,
stats: &CollectionStats,
similarity_info: Option<(String, f32, String)>,
start_labels: Vec<String>,
max_depth: u32,
) -> MatchExecutionStrategy {
let (alias, threshold, _) = similarity_info.unwrap_or_default();
MatchExecutionStrategy::Parallel {
graph_hint: Box::new(MatchExecutionStrategy::GraphFirst {
start_labels,
max_depth,
}),
vector_hint: Box::new(MatchExecutionStrategy::VectorFirst {
similarity_alias: alias,
top_k: Self::estimate_top_k(match_clause, stats, threshold),
threshold,
}),
}
}
/// Check if WHERE clause contains a similarity condition.
fn has_similarity_condition(where_clause: Option<&Condition>) -> bool {
where_clause.is_some_and(Self::condition_has_similarity)
}
/// Recursively check if a condition contains similarity().
fn condition_has_similarity(condition: &Condition) -> bool {
match condition {
Condition::Similarity(_) => true,
Condition::And(left, right) | Condition::Or(left, right) => {
Self::condition_has_similarity(left) || Self::condition_has_similarity(right)
}
Condition::Not(inner) => Self::condition_has_similarity(inner),
_ => false,
}
}
/// Extract similarity info: (alias, threshold, field).
fn extract_similarity_info(where_clause: Option<&Condition>) -> Option<(String, f32, String)> {
where_clause.and_then(Self::extract_from_condition)
}
/// Recursively extract similarity info from condition.
fn extract_from_condition(condition: &Condition) -> Option<(String, f32, String)> {
match condition {
Condition::Similarity(sim) => {
// Parse field to get alias (e.g., "doc.embedding" -> "doc")
let alias = sim
.field
.split('.')
.next()
.unwrap_or(&sim.field)
.to_string();
let field = sim
.field
.split('.')
.nth(1)
.unwrap_or("embedding")
.to_string();
#[allow(clippy::cast_possible_truncation)]
let threshold = sim.threshold as f32;
Some((alias, threshold, field))
}
Condition::And(left, right) | Condition::Or(left, right) => {
Self::extract_from_condition(left).or_else(|| Self::extract_from_condition(right))
}
Condition::Not(inner) => Self::extract_from_condition(inner),
_ => None,
}
}
/// Count the number of hops in the pattern.
pub(crate) fn count_hops(match_clause: &MatchClause) -> u32 {
match_clause.patterns.first().map_or(1, |p| {
p.relationships
.iter()
.map(|r| r.range.map_or(1, |(_, max)| max))
.sum()
})
}
/// Estimate top-k based on limit and selectivity.
fn estimate_top_k(
match_clause: &MatchClause,
stats: &CollectionStats,
threshold: f32,
) -> usize {
let limit = match_clause
.return_clause
.limit
.and_then(|l| usize::try_from(l).ok())
.unwrap_or(100);
let selectivity = Self::estimate_selectivity(threshold);
// Over-fetch to account for graph filtering
let graph_factor = if stats.avg_degree > 0.0 {
(1.0 / stats.label_selectivity).min(10.0)
} else {
2.0
};
// Reason: limit, graph_factor and selectivity are all positive, so ceil() >= 0.
#[allow(clippy::cast_sign_loss)]
let estimated = (limit as f64 * graph_factor / selectivity).ceil() as usize;
estimated.clamp(limit, limit * 100)
}
/// Estimate selectivity based on similarity threshold.
/// Higher threshold = more selective.
pub(crate) fn estimate_selectivity(threshold: f32) -> f64 {
// Heuristic: threshold 0.9 → ~10% pass, 0.5 → ~50% pass
(1.0 - f64::from(threshold)).max(0.01)
}
/// Decide if parallel execution is beneficial.
fn should_use_parallel(
stats: &CollectionStats,
similarity_info: Option<&(String, f32, String)>,
) -> bool {
// Use parallel when:
// 1. Collection is large (>10k nodes)
// 2. Graph is dense (avg_degree > 5)
// 3. Similarity threshold is high (>0.8)
let large_collection = stats.total_nodes > 10_000;
let dense_graph = stats.avg_degree > 5.0;
let high_threshold = similarity_info.is_some_and(|(_, t, _)| *t > 0.8);
large_collection && dense_graph && high_threshold
}
/// Generate a human-readable explanation of the chosen strategy.
#[must_use]
pub fn explain(strategy: &MatchExecutionStrategy) -> String {
match strategy {
MatchExecutionStrategy::GraphFirst {
start_labels,
max_depth,
} => {
let labels = if start_labels.is_empty() {
"any".to_string()
} else {
start_labels.join(", ")
};
format!(
"GraphFirst: Traverse from nodes with labels [{labels}], max depth {max_depth}",
)
}
MatchExecutionStrategy::VectorFirst {
similarity_alias,
top_k,
threshold,
} => {
format!(
"VectorFirst: Search top-{top_k} candidates for '{similarity_alias}' with threshold {threshold:.2}, then validate graph",
)
}
MatchExecutionStrategy::Parallel {
graph_hint,
vector_hint,
} => {
format!(
"Parallel:\n - {}\n - {}",
Self::explain(graph_hint),
Self::explain(vector_hint)
)
}
}
}
}
// Tests moved to match_planner_tests.rs per project rules