1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
//! Condition extraction utilities for VelesQL queries.
//!
//! Extracts vector searches, similarity conditions, and metadata filters
//! from complex WHERE clause condition trees.
use crate::collection::types::Collection;
use crate::error::{Error, Result};
use crate::velesql::Condition;
/// Converts a JSON number to f32, rejecting out-of-range finite values.
///
/// - Finite values within f32 range are truncated.
/// - Non-finite values (NaN, Inf) pass through as f32.
/// - Finite values outside f32 range or non-numeric values produce an error.
fn json_value_to_f32(v: &serde_json::Value, param_name: &str) -> Result<f32> {
v.as_f64().and_then(f64_to_f32).ok_or_else(|| {
Error::Config(format!(
"Invalid vector parameter ${param_name}: value out of f32 range or not a number"
))
})
}
/// Narrows an f64 to f32, returning `None` for finite values outside f32 range.
#[allow(clippy::cast_possible_truncation)]
fn f64_to_f32(f: f64) -> Option<f32> {
if !f.is_finite() {
return Some(f as f32);
}
if f >= f64::from(f32::MIN) && f <= f64::from(f32::MAX) {
return Some(f as f32);
}
None
}
impl Collection {
/// Helper to extract MATCH query from any nested condition.
pub(crate) fn extract_match_query(condition: &Condition) -> Option<String> {
match condition {
Condition::Match(m) => Some(m.query.clone()),
Condition::And(left, right) => {
Self::extract_match_query(left).or_else(|| Self::extract_match_query(right))
}
Condition::Group(inner) => Self::extract_match_query(inner),
_ => None,
}
}
/// Internal helper to extract vector search from WHERE clause.
///
/// Delegates to [`resolve_vector`](Self::resolve_vector) for parameter
/// resolution, eliminating the duplicated f64-to-f32 conversion logic.
#[allow(clippy::self_only_used_in_recursion)]
pub(crate) fn extract_vector_search(
&self,
condition: &mut Condition,
params: &std::collections::HashMap<String, serde_json::Value>,
) -> Result<Option<Vec<f32>>> {
match condition {
Condition::VectorSearch(vs) => {
let vec = Self::resolve_vector(&vs.vector, params)?;
Ok(Some(vec))
}
Condition::And(left, right) => {
if let Some(v) = self.extract_vector_search(left, params)? {
return Ok(Some(v));
}
self.extract_vector_search(right, params)
}
Condition::Group(inner) => self.extract_vector_search(inner, params),
_ => Ok(None),
}
}
/// Extract ALL similarity conditions from WHERE clause (EPIC-044 US-001).
/// Returns Vec of (field, vector, operator, threshold) for cascade filtering.
///
/// Delegates to [`resolve_vector`](Self::resolve_vector) for parameter
/// resolution, eliminating the duplicated f64-to-f32 conversion logic.
#[allow(clippy::type_complexity)]
#[allow(clippy::only_used_in_recursion)]
#[allow(clippy::self_only_used_in_recursion)]
pub(crate) fn extract_all_similarity_conditions(
&self,
condition: &Condition,
params: &std::collections::HashMap<String, serde_json::Value>,
) -> Result<Vec<(String, Vec<f32>, crate::velesql::CompareOp, f64)>> {
match condition {
Condition::Similarity(sim) => {
let vec = Self::resolve_vector(&sim.vector, params)?;
Ok(vec![(sim.field.clone(), vec, sim.operator, sim.threshold)])
}
// AND/OR: collect from both sides (AND=cascade, OR=validation only)
Condition::And(left, right) | Condition::Or(left, right) => {
let mut results = self.extract_all_similarity_conditions(left, params)?;
results.extend(self.extract_all_similarity_conditions(right, params)?);
Ok(results)
}
Condition::Group(inner) | Condition::Not(inner) => {
self.extract_all_similarity_conditions(inner, params)
}
_ => Ok(vec![]),
}
}
/// Extract non-similarity parts of a condition for metadata filtering.
///
/// This removes `SimilarityFilter` conditions from the tree and returns
/// only the metadata filter parts (e.g., `category = 'tech'`).
pub(crate) fn extract_metadata_filter(condition: &Condition) -> Option<Condition> {
match condition {
// Remove vector/sparse/graph search conditions — they are handled
// separately by the query executor and must not be pushed down as
// payload filters.
Condition::Similarity(_)
| Condition::VectorSearch(_)
| Condition::VectorFusedSearch(_)
| Condition::SparseVectorSearch(_)
| Condition::GraphMatch(_)
| Condition::Match(_) => None,
// For AND: keep both sides if they exist, or just one side
Condition::And(left, right) => {
let left_filter = Self::extract_metadata_filter(left);
let right_filter = Self::extract_metadata_filter(right);
match (left_filter, right_filter) {
(Some(l), Some(r)) => Some(Condition::And(Box::new(l), Box::new(r))),
(Some(l), None) => Some(l),
(None, Some(r)) => Some(r),
(None, None) => None,
}
}
// For OR: both sides must exist
// FLAG-13: This is intentionally asymmetric with AND.
// AND can work with partial conditions (e.g., similarity() AND metadata)
// but OR semantically requires both sides to be evaluable.
// Without both sides, we cannot properly evaluate the OR condition.
Condition::Or(left, right) => {
let left_filter = Self::extract_metadata_filter(left);
let right_filter = Self::extract_metadata_filter(right);
match (left_filter, right_filter) {
(Some(l), Some(r)) => Some(Condition::Or(Box::new(l), Box::new(r))),
_ => None, // OR requires both sides
}
}
// Unwrap groups
Condition::Group(inner) => {
Self::extract_metadata_filter(inner).map(|c| Condition::Group(Box::new(c)))
}
// Handle NOT: preserve NOT wrapper if inner condition exists
// Note: NOT similarity() is rejected earlier in validation, so we only
// need to handle NOT with metadata conditions here
Condition::Not(inner) => {
Self::extract_metadata_filter(inner).map(|c| Condition::Not(Box::new(c)))
}
// Keep all other conditions (comparisons, IN, BETWEEN, etc.)
other => Some(other.clone()),
}
}
/// Collect graph MATCH predicates embedded in SELECT WHERE.
pub(crate) fn collect_graph_match_predicates(
condition: &Condition,
out: &mut Vec<crate::velesql::GraphMatchPredicate>,
) {
match condition {
Condition::GraphMatch(gm) => out.push(gm.clone()),
Condition::And(left, right) | Condition::Or(left, right) => {
Self::collect_graph_match_predicates(left, out);
Self::collect_graph_match_predicates(right, out);
}
Condition::Group(inner) | Condition::Not(inner) => {
Self::collect_graph_match_predicates(inner, out);
}
_ => {}
}
}
/// Resolve a vector expression to actual vector values.
///
/// This is the single source of truth for converting `VectorExpr`
/// parameters (JSON arrays) to `Vec<f32>`. Both `extract_vector_search`
/// and `extract_all_similarity_conditions` delegate here.
///
/// # Errors
///
/// Returns an error if:
/// - The parameter is missing
/// - The parameter is not an array
/// - Any value is not a number or is outside f32 representable range
pub(crate) fn resolve_vector(
vector: &crate::velesql::VectorExpr,
params: &std::collections::HashMap<String, serde_json::Value>,
) -> Result<Vec<f32>> {
use crate::velesql::VectorExpr;
match vector {
VectorExpr::Literal(v) => Ok(v.clone()),
VectorExpr::Parameter(name) => Self::resolve_vector_parameter(name, params),
}
}
/// Resolves a named vector parameter from the query params map.
fn resolve_vector_parameter(
name: &str,
params: &std::collections::HashMap<String, serde_json::Value>,
) -> Result<Vec<f32>> {
let val = params
.get(name)
.ok_or_else(|| Error::Config(format!("Missing query parameter: ${name}")))?;
let serde_json::Value::Array(arr) = val else {
return Err(Error::Config(format!(
"Invalid vector parameter ${name}: expected array"
)));
};
arr.iter()
.map(|v| json_value_to_f32(v, name))
.collect::<Result<Vec<f32>>>()
}
/// Compute the metric score between two vectors using the collection's configured metric.
///
/// **Note:** This returns the raw metric score, not a normalized similarity.
/// The interpretation depends on the metric:
/// - **Cosine**: Returns cosine similarity (higher = more similar)
/// - **DotProduct**: Returns dot product (higher = more similar)
/// - **Euclidean**: Returns euclidean distance (lower = more similar)
/// - **Hamming**: Returns hamming distance (lower = more similar)
/// - **Jaccard**: Returns jaccard similarity (higher = more similar)
///
/// Use `metric.higher_is_better()` to determine score interpretation.
pub(crate) fn compute_metric_score(&self, a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() || a.is_empty() {
return 0.0;
}
// Use the collection's configured metric for consistent behavior
let metric = self.config.read().metric;
metric.calculate(a, b)
}
}
// Tests moved to extraction_tests.rs per project rules