1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
//! Query validation for VelesQL similarity queries.
//!
//! Validates that similarity() queries don't use unsupported patterns:
//! - Multiple similarity() in OR (requires union of results)
//! - similarity() in OR with non-similarity conditions
//! - NOT similarity() patterns
//!
//! # Supported Patterns (EPIC-044 US-001)
//!
//! Multiple similarity() with AND is supported:
//! ```sql
//! WHERE similarity(v, $v1) > 0.8 AND similarity(v, $v2) > 0.7
//! ```
//! This applies filters sequentially (cascade).
use crate::collection::types::Collection;
use crate::error::{Error, Result};
use crate::velesql::Condition;
use super::condition_tree::{any_subtree, count_matching_leaves, is_vector_leaf};
impl Collection {
/// Validate that similarity() queries don't use unsupported patterns.
///
/// # Supported Patterns (EPIC-044 US-001)
///
/// - **Multiple similarity() with AND**: Filters applied sequentially
/// `WHERE similarity(v, $v1) > 0.8 AND similarity(v, $v2) > 0.7`
///
/// # Unsupported Patterns
///
/// 1. **similarity() in OR with non-similarity conditions** (EPIC-044 US-002):
/// `WHERE similarity(v, $v) > 0.8 OR category = 'tech'`
/// ✅ NOW SUPPORTED - Executes vector search + metadata scan, then unions results.
///
/// 2. **Multiple similarity() in OR**:
/// `WHERE similarity(v, $v1) > 0.8 OR similarity(v, $v2) > 0.7`
/// This would require union of two vector searches - not currently supported.
///
/// 3. **NOT similarity()**:
/// Cannot be efficiently executed with ANN indexes.
///
/// Returns Ok(()) if the query structure is valid, or an error describing the issue.
pub(crate) fn validate_similarity_query_structure(condition: &Condition) -> Result<()> {
let similarity_count = Self::count_similarity_conditions(condition);
// Multiple similarity() in OR is not supported (would require union of vector searches)
if similarity_count > 1 && Self::has_multiple_similarity_in_or(condition) {
return Err(Error::Config(
"Multiple similarity() conditions in OR are not supported. \
Use AND to apply filters sequentially, or split into separate queries."
.to_string(),
));
}
// EPIC-044 US-002: similarity() OR metadata IS now supported (union mode)
// Only block when multiple similarity() are in OR (handled above)
// EPIC-044 US-003: NOT similarity() IS now supported via full scan
// Warning: This requires scanning all documents - use with LIMIT for performance
Ok(())
}
/// Check if similarity() appears under a NOT condition.
/// This pattern is not supported because negating similarity cannot be efficiently executed.
///
/// # Note
///
/// This function is prepared for when VelesQL parser supports `NOT condition` syntax.
/// Currently, the parser only supports `IS NOT NULL` and `!=` operators.
/// When parser is extended (see EPIC-005), this validation will activate.
pub(crate) fn has_similarity_under_not(condition: &Condition) -> bool {
any_subtree(
condition,
&|c| matches!(c, Condition::Not(inner) if Self::count_similarity_conditions(inner) > 0),
)
}
/// Check if multiple similarity() conditions appear under the same OR.
/// This pattern requires unioning two vector search results which is not supported.
///
/// # Example (Unsupported)
/// ```sql
/// WHERE similarity(v, $v1) > 0.8 OR similarity(v, $v2) > 0.7
/// ```
pub(crate) fn has_multiple_similarity_in_or(condition: &Condition) -> bool {
any_subtree(condition, &|c| {
matches!(c, Condition::Or(left, right)
if Self::count_similarity_conditions(left) > 0
&& Self::count_similarity_conditions(right) > 0
)
})
}
/// Count the number of vector search conditions in a condition tree.
/// Includes Similarity, VectorSearch (NEAR), and VectorFusedSearch (NEAR_FUSED).
pub(crate) fn count_similarity_conditions(condition: &Condition) -> usize {
count_matching_leaves(condition, is_vector_leaf)
}
/// Check if similarity() appears in an OR clause with non-similarity conditions.
/// This pattern cannot be correctly executed with current architecture.
pub(crate) fn has_similarity_in_problematic_or(condition: &Condition) -> bool {
any_subtree(condition, &|c| {
if let Condition::Or(left, right) = c {
let left_has_sim = Self::count_similarity_conditions(left) > 0;
let right_has_sim = Self::count_similarity_conditions(right) > 0;
let left_has_other = Self::has_non_similarity_conditions(left);
let right_has_other = Self::has_non_similarity_conditions(right);
// Problematic: one side has similarity, other side has non-similarity
// e.g., similarity() > 0.8 OR category = 'tech'
(left_has_sim && right_has_other && !right_has_sim)
|| (right_has_sim && left_has_other && !left_has_sim)
} else {
false
}
})
}
/// Check if a condition contains non-similarity conditions (metadata filters).
pub(crate) fn has_non_similarity_conditions(condition: &Condition) -> bool {
// A non-similarity leaf is any leaf that is NOT a vector type and NOT a
// structural combinator (And/Or/Group/Not).
count_matching_leaves(condition, |c| {
!is_vector_leaf(c)
&& !matches!(
c,
Condition::And(..)
| Condition::Or(..)
| Condition::Group(_)
| Condition::Not(_)
)
}) > 0
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::velesql::{CompareOp, Comparison, SimilarityCondition, Value, VectorExpr};
fn make_similarity_condition() -> Condition {
Condition::Similarity(SimilarityCondition {
field: "vector".to_string(),
vector: VectorExpr::Literal(vec![0.1, 0.2, 0.3]),
operator: CompareOp::Gt,
threshold: 0.8,
})
}
fn make_compare_condition() -> Condition {
Condition::Comparison(Comparison {
column: "category".to_string(),
operator: CompareOp::Eq,
value: Value::String("tech".to_string()),
})
}
#[test]
fn test_validate_single_similarity_and_metadata_ok() {
// similarity() AND category = 'tech' - should be OK
let cond = Condition::And(
Box::new(make_similarity_condition()),
Box::new(make_compare_condition()),
);
assert!(Collection::validate_similarity_query_structure(&cond).is_ok());
}
#[test]
fn test_validate_similarity_or_metadata_ok() {
// EPIC-044 US-002: similarity() OR category = 'tech' - NOW OK (union mode)
let cond = Condition::Or(
Box::new(make_similarity_condition()),
Box::new(make_compare_condition()),
);
assert!(Collection::validate_similarity_query_structure(&cond).is_ok());
}
#[test]
fn test_validate_multiple_similarity_with_and_ok() {
// EPIC-044 US-001: similarity() AND similarity() - should be OK (cascade filtering)
let cond = Condition::And(
Box::new(make_similarity_condition()),
Box::new(make_similarity_condition()),
);
assert!(Collection::validate_similarity_query_structure(&cond).is_ok());
}
#[test]
fn test_validate_multiple_similarity_with_or_fails() {
// similarity() OR similarity() - should FAIL (would require union)
let cond = Condition::Or(
Box::new(make_similarity_condition()),
Box::new(make_similarity_condition()),
);
let result = Collection::validate_similarity_query_structure(&cond);
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("OR"));
}
#[test]
fn test_validate_three_similarity_with_and_ok() {
// EPIC-044 US-001: Three similarity() with AND - should be OK
let cond = Condition::And(
Box::new(make_similarity_condition()),
Box::new(Condition::And(
Box::new(make_similarity_condition()),
Box::new(make_similarity_condition()),
)),
);
assert!(Collection::validate_similarity_query_structure(&cond).is_ok());
}
#[test]
fn test_validate_metadata_only_ok() {
// category = 'tech' AND status = 'active' - should be OK
let cond = Condition::And(
Box::new(make_compare_condition()),
Box::new(make_compare_condition()),
);
assert!(Collection::validate_similarity_query_structure(&cond).is_ok());
}
#[test]
fn test_validate_metadata_or_ok() {
// category = 'tech' OR status = 'active' - should be OK (no similarity)
let cond = Condition::Or(
Box::new(make_compare_condition()),
Box::new(make_compare_condition()),
);
assert!(Collection::validate_similarity_query_structure(&cond).is_ok());
}
#[test]
fn test_count_similarity_conditions() {
assert_eq!(
Collection::count_similarity_conditions(&make_similarity_condition()),
1
);
assert_eq!(
Collection::count_similarity_conditions(&make_compare_condition()),
0
);
let double = Condition::And(
Box::new(make_similarity_condition()),
Box::new(make_similarity_condition()),
);
assert_eq!(Collection::count_similarity_conditions(&double), 2);
}
}