1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
//! Union query execution for similarity() OR metadata patterns (EPIC-044 US-002).
//!
//! Handles OR-based queries that combine vector similarity with metadata filters,
//! including nested AND/OR patterns.
use crate::collection::types::Collection;
use crate::error::Result;
use crate::point::SearchResult;
/// Maximum allowed LIMIT value.
///
/// In this file it caps the over-fetched candidate count requested from the vector
/// search in `collect_similarity_results`.
///
/// NOTE(review): the previous comment described this as "re-imported from parent", but
/// the constant is declared locally here — confirm it stays in sync with the parent's value.
const MAX_LIMIT: usize = 100_000;
impl Collection {
/// EPIC-044 US-002: Execute union query for similarity() OR metadata patterns.
///
/// This method handles queries like:
/// `WHERE similarity(v, $v) > 0.8 OR category = 'tech'`
///
/// Issue #122: Also handles nested patterns like:
/// `WHERE (similarity(v, $v) > 0.8 OR category = 'tech') AND status = 'active'`
///
/// It executes:
/// 1. Splits `condition` into a similarity part, a metadata part and an outer AND filter
/// 2. Vector search for similarity matches (outer filter applied to each hit)
/// 3. Metadata scan for non-similarity matches (outer filter AND-ed into the scan)
/// 4. Merges results with deduplication (by point ID)
/// 5. Sorts by score descending and truncates to `limit`
///
/// Scoring:
/// - Similarity matches: keep the score produced by the similarity path
/// - Metadata-only matches: keep the score produced by `execute_scan_query`
///   (documented upstream as 1.0 — not verifiable from this file, confirm)
/// - Both matching: the similarity entry wins, because the metadata pass never
///   overwrites an id that is already present
///
/// Ordering: results with equal scores are ordered by ascending point id so that the
/// output (and therefore which rows survive truncation) is deterministic.
///
/// # Errors
///
/// Propagates any error raised while collecting the similarity results.
pub(crate) fn execute_union_query(
    &self,
    condition: &crate::velesql::Condition,
    params: &std::collections::HashMap<String, serde_json::Value>,
    limit: usize,
) -> Result<Vec<SearchResult>> {
    use std::collections::HashMap;

    // Issue #122: Extract similarity, metadata, AND outer filter from condition
    let (similarity_cond, metadata_cond, outer_filter) =
        Self::split_or_condition_with_outer_filter(condition);

    // Keyed by point id so that a point matched by both branches appears only once.
    let mut results_map: HashMap<u64, SearchResult> = HashMap::new();

    // 1. Execute similarity search if we have a similarity condition
    if let Some(sim_cond) = similarity_cond {
        self.collect_similarity_results(
            &sim_cond,
            params,
            limit,
            outer_filter.as_ref(),
            &mut results_map,
        )?;
    }

    // 2. Execute metadata scan if we have a metadata condition.
    //    Must run after step 1: it only fills ids that are still missing.
    if let Some(meta_cond) = metadata_cond {
        self.collect_metadata_results(
            meta_cond,
            outer_filter.as_ref(),
            limit,
            &mut results_map,
        );
    }

    // 3. Collect and return results
    let mut results: Vec<SearchResult> = results_map.into_values().collect();

    // Sort by score descending. `HashMap` iteration order is arbitrary, so ties are
    // broken by point id; otherwise equal-score rows (e.g. metadata-only matches)
    // would be ordered — and truncated — differently from run to run.
    results.sort_by(|a, b| {
        b.score
            .partial_cmp(&a.score)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| a.point.id.cmp(&b.point.id))
    });
    results.truncate(limit);
    Ok(results)
}
/// Collects similarity search results into the results map, applying
/// optional outer filter.
fn collect_similarity_results(
&self,
sim_cond: &crate::velesql::Condition,
params: &std::collections::HashMap<String, serde_json::Value>,
limit: usize,
outer_filter: Option<&crate::velesql::Condition>,
results_map: &mut std::collections::HashMap<u64, SearchResult>,
) -> Result<()> {
let similarity_conditions = self.extract_all_similarity_conditions(sim_cond, params)?;
if let Some((field, vec, op, threshold)) = similarity_conditions.first() {
let overfetch_factor = 10;
let candidates_k = limit.saturating_mul(overfetch_factor).min(MAX_LIMIT);
let candidates = self.search(vec, candidates_k)?;
let filter_k = limit.saturating_mul(2);
let filtered =
self.filter_by_similarity(candidates, field, vec, *op, *threshold, filter_k);
for result in filtered {
if let Some(outer) = outer_filter {
if !Self::matches_metadata_filter(&result.point, outer) {
continue;
}
}
results_map.insert(result.point.id, result);
}
}
Ok(())
}
/// Runs the metadata scan and merges its hits into `results_map`.
///
/// When an outer filter is supplied, the scan condition becomes
/// `meta_cond AND outer_filter`; otherwise `meta_cond` is scanned as-is.
/// An id that is already in the map is left untouched, so an entry stored by the
/// similarity pass is never replaced by a scan hit.
fn collect_metadata_results(
    &self,
    meta_cond: crate::velesql::Condition,
    outer_filter: Option<&crate::velesql::Condition>,
    limit: usize,
    results_map: &mut std::collections::HashMap<u64, SearchResult>,
) {
    let scan_cond = if let Some(outer) = outer_filter {
        crate::velesql::Condition::And(Box::new(meta_cond), Box::new(outer.clone()))
    } else {
        meta_cond
    };
    let scan_filter = crate::filter::Filter::new(crate::filter::Condition::from(scan_cond));

    self.execute_scan_query(&scan_filter, limit)
        .into_iter()
        .for_each(|hit| {
            // Insert only when the id is vacant; existing entries take priority.
            results_map.entry(hit.point.id).or_insert(hit);
        });
}
/// Tests whether `point`'s payload satisfies `condition`.
///
/// The VelesQL condition is cloned and converted into a `crate::filter::Filter`,
/// which is then evaluated against the payload. A point without a payload never
/// matches. Used for applying outer AND filters to similarity results.
pub(crate) fn matches_metadata_filter(
    point: &crate::Point,
    condition: &crate::velesql::Condition,
) -> bool {
    let converted = crate::filter::Condition::from(condition.clone());
    let filter = crate::filter::Filter::new(converted);
    if let Some(payload) = point.payload.as_ref() {
        filter.matches(payload)
    } else {
        // No payload means filter doesn't match
        false
    }
}
/// Split an OR condition into similarity and metadata parts, extracting outer AND filters.
///
/// For `similarity() > 0.8 OR category = 'tech'`, returns:
/// - similarity_cond: Some(similarity() > 0.8)
/// - metadata_cond: Some(category = 'tech')
/// - outer_filter: None
///
/// For `(similarity() > 0.8 OR category = 'tech') AND status = 'active'`, returns:
/// - similarity_cond: Some(similarity() > 0.8)
/// - metadata_cond: Some(category = 'tech')
/// - outer_filter: Some(status = 'active')
///
/// Issue #122: Handle nested AND/OR patterns correctly.
pub(crate) fn split_or_condition_with_outer_filter(
condition: &crate::velesql::Condition,
) -> (
Option<crate::velesql::Condition>,
Option<crate::velesql::Condition>,
Option<crate::velesql::Condition>,
) {
match condition {
crate::velesql::Condition::Or(left, right) => {
// Direct OR at top level
let left_has_sim = Self::count_similarity_conditions(left) > 0;
let right_has_sim = Self::count_similarity_conditions(right) > 0;
match (left_has_sim, right_has_sim) {
(true, false) => (Some((**left).clone()), Some((**right).clone()), None),
(false, true) => (Some((**right).clone()), Some((**left).clone()), None),
_ => (Some(condition.clone()), None, None),
}
}
crate::velesql::Condition::And(left, right) => {
// Issue #122: Check if one side contains an OR with similarity
let left_has_problematic_or = Self::has_similarity_in_problematic_or(left);
let right_has_problematic_or = Self::has_similarity_in_problematic_or(right);
match (left_has_problematic_or, right_has_problematic_or) {
(true, false) => {
// Left has the OR, right is an outer filter
let (sim, meta, inner_filter) =
Self::split_or_condition_with_outer_filter(left);
// Combine inner_filter with right as outer filter
let outer = match inner_filter {
Some(inner) => Some(crate::velesql::Condition::And(
Box::new(inner),
Box::new((**right).clone()),
)),
None => Some((**right).clone()),
};
(sim, meta, outer)
}
(false, true) => {
// Right has the OR, left is an outer filter
let (sim, meta, inner_filter) =
Self::split_or_condition_with_outer_filter(right);
let outer = match inner_filter {
Some(inner) => Some(crate::velesql::Condition::And(
Box::new((**left).clone()),
Box::new(inner),
)),
None => Some((**left).clone()),
};
(sim, meta, outer)
}
_ => {
// Both or neither - treat as before
if Self::count_similarity_conditions(condition) > 0 {
(Some(condition.clone()), None, None)
} else {
(None, Some(condition.clone()), None)
}
}
}
}
crate::velesql::Condition::Group(inner) => {
// Unwrap group and recurse
Self::split_or_condition_with_outer_filter(inner)
}
// Not an OR or AND condition - treat as similarity if it contains similarity
_ => {
if Self::count_similarity_conditions(condition) > 0 {
(Some(condition.clone()), None, None)
} else {
(None, Some(condition.clone()), None)
}
}
}
}
}