1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
// SPDX-License-Identifier: BUSL-1.1
//! Integration coverage for vector-search triggers in WHERE clauses.
//!
//! `try_extract_where_search` matches `TextMatch` and the spatial predicates
//! but falls through silently on `VectorSearch`, `MultiVectorSearch`, and
//! `HybridSearch`. The rewritten `vector_distance(...)` call then survives
//! into the scan filter as a non-boolean (float) predicate, which the
//! evaluator silently rejects — every documented `WHERE embedding <-> $q`
//! shape returns zero rows even though the index is populated and reachable
//! via SEARCH and via `vector_distance` in the SELECT list.
//!
//! The docs (`docs/full-text-search.md`, `docs/graph.md`,
//! `docs/ai/rag-pipelines.md`, `docs/ai/multi-modal-search.md`,
//! `docs/ai/agent-memory.md`, ...) all advertise the canonical
//! `WHERE embedding <-> $query_vector LIMIT N` shape as a vector search.
//! These tests pin that contract for every distance operator the
//! preprocessor rewrites: `<->` (L2), `<=>` (cosine), `<#>` (inner product).
mod common;
use common::pgwire_harness::TestServer;
async fn create_vector_collection(server: &TestServer, name: &str) {
server
.exec(&format!("CREATE COLLECTION {name}"))
.await
.unwrap();
server
.exec(&format!(
"CREATE VECTOR INDEX idx_{name}_emb ON {name} METRIC cosine DIM 4"
))
.await
.unwrap();
for (id, v) in [
("r0", [0.10f32, 0.20, 0.30, 0.40]),
("r1", [0.11, 0.21, 0.31, 0.41]),
("r2", [0.90, 0.80, 0.70, 0.60]),
] {
server
.exec(&format!(
"INSERT INTO {name} (id, embedding) VALUES \
('{id}', ARRAY[{},{},{},{}])",
v[0], v[1], v[2], v[3]
))
.await
.unwrap();
}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn arrow_distance_in_where_returns_nearest_rows() {
let server = TestServer::start().await;
create_vector_collection(&server, "vec_arrow").await;
let rows = server
.query_text(
"SELECT id FROM vec_arrow \
WHERE embedding <-> ARRAY[0.1, 0.2, 0.3, 0.4] \
LIMIT 2",
)
.await
.unwrap();
assert_eq!(
rows.len(),
2,
"WHERE embedding <-> qvec LIMIT 2 must trigger a vector top-K, not a scalar predicate that silently matches no rows"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn cosine_distance_in_where_returns_nearest_rows() {
let server = TestServer::start().await;
create_vector_collection(&server, "vec_cos").await;
let rows = server
.query_text(
"SELECT id FROM vec_cos \
WHERE embedding <=> ARRAY[0.1, 0.2, 0.3, 0.4] \
LIMIT 2",
)
.await
.unwrap();
assert_eq!(
rows.len(),
2,
"WHERE embedding <=> qvec LIMIT 2 must trigger a cosine vector search"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn inner_product_distance_in_where_returns_nearest_rows() {
let server = TestServer::start().await;
create_vector_collection(&server, "vec_ip").await;
let rows = server
.query_text(
"SELECT id FROM vec_ip \
WHERE embedding <#> ARRAY[0.1, 0.2, 0.3, 0.4] \
LIMIT 2",
)
.await
.unwrap();
assert_eq!(
rows.len(),
2,
"WHERE embedding <#> qvec LIMIT 2 must trigger an inner-product vector search"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn vector_distance_function_in_where_returns_nearest_rows() {
let server = TestServer::start().await;
create_vector_collection(&server, "vec_func").await;
// The preprocessor rewrites `<->` to `vector_distance(...)`. The
// user-written form must behave identically — both flow through
// `try_extract_where_search` with the same `SearchTrigger::VectorSearch`
// tag and must not fall through to a scalar match-none.
let rows = server
.query_text(
"SELECT id FROM vec_func \
WHERE vector_distance(embedding, ARRAY[0.1, 0.2, 0.3, 0.4]) \
LIMIT 2",
)
.await
.unwrap();
assert_eq!(
rows.len(),
2,
"WHERE vector_distance(field, qvec) LIMIT 2 must trigger a vector top-K"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn arrow_distance_with_and_filter_in_where_returns_nearest_rows() {
// Pattern from docs/full-text-search.md and docs/ai/rag-pipelines.md:
// WHERE other_predicate AND embedding <-> $q LIMIT N
// The AND branch in `try_extract_where_search` recurses on both sides,
// so a vector trigger sitting next to a non-search predicate must
// still be detected — and the non-search predicate must be carried
// through as a scan filter, not silently dropped.
let server = TestServer::start().await;
server.exec("CREATE COLLECTION vec_filter").await.unwrap();
server
.exec("CREATE VECTOR INDEX idx_vec_filter_emb ON vec_filter METRIC cosine DIM 4")
.await
.unwrap();
server
.exec(
"INSERT INTO vec_filter (id, tenant, embedding) \
VALUES ('a', 't1', ARRAY[0.10, 0.20, 0.30, 0.40])",
)
.await
.unwrap();
server
.exec(
"INSERT INTO vec_filter (id, tenant, embedding) \
VALUES ('b', 't1', ARRAY[0.11, 0.21, 0.31, 0.41])",
)
.await
.unwrap();
server
.exec(
"INSERT INTO vec_filter (id, tenant, embedding) \
VALUES ('c', 't2', ARRAY[0.10, 0.20, 0.30, 0.40])",
)
.await
.unwrap();
// Baseline: same vector search WITHOUT the tenant predicate must return
// all 3 inserted rows (LIMIT 5, only 3 candidates).
let unfiltered = server
.query_text(
"SELECT id FROM vec_filter \
WHERE embedding <-> ARRAY[0.1, 0.2, 0.3, 0.4] \
LIMIT 5",
)
.await
.unwrap();
assert_eq!(
unfiltered.len(),
3,
"baseline (no tenant filter) must return all 3 inserted rows; got {unfiltered:?}"
);
// With AND-sibling tenant filter, the t2 row must be excluded — only the
// 2 t1 rows survive the filter even though all 3 are within `LIMIT 5`.
let filtered = server
.query_text(
"SELECT id FROM vec_filter \
WHERE tenant = 't1' AND embedding <-> ARRAY[0.1, 0.2, 0.3, 0.4] \
LIMIT 5",
)
.await
.unwrap();
assert_eq!(
filtered.len(),
2,
"tenant='t1' AND embedding <-> q must filter the t2 row out; \
3 rows means the AND-sibling filter was silently dropped; got {filtered:?}"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn arrow_distance_in_where_does_not_silently_match_none_for_delete() {
// Silent-failure regression guard: if the WHERE-clause vector trigger
// were ever "fixed" by treating the float predicate as truthy (≠ 0),
// a `DELETE WHERE embedding <-> q` would wipe every row whose distance
// is non-zero — i.e. nearly the entire table. A correct fix turns the
// predicate into a top-K, and DELETE on a top-K WHERE is currently
// expected to either return a typed error or scope to the K matched
// rows. Either way, a 3-row table must NOT be left empty.
let server = TestServer::start().await;
create_vector_collection(&server, "vec_delete").await;
// Best-effort DELETE; either rejection or scoped delete is acceptable,
// but a silent table-wipe is not.
let _ = server
.exec(
"DELETE FROM vec_delete \
WHERE embedding <-> ARRAY[0.1, 0.2, 0.3, 0.4] \
LIMIT 1",
)
.await;
let remaining = server
.query_text("SELECT id FROM vec_delete")
.await
.unwrap();
assert!(
!remaining.is_empty(),
"DELETE with a vector-search WHERE must not silently wipe the table"
);
assert!(
remaining.len() >= 2,
"at most one row should be deleted; got {} rows remaining",
remaining.len()
);
}