1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
//! Additional tests for DISTINCT deduplication logic.
//!
//! The core unit tests live inline in `distinct.rs`. These tests exercise
//! edge cases: null payloads, missing columns, mixed value types,
//! `SelectColumns::All`, and `SelectColumns::Mixed` with and without
//! qualified wildcards.
#[cfg(test)]
mod tests {
use crate::collection::search::query::distinct::{apply_distinct, compute_distinct_key};
use crate::point::{Point, SearchResult};
use crate::velesql::{Column, SelectColumns};
/// Builds a [`SearchResult`] fixture whose only varying parts are `id` and
/// `payload`.
///
/// The point always carries a four-element zero vector and no sparse
/// vectors, and `1.0` is passed as the second argument of
/// `SearchResult::new` (presumably the score — confirm against `SearchResult`).
fn make_result(id: u64, payload: Option<serde_json::Value>) -> SearchResult {
    let point = Point {
        id,
        vector: vec![0.0; 4],
        payload,
        sparse_vectors: None,
    };
    SearchResult::new(point, 1.0)
}
// -----------------------------------------------------------------------
// Null / missing payload handling
// -----------------------------------------------------------------------
/// Rows without any payload are expected to share a single DISTINCT key, so
/// only the first of them is kept.
#[test]
fn test_distinct_null_payloads_collapse_to_one() {
    // Ids 1, 2 and 3, none of which carries a payload.
    let rows: Vec<SearchResult> = (1..=3).map(|id| make_result(id, None)).collect();
    let select_all = SelectColumns::All;

    let deduped = apply_distinct(rows, &select_all);

    // Same (null) payload everywhere -> same key -> one survivor, the earliest.
    assert_eq!(deduped.len(), 1, "null payloads should deduplicate to one");
    assert_eq!(deduped[0].point.id, 1, "first inserted wins");
}
/// A row with no payload and a row with a payload must not be treated as
/// duplicates of each other.
#[test]
fn test_distinct_null_vs_some_are_different() {
    let without_payload = make_result(1, None);
    let with_payload = make_result(2, Some(serde_json::json!({"name": "Alice"})));
    let select_all = SelectColumns::All;

    let deduped = apply_distinct(vec![without_payload, with_payload], &select_all);

    assert_eq!(deduped.len(), 2, "null vs Some should be distinct");
}
// -----------------------------------------------------------------------
// SelectColumns::All deduplication
// -----------------------------------------------------------------------
/// With `SelectColumns::All` the DISTINCT key is expected to cover the whole
/// payload: rows with identical payloads collapse, while a row that differs
/// in any field is kept.
#[test]
fn test_distinct_select_all_uses_full_payload() {
    let results = vec![
        make_result(1, Some(serde_json::json!({"a": 1, "b": 2}))),
        make_result(2, Some(serde_json::json!({"a": 1, "b": 2}))),
        make_result(3, Some(serde_json::json!({"a": 1, "b": 3}))),
    ];
    let columns = SelectColumns::All;
    let distinct = apply_distinct(results, &columns);
    // id=1 and id=2 are identical payloads -> collapse; id=3 differs in `b`.
    assert_eq!(
        distinct.len(),
        2,
        "identical payloads should collapse; a differing field should not"
    );
    // Pin WHICH rows survive, not just how many: the first occurrence of the
    // duplicated payload and the row with the differing `b`, in input order.
    assert_eq!(distinct[0].point.id, 1, "first inserted duplicate wins");
    assert_eq!(distinct[1].point.id, 3, "row with different `b` survives");
}
// -----------------------------------------------------------------------
// compute_distinct_key with missing column
// -----------------------------------------------------------------------
/// Asking for a column that the payload does not contain is expected to
/// yield the literal key `"null"`.
#[test]
fn test_distinct_key_missing_column_is_null() {
    let row = make_result(1, Some(serde_json::json!({"name": "Alice"})));
    // The payload only has `name`, so this column cannot be resolved.
    let requested = [String::from("missing_field")];

    let key = compute_distinct_key(&row, &requested, false);

    assert_eq!(key, "null", "missing column should produce 'null' key");
}
// -----------------------------------------------------------------------
// Mixed column types produce unique keys
// -----------------------------------------------------------------------
/// The same column holding the number `42` in one row and the string `"42"`
/// in another must produce two distinct rows.
#[test]
fn test_distinct_mixed_column_types() {
    let numeric = make_result(1, Some(serde_json::json!({"val": 42})));
    let textual = make_result(2, Some(serde_json::json!({"val": "42"})));

    let val_column = Column {
        name: String::from("val"),
        alias: None,
    };
    let projection = SelectColumns::Columns(vec![val_column]);

    let deduped = apply_distinct(vec![numeric, textual], &projection);

    // A JSON number and a JSON string are different values even when they
    // print alike, so neither row may be dropped.
    assert_eq!(deduped.len(), 2, "number vs string should be distinct");
}
// -----------------------------------------------------------------------
// Mixed with qualified_wildcards must dedup by FULL payload, not just
// the explicit `columns` list. Regression for the Devin finding:
// `SELECT DISTINCT ctx.*, title FROM docs` must collapse rows only when
// ALL payload fields match, not only when `title` matches.
// -----------------------------------------------------------------------
#[test]
fn test_distinct_mixed_with_qualified_wildcard_dedupes_by_full_payload() {
    // `alice` and `bob` agree on `title` but not on `author`, a field that is
    // only reachable through the wildcard. Keying on `title` alone would
    // merge them; keying on the full payload keeps both.
    let alice = make_result(1, Some(serde_json::json!({"title": "T1", "author": "Alice"})));
    let bob = make_result(2, Some(serde_json::json!({"title": "T1", "author": "Bob"})));
    // Byte-for-byte the same payload as `alice`, so this one must be dropped.
    let alice_dup = make_result(3, Some(serde_json::json!({"title": "T1", "author": "Alice"})));

    let title_column = Column {
        name: String::from("title"),
        alias: None,
    };
    let projection = SelectColumns::Mixed {
        columns: vec![title_column],
        aggregations: vec![],
        similarity_scores: vec![],
        qualified_wildcards: vec![String::from("ctx")],
        window_functions: vec![],
    };

    let deduped = apply_distinct(vec![alice, bob, alice_dup], &projection);

    assert_eq!(
        deduped.len(),
        2,
        "rows 1 and 2 differ (Alice vs Bob) so both survive; row 3 is an \
         exact duplicate of row 1 and is dropped"
    );
    // Survivors must come back in insertion order: row 1, then row 2.
    let surviving_ids: Vec<u64> = deduped.iter().map(|row| row.point.id).collect();
    assert_eq!(surviving_ids, vec![1, 2]);
}
/// Control case for a `Mixed` projection with an empty `qualified_wildcards`
/// list: dedup is expected to key on the explicit `columns` only (existing
/// behaviour, kept for backward compatibility), so rows that share `title`
/// collapse even though `author` differs.
#[test]
fn test_distinct_mixed_without_qualified_wildcard_dedupes_by_columns_only() {
    let alice = make_result(1, Some(serde_json::json!({"title": "T1", "author": "Alice"})));
    let bob = make_result(2, Some(serde_json::json!({"title": "T1", "author": "Bob"})));

    let title_column = Column {
        name: String::from("title"),
        alias: None,
    };
    let projection = SelectColumns::Mixed {
        columns: vec![title_column],
        aggregations: vec![],
        similarity_scores: vec![],
        qualified_wildcards: vec![],
        window_functions: vec![],
    };

    let deduped = apply_distinct(vec![alice, bob], &projection);

    assert_eq!(
        deduped.len(),
        1,
        "no wildcard → dedup by `title` only → both rows collapse"
    );
    // The earlier of the two rows is the one that is kept.
    assert_eq!(deduped[0].point.id, 1);
}
}