1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
//! Vector upsert/update behavior and optimization guard tests.
//!
//! This suite gathers functional and regression coverage for repeated `add()`
//! calls with the same `NodeId` into a single place.
use aletheiadb::core::id::NodeId;
use aletheiadb::index::vector::{DistanceMetric, HnswIndexBuilder, VectorIndex};
#[path = "vector_update_optimization/regression_behavior.rs"]
mod regression_behavior;
#[path = "vector_update_optimization/regression_coverage.rs"]
mod regression_coverage;
/// Test that updating a vector works correctly.
///
/// This test verifies the FUNCTIONAL correctness of vector updates.
/// The optimization in Issue #207 should not change this behavior.
#[test]
fn test_vector_update_correctness() {
    let index = HnswIndexBuilder::new(4, DistanceMetric::Cosine)
        .build()
        .unwrap();
    let node = NodeId::new(1).unwrap();

    // Insert the initial vector, then overwrite it with a different one;
    // the second add is an update and must not grow the index.
    index.add(node, &[1.0, 0.0, 0.0, 0.0]).unwrap();
    assert_eq!(index.len(), 1);
    index.add(node, &[0.0, 1.0, 0.0, 0.0]).unwrap();
    assert_eq!(index.len(), 1, "Update should not increase vector count");

    // Querying with the NEW vector must return the node with near-perfect similarity.
    let hits = index.search(&[0.0, 1.0, 0.0, 0.0], 1).unwrap();
    assert_eq!(hits.len(), 1);
    assert_eq!(hits[0].0, node);
    assert!(
        hits[0].1 > 0.99,
        "Updated vector should have high similarity to query"
    );

    // Querying with the OLD vector still returns the node (it is the only entry),
    // but the similarity must now be low — proof the original vector is gone.
    let stale_hits = index.search(&[1.0, 0.0, 0.0, 0.0], 1).unwrap();
    assert_eq!(stale_hits.len(), 1);
    assert_eq!(stale_hits[0].0, node);
    assert!(
        stale_hits[0].1 < 0.5,
        "Old vector should have low similarity after update"
    );
}
/// Test multiple consecutive updates to the same node.
///
/// Verifies that multiple updates work correctly without accumulating vectors.
#[test]
fn test_multiple_consecutive_updates() {
    let index = HnswIndexBuilder::new(4, DistanceMetric::Cosine)
        .build()
        .unwrap();
    let node = NodeId::new(42).unwrap();

    // Rewrite the node's vector ten times, rotating the hot dimension each pass.
    // The entry count must stay pinned at 1 the whole way through.
    for pass in 1..=10usize {
        let mut vector = [0.0f32; 4];
        vector[(pass - 1) % 4] = 1.0;
        index.add(node, &vector).unwrap();
        assert_eq!(
            index.len(),
            1,
            "After {} updates, should still have 1 vector",
            pass
        );
    }

    // The final write set dimension (10 - 1) % 4 == 1, so a query along
    // that axis must resolve to the node.
    let results = index.search(&[0.0, 1.0, 0.0, 0.0], 1).unwrap();
    assert_eq!(results.len(), 1);
    assert_eq!(results[0].0, node);
}
/// Test concurrent updates to different nodes.
///
/// Ensures that updates to different nodes don't interfere with each other.
#[test]
fn test_concurrent_updates_different_nodes() {
    use std::thread;

    let index = HnswIndexBuilder::new(8, DistanceMetric::Cosine)
        .build()
        .unwrap();

    // Seed the index with one vector per node id 1..=10.
    for i in 1..=10 {
        index
            .add(NodeId::new(i).unwrap(), &vec![i as f32; 8])
            .unwrap();
    }
    assert_eq!(index.len(), 10);

    // One scoped thread per node rewrites that node's vector five times.
    // Scoped threads may borrow `index` directly and are all joined (panics
    // propagated) when the scope ends.
    thread::scope(|scope| {
        for i in 1..=10 {
            let index = &index;
            scope.spawn(move || {
                let node = NodeId::new(i).unwrap();
                for j in 0..5 {
                    let mut vector = [0.0f32; 8];
                    vector[(i + j) as usize % 8] = 1.0;
                    index.add(node, &vector).unwrap();
                }
            });
        }
    });

    // Concurrent per-node updates must never change the entry count.
    assert_eq!(
        index.len(),
        10,
        "After concurrent updates, should still have 10 vectors"
    );
}
/// Test that update after delete works correctly.
///
/// This ensures the optimization handles the delete -> add -> add pattern.
#[test]
fn test_update_after_delete() {
    let index = HnswIndexBuilder::new(4, DistanceMetric::Cosine)
        .build()
        .unwrap();
    let node = NodeId::new(1).unwrap();

    // add -> remove must leave the index empty again.
    index.add(node, &[1.0, 0.0, 0.0, 0.0]).unwrap();
    assert_eq!(index.len(), 1);
    index.remove(node).unwrap();
    assert_eq!(index.len(), 0);

    // Re-adding the same NodeId allocates a fresh slot; the immediately
    // following add is an in-place update. The count stays at 1 either way.
    for vector in [[0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]] {
        index.add(node, &vector).unwrap();
        assert_eq!(index.len(), 1);
    }

    // A query for the last-written vector resolves to the node with ~1.0 similarity.
    let results = index.search(&[0.0, 0.0, 1.0, 0.0], 1).unwrap();
    assert_eq!(results.len(), 1);
    assert_eq!(results[0].0, node);
    assert!(results[0].1 > 0.99);
}
/// Benchmark-style test to verify optimization intent.
///
/// This test documents the expected behavior: updating should be efficient.
/// While we can't directly measure FFI calls in a unit test, we can verify
/// that updates complete in reasonable time even with many iterations.
#[test]
fn test_update_performance_intention() {
    let index = HnswIndexBuilder::new(128, DistanceMetric::Cosine)
        .build()
        .unwrap();
    let node = NodeId::new(1).unwrap();

    // 1000 in-place updates. With the Issue #207 optimization this is roughly
    // one FFI call per update (add only); without it, two (remove + add).
    const UPDATES: usize = 1000;
    let started = std::time::Instant::now();
    for i in 0..UPDATES {
        let mut vector = vec![0.0f32; 128];
        vector[i % 128] = 1.0;
        index.add(node, &vector).unwrap();
    }
    let elapsed = started.elapsed();

    // The node is updated, never duplicated.
    assert_eq!(index.len(), 1);

    // Loose wall-clock guard: even with FFI overhead the loop should finish far
    // below this 5-second ceiling; tripping it suggests a performance regression.
    assert!(
        elapsed.as_secs() < 5,
        "1000 updates took {:?}, may indicate performance issue",
        elapsed
    );
    println!(
        "1000 vector updates completed in {:?} ({:.2} µs/update)",
        elapsed,
        elapsed.as_micros() as f64 / 1000.0
    );
}