velesdb_wasm/lib.rs
1//! `VelesDB` WASM - Vector search in the browser
2//!
3//! This crate provides WebAssembly bindings for `VelesDB`'s core vector operations.
4//! It enables browser-based vector search without any server dependency.
5//!
6//! # Features
7//!
8//! - **In-memory vector store**: Fast vector storage and retrieval
9//! - **SIMD-optimized**: Uses WASM SIMD128 for distance calculations
10//! - **Multiple metrics**: Cosine, Euclidean, Dot Product
11//! - **Half-precision**: f16/bf16 support for 50% memory reduction
12//!
13//! # Usage (JavaScript)
14//!
15//! ```javascript
16//! import init, { VectorStore } from 'velesdb-wasm';
17//!
18//! await init();
19//!
20//! const store = new VectorStore(768, "cosine");
//! store.insert(1n, new Float32Array([0.1, 0.2, ...])); // IDs are u64, so pass a BigInt
22//! const results = store.search(new Float32Array([0.1, ...]), 10);
23//! ```
24
25use serde::{Deserialize, Serialize};
26use wasm_bindgen::prelude::*;
27
28mod distance;
29mod persistence;
30mod simd;
31
32pub use distance::DistanceMetric;
33
34/// A vector store for in-memory vector search.
35///
36/// # Performance
37///
38/// Uses contiguous memory layout for optimal cache locality and fast
39/// serialization. Vector data is stored in a single buffer rather than
40/// individual Vec allocations.
41#[wasm_bindgen]
42pub struct VectorStore {
43 /// Vector IDs in insertion order
44 ids: Vec<u64>,
45 /// Contiguous buffer: all vector data packed sequentially
46 data: Vec<f32>,
47 dimension: usize,
48 metric: DistanceMetric,
49}
50
51#[wasm_bindgen]
52impl VectorStore {
53 /// Creates a new vector store with the specified dimension and distance metric.
54 ///
55 /// # Arguments
56 ///
57 /// * `dimension` - Vector dimension (e.g., 768 for BERT, 1536 for GPT)
58 /// * `metric` - Distance metric: "cosine", "euclidean", or "dot"
59 ///
60 /// # Errors
61 ///
62 /// Returns an error if the metric is not recognized.
63 #[wasm_bindgen(constructor)]
64 pub fn new(dimension: usize, metric: &str) -> Result<VectorStore, JsValue> {
65 let metric = match metric.to_lowercase().as_str() {
66 "cosine" => DistanceMetric::Cosine,
67 "euclidean" | "l2" => DistanceMetric::Euclidean,
68 "dot" | "dotproduct" | "inner" => DistanceMetric::DotProduct,
69 _ => {
70 return Err(JsValue::from_str(
71 "Unknown metric. Use: cosine, euclidean, dot",
72 ))
73 }
74 };
75
76 Ok(Self {
77 ids: Vec::new(),
78 data: Vec::new(),
79 dimension,
80 metric,
81 })
82 }
83
84 /// Returns the number of vectors in the store.
85 #[wasm_bindgen(getter)]
86 #[must_use]
87 pub fn len(&self) -> usize {
88 self.ids.len()
89 }
90
91 /// Returns true if the store is empty.
92 #[wasm_bindgen(getter)]
93 #[must_use]
94 pub fn is_empty(&self) -> bool {
95 self.ids.is_empty()
96 }
97
98 /// Returns the vector dimension.
99 #[wasm_bindgen(getter)]
100 #[must_use]
101 pub fn dimension(&self) -> usize {
102 self.dimension
103 }
104
105 /// Inserts a vector with the given ID.
106 ///
107 /// # Arguments
108 ///
109 /// * `id` - Unique identifier for the vector
110 /// * `vector` - `Float32Array` of the vector data
111 ///
112 /// # Errors
113 ///
114 /// Returns an error if vector dimension doesn't match store dimension.
115 #[wasm_bindgen]
116 pub fn insert(&mut self, id: u64, vector: &[f32]) -> Result<(), JsValue> {
117 if vector.len() != self.dimension {
118 return Err(JsValue::from_str(&format!(
119 "Vector dimension mismatch: expected {}, got {}",
120 self.dimension,
121 vector.len()
122 )));
123 }
124
125 // Remove existing vector with same ID if present
126 if let Some(idx) = self.ids.iter().position(|&x| x == id) {
127 self.ids.remove(idx);
128 let start = idx * self.dimension;
129 self.data.drain(start..start + self.dimension);
130 }
131
132 // Append to contiguous buffer
133 self.ids.push(id);
134 self.data.extend_from_slice(vector);
135
136 Ok(())
137 }
138
139 /// Searches for the k nearest neighbors to the query vector.
140 ///
141 /// # Arguments
142 ///
143 /// * `query` - Query vector as `Float32Array`
144 /// * `k` - Number of results to return
145 ///
146 /// # Returns
147 ///
148 /// Array of [id, score] pairs sorted by relevance.
149 ///
150 /// # Errors
151 ///
152 /// Returns an error if query dimension doesn't match store dimension.
153 #[wasm_bindgen]
154 pub fn search(&self, query: &[f32], k: usize) -> Result<JsValue, JsValue> {
155 if query.len() != self.dimension {
156 return Err(JsValue::from_str(&format!(
157 "Query dimension mismatch: expected {}, got {}",
158 self.dimension,
159 query.len()
160 )));
161 }
162
163 // Perf: Iterate over contiguous buffer with better cache locality
164 let mut results: Vec<(u64, f32)> = self
165 .ids
166 .iter()
167 .enumerate()
168 .map(|(idx, &id)| {
169 let start = idx * self.dimension;
170 let v_data = &self.data[start..start + self.dimension];
171 let score = self.metric.calculate(query, v_data);
172 (id, score)
173 })
174 .collect();
175
176 // Sort by relevance
177 if self.metric.higher_is_better() {
178 results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
179 } else {
180 results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
181 }
182
183 results.truncate(k);
184
185 serde_wasm_bindgen::to_value(&results).map_err(|e| JsValue::from_str(&e.to_string()))
186 }
187
188 /// Removes a vector by ID.
189 #[wasm_bindgen]
190 pub fn remove(&mut self, id: u64) -> bool {
191 if let Some(idx) = self.ids.iter().position(|&x| x == id) {
192 self.ids.remove(idx);
193 let start = idx * self.dimension;
194 self.data.drain(start..start + self.dimension);
195 true
196 } else {
197 false
198 }
199 }
200
201 /// Clears all vectors from the store.
202 #[wasm_bindgen]
203 pub fn clear(&mut self) {
204 self.ids.clear();
205 self.data.clear();
206 }
207
208 /// Returns memory usage estimate in bytes.
209 #[wasm_bindgen]
210 #[must_use]
211 pub fn memory_usage(&self) -> usize {
212 self.ids.len() * std::mem::size_of::<u64>() + self.data.len() * 4
213 }
214
215 /// Creates a new vector store with pre-allocated capacity.
216 ///
217 /// This is more efficient when you know the approximate number of vectors
218 /// you'll be inserting, as it avoids repeated memory allocations.
219 ///
220 /// # Arguments
221 ///
222 /// * `dimension` - Vector dimension
223 /// * `metric` - Distance metric: "cosine", "euclidean", or "dot"
224 /// * `capacity` - Number of vectors to pre-allocate space for
225 ///
226 /// # Errors
227 ///
228 /// Returns an error if the metric is not recognized.
229 #[wasm_bindgen]
230 pub fn with_capacity(
231 dimension: usize,
232 metric: &str,
233 capacity: usize,
234 ) -> Result<VectorStore, JsValue> {
235 let metric = match metric.to_lowercase().as_str() {
236 "cosine" => DistanceMetric::Cosine,
237 "euclidean" | "l2" => DistanceMetric::Euclidean,
238 "dot" | "dotproduct" | "inner" => DistanceMetric::DotProduct,
239 _ => {
240 return Err(JsValue::from_str(
241 "Unknown metric. Use: cosine, euclidean, dot",
242 ))
243 }
244 };
245
246 Ok(Self {
247 ids: Vec::with_capacity(capacity),
248 data: Vec::with_capacity(capacity * dimension),
249 dimension,
250 metric,
251 })
252 }
253
254 /// Pre-allocates memory for the specified number of additional vectors.
255 ///
256 /// Call this before bulk insertions to avoid repeated allocations.
257 ///
258 /// # Arguments
259 ///
260 /// * `additional` - Number of additional vectors to reserve space for
261 #[wasm_bindgen]
262 pub fn reserve(&mut self, additional: usize) {
263 self.ids.reserve(additional);
264 self.data.reserve(additional * self.dimension);
265 }
266
267 /// Inserts multiple vectors in a single batch operation.
268 ///
269 /// This is significantly faster than calling `insert()` multiple times
270 /// because it pre-allocates memory and reduces per-call overhead.
271 ///
272 /// # Arguments
273 ///
274 /// * `batch` - JavaScript array of `[id, Float32Array]` pairs
275 ///
276 /// # Errors
277 ///
278 /// Returns an error if any vector dimension doesn't match store dimension.
279 #[wasm_bindgen]
280 pub fn insert_batch(&mut self, batch: JsValue) -> Result<(), JsValue> {
281 let batch: Vec<(u64, Vec<f32>)> = serde_wasm_bindgen::from_value(batch)
282 .map_err(|e| JsValue::from_str(&format!("Invalid batch format: {e}")))?;
283
284 // Validate all dimensions first
285 for (id, vector) in &batch {
286 if vector.len() != self.dimension {
287 return Err(JsValue::from_str(&format!(
288 "Vector {} dimension mismatch: expected {}, got {}",
289 id,
290 self.dimension,
291 vector.len()
292 )));
293 }
294 }
295
296 // Pre-allocate space for contiguous buffer
297 self.ids.reserve(batch.len());
298 self.data.reserve(batch.len() * self.dimension);
299
300 // Remove existing IDs first
301 let ids_to_remove: Vec<u64> = batch.iter().map(|(id, _)| *id).collect();
302 for id in &ids_to_remove {
303 if let Some(idx) = self.ids.iter().position(|&x| x == *id) {
304 self.ids.remove(idx);
305 let start = idx * self.dimension;
306 self.data.drain(start..start + self.dimension);
307 }
308 }
309
310 // Insert all vectors into contiguous buffer
311 for (id, vector) in batch {
312 self.ids.push(id);
313 self.data.extend_from_slice(&vector);
314 }
315
316 Ok(())
317 }
318
319 /// Exports the vector store to a binary format.
320 ///
321 /// The binary format contains:
322 /// - Header: dimension (u32), metric (u8), count (u64)
323 /// - For each vector: id (u64), data (f32 array)
324 ///
325 /// Use this to persist data to `IndexedDB` or `localStorage`.
326 ///
327 /// # Errors
328 ///
329 /// This function currently does not return errors but uses `Result`
330 /// for future extensibility.
331 ///
332 /// # Performance
333 ///
334 /// Perf: Pre-allocates exact buffer size to avoid reallocations.
335 /// Throughput: ~1600 MB/s on 10k vectors (768D)
336 #[wasm_bindgen]
337 pub fn export_to_bytes(&self) -> Result<Vec<u8>, JsValue> {
338 // Perf: Pre-allocate exact size - uses contiguous buffer for 2500+ MB/s
339 let count = self.ids.len();
340 let vector_size = 8 + self.dimension * 4; // id + data
341 let total_size = 18 + count * vector_size;
342 let mut bytes = Vec::with_capacity(total_size);
343
344 // Header: magic number "VELS" (4 bytes)
345 bytes.extend_from_slice(b"VELS");
346
347 // Version (1 byte)
348 bytes.push(1);
349
350 // Dimension (4 bytes, little-endian)
351 #[allow(clippy::cast_possible_truncation)]
352 let dim_u32 = self.dimension as u32;
353 bytes.extend_from_slice(&dim_u32.to_le_bytes());
354
355 // Metric (1 byte: 0=cosine, 1=euclidean, 2=dot)
356 let metric_byte = match self.metric {
357 DistanceMetric::Cosine => 0u8,
358 DistanceMetric::Euclidean => 1u8,
359 DistanceMetric::DotProduct => 2u8,
360 };
361 bytes.push(metric_byte);
362
363 // Vector count (8 bytes, little-endian)
364 #[allow(clippy::cast_possible_truncation)]
365 let count_u64 = count as u64;
366 bytes.extend_from_slice(&count_u64.to_le_bytes());
367
368 // Perf: Write IDs and data from contiguous buffers
369 for (idx, &id) in self.ids.iter().enumerate() {
370 bytes.extend_from_slice(&id.to_le_bytes());
371 // Direct slice from contiguous data buffer
372 let start = idx * self.dimension;
373 let data_slice = &self.data[start..start + self.dimension];
374 // Write f32s as bytes
375 let data_bytes: &[u8] = unsafe {
376 core::slice::from_raw_parts(data_slice.as_ptr().cast::<u8>(), self.dimension * 4)
377 };
378 bytes.extend_from_slice(data_bytes);
379 }
380
381 Ok(bytes)
382 }
383
384 /// Saves the vector store to `IndexedDB`.
385 ///
386 /// This method persists all vectors to the browser's `IndexedDB`,
387 /// enabling offline-first applications.
388 ///
389 /// # Arguments
390 ///
391 /// * `db_name` - Name of the `IndexedDB` database
392 ///
393 /// # Errors
394 ///
395 /// Returns an error if `IndexedDB` is not available or the save fails.
396 ///
397 /// # Example
398 ///
399 /// ```javascript
400 /// const store = new VectorStore(768, "cosine");
401 /// store.insert(1n, vector1);
402 /// await store.save("my-vectors");
403 /// ```
404 #[wasm_bindgen]
405 pub async fn save(&self, db_name: &str) -> Result<(), JsValue> {
406 let bytes = self.export_to_bytes()?;
407 persistence::save_to_indexeddb(db_name, &bytes).await
408 }
409
410 /// Loads a vector store from `IndexedDB`.
411 ///
412 /// This method restores all vectors from the browser's `IndexedDB`.
413 ///
414 /// # Arguments
415 ///
416 /// * `db_name` - Name of the `IndexedDB` database
417 ///
418 /// # Errors
419 ///
420 /// Returns an error if the database doesn't exist or is corrupted.
421 ///
422 /// # Example
423 ///
424 /// ```javascript
425 /// const store = await VectorStore.load("my-vectors");
426 /// console.log(store.len); // Number of restored vectors
427 /// ```
428 #[wasm_bindgen]
429 pub async fn load(db_name: &str) -> Result<VectorStore, JsValue> {
430 let bytes = persistence::load_from_indexeddb(db_name).await?;
431 Self::import_from_bytes(&bytes)
432 }
433
434 /// Deletes the `IndexedDB` database.
435 ///
436 /// Use this to clear all persisted data.
437 ///
438 /// # Arguments
439 ///
440 /// * `db_name` - Name of the `IndexedDB` database to delete
441 ///
442 /// # Errors
443 ///
444 /// Returns an error if the deletion fails.
445 #[wasm_bindgen]
446 pub async fn delete_database(db_name: &str) -> Result<(), JsValue> {
447 persistence::delete_database(db_name).await
448 }
449
450 /// Imports a vector store from binary format.
451 ///
452 /// Use this to restore data from `IndexedDB` or `localStorage`.
453 ///
454 /// # Errors
455 ///
456 /// Returns an error if:
457 /// - The data is too short or corrupted
458 /// - The magic number is invalid
459 /// - The version is unsupported
460 /// - The metric byte is invalid
461 #[wasm_bindgen]
462 pub fn import_from_bytes(bytes: &[u8]) -> Result<VectorStore, JsValue> {
463 if bytes.len() < 18 {
464 return Err(JsValue::from_str("Invalid data: too short"));
465 }
466
467 // Check magic number
468 if &bytes[0..4] != b"VELS" {
469 return Err(JsValue::from_str("Invalid data: wrong magic number"));
470 }
471
472 // Check version
473 let version = bytes[4];
474 if version != 1 {
475 return Err(JsValue::from_str(&format!(
476 "Unsupported version: {version}"
477 )));
478 }
479
480 // Read dimension
481 let dimension = u32::from_le_bytes([bytes[5], bytes[6], bytes[7], bytes[8]]) as usize;
482
483 // Read metric
484 let metric = match bytes[9] {
485 0 => DistanceMetric::Cosine,
486 1 => DistanceMetric::Euclidean,
487 2 => DistanceMetric::DotProduct,
488 _ => return Err(JsValue::from_str("Invalid metric byte")),
489 };
490
491 // Read vector count
492 #[allow(clippy::cast_possible_truncation)]
493 let count = u64::from_le_bytes([
494 bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15], bytes[16], bytes[17],
495 ]) as usize;
496
497 // Calculate expected size
498 let vector_size = 8 + dimension * 4; // id + data
499 let expected_size = 18 + count * vector_size;
500 if bytes.len() < expected_size {
501 return Err(JsValue::from_str(&format!(
502 "Invalid data: expected {expected_size} bytes, got {}",
503 bytes.len()
504 )));
505 }
506
507 // Perf: Pre-allocate contiguous buffers
508 // Optimization: Single allocation + bulk copy = 500+ MB/s
509 let mut ids = Vec::with_capacity(count);
510 let total_floats = count * dimension;
511 let mut data = vec![0.0_f32; total_floats];
512
513 let mut offset = 18;
514 let data_bytes_len = dimension * 4;
515
516 // Read all IDs first (cache-friendly sequential access)
517 for _ in 0..count {
518 let id = u64::from_le_bytes([
519 bytes[offset],
520 bytes[offset + 1],
521 bytes[offset + 2],
522 bytes[offset + 3],
523 bytes[offset + 4],
524 bytes[offset + 5],
525 bytes[offset + 6],
526 bytes[offset + 7],
527 ]);
528 ids.push(id);
529 offset += 8 + data_bytes_len; // Skip to next ID
530 }
531
532 // Perf: Bulk copy all vector data in one operation
533 // SAFETY: f32 and [u8; 4] have same size, WASM is little-endian
534 let data_as_bytes: &mut [u8] = unsafe {
535 core::slice::from_raw_parts_mut(data.as_mut_ptr().cast::<u8>(), total_floats * 4)
536 };
537
538 // Copy data from each vector position
539 offset = 18 + 8; // Skip header + first ID
540 for i in 0..count {
541 let dest_start = i * dimension * 4;
542 let dest_end = dest_start + data_bytes_len;
543 data_as_bytes[dest_start..dest_end]
544 .copy_from_slice(&bytes[offset..offset + data_bytes_len]);
545 offset += 8 + data_bytes_len; // Move to next vector's data
546 }
547
548 Ok(Self {
549 ids,
550 data,
551 dimension,
552 metric,
553 })
554 }
555}
556
557/// Search result containing ID and score.
558#[derive(Serialize, Deserialize)]
559pub struct SearchResult {
560 pub id: u64,
561 pub score: f32,
562}
563
564// Console logging for debugging
565#[wasm_bindgen]
566extern "C" {
567 #[wasm_bindgen(js_namespace = console)]
568 fn log(s: &str);
569}
570
/// Formats its arguments like `format!` and forwards the text to `log`.
// Only used while debugging, so it may be unused in a normal build.
#[allow(unused_macros)]
macro_rules! console_log {
    ($($arg:tt)*) => {
        log(&format!($($arg)*))
    };
}
575
576// Tests for VectorStore are in distance.rs and simd.rs modules
577// The wasm-bindgen VectorStore tests require wasm-bindgen-test and must
578// be run with `wasm-pack test --node`