use crate::error::EdgeVecError;
use crate::flat::BinaryFlatIndex;
use crate::hnsw::{GraphError, HnswConfig, HnswIndex};
#[cfg(feature = "sparse")]
use crate::hybrid::{FusionMethod, HybridSearchConfig, HybridSearcher};
use crate::metadata::validation::{validate_key, validate_value, MAX_KEYS_PER_VECTOR};
use crate::metadata::MetadataStore;
use crate::persistence::{chunking::ChunkIter, ChunkedWriter, PersistenceError};
#[cfg(feature = "sparse")]
use crate::sparse::{SparseSearcher, SparseStorage, SparseVector};
use crate::storage::VectorStorage;
use js_sys::{Array, Float32Array, Function, Object, Reflect, Uint32Array, Uint8Array};
use serde::{Deserialize, Serialize};
use std::sync::{
atomic::{AtomicBool, Ordering},
Arc, Once,
};
use wasm_bindgen::prelude::*;
mod batch;
pub mod filter;
mod iterator;
mod memory;
mod metadata;
pub use batch::{BatchInsertConfig, BatchInsertResult};
pub use iterator::PersistenceIterator;
pub use memory::{
track_batch_insert, track_vector_insert, MemoryConfig, MemoryPressure, MemoryPressureLevel,
MemoryRecommendation,
};
pub use metadata::JsMetadataValue;
/// Fixed header term (in bytes) added by `IndexVariant::serialized_size` for HNSW indexes.
const HNSW_HEADER_SIZE: usize = 64;
/// Per-node term (in bytes) that `IndexVariant::serialized_size` multiplies by the HNSW node count.
const HNSW_NODE_OVERHEAD: usize = 64;
/// Chunk size used by `EdgeVec::save_stream` when the caller passes none: 10 MiB.
const DEFAULT_CHUNK_SIZE: usize = 10 * 1024 * 1024;
// JavaScript bindings imported from the `/src/js/storage.js` module.
#[wasm_bindgen(module = "/src/js/storage.js")]
extern "C" {
// Rust-side handle for the JavaScript `IndexedDbBackend` class.
#[wasm_bindgen(js_name = IndexedDbBackend)]
pub type IndexedDbBackend;
// Static async `IndexedDbBackend.write(name, data)`; `catch` turns a thrown JS
// exception into `Err(JsValue)`.
#[wasm_bindgen(static_method_of = IndexedDbBackend, catch)]
pub async fn write(name: &str, data: &[u8]) -> Result<(), JsValue>;
// Static async `IndexedDbBackend.read(name)`; the resolved value is returned as an
// untyped `JsValue` (`EdgeVec::load` wraps it in a `Uint8Array`).
#[wasm_bindgen(static_method_of = IndexedDbBackend, catch)]
pub async fn read(name: &str) -> Result<JsValue, JsValue>;
}
// One-time guard: `EdgeVec::new` and `EdgeVec::load` run `init_logging` through it.
static INIT: Once = Once::new();
/// Installs the console panic hook and initializes `console_log` at `Info` level.
///
/// The result of `console_log::init_with_level` is discarded, so a failed
/// initialization is silently ignored.
#[wasm_bindgen]
pub fn init_logging() {
console_error_panic_hook::set_once();
let _ = console_log::init_with_level(log::Level::Info);
}
#[wasm_bindgen(js_name = "getSimdBackend")]
#[must_use]
pub fn get_simd_backend() -> String {
cfg_if::cfg_if! {
if #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] {
"wasm_simd128".to_string()
} else if #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] {
"avx2".to_string()
} else {
"scalar".to_string()
}
}
}
/// Benchmarks the Hamming metric on two synthetic byte vectors.
///
/// Builds two deterministic vectors of `bytes` bytes, calls
/// `Hamming::distance` on them `iterations` times and returns the average
/// time per call in microseconds (`Performance.now()` reports milliseconds).
///
/// Returns `0.0` when `iterations` is zero, and also when no
/// `window.performance` object is available (both timestamps then fall back
/// to `0.0`).
#[wasm_bindgen(js_name = "benchmarkHamming")]
#[allow(clippy::cast_possible_truncation)]
pub fn benchmark_hamming(bytes: usize, iterations: usize) -> f64 {
    use crate::metric::{Hamming, Metric};

    // Nothing to measure; this also avoids the `0.0 / 0.0 = NaN` division below.
    if iterations == 0 {
        return 0.0;
    }

    // Truncation to `u8` is intentional: it only produces a repeating byte pattern.
    let a: Vec<u8> = (0..bytes).map(|i| (i * 17 + 31) as u8).collect();
    let b: Vec<u8> = (0..bytes).map(|i| (i * 13 + 47) as u8).collect();

    let perf = web_sys::window().and_then(|w| w.performance());
    let now = || perf.as_ref().map_or(0.0, web_sys::Performance::now);

    let start = now();
    let mut sum: f32 = 0.0;
    for _ in 0..iterations {
        sum += Hamming::distance(&a, &b);
    }
    let end = now();

    // Consume `sum` so the loop above cannot be optimized away.
    if sum < 0.0 {
        web_sys::console::log_1(&format!("sum={sum}").into());
    }

    #[allow(clippy::cast_precision_loss)]
    let iteration_count = iterations as f64;
    (end - start) * 1000.0 / iteration_count
}
/// Compares two Hamming-distance kernels over a batch of byte vectors.
///
/// `vectors_js` is an array of `Uint8Array`s and `query_js` is the query.
/// After a warm-up over at most 100 vectors, the whole batch is scanned
/// `iterations` times with `crate::metric::simd::hamming_distance` ("new")
/// and then with `simd_popcount_xor` ("current").
///
/// Returns a JSON object string with the fields `num_vectors`,
/// `bytes_per_vector` (length of the first vector, `0` for an empty batch),
/// `iterations`, `new_ms`, `current_ms`, `speedup` (`current_ms / new_ms`),
/// `new_throughput` and `current_throughput`.
///
/// When an elapsed time is zero (no `window.performance`, or a timer too
/// coarse for the workload) the affected ratio is reported as `0` instead of
/// `NaN`/`inf`, which would make the returned text invalid JSON.
#[wasm_bindgen(js_name = "benchmarkHammingBatch")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn benchmark_hamming_batch(
    vectors_js: &js_sys::Array,
    query_js: Uint8Array,
    iterations: usize,
) -> String {
    use crate::simd::popcount::simd_popcount_xor;

    let vectors: Vec<Vec<u8>> = vectors_js
        .iter()
        .map(|v| Uint8Array::from(v).to_vec())
        .collect();
    let num_vectors = vectors.len();
    let bytes_per_vector = vectors.first().map_or(0, Vec::len);
    let query: Vec<u8> = query_js.to_vec();

    let perf = web_sys::window().and_then(|w| w.performance());
    let now = || perf.as_ref().map_or(0.0, web_sys::Performance::now);

    // Warm-up: exercise both kernels before timing anything.
    for v in vectors.iter().take(100) {
        let _ = crate::metric::simd::hamming_distance(&query, v);
        let _ = simd_popcount_xor(&query, v);
    }

    let start_new = now();
    let mut total_dist_new: u64 = 0;
    for _ in 0..iterations {
        for v in &vectors {
            total_dist_new += u64::from(crate::metric::simd::hamming_distance(&query, v));
        }
    }
    let end_new = now();

    let start_current = now();
    let mut total_dist_current: u64 = 0;
    for _ in 0..iterations {
        for v in &vectors {
            total_dist_current += u64::from(simd_popcount_xor(&query, v));
        }
    }
    let end_current = now();

    // Consume the sums so neither timed loop can be optimized away.
    if total_dist_new == 0 || total_dist_current == 0 {
        web_sys::console::log_1(&format!("sums: {total_dist_new} {total_dist_current}").into());
    }

    let new_ms = end_new - start_new;
    let current_ms = end_current - start_current;

    // A zero denominator yields 0 so every number in the JSON stays finite.
    let ratio = |numerator: f64, denominator: f64| -> f64 {
        if denominator > 0.0 {
            numerator / denominator
        } else {
            0.0
        }
    };
    let speedup = ratio(current_ms, new_ms);

    // Multiply as f64: `num_vectors * iterations` can overflow a 32-bit usize.
    #[allow(clippy::cast_precision_loss)]
    let total_comparisons = num_vectors as f64 * iterations as f64;
    let new_throughput = ratio(total_comparisons, new_ms / 1000.0);
    let current_throughput = ratio(total_comparisons, current_ms / 1000.0);

    let format_throughput = |t: f64| -> String {
        if t >= 1_000_000.0 {
            format!("{:.1}M vec/s", t / 1_000_000.0)
        } else if t >= 1_000.0 {
            format!("{:.1}K vec/s", t / 1_000.0)
        } else {
            format!("{:.0} vec/s", t)
        }
    };

    format!(
        r#"{{"num_vectors": {num_vectors}, "bytes_per_vector": {bytes_per_vector}, "iterations": {iterations}, "new_ms": {new_ms:.2}, "current_ms": {current_ms:.2}, "speedup": {speedup:.2}, "new_throughput": "{}", "current_throughput": "{}"}}"#,
        format_throughput(new_throughput),
        format_throughput(current_throughput)
    )
}
/// Kind of vector data selected through `EdgeVecConfig`, exported to JavaScript.
#[wasm_bindgen]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VectorType {
/// Float vectors (presumably 32-bit floats, going by the name).
Float32 = 0,
/// Binary vectors. `EdgeVecConfig::set_vector_type` defaults the metric to
/// `"hamming"` for this variant when no metric has been set.
Binary = 1,
}
/// Distance metric selector; `EdgeVecConfig::set_metric_type` maps each variant
/// to the metric string stored in the configuration.
#[wasm_bindgen]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MetricType {
/// Stored as `"l2"`.
L2 = 0,
/// Stored as `"cosine"`.
Cosine = 1,
/// Stored as `"dot"`.
Dot = 2,
/// Stored as `"hamming"`.
Hamming = 3,
}
/// Index implementation selected by `EdgeVecConfig`; `Hnsw` is the default.
#[wasm_bindgen]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub enum JsIndexType {
/// `EdgeVec::new` builds a `BinaryFlatIndex` for this variant.
Flat = 0,
/// `EdgeVec::new` builds an `HnswIndex` with its `VectorStorage` for this variant.
#[default]
Hnsw = 1,
}
/// Configuration consumed by `EdgeVec::new`.
///
/// Only `dimensions` is required. Every `Option` field starts as `None`, and
/// `EdgeVec::new` overrides the matching `HnswConfig` value only when a field
/// is `Some`.
#[wasm_bindgen]
pub struct EdgeVecConfig {
/// Vector dimensionality passed to the index constructor.
pub dimensions: u32,
/// Optional override for `HnswConfig::m`.
m: Option<u32>,
/// Optional override for `HnswConfig::m0`.
m0: Option<u32>,
/// Optional override for `HnswConfig::ef_construction`.
ef_construction: Option<u32>,
/// Optional override for `HnswConfig::ef_search`.
ef_search: Option<u32>,
/// Metric name (`"l2"`, `"cosine"`, `"dot"` or `"hamming"`); `None` is treated as L2.
metric: Option<String>,
/// Requested vector kind, if any.
vector_type: Option<VectorType>,
/// Requested index kind; `None` means the default (`JsIndexType::Hnsw`).
index_type: Option<JsIndexType>,
}
#[wasm_bindgen]
impl EdgeVecConfig {
    /// Creates a configuration for `dimensions`-dimensional vectors with every
    /// optional setting left unset.
    #[wasm_bindgen(constructor)]
    #[must_use]
    pub fn new(dimensions: u32) -> EdgeVecConfig {
        Self {
            index_type: None,
            vector_type: None,
            metric: None,
            ef_search: None,
            ef_construction: None,
            m0: None,
            m: None,
            dimensions,
        }
    }

    /// Sets the `m` override.
    #[wasm_bindgen(setter)]
    pub fn set_m(&mut self, m: u32) {
        self.m = Option::from(m);
    }

    /// Sets the `m0` override.
    #[wasm_bindgen(setter)]
    pub fn set_m0(&mut self, m0: u32) {
        self.m0 = Option::from(m0);
    }

    /// Sets the `ef_construction` override.
    #[wasm_bindgen(setter)]
    pub fn set_ef_construction(&mut self, ef: u32) {
        self.ef_construction = Option::from(ef);
    }

    /// Sets the `ef_search` override.
    #[wasm_bindgen(setter)]
    pub fn set_ef_search(&mut self, ef: u32) {
        self.ef_search = Option::from(ef);
    }

    /// Stores the metric name as given; it is validated later by `EdgeVec::new`.
    #[wasm_bindgen(setter)]
    pub fn set_metric(&mut self, metric: String) {
        self.metric = Option::from(metric);
    }

    /// Stores the metric name that corresponds to `metric_type`.
    #[wasm_bindgen(js_name = "setMetricType")]
    pub fn set_metric_type(&mut self, metric_type: MetricType) {
        let name: &'static str = match metric_type {
            MetricType::Hamming => "hamming",
            MetricType::Dot => "dot",
            MetricType::Cosine => "cosine",
            MetricType::L2 => "l2",
        };
        self.metric = Some(String::from(name));
    }

    /// Records the vector type; a binary type with no metric chosen yet also
    /// defaults the metric to `"hamming"`.
    #[wasm_bindgen(setter)]
    pub fn set_vector_type(&mut self, vt: VectorType) {
        if self.metric.is_none() && vt == VectorType::Binary {
            self.metric = Some(String::from("hamming"));
        }
        self.vector_type = Some(vt);
    }

    /// Returns the vector type, if one was set.
    #[wasm_bindgen(getter)]
    pub fn vector_type(&self) -> Option<VectorType> {
        self.vector_type
    }

    /// Selects the index implementation.
    #[wasm_bindgen(setter, js_name = "indexType")]
    pub fn set_index_type(&mut self, index_type: JsIndexType) {
        self.index_type = Some(index_type);
    }

    /// Returns the selected index implementation, or the default when unset.
    #[wasm_bindgen(getter, js_name = "indexType")]
    pub fn index_type(&self) -> JsIndexType {
        match self.index_type {
            Some(kind) => kind,
            None => JsIndexType::default(),
        }
    }

    /// True only when the flat index was explicitly selected.
    #[wasm_bindgen(js_name = "isFlat")]
    pub fn is_flat(&self) -> bool {
        self.index_type == Some(JsIndexType::Flat)
    }

    /// True whenever the flat index was not explicitly selected.
    #[wasm_bindgen(js_name = "isHnsw")]
    pub fn is_hnsw(&self) -> bool {
        self.index_type != Some(JsIndexType::Flat)
    }
}
/// The concrete index held by `EdgeVec`.
///
/// Serialized as an internally tagged enum: the `variant_type` field is
/// `"hnsw"` or `"flat"`.
#[derive(Serialize, Deserialize)]
#[serde(tag = "variant_type")]
pub(crate) enum IndexVariant {
/// HNSW graph together with the vector storage it operates on.
#[serde(rename = "hnsw")]
Hnsw {
index: Box<HnswIndex>,
storage: VectorStorage,
},
/// Flat binary index; it has no separate storage field.
#[serde(rename = "flat")]
Flat { index: BinaryFlatIndex },
}
#[allow(dead_code)]
impl IndexVariant {
#[inline]
pub fn dimensions(&self) -> u32 {
match self {
IndexVariant::Hnsw { index, .. } => index.config.dimensions,
IndexVariant::Flat { index } => index.dimensions() as u32,
}
}
#[inline]
pub fn len(&self) -> usize {
match self {
IndexVariant::Hnsw { index, .. } => index.len(),
IndexVariant::Flat { index } => index.len(),
}
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
pub fn memory_usage(&self) -> usize {
match self {
IndexVariant::Hnsw { index, storage } => {
let vector_data = storage.data_f32.capacity() * std::mem::size_of::<f32>()
+ storage.quantized_data.capacity()
+ storage.binary_data.capacity();
let deleted_bits = storage.deleted.capacity() / 8;
vector_data + deleted_bits + index.memory_usage()
}
IndexVariant::Flat { index } => index.memory_usage(),
}
}
#[inline]
pub fn serialized_size(&self) -> usize {
match self {
IndexVariant::Hnsw { index, storage } => {
let vector_data = storage.data_f32.len() * std::mem::size_of::<f32>()
+ storage.quantized_data.len()
+ storage.binary_data.len();
HNSW_HEADER_SIZE + vector_data + index.len() * HNSW_NODE_OVERHEAD
}
IndexVariant::Flat { index } => index.serialized_size(),
}
}
#[inline]
pub fn is_hnsw(&self) -> bool {
matches!(self, IndexVariant::Hnsw { .. })
}
#[inline]
pub fn is_flat(&self) -> bool {
matches!(self, IndexVariant::Flat { .. })
}
#[inline]
pub fn hnsw_config(&self) -> Option<&HnswConfig> {
match self {
IndexVariant::Hnsw { index, .. } => Some(&index.config),
IndexVariant::Flat { .. } => None,
}
}
#[inline]
pub fn as_hnsw_mut(&mut self) -> Result<(&mut HnswIndex, &mut VectorStorage), EdgeVecError> {
match self {
IndexVariant::Hnsw { index, storage } => Ok((index, storage)),
IndexVariant::Flat { .. } => Err(EdgeVecError::Validation(
"This operation is only supported for HNSW index. Use IndexType.Hnsw in config."
.to_string(),
)),
}
}
#[inline]
pub fn as_hnsw(&self) -> Result<(&HnswIndex, &VectorStorage), EdgeVecError> {
match self {
IndexVariant::Hnsw { index, storage } => Ok((index, storage)),
IndexVariant::Flat { .. } => Err(EdgeVecError::Validation(
"This operation is only supported for HNSW index. Use IndexType.Hnsw in config."
.to_string(),
)),
}
}
#[inline]
pub fn as_flat_mut(&mut self) -> Result<&mut BinaryFlatIndex, EdgeVecError> {
match self {
IndexVariant::Flat { index } => Ok(index),
IndexVariant::Hnsw { .. } => Err(EdgeVecError::Validation(
"This operation is only supported for Flat index. Use IndexType.Flat in config."
.to_string(),
)),
}
}
#[inline]
pub fn as_flat(&self) -> Result<&BinaryFlatIndex, EdgeVecError> {
match self {
IndexVariant::Flat { index } => Ok(index),
IndexVariant::Hnsw { .. } => Err(EdgeVecError::Validation(
"This operation is only supported for Flat index. Use IndexType.Flat in config."
.to_string(),
)),
}
}
}
/// Index handle exported to JavaScript.
///
/// Only `inner` and `metadata` take part in serde (de)serialization; the
/// fields marked `#[serde(skip)]` are rebuilt from their defaults on load.
#[derive(Serialize, Deserialize)]
#[allow(clippy::unsafe_derive_deserialize)]
#[wasm_bindgen]
pub struct EdgeVec {
/// The wrapped index (HNSW with storage, or flat).
inner: IndexVariant,
/// Per-vector metadata; falls back to its default when absent from the serialized data.
#[serde(default)]
metadata: MetadataStore,
/// Memory settings; not serialized.
#[serde(skip, default)]
memory_config: MemoryConfig,
/// Sparse storage (only with the `sparse` feature); not serialized.
#[cfg(feature = "sparse")]
#[serde(skip, default)]
sparse_storage: Option<SparseStorage>,
/// Flag shared with iterators created by `save_stream`; set to `false` when
/// this value is dropped. Not serialized.
#[serde(skip, default = "default_liveness")]
liveness: Arc<AtomicBool>,
}
/// Serde default for `EdgeVec::liveness`: a fresh, unshared flag that starts as `true`.
fn default_liveness() -> Arc<AtomicBool> {
    let alive = AtomicBool::new(true);
    Arc::new(alive)
}
/// Clears the shared liveness flag when the index is dropped, so holders of a
/// clone of the flag (see `save_stream`) can observe that the index is gone.
impl Drop for EdgeVec {
fn drop(&mut self) {
self.liveness.store(false, Ordering::Release);
}
}
#[wasm_bindgen]
impl EdgeVec {
#[wasm_bindgen(constructor)]
pub fn new(config: &EdgeVecConfig) -> Result<EdgeVec, JsValue> {
INIT.call_once(|| {
init_logging();
});
let inner = match config.index_type() {
JsIndexType::Flat => {
let index =
BinaryFlatIndex::new(config.dimensions as usize).map_err(EdgeVecError::from)?;
IndexVariant::Flat { index }
}
JsIndexType::Hnsw => {
let metric_code = match config.metric.as_deref() {
Some("cosine") => HnswConfig::METRIC_COSINE,
Some("dot") => HnswConfig::METRIC_DOT_PRODUCT,
Some("l2") | None => HnswConfig::METRIC_L2_SQUARED,
Some("hamming") => HnswConfig::METRIC_HAMMING,
Some(other) => {
return Err(
EdgeVecError::Validation(format!("Unknown metric: {other}")).into()
)
}
};
let mut hnsw_config = HnswConfig::new(config.dimensions);
if let Some(m) = config.m {
hnsw_config.m = m;
}
if let Some(m0) = config.m0 {
hnsw_config.m0 = m0;
}
if let Some(ef) = config.ef_construction {
hnsw_config.ef_construction = ef;
}
if let Some(ef) = config.ef_search {
hnsw_config.ef_search = ef;
}
hnsw_config.metric = metric_code;
if config.vector_type == Some(VectorType::Binary)
&& metric_code != HnswConfig::METRIC_HAMMING
{
return Err(EdgeVecError::Validation(format!(
"VectorType::Binary requires metric='hamming'. Current metric is '{}'",
match metric_code {
HnswConfig::METRIC_L2_SQUARED => "l2",
HnswConfig::METRIC_COSINE => "cosine",
HnswConfig::METRIC_DOT_PRODUCT => "dot",
_ => "unknown",
}
))
.into());
}
let mut storage = VectorStorage::new(&hnsw_config, None);
if config.vector_type == Some(VectorType::Binary)
|| metric_code == HnswConfig::METRIC_HAMMING
{
storage
.set_storage_type(crate::storage::StorageType::Binary(config.dimensions));
}
let index = HnswIndex::new(hnsw_config, &storage).map_err(EdgeVecError::from)?;
IndexVariant::Hnsw {
index: Box::new(index),
storage,
}
}
};
Ok(EdgeVec {
inner,
metadata: MetadataStore::new(),
memory_config: MemoryConfig::default(),
#[cfg(feature = "sparse")]
sparse_storage: None,
liveness: Arc::new(AtomicBool::new(true)),
})
}
/// Inserts a float vector into the HNSW index and returns its ID.
///
/// # Errors
/// Fails for a flat index, for a length different from the configured
/// dimensions, for non-finite components, when the underlying insert fails,
/// or when the assigned ID does not fit in `u32`.
#[wasm_bindgen]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn insert(&mut self, vector: Float32Array) -> Result<u32, JsValue> {
    let (index, storage) = match &mut self.inner {
        IndexVariant::Flat { .. } => {
            return Err(EdgeVecError::Validation(
                "insert() not supported for Flat index. Use insertBinary() instead.".to_string(),
            )
            .into());
        }
        IndexVariant::Hnsw { index, storage } => (index, storage),
    };

    let expected = index.config.dimensions;
    let actual = vector.length();
    if actual != expected {
        return Err(EdgeVecError::Graph(GraphError::DimensionMismatch {
            expected: expected as usize,
            actual: actual as usize,
        })
        .into());
    }

    let values = vector.to_vec();
    if !values.iter().all(|component| component.is_finite()) {
        return Err(
            EdgeVecError::Validation("Vector contains non-finite values".to_string()).into(),
        );
    }

    let id = index.insert(&values, storage).map_err(EdgeVecError::from)?;
    // Memory tracking runs before the ID range check, as the insert already happened.
    track_vector_insert(expected);

    if id.0 <= u64::from(u32::MAX) {
        Ok(id.0 as u32)
    } else {
        Err(EdgeVecError::Validation("Vector ID overflowed u32".to_string()).into())
    }
}
/// Inserts a packed binary vector and returns its ID.
///
/// Works for both index kinds. For HNSW the metric must be Hamming and the
/// input must hold `ceil(dimensions / 8)` bytes; for a flat index the input
/// must hold `bytes_per_vector()` bytes.
///
/// # Errors
/// Fails on a metric or length mismatch, when the underlying insert fails,
/// or when the assigned ID does not fit in `u32`.
#[wasm_bindgen(js_name = "insertBinary")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn insert_binary(&mut self, vector: Uint8Array) -> Result<u32, JsValue> {
    /// Builds the length-mismatch error shared by both index kinds.
    fn length_mismatch(expected: usize, actual: usize) -> JsValue {
        EdgeVecError::Graph(GraphError::DimensionMismatch { expected, actual }).into()
    }

    let actual = vector.length() as usize;

    let raw_id: u64 = match &mut self.inner {
        IndexVariant::Flat { index } => {
            let expected = index.bytes_per_vector();
            if actual != expected {
                return Err(length_mismatch(expected, actual));
            }
            let bytes = vector.to_vec();
            index.insert(&bytes).map_err(EdgeVecError::from)?.0
        }
        IndexVariant::Hnsw { index, storage } => {
            if index.config.metric != HnswConfig::METRIC_HAMMING {
                return Err(EdgeVecError::Validation(
                    "insertBinary requires metric='hamming'. Current metric is not Hamming."
                        .to_string(),
                )
                .into());
            }
            let expected = ((index.config.dimensions + 7) / 8) as usize;
            if actual != expected {
                return Err(length_mismatch(expected, actual));
            }
            let bytes = vector.to_vec();
            index
                .insert_binary(&bytes, storage)
                .map_err(EdgeVecError::from)?
                .0
        }
    };

    if raw_id <= u64::from(u32::MAX) {
        Ok(raw_id as u32)
    } else {
        Err(EdgeVecError::Validation("Vector ID overflowed u32".to_string()).into())
    }
}
/// Inserts a float vector through the index's `insert_with_bq` path and
/// returns its ID. Requires an HNSW index configured with the Hamming metric.
///
/// # Errors
/// Fails for a flat index, a non-Hamming metric, a length different from the
/// configured dimensions, non-finite components, an underlying insert
/// failure, or an ID that does not fit in `u32`.
#[wasm_bindgen(js_name = "insertWithBq")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn insert_with_bq(&mut self, vector: Float32Array) -> Result<u32, JsValue> {
    let (index, storage) = match &mut self.inner {
        IndexVariant::Flat { .. } => {
            return Err(EdgeVecError::Validation(
                "insertWithBq() not supported for Flat index. Use insertBinary() instead."
                    .to_string(),
            )
            .into());
        }
        IndexVariant::Hnsw { index, storage } => (index, storage),
    };

    if index.config.metric != HnswConfig::METRIC_HAMMING {
        return Err(EdgeVecError::Validation(
            "insertWithBq requires metric='hamming'. Current metric is not Hamming.".to_string(),
        )
        .into());
    }

    let expected = index.config.dimensions;
    let actual = vector.length();
    if actual != expected {
        return Err(EdgeVecError::Graph(GraphError::DimensionMismatch {
            expected: expected as usize,
            actual: actual as usize,
        })
        .into());
    }

    let values = vector.to_vec();
    if !values.iter().all(|component| component.is_finite()) {
        return Err(
            EdgeVecError::Validation("Vector contains non-finite values".to_string()).into(),
        );
    }

    let id = index
        .insert_with_bq(&values, storage)
        .map_err(EdgeVecError::from)?;

    if id.0 <= u64::from(u32::MAX) {
        Ok(id.0 as u32)
    } else {
        Err(EdgeVecError::Validation("Vector ID overflowed u32".to_string()).into())
    }
}
/// Searches with a packed binary query and returns a JS array of
/// `{ id, score }` objects, where `score` is the distance reported by the index.
///
/// Works for both index kinds; HNSW additionally requires the Hamming metric.
///
/// # Errors
/// Fails on a metric or query-length mismatch, when the underlying search
/// fails, or when a property cannot be set on a result object.
#[wasm_bindgen(js_name = "searchBinary")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn search_binary(&self, query: Uint8Array, k: usize) -> Result<JsValue, JsValue> {
    /// Builds one `{ id, score }` result object.
    fn hit_object<S: Into<JsValue>>(id: u32, score: S) -> Result<JsValue, JsValue> {
        let entry = Object::new();
        Reflect::set(&entry, &JsValue::from_str("id"), &JsValue::from(id))?;
        Reflect::set(&entry, &JsValue::from_str("score"), &score.into())?;
        Ok(entry.into())
    }

    /// Builds the length-mismatch error shared by both index kinds.
    fn length_mismatch(expected: usize, actual: usize) -> JsValue {
        EdgeVecError::Graph(GraphError::DimensionMismatch { expected, actual }).into()
    }

    let actual = query.length() as usize;

    let out = match &self.inner {
        IndexVariant::Flat { index } => {
            let expected = index.bytes_per_vector();
            if actual != expected {
                return Err(length_mismatch(expected, actual));
            }
            let bytes = query.to_vec();
            let hits = index.search(&bytes, k).map_err(EdgeVecError::from)?;
            let out = Array::new_with_length(hits.len() as u32);
            for (slot, hit) in hits.iter().enumerate() {
                out.set(slot as u32, hit_object(hit.id.0 as u32, hit.distance)?);
            }
            out
        }
        IndexVariant::Hnsw { index, storage } => {
            if index.config.metric != HnswConfig::METRIC_HAMMING {
                return Err(EdgeVecError::Validation(
                    "searchBinary requires metric='hamming'. Current metric is not Hamming."
                        .to_string(),
                )
                .into());
            }
            let expected = ((index.config.dimensions + 7) / 8) as usize;
            if actual != expected {
                return Err(length_mismatch(expected, actual));
            }
            let bytes = query.to_vec();
            let hits = index
                .search_binary(&bytes, k, storage)
                .map_err(EdgeVecError::from)?;
            let out = Array::new_with_length(hits.len() as u32);
            for (slot, hit) in hits.iter().enumerate() {
                out.set(
                    slot as u32,
                    hit_object(hit.vector_id.0 as u32, hit.distance)?,
                );
            }
            out
        }
    };

    Ok(out.into())
}
/// Binary search on the HNSW index with an explicit `ef_search` value.
///
/// Returns a JS array of `{ id, score }` objects, where `score` is the
/// distance reported by the index.
///
/// # Errors
/// Fails for a flat index, a non-Hamming metric, a query whose byte length
/// is not `ceil(dimensions / 8)`, or an underlying search failure.
#[wasm_bindgen(js_name = "searchBinaryWithEf")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn search_binary_with_ef(
    &self,
    query: Uint8Array,
    k: usize,
    ef_search: usize,
) -> Result<JsValue, JsValue> {
    let (index, storage) = self.inner.as_hnsw()?;

    if index.config.metric != HnswConfig::METRIC_HAMMING {
        return Err(EdgeVecError::Validation(
            "searchBinaryWithEf requires metric='hamming'. Current metric is not Hamming."
                .to_string(),
        )
        .into());
    }

    let expected = ((index.config.dimensions + 7) / 8) as usize;
    let actual = query.length() as usize;
    if actual != expected {
        return Err(EdgeVecError::Graph(GraphError::DimensionMismatch { expected, actual }).into());
    }

    let bytes = query.to_vec();
    let hits = index
        .search_binary_with_ef(&bytes, k, ef_search, storage)
        .map_err(EdgeVecError::from)?;

    let id_key = JsValue::from_str("id");
    let score_key = JsValue::from_str("score");
    let out = Array::new_with_length(hits.len() as u32);
    for (slot, hit) in hits.iter().enumerate() {
        let entry = Object::new();
        Reflect::set(&entry, &id_key, &JsValue::from(hit.vector_id.0 as u32))?;
        Reflect::set(&entry, &score_key, &JsValue::from(hit.distance))?;
        out.set(slot as u32, entry.into());
    }
    Ok(out.into())
}
/// Binary (Hamming) search on the HNSW index with an optional metadata filter.
///
/// `options_json` is parsed into `SearchFilteredOptions`; the fields read here
/// are `filter`, `strategy`, `oversample_factor` and `include_metadata`.
/// Returns the serialized `SearchFilteredResult` as a JSON string.
///
/// # Errors
/// Fails for a flat index, a non-Hamming metric, a query whose byte length is
/// not `ceil(dimensions / 8)`, invalid options JSON, a filter parse error, a
/// search failure, or a serialization failure.
#[wasm_bindgen(js_name = "searchBinaryFiltered")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn search_binary_filtered(
&mut self,
query: Uint8Array,
k: usize,
options_json: &str,
) -> Result<String, JsValue> {
use crate::filter::{parse, FilterStrategy, FilteredSearcher};
let (index, storage) = self.inner.as_hnsw()?;
// Start of the overall timer; `None` when no `window.performance` is available.
let total_start = web_sys::window()
.and_then(|w| w.performance())
.map(|p| p.now());
if index.config.metric != HnswConfig::METRIC_HAMMING {
return Err(EdgeVecError::Validation(
"searchBinaryFiltered requires metric='hamming'. Current metric is not Hamming."
.to_string(),
)
.into());
}
// Packed binary vectors occupy ceil(dimensions / 8) bytes.
let expected_bytes = ((index.config.dimensions + 7) / 8) as usize;
let len = query.length() as usize;
if len != expected_bytes {
return Err(EdgeVecError::Graph(GraphError::DimensionMismatch {
expected: expected_bytes,
actual: len,
})
.into());
}
let query_vec = query.to_vec();
let options: SearchFilteredOptions = serde_json::from_str(options_json)
.map_err(|e| JsValue::from_str(&format!("Invalid options JSON: {e}")))?;
// NOTE: this timer starts before filter parsing and is read after the search
// completes, so `filter_time_ms` covers parsing plus the filtered search.
let filter_start = web_sys::window()
.and_then(|w| w.performance())
.map(|p| p.now());
let filter = match &options.filter {
Some(filter_str) => {
Some(parse(filter_str).map_err(|e| filter::filter_error_to_jsvalue(&e))?)
}
None => None,
};
// Any strategy string other than "pre", "post" or "hybrid" (including a
// missing one) selects `FilterStrategy::Auto`.
let strategy = match options.strategy.as_deref() {
Some("pre") => FilterStrategy::PreFilter,
Some("post") => FilterStrategy::PostFilter {
oversample: options.oversample_factor.unwrap_or(3.0),
},
Some("hybrid") => FilterStrategy::Hybrid {
oversample_min: 1.5,
oversample_max: options.oversample_factor.unwrap_or(10.0),
},
_ => FilterStrategy::Auto,
};
let metadata_adapter = EdgeVecMetadataAdapter::new(&self.metadata, index.len());
let mut searcher = FilteredSearcher::new(index, storage, &metadata_adapter);
let result = searcher
.search_binary_filtered(&query_vec, k, filter.as_ref(), strategy)
.map_err(|e| JsValue::from_str(&format!("Binary filtered search failed: {e}")))?;
// Elapsed time is 0.0 when either timestamp could not be taken.
let filter_time_ms = match (
filter_start,
web_sys::window().and_then(|w| w.performance()),
) {
(Some(start), Some(perf)) => perf.now() - start,
_ => 0.0,
};
let include_metadata = options.include_metadata.unwrap_or(false);
let response = SearchFilteredResult {
results: result
.results
.iter()
.map(|r| {
let id = r.vector_id.0 as u32;
SearchFilteredItem {
id,
score: r.distance,
// Metadata that fails to convert to a JSON value is dropped (`None`).
metadata: if include_metadata {
self.metadata
.get_all(id)
.and_then(|m| serde_json::to_value(m).ok())
} else {
None
},
vector: None, }
})
.collect(),
complete: result.complete,
observed_selectivity: result.observed_selectivity,
strategy_used: strategy_to_string(&result.strategy_used),
vectors_evaluated: result.vectors_evaluated,
filter_time_ms,
total_time_ms: match (total_start, web_sys::window().and_then(|w| w.performance())) {
(Some(start), Some(perf)) => perf.now() - start,
_ => 0.0,
},
};
serde_json::to_string(&response)
.map_err(|e| JsValue::from_str(&format!("Serialization error: {e}")))
}
/// Inserts `count` float vectors supplied as one contiguous `Float32Array`.
///
/// "Flat" refers to the input layout (`count * dimensions` values back to
/// back); the vectors go into the HNSW index. Returns the assigned IDs in
/// input order.
///
/// # Errors
/// Fails for a flat index, when `count * dimensions` overflows `usize`, when
/// the array length differs from `count * dimensions`, when any value is
/// non-finite, when an insert fails, or when an ID does not fit in `u32`.
/// An error raised partway through does not undo earlier inserts, and memory
/// tracking is then skipped.
#[wasm_bindgen(js_name = insertBatchFlat)]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn insert_batch_flat(
    &mut self,
    vectors: Float32Array,
    count: usize,
) -> Result<Uint32Array, JsValue> {
    let (index, storage) = self.inner.as_hnsw_mut()?;
    let dim = index.config.dimensions as usize;

    // On wasm32 `usize` is 32 bits: an unchecked product could wrap, slip past
    // the length check below and then panic while slicing.
    let expected_len = match count.checked_mul(dim) {
        Some(len) => len,
        None => {
            return Err(EdgeVecError::Validation(format!(
                "Batch size overflow: {count} * {dim} does not fit in usize"
            ))
            .into());
        }
    };

    if vectors.length() as usize != expected_len {
        return Err(EdgeVecError::Validation(format!(
            "Batch dimension mismatch: expected {} ({} * {}), got {}",
            expected_len,
            count,
            dim,
            vectors.length()
        ))
        .into());
    }

    let vec_data = vectors.to_vec();
    if vec_data.iter().any(|v| !v.is_finite()) {
        return Err(
            EdgeVecError::Validation("Vectors contain non-finite values".to_string()).into(),
        );
    }

    let mut ids = Vec::with_capacity(count);
    for i in 0..count {
        // In range: `count * dim == vec_data.len()` was verified above.
        let start = i * dim;
        let vector_slice = &vec_data[start..start + dim];
        let id = index
            .insert(vector_slice, storage)
            .map_err(EdgeVecError::from)?;
        if id.0 > u64::from(u32::MAX) {
            return Err(EdgeVecError::Validation("Vector ID overflowed u32".to_string()).into());
        }
        ids.push(id.0 as u32);
    }

    track_batch_insert(count, index.config.dimensions);
    Ok(Uint32Array::from(&ids[..]))
}
/// Batch insert exported to JavaScript as `insertBatch`.
///
/// Delegates to `batch::insert_batch_impl`, passing `vectors` and the
/// optional `config` through unchanged.
#[wasm_bindgen(js_name = insertBatch)]
pub fn insert_batch_v2(
&mut self,
vectors: Array,
config: Option<batch::BatchInsertConfig>,
) -> Result<batch::BatchInsertResult, JsValue> {
batch::insert_batch_impl(self, vectors, config)
}
/// Batch insert that reports progress to a JavaScript callback.
///
/// `on_progress(done, total)` is called exactly twice: with `(0, total)`
/// before the batch starts and with `(total, total)` after it succeeds.
/// Errors thrown by the callback are ignored. The batch runs with a default
/// `BatchInsertConfig`.
///
/// # Errors
/// Forwards any error from `batch::insert_batch_impl`; the final callback is
/// then skipped.
#[wasm_bindgen(js_name = insertBatchWithProgress)]
#[allow(clippy::needless_pass_by_value)]
pub fn insert_batch_with_progress(
    &mut self,
    vectors: Array,
    on_progress: Function,
) -> Result<batch::BatchInsertResult, JsValue> {
    let total = vectors.length();
    let receiver = JsValue::NULL;
    // Best-effort notification: the callback's outcome is deliberately discarded.
    let report = |done: u32| {
        let _ = on_progress.call2(&receiver, &JsValue::from(done), &JsValue::from(total));
    };

    report(0u32);
    let defaults = batch::BatchInsertConfig::new();
    let outcome = batch::insert_batch_impl(self, vectors, Some(defaults))?;
    report(total);
    Ok(outcome)
}
/// Searches the HNSW index with a float query and returns a JS array of
/// `{ id, score }` objects, where `score` is the distance reported by the index.
///
/// # Errors
/// Fails for a flat index, a query length different from the configured
/// dimensions, non-finite query components, or an underlying search failure.
#[wasm_bindgen]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn search(&self, query: Float32Array, k: usize) -> Result<JsValue, JsValue> {
    let (index, storage) = self.inner.as_hnsw()?;

    let expected = index.config.dimensions;
    let actual = query.length();
    if actual != expected {
        return Err(EdgeVecError::Graph(GraphError::DimensionMismatch {
            expected: expected as usize,
            actual: actual as usize,
        })
        .into());
    }

    let values = query.to_vec();
    if !values.iter().all(|component| component.is_finite()) {
        return Err(
            EdgeVecError::Validation("Query vector contains non-finite values".to_string())
                .into(),
        );
    }

    let hits = index
        .search(&values, k, storage)
        .map_err(EdgeVecError::from)?;

    let id_key = JsValue::from_str("id");
    let score_key = JsValue::from_str("score");
    let out = Array::new_with_length(hits.len() as u32);
    for (slot, hit) in hits.iter().enumerate() {
        let entry = Object::new();
        Reflect::set(&entry, &id_key, &JsValue::from(hit.vector_id.0 as u32))?;
        Reflect::set(&entry, &score_key, &JsValue::from(hit.distance))?;
        out.set(slot as u32, entry.into());
    }
    Ok(out.into())
}
/// Creates a chunked export iterator over the HNSW index and its storage.
///
/// `chunk_size` defaults to `DEFAULT_CHUNK_SIZE` when `None`.
///
/// # Errors
/// Returns a validation error for a flat index.
#[wasm_bindgen]
pub fn save_stream(&self, chunk_size: Option<usize>) -> Result<PersistenceIterator, JsValue> {
let (index, storage) = self.inner.as_hnsw()?;
let size = chunk_size.unwrap_or(DEFAULT_CHUNK_SIZE);
let writer = (storage, index);
let iter = writer.export_chunked(size);
// SAFETY (review note): the transmute only erases the iterator's borrow of
// `self` so it can be handed to JavaScript, which cannot carry a Rust lifetime.
// The iterator receives a clone of `liveness`, which `Drop for EdgeVec` sets
// to `false`; presumably `PersistenceIterator` checks that flag before touching
// the borrowed data — confirm in the `iterator` module.
// NOTE(review): nothing visible here prevents the index from being mutated
// while the iterator is alive — verify that this is handled elsewhere.
#[allow(unsafe_code)]
let static_iter = unsafe { std::mem::transmute::<ChunkIter<'_>, ChunkIter<'static>>(iter) };
Ok(PersistenceIterator {
iter: static_iter,
liveness: self.liveness.clone(),
})
}
/// Serializes the index with `postcard` and writes it through
/// `IndexedDbBackend::write` under `name`.
///
/// Fields marked `#[serde(skip)]` on `EdgeVec` are not part of the saved data.
///
/// # Errors
/// Returns a persistence error when serialization fails, or the JS error
/// raised by the backend write.
#[wasm_bindgen]
pub async fn save(&self, name: String) -> Result<(), JsValue> {
    let encoded = match postcard::to_stdvec(self) {
        Ok(bytes) => bytes,
        Err(e) => {
            return Err(EdgeVecError::Persistence(PersistenceError::Corrupted(format!(
                "Serialization failed: {e}"
            )))
            .into());
        }
    };
    IndexedDbBackend::write(&name, &encoded).await
}
#[wasm_bindgen]
pub async fn load(name: String) -> Result<EdgeVec, JsValue> {
INIT.call_once(|| {
init_logging();
});
let val = IndexedDbBackend::read(&name).await?;
let bytes = Uint8Array::new(&val).to_vec();
let mut edge_vec: EdgeVec = postcard::from_bytes(&bytes).map_err(|e| {
EdgeVecError::Persistence(PersistenceError::Corrupted(format!(
"Deserialization failed: {e}"
)))
})?;
edge_vec.liveness = Arc::new(AtomicBool::new(true));
Ok(edge_vec)
}
/// Calls the HNSW index's `soft_delete` for `vector_id` and returns its
/// boolean result.
///
/// # Errors
/// Fails for a flat index, or with a `soft_delete failed: …` message when
/// the index rejects the ID.
#[wasm_bindgen(js_name = softDelete)]
#[allow(clippy::cast_possible_truncation)]
pub fn soft_delete(&mut self, vector_id: u32) -> Result<bool, JsValue> {
    let hnsw = self.inner.as_hnsw_mut()?.0;
    let target = crate::hnsw::VectorId(u64::from(vector_id));
    match hnsw.soft_delete(target) {
        Ok(flag) => Ok(flag),
        Err(e) => Err(JsValue::from_str(&format!("soft_delete failed: {e}"))),
    }
}
/// Calls the HNSW index's `is_deleted` for `vector_id` and returns its
/// boolean result.
///
/// # Errors
/// Fails for a flat index, or with an `is_deleted failed: …` message when
/// the index rejects the ID.
#[wasm_bindgen(js_name = isDeleted)]
#[allow(clippy::cast_possible_truncation)]
pub fn is_deleted(&self, vector_id: u32) -> Result<bool, JsValue> {
    let hnsw = self.inner.as_hnsw()?.0;
    let target = crate::hnsw::VectorId(u64::from(vector_id));
    match hnsw.is_deleted(target) {
        Ok(flag) => Ok(flag),
        Err(e) => Err(JsValue::from_str(&format!("is_deleted failed: {e}"))),
    }
}
/// Returns the HNSW index's `deleted_count()`, narrowed to `u32`.
///
/// # Errors
/// Returns a validation error for a flat index.
#[wasm_bindgen(js_name = deletedCount)]
#[allow(clippy::cast_possible_truncation)]
pub fn deleted_count(&self) -> Result<u32, JsValue> {
    let hnsw = self.inner.as_hnsw()?.0;
    let tombstones = hnsw.deleted_count();
    Ok(tombstones as u32)
}
/// Returns the HNSW index's `live_count()`, narrowed to `u32`.
///
/// # Errors
/// Returns a validation error for a flat index.
#[wasm_bindgen(js_name = liveCount)]
#[allow(clippy::cast_possible_truncation)]
pub fn live_count(&self) -> Result<u32, JsValue> {
    let hnsw = self.inner.as_hnsw()?.0;
    let live = hnsw.live_count();
    Ok(live as u32)
}
/// Returns the HNSW index's `tombstone_ratio()`.
///
/// # Errors
/// Returns a validation error for a flat index.
#[wasm_bindgen(js_name = tombstoneRatio)]
pub fn tombstone_ratio(&self) -> Result<f64, JsValue> {
    let hnsw = self.inner.as_hnsw()?.0;
    let ratio = hnsw.tombstone_ratio();
    Ok(ratio)
}
/// Returns the HNSW index's `needs_compaction()` verdict.
///
/// # Errors
/// Returns a validation error for a flat index.
#[wasm_bindgen(js_name = needsCompaction)]
pub fn needs_compaction(&self) -> Result<bool, JsValue> {
    let hnsw = self.inner.as_hnsw()?.0;
    let verdict = hnsw.needs_compaction();
    Ok(verdict)
}
/// Returns the HNSW index's current `compaction_threshold()`.
///
/// # Errors
/// Returns a validation error for a flat index.
#[wasm_bindgen(js_name = compactionThreshold)]
pub fn compaction_threshold(&self) -> Result<f64, JsValue> {
    let hnsw = self.inner.as_hnsw()?.0;
    let threshold = hnsw.compaction_threshold();
    Ok(threshold)
}
/// Forwards `ratio` to the HNSW index's `set_compaction_threshold`.
///
/// No validation of `ratio` happens at this layer.
///
/// # Errors
/// Returns a validation error for a flat index.
#[wasm_bindgen(js_name = setCompactionThreshold)]
pub fn set_compaction_threshold(&mut self, ratio: f64) -> Result<(), JsValue> {
    let hnsw = self.inner.as_hnsw_mut()?.0;
    hnsw.set_compaction_threshold(ratio);
    Ok(())
}
/// Returns the HNSW index's `compaction_warning()`, which may be `None`.
///
/// # Errors
/// Returns a validation error for a flat index.
#[wasm_bindgen(js_name = compactionWarning)]
pub fn compaction_warning(&self) -> Result<Option<String>, JsValue> {
    let hnsw = self.inner.as_hnsw()?.0;
    let warning = hnsw.compaction_warning();
    Ok(warning)
}
/// Compacts the HNSW index and replaces the held index and storage with the
/// rebuilt ones returned by `HnswIndex::compact`.
///
/// The returned counters are narrowed to `u32`.
///
/// # Errors
/// Fails for a flat index, or with a `compact failed: …` message when
/// compaction fails; the existing index is left in place in that case.
#[wasm_bindgen]
#[allow(clippy::cast_possible_truncation)]
pub fn compact(&mut self) -> Result<WasmCompactionResult, JsValue> {
    let (rebuilt_index, rebuilt_storage, stats) = {
        let (index, storage) = self.inner.as_hnsw_mut()?;
        match index.compact(storage) {
            Ok(parts) => parts,
            Err(e) => return Err(JsValue::from_str(&format!("compact failed: {e}"))),
        }
    };

    self.inner = IndexVariant::Hnsw {
        index: Box::new(rebuilt_index),
        storage: rebuilt_storage,
    };

    let tombstones_removed = stats.tombstones_removed as u32;
    let new_size = stats.new_size as u32;
    let duration_ms = stats.duration_ms as u32;
    Ok(WasmCompactionResult {
        tombstones_removed,
        new_size,
        duration_ms,
    })
}
/// Passes a batch of vector IDs to the HNSW index's `soft_delete_batch` and
/// returns its counters, narrowed to `u32`.
///
/// # Errors
/// Returns a validation error for a flat index.
#[wasm_bindgen(js_name = softDeleteBatch)]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn soft_delete_batch(
    &mut self,
    ids: js_sys::Uint32Array,
) -> Result<WasmBatchDeleteResult, JsValue> {
    let hnsw = self.inner.as_hnsw_mut()?.0;

    let targets: Vec<crate::hnsw::VectorId> = ids
        .to_vec()
        .into_iter()
        .map(|raw| crate::hnsw::VectorId(u64::from(raw)))
        .collect();
    let outcome = hnsw.soft_delete_batch(&targets);

    let deleted = outcome.deleted as u32;
    let already_deleted = outcome.already_deleted as u32;
    let invalid_ids = outcome.invalid_ids as u32;
    let total = outcome.total as u32;
    let unique_count = outcome.unique_count as u32;
    Ok(WasmBatchDeleteResult {
        deleted,
        already_deleted,
        invalid_ids,
        total,
        unique_count,
    })
}
/// Variant of `soft_delete_batch` that takes the IDs as a `Float64Array`.
///
/// Each value must be a finite, non-negative whole number to name a vector.
/// Any other value (NaN, infinities, negatives, fractions) is replaced with
/// `u64::MAX`, an ID the insert methods never return because they reject IDs
/// above `u32::MAX`. A plain `as u64` cast would instead turn NaN and
/// negatives into ID 0 and truncate fractions, silently aliasing them to
/// other IDs.
///
/// NOTE(review): all malformed inputs share one placeholder ID, so they
/// presumably count once in `unique_count` and are reported through
/// `invalid_ids` — confirm against `HnswIndex::soft_delete_batch`.
///
/// # Errors
/// Returns a validation error for a flat index.
#[wasm_bindgen(js_name = softDeleteBatchCompat)]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_sign_loss)]
pub fn soft_delete_batch_compat(
    &mut self,
    ids: js_sys::Float64Array,
) -> Result<WasmBatchDeleteResult, JsValue> {
    /// Converts one JS number into a vector ID without aliasing malformed input.
    fn to_vector_id(raw: f64) -> crate::hnsw::VectorId {
        let is_whole_non_negative = raw.is_finite() && raw >= 0.0 && raw.fract() == 0.0;
        if is_whole_non_negative {
            // Saturating float-to-int cast; exact for whole numbers in u64 range.
            crate::hnsw::VectorId(raw as u64)
        } else {
            crate::hnsw::VectorId(u64::MAX)
        }
    }

    let (index, _storage) = self.inner.as_hnsw_mut()?;
    let vec_ids: Vec<crate::hnsw::VectorId> =
        ids.to_vec().into_iter().map(to_vector_id).collect();
    let result = index.soft_delete_batch(&vec_ids);
    Ok(WasmBatchDeleteResult {
        deleted: result.deleted as u32,
        already_deleted: result.already_deleted as u32,
        invalid_ids: result.invalid_ids as u32,
        total: result.total as u32,
        unique_count: result.unique_count as u32,
    })
}
/// Stores `value` under `key` in the metadata of vector `vector_id`.
///
/// # Errors
/// Returns the metadata store's insertion error, converted with
/// `metadata_error_to_js`.
#[wasm_bindgen(js_name = "setMetadata")]
pub fn set_metadata(
&mut self,
vector_id: u32,
key: &str,
value: &metadata::JsMetadataValue,
) -> Result<(), JsError> {
self.metadata
// The wrapper is only borrowed, so its inner value is cloned for the store.
.insert(vector_id, key, value.inner.clone())
.map_err(metadata::metadata_error_to_js)
}
/// Looks up the metadata value stored under `key` for vector `vector_id`.
///
/// The store's lookup result is converted by `metadata_value_to_js`.
#[wasm_bindgen(js_name = "getMetadata")]
#[must_use]
pub fn get_metadata(&self, vector_id: u32, key: &str) -> Option<metadata::JsMetadataValue> {
metadata::metadata_value_to_js(self.metadata.get(vector_id, key))
}
/// Returns the metadata of vector `vector_id` as a JS value built by
/// `metadata_to_js_object`.
#[wasm_bindgen(js_name = "getAllMetadata")]
#[must_use]
pub fn get_all_metadata(&self, vector_id: u32) -> JsValue {
metadata::metadata_to_js_object(&self.metadata, vector_id)
}
/// Deletes the metadata entry `key` of vector `vector_id`.
///
/// Returns the boolean reported by the metadata store (presumably whether an
/// entry was actually removed — confirm against `MetadataStore::delete`).
///
/// # Errors
/// Returns the store's deletion error, converted with `metadata_error_to_js`.
#[wasm_bindgen(js_name = "deleteMetadata")]
pub fn delete_metadata(&mut self, vector_id: u32, key: &str) -> Result<bool, JsError> {
self.metadata
.delete(vector_id, key)
.map_err(metadata::metadata_error_to_js)
}
/// Deletes all metadata of vector `vector_id`.
///
/// Returns the boolean reported by the metadata store (presumably whether the
/// vector had any metadata — confirm against `MetadataStore::delete_all`).
#[wasm_bindgen(js_name = "deleteAllMetadata")]
pub fn delete_all_metadata(&mut self, vector_id: u32) -> bool {
self.metadata.delete_all(vector_id)
}
/// Reports whether vector `vector_id` has a metadata entry named `key`.
#[wasm_bindgen(js_name = "hasMetadata")]
#[must_use]
pub fn has_metadata(&self, vector_id: u32, key: &str) -> bool {
self.metadata.has_key(vector_id, key)
}
/// Returns the metadata store's key count for vector `vector_id`.
#[wasm_bindgen(js_name = "metadataKeyCount")]
#[must_use]
pub fn metadata_key_count(&self, vector_id: u32) -> usize {
self.metadata.key_count(vector_id)
}
/// Returns the metadata store's vector count (presumably the number of
/// vectors that have at least one metadata entry — confirm against the store).
#[wasm_bindgen(js_name = "metadataVectorCount")]
#[must_use]
pub fn metadata_vector_count(&self) -> usize {
self.metadata.vector_count()
}
/// Returns the metadata store's total key count across all vectors.
#[wasm_bindgen(js_name = "totalMetadataCount")]
#[must_use]
pub fn total_metadata_count(&self) -> usize {
self.metadata.total_key_count()
}
/// Inserts `vector` into the HNSW index and attaches the key/value pairs of
/// `metadata_js` to the newly assigned vector ID.
///
/// Everything is validated before the index is touched: the vector length must
/// equal the index dimensions, every component must be finite, the metadata
/// must parse into supported value types, must not exceed
/// `MAX_KEYS_PER_VECTOR` entries, and every key and value must pass
/// `validate_key` / `validate_value`.
///
/// Returns the new vector ID as a `u32`.
///
/// # Errors
/// Returns an error for any validation failure listed above, when the index
/// is not available as HNSW, when the index insert fails, when the assigned
/// ID does not fit in `u32`, or when the metadata store rejects an entry.
#[wasm_bindgen(js_name = "insertWithMetadata")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn insert_with_metadata(
    &mut self,
    vector: Float32Array,
    metadata_js: JsValue,
) -> Result<u32, JsValue> {
    let dimensions = self.inner.dimensions();
    let len = vector.length();
    if len != dimensions {
        return Err(EdgeVecError::Graph(GraphError::DimensionMismatch {
            expected: dimensions as usize,
            actual: len as usize,
        })
        .into());
    }
    let vec = vector.to_vec();
    if vec.iter().any(|v| !v.is_finite()) {
        return Err(
            EdgeVecError::Validation("Vector contains non-finite values".to_string()).into(),
        );
    }

    let metadata_map = parse_js_metadata_object(&metadata_js)?;
    if metadata_map.len() > MAX_KEYS_PER_VECTOR {
        return Err(EdgeVecError::Validation(format!(
            "Too many metadata keys: {} (max {})",
            metadata_map.len(),
            MAX_KEYS_PER_VECTOR
        ))
        .into());
    }
    for (key, value) in &metadata_map {
        validate_key(key).map_err(|e| {
            EdgeVecError::Validation(format!("Invalid metadata key '{}': {}", key, e))
        })?;
        validate_value(value).map_err(|e| {
            EdgeVecError::Validation(format!("Invalid metadata value for '{}': {}", key, e))
        })?;
    }

    let (index, storage) = self.inner.as_hnsw_mut()?;
    let id = index.insert(&vec, storage).map_err(EdgeVecError::from)?;

    // Reject IDs that do not fit in u32 *before* writing any metadata: a
    // truncated ID would alias a different vector and overwrite its metadata.
    let metadata_id = u32::try_from(id.0)
        .map_err(|_| EdgeVecError::Validation("Vector ID overflowed u32".to_string()))?;

    for (key, value) in metadata_map {
        self.metadata
            .insert(metadata_id, &key, value)
            .map_err(|e| EdgeVecError::Validation(format!("Metadata insert failed: {e}")))?;
    }
    track_vector_insert(dimensions);
    Ok(metadata_id)
}
/// Runs a k-nearest-neighbour search restricted by a metadata filter expression.
///
/// `filter` is parsed with the crate's filter parser and evaluated through a
/// `FilteredSearcher` using `FilterStrategy::Auto`. The result is a JS array
/// of `{ id, distance }` objects.
///
/// # Errors
/// Fails when `k` is zero, the HNSW view is unavailable, the query length
/// differs from the index dimensions, the query has non-finite components,
/// the filter does not parse, or the search itself fails.
#[wasm_bindgen(js_name = "searchWithFilter")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn search_with_filter(
    &mut self,
    query: Float32Array,
    filter: &str,
    k: usize,
) -> Result<JsValue, JsValue> {
    use crate::filter::{parse, FilterStrategy, FilteredSearcher};

    if k == 0 {
        return Err(JsValue::from_str("k must be greater than 0"));
    }
    let (hnsw, vectors) = self.inner.as_hnsw()?;

    let expected_dims = hnsw.config.dimensions;
    let actual_dims = query.length();
    if actual_dims != expected_dims {
        let mismatch = GraphError::DimensionMismatch {
            expected: expected_dims as usize,
            actual: actual_dims as usize,
        };
        return Err(EdgeVecError::Graph(mismatch).into());
    }

    let query_values = query.to_vec();
    if !query_values.iter().all(|component| component.is_finite()) {
        return Err(EdgeVecError::Validation(
            "Query vector contains non-finite values".to_string(),
        )
        .into());
    }

    let expr = parse(filter).map_err(|e| filter::filter_error_to_jsvalue(&e))?;
    let adapter = EdgeVecMetadataAdapter::new(&self.metadata, hnsw.len());
    let mut searcher = FilteredSearcher::new(hnsw, vectors, &adapter);
    let outcome = searcher
        .search_filtered(&query_values, k, Some(&expr), FilterStrategy::Auto)
        .map_err(|e| JsValue::from_str(&format!("Search failed: {e}")))?;

    // Convert the hits into a JS array of plain `{ id, distance }` objects.
    let hits = &outcome.results;
    let out = Array::new_with_length(hits.len() as u32);
    let id_key = JsValue::from_str("id");
    let distance_key = JsValue::from_str("distance");
    for (slot, hit) in hits.iter().enumerate() {
        let entry = Object::new();
        Reflect::set(&entry, &id_key, &JsValue::from(hit.vector_id.0 as u32))?;
        Reflect::set(&entry, &distance_key, &JsValue::from(hit.distance))?;
        out.set(slot as u32, entry.into());
    }
    Ok(out.into())
}
/// Returns the metadata of vector `id` as a JS value built by
/// `metadata_to_js_object` (the same call `getAllMetadata` makes).
#[wasm_bindgen(js_name = "getVectorMetadata")]
#[must_use]
pub fn get_vector_metadata(&self, id: u32) -> JsValue {
metadata::metadata_to_js_object(&self.metadata, id)
}
/// Searches the index through its binary-quantized (BQ) search path.
///
/// The result is a JS array of `{ id, distance }` objects, where `distance`
/// carries the second element of each tuple returned by the index's
/// `search_bq`.
///
/// # Errors
/// Fails when `k` is zero, the HNSW view is unavailable, the query length
/// differs from the index dimensions, the query has non-finite components,
/// or the BQ search fails.
#[wasm_bindgen(js_name = "searchBQ")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn search_bq(&self, query: Float32Array, k: usize) -> Result<JsValue, JsValue> {
    if k == 0 {
        return Err(JsValue::from_str("k must be greater than 0"));
    }
    let (hnsw, vectors) = self.inner.as_hnsw()?;

    let expected_dims = hnsw.config.dimensions;
    let actual_dims = query.length();
    if actual_dims != expected_dims {
        let mismatch = GraphError::DimensionMismatch {
            expected: expected_dims as usize,
            actual: actual_dims as usize,
        };
        return Err(EdgeVecError::Graph(mismatch).into());
    }

    let query_values = query.to_vec();
    if !query_values.iter().all(|component| component.is_finite()) {
        return Err(EdgeVecError::Validation(
            "Query vector contains non-finite values".to_string(),
        )
        .into());
    }

    let hits = hnsw
        .search_bq(&query_values, k, vectors)
        .map_err(EdgeVecError::from)?;

    // Convert the hits into a JS array of plain `{ id, distance }` objects.
    let out = Array::new_with_length(hits.len() as u32);
    let id_key = JsValue::from_str("id");
    let distance_key = JsValue::from_str("distance");
    for (slot, (vid, score)) in hits.iter().enumerate() {
        let entry = Object::new();
        Reflect::set(&entry, &id_key, &JsValue::from(vid.0 as u32))?;
        Reflect::set(&entry, &distance_key, &JsValue::from(*score))?;
        out.set(slot as u32, entry.into());
    }
    Ok(out.into())
}
/// Searches through the binary-quantized path and rescoring step of the index.
///
/// `rescore_factor` is forwarded unchanged to the index's
/// `search_bq_rescored`. The result is a JS array of `{ id, distance }`
/// objects, where `distance` carries the second element of each returned tuple.
///
/// # Errors
/// Fails when `k` or `rescore_factor` is zero, the HNSW view is unavailable,
/// the query length differs from the index dimensions, the query has
/// non-finite components, or the search fails.
#[wasm_bindgen(js_name = "searchBQRescored")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn search_bq_rescored(
    &self,
    query: Float32Array,
    k: usize,
    rescore_factor: usize,
) -> Result<JsValue, JsValue> {
    if k == 0 {
        return Err(JsValue::from_str("k must be greater than 0"));
    }
    if rescore_factor == 0 {
        return Err(JsValue::from_str("rescoreFactor must be greater than 0"));
    }
    let (hnsw, vectors) = self.inner.as_hnsw()?;

    let expected_dims = hnsw.config.dimensions;
    let actual_dims = query.length();
    if actual_dims != expected_dims {
        let mismatch = GraphError::DimensionMismatch {
            expected: expected_dims as usize,
            actual: actual_dims as usize,
        };
        return Err(EdgeVecError::Graph(mismatch).into());
    }

    let query_values = query.to_vec();
    if !query_values.iter().all(|component| component.is_finite()) {
        return Err(EdgeVecError::Validation(
            "Query vector contains non-finite values".to_string(),
        )
        .into());
    }

    let hits = hnsw
        .search_bq_rescored(&query_values, k, rescore_factor, vectors)
        .map_err(EdgeVecError::from)?;

    // Convert the hits into a JS array of plain `{ id, distance }` objects.
    let out = Array::new_with_length(hits.len() as u32);
    let id_key = JsValue::from_str("id");
    let distance_key = JsValue::from_str("distance");
    for (slot, (vid, score)) in hits.iter().enumerate() {
        let entry = Object::new();
        Reflect::set(&entry, &id_key, &JsValue::from(vid.0 as u32))?;
        Reflect::set(&entry, &distance_key, &JsValue::from(*score))?;
        out.set(slot as u32, entry.into());
    }
    Ok(out.into())
}
/// Dense vector search configured by a JS options object, optionally combining
/// binary quantization (BQ) with a metadata filter.
///
/// `options` is parsed by `parse_hybrid_search_options` (`k`, `filter`,
/// `useBQ`, `rescoreFactor`). The result is a JS array of `{ id, distance }`
/// objects.
///
/// Search path selection:
/// - BQ requested and BQ storage present, with filter: over-fetch BQ
///   candidates, filter them by metadata, keep the first `k`, then rescore.
/// - BQ requested and BQ storage present, no filter: `search_bq_rescored`.
/// - otherwise, with filter: `FilteredSearcher` with `FilterStrategy::Auto`.
/// - otherwise: plain `index.search`.
///
/// # Errors
/// Fails when the options are invalid, `k` is zero, the HNSW view is
/// unavailable, the query length differs from the index dimensions, the query
/// has non-finite components, the filter does not parse, or the selected
/// search fails.
#[wasm_bindgen(js_name = "searchHybrid")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn search_hybrid(
&mut self,
query: Float32Array,
options: JsValue,
) -> Result<JsValue, JsValue> {
use crate::filter::{parse, FilterStrategy, FilteredSearcher};
let opts = parse_hybrid_search_options(&options)?;
if opts.k == 0 {
return Err(JsValue::from_str("k must be greater than 0"));
}
let (index, storage) = self.inner.as_hnsw()?;
let len = query.length();
if len != index.config.dimensions {
return Err(EdgeVecError::Graph(GraphError::DimensionMismatch {
expected: index.config.dimensions as usize,
actual: len as usize,
})
.into());
}
let query_vec = query.to_vec();
if query_vec.iter().any(|v| !v.is_finite()) {
return Err(EdgeVecError::Validation(
"Query vector contains non-finite values".to_string(),
)
.into());
}
// BQ is used only when it was requested AND the index actually has BQ storage.
let use_bq = opts.use_bq && index.bq_storage.is_some();
// A rescore factor of 0 is bumped to 1 so the over-fetch below never asks for 0 candidates.
let rescore_factor = opts.rescore_factor.max(1);
let results: Vec<(crate::hnsw::VectorId, f32)> = if use_bq {
if let Some(ref filter_str) = opts.filter {
let filter_expr =
parse(filter_str).map_err(|e| filter::filter_error_to_jsvalue(&e))?;
// Over-fetch k * rescore_factor candidates so enough survive the metadata filter.
let overfetch_k = opts.k.saturating_mul(rescore_factor);
let bq_candidates = index
.search_bq(&query_vec, overfetch_k, storage)
.map_err(EdgeVecError::from)?;
// Vectors without metadata are evaluated against an empty map.
let empty_map = std::collections::HashMap::new();
let mut filtered: Vec<_> = bq_candidates
.into_iter()
.filter(|(vid, _)| {
let metadata = self.metadata.get_all(vid.0 as u32).unwrap_or(&empty_map);
// A filter evaluation error is treated as "does not match".
crate::filter::evaluate(&filter_expr, metadata).unwrap_or(false)
})
.take(opts.k)
.collect();
if !filtered.is_empty() {
use super::hnsw::rescore::rescore_top_k;
let rescored =
rescore_top_k(&filtered, &query_vec, storage, opts.k.min(filtered.len()));
// NOTE(review): this branch stores `1 / (1 + dist)` under the `distance`
// key, while the non-BQ branches store the search's own distance value —
// confirm callers expect this.
filtered = rescored
.into_iter()
.map(|(id, dist)| (id, 1.0 / (1.0 + dist)))
.collect();
}
filtered
} else {
index
.search_bq_rescored(&query_vec, opts.k, rescore_factor, storage)
.map_err(EdgeVecError::from)?
}
} else if let Some(ref filter_str) = opts.filter {
let filter_expr = parse(filter_str).map_err(|e| filter::filter_error_to_jsvalue(&e))?;
let metadata_adapter = EdgeVecMetadataAdapter::new(&self.metadata, index.len());
let mut searcher = FilteredSearcher::new(index, storage, &metadata_adapter);
let result = searcher
.search_filtered(&query_vec, opts.k, Some(&filter_expr), FilterStrategy::Auto)
.map_err(|e| JsValue::from_str(&format!("Search failed: {e}")))?;
result
.results
.into_iter()
.map(|r| (r.vector_id, r.distance))
.collect()
} else {
let search_results = index
.search(&query_vec, opts.k, storage)
.map_err(EdgeVecError::from)?;
search_results
.into_iter()
.map(|r| (r.vector_id, r.distance))
.collect()
};
// Convert the (id, value) pairs into a JS array of `{ id, distance }` objects.
let arr = Array::new_with_length(results.len() as u32);
for (i, (vector_id, distance)) in results.iter().enumerate() {
let obj = Object::new();
Reflect::set(
&obj,
&JsValue::from_str("id"),
&JsValue::from(vector_id.0 as u32),
)?;
Reflect::set(
&obj,
&JsValue::from_str("distance"),
&JsValue::from(*distance),
)?;
arr.set(i as u32, obj.into());
}
Ok(arr.into())
}
/// Reports whether binary-quantization storage is present.
///
/// Only an HNSW index with `bq_storage` set yields `true`; a flat index
/// always yields `false`.
#[wasm_bindgen(js_name = "hasBQ")]
#[must_use]
pub fn has_bq(&self) -> bool {
    matches!(
        &self.inner,
        IndexVariant::Hnsw { index, .. } if index.bq_storage.is_some()
    )
}
/// Enables binary quantization on the HNSW index by calling its `enable_bq`
/// with the current vector storage.
///
/// # Errors
/// Propagates the error from `as_hnsw_mut()`, or the index's `enable_bq`
/// error converted through `EdgeVecError`.
#[wasm_bindgen(js_name = "enableBQ")]
pub fn enable_bq(&mut self) -> Result<(), JsValue> {
let (index, storage) = self.inner.as_hnsw_mut()?;
index
.enable_bq(storage)
.map_err(|e| EdgeVecError::from(e).into())
}
/// Filtered k-nearest-neighbour search configured by a JSON options string;
/// returns the result serialized as a JSON string.
///
/// `options_json` is deserialized into `SearchFilteredOptions` (camelCase
/// keys). `strategy` selects `"pre"`, `"post"` or `"hybrid"`; any other value,
/// or none, selects `FilterStrategy::Auto`. The response carries the hits,
/// search statistics and two timings in milliseconds.
///
/// Timings come from `window.performance.now()`; when no window or
/// performance object is available, both timings are reported as `0.0`.
///
/// # Errors
/// Fails when the HNSW view is unavailable, the query length differs from the
/// index dimensions, the query has non-finite components, the options JSON or
/// the filter does not parse, the search fails, or the response cannot be
/// serialized.
#[wasm_bindgen(js_name = "searchFiltered")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_possible_truncation)]
pub fn search_filtered(
&mut self,
query: Float32Array,
k: usize,
options_json: &str,
) -> Result<String, JsValue> {
use crate::filter::{parse, FilterStrategy, FilteredSearcher};
// Start of the total-time measurement; `None` when no performance clock exists.
let total_start = web_sys::window()
.and_then(|w| w.performance())
.map(|p| p.now());
let (index, storage) = self.inner.as_hnsw()?;
let len = query.length();
if len != index.config.dimensions {
return Err(EdgeVecError::Graph(GraphError::DimensionMismatch {
expected: index.config.dimensions as usize,
actual: len as usize,
})
.into());
}
let query_vec = query.to_vec();
if query_vec.iter().any(|v| !v.is_finite()) {
return Err(EdgeVecError::Validation(
"Query vector contains non-finite values".to_string(),
)
.into());
}
let options: SearchFilteredOptions = serde_json::from_str(options_json)
.map_err(|e| JsValue::from_str(&format!("Invalid options JSON: {e}")))?;
// Start of the filter-time measurement; it spans filter parsing and the search call.
let filter_start = web_sys::window()
.and_then(|w| w.performance())
.map(|p| p.now());
let filter = match &options.filter {
Some(filter_str) => {
Some(parse(filter_str).map_err(|e| filter::filter_error_to_jsvalue(&e))?)
}
None => None,
};
// Map the strategy name to a strategy; unrecognised or missing names fall back to Auto.
let strategy = match options.strategy.as_deref() {
Some("pre") => FilterStrategy::PreFilter,
Some("post") => FilterStrategy::PostFilter {
oversample: options.oversample_factor.unwrap_or(3.0),
},
Some("hybrid") => FilterStrategy::Hybrid {
oversample_min: 1.5,
oversample_max: options.oversample_factor.unwrap_or(10.0),
},
_ => FilterStrategy::Auto,
};
let metadata_adapter = EdgeVecMetadataAdapter::new(&self.metadata, index.len());
let mut searcher = FilteredSearcher::new(index, storage, &metadata_adapter);
let result = searcher
.search_filtered(&query_vec, k, filter.as_ref(), strategy)
.map_err(|e| JsValue::from_str(&format!("Search failed: {e}")))?;
let filter_time_ms = match (
filter_start,
web_sys::window().and_then(|w| w.performance()),
) {
(Some(start), Some(perf)) => perf.now() - start,
_ => 0.0,
};
let include_metadata = options.include_metadata.unwrap_or(false);
let include_vectors = options.include_vectors.unwrap_or(false);
let response = SearchFilteredResult {
results: result
.results
.iter()
.map(|r| {
let id = r.vector_id.0 as u32;
SearchFilteredItem {
id,
score: r.distance,
// Metadata is attached only on request; a failed JSON conversion yields `None`.
metadata: if include_metadata {
self.metadata
.get_all(id)
.and_then(|m| serde_json::to_value(m).ok())
} else {
None
},
// The stored vector is copied into the response only on request.
vector: if include_vectors {
Some(storage.get_vector(r.vector_id).to_vec())
} else {
None
},
}
})
.collect(),
complete: result.complete,
observed_selectivity: result.observed_selectivity,
strategy_used: strategy_to_string(&result.strategy_used),
vectors_evaluated: result.vectors_evaluated,
filter_time_ms,
// Evaluated last, so it also covers building the result items above.
total_time_ms: match (total_start, web_sys::window().and_then(|w| w.performance())) {
(Some(start), Some(perf)) => perf.now() - start,
_ => 0.0,
},
};
serde_json::to_string(&response)
.map_err(|e| JsValue::from_str(&format!("Serialization error: {e}")))
}
/// Samples the current memory pressure using the configured warning and
/// critical thresholds and returns it serialized as a JS value.
///
/// # Errors
/// Returns the serialization error message if the conversion to a JS value fails.
#[wasm_bindgen(js_name = "getMemoryPressure")]
pub fn get_memory_pressure(&self) -> Result<JsValue, JsValue> {
let pressure = MemoryPressure::current_with_thresholds(
self.memory_config.warning_threshold,
self.memory_config.critical_threshold,
);
serde_wasm_bindgen::to_value(&pressure).map_err(|e| JsValue::from_str(&e.to_string()))
}
/// Replaces the memory configuration with one deserialized from `config`.
///
/// Both thresholds must lie strictly between 0 and 100, and the warning
/// threshold must be strictly below the critical threshold. NaN thresholds
/// are rejected explicitly, because NaN compares false against every bound
/// and would otherwise pass all range checks.
///
/// # Errors
/// Fails when `config` cannot be deserialized into `MemoryConfig` or when a
/// threshold violates the rules above. The stored configuration is left
/// unchanged on error.
#[wasm_bindgen(js_name = "setMemoryConfig")]
pub fn set_memory_config(&mut self, config: JsValue) -> Result<(), JsValue> {
    let config: MemoryConfig = serde_wasm_bindgen::from_value(config)
        .map_err(|e| JsValue::from_str(&format!("Invalid config: {e}")))?;

    let warning = config.warning_threshold;
    let critical = config.critical_threshold;

    if warning.is_nan() || warning <= 0.0 || warning >= 100.0 {
        return Err(JsValue::from_str(
            "warningThreshold must be between 0 and 100",
        ));
    }
    if critical.is_nan() || critical <= 0.0 || critical >= 100.0 {
        return Err(JsValue::from_str(
            "criticalThreshold must be between 0 and 100",
        ));
    }
    if warning >= critical {
        return Err(JsValue::from_str(
            "warningThreshold must be less than criticalThreshold",
        ));
    }

    self.memory_config = config;
    Ok(())
}
/// Reports whether inserts are currently allowed under the memory policy.
///
/// Always `true` when `block_inserts_on_critical` is disabled; otherwise
/// `true` unless the sampled memory pressure level is `Critical`.
#[wasm_bindgen(js_name = "canInsert")]
#[must_use]
pub fn can_insert(&self) -> bool {
    let cfg = &self.memory_config;
    if cfg.block_inserts_on_critical {
        let snapshot = MemoryPressure::current_with_thresholds(
            cfg.warning_threshold,
            cfg.critical_threshold,
        );
        snapshot.level != MemoryPressureLevel::Critical
    } else {
        // Blocking is disabled, so memory pressure is not even sampled.
        true
    }
}
/// Builds a recommendation from the current memory pressure and returns it
/// serialized as a JS value.
///
/// - `Normal`: action `"none"`.
/// - `Warning`: action `"compact"`.
/// - `Critical`: action `"reduce"`; `can_insert` mirrors the
///   `block_inserts_on_critical` setting, and the message mentions blocked
///   inserts only when inserts really are blocked.
///
/// `suggest_compact` follows the HNSW index's `needs_compaction()` (always
/// `false` for a flat index), except at `Critical` level where it is always
/// `true`.
///
/// # Errors
/// Returns the serialization error message if the conversion to a JS value fails.
#[wasm_bindgen(js_name = "getMemoryRecommendation")]
pub fn get_memory_recommendation(&self) -> Result<JsValue, JsValue> {
    let pressure = MemoryPressure::current_with_thresholds(
        self.memory_config.warning_threshold,
        self.memory_config.critical_threshold,
    );
    let needs_compaction = match &self.inner {
        IndexVariant::Hnsw { index, .. } => index.needs_compaction(),
        IndexVariant::Flat { .. } => false,
    };
    let inserts_blocked = self.memory_config.block_inserts_on_critical;

    let recommendation = match pressure.level {
        MemoryPressureLevel::Normal => MemoryRecommendation {
            action: "none".to_string(),
            message: "Memory usage is healthy.".to_string(),
            can_insert: true,
            suggest_compact: needs_compaction,
        },
        MemoryPressureLevel::Warning => MemoryRecommendation {
            action: "compact".to_string(),
            message: format!(
                "Memory usage at {:.1}%. Consider running compact() to free deleted vectors.",
                pressure.usage_percent
            ),
            can_insert: true,
            suggest_compact: needs_compaction,
        },
        MemoryPressureLevel::Critical => MemoryRecommendation {
            action: "reduce".to_string(),
            // Only claim that inserts are blocked when the configuration
            // actually blocks them, so the message agrees with `can_insert`.
            message: if inserts_blocked {
                format!(
                    "Memory usage critical at {:.1}%. Inserts blocked. Run compact() or delete vectors.",
                    pressure.usage_percent
                )
            } else {
                format!(
                    "Memory usage critical at {:.1}%. Run compact() or delete vectors.",
                    pressure.usage_percent
                )
            },
            can_insert: !inserts_blocked,
            suggest_compact: true,
        },
    };
    serde_wasm_bindgen::to_value(&recommendation).map_err(|e| JsValue::from_str(&e.to_string()))
}
/// Returns the current memory configuration serialized as a JS value.
///
/// # Errors
/// Returns the serialization error message if the conversion to a JS value fails.
#[wasm_bindgen(js_name = "getMemoryConfig")]
pub fn get_memory_config(&self) -> Result<JsValue, JsValue> {
serde_wasm_bindgen::to_value(&self.memory_config)
.map_err(|e| JsValue::from_str(&e.to_string()))
}
/// Returns the value reported by the underlying index's `memory_usage()`
/// (presumably in bytes — confirm against `IndexVariant::memory_usage`).
#[wasm_bindgen(js_name = "memoryUsage")]
pub fn memory_usage(&self) -> usize {
self.inner.memory_usage()
}
/// Returns the value reported by the underlying index's `serialized_size()`
/// (presumably in bytes — confirm against `IndexVariant::serialized_size`).
#[wasm_bindgen(js_name = "serializedSize")]
pub fn serialized_size(&self) -> usize {
self.inner.serialized_size()
}
/// Creates the sparse storage if it does not exist yet.
///
/// Calling this again when sparse storage is already present leaves the
/// existing storage untouched.
#[cfg(feature = "sparse")]
#[wasm_bindgen(js_name = "initSparseStorage")]
pub fn init_sparse_storage(&mut self) {
    // Only constructs a new storage when the slot is currently empty.
    let _ = self.sparse_storage.get_or_insert_with(SparseStorage::new);
}
/// Reports whether sparse storage has been created.
#[cfg(feature = "sparse")]
#[wasm_bindgen(js_name = "hasSparseStorage")]
#[must_use]
pub fn has_sparse_storage(&self) -> bool {
self.sparse_storage.is_some()
}
/// Returns the sparse storage's `len()`, or `0` when sparse storage has not
/// been created.
#[cfg(feature = "sparse")]
#[wasm_bindgen(js_name = "sparseCount")]
#[must_use]
pub fn sparse_count(&self) -> usize {
self.sparse_storage.as_ref().map_or(0, SparseStorage::len)
}
/// Inserts a sparse vector given as parallel `indices` / `values` arrays.
///
/// Sparse storage is created on demand if it does not exist yet. The new ID
/// is returned as an `f64` converted from the storage's 64-bit ID.
///
/// # Errors
/// Fails when the two arrays differ in length, when `SparseVector::new`
/// rejects the input, or when the storage insert fails.
#[cfg(feature = "sparse")]
#[wasm_bindgen(js_name = "insertSparse")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::cast_precision_loss)]
pub fn insert_sparse(
    &mut self,
    indices: Uint32Array,
    values: Float32Array,
    dim: u32,
) -> Result<f64, JsValue> {
    if indices.length() != values.length() {
        return Err(JsValue::from_str(
            "indices and values must have the same length",
        ));
    }

    let sparse = SparseVector::new(indices.to_vec(), values.to_vec(), dim)
        .map_err(|e| JsValue::from_str(&e.to_string()))?;

    // Create the storage lazily so callers need not call initSparseStorage() first.
    let store = self.sparse_storage.get_or_insert_with(SparseStorage::new);
    match store.insert(&sparse) {
        Ok(assigned) => Ok(assigned.as_u64() as f64),
        Err(e) => Err(JsValue::from_str(&e.to_string())),
    }
}
/// Searches the sparse storage with a sparse query given as parallel
/// `indices` / `values` arrays.
///
/// Returns a JSON string holding an array of `{ "id", "score" }` objects.
///
/// # Errors
/// Fails when the two arrays differ in length, `k` is zero,
/// `SparseVector::new` rejects the query, sparse storage has not been
/// initialized, or the results cannot be serialized.
#[cfg(feature = "sparse")]
#[wasm_bindgen(js_name = "searchSparse")]
#[allow(clippy::needless_pass_by_value)]
pub fn search_sparse(
    &self,
    indices: Uint32Array,
    values: Float32Array,
    dim: u32,
    k: usize,
) -> Result<String, JsValue> {
    if indices.length() != values.length() {
        return Err(JsValue::from_str(
            "indices and values must have the same length",
        ));
    }
    if k == 0 {
        return Err(JsValue::from_str("k must be greater than 0"));
    }

    let sparse_query = SparseVector::new(indices.to_vec(), values.to_vec(), dim)
        .map_err(|e| JsValue::from_str(&e.to_string()))?;

    let store = match self.sparse_storage.as_ref() {
        Some(store) => store,
        None => {
            return Err(JsValue::from_str(
                "Sparse storage not initialized. Call initSparseStorage() first.",
            ))
        }
    };

    let hits = SparseSearcher::new(store).search(&sparse_query, k);
    let mut payload: Vec<serde_json::Value> = Vec::new();
    for hit in &hits {
        payload.push(serde_json::json!({
            "id": hit.id.as_u64(),
            "score": hit.score
        }));
    }
    serde_json::to_string(&payload).map_err(|e| JsValue::from_str(&e.to_string()))
}
/// Combined dense + sparse search whose two result lists are fused into one ranking.
///
/// `options_json` is deserialized into `SparseHybridOptions`; `dense_k` and
/// `sparse_k` default to 20 when absent. Returns a JSON string holding an
/// array of objects with `id` and `score`, plus `dense_rank`, `dense_score`,
/// `sparse_rank` and `sparse_score` whenever the result carries them.
///
/// # Errors
/// Fails when the index is a flat index, the options JSON is invalid, the
/// dense query length differs from the index dimensions, the sparse arrays
/// differ in length, the dense query has non-finite components, the sparse
/// query is rejected, sparse storage is not initialized, the linear fusion
/// parameters are rejected, the search fails, or serialization fails.
#[cfg(feature = "sparse")]
#[wasm_bindgen(js_name = "hybridSearch")]
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::too_many_arguments)]
pub fn hybrid_search(
    &self,
    dense_query: Float32Array,
    sparse_indices: Uint32Array,
    sparse_values: Float32Array,
    sparse_dim: u32,
    options_json: &str,
) -> Result<String, JsValue> {
    let (hnsw, vectors) = match &self.inner {
        IndexVariant::Flat { .. } => {
            return Err(JsValue::from_str(
                "Hybrid search is only supported for HNSW indexes (dense vectors). \
                 Flat/binary indexes do not support hybrid search.",
            ));
        }
        IndexVariant::Hnsw { index, storage } => (index.as_ref(), storage),
    };

    let options: SparseHybridOptions = serde_json::from_str(options_json)
        .map_err(|e| JsValue::from_str(&format!("Invalid options JSON: {e}")))?;

    let expected_dims = hnsw.config.dimensions;
    let dense_len = dense_query.length();
    if dense_len != expected_dims {
        let message = format!(
            "Dense query dimension mismatch: expected {}, got {}",
            expected_dims, dense_len
        );
        return Err(JsValue::from_str(&message));
    }
    if sparse_indices.length() != sparse_values.length() {
        return Err(JsValue::from_str(
            "sparse_indices and sparse_values must have the same length",
        ));
    }

    let dense: Vec<f32> = dense_query.to_vec();
    let sparse_idx: Vec<u32> = sparse_indices.to_vec();
    let sparse_val: Vec<f32> = sparse_values.to_vec();
    if !dense.iter().all(|component| component.is_finite()) {
        return Err(JsValue::from_str("Dense query contains non-finite values"));
    }

    let sparse_query = SparseVector::new(sparse_idx, sparse_val, sparse_dim)
        .map_err(|e| JsValue::from_str(&e.to_string()))?;

    let sparse_store = match self.sparse_storage.as_ref() {
        Some(store) => store,
        None => {
            return Err(JsValue::from_str(
                "Sparse storage not initialized. Call initSparseStorage() first.",
            ))
        }
    };

    let fusion = match &options.fusion {
        HybridFusionOption::Linear { alpha } => {
            FusionMethod::linear(*alpha).map_err(|e| JsValue::from_str(&e))?
        }
        HybridFusionOption::Rrf => FusionMethod::rrf(),
    };

    let dense_k = options.dense_k.unwrap_or(20);
    let sparse_k = options.sparse_k.unwrap_or(20);
    let config = HybridSearchConfig::new(dense_k, sparse_k, options.k, fusion);

    let hits = HybridSearcher::new(hnsw, vectors, sparse_store)
        .search(&dense, &sparse_query, &config)
        .map_err(|e| JsValue::from_str(&e.to_string()))?;

    let mut payload: Vec<serde_json::Value> = Vec::new();
    for hit in &hits {
        let mut entry = serde_json::json!({
            "id": hit.id.0,
            "score": hit.score
        });
        // Per-source ranks and scores are emitted only when present.
        if let Some(rank) = hit.dense_rank {
            entry["dense_rank"] = serde_json::json!(rank);
        }
        if let Some(score) = hit.dense_score {
            entry["dense_score"] = serde_json::json!(score);
        }
        if let Some(rank) = hit.sparse_rank {
            entry["sparse_rank"] = serde_json::json!(rank);
        }
        if let Some(score) = hit.sparse_score {
            entry["sparse_score"] = serde_json::json!(score);
        }
        payload.push(entry);
    }
    serde_json::to_string(&payload).map_err(|e| JsValue::from_str(&e.to_string()))
}
}
/// Statistics of one compaction run, exposed to JavaScript as read-only fields.
///
/// Each field is the like-named value from the index's compaction result,
/// narrowed to `u32`.
#[wasm_bindgen]
#[derive(Debug, Clone)]
pub struct WasmCompactionResult {
/// The compaction result's `tombstones_removed` value.
#[wasm_bindgen(readonly)]
pub tombstones_removed: u32,
/// The compaction result's `new_size` value.
#[wasm_bindgen(readonly)]
pub new_size: u32,
/// The compaction result's `duration_ms` value.
#[wasm_bindgen(readonly)]
pub duration_ms: u32,
}
/// Counters of one batch soft-delete, exposed to JavaScript through getters.
///
/// Each field is the like-named counter from the index's batch-delete result,
/// narrowed to `u32`.
#[wasm_bindgen]
#[derive(Debug, Clone)]
pub struct WasmBatchDeleteResult {
// The batch result's `deleted` counter.
deleted: u32,
// The batch result's `already_deleted` counter.
already_deleted: u32,
// The batch result's `invalid_ids` counter.
invalid_ids: u32,
// The batch result's `total` counter.
total: u32,
// The batch result's `unique_count` counter.
unique_count: u32,
}
/// JavaScript-facing accessors for the batch-delete counters.
#[wasm_bindgen]
impl WasmBatchDeleteResult {
/// Returns the `deleted` counter.
#[wasm_bindgen(getter)]
#[must_use]
pub fn deleted(&self) -> u32 {
self.deleted
}
/// Returns the `already_deleted` counter (exposed to JS as `alreadyDeleted`).
#[wasm_bindgen(getter, js_name = "alreadyDeleted")]
#[must_use]
pub fn already_deleted(&self) -> u32 {
self.already_deleted
}
/// Returns the `invalid_ids` counter (exposed to JS as `invalidIds`).
#[wasm_bindgen(getter, js_name = "invalidIds")]
#[must_use]
pub fn invalid_ids(&self) -> u32 {
self.invalid_ids
}
/// Returns the `total` counter.
#[wasm_bindgen(getter)]
#[must_use]
pub fn total(&self) -> u32 {
self.total
}
/// Returns the `unique_count` counter (exposed to JS as `uniqueCount`).
#[wasm_bindgen(getter, js_name = "uniqueCount")]
#[must_use]
pub fn unique_count(&self) -> u32 {
self.unique_count
}
/// Returns `true` when the `invalid_ids` counter is zero.
#[wasm_bindgen(js_name = "allValid")]
#[must_use]
pub fn all_valid(&self) -> bool {
self.invalid_ids == 0
}
/// Returns `true` when the `deleted` counter is greater than zero.
#[wasm_bindgen(js_name = "anyDeleted")]
#[must_use]
pub fn any_deleted(&self) -> bool {
self.deleted > 0
}
}
use crate::filter::FilterStrategy;
use crate::metadata::MetadataValue;
use std::collections::HashMap;
/// Adapter that exposes the crate's metadata store through the filter
/// module's `MetadataStore` trait.
struct EdgeVecMetadataAdapter<'a> {
// Borrowed metadata store that lookups are forwarded to.
store: &'a crate::metadata::MetadataStore,
// Value reported by `len()`; callers in this file pass the index length.
total_vectors: usize,
}
impl<'a> EdgeVecMetadataAdapter<'a> {
/// Creates an adapter over `store` that reports `total_vectors` as its length.
fn new(store: &'a crate::metadata::MetadataStore, total_vectors: usize) -> Self {
Self {
store,
total_vectors,
}
}
}
impl crate::filter::MetadataStore for EdgeVecMetadataAdapter<'_> {
    /// Returns the metadata of the entry at position `id`, if any.
    ///
    /// The underlying store is queried with `id + 1` (presumably because the
    /// filter layer uses 0-based positions while stored vector IDs start at 1
    /// — confirm against the filter module). When `id + 1` does not fit in
    /// `u32` the lookup yields `None` instead of wrapping around to an
    /// unrelated vector's metadata.
    fn get_metadata(&self, id: usize) -> Option<&HashMap<String, MetadataValue>> {
        let store_id = u32::try_from(id).ok()?.checked_add(1)?;
        self.store.get_all(store_id)
    }

    /// Returns the vector count supplied at construction time.
    fn len(&self) -> usize {
        self.total_vectors
    }
}
/// Options accepted by `searchFiltered`, deserialized from JSON with
/// camelCase keys (`oversampleFactor`, `includeMetadata`, `includeVectors`).
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct SearchFilteredOptions {
// Filter expression source; no filter is applied when absent.
filter: Option<String>,
// Strategy name: "pre", "post" or "hybrid"; anything else selects the automatic strategy.
strategy: Option<String>,
// Oversampling factor for the "post" strategy (default 3.0) and upper bound for "hybrid" (default 10.0).
oversample_factor: Option<f32>,
// Whether each hit should include its metadata (default false).
include_metadata: Option<bool>,
// Whether each hit should include its stored vector (default false).
include_vectors: Option<bool>,
}
/// Response of `searchFiltered`, serialized to JSON with camelCase keys.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SearchFilteredResult {
// The hits, in the order returned by the filtered searcher.
results: Vec<SearchFilteredItem>,
// Copied from the filtered search result's `complete` flag.
complete: bool,
// Copied from the filtered search result's `observed_selectivity`.
observed_selectivity: f32,
// Name of the strategy the searcher reports having used ("pre", "post", "hybrid" or "auto").
strategy_used: String,
// Copied from the filtered search result's `vectors_evaluated`.
vectors_evaluated: usize,
// Milliseconds spent on filter parsing plus the search call; 0.0 without a performance clock.
filter_time_ms: f64,
// Milliseconds for the whole call up to response construction; 0.0 without a performance clock.
total_time_ms: f64,
}
/// One hit in a `searchFiltered` response.
///
/// `metadata` and `vector` are omitted from the JSON when they are `None`.
#[derive(Serialize)]
struct SearchFilteredItem {
// Vector ID narrowed to u32.
id: u32,
// The hit's `distance` value as reported by the searcher.
score: f32,
// Metadata as JSON; present only when requested and available.
#[serde(skip_serializing_if = "Option::is_none")]
metadata: Option<serde_json::Value>,
// Copy of the stored vector; present only when requested.
#[serde(skip_serializing_if = "Option::is_none")]
vector: Option<Vec<f32>>,
}
/// Maps a filter strategy to the short name used in JSON responses
/// (`"pre"`, `"post"`, `"hybrid"` or `"auto"`).
fn strategy_to_string(strategy: &FilterStrategy) -> String {
    let label = match strategy {
        FilterStrategy::Auto => "auto",
        FilterStrategy::PreFilter => "pre",
        FilterStrategy::PostFilter { .. } => "post",
        FilterStrategy::Hybrid { .. } => "hybrid",
    };
    String::from(label)
}
// Largest integer JavaScript numbers represent exactly: 2^53 - 1
// (the value of `Number.MAX_SAFE_INTEGER`).
const JS_MAX_SAFE_INT: f64 = 9_007_199_254_740_991.0;
// Smallest such integer: -(2^53 - 1) (the value of `Number.MIN_SAFE_INTEGER`).
const JS_MIN_SAFE_INT: f64 = -9_007_199_254_740_991.0;
/// Converts a JavaScript object into a map of metadata keys to values.
///
/// The keys are those returned by `Object.keys`; each property value is
/// converted with `parse_js_metadata_value`.
///
/// # Errors
/// Fails when `js_obj` is not an object, a key is not a string, reading a
/// property throws, or a value has an unsupported type.
#[allow(clippy::cast_possible_truncation)]
fn parse_js_metadata_object(js_obj: &JsValue) -> Result<HashMap<String, MetadataValue>, JsValue> {
    use js_sys::Object as JsObject;

    if !js_obj.is_object() {
        return Err(JsValue::from_str("Metadata must be a JavaScript object"));
    }
    let source = match JsObject::try_from(js_obj) {
        Some(object) => object,
        None => {
            return Err(JsValue::from_str(
                "Failed to convert metadata to JavaScript object",
            ))
        }
    };

    let names = JsObject::keys(source);
    let mut parsed = HashMap::new();
    for name_js in (0..names.length()).map(|idx| names.get(idx)) {
        let name = match name_js.as_string() {
            Some(name) => name,
            None => return Err(JsValue::from_str("Metadata key must be a string")),
        };
        let raw = Reflect::get(source, &name_js)?;
        let converted = parse_js_metadata_value(&name, &raw)?;
        parsed.insert(name, converted);
    }
    Ok(parsed)
}
/// Converts one JavaScript value into a `MetadataValue`.
///
/// Supported inputs, checked in this order: string, boolean, finite number
/// and array of strings. A number with no fractional part that lies within
/// the JavaScript safe-integer range becomes `Integer`; every other finite
/// number becomes `Float`.
///
/// `key` is used only to build error messages.
///
/// # Errors
/// Fails for `null`/`undefined`, non-finite numbers, arrays containing a
/// non-string element, and any other unsupported type.
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_precision_loss)]
fn parse_js_metadata_value(key: &str, value: &JsValue) -> Result<MetadataValue, JsValue> {
    if value.is_null() || value.is_undefined() {
        let message = format!("Metadata value for key '{key}' cannot be null or undefined");
        return Err(JsValue::from_str(&message));
    }

    if let Some(text) = value.as_string() {
        return Ok(MetadataValue::String(text));
    }

    if let Some(flag) = value.as_bool() {
        return Ok(MetadataValue::Boolean(flag));
    }

    if let Some(number) = value.as_f64() {
        if !number.is_finite() {
            let message =
                format!("Metadata value for key '{key}' must be finite (not NaN or Infinity)");
            return Err(JsValue::from_str(&message));
        }
        // Whole numbers inside the safe-integer range are stored as integers.
        let is_safe_integer =
            number.fract() == 0.0 && (JS_MIN_SAFE_INT..=JS_MAX_SAFE_INT).contains(&number);
        return Ok(if is_safe_integer {
            MetadataValue::Integer(number as i64)
        } else {
            MetadataValue::Float(number)
        });
    }

    if js_sys::Array::is_array(value) {
        let items = js_sys::Array::from(value);
        // Stops at the first element that is not a string.
        let strings = (0..items.length())
            .map(|i| {
                items.get(i).as_string().ok_or_else(|| {
                    JsValue::from_str(&format!(
                        "Metadata array for key '{key}' must contain only strings, found non-string at index {i}"
                    ))
                })
            })
            .collect::<Result<Vec<String>, JsValue>>()?;
        return Ok(MetadataValue::StringArray(strings));
    }

    Err(JsValue::from_str(&format!(
        "Unsupported metadata value type for key '{key}'. Supported types: string, number, boolean, string[]"
    )))
}
/// Options for `searchHybrid`, as parsed from a JS object by
/// `parse_hybrid_search_options`.
struct HybridSearchOptions {
// Number of results requested (JS key `k`).
k: usize,
// Optional filter expression source (JS key `filter`).
filter: Option<String>,
// Whether binary quantization should be used when available (JS key `useBQ`, default true).
use_bq: bool,
// Over-fetch multiplier for BQ rescoring (JS key `rescoreFactor`, default 3).
rescore_factor: usize,
}
/// Parses the JS options object of `searchHybrid`.
///
/// Recognised properties:
/// - `k` (required): a finite number, truncated to `usize` with an `as`
///   cast. Values below 1 truncate to 0, which `search_hybrid` rejects.
/// - `filter` (optional): a string; `null`/`undefined` mean "no filter".
/// - `useBQ` (optional): boolean, defaults to `true`. A non-boolean value
///   also falls back to `true`.
/// - `rescoreFactor` (optional): number, defaults to `3`. A non-numeric
///   value also falls back to `3`.
///
/// # Errors
/// Fails when `options` is not an object, when `k` is missing, non-numeric
/// or non-finite, when `filter` is present but not a string, or when reading
/// a property throws.
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_sign_loss)]
fn parse_hybrid_search_options(options: &JsValue) -> Result<HybridSearchOptions, JsValue> {
    if !options.is_object() {
        return Err(JsValue::from_str(
            "Options must be a JavaScript object with at least { k: number }",
        ));
    }

    // Reject NaN / ±Infinity up front: the `as usize` cast would otherwise
    // turn Infinity into `usize::MAX` and request an unbounded search.
    let k = Reflect::get(options, &JsValue::from_str("k"))?
        .as_f64()
        .filter(|value| value.is_finite())
        .ok_or_else(|| JsValue::from_str("Options.k is required and must be a positive number"))?
        as usize;

    let filter_js = Reflect::get(options, &JsValue::from_str("filter"))?;
    let filter = if filter_js.is_undefined() || filter_js.is_null() {
        None
    } else {
        // A filter of the wrong type must not be dropped silently: that would
        // run the search unfiltered and return results the caller excluded.
        let text = filter_js
            .as_string()
            .ok_or_else(|| JsValue::from_str("Options.filter must be a string"))?;
        Some(text)
    };

    let use_bq_js = Reflect::get(options, &JsValue::from_str("useBQ"))?;
    let use_bq = if use_bq_js.is_undefined() || use_bq_js.is_null() {
        true
    } else {
        use_bq_js.as_bool().unwrap_or(true)
    };

    let rescore_factor_js = Reflect::get(options, &JsValue::from_str("rescoreFactor"))?;
    let rescore_factor = if rescore_factor_js.is_undefined() || rescore_factor_js.is_null() {
        3
    } else {
        rescore_factor_js.as_f64().unwrap_or(3.0) as usize
    };

    Ok(HybridSearchOptions {
        k,
        filter,
        use_bq,
        rescore_factor,
    })
}
/// Options accepted by `hybridSearch`, deserialized from JSON.
///
/// No rename rule is applied, so the JSON keys are exactly the field names
/// (`k`, `dense_k`, `sparse_k`, `fusion`).
#[cfg(feature = "sparse")]
#[derive(Deserialize)]
struct SparseHybridOptions {
// Number of fused results requested.
k: usize,
// Candidate count for the dense search; `hybrid_search` uses 20 when absent.
dense_k: Option<usize>,
// Candidate count for the sparse search; `hybrid_search` uses 20 when absent.
sparse_k: Option<usize>,
// Fusion method; defaults to RRF when the key is absent.
#[serde(default)]
fusion: HybridFusionOption,
}
/// Fusion method selection as it appears in the `hybridSearch` options JSON.
#[cfg(feature = "sparse")]
#[derive(Debug, Clone, Default)]
enum HybridFusionOption {
// Mapped to `FusionMethod::rrf()` by `hybrid_search`; this is the default.
#[default]
Rrf,
// Mapped to `FusionMethod::linear(alpha)` by `hybrid_search`.
Linear { alpha: f32 },
}
/// Custom deserialization for the `fusion` option.
///
/// Accepted shapes:
/// - the string `"rrf"` (case-insensitive);
/// - a map with an optional `"type"` key and, for linear fusion, an `"alpha"`
///   number: `{ "type": "linear", "alpha": 0.7 }`.
///
/// A map without `"type"`, or with type `"rrf"`, selects RRF. The map's
/// `"type"` is matched case-insensitively, consistent with the string form.
/// Unknown map keys are skipped.
#[cfg(feature = "sparse")]
impl<'de> Deserialize<'de> for HybridFusionOption {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{self, IgnoredAny, MapAccess, Visitor};

        const VARIANTS: &[&str] = &["rrf", "linear"];

        struct FusionVisitor;

        impl<'de> Visitor<'de> for FusionVisitor {
            type Value = HybridFusionOption;

            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                formatter.write_str(r#""rrf" or { "type": "linear", "alpha": number }"#)
            }

            fn visit_str<E>(self, value: &str) -> Result<HybridFusionOption, E>
            where
                E: de::Error,
            {
                match value.to_lowercase().as_str() {
                    "rrf" => Ok(HybridFusionOption::Rrf),
                    // The bare string form cannot carry the required alpha.
                    "linear" => Err(de::Error::custom(
                        "linear fusion requires an object with alpha: { \"type\": \"linear\", \"alpha\": 0.7 }",
                    )),
                    _ => Err(de::Error::unknown_variant(value, VARIANTS)),
                }
            }

            fn visit_map<M>(self, mut map: M) -> Result<HybridFusionOption, M::Error>
            where
                M: MapAccess<'de>,
            {
                let mut fusion_type: Option<String> = None;
                let mut alpha: Option<f32> = None;
                while let Some(key) = map.next_key::<String>()? {
                    match key.as_str() {
                        "type" => fusion_type = Some(map.next_value()?),
                        "alpha" => alpha = Some(map.next_value()?),
                        _ => {
                            // Skip the unknown value without building it in memory.
                            let _: IgnoredAny = map.next_value()?;
                        }
                    }
                }

                let normalized = fusion_type.as_deref().map(str::to_lowercase);
                match normalized.as_deref() {
                    Some("linear") => {
                        let alpha = alpha.ok_or_else(|| de::Error::missing_field("alpha"))?;
                        Ok(HybridFusionOption::Linear { alpha })
                    }
                    Some("rrf") | None => Ok(HybridFusionOption::Rrf),
                    // Report the caller's original spelling in the error.
                    Some(_) => Err(de::Error::unknown_variant(
                        fusion_type.as_deref().unwrap_or_default(),
                        VARIANTS,
                    )),
                }
            }
        }

        deserializer.deserialize_any(FusionVisitor)
    }
}