pub use builder::HnswConfigBuilder;
pub use config::{HnswConfig, hnsw_config};
pub use distance_metric::{DistanceMetric, compute_distance};
pub use errors::{
HnswConfigError, HnswError, HnswIndexError, HnswMultiLayerError, HnswStorageError,
};
pub use index::{HnswIndex, HnswIndexStats};
pub use storage::{
InMemoryVectorStorage, VectorBatch, VectorRecord, VectorStorage, VectorStorageStats,
};
pub use multilayer::{LayerMappings, LevelDistributor, MultiLayerNodeManager};
pub mod batch_filter;
pub mod builder;
pub mod config;
pub mod distance_functions;
pub mod distance_metric;
pub mod errors;
pub mod index;
pub mod layer;
pub mod multilayer;
pub mod neighborhood;
pub mod serialization;
pub mod simd;
pub mod storage;
#[cfg(feature = "native-v3")]
pub mod v3_storage;
pub use batch_filter::{filter_allowed_scalar, filter_batch, filter_denied_scalar};
pub use serialization::{decode_varint_scalar, delta_decode, delta_encode, encode_varint_scalar};
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `(abs_diff, rel_error)` between a scalar reference result and
    /// the value produced by the SIMD-dispatching implementation.
    ///
    /// The relative error is normalised by `|scalar|`; when the reference is
    /// no larger than `f32::EPSILON` in magnitude the absolute difference is
    /// reported instead, which avoids dividing by (nearly) zero.
    fn error_between(scalar: f32, simd: f32) -> (f32, f32) {
        let abs_diff = (scalar - simd).abs();
        let magnitude = scalar.abs();
        let rel_error = if magnitude > f32::EPSILON {
            abs_diff / magnitude
        } else {
            abs_diff
        };
        (abs_diff, rel_error)
    }

    /// Builds a vector holding every integer of `range`, converted to `f32`.
    fn ramp(range: std::ops::Range<i32>) -> Vec<f32> {
        range.map(|i| i as f32).collect()
    }

    /// Smoke test: the main re-exports resolve and the default configuration
    /// reports dimension 768 with the cosine metric.
    #[test]
    fn test_hnsw_module_imports() {
        let config = HnswConfig::default();
        // Constructed only to prove the re-exported items are reachable.
        let _builder = HnswConfigBuilder::new();
        let _metric = DistanceMetric::Cosine;

        assert_eq!(config.dimension, 768);
        assert_eq!(config.distance_metric, DistanceMetric::Cosine);
    }

    /// Every builder setter used here must be reflected in the built config.
    #[test]
    fn test_hnsw_config_builder() {
        let built = HnswConfigBuilder::new()
            .dimension(256)
            .m_connections(12)
            .ef_construction(150)
            .ef_search(40)
            .max_layers(12)
            .distance_metric(DistanceMetric::Euclidean)
            .build()
            .unwrap();

        assert_eq!(built.dimension, 256);
        assert_eq!(built.m, 12);
        assert_eq!(built.ef_construction, 150);
        assert_eq!(built.ef_search, 40);
        // `max_layers` is observed through the `ml` field.
        assert_eq!(built.ml, 12);
        assert_eq!(built.distance_metric, DistanceMetric::Euclidean);
    }

    /// Checks each distance metric on the pair (1, 0, 0) / (0, 1, 0).
    #[test]
    fn test_distance_metrics() {
        let x_axis = vec![1.0, 0.0, 0.0];
        let y_axis = vec![0.0, 1.0, 0.0];

        let cosine = compute_distance(DistanceMetric::Cosine, &x_axis, &y_axis);
        let euclidean = compute_distance(DistanceMetric::Euclidean, &x_axis, &y_axis);
        let manhattan = compute_distance(DistanceMetric::Manhattan, &x_axis, &y_axis);
        let dot = compute_distance(DistanceMetric::DotProduct, &x_axis, &y_axis);

        // NOTE(review): 0.5 for orthogonal vectors suggests the cosine
        // distance is scaled into [0, 1] -- confirm against `compute_distance`.
        assert!((cosine - 0.5).abs() < f32::EPSILON);
        assert!((euclidean - std::f32::consts::SQRT_2).abs() < f32::EPSILON);
        assert_eq!(manhattan, 2.0);
        assert_eq!(dot, 0.0);
    }

    /// A default builder must build successfully; forcing the dimension to
    /// zero must be rejected with `HnswConfigError::InvalidDimension`.
    #[test]
    fn test_error_handling() {
        assert!(HnswConfigBuilder::new().build().is_ok());

        let mut zero_dim = HnswConfigBuilder::new();
        // Written directly to the builder's inner config rather than through
        // the `dimension` setter.
        zero_dim.config.dimension = 0;
        let outcome = zero_dim.build();
        assert!(matches!(outcome, Err(HnswConfigError::InvalidDimension)));
    }

    /// The `hnsw_config()` entry point supports the same chained setters as
    /// the builder type.
    #[test]
    fn test_hnsw_config_function() {
        let built = hnsw_config()
            .dimension(512)
            .m_connections(24)
            .distance_metric(DistanceMetric::Manhattan)
            .build()
            .unwrap();

        assert_eq!(built.dimension, 512);
        assert_eq!(built.m, 24);
        assert_eq!(built.distance_metric, DistanceMetric::Manhattan);
    }

    /// Pins the values reported by `HnswConfig::default()`.
    #[test]
    fn test_default_configuration() {
        let defaults = HnswConfig::default();

        assert_eq!(defaults.dimension, 768);
        assert_eq!(defaults.m, 16);
        assert_eq!(defaults.ef_construction, 200);
        assert_eq!(defaults.ef_search, 50);
        assert_eq!(defaults.ml, 16);
        assert_eq!(defaults.distance_metric, DistanceMetric::Cosine);
    }

    /// A hand-written, larger configuration can be constructed field by field.
    #[test]
    fn test_high_precision_configuration() {
        let precise = HnswConfig {
            dimension: 1536,
            m: 32,
            ef_construction: 400,
            ef_search: 100,
            ml: 24,
            distance_metric: DistanceMetric::Cosine,
            enable_multilayer: false,
            multilayer_level_distribution_base: None,
            multilayer_deterministic_seed: None,
        };

        assert_eq!(precise.dimension, 1536);
        assert_eq!(precise.m, 32);
        assert!(precise.ef_construction >= precise.m);
        assert_eq!(precise.distance_metric, DistanceMetric::Cosine);
        assert!(!precise.enable_multilayer);
    }

    /// A hand-written, smaller configuration can be constructed field by field.
    #[test]
    fn test_fast_construction_configuration() {
        let fast = HnswConfig {
            dimension: 384,
            m: 8,
            ef_construction: 100,
            ef_search: 20,
            ml: 12,
            distance_metric: DistanceMetric::Euclidean,
            enable_multilayer: false,
            multilayer_level_distribution_base: None,
            multilayer_deterministic_seed: None,
        };

        assert_eq!(fast.dimension, 384);
        assert_eq!(fast.m, 8);
        assert_eq!(fast.ef_construction, 100);
        assert_eq!(fast.ef_search, 20);
        assert_eq!(fast.distance_metric, DistanceMetric::Euclidean);
        assert!(!fast.enable_multilayer);
    }

    /// The scalar allow-list filter is reachable through its full crate path
    /// and returns the allowed ids.
    #[test]
    fn test_batch_filter_module() {
        use crate::hnsw::batch_filter::filter_allowed_scalar;

        let candidates = vec![1, 2, 3, 4, 5];
        let allow_list = vec![2, 3, 4];

        let kept = filter_allowed_scalar(&candidates, &allow_list);
        assert_eq!(kept, vec![2, 3, 4]);
    }

    /// A single value survives a varint encode/decode round trip.
    #[test]
    fn test_serialization_module() {
        use crate::hnsw::serialization::{decode_varint_scalar, encode_varint_scalar};

        let original = 300u32;
        let mut bytes = Vec::new();
        encode_varint_scalar(&mut bytes, original).unwrap();

        let round_tripped = decode_varint_scalar(bytes.as_slice()).unwrap();
        assert_eq!(round_tripped, original);
    }

    /// `dot_product` must agree with `dot_product_scalar` for inputs of
    /// 3, 100 and 1000 elements.
    #[test]
    fn test_simd_matches_scalar_dot_product() {
        use crate::hnsw::simd::{dot_product, dot_product_scalar};

        let cases: Vec<(Vec<f32>, Vec<f32>)> = vec![
            (vec![1.0, 2.0, 3.0], vec![4.0, 5.0, 6.0]),
            (ramp(0..100), ramp(100..200)),
            (ramp(0..1000), ramp(1000..2000)),
        ];

        for (lhs, rhs) in cases {
            let scalar = dot_product_scalar(&lhs, &rhs);
            let simd = dot_product(&lhs, &rhs);
            let (abs_diff, rel_error) = error_between(scalar, simd);

            assert!(
                rel_error < 1e-5 || abs_diff < f32::EPSILON * 100.0,
                "Dot product differs for size {}: scalar={}, simd={}, diff={}, rel_error={}",
                lhs.len(),
                scalar,
                simd,
                abs_diff,
                rel_error
            );
        }
    }

    /// `euclidean_distance` must agree with `euclidean_distance_scalar` on a
    /// 100-element input.
    #[test]
    fn test_simd_matches_scalar_euclidean() {
        use crate::hnsw::simd::{euclidean_distance, euclidean_distance_scalar};

        let lhs = ramp(0..100);
        let rhs = ramp(100..200);

        let scalar = euclidean_distance_scalar(&lhs, &rhs);
        let simd = euclidean_distance(&lhs, &rhs);
        let (abs_diff, rel_error) = error_between(scalar, simd);

        assert!(
            rel_error < 1e-5 || abs_diff < f32::EPSILON * 10.0,
            "Euclidean: scalar={}, simd={}, diff={}, rel_error={}",
            scalar,
            simd,
            abs_diff,
            rel_error
        );
    }

    /// `cosine_similarity` must agree with `cosine_similarity_scalar` on
    /// 99-element vectors of reciprocals.
    #[test]
    fn test_simd_matches_scalar_cosine() {
        use crate::hnsw::simd::{cosine_similarity, cosine_similarity_scalar};

        let lhs: Vec<f32> = (1..100).map(|i| (i as f32).recip()).collect();
        let rhs: Vec<f32> = (101..200).map(|i| (i as f32).recip()).collect();

        let scalar = cosine_similarity_scalar(&lhs, &rhs);
        let simd = cosine_similarity(&lhs, &rhs);
        let (abs_diff, rel_error) = error_between(scalar, simd);

        // The relative tolerance here (1e-4) is looser than in the other
        // SIMD comparisons.
        assert!(
            rel_error < 1e-4 || abs_diff < f32::EPSILON * 100.0,
            "Cosine: scalar={}, simd={}, diff={}, rel_error={}",
            scalar,
            simd,
            abs_diff,
            rel_error
        );
    }

    /// `compute_norm_squared` must agree with `compute_norm_squared_scalar`
    /// on a 500-element input.
    #[test]
    fn test_simd_matches_scalar_norm_squared() {
        use crate::hnsw::simd::{compute_norm_squared, compute_norm_squared_scalar};

        let values: Vec<f32> = (1..=500).map(|i| i as f32 * 0.1).collect();

        let scalar = compute_norm_squared_scalar(&values);
        let simd = compute_norm_squared(&values);
        let (abs_diff, rel_error) = error_between(scalar, simd);

        assert!(
            rel_error < 1e-5 || abs_diff < 0.01,
            "Norm squared: scalar={}, simd={}, diff={}, rel_error={}",
            scalar,
            simd,
            abs_diff,
            rel_error
        );
    }

    /// Exercises the SIMD entry points on very short (2-element) and
    /// 17-element inputs.
    #[test]
    fn test_simd_correctness_edge_cases() {
        use crate::hnsw::simd::{dot_product, euclidean_distance};

        let short_lhs = vec![1.0, 2.0];
        let short_rhs = vec![3.0, 4.0];

        // 1*3 + 2*4 = 11
        let short_dot = dot_product(&short_lhs, &short_rhs);
        assert!((short_dot - 11.0).abs() < f32::EPSILON);

        // sqrt(2^2 + 2^2) = 2*sqrt(2)
        let short_dist = euclidean_distance(&short_lhs, &short_rhs);
        assert!((short_dist - 2.8284271).abs() < 0.0001);

        // NOTE(review): 17 elements presumably forces a remainder after the
        // vectorised chunks -- confirm against the lane width used in `simd`.
        let odd_lhs: Vec<f32> = (1..=17).map(|i| i as f32).collect();
        let odd_rhs: Vec<f32> = (18..=34).map(|i| i as f32).collect();

        let odd_dot = dot_product(&odd_lhs, &odd_rhs);
        let reference: f32 = odd_lhs
            .iter()
            .zip(odd_rhs.iter())
            .map(|(x, y)| x * y)
            .sum();
        assert!((odd_dot - reference).abs() < f32::EPSILON * 10.0);
    }

    /// `filter_batch` (called with `true` as its third argument) must return
    /// the same ids as `filter_allowed_scalar`.
    #[test]
    fn test_batch_filter_correctness() {
        use crate::hnsw::batch_filter::{filter_allowed_scalar, filter_batch};

        let candidates: Vec<u64> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
        let allow_list: Vec<u64> = vec![2, 4, 6, 8, 10];

        let via_scalar = filter_allowed_scalar(&candidates, &allow_list);
        let via_batch = filter_batch(&candidates, &allow_list, true);

        assert_eq!(via_scalar, via_batch);
        assert_eq!(via_batch, vec![2, 4, 6, 8, 10]);
    }

    /// `delta_encode` must match `delta_encode_scalar`, and `delta_decode`
    /// must restore the original sequence.
    #[test]
    fn test_delta_encode_correctness() {
        use crate::hnsw::serialization::{delta_decode, delta_encode, delta_encode_scalar};

        let sequence: Vec<u32> = vec![100, 105, 110, 115, 120, 125];

        let via_scalar = delta_encode_scalar(&sequence);
        let encoded = delta_encode(&sequence);
        assert_eq!(via_scalar, encoded);
        // First element is kept verbatim; the rest are successive differences.
        assert_eq!(encoded, vec![100, 5, 5, 5, 5, 5]);

        let decoded = delta_decode(&encoded);
        assert_eq!(decoded, sequence);
    }

    /// Varint encoding must round-trip a fixed set of sample values, including
    /// zero, powers of two around 2^7 and 2^14, and `u32::MAX`.
    #[test]
    fn test_varint_encoding_round_trip() {
        use crate::hnsw::serialization::{decode_varint_scalar, encode_varint_scalar};

        const SAMPLES: [u32; 8] = [0, 1, 127, 128, 300, 16383, 16384, u32::MAX];

        for value in SAMPLES.iter().copied() {
            let mut bytes = Vec::new();
            encode_varint_scalar(&mut bytes, value).unwrap();

            let decoded = decode_varint_scalar(bytes.as_slice()).unwrap();
            assert_eq!(decoded, value, "Failed to round-trip value {}", value);
        }
    }
}
#[cfg(all(test, feature = "native-v3"))]
mod v3_storage_tests;