use crate::RetrieveError;
/// Common interface shared by the approximate-nearest-neighbour indexes in this crate.
///
/// The trait exposes insertion (`add` / `add_slice`), a `build` step, top-`k`
/// `search`, and a handful of introspection helpers. It does not itself enforce
/// any ordering between these calls; see each implementation for its rules.
pub trait ANNIndex {
    /// Inserts `vector` under `doc_id`, taking ownership of the vector.
    ///
    /// # Errors
    /// Returns a [`RetrieveError`] if the implementation rejects the insertion.
    fn add(&mut self, doc_id: u32, vector: Vec<f32>) -> Result<(), RetrieveError>;

    /// Inserts a borrowed vector under `doc_id`.
    ///
    /// The default implementation copies the slice into a fresh `Vec<f32>` and
    /// forwards it to [`ANNIndex::add`]. Implementations may override this.
    ///
    /// # Errors
    /// Propagates whatever error [`ANNIndex::add`] returns.
    fn add_slice(&mut self, doc_id: u32, vector: &[f32]) -> Result<(), RetrieveError> {
        let owned = Vec::from(vector);
        self.add(doc_id, owned)
    }

    /// Runs the implementation's build step.
    ///
    /// # Errors
    /// Returns a [`RetrieveError`] if building fails.
    fn build(&mut self) -> Result<(), RetrieveError>;

    /// Searches the index with `query`, asking for `k` results.
    ///
    /// Each result pairs a `u32` (presumably the document id passed to `add`)
    /// with an `f32` value whose meaning (distance vs. similarity) is defined
    /// by the implementation — confirm against the concrete index.
    ///
    /// # Errors
    /// Returns a [`RetrieveError`] if the search cannot be performed.
    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError>;

    /// Size of the index in bytes, as reported by the implementation.
    fn size_bytes(&self) -> usize;

    /// Snapshot of the index's statistics.
    fn stats(&self) -> ANNStats;

    /// Dimension reported by the index.
    fn dimension(&self) -> usize;

    /// Number of vectors reported by the index.
    fn num_vectors(&self) -> usize;
}
/// Summary statistics returned by [`ANNIndex::stats`].
#[derive(Debug, Clone)]
pub struct ANNStats {
/// Number of vectors the index reports.
pub num_vectors: usize,
/// Dimension the index reports.
pub dimension: usize,
/// Size in bytes as reported by the index's `size_bytes`.
pub size_bytes: usize,
/// Label naming the algorithm, e.g. `"HNSW"` or `"IVF-PQ"`.
pub algorithm: String,
}
#[cfg(feature = "hnsw")]
/// [`ANNIndex`] adapter for the HNSW index.
///
/// The `self.method(..)` calls below rely on `HNSWIndex` having inherent
/// methods of the same name: inherent methods take precedence over trait
/// methods during method resolution, so these forward instead of recursing.
impl ANNIndex for crate::hnsw::HNSWIndex {
    // Forwards to the index's own `add`.
    fn add(&mut self, doc_id: u32, vector: Vec<f32>) -> Result<(), RetrieveError> {
        self.add(doc_id, vector)
    }

    // Forwards to the index's own `add_slice` instead of using the trait default.
    fn add_slice(&mut self, doc_id: u32, vector: &[f32]) -> Result<(), RetrieveError> {
        self.add_slice(doc_id, vector)
    }

    // Forwards to the index's own `build`.
    fn build(&mut self) -> Result<(), RetrieveError> {
        self.build()
    }

    // The trait signature has no `ef` argument, so the configured
    // `params.ef_search` is supplied to the three-argument search.
    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError> {
        let ef_search = self.params.ef_search;
        self.search(query, k, ef_search)
    }

    // Estimate: `vectors.len()` f32-sized elements plus one u32-sized element
    // per entry of each layer. Assumes `vectors` is a flat f32 buffer and that
    // `len()` of a layer counts u32-sized items — TODO confirm.
    fn size_bytes(&self) -> usize {
        let vector_bytes = self.vectors.len() * std::mem::size_of::<f32>();
        let layer_bytes: usize = self
            .layers
            .iter()
            .map(|layer| layer.len() * std::mem::size_of::<u32>())
            .sum();
        vector_bytes + layer_bytes
    }

    // Collects the counters into an `ANNStats` labelled "HNSW".
    fn stats(&self) -> ANNStats {
        let num_vectors = self.num_vectors;
        let dimension = self.dimension;
        let size_bytes = self.size_bytes();
        ANNStats {
            num_vectors,
            dimension,
            size_bytes,
            algorithm: String::from("HNSW"),
        }
    }

    // Reads the `dimension` field.
    fn dimension(&self) -> usize {
        self.dimension
    }

    // Reads the `num_vectors` field.
    fn num_vectors(&self) -> usize {
        self.num_vectors
    }
}
#[cfg(feature = "scann")]
/// [`ANNIndex`] adapter for the SCANN index.
///
/// The `self.method(..)` calls below rely on `SCANNIndex` having inherent
/// methods of the same name: inherent methods take precedence over trait
/// methods during method resolution, so these forward instead of recursing.
impl ANNIndex for crate::scann::search::SCANNIndex {
    // Forwards to the index's own `add`.
    fn add(&mut self, doc_id: u32, vector: Vec<f32>) -> Result<(), RetrieveError> {
        self.add(doc_id, vector)
    }

    // Forwards to the index's own `add_slice` instead of using the trait default.
    fn add_slice(&mut self, doc_id: u32, vector: &[f32]) -> Result<(), RetrieveError> {
        self.add_slice(doc_id, vector)
    }

    // Forwards to the index's own `build`.
    fn build(&mut self) -> Result<(), RetrieveError> {
        self.build()
    }

    // Forwards to the index's own `search`.
    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError> {
        self.search(query, k)
    }

    // Estimate: `vectors.len()` f32-sized elements plus the f32-sized elements
    // of every partition centroid. Assumes `vectors` is a flat f32 buffer —
    // TODO confirm.
    fn size_bytes(&self) -> usize {
        let vector_bytes = self.vectors.len() * std::mem::size_of::<f32>();
        let centroid_bytes: usize = self
            .partition_centroids
            .iter()
            .map(|centroid| centroid.len() * std::mem::size_of::<f32>())
            .sum();
        vector_bytes + centroid_bytes
    }

    // Collects the counters into an `ANNStats` labelled "AnisotropicVQ-kmeans".
    fn stats(&self) -> ANNStats {
        let num_vectors = self.num_vectors;
        let dimension = self.dimension;
        let size_bytes = self.size_bytes();
        ANNStats {
            num_vectors,
            dimension,
            size_bytes,
            algorithm: String::from("AnisotropicVQ-kmeans"),
        }
    }

    // Reads the `dimension` field.
    fn dimension(&self) -> usize {
        self.dimension
    }

    // Reads the `num_vectors` field.
    fn num_vectors(&self) -> usize {
        self.num_vectors
    }
}
#[cfg(feature = "ivf_pq")]
/// [`ANNIndex`] adapter for the IVF-PQ index.
///
/// The `self.method(..)` calls below rely on `IVFPQIndex` having inherent
/// methods of the same name: inherent methods take precedence over trait
/// methods during method resolution, so these forward instead of recursing.
impl ANNIndex for crate::ivf_pq::search::IVFPQIndex {
    // Forwards to the index's own `add`.
    fn add(&mut self, doc_id: u32, vector: Vec<f32>) -> Result<(), RetrieveError> {
        self.add(doc_id, vector)
    }

    // Forwards to the index's own `add_slice` instead of using the trait default.
    fn add_slice(&mut self, doc_id: u32, vector: &[f32]) -> Result<(), RetrieveError> {
        self.add_slice(doc_id, vector)
    }

    // Forwards to the index's own `build`.
    fn build(&mut self) -> Result<(), RetrieveError> {
        self.build()
    }

    // Forwards to the index's own `search`.
    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError> {
        self.search(query, k)
    }

    // Estimate made of three parts: `vectors.len()` f32-sized elements, the
    // f32-sized elements of every centroid, and one byte per entry of
    // `quantized_codes`. Assumes `vectors` is a flat f32 buffer — TODO confirm.
    fn size_bytes(&self) -> usize {
        let vector_bytes = self.vectors.len() * std::mem::size_of::<f32>();
        let centroid_bytes: usize = self
            .centroids
            .iter()
            .map(|centroid| centroid.len() * std::mem::size_of::<f32>())
            .sum();
        let code_bytes = self.quantized_codes.len() * std::mem::size_of::<u8>();
        vector_bytes + centroid_bytes + code_bytes
    }

    // Collects the counters into an `ANNStats` labelled "IVF-PQ".
    fn stats(&self) -> ANNStats {
        let num_vectors = self.num_vectors;
        let dimension = self.dimension;
        let size_bytes = self.size_bytes();
        ANNStats {
            num_vectors,
            dimension,
            size_bytes,
            algorithm: String::from("IVF-PQ"),
        }
    }

    // Reads the `dimension` field.
    fn dimension(&self) -> usize {
        self.dimension
    }

    // Reads the `num_vectors` field.
    fn num_vectors(&self) -> usize {
        self.num_vectors
    }
}
#[cfg(feature = "kdtree")]
/// [`ANNIndex`] adapter for the KD-tree index.
///
/// `add_slice` is not overridden, so the trait default (copy the slice, then
/// call `add`) applies. The `self.method(..)` calls below rely on
/// `KDTreeIndex` having inherent methods of the same name, which take
/// precedence over trait methods during method resolution.
impl ANNIndex for crate::classic::trees::kdtree::KDTreeIndex {
    // Forwards to the index's own `add`.
    fn add(&mut self, doc_id: u32, vector: Vec<f32>) -> Result<(), RetrieveError> {
        self.add(doc_id, vector)
    }

    // Forwards to the index's own `build`.
    fn build(&mut self) -> Result<(), RetrieveError> {
        self.build()
    }

    // Forwards to the index's own `search`.
    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError> {
        self.search(query, k)
    }

    // Estimate: only `vectors.len()` f32-sized elements are counted; no tree
    // structure is included. Assumes `vectors` is a flat f32 buffer — TODO confirm.
    fn size_bytes(&self) -> usize {
        let element_count = self.vectors.len();
        element_count * std::mem::size_of::<f32>()
    }

    // Collects the counters into an `ANNStats` labelled "KD-Tree".
    fn stats(&self) -> ANNStats {
        let num_vectors = self.num_vectors;
        let dimension = self.dimension;
        let size_bytes = self.size_bytes();
        ANNStats {
            num_vectors,
            dimension,
            size_bytes,
            algorithm: String::from("KD-Tree"),
        }
    }

    // Reads the `dimension` field.
    fn dimension(&self) -> usize {
        self.dimension
    }

    // Reads the `num_vectors` field.
    fn num_vectors(&self) -> usize {
        self.num_vectors
    }
}
#[cfg(feature = "balltree")]
/// [`ANNIndex`] adapter for the ball-tree index.
///
/// `add_slice` is not overridden, so the trait default (copy the slice, then
/// call `add`) applies. The `self.method(..)` calls below rely on
/// `BallTreeIndex` having inherent methods of the same name, which take
/// precedence over trait methods during method resolution.
impl ANNIndex for crate::classic::trees::balltree::BallTreeIndex {
    // Forwards to the index's own `add`.
    fn add(&mut self, doc_id: u32, vector: Vec<f32>) -> Result<(), RetrieveError> {
        self.add(doc_id, vector)
    }

    // Forwards to the index's own `build`.
    fn build(&mut self) -> Result<(), RetrieveError> {
        self.build()
    }

    // Forwards to the index's own `search`.
    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError> {
        self.search(query, k)
    }

    // Estimate: only `vectors.len()` f32-sized elements are counted; no tree
    // structure is included. Assumes `vectors` is a flat f32 buffer — TODO confirm.
    fn size_bytes(&self) -> usize {
        let element_count = self.vectors.len();
        element_count * std::mem::size_of::<f32>()
    }

    // Collects the counters into an `ANNStats` labelled "Ball-Tree".
    fn stats(&self) -> ANNStats {
        let num_vectors = self.num_vectors;
        let dimension = self.dimension;
        let size_bytes = self.size_bytes();
        ANNStats {
            num_vectors,
            dimension,
            size_bytes,
            algorithm: String::from("Ball-Tree"),
        }
    }

    // Reads the `dimension` field.
    fn dimension(&self) -> usize {
        self.dimension
    }

    // Reads the `num_vectors` field.
    fn num_vectors(&self) -> usize {
        self.num_vectors
    }
}
#[cfg(feature = "kmeans_tree")]
/// [`ANNIndex`] adapter for the k-means-tree index.
///
/// `add_slice` is not overridden, so the trait default (copy the slice, then
/// call `add`) applies. The `self.method(..)` calls below rely on
/// `KMeansTreeIndex` having inherent methods of the same name, which take
/// precedence over trait methods during method resolution.
impl ANNIndex for crate::classic::trees::kmeans_tree::KMeansTreeIndex {
    // Forwards to the index's own `add`.
    fn add(&mut self, doc_id: u32, vector: Vec<f32>) -> Result<(), RetrieveError> {
        self.add(doc_id, vector)
    }

    // Forwards to the index's own `build`.
    fn build(&mut self) -> Result<(), RetrieveError> {
        self.build()
    }

    // Forwards to the index's own `search`.
    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError> {
        self.search(query, k)
    }

    // Estimate: only `vectors.len()` f32-sized elements are counted; no tree
    // structure is included. Assumes `vectors` is a flat f32 buffer — TODO confirm.
    fn size_bytes(&self) -> usize {
        let element_count = self.vectors.len();
        element_count * std::mem::size_of::<f32>()
    }

    // Collects the counters into an `ANNStats` labelled "K-Means-Tree".
    fn stats(&self) -> ANNStats {
        let num_vectors = self.num_vectors;
        let dimension = self.dimension;
        let size_bytes = self.size_bytes();
        ANNStats {
            num_vectors,
            dimension,
            size_bytes,
            algorithm: String::from("K-Means-Tree"),
        }
    }

    // Reads the `dimension` field.
    fn dimension(&self) -> usize {
        self.dimension
    }

    // Reads the `num_vectors` field.
    fn num_vectors(&self) -> usize {
        self.num_vectors
    }
}
#[cfg(feature = "rptree")]
/// [`ANNIndex`] adapter for the random-projection-tree index.
///
/// `add_slice` is not overridden, so the trait default (copy the slice, then
/// call `add`) applies. The `self.method(..)` calls below rely on
/// `RPTreeIndex` having inherent methods of the same name, which take
/// precedence over trait methods during method resolution.
impl ANNIndex for crate::classic::trees::random_projection::RPTreeIndex {
    // Forwards to the index's own `add`.
    fn add(&mut self, doc_id: u32, vector: Vec<f32>) -> Result<(), RetrieveError> {
        self.add(doc_id, vector)
    }

    // Forwards to the index's own `build`.
    fn build(&mut self) -> Result<(), RetrieveError> {
        self.build()
    }

    // Forwards to the index's own `search`.
    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError> {
        self.search(query, k)
    }

    // Estimate: only `vectors.len()` f32-sized elements are counted; no tree
    // structure is included. Assumes `vectors` is a flat f32 buffer — TODO confirm.
    fn size_bytes(&self) -> usize {
        let element_count = self.vectors.len();
        element_count * std::mem::size_of::<f32>()
    }

    // Collects the counters into an `ANNStats` labelled "RP-Tree".
    fn stats(&self) -> ANNStats {
        let num_vectors = self.num_vectors;
        let dimension = self.dimension;
        let size_bytes = self.size_bytes();
        ANNStats {
            num_vectors,
            dimension,
            size_bytes,
            algorithm: String::from("RP-Tree"),
        }
    }

    // Reads the `dimension` field.
    fn dimension(&self) -> usize {
        self.dimension
    }

    // Reads the `num_vectors` field.
    fn num_vectors(&self) -> usize {
        self.num_vectors
    }
}
#[cfg(feature = "sng")]
/// [`ANNIndex`] adapter for the SNG index.
///
/// `add_slice` is not overridden, so the trait default (copy the slice, then
/// call `add`) applies. The `self.method(..)` calls below rely on `SNGIndex`
/// having inherent methods of the same name, which take precedence over trait
/// methods during method resolution.
impl ANNIndex for crate::sng::SNGIndex {
    // Forwards to the index's own `add`.
    fn add(&mut self, doc_id: u32, vector: Vec<f32>) -> Result<(), RetrieveError> {
        self.add(doc_id, vector)
    }

    // Forwards to the index's own `build`.
    fn build(&mut self) -> Result<(), RetrieveError> {
        self.build()
    }

    // Forwards to the index's own `search`.
    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError> {
        self.search(query, k)
    }

    // Estimate: `vectors.len()` f32-sized elements plus one u32-sized element
    // per entry of each neighbour list. Assumes `vectors` is a flat f32 buffer
    // — TODO confirm.
    fn size_bytes(&self) -> usize {
        let vector_bytes = self.vectors.len() * std::mem::size_of::<f32>();
        let neighbor_bytes: usize = self
            .neighbors
            .iter()
            .map(|adjacent| adjacent.len() * std::mem::size_of::<u32>())
            .sum();
        vector_bytes + neighbor_bytes
    }

    // Collects the counters into an `ANNStats` labelled "OPT-SNG".
    fn stats(&self) -> ANNStats {
        let num_vectors = self.num_vectors;
        let dimension = self.dimension;
        let size_bytes = self.size_bytes();
        ANNStats {
            num_vectors,
            dimension,
            size_bytes,
            algorithm: String::from("OPT-SNG"),
        }
    }

    // Reads the `dimension` field.
    fn dimension(&self) -> usize {
        self.dimension
    }

    // Reads the `num_vectors` field.
    fn num_vectors(&self) -> usize {
        self.num_vectors
    }
}
#[cfg(feature = "lsh")]
/// [`ANNIndex`] adapter for the LSH index.
///
/// `add_slice` is not overridden, so the trait default (copy the slice, then
/// call `add`) applies. The `self.method(..)` calls below rely on `LSHIndex`
/// having inherent methods of the same name, which take precedence over trait
/// methods during method resolution.
impl ANNIndex for crate::hash::search::LSHIndex {
    // Forwards to the index's own `add`.
    fn add(&mut self, doc_id: u32, vector: Vec<f32>) -> Result<(), RetrieveError> {
        self.add(doc_id, vector)
    }

    // Forwards to the index's own `build`.
    fn build(&mut self) -> Result<(), RetrieveError> {
        self.build()
    }

    // Forwards to the index's own `search`.
    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError> {
        self.search(query, k)
    }

    // Estimate made of three parts: `vectors.len()` f32-sized elements, one
    // u32-sized element per entry of each hash table, and one f32-sized
    // element per entry of each hash function. Assumes `vectors` is a flat
    // f32 buffer and that a table's `len()` counts u32-sized items — TODO confirm.
    fn size_bytes(&self) -> usize {
        let vector_bytes = self.vectors.len() * std::mem::size_of::<f32>();
        let table_bytes: usize = self
            .hash_tables
            .iter()
            .map(|table| table.len() * std::mem::size_of::<u32>())
            .sum();
        let function_bytes: usize = self
            .hash_functions
            .iter()
            .map(|function| function.len() * std::mem::size_of::<f32>())
            .sum();
        vector_bytes + table_bytes + function_bytes
    }

    // Collects the counters into an `ANNStats` labelled "LSH".
    fn stats(&self) -> ANNStats {
        let num_vectors = self.num_vectors;
        let dimension = self.dimension;
        let size_bytes = self.size_bytes();
        ANNStats {
            num_vectors,
            dimension,
            size_bytes,
            algorithm: String::from("LSH"),
        }
    }

    // Reads the `dimension` field.
    fn dimension(&self) -> usize {
        self.dimension
    }

    // Reads the `num_vectors` field.
    fn num_vectors(&self) -> usize {
        self.num_vectors
    }
}
#[cfg(feature = "diskann")]
/// [`ANNIndex`] adapter for the DiskANN index.
///
/// Every method here is a same-named `self.method(..)` call. This relies on
/// `DiskANNIndex` having inherent methods with those names: inherent methods
/// take precedence over trait methods during method resolution, so the calls
/// forward to the index itself. NOTE(review): if any inherent method were
/// missing, the corresponding call would recurse — confirm against the type.
impl ANNIndex for crate::diskann::graph::DiskANNIndex {
    // Forwards to the index's own `add`.
    fn add(&mut self, doc_id: u32, vector: Vec<f32>) -> Result<(), RetrieveError> {
        self.add(doc_id, vector)
    }

    // Forwards to the index's own `add_slice` instead of using the trait default.
    fn add_slice(&mut self, doc_id: u32, vector: &[f32]) -> Result<(), RetrieveError> {
        self.add_slice(doc_id, vector)
    }

    // Forwards to the index's own `build`.
    fn build(&mut self) -> Result<(), RetrieveError> {
        self.build()
    }

    // Forwards to the index's own `search`.
    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError> {
        self.search(query, k)
    }

    // Forwards to the index's own `size_bytes`.
    fn size_bytes(&self) -> usize {
        self.size_bytes()
    }

    // Collects the values of the three accessors into an `ANNStats` labelled "DiskANN".
    fn stats(&self) -> ANNStats {
        let num_vectors = self.num_vectors();
        let dimension = self.dimension();
        let size_bytes = self.size_bytes();
        ANNStats {
            num_vectors,
            dimension,
            size_bytes,
            algorithm: String::from("DiskANN"),
        }
    }

    // Forwards to the index's own `dimension`.
    fn dimension(&self) -> usize {
        self.dimension()
    }

    // Forwards to the index's own `num_vectors`.
    fn num_vectors(&self) -> usize {
        self.num_vectors()
    }
}
#[cfg(feature = "nsw")]
/// [`ANNIndex`] adapter for the NSW index.
///
/// The `self.method(..)` calls below rely on `NSWIndex` having inherent
/// methods of the same name: inherent methods take precedence over trait
/// methods during method resolution, so these forward instead of recursing.
impl ANNIndex for crate::nsw::NSWIndex {
    // Forwards to the index's own `add`.
    fn add(&mut self, doc_id: u32, vector: Vec<f32>) -> Result<(), RetrieveError> {
        self.add(doc_id, vector)
    }

    // Forwards to the index's own `add_slice` instead of using the trait default.
    fn add_slice(&mut self, doc_id: u32, vector: &[f32]) -> Result<(), RetrieveError> {
        self.add_slice(doc_id, vector)
    }

    // Forwards to the index's own `build`.
    fn build(&mut self) -> Result<(), RetrieveError> {
        self.build()
    }

    // The trait signature has no `ef` argument, so the configured
    // `params.ef_search` is supplied to the three-argument search.
    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError> {
        let ef_search = self.params.ef_search;
        self.search(query, k, ef_search)
    }

    // Estimate: `vectors.len()` f32-sized elements plus one u32-sized element
    // per entry of each neighbour list. Assumes `vectors` is a flat f32 buffer
    // — TODO confirm.
    fn size_bytes(&self) -> usize {
        let vector_bytes = self.vectors.len() * std::mem::size_of::<f32>();
        let neighbor_bytes: usize = self
            .neighbors
            .iter()
            .map(|adjacent| adjacent.len() * std::mem::size_of::<u32>())
            .sum();
        vector_bytes + neighbor_bytes
    }

    // Collects the counters into an `ANNStats` labelled "NSW".
    fn stats(&self) -> ANNStats {
        let num_vectors = self.num_vectors;
        let dimension = self.dimension;
        let size_bytes = self.size_bytes();
        ANNStats {
            num_vectors,
            dimension,
            size_bytes,
            algorithm: String::from("NSW"),
        }
    }

    // Reads the `dimension` field.
    fn dimension(&self) -> usize {
        self.dimension
    }

    // Reads the `num_vectors` field.
    fn num_vectors(&self) -> usize {
        self.num_vectors
    }
}