// Submodules that make up the vector index layer.

// Index configuration types; `VectorIndexTypeConfig` and the per-type configs
// imported further down in this file come from here.
pub mod config;
pub mod factory;
pub mod field;
pub mod field_factory;
// Flat index implementation; this file uses its `reader` and `writer`
// submodules.
pub mod flat;
// HNSW index implementation; this file uses its `reader` and `writer`
// submodules.
pub mod hnsw;
pub mod io;
// IVF index implementation; this file uses its `reader` and `writer`
// submodules.
pub mod ivf;
pub mod segmented_field;
pub mod storage;
pub mod wal;
use std::sync::Arc;
use parking_lot::RwLock;
use crate::embedding::embedder::Embedder;
use crate::error::{LaurusError, Result};
use crate::storage::Storage;
use crate::vector::core::vector::Vector;
use crate::vector::index::config::{
FlatIndexConfig, HnswIndexConfig, IvfIndexConfig, VectorIndexTypeConfig,
};
use crate::vector::reader::VectorIndexReader;
use crate::vector::search::searcher::VectorIndexSearcher;
use crate::vector::writer::VectorIndexWriter;
/// Common interface of a vector index.
///
/// Implementors must be `Send + Sync` and implement `Debug`. Three methods
/// (`refresh`, `last_wal_seq`, `set_last_wal_seq`) ship with default bodies
/// so that implementors only override them when they need to.
pub trait VectorIndex: Send + Sync + std::fmt::Debug {
    /// Returns a shared reader for this index.
    fn reader(&self) -> Result<Arc<dyn VectorIndexReader>>;

    /// Returns a boxed writer for this index.
    fn writer(&self) -> Result<Box<dyn VectorIndexWriter>>;

    /// Returns the storage handle associated with this index.
    fn storage(&self) -> &Arc<dyn Storage>;

    /// Closes the index.
    fn close(&self) -> Result<()>;

    /// Reports whether the index has been closed.
    fn is_closed(&self) -> bool;

    /// Returns statistics describing the index.
    fn stats(&self) -> Result<VectorIndexStats>;

    /// Optimizes the index; what that entails is up to the implementor.
    fn optimize(&self) -> Result<()>;

    /// Refreshes the index.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    fn refresh(&self) -> Result<()> {
        Ok(())
    }

    /// Returns a boxed searcher for this index.
    fn searcher(&self) -> Result<Box<dyn VectorIndexSearcher>>;

    /// Returns the embedder associated with this index.
    fn embedder(&self) -> Arc<dyn Embedder>;

    /// Returns the last WAL sequence number recorded for this index.
    ///
    /// The default implementation always returns `0`.
    fn last_wal_seq(&self) -> u64 {
        0
    }

    /// Records the last WAL sequence number for this index.
    ///
    /// The default implementation ignores the value and returns `Ok(())`.
    fn set_last_wal_seq(&self, _seq: u64) -> Result<()> {
        Ok(())
    }
}
/// Summary statistics reported for a vector index.
///
/// Plain data holder; every field is public and the type can be cloned and
/// debug-printed.
#[derive(Debug, Clone)]
pub struct VectorIndexStats {
    /// Number of vectors in the index.
    pub vector_count: u64,

    /// Dimensionality of the vectors.
    pub dimension: usize,

    /// Total size of the index.
    // NOTE(review): presumably bytes — confirm against the code that fills this in.
    pub total_size: u64,

    /// Number of deleted entries.
    pub deleted_count: u64,

    /// Last-modified marker.
    // NOTE(review): presumably a timestamp; unit/epoch not visible here — confirm.
    pub last_modified: u64,
}
use crate::vector::index::flat::reader::FlatVectorIndexReader;
use crate::vector::index::hnsw::reader::HnswIndexReader;
use crate::vector::index::ivf::reader::IvfIndexReader;
/// A vector index that owns a type-specific writer together with the storage
/// location it targets.
///
/// Mutable state lives behind `parking_lot::RwLock`s wrapped in `Arc`s.
pub struct ManagedVectorIndex {
    /// Index type configuration this instance was created with.
    config: VectorIndexTypeConfig,

    /// The underlying writer, boxed as a trait object.
    builder: Arc<RwLock<Box<dyn VectorIndexWriter>>>,

    /// Whether the index has been finalized.
    is_finalized: Arc<RwLock<bool>>,

    /// Storage backend, when the index has one.
    storage: Option<Arc<dyn Storage>>,

    /// Path of the index inside `storage`, when known.
    index_path: Arc<RwLock<Option<String>>>,
}
impl ManagedVectorIndex {
    /// Creates a managed index that targets `storage` under `path`.
    ///
    /// The writer implementation is picked from the `config` variant (flat,
    /// HNSW or IVF) and is constructed with the default writer configuration.
    /// The new index starts out not finalized.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the selected writer's `with_storage`
    /// constructor.
    pub fn new(
        config: VectorIndexTypeConfig,
        storage: Arc<dyn Storage>,
        path: impl Into<String>,
    ) -> Result<Self> {
        let index_path: String = path.into();
        // Exactly one match arm runs, so a single config value is enough.
        let writer_config = Self::default_writer_config();

        let writer: Box<dyn VectorIndexWriter> = match &config {
            VectorIndexTypeConfig::Flat(flat_cfg) => {
                let flat_writer = flat::writer::FlatIndexWriter::with_storage(
                    flat_cfg.clone(),
                    writer_config,
                    index_path.clone(),
                    Arc::clone(&storage),
                )?;
                Box::new(flat_writer)
            }
            VectorIndexTypeConfig::HNSW(hnsw_cfg) => {
                let hnsw_writer = hnsw::writer::HnswIndexWriter::with_storage(
                    hnsw_cfg.clone(),
                    writer_config,
                    index_path.clone(),
                    Arc::clone(&storage),
                )?;
                Box::new(hnsw_writer)
            }
            VectorIndexTypeConfig::IVF(ivf_cfg) => {
                let ivf_writer = ivf::writer::IvfIndexWriter::with_storage(
                    ivf_cfg.clone(),
                    writer_config,
                    index_path.clone(),
                    Arc::clone(&storage),
                )?;
                Box::new(ivf_writer)
            }
        };

        Ok(Self {
            config,
            builder: Arc::new(RwLock::new(writer)),
            is_finalized: Arc::new(RwLock::new(false)),
            storage: Some(storage),
            index_path: Arc::new(RwLock::new(Some(index_path))),
        })
    }

    /// Returns the writer configuration used for every writer built by `new`.
    fn default_writer_config() -> crate::vector::writer::VectorIndexWriterConfig {
        crate::vector::writer::VectorIndexWriterConfig::default()
    }

    /// Returns `Ok(())` when the index is finalized, otherwise an
    /// `InvalidOperation` error carrying `message`.
    fn require_finalized(&self, message: &str) -> Result<()> {
        if self.is_finalized() {
            Ok(())
        } else {
            Err(LaurusError::InvalidOperation(message.to_string()))
        }
    }

    /// Hands a batch of `(u64, String, Vector)` entries to the writer.
    ///
    /// # Errors
    ///
    /// Returns `InvalidOperation` if the index is already finalized, or
    /// whatever error the writer reports.
    pub fn add_vectors(&mut self, vectors: Vec<(u64, String, Vector)>) -> Result<()> {
        if self.is_finalized() {
            return Err(LaurusError::InvalidOperation(
                "Cannot add vectors to finalized index".to_string(),
            ));
        }
        self.builder.write().add_vectors(vectors)?;
        Ok(())
    }

    /// Finalizes the writer and marks the index as finalized.
    ///
    /// The flag is only set when the writer's `finalize` succeeds.
    pub fn finalize(&mut self) -> Result<()> {
        // Keep the writer locked until the flag has been flipped.
        let mut writer = self.builder.write();
        writer.finalize()?;
        let mut finalized_flag = self.is_finalized.write();
        *finalized_flag = true;
        Ok(())
    }

    /// Forwards a delete request for `doc_id` to the writer.
    ///
    /// No finalized-state check is performed here.
    pub fn delete_document(&self, doc_id: u64) -> Result<()> {
        self.builder.write().delete_document(doc_id)
    }

    /// Returns the configuration this index was created with.
    pub fn config(&self) -> &VectorIndexTypeConfig {
        &self.config
    }

    /// Returns the progress value reported by the writer.
    pub fn progress(&self) -> f32 {
        self.builder.read().progress()
    }

    /// Returns the memory usage estimate reported by the writer.
    pub fn estimated_memory_usage(&self) -> usize {
        self.builder.read().estimated_memory_usage()
    }

    /// Reports whether `finalize` has completed successfully.
    pub fn is_finalized(&self) -> bool {
        *self.is_finalized.read()
    }

    /// Returns an owned copy of the vectors held by the writer.
    ///
    /// # Errors
    ///
    /// Returns `InvalidOperation` if the index is not finalized yet.
    pub fn vectors(&self) -> Result<Vec<(u64, String, Vector)>> {
        self.require_finalized("Index must be finalized before accessing vectors")?;
        let snapshot = self.builder.read().vectors().to_vec();
        Ok(snapshot)
    }

    /// Asks the writer to write the finalized index out.
    ///
    /// # Errors
    ///
    /// Returns `InvalidOperation` if the index is not finalized or if the
    /// writer reports that it has no storage; writer errors are propagated.
    pub fn write(&self) -> Result<()> {
        self.require_finalized("Index must be finalized before writing")?;

        let writer = self.builder.read();
        if writer.has_storage() {
            writer.write()?;
            Ok(())
        } else {
            Err(LaurusError::InvalidOperation(
                "Index was not created with storage support".to_string(),
            ))
        }
    }

    /// Reports whether this index holds a storage handle.
    pub fn has_storage(&self) -> bool {
        self.storage.is_some()
    }

    /// Creates a reader over the finalized index.
    ///
    /// With a storage handle and a known path, the reader matching the
    /// configured index type is loaded from storage. With a storage handle
    /// but no path, an error is returned. Without a storage handle, a
    /// `SimpleVectorReader` is built from the writer's vectors. (`new` always
    /// sets both the storage handle and the path.)
    ///
    /// # Errors
    ///
    /// Returns `InvalidOperation` if the index is not finalized or if the
    /// path is missing; errors from loading or building the reader are
    /// propagated.
    pub fn reader(&self) -> Result<Arc<dyn crate::vector::reader::VectorIndexReader>> {
        self.require_finalized("Index must be finalized before creating a reader")?;

        let storage = match &self.storage {
            Some(handle) => handle,
            None => {
                // No storage: serve the vectors straight from the writer.
                let vectors = self.vectors()?;
                let in_memory = crate::vector::reader::SimpleVectorReader::new(
                    vectors,
                    self.config.dimension(),
                    self.config.distance_metric(),
                )?;
                return Ok(Arc::new(in_memory));
            }
        };

        // The read guard stays alive while the reader is being loaded.
        let path_guard = self.index_path.read();
        let path = match path_guard.as_ref() {
            Some(known_path) => known_path,
            None => {
                return Err(LaurusError::InvalidOperation(
                    "Index has not been written to storage, cannot create reader from storage without path.".to_string(),
                ));
            }
        };

        match &self.config {
            VectorIndexTypeConfig::Flat(flat_cfg) => {
                let loaded =
                    FlatVectorIndexReader::load(&**storage, path, flat_cfg.distance_metric)?;
                Ok(Arc::new(loaded))
            }
            VectorIndexTypeConfig::HNSW(hnsw_cfg) => {
                let loaded = HnswIndexReader::load(&**storage, path, hnsw_cfg.distance_metric)?;
                Ok(Arc::new(loaded))
            }
            VectorIndexTypeConfig::IVF(ivf_cfg) => {
                let loaded = IvfIndexReader::load(&**storage, path, ivf_cfg.distance_metric)?;
                Ok(Arc::new(loaded))
            }
        }
    }
}