pub mod field_reader;
pub mod maintenance;
pub mod reader;
pub mod searcher;
pub mod segment;
pub mod writer;
use std::path::Path;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use parking_lot::RwLock;
use crate::embedding::embedder::Embedder;
use crate::error::{LaurusError, Result};
use crate::storage::Storage;
use crate::vector::index::config::FlatIndexConfig;
use crate::vector::index::flat::searcher::FlatVectorSearcher;
use crate::vector::index::flat::writer::FlatIndexWriter;
use crate::vector::index::{VectorIndex, VectorIndexStats};
use crate::vector::reader::VectorIndexReader;
use crate::vector::search::searcher::VectorIndexSearcher;
use crate::vector::store::embedding_writer::EmbeddingVectorIndexWriter;
use crate::vector::writer::{VectorIndexWriter, VectorIndexWriterConfig};
/// Bookkeeping record for a flat index.
///
/// `FlatIndex` serializes this struct as pretty-printed JSON into the
/// `metadata.json` file of its storage and reads it back when an index is opened.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
struct IndexMetadata {
/// Number of vectors recorded for the index; starts at 0 and is reported by `stats()`.
/// NOTE(review): nothing in this file updates it after creation — confirm where it is maintained.
vector_count: u64,
/// Vector dimension taken from the index configuration at creation time.
/// `open` treats a stored value of 0 as "unspecified" and skips the mismatch check.
dimension: usize,
/// Creation time, in whole seconds since the UNIX epoch.
created: u64,
/// Last-modification time, in whole seconds since the UNIX epoch.
modified: u64,
}
impl Default for IndexMetadata {
    /// Builds empty metadata: no vectors, dimension 0, and both timestamps set
    /// to the current wall-clock time in seconds since the UNIX epoch.
    ///
    /// `Default::default` cannot report an error, so a system clock that is
    /// set before the UNIX epoch yields a timestamp of `0` instead of a panic.
    fn default() -> Self {
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            // `duration_since` fails only when the clock is earlier than the epoch.
            .map_or(0, |elapsed| elapsed.as_secs());
        Self {
            vector_count: 0,
            dimension: 0,
            created: now,
            modified: now,
        }
    }
}
/// A flat vector index backed by a [`Storage`] implementation.
///
/// The index keeps a small metadata record in memory (guarded by a lock) and
/// persists it to the `metadata.json` file of its storage.
pub struct FlatIndex {
/// Index name; handed to the readers and writers created from this index.
name: String,
/// Shared storage backend used for the metadata file and by readers/writers.
storage: Arc<dyn Storage>,
/// Configuration supplied at create/open time (dimension, distance metric, embedder).
config: FlatIndexConfig,
/// Set to `true` by `close()`; most operations return an error once it is set.
closed: AtomicBool,
/// In-memory copy of the persisted metadata.
metadata: RwLock<IndexMetadata>,
}
impl std::fmt::Debug for FlatIndex {
    /// Formats the index as a `FlatIndex { .. }` struct, showing the current
    /// value of the closed flag and a snapshot of the metadata taken under a
    /// read lock.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("FlatIndex");
        builder.field("name", &self.name);
        builder.field("storage", &self.storage);
        builder.field("config", &self.config);

        let is_closed = self.closed.load(Ordering::SeqCst);
        builder.field("closed", &is_closed);

        // The read guard stays alive until the formatter has finished.
        let metadata_guard = self.metadata.read();
        builder.field("metadata", &*metadata_guard);

        builder.finish()
    }
}
impl FlatIndex {
pub fn create(storage: Arc<dyn Storage>, name: &str, config: FlatIndexConfig) -> Result<Self> {
let metadata = IndexMetadata {
dimension: config.dimension,
..Default::default()
};
let index = FlatIndex {
name: name.to_string(),
storage,
config,
closed: AtomicBool::new(false),
metadata: RwLock::new(metadata),
};
index.write_metadata()?;
Ok(index)
}
pub fn open(storage: Arc<dyn Storage>, name: &str, config: FlatIndexConfig) -> Result<Self> {
if !storage.file_exists("metadata.json") {
return Err(LaurusError::index("Index does not exist"));
}
let metadata = Self::read_metadata(storage.as_ref(), name)?;
if metadata.dimension != 0 && metadata.dimension != config.dimension {
return Err(LaurusError::index(format!(
"Dimension mismatch: stored {}, config {}",
metadata.dimension, config.dimension
)));
}
Ok(FlatIndex {
name: name.to_string(),
storage,
config,
closed: AtomicBool::new(false),
metadata: RwLock::new(metadata),
})
}
pub fn create_in_dir<P: AsRef<Path>>(
dir: P,
name: &str,
config: FlatIndexConfig,
) -> Result<Self> {
use crate::storage::file::{FileStorage, FileStorageConfig};
let storage_config = FileStorageConfig::new(&dir);
let storage = Arc::new(FileStorage::new(&dir, storage_config)?);
Self::create(storage, name, config)
}
pub fn open_dir<P: AsRef<Path>>(dir: P, name: &str, config: FlatIndexConfig) -> Result<Self> {
use crate::storage::file::{FileStorage, FileStorageConfig};
let storage_config = FileStorageConfig::new(&dir);
let storage = Arc::new(FileStorage::new(&dir, storage_config)?);
Self::open(storage, name, config)
}
fn write_metadata(&self) -> Result<()> {
let metadata = self.metadata.read();
let metadata_json = serde_json::to_string_pretty(&*metadata)
.map_err(|e| LaurusError::index(format!("Failed to serialize metadata: {e}")))?;
drop(metadata);
let mut output = self.storage.create_output("metadata.json")?;
std::io::Write::write_all(&mut output, metadata_json.as_bytes())?;
output.close()?;
Ok(())
}
fn read_metadata(storage: &dyn Storage, _: &str) -> Result<IndexMetadata> {
let input = storage.open_input("metadata.json")?;
let metadata: IndexMetadata = serde_json::from_reader(input)
.map_err(|e| LaurusError::index(format!("Failed to deserialize metadata: {e}")))?;
Ok(metadata)
}
fn update_metadata(&self) -> Result<()> {
{
let mut metadata = self.metadata.write();
metadata.modified = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs();
}
self.write_metadata()
}
fn check_closed(&self) -> Result<()> {
if self.closed.load(Ordering::SeqCst) {
return Err(LaurusError::InvalidOperation("Index is closed".to_string()));
}
Ok(())
}
}
impl VectorIndex for FlatIndex {
    /// Loads a reader over this index from storage, using the configured
    /// distance metric. Fails if the index is closed or loading fails.
    fn reader(&self) -> Result<Arc<dyn VectorIndexReader>> {
        use crate::vector::index::flat::reader::FlatVectorIndexReader;

        self.check_closed()?;
        let loaded = FlatVectorIndexReader::load(
            Arc::clone(&self.storage),
            &self.name,
            self.config.distance_metric,
        )?;
        Ok(Arc::new(loaded))
    }

    /// Creates a writer for this index: a flat index writer wrapped in an
    /// embedding writer that uses the configured embedder. Fails if the index
    /// is closed or the flat writer cannot be created.
    fn writer(&self) -> Result<Box<dyn VectorIndexWriter>> {
        self.check_closed()?;
        let flat_writer = FlatIndexWriter::with_storage(
            self.config.clone(),
            VectorIndexWriterConfig::default(),
            self.name.clone(),
            Arc::clone(&self.storage),
        )?;
        let embedding_writer =
            EmbeddingVectorIndexWriter::new(Box::new(flat_writer), self.embedder());
        Ok(Box::new(embedding_writer))
    }

    /// Returns the storage backend this index operates on.
    fn storage(&self) -> &Arc<dyn Storage> {
        &self.storage
    }

    /// Marks the index as closed; later operations that check the flag fail.
    fn close(&self) -> Result<()> {
        self.closed.store(true, Ordering::SeqCst);
        Ok(())
    }

    /// Reports whether `close` has been called.
    fn is_closed(&self) -> bool {
        self.closed.load(Ordering::SeqCst)
    }

    /// Builds statistics from the in-memory metadata.
    ///
    /// `total_size` and `deleted_count` are always reported as 0 here.
    fn stats(&self) -> Result<VectorIndexStats> {
        self.check_closed()?;
        let snapshot = self.metadata.read();
        let stats = VectorIndexStats {
            vector_count: snapshot.vector_count,
            dimension: snapshot.dimension,
            total_size: 0,
            deleted_count: 0,
            last_modified: snapshot.modified,
        };
        Ok(stats)
    }

    /// Refreshes the modification timestamp and persists the metadata.
    /// Fails if the index is closed or the metadata cannot be written.
    fn optimize(&self) -> Result<()> {
        self.check_closed()?;
        self.update_metadata()
    }

    /// Creates a searcher on top of a freshly loaded reader. Fails if the
    /// index is closed, or if the reader or the searcher cannot be created.
    fn searcher(&self) -> Result<Box<dyn VectorIndexSearcher>> {
        self.check_closed()?;
        let index_reader = self.reader()?;
        let flat_searcher = FlatVectorSearcher::new(index_reader)?;
        Ok(Box::new(flat_searcher))
    }

    /// Returns a shared handle to the embedder from the index configuration.
    fn embedder(&self) -> Arc<dyn Embedder> {
        self.config.embedder.clone()
    }
}