use std::collections::HashMap;
use std::sync::Arc;
use crate::error::Result;
use crate::maintenance::deletion::DeletionBitmap;
use crate::vector::core::distance::DistanceMetric;
use crate::vector::core::vector::Vector;
/// Summary statistics describing a vector index.
#[derive(Debug, Clone)]
pub struct VectorStats {
/// Number of vectors held by the index.
pub vector_count: usize,
/// Dimensionality of the indexed vectors.
pub dimension: usize,
/// Memory footprint figure for the index (presumably in bytes; a reader may
/// report an estimate rather than a measured value).
pub memory_usage: usize,
/// Time spent building the index, in milliseconds.
pub build_time_ms: u64,
}
/// Descriptive metadata attached to a vector index.
#[derive(Debug, Clone)]
pub struct VectorIndexMetadata {
/// Name of the index implementation (for example `"Simple"`).
pub index_type: String,
/// UTC timestamp reported as the creation time of the index.
pub created_at: chrono::DateTime<chrono::Utc>,
/// UTC timestamp reported as the last modification time of the index.
pub modified_at: chrono::DateTime<chrono::Utc>,
/// Version string reported by the index implementation.
pub version: String,
/// Build configuration as a free-form JSON value.
pub build_config: serde_json::Value,
/// Additional string key/value pairs.
pub custom_metadata: std::collections::HashMap<String, String>,
}
/// Outcome of validating a vector index.
#[derive(Debug, Clone)]
pub struct ValidationReport {
/// Overall verdict: `true` when the index passed validation.
pub is_valid: bool,
/// Human-readable descriptions of validation errors.
pub errors: Vec<String>,
/// Human-readable descriptions of validation warnings.
pub warnings: Vec<String>,
/// Human-readable suggestions for repairing the index.
pub repair_suggestions: Vec<String>,
}
/// Read-only access to the vectors stored in a vector index.
///
/// Vectors are addressed by a `(doc_id, field_name)` pair. Implementations
/// must be `Send + Sync` and implement `Debug`.
pub trait VectorIndexReader: Send + Sync + std::fmt::Debug {
/// Returns this reader as `&dyn Any`, which allows downcasting to the
/// concrete reader type.
fn as_any(&self) -> &dyn std::any::Any;
/// Looks up the vector stored for `doc_id` under `field_name`.
///
/// Returns `Ok(None)` when the reader has no vector to return for that key.
fn get_vector(&self, doc_id: u64, field_name: &str) -> Result<Option<Vector>>;
/// Returns every `(field_name, vector)` pair stored for `doc_id`.
fn get_vectors_for_doc(&self, doc_id: u64) -> Result<Vec<(String, Vector)>>;
/// Looks up several `(doc_id, field_name)` keys at once.
///
/// The result is expected to hold one entry per requested key, in request
/// order, with `None` where no vector was found.
fn get_vectors(&self, doc_ids: &[(u64, String)]) -> Result<Vec<Option<Vector>>>;
/// Returns the `(doc_id, field_name)` keys known to this reader.
fn vector_ids(&self) -> Result<Vec<(u64, String)>>;
/// Returns the number of vectors in the index.
fn vector_count(&self) -> usize;
/// Returns the dimensionality of the indexed vectors.
fn dimension(&self) -> usize;
/// Returns the distance metric the index is configured with.
fn distance_metric(&self) -> DistanceMetric;
/// Returns summary statistics for the index.
fn stats(&self) -> VectorStats;
/// Reports whether a vector exists for `doc_id` under `field_name`.
fn contains_vector(&self, doc_id: u64, field_name: &str) -> bool;
/// Returns the `(doc_id, field_name, vector)` entries whose document ID lies
/// between `start_doc_id` and `end_doc_id`.
///
/// NOTE(review): `SimpleVectorReader` treats both bounds as inclusive;
/// confirm that the other implementations agree.
fn get_vector_range(
&self,
start_doc_id: u64,
end_doc_id: u64,
) -> Result<Vec<(u64, String, Vector)>>;
/// Returns the `(doc_id, vector)` pairs stored under `field_name`.
fn get_vectors_by_field(&self, field_name: &str) -> Result<Vec<(u64, Vector)>>;
/// Returns the field names that have vectors in this index.
fn field_names(&self) -> Result<Vec<String>>;
/// Creates a boxed iterator over the index entries.
fn vector_iterator(&self) -> Result<Box<dyn VectorIterator>>;
/// Returns descriptive metadata about the index.
fn metadata(&self) -> Result<VectorIndexMetadata>;
/// Validates the index contents and reports the findings.
fn validate(&self) -> Result<ValidationReport>;
}
/// Fallible cursor over `(doc_id, field_name, vector)` entries of an index.
pub trait VectorIterator: Send {
/// Returns the next entry and advances the cursor, or `Ok(None)` once the
/// entries are exhausted.
fn next(&mut self) -> Result<Option<(u64, String, Vector)>>;
/// Moves the cursor forward towards the target `(doc_id, field_name)`.
///
/// The returned flag tells whether the cursor ended up on an entry.
/// NOTE(review): `SimpleVectorIterator` stops at the first entry whose key
/// is greater than or equal to the target, which presumes entries ordered by
/// `(doc_id, field_name)` — confirm this is the intended contract.
fn skip_to(&mut self, doc_id: u64, field_name: &str) -> Result<bool>;
/// Returns the `(doc_id, field_name)` key of the entry under the cursor.
fn position(&self) -> (u64, String);
/// Moves the cursor back to the first entry.
fn reset(&mut self) -> Result<()>;
}
/// In-memory [`VectorIndexReader`] backed by a hash map.
#[derive(Debug)]
pub struct SimpleVectorReader {
// Stored vectors keyed by `(doc_id, field_name)`.
vectors: HashMap<(u64, String), Vector>,
// `(doc_id, field_name)` keys collected at construction time, in input order.
vector_ids: Vec<(u64, String)>,
// Dimensionality reported by the reader (stored as supplied to `new`).
dimension: usize,
// Distance metric reported by the reader.
distance_metric: DistanceMetric,
// Optional bitmap used to check whether a document ID is deleted.
deletion_bitmap: Option<Arc<DeletionBitmap>>,
}
impl SimpleVectorReader {
    /// Builds an in-memory reader from `(doc_id, field_name, vector)` triples.
    ///
    /// When the same `(doc_id, field_name)` pair occurs more than once, the
    /// last vector supplied for it is kept and the pair is listed only once in
    /// the ID list (at the position of its first occurrence). The ID list
    /// therefore always has exactly one entry per stored vector.
    ///
    /// `dimension` and `distance_metric` are stored as given; the vectors are
    /// not checked against `dimension` here. The reader starts without a
    /// deletion bitmap.
    ///
    /// # Errors
    ///
    /// This constructor currently always returns `Ok`.
    pub fn new(
        vectors: Vec<(u64, String, Vector)>,
        dimension: usize,
        distance_metric: DistanceMetric,
    ) -> Result<Self> {
        let mut by_key: HashMap<(u64, String), Vector> = HashMap::with_capacity(vectors.len());
        let mut vector_ids: Vec<(u64, String)> = Vec::with_capacity(vectors.len());

        for (doc_id, field, vector) in vectors {
            let key = (doc_id, field);
            // `insert` returns the previous value for a repeated key; only a
            // first-time key is recorded so the ID list stays duplicate-free.
            if by_key.insert(key.clone(), vector).is_none() {
                vector_ids.push(key);
            }
        }

        Ok(Self {
            vectors: by_key,
            vector_ids,
            dimension,
            distance_metric,
            deletion_bitmap: None,
        })
    }

    /// Installs the bitmap used to decide whether a document is deleted,
    /// replacing any bitmap set earlier.
    pub fn set_deletion_bitmap(&mut self, bitmap: Arc<DeletionBitmap>) {
        self.deletion_bitmap = Some(bitmap);
    }

    /// Reports whether `doc_id` is marked as deleted.
    ///
    /// Always `false` while no deletion bitmap is installed.
    fn is_deleted(&self, doc_id: u64) -> bool {
        match &self.deletion_bitmap {
            Some(bitmap) => bitmap.is_deleted(doc_id),
            None => false,
        }
    }
}
/// [`VectorIndexReader`] implementation for the in-memory reader.
///
/// Every method that hands out vectors (and `contains_vector`) skips documents
/// that `is_deleted` reports as deleted. `vector_ids`, `vector_count`, `stats`
/// and `field_names` describe the stored contents and do not consult the
/// deletion bitmap.
impl VectorIndexReader for SimpleVectorReader {
    /// Returns this reader as `&dyn Any` for downcasting.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Returns a clone of the vector stored for `(doc_id, field_name)`.
    ///
    /// Yields `Ok(None)` when the document is deleted or no such vector exists.
    fn get_vector(&self, doc_id: u64, field_name: &str) -> Result<Option<Vector>> {
        if self.is_deleted(doc_id) {
            return Ok(None);
        }
        // The map is keyed by an owned `(u64, String)`, so the lookup key has
        // to own its field name as well.
        let key = (doc_id, field_name.to_string());
        Ok(self.vectors.get(&key).cloned())
    }

    /// Returns clones of all `(field_name, vector)` pairs stored for `doc_id`,
    /// in unspecified order. A deleted document yields an empty list.
    fn get_vectors_for_doc(&self, doc_id: u64) -> Result<Vec<(String, Vector)>> {
        if self.is_deleted(doc_id) {
            return Ok(Vec::new());
        }
        Ok(self
            .vectors
            .iter()
            .filter(|((id, _), _)| *id == doc_id)
            .map(|((_, field), vector)| (field.clone(), vector.clone()))
            .collect())
    }

    /// Looks up each requested `(doc_id, field_name)` key.
    ///
    /// The result has one entry per key, in request order. Each entry is
    /// exactly what `get_vector` returns for that key, so deleted documents
    /// and unknown keys both map to `None`.
    fn get_vectors(&self, doc_ids: &[(u64, String)]) -> Result<Vec<Option<Vector>>> {
        doc_ids
            .iter()
            .map(|(doc_id, field)| self.get_vector(*doc_id, field))
            .collect()
    }

    /// Returns a copy of the `(doc_id, field_name)` keys recorded at
    /// construction time. The deletion bitmap is not consulted.
    fn vector_ids(&self) -> Result<Vec<(u64, String)>> {
        Ok(self.vector_ids.clone())
    }

    /// Returns the number of stored vectors. The deletion bitmap is not
    /// consulted.
    fn vector_count(&self) -> usize {
        self.vectors.len()
    }

    /// Returns the dimension supplied at construction time.
    fn dimension(&self) -> usize {
        self.dimension
    }

    /// Returns the distance metric supplied at construction time.
    fn distance_metric(&self) -> DistanceMetric {
        self.distance_metric
    }

    /// Returns statistics for the stored vectors.
    ///
    /// `memory_usage` is an estimate, not a measurement: 8 bytes per entry
    /// plus 4 bytes per dimension (presumably a `u64` ID and `f32` components).
    /// `build_time_ms` is always 0.
    fn stats(&self) -> VectorStats {
        let stored = self.vectors.len();
        let memory_usage = stored * (8 + self.dimension * 4);
        VectorStats {
            vector_count: stored,
            dimension: self.dimension,
            memory_usage,
            build_time_ms: 0,
        }
    }

    /// Reports whether `get_vector` would find a vector for this key, i.e. the
    /// document is not deleted and the key is stored.
    fn contains_vector(&self, doc_id: u64, field_name: &str) -> bool {
        !self.is_deleted(doc_id)
            && self
                .vectors
                .contains_key(&(doc_id, field_name.to_string()))
    }

    /// Returns clones of the entries whose document ID lies in the inclusive
    /// range `start_doc_id..=end_doc_id`, skipping deleted documents.
    ///
    /// The order of the result is unspecified. An inverted range yields an
    /// empty list.
    fn get_vector_range(
        &self,
        start_doc_id: u64,
        end_doc_id: u64,
    ) -> Result<Vec<(u64, String, Vector)>> {
        let wanted = start_doc_id..=end_doc_id;
        Ok(self
            .vectors
            .iter()
            .filter(|((id, _), _)| wanted.contains(id) && !self.is_deleted(*id))
            .map(|((id, field), vector)| (*id, field.clone(), vector.clone()))
            .collect())
    }

    /// Returns clones of the `(doc_id, vector)` pairs stored under
    /// `field_name`, skipping deleted documents. Order is unspecified.
    fn get_vectors_by_field(&self, field_name: &str) -> Result<Vec<(u64, Vector)>> {
        Ok(self
            .vectors
            .iter()
            .filter(|((id, field), _)| field.as_str() == field_name && !self.is_deleted(*id))
            .map(|((id, _), vector)| (*id, vector.clone()))
            .collect())
    }

    /// Returns the distinct field names of the stored vectors, in unspecified
    /// order.
    fn field_names(&self) -> Result<Vec<String>> {
        use std::collections::HashSet;
        // Deduplicate on borrowed names first so each distinct name is cloned
        // only once.
        let distinct: HashSet<&str> = self
            .vectors
            .keys()
            .map(|(_, field)| field.as_str())
            .collect();
        Ok(distinct.into_iter().map(str::to_owned).collect())
    }

    /// Creates an iterator over clones of the non-deleted entries, ordered by
    /// ascending `(doc_id, field_name)`.
    ///
    /// The ordering matters: `SimpleVectorIterator::skip_to` stops at the
    /// first entry whose key is greater than or equal to its target, which is
    /// only meaningful on sorted entries, and hash-map iteration order is
    /// arbitrary.
    fn vector_iterator(&self) -> Result<Box<dyn VectorIterator>> {
        let mut entries: Vec<(u64, String, Vector)> = self
            .vectors
            .iter()
            .filter(|((id, _), _)| !self.is_deleted(*id))
            .map(|((id, field), vector)| (*id, field.clone(), vector.clone()))
            .collect();
        // Keys are unique, so an unstable sort cannot reorder equal elements.
        entries.sort_unstable_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
        Ok(Box::new(SimpleVectorIterator::new(entries)))
    }

    /// Returns fixed metadata for this reader type.
    ///
    /// Both timestamps are taken from the clock at call time; the reader does
    /// not record when it was actually created or modified.
    fn metadata(&self) -> Result<VectorIndexMetadata> {
        Ok(VectorIndexMetadata {
            index_type: "Simple".to_string(),
            created_at: chrono::Utc::now(),
            modified_at: chrono::Utc::now(),
            version: "1.0".to_string(),
            build_config: serde_json::json!({}),
            custom_metadata: HashMap::new(),
        })
    }

    /// Checks every stored vector with `Vector::is_valid`.
    ///
    /// The report is valid exactly when all vectors pass. Each failing vector
    /// contributes one error message naming its document ID and field; the
    /// messages are ordered by `(doc_id, field_name)`.
    fn validate(&self) -> Result<ValidationReport> {
        let mut invalid_keys: Vec<&(u64, String)> = self
            .vectors
            .iter()
            .filter(|(_, vector)| !vector.is_valid())
            .map(|(key, _)| key)
            .collect();
        // Sort so the report does not depend on hash-map iteration order.
        invalid_keys.sort_unstable();

        let errors: Vec<String> = invalid_keys
            .into_iter()
            .map(|(doc_id, field)| format!("Invalid vector for doc_id {doc_id}, field '{field}'"))
            .collect();

        Ok(ValidationReport {
            is_valid: errors.is_empty(),
            errors,
            warnings: Vec::new(),
            repair_suggestions: Vec::new(),
        })
    }
}
/// Cursor over an owned list of `(doc_id, field_name, vector)` entries.
pub struct SimpleVectorIterator {
// Entries in iteration order.
vectors: Vec<(u64, String, Vector)>,
// Index into `vectors` of the entry the cursor currently points at.
position: usize,
}
impl SimpleVectorIterator {
pub fn new(vectors: Vec<(u64, String, Vector)>) -> Self {
Self {
vectors,
position: 0,
}
}
}
impl VectorIterator for SimpleVectorIterator {
    /// Returns a clone of the entry under the cursor and advances past it, or
    /// `Ok(None)` (leaving the cursor untouched) once all entries are consumed.
    fn next(&mut self) -> Result<Option<(u64, String, Vector)>> {
        let current = self.vectors.get(self.position).cloned();
        if current.is_some() {
            self.position += 1;
        }
        Ok(current)
    }

    /// Advances the cursor to the first remaining entry whose
    /// `(doc_id, field_name)` key is greater than or equal to the target.
    ///
    /// Returns `Ok(true)` when such an entry exists (the cursor rests on it)
    /// and `Ok(false)` when the remaining entries are exhausted. The cursor
    /// never moves backwards.
    fn skip_to(&mut self, doc_id: u64, field_name: &str) -> Result<bool> {
        let target = (doc_id, field_name);
        let remaining = self.vectors.get(self.position..).unwrap_or(&[]);
        // Tuple comparison is lexicographic: document ID first, then field name.
        let hit = remaining
            .iter()
            .position(|(id, field, _)| (*id, field.as_str()) >= target);

        match hit {
            Some(offset) => {
                self.position += offset;
                Ok(true)
            }
            None => {
                // Everything left was before the target; park at the end.
                self.position = self.position.max(self.vectors.len());
                Ok(false)
            }
        }
    }

    /// Returns the key of the entry under the cursor, or
    /// `(u64::MAX, String::new())` when the cursor is past the last entry.
    fn position(&self) -> (u64, String) {
        match self.vectors.get(self.position) {
            Some((id, field, _)) => (*id, field.clone()),
            None => (u64::MAX, String::new()),
        }
    }

    /// Moves the cursor back to the first entry. Always succeeds.
    fn reset(&mut self) -> Result<()> {
        self.position = 0;
        Ok(())
    }
}
pub struct VectorIndexReaderFactory;
impl VectorIndexReaderFactory {
pub fn create_reader(
index_type: &str,
index_data: &[u8],
deletion_bitmap: Option<Arc<DeletionBitmap>>,
) -> Result<Arc<dyn VectorIndexReader>> {
use crate::vector::index::flat::reader::FlatVectorIndexReader;
use crate::vector::index::hnsw::reader::HnswIndexReader;
use crate::vector::index::ivf::reader::IvfIndexReader;
match index_type.to_lowercase().as_str() {
"flat" => {
let mut reader = FlatVectorIndexReader::from_bytes(index_data)?;
if let Some(bitmap) = deletion_bitmap {
reader.set_deletion_bitmap(bitmap);
}
Ok(Arc::new(reader))
}
"hnsw" => {
let mut reader = HnswIndexReader::from_bytes(index_data)?;
if let Some(bitmap) = deletion_bitmap {
reader.set_deletion_bitmap(bitmap);
}
Ok(Arc::new(reader))
}
"ivf" => {
let mut reader = IvfIndexReader::from_bytes(index_data)?;
if let Some(bitmap) = deletion_bitmap {
reader.set_deletion_bitmap(bitmap);
}
Ok(Arc::new(reader))
}
_ => Err(crate::error::LaurusError::InvalidOperation(format!(
"Unknown index type: {index_type}"
))),
}
}
}