use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use crate::distance::Distance;
use crate::error::{Error, Result};
use crate::index::{FlatIndex, HNSWIndex, Index, IndexType};
use crate::query::{Filter, FilterEvaluator};
use crate::storage::{MemoryStorage, Storage};
use crate::types::{Metadata, VectorId};
/// Configuration for a `PartialIndex`: which documents it covers and how
/// they are indexed.
///
/// Build one with `PartialIndexConfig::new` and refine it with the `with_*`
/// builder methods.
#[derive(Debug, Clone)]
pub struct PartialIndexConfig {
/// Metadata filter; `PartialIndex::matches` evaluates it to decide whether a
/// document belongs in the index.
pub filter: Filter,
/// Index implementation to build (`PartialIndexConfig::new` defaults to
/// `IndexType::Flat`).
pub index_type: IndexType,
/// Distance handed to the index on every add and search
/// (`PartialIndexConfig::new` defaults to `Distance::Cosine`).
pub distance: Distance,
/// Optional free-form description, reported back by `PartialIndex::stats`.
pub description: Option<String>,
}
impl PartialIndexConfig {
    /// Starts a configuration for documents matching `filter`.
    ///
    /// Defaults: flat index, cosine distance, no description.
    pub fn new(filter: Filter) -> Self {
        let index_type = IndexType::Flat;
        let distance = Distance::Cosine;
        Self {
            filter,
            index_type,
            distance,
            description: None,
        }
    }

    /// Returns the configuration with its index type replaced by `index_type`.
    pub fn with_index_type(self, index_type: IndexType) -> Self {
        Self { index_type, ..self }
    }

    /// Returns the configuration with its distance replaced by `distance`.
    pub fn with_distance(self, distance: Distance) -> Self {
        Self { distance, ..self }
    }

    /// Returns the configuration with `description` attached.
    pub fn with_description(self, description: impl Into<String>) -> Self {
        let description = Some(description.into());
        Self {
            description,
            ..self
        }
    }

    /// Shorthand for selecting an HNSW index with the given `m` and
    /// `ef_construction` parameters.
    pub fn with_hnsw(self, m: usize, ef_construction: usize) -> Self {
        self.with_index_type(IndexType::HNSW { m, ef_construction })
    }
}
/// Serializable summary of a partial index, as produced by
/// `PartialIndex::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartialIndexStats {
/// Name of the index.
pub name: String,
/// Number of document ids the index tracks at the time of the snapshot.
pub document_count: usize,
/// Human-readable index type, e.g. `Flat` or `HNSW(m=16, ef=200)`.
pub index_type: String,
/// Description copied from the index configuration, if one was set.
pub description: Option<String>,
}
/// A secondary index that only accepts documents whose metadata matches
/// `config.filter`.
///
/// All methods take `&self`; the tracked id list is guarded by an `RwLock`.
pub struct PartialIndex {
/// Name the index was created with.
pub name: String,
/// Filter, index type, distance and description this index was built from.
pub config: PartialIndexConfig,
/// Index implementation chosen from `config.index_type` in `PartialIndex::new`.
index: Arc<dyn Index>,
/// Private in-memory store holding the vectors and metadata of added documents.
storage: MemoryStorage,
/// Ids of the documents added so far; backs `len`, `is_empty` and `document_ids`.
document_ids: RwLock<Vec<VectorId>>,
}
impl PartialIndex {
    /// Builds an empty partial index called `name`, backed by the index
    /// implementation selected by `config.index_type`.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`; the `Result` is kept so
    /// callers do not need to change if construction becomes fallible.
    pub fn new(name: impl Into<String>, config: PartialIndexConfig) -> Result<Self> {
        let index: Arc<dyn Index> = match &config.index_type {
            IndexType::Flat => Arc::new(FlatIndex::new()),
            IndexType::HNSW { m, ef_construction } => {
                Arc::new(HNSWIndex::new(*m, *ef_construction))
            }
            IndexType::IVF {
                num_clusters,
                num_probes,
            } => Arc::new(crate::index::IVFIndex::new(*num_clusters, *num_probes)),
        };
        Ok(Self {
            name: name.into(),
            config,
            index,
            storage: MemoryStorage::new(),
            document_ids: RwLock::new(Vec::new()),
        })
    }

    /// Returns `true` when `metadata` satisfies this index's filter.
    pub fn matches(&self, metadata: Option<&Metadata>) -> bool {
        FilterEvaluator::evaluate(&self.config.filter, metadata)
    }

    /// Adds the document to the index if its metadata matches the filter.
    ///
    /// Returns `Ok(true)` when the document was indexed and `Ok(false)` when
    /// the filter rejected it (in which case nothing is stored).
    ///
    /// NOTE(review): an id that is already tracked is appended to the id list
    /// again; callers that re-insert an id are expected to `remove` it first,
    /// as `PartialIndexManager::on_update` does.
    ///
    /// # Errors
    ///
    /// Propagates any error from the storage insert or from the index. If the
    /// index rejects a document whose id was not tracked before, the storage
    /// entry written for it is removed again so no orphan is left behind.
    pub fn try_add(&self, id: &str, vector: &[f32], metadata: Option<&Metadata>) -> Result<bool> {
        if !self.matches(metadata) {
            return Ok(false);
        }
        self.storage
            .insert(id.to_string(), Some(vector.to_vec()), metadata.cloned())?;

        let indexed = self
            .index
            .add(id, vector, &self.storage, self.config.distance);
        // Roll back only for ids this index did not already track: for a
        // tracked id the index may still reference the stored entry, so it
        // is left untouched exactly as before.
        if indexed.is_err() && !self.document_ids.read().iter().any(|known| known == id) {
            // Best effort: the indexing error below is the one worth reporting.
            let _ = self.storage.delete(id);
        }
        indexed?;

        self.document_ids.write().push(id.to_string());
        Ok(true)
    }

    /// Removes the document `id` from storage, the index and the id list.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by the storage delete or the index
    /// removal; later steps are skipped in that case.
    pub fn remove(&self, id: &str) -> Result<()> {
        self.storage.delete(id)?;
        self.index.remove(id)?;
        self.document_ids.write().retain(|doc_id| doc_id != id);
        Ok(())
    }

    /// Searches the index for up to `k` results for `query` and returns them
    /// as `(id, distance)` pairs, in the order the underlying index reports.
    ///
    /// # Errors
    ///
    /// Propagates any error from the underlying index search.
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(VectorId, f32)>> {
        let results = self
            .index
            .search(query, k, &self.storage, self.config.distance)?;
        Ok(results.into_iter().map(|r| (r.id, r.distance)).collect())
    }

    /// Number of document ids currently tracked.
    pub fn len(&self) -> usize {
        self.document_ids.read().len()
    }

    /// Returns `true` when no document ids are tracked.
    pub fn is_empty(&self) -> bool {
        self.document_ids.read().is_empty()
    }

    /// Returns a snapshot copy of the tracked document ids, in insertion order.
    pub fn document_ids(&self) -> Vec<VectorId> {
        self.document_ids.read().clone()
    }

    /// Produces a serializable summary of the index: name, document count,
    /// a human-readable index type and the configured description.
    pub fn stats(&self) -> PartialIndexStats {
        let index_type = match &self.config.index_type {
            IndexType::Flat => "Flat".to_string(),
            IndexType::HNSW { m, ef_construction } => {
                format!("HNSW(m={}, ef={})", m, ef_construction)
            }
            IndexType::IVF {
                num_clusters,
                num_probes,
            } => {
                format!("IVF(clusters={}, probes={})", num_clusters, num_probes)
            }
        };
        PartialIndexStats {
            name: self.name.clone(),
            document_count: self.len(),
            index_type,
            description: self.config.description.clone(),
        }
    }

    /// Discards every tracked document and re-populates the index from
    /// `documents`, keeping only those that match the filter.
    ///
    /// Returns the number of documents that were accepted.
    ///
    /// # Errors
    ///
    /// Stops at the first document whose `try_add` fails and returns that
    /// error; documents added before it stay in the index.
    pub fn rebuild<'a>(
        &self,
        documents: impl Iterator<Item = (&'a str, &'a [f32], Option<&'a Metadata>)>,
    ) -> Result<usize> {
        // Take the id list in one step under the write lock. Reading a copy
        // and clearing later would silently drop ids that a concurrent
        // `try_add` pushed in between, leaving them indexed but untracked.
        let old_ids = std::mem::take(&mut *self.document_ids.write());
        for id in &old_ids {
            // Best effort: an entry that is already gone must not abort the rebuild.
            let _ = self.index.remove(id);
            let _ = self.storage.delete(id);
        }

        let mut count = 0;
        for (id, vector, metadata) in documents {
            if self.try_add(id, vector, metadata)? {
                count += 1;
            }
        }
        Ok(count)
    }
}
/// Registry of named partial indexes that fans document inserts, updates and
/// deletes out to every registered index.
pub struct PartialIndexManager {
/// Registered indexes keyed by name; stored as `Arc`s so `get_index` can hand
/// out shared handles.
indexes: RwLock<HashMap<String, Arc<PartialIndex>>>,
}
impl Default for PartialIndexManager {
fn default() -> Self {
Self::new()
}
}
impl PartialIndexManager {
pub fn new() -> Self {
Self {
indexes: RwLock::new(HashMap::new()),
}
}
pub fn create_index(&self, name: &str, config: PartialIndexConfig) -> Result<()> {
let mut indexes = self.indexes.write();
if indexes.contains_key(name) {
return Err(Error::AlreadyExists(name.to_string()));
}
let index = PartialIndex::new(name, config)?;
indexes.insert(name.to_string(), Arc::new(index));
Ok(())
}
pub fn drop_index(&self, name: &str) -> Result<()> {
let mut indexes = self.indexes.write();
if indexes.remove(name).is_none() {
return Err(Error::NotFound(name.to_string()));
}
Ok(())
}
pub fn get_index(&self, name: &str) -> Option<Arc<PartialIndex>> {
self.indexes.read().get(name).cloned()
}
pub fn list_indexes(&self) -> Vec<PartialIndexStats> {
self.indexes
.read()
.values()
.map(|idx| idx.stats())
.collect()
}
pub fn on_insert(
&self,
id: &str,
vector: &[f32],
metadata: Option<&Metadata>,
) -> Result<Vec<String>> {
let indexes = self.indexes.read();
let mut added_to = Vec::new();
for (name, index) in indexes.iter() {
if index.try_add(id, vector, metadata)? {
added_to.push(name.clone());
}
}
Ok(added_to)
}
pub fn on_delete(&self, id: &str) -> Result<()> {
let indexes = self.indexes.read();
for index in indexes.values() {
let _ = index.remove(id); }
Ok(())
}
pub fn on_update(&self, id: &str, vector: &[f32], metadata: Option<&Metadata>) -> Result<()> {
let indexes = self.indexes.read();
for index in indexes.values() {
let _ = index.remove(id);
let _ = index.try_add(id, vector, metadata)?;
}
Ok(())
}
pub fn search(
&self,
index_name: &str,
query: &[f32],
k: usize,
) -> Result<Vec<(VectorId, f32)>> {
let indexes = self.indexes.read();
match indexes.get(index_name) {
Some(index) => index.search(query, k),
None => Err(Error::NotFound(index_name.to_string())),
}
}
pub fn len(&self) -> usize {
self.indexes.read().len()
}
pub fn is_empty(&self) -> bool {
self.indexes.read().is_empty()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created index keeps its name and starts out empty.
    #[test]
    fn test_partial_index_creation() {
        let tech_only = PartialIndexConfig::new(Filter::eq("category", "tech"));
        let index = PartialIndex::new("tech_index", tech_only).unwrap();

        assert!(index.is_empty());
        assert_eq!(index.name, "tech_index");
    }

    /// Only documents whose metadata matches the filter are accepted.
    #[test]
    fn test_partial_index_filtering() {
        let tech_only = PartialIndexConfig::new(Filter::eq("category", "tech"));
        let index = PartialIndex::new("tech_index", tech_only).unwrap();

        // Matching document: accepted and counted.
        let mut tech_meta = Metadata::new();
        tech_meta.insert("category", "tech");
        let accepted = index
            .try_add("doc1", &[0.1, 0.2, 0.3], Some(&tech_meta))
            .unwrap();
        assert!(accepted);
        assert_eq!(index.len(), 1);

        // Non-matching document: rejected, count unchanged.
        let mut sports_meta = Metadata::new();
        sports_meta.insert("category", "sports");
        let accepted = index
            .try_add("doc2", &[0.4, 0.5, 0.6], Some(&sports_meta))
            .unwrap();
        assert!(!accepted);
        assert_eq!(index.len(), 1);
    }

    /// Searching returns the requested number of hits, nearest first.
    #[test]
    fn test_partial_index_search() {
        let tech_only = PartialIndexConfig::new(Filter::eq("category", "tech"));
        let index = PartialIndex::new("tech_index", tech_only).unwrap();

        let mut meta = Metadata::new();
        meta.insert("category", "tech");

        // One unit vector per axis.
        let documents = [
            ("doc1", [1.0_f32, 0.0, 0.0]),
            ("doc2", [0.0, 1.0, 0.0]),
            ("doc3", [0.0, 0.0, 1.0]),
        ];
        for (id, vector) in documents {
            index.try_add(id, &vector, Some(&meta)).unwrap();
        }

        let hits = index.search(&[1.0, 0.1, 0.0], 2).unwrap();
        assert_eq!(hits.len(), 2);
        assert_eq!(hits[0].0, "doc1");
    }

    /// End-to-end flow through the manager: create, insert, search, drop.
    #[test]
    fn test_partial_index_manager() {
        let manager = PartialIndexManager::new();
        manager
            .create_index(
                "articles",
                PartialIndexConfig::new(Filter::eq("type", "article"))
                    .with_description("Artículos indexados"),
            )
            .unwrap();
        assert_eq!(manager.len(), 1);

        let mut article_meta = Metadata::new();
        article_meta.insert("type", "article");
        article_meta.insert("title", "Test Article");

        let vector = [0.1, 0.2, 0.3];
        let added_to = manager
            .on_insert("art1", &vector, Some(&article_meta))
            .unwrap();
        assert_eq!(added_to, vec!["articles"]);

        let hits = manager.search("articles", &vector, 10).unwrap();
        assert_eq!(hits.len(), 1);

        manager.drop_index("articles").unwrap();
        assert!(manager.is_empty());
    }

    /// A conjunction filter requires every clause to hold.
    #[test]
    fn test_partial_index_with_complex_filter() {
        let high_score_tech = Filter::all(vec![
            Filter::eq("category", "tech"),
            Filter::gt("score", 0.5f64),
        ]);
        let index =
            PartialIndex::new("high_score_tech", PartialIndexConfig::new(high_score_tech)).unwrap();

        // Right category and a score above the threshold: accepted.
        let mut passing = Metadata::new();
        passing.insert("category", "tech");
        passing.insert("score", 0.8f64);
        assert!(index.try_add("doc1", &[0.1; 3], Some(&passing)).unwrap());

        // Right category but a score below the threshold: rejected.
        let mut failing = Metadata::new();
        failing.insert("category", "tech");
        failing.insert("score", 0.3f64);
        assert!(!index.try_add("doc2", &[0.2; 3], Some(&failing)).unwrap());

        assert_eq!(index.len(), 1);
    }
}