use async_trait::async_trait;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::Path;
use tokio::sync::RwLock;
use crate::error::{OxiRagError, VectorStoreError};
use crate::layer1_echo::traits::{IndexedDocument, SimilarityMetric, VectorStore};
use crate::types::DocumentId;
/// Summary statistics describing the state of a vector index.
///
/// Created with [`IndexStats::new`] and refined through the `with_*` builder methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexStats {
/// Number of documents in the index.
pub document_count: usize,
/// Number of embeddings in the index.
pub embedding_count: usize,
/// Dimension reported for the index's embedding vectors.
pub dimension: usize,
/// Memory footprint in bytes; `0` until set via `with_memory_usage`.
pub memory_usage_bytes: u64,
/// Fragmentation ratio; `with_fragmentation` clamps it to `0.0..=1.0`.
pub fragmentation_ratio: f32,
/// When the index was last optimized, if a timestamp has been recorded.
pub last_optimized: Option<DateTime<Utc>>,
/// Free-form string key/value annotations attached to the statistics.
pub metadata: HashMap<String, String>,
}
impl IndexStats {
    /// Creates statistics for an index with the given counts and dimension.
    ///
    /// Memory usage and fragmentation start at zero, no optimization timestamp
    /// is recorded, and the metadata map is empty.
    #[must_use]
    pub fn new(document_count: usize, embedding_count: usize, dimension: usize) -> Self {
        let metadata = HashMap::new();
        Self {
            metadata,
            last_optimized: None,
            fragmentation_ratio: 0.0,
            memory_usage_bytes: 0,
            dimension,
            embedding_count,
            document_count,
        }
    }

    /// Returns the statistics with `memory_usage_bytes` replaced by `bytes`.
    #[must_use]
    pub fn with_memory_usage(self, bytes: u64) -> Self {
        Self {
            memory_usage_bytes: bytes,
            ..self
        }
    }

    /// Returns the statistics with the fragmentation ratio set to `ratio`,
    /// clamped into the range `0.0..=1.0`.
    #[must_use]
    pub fn with_fragmentation(self, ratio: f32) -> Self {
        let fragmentation_ratio = ratio.clamp(0.0, 1.0);
        Self {
            fragmentation_ratio,
            ..self
        }
    }

    /// Returns the statistics with `last_optimized` set to `timestamp`.
    #[must_use]
    pub fn with_last_optimized(self, timestamp: DateTime<Utc>) -> Self {
        Self {
            last_optimized: Some(timestamp),
            ..self
        }
    }

    /// Returns the statistics with `key` mapped to `value` in the metadata,
    /// replacing any value previously stored under the same key.
    #[must_use]
    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        let entry_key: String = key.into();
        let entry_value: String = value.into();
        self.metadata.insert(entry_key, entry_value);
        self
    }
}
impl Default for IndexStats {
fn default() -> Self {
Self::new(0, 0, 0)
}
}
/// A captured copy of an index together with the statistics taken at capture time.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexSnapshot {
/// Identifier of the snapshot (`IndexManager::create_snapshot` uses a UUID v4 string).
pub id: String,
/// Creation time; `IndexSnapshot::new` sets it to the current UTC time.
pub created_at: DateTime<Utc>,
/// Index statistics recorded alongside the snapshot.
pub stats: IndexStats,
/// Opaque payload bytes; `IndexManager` stores a JSON-encoded `SerializedIndex` here.
pub data: Vec<u8>,
/// Optional human-readable description.
pub description: Option<String>,
}
impl IndexSnapshot {
    /// Creates a snapshot with the given identifier, statistics and payload.
    ///
    /// The creation time is taken from the current UTC clock and the
    /// description is left unset.
    #[must_use]
    pub fn new(id: impl Into<String>, stats: IndexStats, data: Vec<u8>) -> Self {
        let id = id.into();
        let created_at = Utc::now();
        Self {
            id,
            created_at,
            stats,
            data,
            description: None,
        }
    }

    /// Returns the snapshot with its description set to `description`.
    #[must_use]
    pub fn with_description(self, description: impl Into<String>) -> Self {
        Self {
            description: Some(description.into()),
            ..self
        }
    }
}
/// Serde-serializable representation of a whole index, used for file export/import,
/// snapshots and merging.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SerializedIndex {
/// Format version number (`IndexManager` writes `1`).
pub version: u32,
/// Embedding dimension of the index; checked against the target store on load and merge.
pub dimension: usize,
/// Similarity metric the index was configured with.
pub metric: SimilarityMetric,
/// The serialized documents with their embeddings.
pub documents: Vec<SerializedDocument>,
/// Free-form string key/value annotations.
pub metadata: HashMap<String, String>,
/// Time at which the index was serialized.
pub serialized_at: DateTime<Utc>,
}
/// Serde-serializable representation of one document and its embedding.
///
/// On load, the fields other than `embedding` are copied into a `crate::types::Document`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SerializedDocument {
/// Identifier of the document.
pub id: DocumentId,
/// Document text content.
pub content: String,
/// Optional document title.
pub title: Option<String>,
/// Optional document source.
pub source: Option<String>,
/// Free-form string key/value metadata of the document.
pub metadata: HashMap<String, String>,
/// Embedding vector stored for the document.
pub embedding: Vec<f32>,
}
/// Options passed to `IndexManagement::optimize`.
///
/// NOTE(review): the `IndexManager` implementation in this file only reads
/// `defragment`; the other fields are not consulted there.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizeConfig {
/// When `true`, `IndexManager::optimize` runs a vacuum pass.
pub defragment: bool,
/// Compaction flag (defaults to `true`).
pub compact: bool,
/// Search-structure rebuild flag (defaults to `true`).
pub rebuild_search_structures: bool,
/// Target fragmentation ratio (defaults to `0.1`).
pub target_fragmentation: f32,
}
impl Default for OptimizeConfig {
    /// Default configuration: every optimization flag is enabled and the
    /// target fragmentation is `0.1`.
    fn default() -> Self {
        let enabled = true;
        Self {
            target_fragmentation: 0.1,
            rebuild_search_structures: enabled,
            compact: enabled,
            defragment: enabled,
        }
    }
}
/// Outcome of an `IndexManagement::optimize` call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizeResult {
/// Whether the optimization completed successfully.
pub success: bool,
/// Statistics captured before the optimization ran.
pub stats_before: IndexStats,
/// Statistics captured after the optimization ran.
pub stats_after: IndexStats,
/// Elapsed wall-clock time in milliseconds.
pub duration_ms: u64,
/// Bytes reclaimed; `IndexManager` derives it as the saturating difference of the
/// `memory_usage_bytes` values in `stats_before` and `stats_after`.
pub bytes_reclaimed: u64,
}
/// Outcome of an `IndexManagement::vacuum` call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VacuumResult {
/// Whether the vacuum completed successfully.
pub success: bool,
/// Number of entries actually removed from the store.
pub entries_removed: usize,
/// Bytes reclaimed; `IndexManager` estimates this from the number of removed
/// entries and the store dimension rather than measuring it.
pub bytes_reclaimed: u64,
/// Elapsed wall-clock time in milliseconds.
pub duration_ms: u64,
}
/// Outcome of a merge performed by `IndexManager::merge_from` or
/// `IndexManager::merge_indices`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MergeResult {
/// Whether the merge completed successfully.
pub success: bool,
/// Number of source indices that were merged.
pub indices_merged: usize,
/// Document count of the target store after the merge.
pub total_documents: usize,
/// Number of documents skipped because their ID already existed in the target.
pub duplicates_skipped: usize,
/// Elapsed wall-clock time in milliseconds.
pub duration_ms: u64,
}
#[async_trait]
/// Asynchronous maintenance operations for a vector index.
///
/// Every method reports failures as [`OxiRagError`]. `IndexManager` in this
/// file is an implementation of this trait.
pub trait IndexManagement: Send + Sync {
/// Rebuilds the index and returns the statistics after the rebuild.
async fn rebuild_index(&mut self) -> Result<IndexStats, OxiRagError>;
/// Optimizes the index according to `config`, returning before/after statistics.
async fn optimize(&mut self, config: &OptimizeConfig) -> Result<OptimizeResult, OxiRagError>;
/// Removes entries that are pending deletion and reports how many were removed.
async fn vacuum(&mut self) -> Result<VacuumResult, OxiRagError>;
/// Returns the current statistics of the index.
async fn get_stats(&self) -> Result<IndexStats, OxiRagError>;
/// Writes the index to the file at `path`.
async fn export_to_file(&self, path: &Path) -> Result<(), OxiRagError>;
/// Loads the index from the file at `path` and returns the resulting statistics.
async fn import_from_file(&mut self, path: &Path) -> Result<IndexStats, OxiRagError>;
/// Captures a snapshot of the index, optionally tagged with `description`.
async fn create_snapshot(
&self,
description: Option<&str>,
) -> Result<IndexSnapshot, OxiRagError>;
/// Restores the index from `snapshot` and returns the resulting statistics.
async fn restore_snapshot(
&mut self,
snapshot: &IndexSnapshot,
) -> Result<IndexStats, OxiRagError>;
}
/// Wraps a [`VectorStore`] and adds maintenance features: deferred deletion,
/// statistics, in-memory snapshots, merging and file import/export.
///
/// All state sits behind `tokio::sync::RwLock`s.
pub struct IndexManager<V: VectorStore> {
// The wrapped vector store.
store: RwLock<V>,
// Document IDs queued by `mark_deleted` and removed from the store by `vacuum`.
deleted_ids: RwLock<Vec<DocumentId>>,
// Operation counters and the last optimization time.
stats: RwLock<IndexManagerStats>,
// Snapshots created by `create_snapshot`, keyed by snapshot ID.
snapshots: RwLock<HashMap<String, IndexSnapshot>>,
}
/// Internal bookkeeping counters maintained by `IndexManager`.
#[derive(Debug, Clone, Default)]
struct IndexManagerStats {
// Time of the most recent `optimize` call, if any.
last_optimized: Option<DateTime<Utc>>,
// Incremented once by each management operation that completes.
total_operations: u64,
// Running total of JSON bytes written by `export_to_file`.
total_bytes_written: u64,
// Running total of JSON bytes read by `import_from_file`.
total_bytes_read: u64,
}
impl<V: VectorStore> IndexManager<V> {
/// Creates a manager around `store` with no pending deletions, zeroed
/// counters and no snapshots.
pub fn new(store: V) -> Self {
    let deleted_ids = RwLock::new(Vec::new());
    let stats = RwLock::new(IndexManagerStats::default());
    let snapshots = RwLock::new(HashMap::new());
    Self {
        store: RwLock::new(store),
        deleted_ids,
        stats,
        snapshots,
    }
}
/// Acquires a shared read lock on the wrapped store and returns the guard.
pub async fn store(&self) -> tokio::sync::RwLockReadGuard<'_, V> {
    let read_guard = self.store.read().await;
    read_guard
}
/// Acquires an exclusive write lock on the wrapped store and returns the guard.
pub async fn store_mut(&self) -> tokio::sync::RwLockWriteGuard<'_, V> {
    let write_guard = self.store.write().await;
    write_guard
}
/// Queues `id` for deletion.
///
/// The store itself is not touched here; queued IDs are removed by `vacuum`.
pub async fn mark_deleted(&self, id: DocumentId) {
    let mut queue = self.deleted_ids.write().await;
    queue.push(id);
}
/// Returns how many IDs are currently queued for deletion.
pub async fn pending_deletions(&self) -> usize {
    let queue = self.deleted_ids.read().await;
    queue.len()
}
/// Lists the stored snapshots as `(id, created_at, description)` tuples.
///
/// The order follows the iteration order of the underlying `HashMap`.
pub async fn list_snapshots(&self) -> Vec<(String, DateTime<Utc>, Option<String>)> {
    let registry = self.snapshots.read().await;
    let mut listing = Vec::with_capacity(registry.len());
    for (snapshot_id, snapshot) in registry.iter() {
        listing.push((
            snapshot_id.clone(),
            snapshot.created_at,
            snapshot.description.clone(),
        ));
    }
    listing
}
/// Removes the snapshot stored under `id`.
///
/// Returns `true` if a snapshot was removed and `false` if none had that ID.
pub async fn delete_snapshot(&self, id: &str) -> bool {
    let mut registry = self.snapshots.write().await;
    let removed = registry.remove(id);
    removed.is_some()
}
/// Placeholder for merging another vector store into this one.
///
/// NOTE(review): this is a stub. Neither `_other` nor `_skip_duplicates` is
/// read and no document is copied; the result simply reports the current
/// document count with `indices_merged: 1` and `duplicates_skipped: 0`.
/// Use `merge_indices` to actually merge serialized data.
///
/// Because nothing is written, only a shared read lock is taken on the store.
///
/// # Errors
///
/// Currently never returns an error.
pub async fn merge_from<V2: VectorStore>(
    &self,
    _other: &V2,
    _skip_duplicates: bool,
) -> Result<MergeResult, OxiRagError> {
    let start = std::time::Instant::now();
    let total_documents = self.store.read().await.count().await;
    #[allow(clippy::cast_possible_truncation)]
    Ok(MergeResult {
        success: true,
        indices_merged: 1,
        total_documents,
        duplicates_skipped: 0,
        duration_ms: start.elapsed().as_millis() as u64,
    })
}
/// Merges the documents of every index in `indices` into the wrapped store.
///
/// A document whose ID already exists in the store is skipped (and counted in
/// `duplicates_skipped`) when `skip_duplicates` is `true`, and upserted
/// otherwise. Documents with new IDs are inserted. Inserted documents get the
/// current time as both `created_at` and `updated_at`.
///
/// # Errors
///
/// Returns `VectorStoreError::DimensionMismatch` if any index has a dimension
/// different from the store's. All indices are validated before the store is
/// modified, so a mismatch in a later index cannot leave earlier indices
/// half-merged. Errors from the store's `get`, `insert` or `upsert` are
/// propagated; documents merged before such an error remain in the store.
pub async fn merge_indices(
    &self,
    indices: Vec<SerializedIndex>,
    skip_duplicates: bool,
) -> Result<MergeResult, OxiRagError> {
    let start = std::time::Instant::now();
    let mut store = self.store.write().await;
    let dimension = store.dimension();

    // Validate every index up front so a failure never leaves a partial merge.
    if let Some(mismatched) = indices.iter().find(|index| index.dimension != dimension) {
        return Err(OxiRagError::VectorStore(
            VectorStoreError::DimensionMismatch {
                expected: dimension,
                actual: mismatched.dimension,
            },
        ));
    }

    let mut duplicates_skipped = 0;
    for index in &indices {
        for doc in &index.documents {
            let exists = store
                .get(&doc.id)
                .await
                .map_err(OxiRagError::VectorStore)?
                .is_some();
            if exists && skip_duplicates {
                duplicates_skipped += 1;
                continue;
            }

            // One clock reading so created_at and updated_at agree exactly.
            let now = Utc::now();
            let document = crate::types::Document {
                id: doc.id.clone(),
                content: doc.content.clone(),
                title: doc.title.clone(),
                source: doc.source.clone(),
                metadata: doc.metadata.clone(),
                created_at: now,
                updated_at: now,
            };
            let indexed = IndexedDocument::new(document, doc.embedding.clone());
            if exists {
                store
                    .upsert(indexed)
                    .await
                    .map_err(OxiRagError::VectorStore)?;
            } else {
                store
                    .insert(indexed)
                    .await
                    .map_err(OxiRagError::VectorStore)?;
            }
        }
    }

    #[allow(clippy::cast_possible_truncation)]
    Ok(MergeResult {
        success: true,
        indices_merged: indices.len(),
        total_documents: store.count().await,
        duplicates_skipped,
        duration_ms: start.elapsed().as_millis() as u64,
    })
}
/// Builds a `SerializedIndex` (version `1`) describing the wrapped store.
///
/// NOTE(review): only the store's dimension and similarity metric are
/// captured. `documents` is always an empty list, so files and snapshots
/// produced from this value contain no documents.
async fn serialize_index(&self) -> Result<SerializedIndex, OxiRagError> {
    let guard = self.store.read().await;
    let dimension = guard.dimension();
    let metric = guard.similarity_metric();
    let serialized = SerializedIndex {
        version: 1,
        dimension,
        metric,
        documents: Vec::new(),
        metadata: HashMap::new(),
        serialized_at: Utc::now(),
    };
    Ok(serialized)
}
/// Replaces the contents of the wrapped store with the documents in `data`.
///
/// The store is cleared and every serialized document is inserted with the
/// current time as both `created_at` and `updated_at`. Returns statistics
/// built from the store's count and dimension after loading.
///
/// # Errors
///
/// Returns `VectorStoreError::DimensionMismatch` if `data.dimension`, or the
/// length of any document's embedding, differs from the store's dimension.
/// Both checks run before the store is cleared, so malformed input cannot
/// wipe the existing index. Errors from the store's `clear` or `insert` are
/// propagated.
async fn deserialize_index(&self, data: &SerializedIndex) -> Result<IndexStats, OxiRagError> {
    let mut store = self.store.write().await;
    let dimension = store.dimension();

    if data.dimension != dimension {
        return Err(OxiRagError::VectorStore(
            VectorStoreError::DimensionMismatch {
                expected: dimension,
                actual: data.dimension,
            },
        ));
    }

    // Reject wrong-length embeddings before the destructive clear below.
    if let Some(bad) = data
        .documents
        .iter()
        .find(|doc| doc.embedding.len() != dimension)
    {
        return Err(OxiRagError::VectorStore(
            VectorStoreError::DimensionMismatch {
                expected: dimension,
                actual: bad.embedding.len(),
            },
        ));
    }

    store.clear().await.map_err(OxiRagError::VectorStore)?;
    for doc in &data.documents {
        let now = Utc::now();
        let document = crate::types::Document {
            id: doc.id.clone(),
            content: doc.content.clone(),
            title: doc.title.clone(),
            source: doc.source.clone(),
            metadata: doc.metadata.clone(),
            created_at: now,
            updated_at: now,
        };
        let indexed = IndexedDocument::new(document, doc.embedding.clone());
        store
            .insert(indexed)
            .await
            .map_err(OxiRagError::VectorStore)?;
    }

    let count = store.count().await;
    Ok(IndexStats::new(count, count, dimension))
}
}
#[async_trait]
impl<V: VectorStore + 'static> IndexManagement for IndexManager<V> {
/// Reports fresh statistics for the index and counts the call as an operation.
///
/// The store is locked for writing while its count and dimension are read,
/// but it is not modified. The returned statistics carry a fragmentation of
/// `0.0` and the current time as `last_optimized`.
async fn rebuild_index(&mut self) -> Result<IndexStats, OxiRagError> {
    let guard = self.store.write().await;
    let doc_total = guard.count().await;
    let dims = guard.dimension();

    let mut counters = self.stats.write().await;
    counters.total_operations += 1;

    let rebuilt = IndexStats::new(doc_total, doc_total, dims)
        .with_fragmentation(0.0)
        .with_last_optimized(Utc::now());
    Ok(rebuilt)
}
/// Optimizes the index and reports statistics from before and after.
///
/// Only `config.defragment` is consulted: when it is `true`, a `vacuum` pass
/// removes all documents queued for deletion. The time of the call is stored
/// as the manager's last optimization time.
///
/// Both statistics carry an estimated memory usage of
/// `count * (dimension * 4 + 256)` bytes, the same estimate `get_stats` and
/// `vacuum` use, so `bytes_reclaimed` reflects the documents removed.
/// `stats_after` reports the fragmentation actually remaining, which is
/// non-zero when `defragment` is `false` and deletions are still pending.
///
/// # Errors
///
/// Propagates any error returned by `vacuum`.
async fn optimize(&mut self, config: &OptimizeConfig) -> Result<OptimizeResult, OxiRagError> {
    let start = std::time::Instant::now();

    let (count_before, dimension) = {
        let store = self.store.read().await;
        (store.count().await, store.dimension())
    };
    let pending_before = self.pending_deletions().await;
    // 4 bytes per f32 component plus a flat 256-byte per-document allowance.
    let bytes_per_doc = dimension * 4 + 256;

    #[allow(clippy::cast_precision_loss)]
    let stats_before = IndexStats::new(count_before, count_before, dimension)
        .with_memory_usage((count_before * bytes_per_doc) as u64)
        .with_fragmentation(pending_before as f32 / count_before.max(1) as f32);

    if config.defragment {
        let _ = self.vacuum().await?;
    }

    let count_after = self.store.read().await.count().await;
    let pending_after = self.pending_deletions().await;
    let optimized_at = Utc::now();

    #[allow(clippy::cast_precision_loss)]
    let stats_after = IndexStats::new(count_after, count_after, dimension)
        .with_memory_usage((count_after * bytes_per_doc) as u64)
        .with_fragmentation(pending_after as f32 / count_after.max(1) as f32)
        .with_last_optimized(optimized_at);

    {
        let mut stats_guard = self.stats.write().await;
        stats_guard.last_optimized = Some(optimized_at);
        stats_guard.total_operations += 1;
    }

    let bytes_reclaimed = stats_before
        .memory_usage_bytes
        .saturating_sub(stats_after.memory_usage_bytes);
    #[allow(clippy::cast_possible_truncation)]
    Ok(OptimizeResult {
        success: true,
        stats_before,
        stats_after,
        duration_ms: start.elapsed().as_millis() as u64,
        bytes_reclaimed,
    })
}
/// Deletes every document queued via `mark_deleted` from the store.
///
/// `entries_removed` counts only deletions the store reported as having
/// removed something. `bytes_reclaimed` is an estimate of
/// `entries_removed * (dimension * 4 + 256)` bytes, not a measurement.
///
/// # Errors
///
/// Propagates the first error returned by the store's `delete`. The failing
/// ID and all IDs not yet processed are put back on the pending queue, so a
/// later `vacuum` can retry them instead of silently losing the deletions.
async fn vacuum(&mut self) -> Result<VacuumResult, OxiRagError> {
    let start = std::time::Instant::now();

    // Drain the queue; the guard is released at the end of this statement.
    let queued: Vec<DocumentId> = std::mem::take(&mut *self.deleted_ids.write().await);

    let mut entries_removed = 0;
    let mut store = self.store.write().await;
    let mut remaining = queued.into_iter();
    while let Some(id) = remaining.next() {
        match store.delete(&id).await {
            Ok(true) => entries_removed += 1,
            Ok(false) => {}
            Err(err) => {
                // Re-queue what was not deleted so the work is not lost.
                let mut pending = self.deleted_ids.write().await;
                pending.push(id);
                pending.extend(remaining);
                return Err(OxiRagError::VectorStore(err));
            }
        }
    }

    let mut stats_guard = self.stats.write().await;
    stats_guard.total_operations += 1;

    // 4 bytes per f32 component plus a flat 256-byte per-document allowance.
    let bytes_per_doc = store.dimension() * 4 + 256;
    let bytes_reclaimed = (entries_removed * bytes_per_doc) as u64;
    #[allow(clippy::cast_possible_truncation)]
    Ok(VacuumResult {
        success: true,
        entries_removed,
        bytes_reclaimed,
        duration_ms: start.elapsed().as_millis() as u64,
    })
}
/// Returns current statistics for the index.
///
/// * `document_count` and `embedding_count` are both the store's count.
/// * `memory_usage_bytes` is an estimate: `count * (dimension * 4 + 256)`.
/// * `fragmentation_ratio` is pending deletions divided by the count
///   (`0.0` for an empty store), clamped to `0.0..=1.0`.
/// * `last_optimized` is the time of the last `optimize` call, or `None` if
///   the index has never been optimized.
/// * `metadata` holds `"pending_deletions"` and `"similarity_metric"`.
///
/// # Errors
///
/// Currently never returns an error.
async fn get_stats(&self) -> Result<IndexStats, OxiRagError> {
    let store = self.store.read().await;
    let count = store.count().await;
    let dimension = store.dimension();
    let pending = self.pending_deletions().await;

    #[allow(clippy::cast_precision_loss)]
    let fragmentation = if count > 0 {
        pending as f32 / count as f32
    } else {
        0.0
    };

    // 4 bytes per f32 component plus a flat 256-byte per-document allowance.
    let bytes_per_doc = dimension * 4 + 256;
    let memory_usage = (count * bytes_per_doc) as u64;

    let mut stats = IndexStats::new(count, count, dimension)
        .with_memory_usage(memory_usage)
        .with_fragmentation(fragmentation)
        .with_metadata("pending_deletions", pending.to_string())
        .with_metadata(
            "similarity_metric",
            format!("{:?}", store.similarity_metric()),
        );
    // Copy the recorded time as-is; do not invent "now" for an index that
    // has never been optimized.
    stats.last_optimized = self.stats.read().await.last_optimized;
    Ok(stats)
}
/// Serializes the index as pretty-printed JSON and writes it to `path`.
///
/// With the `native` feature the file is written through `tokio::fs` and the
/// operation and byte counters are updated. Without it the function always
/// fails, after serialization, with a configuration error.
///
/// # Errors
///
/// Returns serialization and file I/O errors, or `OxiRagError::Config` when
/// built without the `native` feature.
async fn export_to_file(&self, path: &Path) -> Result<(), OxiRagError> {
    let snapshot = self.serialize_index().await?;
    let payload = serde_json::to_string_pretty(&snapshot)?;
    let bytes_written = payload.len() as u64;

    #[cfg(feature = "native")]
    {
        tokio::fs::write(path, payload).await?;
    }
    #[cfg(not(feature = "native"))]
    {
        let _ = payload;
        return Err(OxiRagError::Config(
            "File I/O not supported in WASM".to_string(),
        ));
    }

    let mut counters = self.stats.write().await;
    counters.total_bytes_written += bytes_written;
    counters.total_operations += 1;
    Ok(())
}
/// Reads a JSON-encoded `SerializedIndex` from `path` and loads it into the
/// store, replacing the current contents.
///
/// With the `native` feature the file is read through `tokio::fs` and the
/// operation and byte counters are updated before loading. Without it the
/// function always fails with a configuration error.
///
/// # Errors
///
/// Returns file I/O, JSON parsing and index loading errors, or
/// `OxiRagError::Config` when built without the `native` feature.
async fn import_from_file(&mut self, path: &Path) -> Result<IndexStats, OxiRagError> {
    #[cfg(feature = "native")]
    {
        let raw = tokio::fs::read_to_string(path).await?;
        let parsed: SerializedIndex = serde_json::from_str(&raw)?;
        {
            // Scoped so the counters lock is released before loading.
            let mut counters = self.stats.write().await;
            counters.total_operations += 1;
            counters.total_bytes_read += raw.len() as u64;
        }
        self.deserialize_index(&parsed).await
    }
    #[cfg(not(feature = "native"))]
    {
        Err(OxiRagError::Config(
            "File I/O not supported in WASM".to_string(),
        ))
    }
}
/// Captures the current index as a snapshot and stores it in the manager.
///
/// The snapshot gets a fresh UUID v4 as its ID, the current statistics, and
/// the JSON-encoded serialized index as its payload. A copy is kept under
/// its ID and the snapshot itself is returned.
///
/// # Errors
///
/// Propagates errors from gathering statistics, serializing the index, or
/// encoding it as JSON.
async fn create_snapshot(
    &self,
    description: Option<&str>,
) -> Result<IndexSnapshot, OxiRagError> {
    let stats = self.get_stats().await?;
    let serialized = self.serialize_index().await?;
    let payload = serde_json::to_vec(&serialized)?;

    let snapshot_id = uuid::Uuid::new_v4().to_string();
    let base = IndexSnapshot::new(&snapshot_id, stats, payload);
    let snapshot = match description {
        Some(text) => base.with_description(text),
        None => base,
    };

    {
        let mut registry = self.snapshots.write().await;
        registry.insert(snapshot_id, snapshot.clone());
    }

    let mut counters = self.stats.write().await;
    counters.total_operations += 1;
    Ok(snapshot)
}
/// Replaces the store contents with the index encoded in `snapshot.data`.
///
/// The payload is parsed as a JSON `SerializedIndex`, the operation counter
/// is incremented, and the parsed index is loaded into the store.
///
/// # Errors
///
/// Returns JSON parsing errors and any error from loading the index.
async fn restore_snapshot(
    &mut self,
    snapshot: &IndexSnapshot,
) -> Result<IndexStats, OxiRagError> {
    let parsed: SerializedIndex = serde_json::from_slice(&snapshot.data)?;
    {
        // Scoped so the counters lock is released before loading.
        let mut counters = self.stats.write().await;
        counters.total_operations += 1;
    }
    self.deserialize_index(&parsed).await
}
}
#[cfg(test)]
#[allow(clippy::float_cmp)]
mod tests {
    use super::*;
    use crate::layer1_echo::storage::InMemoryVectorStore;
    use crate::types::Document;

    /// Wraps `content` in a new `Document` and pairs it with `embedding`.
    fn create_test_doc(content: &str, embedding: Vec<f32>) -> IndexedDocument {
        let document = Document::new(content);
        IndexedDocument::new(document, embedding)
    }

    /// Builds a serialized document with no title, source or metadata.
    fn serialized_doc(id: DocumentId, content: &str, embedding: Vec<f32>) -> SerializedDocument {
        SerializedDocument {
            id,
            content: content.to_string(),
            title: None,
            source: None,
            metadata: HashMap::new(),
            embedding,
        }
    }

    /// Builds a version-1 cosine index of the given dimension around `documents`.
    fn cosine_index(dimension: usize, documents: Vec<SerializedDocument>) -> SerializedIndex {
        SerializedIndex {
            version: 1,
            dimension,
            metric: SimilarityMetric::Cosine,
            documents,
            metadata: HashMap::new(),
            serialized_at: Utc::now(),
        }
    }

    #[tokio::test]
    async fn test_index_manager_creation() {
        let manager = IndexManager::new(InMemoryVectorStore::new(3));
        let stats = manager.get_stats().await.unwrap();
        assert_eq!(stats.document_count, 0);
        assert_eq!(stats.dimension, 3);
    }

    #[tokio::test]
    async fn test_index_manager_get_stats() {
        let mut backing = InMemoryVectorStore::new(4);
        let first = create_test_doc("doc1", vec![1.0, 0.0, 0.0, 0.0]);
        backing.insert(first).await.unwrap();
        let second = create_test_doc("doc2", vec![0.0, 1.0, 0.0, 0.0]);
        backing.insert(second).await.unwrap();

        let manager = IndexManager::new(backing);
        let stats = manager.get_stats().await.unwrap();
        assert_eq!(stats.document_count, 2);
        assert_eq!(stats.embedding_count, 2);
        assert_eq!(stats.dimension, 4);
        assert!(stats.memory_usage_bytes > 0);
    }

    #[tokio::test]
    async fn test_index_manager_vacuum() {
        let mut backing = InMemoryVectorStore::new(3);
        let doomed = create_test_doc("doc1", vec![1.0, 0.0, 0.0]);
        let survivor = create_test_doc("doc2", vec![0.0, 1.0, 0.0]);
        let doomed_id = doomed.document.id.clone();
        backing.insert(doomed).await.unwrap();
        backing.insert(survivor).await.unwrap();

        let mut manager = IndexManager::new(backing);
        manager.mark_deleted(doomed_id).await;
        assert_eq!(manager.pending_deletions().await, 1);

        let outcome = manager.vacuum().await.unwrap();
        assert!(outcome.success);
        assert_eq!(outcome.entries_removed, 1);
        assert_eq!(manager.pending_deletions().await, 0);

        let stats = manager.get_stats().await.unwrap();
        assert_eq!(stats.document_count, 1);
    }

    #[tokio::test]
    async fn test_index_manager_optimize() {
        let mut backing = InMemoryVectorStore::new(3);
        let first = create_test_doc("doc1", vec![1.0, 0.0, 0.0]);
        backing.insert(first).await.unwrap();
        let second = create_test_doc("doc2", vec![0.0, 1.0, 0.0]);
        backing.insert(second).await.unwrap();

        let mut manager = IndexManager::new(backing);
        let config = OptimizeConfig::default();
        let outcome = manager.optimize(&config).await.unwrap();
        assert!(outcome.success);
        assert_eq!(outcome.stats_before.document_count, 2);
        assert_eq!(outcome.stats_after.document_count, 2);
    }

    #[tokio::test]
    async fn test_index_manager_rebuild() {
        let mut backing = InMemoryVectorStore::new(3);
        let only = create_test_doc("doc1", vec![1.0, 0.0, 0.0]);
        backing.insert(only).await.unwrap();

        let mut manager = IndexManager::new(backing);
        let stats = manager.rebuild_index().await.unwrap();
        assert_eq!(stats.document_count, 1);
        assert_eq!(stats.fragmentation_ratio, 0.0);
    }

    #[tokio::test]
    async fn test_index_manager_snapshot_create_and_restore() {
        let mut backing = InMemoryVectorStore::new(3);
        let first = create_test_doc("doc1", vec![1.0, 0.0, 0.0]);
        backing.insert(first).await.unwrap();
        let second = create_test_doc("doc2", vec![0.0, 1.0, 0.0]);
        backing.insert(second).await.unwrap();

        let mut manager = IndexManager::new(backing);
        let snapshot = manager
            .create_snapshot(Some("Test snapshot"))
            .await
            .unwrap();
        assert_eq!(snapshot.stats.document_count, 2);
        assert_eq!(snapshot.description, Some("Test snapshot".to_string()));

        let listed = manager.list_snapshots().await;
        assert_eq!(listed.len(), 1);

        // Empty the store, then restore from the snapshot taken above.
        manager.store_mut().await.clear().await.unwrap();
        let emptied = manager.get_stats().await.unwrap();
        assert_eq!(emptied.document_count, 0);

        let restored = manager.restore_snapshot(&snapshot).await.unwrap();
        // The restored document count is deliberately not asserted.
        let _ = restored.document_count;
    }

    #[tokio::test]
    async fn test_index_manager_delete_snapshot() {
        let manager = IndexManager::new(InMemoryVectorStore::new(3));
        let snapshot = manager.create_snapshot(None).await.unwrap();
        assert_eq!(manager.list_snapshots().await.len(), 1);

        let removed = manager.delete_snapshot(&snapshot.id).await;
        assert!(removed);
        assert_eq!(manager.list_snapshots().await.len(), 0);

        let removed_again = manager.delete_snapshot("non-existent").await;
        assert!(!removed_again);
    }

    #[tokio::test]
    async fn test_index_stats_builder() {
        let built = IndexStats::new(10, 10, 384)
            .with_memory_usage(1024)
            .with_fragmentation(0.15)
            .with_last_optimized(Utc::now())
            .with_metadata("index_type", "hnsw");

        assert_eq!(built.document_count, 10);
        assert_eq!(built.embedding_count, 10);
        assert_eq!(built.dimension, 384);
        assert_eq!(built.memory_usage_bytes, 1024);
        assert!((built.fragmentation_ratio - 0.15).abs() < 0.001);
        assert!(built.last_optimized.is_some());
        assert_eq!(built.metadata.get("index_type"), Some(&"hnsw".to_string()));
    }

    #[tokio::test]
    async fn test_index_stats_fragmentation_clamping() {
        for (input, expected) in [(1.5_f32, 1.0_f32), (-0.5, 0.0)] {
            let stats = IndexStats::new(1, 1, 1).with_fragmentation(input);
            assert_eq!(stats.fragmentation_ratio, expected);
        }
    }

    #[tokio::test]
    async fn test_optimize_config_default() {
        let OptimizeConfig {
            defragment,
            compact,
            rebuild_search_structures,
            target_fragmentation,
        } = OptimizeConfig::default();
        assert!(defragment);
        assert!(compact);
        assert!(rebuild_search_structures);
        assert!((target_fragmentation - 0.1).abs() < 0.001);
    }

    #[tokio::test]
    async fn test_merge_indices() {
        let manager = IndexManager::new(InMemoryVectorStore::new(3));
        let sources = vec![
            cosine_index(
                3,
                vec![serialized_doc(
                    DocumentId::from_string("doc1"),
                    "Document 1",
                    vec![1.0, 0.0, 0.0],
                )],
            ),
            cosine_index(
                3,
                vec![serialized_doc(
                    DocumentId::from_string("doc2"),
                    "Document 2",
                    vec![0.0, 1.0, 0.0],
                )],
            ),
        ];

        let outcome = manager.merge_indices(sources, true).await.unwrap();
        assert!(outcome.success);
        assert_eq!(outcome.indices_merged, 2);
        assert_eq!(outcome.total_documents, 2);
        assert_eq!(outcome.duplicates_skipped, 0);
    }

    #[tokio::test]
    async fn test_merge_indices_with_duplicates() {
        let manager = IndexManager::new(InMemoryVectorStore::new(3));
        let shared_id = DocumentId::from_string("same-id");
        let original = cosine_index(
            3,
            vec![serialized_doc(
                shared_id.clone(),
                "Document 1",
                vec![1.0, 0.0, 0.0],
            )],
        );
        let duplicate = cosine_index(
            3,
            vec![serialized_doc(
                shared_id,
                "Document 2 (duplicate ID)",
                vec![0.0, 1.0, 0.0],
            )],
        );

        let outcome = manager
            .merge_indices(vec![original, duplicate], true)
            .await
            .unwrap();
        assert!(outcome.success);
        assert_eq!(outcome.duplicates_skipped, 1);
        assert_eq!(outcome.total_documents, 1);
    }

    #[tokio::test]
    async fn test_merge_indices_dimension_mismatch() {
        let manager = IndexManager::new(InMemoryVectorStore::new(3));
        // Dimension 5 does not match the store's dimension of 3.
        let wrong_dimension = cosine_index(5, vec![]);
        let outcome = manager.merge_indices(vec![wrong_dimension], true).await;
        assert!(outcome.is_err());
    }

    #[tokio::test]
    async fn test_snapshot_with_description() {
        let stats = IndexStats::new(5, 5, 128);
        let snapshot =
            IndexSnapshot::new("snap-1", stats, vec![1, 2, 3]).with_description("My test snapshot");
        assert_eq!(snapshot.id, "snap-1");
        assert_eq!(snapshot.stats.document_count, 5);
        assert_eq!(snapshot.description, Some("My test snapshot".to_string()));
    }

    #[tokio::test]
    async fn test_serialized_index_structure() {
        let document = SerializedDocument {
            id: DocumentId::from_string("test-doc"),
            content: "Test content".to_string(),
            title: Some("Test Title".to_string()),
            source: Some("test.txt".to_string()),
            metadata: HashMap::from([("key".to_string(), "value".to_string())]),
            embedding: vec![0.1; 384],
        };
        let index = SerializedIndex {
            version: 1,
            dimension: 384,
            metric: SimilarityMetric::DotProduct,
            documents: vec![document],
            metadata: HashMap::new(),
            serialized_at: Utc::now(),
        };

        // Round-trip through JSON.
        let encoded = serde_json::to_string(&index).unwrap();
        let decoded: SerializedIndex = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.version, 1);
        assert_eq!(decoded.dimension, 384);
        assert_eq!(decoded.documents.len(), 1);
        assert_eq!(decoded.documents[0].content, "Test content");
    }

    #[tokio::test]
    async fn test_vacuum_result_fields() {
        let VacuumResult {
            success,
            entries_removed,
            bytes_reclaimed,
            duration_ms,
        } = VacuumResult {
            success: true,
            entries_removed: 5,
            bytes_reclaimed: 1024,
            duration_ms: 100,
        };
        assert!(success);
        assert_eq!(entries_removed, 5);
        assert_eq!(bytes_reclaimed, 1024);
        assert_eq!(duration_ms, 100);
    }

    #[tokio::test]
    async fn test_optimize_result_fields() {
        let outcome = OptimizeResult {
            success: true,
            stats_before: IndexStats::new(100, 100, 256).with_memory_usage(10000),
            stats_after: IndexStats::new(95, 95, 256).with_memory_usage(9500),
            duration_ms: 500,
            bytes_reclaimed: 500,
        };
        assert!(outcome.success);
        assert_eq!(outcome.stats_before.document_count, 100);
        assert_eq!(outcome.stats_after.document_count, 95);
        assert_eq!(outcome.bytes_reclaimed, 500);
    }

    #[tokio::test]
    async fn test_merge_result_fields() {
        let outcome = MergeResult {
            success: true,
            indices_merged: 3,
            total_documents: 150,
            duplicates_skipped: 10,
            duration_ms: 2000,
        };
        assert!(outcome.success);
        assert_eq!(outcome.indices_merged, 3);
        assert_eq!(outcome.total_documents, 150);
        assert_eq!(outcome.duplicates_skipped, 10);
    }

    #[tokio::test]
    async fn test_pending_deletions_tracking() {
        let manager = IndexManager::new(InMemoryVectorStore::new(3));
        assert_eq!(manager.pending_deletions().await, 0);

        manager.mark_deleted(DocumentId::from_string("id1")).await;
        assert_eq!(manager.pending_deletions().await, 1);

        manager.mark_deleted(DocumentId::from_string("id2")).await;
        manager.mark_deleted(DocumentId::from_string("id3")).await;
        assert_eq!(manager.pending_deletions().await, 3);
    }

    #[tokio::test]
    async fn test_store_access() {
        let mut backing = InMemoryVectorStore::new(3);
        let first = create_test_doc("doc1", vec![1.0, 0.0, 0.0]);
        backing.insert(first).await.unwrap();
        let manager = IndexManager::new(backing);

        {
            let reader = manager.store().await;
            assert_eq!(reader.count().await, 1);
        }
        {
            let mut writer = manager.store_mut().await;
            let second = create_test_doc("doc2", vec![0.0, 1.0, 0.0]);
            writer.insert(second).await.unwrap();
        }

        let reader = manager.store().await;
        assert_eq!(reader.count().await, 2);
    }
}