use anyhow::{Result, anyhow};
use serde::{Serialize, Deserialize};
use std::collections::HashMap;
/// Complete configuration describing one vector collection.
///
/// Instances are produced by `CollectionManager::get_chat_collection_schema`
/// and `CollectionManager::get_estate_collection_schema`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionSchema {
/// Name of the collection this schema describes.
pub collection_name: String,
/// Vector size and distance setting for the collection.
pub vectors: VectorConfig,
/// Payload field name mapped to its index type, as a string
/// (the builders in this file use `"keyword"` and `"integer"`).
pub payload_schema: HashMap<String, String>,
/// HNSW index settings.
pub hnsw_config: HnswConfig,
/// Optimizer settings.
pub optimizers_config: OptimizersConfig,
/// Write-ahead-log settings.
pub wal_config: WalConfig,
}
/// Vector settings of a collection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorConfig {
/// Vector size (number of dimensions); 1 for the chat schema and 1024 for
/// the estate schema built in this file.
pub size: usize,
/// Distance metric name as a string; both schemas in this file use `"Cosine"`.
pub distance: String,
}
/// HNSW index settings carried by a `CollectionSchema`.
///
/// NOTE(review): the exact meaning of each field is not shown in this file;
/// presumably they mirror the vector database's HNSW options — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HnswConfig {
/// `m` setting; 16 in both schemas built in this file.
pub m: u32,
/// `ef_construct` setting; 100 in both schemas built in this file.
pub ef_construct: u32,
/// `full_scan_threshold` setting; 1000 for chat and 10000 for estate.
pub full_scan_threshold: u32,
}
/// Optimizer settings carried by a `CollectionSchema`.
///
/// NOTE(review): field semantics are not shown in this file; presumably they
/// mirror the vector database's optimizer options — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizersConfig {
/// `deleted_threshold` setting; 0.2 in both schemas built in this file.
pub deleted_threshold: f32,
/// `vacuum_min_vector_number` setting; 1000 in both schemas built in this file.
pub vacuum_min_vector_number: u32,
/// `default_segment_number` setting; 0 in both schemas built in this file.
pub default_segment_number: u32,
}
/// Write-ahead-log settings carried by a `CollectionSchema`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WalConfig {
/// WAL capacity (presumably in megabytes, going by the field name — confirm);
/// 32 for chat and 64 for estate.
pub wal_capacity_mb: u32,
/// `wal_segments_ahead` setting; 0 in both schemas built in this file.
pub wal_segments_ahead: u32,
}
/// Health summary for a single collection, as reported by
/// `CollectionManager::get_collections_health`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionHealth {
/// Collection name.
pub name: String,
/// Status string taken from the vector store's health report
/// (`"yellow"` is used when the store reports nothing for the collection).
pub status: String,
/// Number of points in the collection according to the vector store.
pub points_count: u64,
/// Vector size (number of dimensions) reported for the collection.
pub vector_size: usize,
/// Names of the payload fields listed as indexed.
pub indexed_fields: Vec<String>,
}
/// Health summaries for the two collections managed by `CollectionManager`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionsHealth {
/// Health of the chat-history collection.
pub chat_history: CollectionHealth,
/// Health of the AWS-estate collection.
pub aws_estate: CollectionHealth,
}
/// Outcome of `CollectionManager::validate_collections`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationResult {
/// `true` when no issues were found.
pub valid: bool,
/// Human-readable description of each problem found; empty when `valid` is `true`.
pub issues: Vec<String>,
}
use std::sync::Arc;
use crate::db::VectorStore;
/// Owns the schema definitions for the chat-history and AWS-estate collections
/// and talks to the vector store on their behalf.
pub struct CollectionManager {
// Logical collection key -> collection name; `new` maps each key to itself.
collections: HashMap<String, String>,
// Shared handle to the vector store backend.
vector_store: Arc<dyn VectorStore + Send + Sync>,
}
impl CollectionManager {
pub fn new(vector_store: Arc<dyn VectorStore + Send + Sync>) -> Self {
let mut collections = HashMap::new();
collections.insert("chat_history".to_string(), "chat_history".to_string());
collections.insert("aws_estate".to_string(), "aws_estate".to_string());
Self {
collections,
vector_store,
}
}
/// Builds the schema for the chat-history collection.
///
/// The schema uses vectors of size 1 with `"Cosine"` distance and declares the
/// payload fields `context_id`, `message_index`, `role` and `timestamp`.
///
/// # Panics
/// Panics if `chat_history` is missing from the name map; `new` always
/// registers it.
pub fn get_chat_collection_schema(&self) -> CollectionSchema {
    // Registered by `new`, so the lookup is expected to succeed.
    let collection_name = self.collections.get("chat_history").cloned().unwrap();

    let payload_schema: HashMap<String, String> = [
        ("context_id", "keyword"),
        ("message_index", "integer"),
        ("role", "keyword"),
        ("timestamp", "integer"),
    ]
    .iter()
    .map(|&(field, kind)| (field.to_string(), kind.to_string()))
    .collect();

    let vectors = VectorConfig {
        size: 1,
        distance: "Cosine".to_string(),
    };
    let hnsw_config = HnswConfig {
        m: 16,
        ef_construct: 100,
        full_scan_threshold: 1000,
    };
    let optimizers_config = OptimizersConfig {
        deleted_threshold: 0.2,
        vacuum_min_vector_number: 1000,
        default_segment_number: 0,
    };
    let wal_config = WalConfig {
        wal_capacity_mb: 32,
        wal_segments_ahead: 0,
    };

    CollectionSchema {
        collection_name,
        vectors,
        payload_schema,
        hnsw_config,
        optimizers_config,
        wal_config,
    }
}
/// Builds the schema for the AWS-estate collection.
///
/// The schema uses vectors of size 1024 with `"Cosine"` distance and declares
/// the resource, account, region, service, state, sync-time and tag payload
/// fields listed below.
///
/// # Panics
/// Panics if `aws_estate` is missing from the name map; `new` always
/// registers it.
pub fn get_estate_collection_schema(&self) -> CollectionSchema {
    // Registered by `new`, so the lookup is expected to succeed.
    let collection_name = self.collections.get("aws_estate").cloned().unwrap();

    let payload_schema: HashMap<String, String> = [
        ("resource_type", "keyword"),
        ("account_id", "keyword"),
        ("account_name", "keyword"),
        ("region", "keyword"),
        ("service", "keyword"),
        ("state", "keyword"),
        ("last_synced", "integer"),
        ("tags.env", "keyword"),
        ("tags.app", "keyword"),
        ("tags.name", "keyword"),
    ]
    .iter()
    .map(|&(field, kind)| (field.to_string(), kind.to_string()))
    .collect();

    let vectors = VectorConfig {
        size: 1024,
        distance: "Cosine".to_string(),
    };
    let hnsw_config = HnswConfig {
        m: 16,
        ef_construct: 100,
        full_scan_threshold: 10000,
    };
    let optimizers_config = OptimizersConfig {
        deleted_threshold: 0.2,
        vacuum_min_vector_number: 1000,
        default_segment_number: 0,
    };
    let wal_config = WalConfig {
        wal_capacity_mb: 64,
        wal_segments_ahead: 0,
    };

    CollectionSchema {
        collection_name,
        vectors,
        payload_schema,
        hnsw_config,
        optimizers_config,
        wal_config,
    }
}
/// Ensures both managed collections exist, chat history first and then the
/// AWS estate.
///
/// Returns `Ok(true)` once both collections have been processed.
///
/// # Errors
/// Stops at, and returns, the first error reported while ensuring a
/// collection exists.
pub async fn initialize_collections(&self) -> Result<bool> {
    println!("🏗️ Initializing dual collection architecture...");

    // Both schemas are built up front; the collections are then handled in order.
    let schemas = [
        self.get_chat_collection_schema(),
        self.get_estate_collection_schema(),
    ];
    for schema in &schemas {
        self.create_collection_if_not_exists(&schema.collection_name, schema)
            .await?;
    }

    println!("✅ Both collections initialized successfully");
    Ok(true)
}
/// Makes sure `collection_name` exists in the vector store, creating it when
/// it is missing.
///
/// The store is first asked for its existing collections; only when
/// `collection_name` is absent is `create_collection` called, using the vector
/// size from `schema`. The store's `create_collection` takes only a name and a
/// vector size, so the remaining schema settings (distance, HNSW, optimizer,
/// WAL) are not forwarded here.
///
/// Returns `Ok(true)` when the collection is available after the call, whether
/// it already existed or was just created.
///
/// # Errors
/// Propagates any error from listing or creating collections in the store.
pub async fn create_collection_if_not_exists(
    &self,
    collection_name: &str,
    schema: &CollectionSchema,
) -> Result<bool> {
    let existing = self.vector_store.list_collections().await?;
    if existing.iter().any(|name| name == collection_name) {
        println!("✅ Collection '{}' already exists", collection_name);
        return Ok(true);
    }

    println!("📁 Creating collection '{}'...", collection_name);
    self.vector_store
        .create_collection(collection_name, schema.vectors.size)
        .await?;
    println!("✅ Collection '{}' created successfully", collection_name);
    Ok(true)
}
/// Reports the health of both managed collections.
///
/// Status and point count come from the vector store's health report, looked
/// up under the keys `chat_history` and `aws_estate`; a collection missing
/// from that report is shown with status `"yellow"` and zero points. The
/// collection name and vector size come from this manager's schema
/// definitions, and the indexed-field lists mirror the payload fields of those
/// schemas in a fixed order.
///
/// # Errors
/// Propagates any error from the vector store's health query.
///
/// # Panics
/// Panics if a schema builder cannot find its collection name, which does not
/// happen for a manager created with `new`.
pub async fn get_collections_health(&self) -> Result<CollectionsHealth> {
    let mut store_health = self.vector_store.get_collections_health().await?;

    // Take each entry out of the owned map instead of cloning it, and only
    // build the fallback values when the store reported nothing.
    let mut status_and_points = |key: &str| -> (String, u64) {
        store_health
            .remove(key)
            .map(|entry| (entry.status, entry.points_count as u64))
            .unwrap_or_else(|| ("yellow".to_string(), 0))
    };
    let (chat_status, chat_points) = status_and_points("chat_history");
    let (estate_status, estate_points) = status_and_points("aws_estate");

    // Names and vector sizes are read from the schemas so they cannot drift
    // from the definitions used to create the collections.
    let chat_schema = self.get_chat_collection_schema();
    let estate_schema = self.get_estate_collection_schema();

    let to_fields =
        |names: &[&str]| -> Vec<String> { names.iter().map(|name| name.to_string()).collect() };

    Ok(CollectionsHealth {
        chat_history: CollectionHealth {
            name: chat_schema.collection_name,
            status: chat_status,
            points_count: chat_points,
            vector_size: chat_schema.vectors.size,
            indexed_fields: to_fields(&["context_id", "message_index", "role", "timestamp"]),
        },
        aws_estate: CollectionHealth {
            name: estate_schema.collection_name,
            status: estate_status,
            points_count: estate_points,
            vector_size: estate_schema.vectors.size,
            indexed_fields: to_fields(&[
                "resource_type",
                "account_id",
                "account_name",
                "region",
                "service",
                "state",
                "last_synced",
                "tags.env",
                "tags.app",
                "tags.name",
            ]),
        },
    })
}
/// Returns the map from logical collection key to the collection name in use.
///
/// `new` fills it with `chat_history` and `aws_estate`, each mapped to itself.
pub fn get_collection_names(&self) -> &HashMap<String, String> {
&self.collections
}
/// Checks that the reported vector sizes match expectations: 1 for the chat
/// collection and 1024 for the estate collection.
///
/// The sizes checked are those returned by `get_collections_health`, which
/// reports the sizes this manager is configured with rather than values read
/// back from the vector store.
///
/// Returns a `ValidationResult` whose `valid` flag is `true` exactly when no
/// issues were recorded.
///
/// # Errors
/// Propagates any error from `get_collections_health`.
pub async fn validate_collections(&self) -> Result<ValidationResult> {
    let health = self.get_collections_health().await?;
    let chat_dims = health.chat_history.vector_size;
    let estate_dims = health.aws_estate.vector_size;

    let mut issues = Vec::new();
    if chat_dims != 1 {
        issues.push(format!(
            "Chat collection should have 1D vectors, found {}D",
            chat_dims
        ));
    }
    if estate_dims != 1024 {
        issues.push(format!(
            "Estate collection should have 1024D vectors, found {}D",
            estate_dims
        ));
    }

    let valid = issues.is_empty();
    if valid {
        println!("✅ Collections validated successfully");
    } else {
        println!("⚠️ Collection validation issues: {:?}", issues);
    }

    Ok(ValidationResult { valid, issues })
}
/// Initializes the manager by ensuring both collections exist.
///
/// Thin wrapper over `initialize_collections` that discards its boolean result.
///
/// # Errors
/// Propagates any error from `initialize_collections`.
pub async fn initialize(&self) -> Result<()> {
    self.initialize_collections().await.map(|_| ())
}
/// Shuts the manager down.
///
/// Currently a no-op: it does not touch the vector store and always returns
/// `Ok(())`.
pub async fn shutdown(&self) -> Result<()> {
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::any::Any;
use chrono::Utc;
/// Stand-in `VectorStore` for unit tests: every operation succeeds with a
/// canned value and nothing is stored.
struct MockVectorStore;

#[async_trait::async_trait]
impl VectorStore for MockVectorStore {
    fn as_any(&self) -> &dyn Any {
        self
    }

    async fn initialize(&self) -> Result<()> {
        Ok(())
    }

    async fn is_initialized(&self) -> bool {
        true
    }

    async fn set_dimensions(&self, _dimensions: usize) -> Result<()> {
        Ok(())
    }

    async fn add_document(
        &self,
        _collection_name: &str,
        _document: crate::types::Document,
    ) -> Result<String> {
        Ok("mock_id".to_string())
    }

    async fn add_documents(
        &self,
        _collection_name: &str,
        _documents: Vec<crate::types::Document>,
    ) -> Result<Vec<String>> {
        Ok(Vec::new())
    }

    async fn search(
        &self,
        _collection_name: &str,
        _query_vector: Vec<f32>,
        _options: crate::types::SearchOptions,
    ) -> Result<Vec<crate::types::SearchResult>> {
        Ok(Vec::new())
    }

    async fn get_document(
        &self,
        _collection_name: &str,
        _id: &str,
    ) -> Result<Option<crate::types::Document>> {
        Ok(None)
    }

    async fn update_document(
        &self,
        _collection_name: &str,
        _id: &str,
        _document: crate::types::Document,
    ) -> Result<()> {
        Ok(())
    }

    async fn delete_document(&self, _collection_name: &str, _id: &str) -> Result<bool> {
        Ok(true)
    }

    async fn list_documents(
        &self,
        _collection_name: &str,
        _limit: Option<usize>,
        _filter: Option<crate::types::SearchFilter>,
    ) -> Result<Vec<crate::types::Document>> {
        Ok(Vec::new())
    }

    async fn create_collection(&self, _name: &str, _vector_size: usize) -> Result<()> {
        Ok(())
    }

    async fn delete_collection(&self, _name: &str) -> Result<bool> {
        Ok(true)
    }

    // Pretends both managed collections already exist.
    async fn list_collections(&self) -> Result<Vec<String>> {
        let names = ["chat_history", "aws_estate"]
            .iter()
            .map(|&name| name.to_string())
            .collect();
        Ok(names)
    }

    async fn get_collection_info(
        &self,
        _name: &str,
    ) -> Result<Option<crate::db::vector_store::CollectionInfo>> {
        Ok(None)
    }

    async fn scroll_collection(
        &self,
        _collection_name: &str,
        _filter: Option<crate::types::SearchFilter>,
        _limit: Option<usize>,
    ) -> Result<Vec<crate::types::SearchResult>> {
        Ok(Vec::new())
    }

    // Reports both managed collections as empty and "green".
    async fn get_collections_health(
        &self,
    ) -> Result<HashMap<String, crate::db::vector_store::CollectionHealth>> {
        use crate::db::vector_store::CollectionHealth as StoreHealth;

        let health = ["chat_history", "aws_estate"]
            .iter()
            .map(|&name| {
                let entry = StoreHealth {
                    name: name.to_string(),
                    status: "green".to_string(),
                    points_count: 0,
                    segments_count: 0,
                    disk_size: 0,
                    ram_size: 0,
                    last_updated: Utc::now(),
                };
                (name.to_string(), entry)
            })
            .collect();
        Ok(health)
    }

    async fn shutdown(&self) -> Result<()> {
        Ok(())
    }
}
// A freshly built manager registers both collection keys, each mapped to itself.
#[tokio::test]
async fn test_collection_manager_creation() {
    let manager = CollectionManager::new(Arc::new(MockVectorStore));
    let names = manager.get_collection_names();

    for key in ["chat_history", "aws_estate"] {
        assert_eq!(names.get(key), Some(&key.to_string()));
    }
}
// The chat schema has the expected name, vector settings and payload fields.
#[tokio::test]
async fn test_chat_collection_schema() {
    let manager = CollectionManager::new(Arc::new(MockVectorStore));
    let schema = manager.get_chat_collection_schema();

    assert_eq!(schema.collection_name, "chat_history");
    assert_eq!(schema.vectors.size, 1);
    assert_eq!(schema.vectors.distance, "Cosine");

    for field in ["context_id", "message_index", "role", "timestamp"] {
        assert!(schema.payload_schema.contains_key(field));
    }
}
// The estate schema has the expected name, vector settings and core payload fields.
#[tokio::test]
async fn test_estate_collection_schema() {
    let manager = CollectionManager::new(Arc::new(MockVectorStore));
    let schema = manager.get_estate_collection_schema();

    assert_eq!(schema.collection_name, "aws_estate");
    assert_eq!(schema.vectors.size, 1024);
    assert_eq!(schema.vectors.distance, "Cosine");

    for field in ["resource_type", "account_id", "region", "service"] {
        assert!(schema.payload_schema.contains_key(field));
    }
}
// Validation against the mock store succeeds and reports no issues.
#[tokio::test]
async fn test_collection_validation() {
    let manager = CollectionManager::new(Arc::new(MockVectorStore));

    let outcome = manager.validate_collections().await.unwrap();

    assert!(outcome.valid);
    assert!(outcome.issues.is_empty());
}
}