use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use indexmap::IndexMap;
use uuid::Uuid;
use chrono::{DateTime, Utc};
/// A piece of text content with an optional embedding vector and free-form
/// JSON metadata.
///
/// Fields are serialized under their Rust names, except `vector_id`, which is
/// renamed to `vectorId`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Document {
    /// Identifier supplied by the caller when the document is constructed.
    pub id: String,
    /// Second identifier, serialized as `vectorId`. `Document::new` fills it
    /// with a freshly generated v4 UUID string.
    #[serde(rename = "vectorId")]
    pub vector_id: String,
    /// The document text.
    pub content: String,
    /// Embedding vector, or `None` when none has been attached.
    pub embedding: Option<Vec<f32>>,
    /// Arbitrary JSON metadata keyed by name (an `IndexMap`, so insertion
    /// order is preserved).
    pub metadata: IndexMap<String, serde_json::Value>,
    /// Creation timestamp in UTC.
    pub created_at: DateTime<Utc>,
    /// Last-update timestamp in UTC. The constructors set it equal to
    /// `created_at`.
    pub updated_at: DateTime<Utc>,
}
impl Document {
    /// Creates a document from `id` and `content`, generating a random (v4)
    /// UUID string for `vector_id`.
    ///
    /// The embedding starts as `None`, the metadata map starts empty, and
    /// `created_at` / `updated_at` are both set to the same current UTC time.
    pub fn new(id: String, content: String) -> Self {
        // `id` is already owned, so it is moved instead of cloned; the shared
        // field initialisation lives in `new_with_vector_id`.
        Self::new_with_vector_id(id, Uuid::new_v4().to_string(), content)
    }

    /// Creates a document with a caller-chosen `vector_id`.
    ///
    /// The embedding starts as `None`, the metadata map starts empty, and
    /// `created_at` / `updated_at` are both set to the same current UTC time.
    pub fn new_with_vector_id(id: String, vector_id: String, content: String) -> Self {
        // Take the timestamp once so both fields are exactly equal.
        let now = Utc::now();
        Self {
            id,
            vector_id,
            content,
            embedding: None,
            metadata: IndexMap::new(),
            created_at: now,
            updated_at: now,
        }
    }

    /// Replaces the whole metadata map (existing entries are dropped, not
    /// merged) and returns the document for chaining.
    ///
    /// `updated_at` is left untouched.
    pub fn with_metadata(mut self, metadata: IndexMap<String, serde_json::Value>) -> Self {
        self.metadata = metadata;
        self
    }

    /// Attaches `embedding`, overwriting any previous one, and returns the
    /// document for chaining.
    ///
    /// `updated_at` is left untouched.
    pub fn with_embedding(mut self, embedding: Vec<f32>) -> Self {
        self.embedding = Some(embedding);
        self
    }
}
/// Optional knobs for a search request.
///
/// Every field is optional and the derived `Default` leaves all of them
/// `None`. How each option is interpreted is up to the search code, which is
/// not part of this file.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct SearchOptions {
    /// Optional result-count limit.
    pub limit: Option<usize>,
    /// Optional score cut-off.
    pub score_threshold: Option<f32>,
    /// Optional structured filter.
    pub filter: Option<SearchFilter>,
    /// Optional collection name.
    pub collection_name: Option<String>,
    /// Optional privacy level for the request.
    pub privacy_level: Option<PrivacyLevel>,
    /// Optional payload flag.
    pub with_payload: Option<bool>,
    /// Optional free-form extra parameters as JSON values.
    pub parameters: Option<HashMap<String, serde_json::Value>>,
}
/// A filter made of three optional lists of [`FilterCondition`]s.
///
/// NOTE(review): the field names suggest boolean must / must-not / should
/// semantics, but the evaluation logic is not in this file. Confirm against
/// the code that consumes the filter.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SearchFilter {
    /// Conditions listed under `must`.
    pub must: Option<Vec<FilterCondition>>,
    /// Conditions listed under `must_not`.
    pub must_not: Option<Vec<FilterCondition>>,
    /// Conditions listed under `should`.
    pub should: Option<Vec<FilterCondition>>,
}
/// A single filter clause: a key paired with the condition applied to it.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct FilterCondition {
    /// Name of the field the condition applies to.
    pub key: String,
    /// The condition itself. Written as a raw identifier because `match` is a
    /// Rust keyword; serde serializes the field as `match`.
    pub r#match: MatchCondition,
}
/// The condition part of a [`FilterCondition`].
///
/// The enum is `#[serde(untagged)]`: no variant name is written, and on
/// deserialization serde tries the variants in declaration order and keeps the
/// first that fits.
///
/// NOTE(review): `Range` has only optional fields, so it should accept any
/// JSON object that did not match `Value` or `Any`. Keep it last, and confirm
/// that this catch-all behaviour is intended.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(untagged)]
pub enum MatchCondition {
    /// A single JSON value, under the key `value`.
    Value { value: serde_json::Value },
    /// A list of JSON values, under the key `any`.
    Any { any: Vec<serde_json::Value> },
    /// Optional numeric bounds, under the keys `gte` and `lte`.
    Range { gte: Option<f64>, lte: Option<f64> },
}
/// One entry returned from a search.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SearchResult {
    /// Identifier of the matched entry.
    pub id: String,
    /// Score assigned to the match.
    pub score: f32,
    /// The matched [`Document`], when one is included.
    pub document: Option<Document>,
    /// Raw payload as JSON values, when one is included.
    pub payload: Option<HashMap<String, serde_json::Value>>,
}
/// Descriptive record for a chat context.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ChatContext {
    /// Identifier of this context.
    pub context_id: String,
    /// Optional user identifier.
    pub user_id: Option<String>,
    /// Optional session identifier.
    pub session_id: Option<String>,
    /// Optional title.
    pub title: Option<String>,
    /// Creation timestamp in UTC.
    pub created_at: DateTime<Utc>,
    /// Last-update timestamp in UTC.
    pub updated_at: DateTime<Utc>,
    /// Optional free-form JSON metadata.
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
/// Privacy level selector.
///
/// Serialized in snake_case (`full`, `minimal_aws`, `anonymous`). The default
/// is [`PrivacyLevel::MinimalAws`], now expressed with `#[derive(Default)]`
/// and `#[default]` instead of a hand-written `impl Default`.
///
/// NOTE(review): what each level permits is decided by code outside this
/// file.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum PrivacyLevel {
    /// Serialized as `full`.
    Full,
    /// Serialized as `minimal_aws`; the default level.
    #[default]
    MinimalAws,
    /// Serialized as `anonymous`.
    Anonymous,
}
/// Kind of collection: one of three built-in kinds or a custom named one.
///
/// NOTE(review): with serde's default enum representation plus
/// `rename_all = "snake_case"`, unit variants serialize as plain strings
/// (e.g. `"chat_history"`) while `Custom` should serialize as
/// `{"custom": "<name>"}`. That differs from the flat string produced by
/// `as_str`; confirm this is the intended wire format.
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum CollectionType {
    /// Built-in chat-history collection.
    ChatHistory,
    /// Built-in AWS-estate collection.
    AwsEstate,
    /// Built-in knowledge-base collection.
    KnowledgeBase,
    /// Any other collection, identified by the contained name.
    Custom(String),
}
impl CollectionType {
pub fn as_str(&self) -> &str {
match self {
CollectionType::ChatHistory => "chat_history",
CollectionType::AwsEstate => "aws_estate",
CollectionType::KnowledgeBase => "knowledge_base",
CollectionType::Custom(name) => name,
}
}
}
impl From<&str> for CollectionType {
fn from(s: &str) -> Self {
match s {
"chat_history" => CollectionType::ChatHistory,
"aws_estate" => CollectionType::AwsEstate,
"knowledge_base" => CollectionType::KnowledgeBase,
name => CollectionType::Custom(name.to_string()),
}
}
}
/// AWS resource kinds, serialized in snake_case via `rename_all`.
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AwsResourceType {
    /// EC2 instance.
    Ec2Instance,
    /// RDS instance.
    RdsInstance,
    /// S3 bucket.
    S3Bucket,
    /// Lambda function.
    LambdaFunction,
    /// IAM user.
    IamUser,
    /// IAM role.
    IamRole,
    /// IAM policy.
    IamPolicy,
}
/// Azure resource kinds, serialized in snake_case via `rename_all`.
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AzureResourceType {
    /// Virtual machine.
    VirtualMachine,
    /// SQL database.
    SqlDatabase,
    /// Storage account.
    StorageAccount,
    /// Function app.
    FunctionApp,
}
/// GCP resource kinds, serialized in snake_case via `rename_all`.
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum GcpResourceType {
    /// Compute Engine.
    ComputeEngine,
    /// Cloud SQL.
    CloudSql,
    /// Cloud Storage.
    CloudStorage,
    /// Cloud Function.
    CloudFunction,
}
/// Provider-agnostic record describing one cloud resource.
///
/// `resource_type` and `cloud_provider` are plain strings here, not the
/// per-provider enums defined in this file.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct CloudResource {
    /// Identifier of the resource.
    pub resource_id: String,
    /// Resource type, as a free-form string.
    pub resource_type: String,
    /// Cloud provider, as a free-form string.
    pub cloud_provider: String,
    /// Account identifier.
    pub account_id: String,
    /// Region.
    pub region: String,
    /// Free-form JSON metadata.
    pub metadata: HashMap<String, serde_json::Value>,
    /// Creation timestamp in UTC.
    pub created_at: DateTime<Utc>,
    /// Last-update timestamp in UTC.
    pub updated_at: DateTime<Utc>,
}
/// Settings for the embedding model/service.
///
/// NOTE(review): `Debug` and `Serialize` are derived, so `api_key` is emitted
/// in clear text by `{:?}` formatting and by serialization. Confirm that is
/// acceptable wherever this config is logged or persisted.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct EmbeddingConfig {
    /// Model name.
    pub model: String,
    /// Embedding dimensionality.
    pub dimensions: usize,
    /// Optional service URL.
    pub service_url: Option<String>,
    /// Optional API key.
    pub api_key: Option<String>,
    /// Optional batch size.
    pub batch_size: Option<usize>,
}
impl Default for EmbeddingConfig {
fn default() -> Self {
Self {
model: "embaas/sentence-transformers-e5-large-v2".to_string(),
dimensions: 1024,
service_url: None, api_key: None,
batch_size: Some(32),
}
}
}
/// Encryption settings: an algorithm name, three independent on/off switches,
/// and an optional key-rotation interval.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct EncryptionConfig {
    /// Algorithm name, as a free-form string.
    pub algorithm: String,
    /// Switch for content encryption.
    pub enable_content_encryption: bool,
    /// Switch for embedding encryption.
    pub enable_embedding_encryption: bool,
    /// Switch for metadata encryption.
    pub enable_metadata_encryption: bool,
    /// Optional key-rotation interval in days.
    pub key_rotation_days: Option<u32>,
}
impl Default for EncryptionConfig {
fn default() -> Self {
Self {
algorithm: "AES-256-GCM".to_string(),
enable_content_encryption: false,
enable_embedding_encryption: false,
enable_metadata_encryption: false,
key_rotation_days: Some(90),
}
}
}
/// Connection settings for a Qdrant endpoint.
///
/// NOTE(review): `Debug` and `Serialize` are derived, so `api_key` is emitted
/// in clear text by `{:?}` formatting and by serialization. Confirm that is
/// acceptable wherever this config is logged or persisted.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct QdrantConnectionConfig {
    /// Endpoint URL. The `Default` impl leaves it as an empty string.
    pub url: String,
    /// Optional API key.
    pub api_key: Option<String>,
    /// Timeout in seconds.
    pub timeout_secs: u64,
}
impl Default for QdrantConnectionConfig {
fn default() -> Self {
Self {
url: "".to_string(), api_key: None,
timeout_secs: 30,
}
}
}
/// Configuration for the vector store.
///
/// NOTE(review): a URL can appear both in `url` here and in
/// `connection.url`. Which one wins is decided by code outside this file.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct VectorStoreConfig {
    /// Backend name, as a free-form string.
    pub backend: String,
    /// Optional URL.
    pub url: Option<String>,
    /// Optional collection-name prefix.
    pub collection_prefix: Option<String>,
    /// Optional distance metric name, as a free-form string.
    pub distance_metric: Option<String>,
    /// Qdrant connection settings.
    pub connection: QdrantConnectionConfig,
    /// Optional storage path.
    pub storage_path: Option<String>,
}
impl Default for VectorStoreConfig {
fn default() -> Self {
Self {
backend: "qdrant-embedded".to_string(),
url: None,
collection_prefix: None,
distance_metric: Some("Cosine".to_string()),
connection: QdrantConnectionConfig::default(),
storage_path: Some("./qdrant-data".to_string()),
}
}
}
/// Error type for this crate's operations.
///
/// String-carrying variants hold a human-readable message. `Serialization`
/// and `Io` wrap the underlying error and, through `#[from]`, can be produced
/// from it with `?`.
#[derive(Debug, thiserror::Error)]
pub enum RagError {
    /// Configuration problem.
    #[error("Configuration error: {0}")]
    Configuration(String),

    /// Vector store failure.
    #[error("Vector store error: {0}")]
    VectorStore(String),

    /// Embedding service failure.
    #[error("Embedding service error: {0}")]
    Embedding(String),

    /// Encryption failure.
    #[error("Encryption error: {0}")]
    Encryption(String),

    /// A document could not be found; carries the value shown in the message.
    #[error("Document not found: {0}")]
    DocumentNotFound(String),

    /// A collection could not be found; carries the value shown in the message.
    #[error("Collection not found: {0}")]
    CollectionNotFound(String),

    /// Wrapped `serde_json` error.
    #[error("Serialization error: {0}")]
    Serialization(#[from] serde_json::Error),

    /// Wrapped I/O error.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Database failure.
    #[error("Database error: {0}")]
    Database(String),
}
/// Shorthand for a `Result` whose error type is [`RagError`].
pub type RagResult<T> = Result<T, RagError>;
/// Timestamps plus free-form JSON metadata for a document.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct DocumentMetadata {
    /// Creation timestamp in UTC.
    pub created_at: DateTime<Utc>,
    /// Last-update timestamp in UTC.
    pub updated_at: DateTime<Utc>,
    /// Free-form JSON metadata keyed by name.
    pub metadata: HashMap<String, serde_json::Value>,
}
/// Outcome report for deleting a collection.
///
/// NOTE(review): `files_removed` and `removed_files` are stored separately,
/// and nothing in this file keeps the count equal to the list length.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct CollectionDeleteResult {
    /// Name of the collection the report is about.
    pub collection_name: String,
    /// User the report is about.
    pub user_id: String,
    /// Whether the collection itself was deleted.
    pub collection_deleted: bool,
    /// Count of removed files.
    pub files_removed: usize,
    /// The removed files, as strings.
    pub removed_files: Vec<String>,
    /// Error messages collected during the operation.
    pub errors: Vec<String>,
}
impl CollectionDeleteResult {
pub fn new(collection_name: String, user_id: String) -> Self {
Self {
collection_name,
user_id,
collection_deleted: false,
files_removed: 0,
removed_files: Vec::new(),
errors: Vec::new(),
}
}
pub fn is_successful(&self) -> bool {
self.collection_deleted && self.errors.is_empty()
}
pub fn has_partial_success(&self) -> bool {
self.collection_deleted || self.files_removed > 0
}
}