1use async_trait::async_trait;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use thiserror::Error;
10use uuid::Uuid;
11
12pub type MemoryResult<T> = Result<T, MemoryError>;
14
15#[derive(Error, Debug)]
17pub enum MemoryError {
18 #[error("Document not found: {0}")]
19 NotFound(Uuid),
20
21 #[error("Storage error: {0}")]
22 Storage(String),
23
24 #[error("Embedding error: {0}")]
25 Embedding(String),
26
27 #[error("Index error: {0}")]
28 Index(String),
29
30 #[error("Configuration error: {0}")]
31 Config(String),
32
33 #[error("Serialization error: {0}")]
34 Serialization(String),
35
36 #[error("IO error: {0}")]
37 Io(#[from] std::io::Error),
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct Document {
43 pub id: Option<Uuid>,
44 pub content: String,
45 pub metadata: HashMap<String, String>,
46 pub source: Option<String>,
47 pub created_at: Option<i64>,
48}
49
50#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct Chunk {
53 pub id: Option<Uuid>,
54 pub document_id: Uuid,
55 pub content: String,
56 pub index: usize,
57 pub embedding: Option<Vec<f32>>,
58 pub metadata: HashMap<String, String>,
59}
60
61#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct SearchResult {
64 pub chunk: Chunk,
65 pub score: f32,
66 pub source: RetrievalSource,
67}
68
69#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
71pub enum RetrievalSource {
72 Vector,
73 BM25,
74 Hybrid,
75}
76
77#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct HybridConfig {
80 pub vector_weight: f32,
81 pub bm25_weight: f32,
82 pub use_reranker: bool,
83 pub reranker_top_k: usize,
84}
85
86impl Default for HybridConfig {
87 fn default() -> Self {
88 Self {
89 vector_weight: 0.7,
90 bm25_weight: 0.3,
91 use_reranker: true,
92 reranker_top_k: 10,
93 }
94 }
95}
96
97#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct ContextWindow {
100 pub chunks: Vec<SearchResult>,
101 pub total_tokens: usize,
102 pub truncated: bool,
103}
104
105#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct IndexConfig {
108 pub name: String,
109 pub dimensions: usize,
110 pub metric: DistanceMetric,
111 pub ef_construction: usize,
112 pub m: usize,
113}
114
115impl Default for IndexConfig {
116 fn default() -> Self {
117 Self {
118 name: "default".to_string(),
119 dimensions: 384,
120 metric: DistanceMetric::Cosine,
121 ef_construction: 200,
122 m: 16,
123 }
124 }
125}
126
127#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
129pub enum DistanceMetric {
130 Cosine,
131 Euclidean,
132 DotProduct,
133}
134
135#[derive(Debug, Clone, Serialize, Deserialize)]
137pub struct IndexStats {
138 pub total_documents: usize,
139 pub total_chunks: usize,
140 pub total_vectors: usize,
141 pub index_size_bytes: u64,
142 pub last_updated: i64,
143}
144
145#[derive(Debug, Clone, Serialize, Deserialize)]
147pub struct MemoryConfig {
148 pub chunk_size: usize,
149 pub chunk_overlap: usize,
150 pub embedding_model: String,
151 pub embedding_dimensions: usize,
152 pub max_context_tokens: usize,
153 pub storage_path: Option<String>,
154}
155
156impl Default for MemoryConfig {
157 fn default() -> Self {
158 Self {
159 chunk_size: 512,
160 chunk_overlap: 50,
161 embedding_model: "all-MiniLM-L6-v2".to_string(),
162 embedding_dimensions: 384,
163 max_context_tokens: 4096,
164 storage_path: None,
165 }
166 }
167}
168
169#[async_trait]
195pub trait MemoryService: Send + Sync {
196 async fn store_document(&self, doc: &Document) -> MemoryResult<Uuid>;
202
203 async fn store_chunks(&self, chunks: &[Chunk]) -> MemoryResult<Vec<Uuid>>;
205
206 async fn delete_document(&self, id: Uuid) -> MemoryResult<()>;
208
209 async fn update_document(&self, id: Uuid, doc: &Document) -> MemoryResult<()>;
211
212 async fn search(&self, query: &str, top_k: usize) -> MemoryResult<Vec<SearchResult>>;
218
219 async fn hybrid_search(
221 &self,
222 query: &str,
223 top_k: usize,
224 config: HybridConfig,
225 ) -> MemoryResult<Vec<SearchResult>>;
226
227 async fn get_by_id(&self, id: Uuid) -> MemoryResult<Option<Document>>;
229
230 async fn get_context(&self, query: &str, max_tokens: usize) -> MemoryResult<ContextWindow>;
232
233 async fn embed(&self, text: &str) -> MemoryResult<Vec<f32>>;
239
240 async fn embed_batch(&self, texts: &[&str]) -> MemoryResult<Vec<Vec<f32>>>;
242
243 async fn create_index(&self, config: IndexConfig) -> MemoryResult<()>;
249
250 async fn rebuild_index(&self) -> MemoryResult<()>;
252
253 async fn get_stats(&self) -> MemoryResult<IndexStats>;
255
256 fn config(&self) -> &MemoryConfig;
262
263 fn set_config(&mut self, config: MemoryConfig);
265
266 async fn health_check(&self) -> MemoryResult<bool>;
272
273 async fn flush(&self) -> MemoryResult<()>;
275
276 async fn shutdown(&self) -> MemoryResult<()>;
278}