// reasonkit/traits/memory.rs

//! Memory service trait for core <-> mem integration.
//!
//! This trait defines the contract between `reasonkit-core` and `reasonkit-mem`.
//! Implementations live in `reasonkit-mem`, consumers live in `reasonkit-core`.

use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use thiserror::Error;
use uuid::Uuid;

12/// Result type for memory operations.
13pub type MemoryResult<T> = Result<T, MemoryError>;
14
15/// Errors that can occur during memory operations.
16#[derive(Error, Debug)]
17pub enum MemoryError {
18    #[error("Document not found: {0}")]
19    NotFound(Uuid),
20
21    #[error("Storage error: {0}")]
22    Storage(String),
23
24    #[error("Embedding error: {0}")]
25    Embedding(String),
26
27    #[error("Index error: {0}")]
28    Index(String),
29
30    #[error("Configuration error: {0}")]
31    Config(String),
32
33    #[error("Serialization error: {0}")]
34    Serialization(String),
35
36    #[error("IO error: {0}")]
37    Io(#[from] std::io::Error),
38}
39
40/// A document to be stored in memory.
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct Document {
43    pub id: Option<Uuid>,
44    pub content: String,
45    pub metadata: HashMap<String, String>,
46    pub source: Option<String>,
47    pub created_at: Option<i64>,
48}
49
50/// A chunk of a document after splitting.
51#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct Chunk {
53    pub id: Option<Uuid>,
54    pub document_id: Uuid,
55    pub content: String,
56    pub index: usize,
57    pub embedding: Option<Vec<f32>>,
58    pub metadata: HashMap<String, String>,
59}
60
61/// A search result from memory retrieval.
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct SearchResult {
64    pub chunk: Chunk,
65    pub score: f32,
66    pub source: RetrievalSource,
67}
68
69/// Source of the retrieval result.
70#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
71pub enum RetrievalSource {
72    Vector,
73    BM25,
74    Hybrid,
75}
76
77/// Configuration for hybrid search.
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct HybridConfig {
80    pub vector_weight: f32,
81    pub bm25_weight: f32,
82    pub use_reranker: bool,
83    pub reranker_top_k: usize,
84}
85
86impl Default for HybridConfig {
87    fn default() -> Self {
88        Self {
89            vector_weight: 0.7,
90            bm25_weight: 0.3,
91            use_reranker: true,
92            reranker_top_k: 10,
93        }
94    }
95}
96
97/// A context window assembled from retrieved chunks.
98#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct ContextWindow {
100    pub chunks: Vec<SearchResult>,
101    pub total_tokens: usize,
102    pub truncated: bool,
103}
104
105/// Configuration for index creation.
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct IndexConfig {
108    pub name: String,
109    pub dimensions: usize,
110    pub metric: DistanceMetric,
111    pub ef_construction: usize,
112    pub m: usize,
113}
114
115impl Default for IndexConfig {
116    fn default() -> Self {
117        Self {
118            name: "default".to_string(),
119            dimensions: 384,
120            metric: DistanceMetric::Cosine,
121            ef_construction: 200,
122            m: 16,
123        }
124    }
125}
126
127/// Distance metric for vector similarity.
128#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
129pub enum DistanceMetric {
130    Cosine,
131    Euclidean,
132    DotProduct,
133}
134
135/// Statistics about the memory index.
136#[derive(Debug, Clone, Serialize, Deserialize)]
137pub struct IndexStats {
138    pub total_documents: usize,
139    pub total_chunks: usize,
140    pub total_vectors: usize,
141    pub index_size_bytes: u64,
142    pub last_updated: i64,
143}
144
145/// Configuration for the memory service.
146#[derive(Debug, Clone, Serialize, Deserialize)]
147pub struct MemoryConfig {
148    pub chunk_size: usize,
149    pub chunk_overlap: usize,
150    pub embedding_model: String,
151    pub embedding_dimensions: usize,
152    pub max_context_tokens: usize,
153    pub storage_path: Option<String>,
154}
155
156impl Default for MemoryConfig {
157    fn default() -> Self {
158        Self {
159            chunk_size: 512,
160            chunk_overlap: 50,
161            embedding_model: "all-MiniLM-L6-v2".to_string(),
162            embedding_dimensions: 384,
163            max_context_tokens: 4096,
164            storage_path: None,
165        }
166    }
167}
168
169/// Core abstraction for memory operations.
170///
171/// This trait is implemented by `reasonkit-mem` and consumed by `reasonkit-core`.
172/// It provides a unified interface for document storage, retrieval, and embedding.
173///
174/// # Example
175///
176/// ```ignore
177/// use reasonkit::traits::{MemoryService, Document};
178///
179/// async fn example(memory: &impl MemoryService) -> MemoryResult<()> {
180///     let doc = Document {
181///         id: None,
182///         content: "Hello, world!".to_string(),
183///         metadata: Default::default(),
184///         source: Some("example".to_string()),
185///         created_at: None,
186///     };
187///
188///     let id = memory.store_document(&doc).await?;
189///     let results = memory.search("hello", 5).await?;
190///
191///     Ok(())
192/// }
193/// ```
194#[async_trait]
195pub trait MemoryService: Send + Sync {
196    // ─────────────────────────────────────────────────────────────────────────
197    // Storage Operations
198    // ─────────────────────────────────────────────────────────────────────────
199
200    /// Store a document, returning its assigned ID.
201    async fn store_document(&self, doc: &Document) -> MemoryResult<Uuid>;
202
203    /// Store multiple chunks, returning their assigned IDs.
204    async fn store_chunks(&self, chunks: &[Chunk]) -> MemoryResult<Vec<Uuid>>;
205
206    /// Delete a document and all its chunks.
207    async fn delete_document(&self, id: Uuid) -> MemoryResult<()>;
208
209    /// Update an existing document.
210    async fn update_document(&self, id: Uuid, doc: &Document) -> MemoryResult<()>;
211
212    // ─────────────────────────────────────────────────────────────────────────
213    // Retrieval Operations
214    // ─────────────────────────────────────────────────────────────────────────
215
216    /// Search for relevant chunks using vector similarity.
217    async fn search(&self, query: &str, top_k: usize) -> MemoryResult<Vec<SearchResult>>;
218
219    /// Search using hybrid retrieval (vector + BM25 with RRF fusion).
220    async fn hybrid_search(
221        &self,
222        query: &str,
223        top_k: usize,
224        config: HybridConfig,
225    ) -> MemoryResult<Vec<SearchResult>>;
226
227    /// Get a document by its ID.
228    async fn get_by_id(&self, id: Uuid) -> MemoryResult<Option<Document>>;
229
230    /// Get a context window optimized for the query and token budget.
231    async fn get_context(&self, query: &str, max_tokens: usize) -> MemoryResult<ContextWindow>;
232
233    // ─────────────────────────────────────────────────────────────────────────
234    // Embedding Operations
235    // ─────────────────────────────────────────────────────────────────────────
236
237    /// Embed a single text string.
238    async fn embed(&self, text: &str) -> MemoryResult<Vec<f32>>;
239
240    /// Embed multiple texts in a batch.
241    async fn embed_batch(&self, texts: &[&str]) -> MemoryResult<Vec<Vec<f32>>>;
242
243    // ─────────────────────────────────────────────────────────────────────────
244    // Index Management
245    // ─────────────────────────────────────────────────────────────────────────
246
247    /// Create a new index with the given configuration.
248    async fn create_index(&self, config: IndexConfig) -> MemoryResult<()>;
249
250    /// Rebuild the index from stored documents.
251    async fn rebuild_index(&self) -> MemoryResult<()>;
252
253    /// Get statistics about the current index.
254    async fn get_stats(&self) -> MemoryResult<IndexStats>;
255
256    // ─────────────────────────────────────────────────────────────────────────
257    // Configuration
258    // ─────────────────────────────────────────────────────────────────────────
259
260    /// Get the current configuration.
261    fn config(&self) -> &MemoryConfig;
262
263    /// Update the configuration.
264    fn set_config(&mut self, config: MemoryConfig);
265
266    // ─────────────────────────────────────────────────────────────────────────
267    // Health & Lifecycle
268    // ─────────────────────────────────────────────────────────────────────────
269
270    /// Check if the service is healthy and ready.
271    async fn health_check(&self) -> MemoryResult<bool>;
272
273    /// Flush any pending writes to storage.
274    async fn flush(&self) -> MemoryResult<()>;
275
276    /// Gracefully shutdown the service.
277    async fn shutdown(&self) -> MemoryResult<()>;
278}