codemem_core/traits.rs
1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
4use crate::{CodememError, Edge, GraphNode, MemoryNode, NodeKind, RelationshipType, Session};
5
6// ── Traits ──────────────────────────────────────────────────────────────────
7
8/// Vector backend trait for HNSW index operations.
9pub trait VectorBackend: Send + Sync {
10 /// Insert a vector with associated ID.
11 fn insert(&mut self, id: &str, embedding: &[f32]) -> Result<(), CodememError>;
12
13 /// Batch insert vectors.
14 fn insert_batch(&mut self, items: &[(String, Vec<f32>)]) -> Result<(), CodememError>;
15
16 /// Search for k nearest neighbors. Returns (id, distance) pairs.
17 fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, CodememError>;
18
19 /// Remove a vector by ID.
20 fn remove(&mut self, id: &str) -> Result<bool, CodememError>;
21
22 /// Save the index to disk.
23 fn save(&self, path: &std::path::Path) -> Result<(), CodememError>;
24
25 /// Load the index from disk.
26 fn load(&mut self, path: &std::path::Path) -> Result<(), CodememError>;
27
28 /// Get index statistics.
29 fn stats(&self) -> VectorStats;
30}
31
32/// Statistics about the vector index.
33#[derive(Debug, Clone, Default, Serialize, Deserialize)]
34pub struct VectorStats {
35 pub count: usize,
36 pub dimensions: usize,
37 pub metric: String,
38 pub memory_bytes: usize,
39}
40
41/// Graph backend trait for graph operations.
42pub trait GraphBackend: Send + Sync {
43 /// Add a node to the graph.
44 fn add_node(&mut self, node: GraphNode) -> Result<(), CodememError>;
45
46 /// Get a node by ID.
47 fn get_node(&self, id: &str) -> Result<Option<GraphNode>, CodememError>;
48
49 /// Remove a node by ID.
50 fn remove_node(&mut self, id: &str) -> Result<bool, CodememError>;
51
52 /// Add an edge between two nodes.
53 fn add_edge(&mut self, edge: Edge) -> Result<(), CodememError>;
54
55 /// Get edges from a node.
56 fn get_edges(&self, node_id: &str) -> Result<Vec<Edge>, CodememError>;
57
58 /// Remove an edge by ID.
59 fn remove_edge(&mut self, id: &str) -> Result<bool, CodememError>;
60
61 /// BFS traversal from a start node up to max_depth.
62 fn bfs(&self, start_id: &str, max_depth: usize) -> Result<Vec<GraphNode>, CodememError>;
63
64 /// DFS traversal from a start node up to max_depth.
65 fn dfs(&self, start_id: &str, max_depth: usize) -> Result<Vec<GraphNode>, CodememError>;
66
67 /// BFS traversal with filtering: exclude certain node kinds and optionally
68 /// restrict to specific relationship types.
69 fn bfs_filtered(
70 &self,
71 start_id: &str,
72 max_depth: usize,
73 exclude_kinds: &[NodeKind],
74 include_relationships: Option<&[RelationshipType]>,
75 ) -> Result<Vec<GraphNode>, CodememError> {
76 // Default implementation falls back to unfiltered BFS
77 let _ = (exclude_kinds, include_relationships);
78 self.bfs(start_id, max_depth)
79 }
80
81 /// DFS traversal with filtering: exclude certain node kinds and optionally
82 /// restrict to specific relationship types.
83 fn dfs_filtered(
84 &self,
85 start_id: &str,
86 max_depth: usize,
87 exclude_kinds: &[NodeKind],
88 include_relationships: Option<&[RelationshipType]>,
89 ) -> Result<Vec<GraphNode>, CodememError> {
90 // Default implementation falls back to unfiltered DFS
91 let _ = (exclude_kinds, include_relationships);
92 self.dfs(start_id, max_depth)
93 }
94
95 /// Shortest path between two nodes.
96 fn shortest_path(&self, from: &str, to: &str) -> Result<Vec<String>, CodememError>;
97
98 /// Get graph statistics.
99 fn stats(&self) -> GraphStats;
100}
101
102/// Statistics about the graph.
103#[derive(Debug, Clone, Default, Serialize, Deserialize)]
104pub struct GraphStats {
105 pub node_count: usize,
106 pub edge_count: usize,
107 pub node_kind_counts: HashMap<String, usize>,
108 pub relationship_type_counts: HashMap<String, usize>,
109}
110
111// ── Embedding Provider Trait ────────────────────────────────────────────────
112
113/// Trait for pluggable embedding providers.
114pub trait EmbeddingProvider: Send + Sync {
115 /// Embedding vector dimensions.
116 fn dimensions(&self) -> usize;
117
118 /// Embed a single text string.
119 fn embed(&self, text: &str) -> Result<Vec<f32>, crate::CodememError>;
120
121 /// Embed a batch of texts (default: sequential).
122 fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>, crate::CodememError> {
123 texts.iter().map(|t| self.embed(t)).collect()
124 }
125
126 /// Provider name for display.
127 fn name(&self) -> &str;
128
129 /// Cache statistics: (current_size, capacity). Returns (0, 0) if no cache.
130 fn cache_stats(&self) -> (usize, usize) {
131 (0, 0)
132 }
133}
134
135// ── Storage Stats & Consolidation Types ─────────────────────────────────
136
137/// Database statistics.
138#[derive(Debug, Clone, Serialize, Deserialize)]
139pub struct StorageStats {
140 pub memory_count: usize,
141 pub embedding_count: usize,
142 pub node_count: usize,
143 pub edge_count: usize,
144}
145
/// One record in the consolidation log, as returned by
/// `StorageBackend::last_consolidation_runs`.
#[derive(Debug, Clone)]
pub struct ConsolidationLogEntry {
    /// Name of the consolidation cycle this entry belongs to.
    pub cycle_type: String,
    /// When the cycle ran (presumably a Unix timestamp; the unit is not
    /// established in this file, so confirm with the storage backend).
    pub run_at: i64,
    /// Number of items the run affected.
    pub affected_count: usize,
}
153
154// ── Storage Backend Trait ───────────────────────────────────────────────
155
156/// Pluggable storage backend trait for all persistence operations.
157///
158/// This trait unifies every persistence concern behind a single interface so
159/// that the engine layer (`CodememEngine`) remains backend-agnostic.
160///
161/// # Method groups
162///
163/// | Group | Methods | Purpose |
164/// |-------|---------|---------|
165/// | **Memory CRUD** | `insert_memory`, `get_memory`, `update_memory`, `delete_memory`, `list_memory_ids`, … | Create, read, update, delete memory nodes |
166/// | **Embedding persistence** | `store_embedding`, `get_embedding`, `delete_embedding`, `list_all_embeddings` | Persist and retrieve embedding vectors |
167/// | **Graph node/edge storage** | `insert_graph_node`, `get_graph_node`, `all_graph_nodes`, `insert_graph_edge`, … | Persist the knowledge graph structure |
168/// | **Sessions** | `start_session`, `end_session`, `list_sessions`, `session_count` | Track interaction sessions |
169/// | **Consolidation** | `insert_consolidation_log`, `last_consolidation_runs` | Record and query memory consolidation runs |
170/// | **Pattern detection** | `get_repeated_searches`, `get_file_hotspots`, `get_tool_usage_stats`, `get_decision_chains` | Cross-session pattern queries |
171/// | **Bulk/batch operations** | `insert_memories_batch`, `store_embeddings_batch`, `insert_graph_nodes_batch`, `insert_graph_edges_batch` | Efficient multi-row inserts |
172/// | **Decay & forgetting** | `decay_stale_memories`, `find_forgettable`, `get_stale_memories_for_decay`, `batch_update_importance` | Power-law decay and garbage collection |
173/// | **Query helpers** | `find_unembedded_memories`, `search_graph_nodes`, `list_memories_filtered`, `find_hash_duplicates` | Filtered searches and dedup |
174/// | **File hash tracking** | `load_file_hashes`, `save_file_hashes` | Incremental indexing support |
175/// | **Session activity** | `record_session_activity`, `get_session_activity_summary`, `get_session_hot_directories`, … | Fine-grained activity tracking |
176/// | **Stats** | `stats` | Database-level statistics |
177///
178/// Implementations include SQLite (default) and can be extended for
179/// SurrealDB, FalkorDB, or other backends.
180pub trait StorageBackend: Send + Sync {
181 // ── Memory CRUD ─────────────────────────────────────────────────
182
183 /// Insert a new memory. Returns Err(Duplicate) if content hash already exists.
184 fn insert_memory(&self, memory: &MemoryNode) -> Result<(), CodememError>;
185
186 /// Get a memory by ID. Updates access_count and last_accessed_at.
187 fn get_memory(&self, id: &str) -> Result<Option<MemoryNode>, CodememError>;
188
189 /// Get a memory by ID without updating access_count or last_accessed_at.
190 /// Use this for internal/system reads (consolidation checks, stats, batch processing).
191 fn get_memory_no_touch(&self, id: &str) -> Result<Option<MemoryNode>, CodememError> {
192 // Default: falls back to get_memory for backwards compatibility.
193 self.get_memory(id)
194 }
195
196 /// Get multiple memories by IDs in a single batch operation.
197 fn get_memories_batch(&self, ids: &[&str]) -> Result<Vec<MemoryNode>, CodememError>;
198
199 /// Update a memory's content and optionally its importance. Re-computes content hash.
200 fn update_memory(
201 &self,
202 id: &str,
203 content: &str,
204 importance: Option<f64>,
205 ) -> Result<(), CodememError>;
206
207 /// Delete a memory by ID. Returns true if a row was deleted.
208 fn delete_memory(&self, id: &str) -> Result<bool, CodememError>;
209
210 /// Delete a memory and all related data (graph nodes/edges, embeddings) atomically.
211 /// Returns true if the memory existed and was deleted.
212 /// Default falls back to individual deletes (non-transactional) for backwards compatibility.
213 fn delete_memory_cascade(&self, id: &str) -> Result<bool, CodememError> {
214 let deleted = self.delete_memory(id)?;
215 if deleted {
216 let _ = self.delete_graph_edges_for_node(id);
217 let _ = self.delete_graph_node(id);
218 let _ = self.delete_embedding(id);
219 }
220 Ok(deleted)
221 }
222
223 /// List all memory IDs, ordered by created_at descending.
224 fn list_memory_ids(&self) -> Result<Vec<String>, CodememError>;
225
226 /// List memory IDs scoped to a specific namespace.
227 fn list_memory_ids_for_namespace(&self, namespace: &str) -> Result<Vec<String>, CodememError>;
228
229 /// List all distinct namespaces.
230 fn list_namespaces(&self) -> Result<Vec<String>, CodememError>;
231
232 /// Get total memory count.
233 fn memory_count(&self) -> Result<usize, CodememError>;
234
235 // ── Embedding Persistence ───────────────────────────────────────
236
237 /// Store an embedding vector for a memory.
238 fn store_embedding(&self, memory_id: &str, embedding: &[f32]) -> Result<(), CodememError>;
239
240 /// Get an embedding by memory ID.
241 fn get_embedding(&self, memory_id: &str) -> Result<Option<Vec<f32>>, CodememError>;
242
243 /// Delete an embedding by memory ID. Returns true if a row was deleted.
244 fn delete_embedding(&self, memory_id: &str) -> Result<bool, CodememError>;
245
246 /// List all stored embeddings as (memory_id, embedding_vector) pairs.
247 fn list_all_embeddings(&self) -> Result<Vec<(String, Vec<f32>)>, CodememError>;
248
249 // ── Graph Node/Edge Persistence ─────────────────────────────────
250
251 /// Insert or replace a graph node.
252 fn insert_graph_node(&self, node: &GraphNode) -> Result<(), CodememError>;
253
254 /// Get a graph node by ID.
255 fn get_graph_node(&self, id: &str) -> Result<Option<GraphNode>, CodememError>;
256
257 /// Delete a graph node by ID. Returns true if a row was deleted.
258 fn delete_graph_node(&self, id: &str) -> Result<bool, CodememError>;
259
260 /// Get all graph nodes.
261 fn all_graph_nodes(&self) -> Result<Vec<GraphNode>, CodememError>;
262
263 /// Insert or replace a graph edge.
264 fn insert_graph_edge(&self, edge: &Edge) -> Result<(), CodememError>;
265
266 /// Get all edges from or to a node.
267 fn get_edges_for_node(&self, node_id: &str) -> Result<Vec<Edge>, CodememError>;
268
269 /// Get all graph edges.
270 fn all_graph_edges(&self) -> Result<Vec<Edge>, CodememError>;
271
272 /// Delete all graph edges connected to a node. Returns count deleted.
273 fn delete_graph_edges_for_node(&self, node_id: &str) -> Result<usize, CodememError>;
274
275 /// Delete all graph nodes, edges, and embeddings whose node ID starts with the given prefix.
276 /// Returns count of nodes deleted.
277 fn delete_graph_nodes_by_prefix(&self, prefix: &str) -> Result<usize, CodememError>;
278
279 // ── Sessions ────────────────────────────────────────────────────
280
281 /// Start a new session.
282 fn start_session(&self, id: &str, namespace: Option<&str>) -> Result<(), CodememError>;
283
284 /// End a session with optional summary.
285 fn end_session(&self, id: &str, summary: Option<&str>) -> Result<(), CodememError>;
286
287 /// List sessions, optionally filtered by namespace, up to limit.
288 fn list_sessions(
289 &self,
290 namespace: Option<&str>,
291 limit: usize,
292 ) -> Result<Vec<Session>, CodememError>;
293
294 // ── Consolidation ───────────────────────────────────────────────
295
296 /// Record a consolidation run.
297 fn insert_consolidation_log(
298 &self,
299 cycle_type: &str,
300 affected_count: usize,
301 ) -> Result<(), CodememError>;
302
303 /// Get the last consolidation run for each cycle type.
304 fn last_consolidation_runs(&self) -> Result<Vec<ConsolidationLogEntry>, CodememError>;
305
306 // ── Pattern Detection Queries ───────────────────────────────────
307
308 /// Find repeated search patterns. Returns (pattern, count, memory_ids).
309 fn get_repeated_searches(
310 &self,
311 min_count: usize,
312 namespace: Option<&str>,
313 ) -> Result<Vec<(String, usize, Vec<String>)>, CodememError>;
314
315 /// Find file hotspots. Returns (file_path, count, memory_ids).
316 fn get_file_hotspots(
317 &self,
318 min_count: usize,
319 namespace: Option<&str>,
320 ) -> Result<Vec<(String, usize, Vec<String>)>, CodememError>;
321
322 /// Get tool usage statistics. Returns (tool_name, count) pairs.
323 fn get_tool_usage_stats(
324 &self,
325 namespace: Option<&str>,
326 ) -> Result<Vec<(String, usize)>, CodememError>;
327
328 /// Find decision chains. Returns (file_path, count, memory_ids).
329 fn get_decision_chains(
330 &self,
331 min_count: usize,
332 namespace: Option<&str>,
333 ) -> Result<Vec<(String, usize, Vec<String>)>, CodememError>;
334
335 // ── Bulk Operations ─────────────────────────────────────────────
336
337 /// Decay importance of stale memories older than threshold_ts by decay_factor.
338 /// Returns count of affected memories.
339 fn decay_stale_memories(
340 &self,
341 threshold_ts: i64,
342 decay_factor: f64,
343 ) -> Result<usize, CodememError>;
344
345 /// List memories for creative consolidation: (id, memory_type, tags).
346 fn list_memories_for_creative(
347 &self,
348 ) -> Result<Vec<(String, String, Vec<String>)>, CodememError>;
349
350 /// Find near-duplicate memories by content hash prefix matching.
351 /// Returns (id1, id2, similarity) pairs. Only catches exact content matches
352 /// (hash prefix), not semantic near-duplicates.
353 fn find_hash_duplicates(&self) -> Result<Vec<(String, String, f64)>, CodememError>;
354
355 /// Find memories eligible for forgetting (low importance).
356 /// Returns list of memory IDs.
357 fn find_forgettable(&self, importance_threshold: f64) -> Result<Vec<String>, CodememError>;
358
359 // ── Batch Operations ────────────────────────────────────────────
360
361 /// Insert multiple memories in a single batch. Default impl calls insert_memory in a loop.
362 fn insert_memories_batch(&self, memories: &[MemoryNode]) -> Result<(), CodememError> {
363 for memory in memories {
364 self.insert_memory(memory)?;
365 }
366 Ok(())
367 }
368
369 /// Store multiple embeddings in a single batch. Default impl calls store_embedding in a loop.
370 fn store_embeddings_batch(&self, items: &[(&str, &[f32])]) -> Result<(), CodememError> {
371 for (id, embedding) in items {
372 self.store_embedding(id, embedding)?;
373 }
374 Ok(())
375 }
376
377 /// Insert multiple graph nodes in a single batch. Default impl calls insert_graph_node in a loop.
378 fn insert_graph_nodes_batch(&self, nodes: &[GraphNode]) -> Result<(), CodememError> {
379 for node in nodes {
380 self.insert_graph_node(node)?;
381 }
382 Ok(())
383 }
384
385 /// Insert multiple graph edges in a single batch. Default impl calls insert_graph_edge in a loop.
386 fn insert_graph_edges_batch(&self, edges: &[Edge]) -> Result<(), CodememError> {
387 for edge in edges {
388 self.insert_graph_edge(edge)?;
389 }
390 Ok(())
391 }
392
393 // ── Query Helpers ───────────────────────────────────────────────
394
395 /// Find memories that have no embeddings yet. Returns (id, content) pairs.
396 fn find_unembedded_memories(&self) -> Result<Vec<(String, String)>, CodememError>;
397
398 /// Search graph nodes by label (case-insensitive LIKE). Returns matching nodes
399 /// sorted by centrality descending, limited to `limit` results.
400 fn search_graph_nodes(
401 &self,
402 query: &str,
403 namespace: Option<&str>,
404 limit: usize,
405 ) -> Result<Vec<GraphNode>, CodememError>;
406
407 /// List memories with optional namespace and memory_type filters.
408 fn list_memories_filtered(
409 &self,
410 namespace: Option<&str>,
411 memory_type: Option<&str>,
412 ) -> Result<Vec<MemoryNode>, CodememError>;
413
414 /// Get edges filtered by namespace (edges where both src and dst nodes have the given namespace).
415 fn graph_edges_for_namespace(&self, namespace: &str) -> Result<Vec<Edge>, CodememError>;
416
417 // ── Temporal Edge Queries ───────────────────────────────────────
418
419 /// Get edges active at a specific timestamp. Default: no temporal filtering.
420 fn get_edges_at_time(&self, node_id: &str, _timestamp: i64) -> Result<Vec<Edge>, CodememError> {
421 self.get_edges_for_node(node_id)
422 }
423
424 /// Fetch stale memories with access metadata for power-law decay.
425 /// Returns (id, importance, access_count, last_accessed_at).
426 fn get_stale_memories_for_decay(
427 &self,
428 threshold_ts: i64,
429 ) -> Result<Vec<(String, f64, u32, i64)>, CodememError>;
430
431 /// Batch-update importance values. Returns count of updated rows.
432 fn batch_update_importance(&self, updates: &[(String, f64)]) -> Result<usize, CodememError>;
433
434 /// Total session count, optionally filtered by namespace.
435 fn session_count(&self, namespace: Option<&str>) -> Result<usize, CodememError>;
436
437 // ── File Hash Tracking ──────────────────────────────────────────
438
439 /// Load all file hashes for incremental indexing. Returns path -> hash map.
440 fn load_file_hashes(&self) -> Result<HashMap<String, String>, CodememError>;
441
442 /// Save file hashes for incremental indexing.
443 fn save_file_hashes(&self, hashes: &HashMap<String, String>) -> Result<(), CodememError>;
444
445 // ── Session Activity Tracking ─────────────────────────────────
446
447 /// Record a session activity event (tool use with context).
448 fn record_session_activity(
449 &self,
450 session_id: &str,
451 tool_name: &str,
452 file_path: Option<&str>,
453 directory: Option<&str>,
454 pattern: Option<&str>,
455 ) -> Result<(), CodememError>;
456
457 /// Get a summary of session activity counts.
458 fn get_session_activity_summary(
459 &self,
460 session_id: &str,
461 ) -> Result<crate::SessionActivitySummary, CodememError>;
462
463 /// Get the most active directories in a session. Returns (directory, count) pairs.
464 fn get_session_hot_directories(
465 &self,
466 session_id: &str,
467 limit: usize,
468 ) -> Result<Vec<(String, usize)>, CodememError>;
469
470 /// Check whether a particular auto-insight dedup tag already exists for a session.
471 fn has_auto_insight(&self, session_id: &str, dedup_tag: &str) -> Result<bool, CodememError>;
472
473 /// Count how many Read events occurred in a directory during a session.
474 fn count_directory_reads(
475 &self,
476 session_id: &str,
477 directory: &str,
478 ) -> Result<usize, CodememError>;
479
480 /// Check if a file was read in the current session.
481 fn was_file_read_in_session(
482 &self,
483 session_id: &str,
484 file_path: &str,
485 ) -> Result<bool, CodememError>;
486
487 /// Count how many times a search pattern was used in a session.
488 fn count_search_pattern_in_session(
489 &self,
490 session_id: &str,
491 pattern: &str,
492 ) -> Result<usize, CodememError>;
493
494 // ── Stats ───────────────────────────────────────────────────────
495
496 /// Get database statistics.
497 fn stats(&self) -> Result<StorageStats, CodememError>;
498}