Skip to main content

codemem_core/
traits.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
4use crate::{CodememError, Edge, GraphNode, MemoryNode, NodeKind, RelationshipType, Session};
5
6// ── Traits ──────────────────────────────────────────────────────────────────
7
8/// Vector backend trait for HNSW index operations.
9pub trait VectorBackend: Send + Sync {
10    /// Insert a vector with associated ID.
11    fn insert(&mut self, id: &str, embedding: &[f32]) -> Result<(), CodememError>;
12
13    /// Batch insert vectors.
14    fn insert_batch(&mut self, items: &[(String, Vec<f32>)]) -> Result<(), CodememError>;
15
16    /// Search for k nearest neighbors. Returns (id, distance) pairs.
17    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, CodememError>;
18
19    /// Remove a vector by ID.
20    fn remove(&mut self, id: &str) -> Result<bool, CodememError>;
21
22    /// Save the index to disk.
23    fn save(&self, path: &std::path::Path) -> Result<(), CodememError>;
24
25    /// Load the index from disk.
26    fn load(&mut self, path: &std::path::Path) -> Result<(), CodememError>;
27
28    /// Get index statistics.
29    fn stats(&self) -> VectorStats;
30}
31
32/// Statistics about the vector index.
33#[derive(Debug, Clone, Default, Serialize, Deserialize)]
34pub struct VectorStats {
35    pub count: usize,
36    pub dimensions: usize,
37    pub metric: String,
38    pub memory_bytes: usize,
39}
40
41/// Graph backend trait for graph operations.
42pub trait GraphBackend: Send + Sync {
43    /// Add a node to the graph.
44    fn add_node(&mut self, node: GraphNode) -> Result<(), CodememError>;
45
46    /// Get a node by ID.
47    fn get_node(&self, id: &str) -> Result<Option<GraphNode>, CodememError>;
48
49    /// Remove a node by ID.
50    fn remove_node(&mut self, id: &str) -> Result<bool, CodememError>;
51
52    /// Add an edge between two nodes.
53    fn add_edge(&mut self, edge: Edge) -> Result<(), CodememError>;
54
55    /// Get edges from a node.
56    fn get_edges(&self, node_id: &str) -> Result<Vec<Edge>, CodememError>;
57
58    /// Remove an edge by ID.
59    fn remove_edge(&mut self, id: &str) -> Result<bool, CodememError>;
60
61    /// BFS traversal from a start node up to max_depth.
62    fn bfs(&self, start_id: &str, max_depth: usize) -> Result<Vec<GraphNode>, CodememError>;
63
64    /// DFS traversal from a start node up to max_depth.
65    fn dfs(&self, start_id: &str, max_depth: usize) -> Result<Vec<GraphNode>, CodememError>;
66
67    /// BFS traversal with filtering: exclude certain node kinds and optionally
68    /// restrict to specific relationship types.
69    fn bfs_filtered(
70        &self,
71        start_id: &str,
72        max_depth: usize,
73        exclude_kinds: &[NodeKind],
74        include_relationships: Option<&[RelationshipType]>,
75    ) -> Result<Vec<GraphNode>, CodememError>;
76
77    /// DFS traversal with filtering: exclude certain node kinds and optionally
78    /// restrict to specific relationship types.
79    fn dfs_filtered(
80        &self,
81        start_id: &str,
82        max_depth: usize,
83        exclude_kinds: &[NodeKind],
84        include_relationships: Option<&[RelationshipType]>,
85    ) -> Result<Vec<GraphNode>, CodememError>;
86
87    /// Shortest path between two nodes.
88    fn shortest_path(&self, from: &str, to: &str) -> Result<Vec<String>, CodememError>;
89
90    /// Get graph statistics.
91    fn stats(&self) -> GraphStats;
92}
93
94/// Statistics about the graph.
95#[derive(Debug, Clone, Default, Serialize, Deserialize)]
96pub struct GraphStats {
97    pub node_count: usize,
98    pub edge_count: usize,
99    pub node_kind_counts: HashMap<String, usize>,
100    pub relationship_type_counts: HashMap<String, usize>,
101}
102
103// ── Embedding Provider Trait ────────────────────────────────────────────────
104
105/// Trait for pluggable embedding providers.
106pub trait EmbeddingProvider: Send + Sync {
107    /// Embedding vector dimensions.
108    fn dimensions(&self) -> usize;
109
110    /// Embed a single text string.
111    fn embed(&self, text: &str) -> Result<Vec<f32>, crate::CodememError>;
112
113    /// Embed a batch of texts (default: sequential).
114    fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>, crate::CodememError> {
115        texts.iter().map(|t| self.embed(t)).collect()
116    }
117
118    /// Provider name for display.
119    fn name(&self) -> &str;
120
121    /// Cache statistics: (current_size, capacity). Returns (0, 0) if no cache.
122    fn cache_stats(&self) -> (usize, usize) {
123        (0, 0)
124    }
125}
126
127// ── Storage Stats & Consolidation Types ─────────────────────────────────
128
129/// Database statistics.
130#[derive(Debug, Clone, Serialize, Deserialize)]
131pub struct StorageStats {
132    pub memory_count: usize,
133    pub embedding_count: usize,
134    pub node_count: usize,
135    pub edge_count: usize,
136}
137
/// One record from the consolidation log, as returned by
/// `StorageBackend::last_consolidation_runs`.
#[derive(Debug, Clone)]
pub struct ConsolidationLogEntry {
    /// Name of the consolidation cycle this record belongs to.
    pub cycle_type: String,
    /// When the run happened (presumably a Unix timestamp — confirm the unit
    /// with the storage backend).
    pub run_at: i64,
    /// How many items the run affected.
    pub affected_count: usize,
}
145
146// ── Storage Backend Trait ───────────────────────────────────────────────
147
148/// Pluggable storage backend trait for all persistence operations.
149///
150/// This trait unifies every persistence concern behind a single interface so
151/// that the engine layer (`CodememEngine`) remains backend-agnostic.
152///
153/// # Method groups
154///
155/// | Group | Methods | Purpose |
156/// |-------|---------|---------|
157/// | **Memory CRUD** | `insert_memory`, `get_memory`, `update_memory`, `delete_memory`, `list_memory_ids`, … | Create, read, update, delete memory nodes |
158/// | **Embedding persistence** | `store_embedding`, `get_embedding`, `delete_embedding`, `list_all_embeddings` | Persist and retrieve embedding vectors |
159/// | **Graph node/edge storage** | `insert_graph_node`, `get_graph_node`, `all_graph_nodes`, `insert_graph_edge`, … | Persist the knowledge graph structure |
160/// | **Sessions** | `start_session`, `end_session`, `list_sessions`, `session_count` | Track interaction sessions |
161/// | **Consolidation** | `insert_consolidation_log`, `last_consolidation_runs` | Record and query memory consolidation runs |
162/// | **Pattern detection** | `get_repeated_searches`, `get_file_hotspots`, `get_tool_usage_stats`, `get_decision_chains` | Cross-session pattern queries |
163/// | **Bulk/batch operations** | `insert_memories_batch`, `store_embeddings_batch`, `insert_graph_nodes_batch`, `insert_graph_edges_batch` | Efficient multi-row inserts |
164/// | **Decay & forgetting** | `decay_stale_memories`, `find_forgettable`, `get_stale_memories_for_decay`, `batch_update_importance` | Power-law decay and garbage collection |
165/// | **Query helpers** | `find_unembedded_memories`, `search_graph_nodes`, `list_memories_filtered`, `find_hash_duplicates` | Filtered searches and dedup |
166/// | **File hash tracking** | `load_file_hashes`, `save_file_hashes` | Incremental indexing support |
167/// | **Session activity** | `record_session_activity`, `get_session_activity_summary`, `get_session_hot_directories`, … | Fine-grained activity tracking |
168/// | **Stats** | `stats` | Database-level statistics |
169///
170/// Implementations include SQLite (default) and can be extended for
171/// SurrealDB, FalkorDB, or other backends.
172pub trait StorageBackend: Send + Sync {
173    // ── Memory CRUD ─────────────────────────────────────────────────
174
175    /// Insert a new memory. Returns Err(Duplicate) if content hash already exists.
176    fn insert_memory(&self, memory: &MemoryNode) -> Result<(), CodememError>;
177
178    /// Get a memory by ID. Updates access_count and last_accessed_at.
179    fn get_memory(&self, id: &str) -> Result<Option<MemoryNode>, CodememError>;
180
181    /// Get a memory by ID without updating access_count or last_accessed_at.
182    /// Use this for internal/system reads (consolidation checks, stats, batch processing).
183    fn get_memory_no_touch(&self, id: &str) -> Result<Option<MemoryNode>, CodememError> {
184        // Default: falls back to get_memory for backwards compatibility.
185        self.get_memory(id)
186    }
187
188    /// Get multiple memories by IDs in a single batch operation.
189    fn get_memories_batch(&self, ids: &[&str]) -> Result<Vec<MemoryNode>, CodememError>;
190
191    /// Update a memory's content and optionally its importance. Re-computes content hash.
192    fn update_memory(
193        &self,
194        id: &str,
195        content: &str,
196        importance: Option<f64>,
197    ) -> Result<(), CodememError>;
198
199    /// Delete a memory by ID. Returns true if a row was deleted.
200    fn delete_memory(&self, id: &str) -> Result<bool, CodememError>;
201
202    /// Delete a memory and all related data (graph nodes/edges, embeddings) atomically.
203    /// Returns true if the memory existed and was deleted.
204    /// Default falls back to individual deletes (non-transactional) for backwards compatibility.
205    fn delete_memory_cascade(&self, id: &str) -> Result<bool, CodememError> {
206        let deleted = self.delete_memory(id)?;
207        if deleted {
208            let _ = self.delete_graph_edges_for_node(id);
209            let _ = self.delete_graph_node(id);
210            let _ = self.delete_embedding(id);
211        }
212        Ok(deleted)
213    }
214
215    /// Delete multiple memories and all related data (graph nodes/edges, embeddings) atomically.
216    /// Returns the number of memories that were actually deleted.
217    /// Default falls back to calling `delete_memory_cascade` per ID for backwards compatibility.
218    fn delete_memories_batch_cascade(&self, ids: &[&str]) -> Result<usize, CodememError> {
219        let mut count = 0;
220        for id in ids {
221            if self.delete_memory_cascade(id)? {
222                count += 1;
223            }
224        }
225        Ok(count)
226    }
227
228    /// List all memory IDs, ordered by created_at descending.
229    fn list_memory_ids(&self) -> Result<Vec<String>, CodememError>;
230
231    /// List memory IDs scoped to a specific namespace.
232    fn list_memory_ids_for_namespace(&self, namespace: &str) -> Result<Vec<String>, CodememError>;
233
234    /// Find memory IDs whose tags contain the given tag value.
235    /// Optionally scoped to a namespace. Excludes `exclude_id`.
236    fn find_memory_ids_by_tag(
237        &self,
238        tag: &str,
239        namespace: Option<&str>,
240        exclude_id: &str,
241    ) -> Result<Vec<String>, CodememError>;
242
243    /// List all distinct namespaces.
244    fn list_namespaces(&self) -> Result<Vec<String>, CodememError>;
245
246    /// Get total memory count.
247    fn memory_count(&self) -> Result<usize, CodememError>;
248
249    // ── Embedding Persistence ───────────────────────────────────────
250
251    /// Store an embedding vector for a memory.
252    fn store_embedding(&self, memory_id: &str, embedding: &[f32]) -> Result<(), CodememError>;
253
254    /// Get an embedding by memory ID.
255    fn get_embedding(&self, memory_id: &str) -> Result<Option<Vec<f32>>, CodememError>;
256
257    /// Delete an embedding by memory ID. Returns true if a row was deleted.
258    fn delete_embedding(&self, memory_id: &str) -> Result<bool, CodememError>;
259
260    /// List all stored embeddings as (memory_id, embedding_vector) pairs.
261    fn list_all_embeddings(&self) -> Result<Vec<(String, Vec<f32>)>, CodememError>;
262
263    // ── Graph Node/Edge Persistence ─────────────────────────────────
264
265    /// Insert or replace a graph node.
266    fn insert_graph_node(&self, node: &GraphNode) -> Result<(), CodememError>;
267
268    /// Get a graph node by ID.
269    fn get_graph_node(&self, id: &str) -> Result<Option<GraphNode>, CodememError>;
270
271    /// Delete a graph node by ID. Returns true if a row was deleted.
272    fn delete_graph_node(&self, id: &str) -> Result<bool, CodememError>;
273
274    /// Get all graph nodes.
275    fn all_graph_nodes(&self) -> Result<Vec<GraphNode>, CodememError>;
276
277    /// Insert or replace a graph edge.
278    fn insert_graph_edge(&self, edge: &Edge) -> Result<(), CodememError>;
279
280    /// Get all edges from or to a node.
281    fn get_edges_for_node(&self, node_id: &str) -> Result<Vec<Edge>, CodememError>;
282
283    /// Get all graph edges.
284    fn all_graph_edges(&self) -> Result<Vec<Edge>, CodememError>;
285
286    /// Delete all graph edges connected to a node. Returns count deleted.
287    fn delete_graph_edges_for_node(&self, node_id: &str) -> Result<usize, CodememError>;
288
289    /// Delete all graph nodes, edges, and embeddings whose node ID starts with the given prefix.
290    /// Returns count of nodes deleted.
291    fn delete_graph_nodes_by_prefix(&self, prefix: &str) -> Result<usize, CodememError>;
292
293    // ── Sessions ────────────────────────────────────────────────────
294
295    /// Start a new session.
296    fn start_session(&self, id: &str, namespace: Option<&str>) -> Result<(), CodememError>;
297
298    /// End a session with optional summary.
299    fn end_session(&self, id: &str, summary: Option<&str>) -> Result<(), CodememError>;
300
301    /// List sessions, optionally filtered by namespace, up to limit.
302    fn list_sessions(
303        &self,
304        namespace: Option<&str>,
305        limit: usize,
306    ) -> Result<Vec<Session>, CodememError>;
307
308    // ── Consolidation ───────────────────────────────────────────────
309
310    /// Record a consolidation run.
311    fn insert_consolidation_log(
312        &self,
313        cycle_type: &str,
314        affected_count: usize,
315    ) -> Result<(), CodememError>;
316
317    /// Get the last consolidation run for each cycle type.
318    fn last_consolidation_runs(&self) -> Result<Vec<ConsolidationLogEntry>, CodememError>;
319
320    // ── Pattern Detection Queries ───────────────────────────────────
321
322    /// Find repeated search patterns. Returns (pattern, count, memory_ids).
323    fn get_repeated_searches(
324        &self,
325        min_count: usize,
326        namespace: Option<&str>,
327    ) -> Result<Vec<(String, usize, Vec<String>)>, CodememError>;
328
329    /// Find file hotspots. Returns (file_path, count, memory_ids).
330    fn get_file_hotspots(
331        &self,
332        min_count: usize,
333        namespace: Option<&str>,
334    ) -> Result<Vec<(String, usize, Vec<String>)>, CodememError>;
335
336    /// Get tool usage statistics. Returns (tool_name, count) pairs.
337    fn get_tool_usage_stats(
338        &self,
339        namespace: Option<&str>,
340    ) -> Result<Vec<(String, usize)>, CodememError>;
341
342    /// Find decision chains. Returns (file_path, count, memory_ids).
343    fn get_decision_chains(
344        &self,
345        min_count: usize,
346        namespace: Option<&str>,
347    ) -> Result<Vec<(String, usize, Vec<String>)>, CodememError>;
348
349    // ── Bulk Operations ─────────────────────────────────────────────
350
351    /// Decay importance of stale memories older than threshold_ts by decay_factor.
352    /// Returns count of affected memories.
353    fn decay_stale_memories(
354        &self,
355        threshold_ts: i64,
356        decay_factor: f64,
357    ) -> Result<usize, CodememError>;
358
359    /// List memories for creative consolidation: (id, memory_type, tags).
360    fn list_memories_for_creative(
361        &self,
362    ) -> Result<Vec<(String, String, Vec<String>)>, CodememError>;
363
364    /// Find near-duplicate memories by content hash prefix matching.
365    /// Returns (id1, id2, similarity) pairs. Only catches exact content matches
366    /// (hash prefix), not semantic near-duplicates.
367    fn find_hash_duplicates(&self) -> Result<Vec<(String, String, f64)>, CodememError>;
368
369    /// Find memories eligible for forgetting (low importance).
370    /// Returns list of memory IDs.
371    fn find_forgettable(&self, importance_threshold: f64) -> Result<Vec<String>, CodememError>;
372
373    // ── Batch Operations ────────────────────────────────────────────
374
375    /// Insert multiple memories in a single batch. Default impl calls insert_memory in a loop.
376    fn insert_memories_batch(&self, memories: &[MemoryNode]) -> Result<(), CodememError> {
377        for memory in memories {
378            self.insert_memory(memory)?;
379        }
380        Ok(())
381    }
382
383    /// Store multiple embeddings in a single batch. Default impl calls store_embedding in a loop.
384    fn store_embeddings_batch(&self, items: &[(&str, &[f32])]) -> Result<(), CodememError> {
385        for (id, embedding) in items {
386            self.store_embedding(id, embedding)?;
387        }
388        Ok(())
389    }
390
391    /// Insert multiple graph nodes in a single batch. Default impl calls insert_graph_node in a loop.
392    fn insert_graph_nodes_batch(&self, nodes: &[GraphNode]) -> Result<(), CodememError> {
393        for node in nodes {
394            self.insert_graph_node(node)?;
395        }
396        Ok(())
397    }
398
399    /// Insert multiple graph edges in a single batch. Default impl calls insert_graph_edge in a loop.
400    fn insert_graph_edges_batch(&self, edges: &[Edge]) -> Result<(), CodememError> {
401        for edge in edges {
402            self.insert_graph_edge(edge)?;
403        }
404        Ok(())
405    }
406
407    // ── Query Helpers ───────────────────────────────────────────────
408
409    /// Find memories that have no embeddings yet. Returns (id, content) pairs.
410    fn find_unembedded_memories(&self) -> Result<Vec<(String, String)>, CodememError>;
411
412    /// Search graph nodes by label (case-insensitive LIKE). Returns matching nodes
413    /// sorted by centrality descending, limited to `limit` results.
414    fn search_graph_nodes(
415        &self,
416        query: &str,
417        namespace: Option<&str>,
418        limit: usize,
419    ) -> Result<Vec<GraphNode>, CodememError>;
420
421    /// List memories with optional namespace and memory_type filters.
422    fn list_memories_filtered(
423        &self,
424        namespace: Option<&str>,
425        memory_type: Option<&str>,
426    ) -> Result<Vec<MemoryNode>, CodememError>;
427
428    /// Fetch stale memories with access metadata for power-law decay.
429    /// Returns (id, importance, access_count, last_accessed_at).
430    fn get_stale_memories_for_decay(
431        &self,
432        threshold_ts: i64,
433    ) -> Result<Vec<(String, f64, u32, i64)>, CodememError>;
434
435    /// Batch-update importance values. Returns count of updated rows.
436    fn batch_update_importance(&self, updates: &[(String, f64)]) -> Result<usize, CodememError>;
437
438    /// Total session count, optionally filtered by namespace.
439    fn session_count(&self, namespace: Option<&str>) -> Result<usize, CodememError>;
440
441    // ── File Hash Tracking ──────────────────────────────────────────
442
443    /// Load all file hashes for incremental indexing. Returns path -> hash map.
444    fn load_file_hashes(&self) -> Result<HashMap<String, String>, CodememError>;
445
446    /// Save file hashes for incremental indexing.
447    fn save_file_hashes(&self, hashes: &HashMap<String, String>) -> Result<(), CodememError>;
448
449    // ── Session Activity Tracking ─────────────────────────────────
450
451    /// Record a session activity event (tool use with context).
452    fn record_session_activity(
453        &self,
454        session_id: &str,
455        tool_name: &str,
456        file_path: Option<&str>,
457        directory: Option<&str>,
458        pattern: Option<&str>,
459    ) -> Result<(), CodememError>;
460
461    /// Get a summary of session activity counts.
462    fn get_session_activity_summary(
463        &self,
464        session_id: &str,
465    ) -> Result<crate::SessionActivitySummary, CodememError>;
466
467    /// Get the most active directories in a session. Returns (directory, count) pairs.
468    fn get_session_hot_directories(
469        &self,
470        session_id: &str,
471        limit: usize,
472    ) -> Result<Vec<(String, usize)>, CodememError>;
473
474    /// Check whether a particular auto-insight dedup tag already exists for a session.
475    fn has_auto_insight(&self, session_id: &str, dedup_tag: &str) -> Result<bool, CodememError>;
476
477    /// Count how many Read events occurred in a directory during a session.
478    fn count_directory_reads(
479        &self,
480        session_id: &str,
481        directory: &str,
482    ) -> Result<usize, CodememError>;
483
484    /// Check if a file was read in the current session.
485    fn was_file_read_in_session(
486        &self,
487        session_id: &str,
488        file_path: &str,
489    ) -> Result<bool, CodememError>;
490
491    /// Count how many times a search pattern was used in a session.
492    fn count_search_pattern_in_session(
493        &self,
494        session_id: &str,
495        pattern: &str,
496    ) -> Result<usize, CodememError>;
497
498    // ── Stats ───────────────────────────────────────────────────────
499
500    /// Get database statistics.
501    fn stats(&self) -> Result<StorageStats, CodememError>;
502
503    // ── Transaction Control ────────────────────────────────────────
504
505    /// Begin an explicit transaction.
506    ///
507    /// While a transaction is active, individual storage methods (e.g.
508    /// `insert_memory`, `insert_graph_node`) participate in it instead of
509    /// starting their own. Call `commit_transaction` to persist or
510    /// `rollback_transaction` to discard.
511    ///
512    /// Default implementation is a no-op for backends that don't support
513    /// explicit transaction control.
514    fn begin_transaction(&self) -> Result<(), CodememError> {
515        Ok(())
516    }
517
518    /// Commit the active transaction started by `begin_transaction`.
519    ///
520    /// Default implementation is a no-op.
521    fn commit_transaction(&self) -> Result<(), CodememError> {
522        Ok(())
523    }
524
525    /// Roll back the active transaction started by `begin_transaction`.
526    ///
527    /// Default implementation is a no-op.
528    fn rollback_transaction(&self) -> Result<(), CodememError> {
529        Ok(())
530    }
531}