// Source: codemem_core/traits.rs

use serde::{Deserialize, Serialize};
use std::collections::HashMap;

use crate::{CodememError, Edge, GraphNode, MemoryNode, NodeKind, RelationshipType, Session};

// ── Traits ──────────────────────────────────────────────────────────────────

8/// Vector backend trait for HNSW index operations.
9pub trait VectorBackend: Send + Sync {
10    /// Insert a vector with associated ID.
11    fn insert(&mut self, id: &str, embedding: &[f32]) -> Result<(), CodememError>;
12
13    /// Batch insert vectors.
14    fn insert_batch(&mut self, items: &[(String, Vec<f32>)]) -> Result<(), CodememError>;
15
16    /// Search for k nearest neighbors. Returns (id, distance) pairs.
17    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, CodememError>;
18
19    /// Remove a vector by ID.
20    fn remove(&mut self, id: &str) -> Result<bool, CodememError>;
21
22    /// Save the index to disk.
23    fn save(&self, path: &std::path::Path) -> Result<(), CodememError>;
24
25    /// Load the index from disk.
26    fn load(&mut self, path: &std::path::Path) -> Result<(), CodememError>;
27
28    /// Get index statistics.
29    fn stats(&self) -> VectorStats;
30}
31
32/// Statistics about the vector index.
33#[derive(Debug, Clone, Default, Serialize, Deserialize)]
34pub struct VectorStats {
35    pub count: usize,
36    pub dimensions: usize,
37    pub metric: String,
38    pub memory_bytes: usize,
39}
40
41/// Graph backend trait for graph operations.
42pub trait GraphBackend: Send + Sync {
43    /// Add a node to the graph.
44    fn add_node(&mut self, node: GraphNode) -> Result<(), CodememError>;
45
46    /// Get a node by ID.
47    fn get_node(&self, id: &str) -> Result<Option<GraphNode>, CodememError>;
48
49    /// Remove a node by ID.
50    fn remove_node(&mut self, id: &str) -> Result<bool, CodememError>;
51
52    /// Add an edge between two nodes.
53    fn add_edge(&mut self, edge: Edge) -> Result<(), CodememError>;
54
55    /// Get edges from a node.
56    fn get_edges(&self, node_id: &str) -> Result<Vec<Edge>, CodememError>;
57
58    /// Remove an edge by ID.
59    fn remove_edge(&mut self, id: &str) -> Result<bool, CodememError>;
60
61    /// BFS traversal from a start node up to max_depth.
62    fn bfs(&self, start_id: &str, max_depth: usize) -> Result<Vec<GraphNode>, CodememError>;
63
64    /// DFS traversal from a start node up to max_depth.
65    fn dfs(&self, start_id: &str, max_depth: usize) -> Result<Vec<GraphNode>, CodememError>;
66
67    /// BFS traversal with filtering: exclude certain node kinds and optionally
68    /// restrict to specific relationship types.
69    fn bfs_filtered(
70        &self,
71        start_id: &str,
72        max_depth: usize,
73        exclude_kinds: &[NodeKind],
74        include_relationships: Option<&[RelationshipType]>,
75    ) -> Result<Vec<GraphNode>, CodememError> {
76        // Default implementation falls back to unfiltered BFS
77        let _ = (exclude_kinds, include_relationships);
78        self.bfs(start_id, max_depth)
79    }
80
81    /// DFS traversal with filtering: exclude certain node kinds and optionally
82    /// restrict to specific relationship types.
83    fn dfs_filtered(
84        &self,
85        start_id: &str,
86        max_depth: usize,
87        exclude_kinds: &[NodeKind],
88        include_relationships: Option<&[RelationshipType]>,
89    ) -> Result<Vec<GraphNode>, CodememError> {
90        // Default implementation falls back to unfiltered DFS
91        let _ = (exclude_kinds, include_relationships);
92        self.dfs(start_id, max_depth)
93    }
94
95    /// Shortest path between two nodes.
96    fn shortest_path(&self, from: &str, to: &str) -> Result<Vec<String>, CodememError>;
97
98    /// Get graph statistics.
99    fn stats(&self) -> GraphStats;
100}
101
102/// Statistics about the graph.
103#[derive(Debug, Clone, Default, Serialize, Deserialize)]
104pub struct GraphStats {
105    pub node_count: usize,
106    pub edge_count: usize,
107    pub node_kind_counts: HashMap<String, usize>,
108    pub relationship_type_counts: HashMap<String, usize>,
109}

// ── Storage Stats & Consolidation Types ─────────────────────────────────

113/// Database statistics.
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct StorageStats {
116    pub memory_count: usize,
117    pub embedding_count: usize,
118    pub node_count: usize,
119    pub edge_count: usize,
120}
121
/// A single consolidation log entry.
#[derive(Debug, Clone)]
pub struct ConsolidationLogEntry {
    /// Name of the consolidation cycle this entry belongs to.
    pub cycle_type: String,
    /// When the cycle ran.
    /// NOTE(review): presumably a Unix timestamp; the unit (seconds vs. millis)
    /// is not visible here — confirm against the storage implementation.
    pub run_at: i64,
    /// How many items the run affected.
    pub affected_count: usize,
}

// ── Storage Backend Trait ───────────────────────────────────────────────

132/// Pluggable storage backend trait for all persistence operations.
133///
134/// This trait unifies memory CRUD, embedding persistence, graph node/edge
135/// storage, sessions, consolidation, and pattern detection behind a single
136/// interface. Implementations include SQLite (default) and can be extended
137/// for SurrealDB, FalkorDB, or other backends.
138pub trait StorageBackend: Send + Sync {
139    // ── Memory CRUD ─────────────────────────────────────────────────
140
141    /// Insert a new memory. Returns Err(Duplicate) if content hash already exists.
142    fn insert_memory(&self, memory: &MemoryNode) -> Result<(), CodememError>;
143
144    /// Get a memory by ID. Updates access_count and last_accessed_at.
145    fn get_memory(&self, id: &str) -> Result<Option<MemoryNode>, CodememError>;
146
147    /// Get multiple memories by IDs in a single batch operation.
148    fn get_memories_batch(&self, ids: &[&str]) -> Result<Vec<MemoryNode>, CodememError>;
149
150    /// Update a memory's content and optionally its importance. Re-computes content hash.
151    fn update_memory(
152        &self,
153        id: &str,
154        content: &str,
155        importance: Option<f64>,
156    ) -> Result<(), CodememError>;
157
158    /// Delete a memory by ID. Returns true if a row was deleted.
159    fn delete_memory(&self, id: &str) -> Result<bool, CodememError>;
160
161    /// List all memory IDs, ordered by created_at descending.
162    fn list_memory_ids(&self) -> Result<Vec<String>, CodememError>;
163
164    /// List memory IDs scoped to a specific namespace.
165    fn list_memory_ids_for_namespace(&self, namespace: &str) -> Result<Vec<String>, CodememError>;
166
167    /// List all distinct namespaces.
168    fn list_namespaces(&self) -> Result<Vec<String>, CodememError>;
169
170    /// Get total memory count.
171    fn memory_count(&self) -> Result<usize, CodememError>;
172
173    // ── Embedding Persistence ───────────────────────────────────────
174
175    /// Store an embedding vector for a memory.
176    fn store_embedding(&self, memory_id: &str, embedding: &[f32]) -> Result<(), CodememError>;
177
178    /// Get an embedding by memory ID.
179    fn get_embedding(&self, memory_id: &str) -> Result<Option<Vec<f32>>, CodememError>;
180
181    /// Delete an embedding by memory ID. Returns true if a row was deleted.
182    fn delete_embedding(&self, memory_id: &str) -> Result<bool, CodememError>;
183
184    /// List all stored embeddings as (memory_id, embedding_vector) pairs.
185    fn list_all_embeddings(&self) -> Result<Vec<(String, Vec<f32>)>, CodememError>;
186
187    // ── Graph Node/Edge Persistence ─────────────────────────────────
188
189    /// Insert or replace a graph node.
190    fn insert_graph_node(&self, node: &GraphNode) -> Result<(), CodememError>;
191
192    /// Get a graph node by ID.
193    fn get_graph_node(&self, id: &str) -> Result<Option<GraphNode>, CodememError>;
194
195    /// Delete a graph node by ID. Returns true if a row was deleted.
196    fn delete_graph_node(&self, id: &str) -> Result<bool, CodememError>;
197
198    /// Get all graph nodes.
199    fn all_graph_nodes(&self) -> Result<Vec<GraphNode>, CodememError>;
200
201    /// Insert or replace a graph edge.
202    fn insert_graph_edge(&self, edge: &Edge) -> Result<(), CodememError>;
203
204    /// Get all edges from or to a node.
205    fn get_edges_for_node(&self, node_id: &str) -> Result<Vec<Edge>, CodememError>;
206
207    /// Get all graph edges.
208    fn all_graph_edges(&self) -> Result<Vec<Edge>, CodememError>;
209
210    /// Delete all graph edges connected to a node. Returns count deleted.
211    fn delete_graph_edges_for_node(&self, node_id: &str) -> Result<usize, CodememError>;
212
213    /// Delete all graph nodes, edges, and embeddings whose node ID starts with the given prefix.
214    /// Returns count of nodes deleted.
215    fn delete_graph_nodes_by_prefix(&self, prefix: &str) -> Result<usize, CodememError>;
216
217    // ── Sessions ────────────────────────────────────────────────────
218
219    /// Start a new session.
220    fn start_session(&self, id: &str, namespace: Option<&str>) -> Result<(), CodememError>;
221
222    /// End a session with optional summary.
223    fn end_session(&self, id: &str, summary: Option<&str>) -> Result<(), CodememError>;
224
225    /// List sessions, optionally filtered by namespace, up to limit.
226    fn list_sessions(
227        &self,
228        namespace: Option<&str>,
229        limit: usize,
230    ) -> Result<Vec<Session>, CodememError>;
231
232    // ── Consolidation ───────────────────────────────────────────────
233
234    /// Record a consolidation run.
235    fn insert_consolidation_log(
236        &self,
237        cycle_type: &str,
238        affected_count: usize,
239    ) -> Result<(), CodememError>;
240
241    /// Get the last consolidation run for each cycle type.
242    fn last_consolidation_runs(&self) -> Result<Vec<ConsolidationLogEntry>, CodememError>;
243
244    // ── Pattern Detection Queries ───────────────────────────────────
245
246    /// Find repeated search patterns. Returns (pattern, count, memory_ids).
247    fn get_repeated_searches(
248        &self,
249        min_count: usize,
250        namespace: Option<&str>,
251    ) -> Result<Vec<(String, usize, Vec<String>)>, CodememError>;
252
253    /// Find file hotspots. Returns (file_path, count, memory_ids).
254    fn get_file_hotspots(
255        &self,
256        min_count: usize,
257        namespace: Option<&str>,
258    ) -> Result<Vec<(String, usize, Vec<String>)>, CodememError>;
259
260    /// Get tool usage statistics. Returns (tool_name, count) pairs.
261    fn get_tool_usage_stats(
262        &self,
263        namespace: Option<&str>,
264    ) -> Result<Vec<(String, usize)>, CodememError>;
265
266    /// Find decision chains. Returns (file_path, count, memory_ids).
267    fn get_decision_chains(
268        &self,
269        min_count: usize,
270        namespace: Option<&str>,
271    ) -> Result<Vec<(String, usize, Vec<String>)>, CodememError>;
272
273    // ── Bulk Operations ─────────────────────────────────────────────
274
275    /// Decay importance of stale memories older than threshold_ts by decay_factor.
276    /// Returns count of affected memories.
277    fn decay_stale_memories(
278        &self,
279        threshold_ts: i64,
280        decay_factor: f64,
281    ) -> Result<usize, CodememError>;
282
283    /// List memories for creative consolidation: (id, memory_type, tags).
284    fn list_memories_for_creative(
285        &self,
286    ) -> Result<Vec<(String, String, Vec<String>)>, CodememError>;
287
288    /// Find near-duplicate memories by content hash prefix similarity.
289    /// Returns (id1, id2, similarity) pairs.
290    fn find_cluster_duplicates(&self) -> Result<Vec<(String, String, f64)>, CodememError>;
291
292    /// Find memories eligible for forgetting (low importance).
293    /// Returns list of memory IDs.
294    fn find_forgettable(&self, importance_threshold: f64) -> Result<Vec<String>, CodememError>;
295
296    // ── Batch Operations ────────────────────────────────────────────
297
298    /// Insert multiple memories in a single batch. Default impl calls insert_memory in a loop.
299    fn insert_memories_batch(&self, memories: &[MemoryNode]) -> Result<(), CodememError> {
300        for memory in memories {
301            self.insert_memory(memory)?;
302        }
303        Ok(())
304    }
305
306    /// Store multiple embeddings in a single batch. Default impl calls store_embedding in a loop.
307    fn store_embeddings_batch(&self, items: &[(&str, &[f32])]) -> Result<(), CodememError> {
308        for (id, embedding) in items {
309            self.store_embedding(id, embedding)?;
310        }
311        Ok(())
312    }
313
314    /// Insert multiple graph nodes in a single batch. Default impl calls insert_graph_node in a loop.
315    fn insert_graph_nodes_batch(&self, nodes: &[GraphNode]) -> Result<(), CodememError> {
316        for node in nodes {
317            self.insert_graph_node(node)?;
318        }
319        Ok(())
320    }
321
322    /// Insert multiple graph edges in a single batch. Default impl calls insert_graph_edge in a loop.
323    fn insert_graph_edges_batch(&self, edges: &[Edge]) -> Result<(), CodememError> {
324        for edge in edges {
325            self.insert_graph_edge(edge)?;
326        }
327        Ok(())
328    }
329
330    // ── Query Helpers ───────────────────────────────────────────────
331
332    /// Find memories that have no embeddings yet. Returns (id, content) pairs.
333    fn find_unembedded_memories(&self) -> Result<Vec<(String, String)>, CodememError>;
334
335    /// Search graph nodes by label (case-insensitive LIKE). Returns matching nodes
336    /// sorted by centrality descending, limited to `limit` results.
337    fn search_graph_nodes(
338        &self,
339        query: &str,
340        namespace: Option<&str>,
341        limit: usize,
342    ) -> Result<Vec<GraphNode>, CodememError>;
343
344    /// List memories with optional namespace and memory_type filters.
345    fn list_memories_filtered(
346        &self,
347        namespace: Option<&str>,
348        memory_type: Option<&str>,
349    ) -> Result<Vec<MemoryNode>, CodememError>;
350
351    /// Get edges filtered by namespace (edges where both src and dst nodes have the given namespace).
352    fn graph_edges_for_namespace(&self, namespace: &str) -> Result<Vec<Edge>, CodememError>;
353
354    // ── Temporal Edge Queries ───────────────────────────────────────
355
356    /// Get edges active at a specific timestamp. Default: no temporal filtering.
357    fn get_edges_at_time(&self, node_id: &str, _timestamp: i64) -> Result<Vec<Edge>, CodememError> {
358        self.get_edges_for_node(node_id)
359    }
360
361    /// Fetch stale memories with access metadata for power-law decay.
362    /// Returns (id, importance, access_count, last_accessed_at).
363    fn get_stale_memories_for_decay(
364        &self,
365        threshold_ts: i64,
366    ) -> Result<Vec<(String, f64, u32, i64)>, CodememError>;
367
368    /// Batch-update importance values. Returns count of updated rows.
369    fn batch_update_importance(&self, updates: &[(String, f64)]) -> Result<usize, CodememError>;
370
371    /// Total session count, optionally filtered by namespace.
372    fn session_count(&self, namespace: Option<&str>) -> Result<usize, CodememError>;
373
374    // ── File Hash Tracking ──────────────────────────────────────────
375
376    /// Load all file hashes for incremental indexing. Returns path -> hash map.
377    fn load_file_hashes(&self) -> Result<HashMap<String, String>, CodememError>;
378
379    /// Save file hashes for incremental indexing.
380    fn save_file_hashes(&self, hashes: &HashMap<String, String>) -> Result<(), CodememError>;
381
382    // ── Session Activity Tracking ─────────────────────────────────
383
384    /// Record a session activity event (tool use with context).
385    fn record_session_activity(
386        &self,
387        session_id: &str,
388        tool_name: &str,
389        file_path: Option<&str>,
390        directory: Option<&str>,
391        pattern: Option<&str>,
392    ) -> Result<(), CodememError>;
393
394    /// Get a summary of session activity counts.
395    fn get_session_activity_summary(
396        &self,
397        session_id: &str,
398    ) -> Result<crate::SessionActivitySummary, CodememError>;
399
400    /// Get the most active directories in a session. Returns (directory, count) pairs.
401    fn get_session_hot_directories(
402        &self,
403        session_id: &str,
404        limit: usize,
405    ) -> Result<Vec<(String, usize)>, CodememError>;
406
407    /// Check whether a particular auto-insight dedup tag already exists for a session.
408    fn has_auto_insight(&self, session_id: &str, dedup_tag: &str) -> Result<bool, CodememError>;
409
410    /// Count how many Read events occurred in a directory during a session.
411    fn count_directory_reads(
412        &self,
413        session_id: &str,
414        directory: &str,
415    ) -> Result<usize, CodememError>;
416
417    /// Check if a file was read in the current session.
418    fn was_file_read_in_session(
419        &self,
420        session_id: &str,
421        file_path: &str,
422    ) -> Result<bool, CodememError>;
423
424    /// Count how many times a search pattern was used in a session.
425    fn count_search_pattern_in_session(
426        &self,
427        session_id: &str,
428        pattern: &str,
429    ) -> Result<usize, CodememError>;
430
431    // ── Stats ───────────────────────────────────────────────────────
432
433    /// Get database statistics.
434    fn stats(&self) -> Result<StorageStats, CodememError>;
435}