Skip to main content

codemem_core/
traits.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
4use crate::{
5    CodememError, Edge, GraphNode, MemoryNode, NodeKind, RelationshipType, Repository, Session,
6};
7
8// ── Traits ──────────────────────────────────────────────────────────────────
9
10/// Vector backend trait for HNSW index operations.
11pub trait VectorBackend: Send + Sync {
12    /// Insert a vector with associated ID.
13    fn insert(&mut self, id: &str, embedding: &[f32]) -> Result<(), CodememError>;
14
15    /// Batch insert vectors.
16    fn insert_batch(&mut self, items: &[(String, Vec<f32>)]) -> Result<(), CodememError>;
17
18    /// Search for k nearest neighbors. Returns (id, distance) pairs.
19    fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>, CodememError>;
20
21    /// Remove a vector by ID.
22    fn remove(&mut self, id: &str) -> Result<bool, CodememError>;
23
24    /// Save the index to disk.
25    fn save(&self, path: &std::path::Path) -> Result<(), CodememError>;
26
27    /// Load the index from disk.
28    fn load(&mut self, path: &std::path::Path) -> Result<(), CodememError>;
29
30    /// Get index statistics.
31    fn stats(&self) -> VectorStats;
32}
33
34/// Statistics about the vector index.
35#[derive(Debug, Clone, Default, Serialize, Deserialize)]
36pub struct VectorStats {
37    pub count: usize,
38    pub dimensions: usize,
39    pub metric: String,
40    pub memory_bytes: usize,
41}
42
43/// Graph backend trait for graph operations.
44pub trait GraphBackend: Send + Sync {
45    /// Add a node to the graph.
46    fn add_node(&mut self, node: GraphNode) -> Result<(), CodememError>;
47
48    /// Get a node by ID.
49    fn get_node(&self, id: &str) -> Result<Option<GraphNode>, CodememError>;
50
51    /// Remove a node by ID.
52    fn remove_node(&mut self, id: &str) -> Result<bool, CodememError>;
53
54    /// Add an edge between two nodes.
55    fn add_edge(&mut self, edge: Edge) -> Result<(), CodememError>;
56
57    /// Get edges from a node.
58    fn get_edges(&self, node_id: &str) -> Result<Vec<Edge>, CodememError>;
59
60    /// Remove an edge by ID.
61    fn remove_edge(&mut self, id: &str) -> Result<bool, CodememError>;
62
63    /// BFS traversal from a start node up to max_depth.
64    fn bfs(&self, start_id: &str, max_depth: usize) -> Result<Vec<GraphNode>, CodememError>;
65
66    /// DFS traversal from a start node up to max_depth.
67    fn dfs(&self, start_id: &str, max_depth: usize) -> Result<Vec<GraphNode>, CodememError>;
68
69    /// BFS traversal with filtering: exclude certain node kinds and optionally
70    /// restrict to specific relationship types.
71    fn bfs_filtered(
72        &self,
73        start_id: &str,
74        max_depth: usize,
75        exclude_kinds: &[NodeKind],
76        include_relationships: Option<&[RelationshipType]>,
77    ) -> Result<Vec<GraphNode>, CodememError>;
78
79    /// DFS traversal with filtering: exclude certain node kinds and optionally
80    /// restrict to specific relationship types.
81    fn dfs_filtered(
82        &self,
83        start_id: &str,
84        max_depth: usize,
85        exclude_kinds: &[NodeKind],
86        include_relationships: Option<&[RelationshipType]>,
87    ) -> Result<Vec<GraphNode>, CodememError>;
88
89    /// Shortest path between two nodes.
90    fn shortest_path(&self, from: &str, to: &str) -> Result<Vec<String>, CodememError>;
91
92    /// Get graph statistics.
93    fn stats(&self) -> GraphStats;
94}
95
96/// Statistics about the graph.
97#[derive(Debug, Clone, Default, Serialize, Deserialize)]
98pub struct GraphStats {
99    pub node_count: usize,
100    pub edge_count: usize,
101    pub node_kind_counts: HashMap<String, usize>,
102    pub relationship_type_counts: HashMap<String, usize>,
103}
104
105// ── Embedding Provider Trait ────────────────────────────────────────────────
106
107/// Trait for pluggable embedding providers.
108pub trait EmbeddingProvider: Send + Sync {
109    /// Embedding vector dimensions.
110    fn dimensions(&self) -> usize;
111
112    /// Embed a single text string.
113    fn embed(&self, text: &str) -> Result<Vec<f32>, crate::CodememError>;
114
115    /// Embed a batch of texts (default: sequential).
116    fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>, crate::CodememError> {
117        texts.iter().map(|t| self.embed(t)).collect()
118    }
119
120    /// Provider name for display.
121    fn name(&self) -> &str;
122
123    /// Cache statistics: (current_size, capacity). Returns (0, 0) if no cache.
124    fn cache_stats(&self) -> (usize, usize) {
125        (0, 0)
126    }
127}
128
129// ── Storage Stats & Consolidation Types ─────────────────────────────────
130
131/// Database statistics.
132#[derive(Debug, Clone, Serialize, Deserialize)]
133pub struct StorageStats {
134    pub memory_count: usize,
135    pub embedding_count: usize,
136    pub node_count: usize,
137    pub edge_count: usize,
138}
139
/// One row of the consolidation log.
#[derive(Debug, Clone)]
pub struct ConsolidationLogEntry {
    /// Which consolidation cycle this entry belongs to.
    pub cycle_type: String,
    /// When the run happened.
    /// NOTE(review): units are not visible here — presumably a Unix
    /// timestamp; confirm against the storage backend.
    pub run_at: i64,
    /// How many items the run affected.
    pub affected_count: usize,
}
147
148// ── Storage Backend Trait ───────────────────────────────────────────────
149
150/// Pluggable storage backend trait for all persistence operations.
151///
152/// This trait unifies every persistence concern behind a single interface so
153/// that the engine layer (`CodememEngine`) remains backend-agnostic.
154///
155/// # Method groups
156///
157/// | Group | Methods | Purpose |
158/// |-------|---------|---------|
159/// | **Memory CRUD** | `insert_memory`, `get_memory`, `update_memory`, `delete_memory`, `list_memory_ids`, … | Create, read, update, delete memory nodes |
160/// | **Embedding persistence** | `store_embedding`, `get_embedding`, `delete_embedding`, `list_all_embeddings` | Persist and retrieve embedding vectors |
161/// | **Graph node/edge storage** | `insert_graph_node`, `get_graph_node`, `all_graph_nodes`, `insert_graph_edge`, … | Persist the knowledge graph structure |
162/// | **Sessions** | `start_session`, `end_session`, `list_sessions`, `session_count` | Track interaction sessions |
163/// | **Consolidation** | `insert_consolidation_log`, `last_consolidation_runs` | Record and query memory consolidation runs |
164/// | **Pattern detection** | `get_repeated_searches`, `get_file_hotspots`, `get_tool_usage_stats`, `get_decision_chains` | Cross-session pattern queries |
165/// | **Bulk/batch operations** | `insert_memories_batch`, `store_embeddings_batch`, `insert_graph_nodes_batch`, `insert_graph_edges_batch` | Efficient multi-row inserts |
166/// | **Decay & forgetting** | `decay_stale_memories`, `find_forgettable`, `get_stale_memories_for_decay`, `batch_update_importance` | Power-law decay and garbage collection |
167/// | **Query helpers** | `find_unembedded_memories`, `search_graph_nodes`, `list_memories_filtered`, `find_hash_duplicates` | Filtered searches and dedup |
168/// | **File hash tracking** | `load_file_hashes`, `save_file_hashes` | Incremental indexing support |
169/// | **Session activity** | `record_session_activity`, `get_session_activity_summary`, `get_session_hot_directories`, … | Fine-grained activity tracking |
170/// | **Stats** | `stats` | Database-level statistics |
171///
172/// Implementations include SQLite (default) and can be extended for
173/// SurrealDB, FalkorDB, or other backends.
174pub trait StorageBackend: Send + Sync {
175    // ── Memory CRUD ─────────────────────────────────────────────────
176
177    /// Insert a new memory. Returns Err(Duplicate) if content hash already exists.
178    fn insert_memory(&self, memory: &MemoryNode) -> Result<(), CodememError>;
179
180    /// Get a memory by ID. Updates access_count and last_accessed_at.
181    fn get_memory(&self, id: &str) -> Result<Option<MemoryNode>, CodememError>;
182
183    /// Get a memory by ID without updating access_count or last_accessed_at.
184    /// Use this for internal/system reads (consolidation checks, stats, batch processing).
185    fn get_memory_no_touch(&self, id: &str) -> Result<Option<MemoryNode>, CodememError> {
186        // Default: falls back to get_memory for backwards compatibility.
187        self.get_memory(id)
188    }
189
190    /// Get multiple memories by IDs in a single batch operation.
191    fn get_memories_batch(&self, ids: &[&str]) -> Result<Vec<MemoryNode>, CodememError>;
192
193    /// Update a memory's content and optionally its importance. Re-computes content hash.
194    fn update_memory(
195        &self,
196        id: &str,
197        content: &str,
198        importance: Option<f64>,
199    ) -> Result<(), CodememError>;
200
201    /// Delete a memory by ID. Returns true if a row was deleted.
202    fn delete_memory(&self, id: &str) -> Result<bool, CodememError>;
203
204    /// Delete a memory and all related data (graph nodes/edges, embeddings) atomically.
205    /// Returns true if the memory existed and was deleted.
206    /// Default falls back to individual deletes (non-transactional) for backwards compatibility.
207    fn delete_memory_cascade(&self, id: &str) -> Result<bool, CodememError> {
208        let deleted = self.delete_memory(id)?;
209        if deleted {
210            let _ = self.delete_graph_edges_for_node(id);
211            let _ = self.delete_graph_node(id);
212            let _ = self.delete_embedding(id);
213        }
214        Ok(deleted)
215    }
216
217    /// Delete multiple memories and all related data (graph nodes/edges, embeddings) atomically.
218    /// Returns the number of memories that were actually deleted.
219    /// Default falls back to calling `delete_memory_cascade` per ID for backwards compatibility.
220    fn delete_memories_batch_cascade(&self, ids: &[&str]) -> Result<usize, CodememError> {
221        let mut count = 0;
222        for id in ids {
223            if self.delete_memory_cascade(id)? {
224                count += 1;
225            }
226        }
227        Ok(count)
228    }
229
230    /// List all memory IDs, ordered by created_at descending.
231    fn list_memory_ids(&self) -> Result<Vec<String>, CodememError>;
232
233    /// List memory IDs scoped to a specific namespace.
234    fn list_memory_ids_for_namespace(&self, namespace: &str) -> Result<Vec<String>, CodememError>;
235
236    /// Find memory IDs whose tags contain the given tag value.
237    /// Optionally scoped to a namespace. Excludes `exclude_id`.
238    fn find_memory_ids_by_tag(
239        &self,
240        tag: &str,
241        namespace: Option<&str>,
242        exclude_id: &str,
243    ) -> Result<Vec<String>, CodememError>;
244
245    /// List all distinct namespaces.
246    fn list_namespaces(&self) -> Result<Vec<String>, CodememError>;
247
248    /// Get total memory count.
249    fn memory_count(&self) -> Result<usize, CodememError>;
250
251    // ── Embedding Persistence ───────────────────────────────────────
252
253    /// Store an embedding vector for a memory.
254    fn store_embedding(&self, memory_id: &str, embedding: &[f32]) -> Result<(), CodememError>;
255
256    /// Get an embedding by memory ID.
257    fn get_embedding(&self, memory_id: &str) -> Result<Option<Vec<f32>>, CodememError>;
258
259    /// Delete an embedding by memory ID. Returns true if a row was deleted.
260    fn delete_embedding(&self, memory_id: &str) -> Result<bool, CodememError>;
261
262    /// List all stored embeddings as (memory_id, embedding_vector) pairs.
263    fn list_all_embeddings(&self) -> Result<Vec<(String, Vec<f32>)>, CodememError>;
264
265    // ── Graph Node/Edge Persistence ─────────────────────────────────
266
267    /// Insert or replace a graph node.
268    fn insert_graph_node(&self, node: &GraphNode) -> Result<(), CodememError>;
269
270    /// Get a graph node by ID.
271    fn get_graph_node(&self, id: &str) -> Result<Option<GraphNode>, CodememError>;
272
273    /// Delete a graph node by ID. Returns true if a row was deleted.
274    fn delete_graph_node(&self, id: &str) -> Result<bool, CodememError>;
275
276    /// Get all graph nodes.
277    fn all_graph_nodes(&self) -> Result<Vec<GraphNode>, CodememError>;
278
279    /// Insert or replace a graph edge.
280    fn insert_graph_edge(&self, edge: &Edge) -> Result<(), CodememError>;
281
282    /// Get all edges from or to a node.
283    fn get_edges_for_node(&self, node_id: &str) -> Result<Vec<Edge>, CodememError>;
284
285    /// Get all graph edges.
286    fn all_graph_edges(&self) -> Result<Vec<Edge>, CodememError>;
287
288    /// Delete a single graph edge by ID. Returns true if a row was deleted.
289    fn delete_graph_edge(&self, edge_id: &str) -> Result<bool, CodememError> {
290        // Default: fall back to querying all edges and deleting via for_node.
291        // Backends should override with a direct DELETE WHERE id = ?1.
292        let _ = edge_id;
293        Ok(false)
294    }
295
296    /// Delete all graph edges connected to a node. Returns count deleted.
297    fn delete_graph_edges_for_node(&self, node_id: &str) -> Result<usize, CodememError>;
298
299    /// Delete all graph nodes, edges, and embeddings whose node ID starts with the given prefix.
300    /// Returns count of nodes deleted.
301    fn delete_graph_nodes_by_prefix(&self, prefix: &str) -> Result<usize, CodememError>;
302
303    // ── Sessions ────────────────────────────────────────────────────
304
305    /// Start a new session.
306    fn start_session(&self, id: &str, namespace: Option<&str>) -> Result<(), CodememError>;
307
308    /// End a session with optional summary.
309    fn end_session(&self, id: &str, summary: Option<&str>) -> Result<(), CodememError>;
310
311    /// List sessions, optionally filtered by namespace, up to limit.
312    fn list_sessions(
313        &self,
314        namespace: Option<&str>,
315        limit: usize,
316    ) -> Result<Vec<Session>, CodememError>;
317
318    // ── Consolidation ───────────────────────────────────────────────
319
320    /// Record a consolidation run.
321    fn insert_consolidation_log(
322        &self,
323        cycle_type: &str,
324        affected_count: usize,
325    ) -> Result<(), CodememError>;
326
327    /// Get the last consolidation run for each cycle type.
328    fn last_consolidation_runs(&self) -> Result<Vec<ConsolidationLogEntry>, CodememError>;
329
330    // ── Pattern Detection Queries ───────────────────────────────────
331
332    /// Find repeated search patterns. Returns (pattern, count, memory_ids).
333    fn get_repeated_searches(
334        &self,
335        min_count: usize,
336        namespace: Option<&str>,
337    ) -> Result<Vec<(String, usize, Vec<String>)>, CodememError>;
338
339    /// Find file hotspots. Returns (file_path, count, memory_ids).
340    fn get_file_hotspots(
341        &self,
342        min_count: usize,
343        namespace: Option<&str>,
344    ) -> Result<Vec<(String, usize, Vec<String>)>, CodememError>;
345
346    /// Get tool usage statistics. Returns (tool_name, count) pairs.
347    fn get_tool_usage_stats(
348        &self,
349        namespace: Option<&str>,
350    ) -> Result<Vec<(String, usize)>, CodememError>;
351
352    /// Find decision chains. Returns (file_path, count, memory_ids).
353    fn get_decision_chains(
354        &self,
355        min_count: usize,
356        namespace: Option<&str>,
357    ) -> Result<Vec<(String, usize, Vec<String>)>, CodememError>;
358
359    // ── Bulk Operations ─────────────────────────────────────────────
360
361    /// Decay importance of stale memories older than threshold_ts by decay_factor.
362    /// Returns count of affected memories.
363    fn decay_stale_memories(
364        &self,
365        threshold_ts: i64,
366        decay_factor: f64,
367    ) -> Result<usize, CodememError>;
368
369    /// List memories for creative consolidation: (id, memory_type, tags).
370    fn list_memories_for_creative(
371        &self,
372    ) -> Result<Vec<(String, String, Vec<String>)>, CodememError>;
373
374    /// Find near-duplicate memories by content hash prefix matching.
375    /// Returns (id1, id2, similarity) pairs. Only catches exact content matches
376    /// (hash prefix), not semantic near-duplicates.
377    fn find_hash_duplicates(&self) -> Result<Vec<(String, String, f64)>, CodememError>;
378
379    /// Find memories eligible for forgetting (low importance).
380    /// Returns list of memory IDs.
381    fn find_forgettable(&self, importance_threshold: f64) -> Result<Vec<String>, CodememError>;
382
383    // ── Batch Operations ────────────────────────────────────────────
384
385    /// Insert multiple memories in a single batch. Default impl calls insert_memory in a loop.
386    fn insert_memories_batch(&self, memories: &[MemoryNode]) -> Result<(), CodememError> {
387        for memory in memories {
388            self.insert_memory(memory)?;
389        }
390        Ok(())
391    }
392
393    /// Store multiple embeddings in a single batch. Default impl calls store_embedding in a loop.
394    fn store_embeddings_batch(&self, items: &[(&str, &[f32])]) -> Result<(), CodememError> {
395        for (id, embedding) in items {
396            self.store_embedding(id, embedding)?;
397        }
398        Ok(())
399    }
400
401    /// Insert multiple graph nodes in a single batch. Default impl calls insert_graph_node in a loop.
402    fn insert_graph_nodes_batch(&self, nodes: &[GraphNode]) -> Result<(), CodememError> {
403        for node in nodes {
404            self.insert_graph_node(node)?;
405        }
406        Ok(())
407    }
408
409    /// Insert multiple graph edges in a single batch. Default impl calls insert_graph_edge in a loop.
410    fn insert_graph_edges_batch(&self, edges: &[Edge]) -> Result<(), CodememError> {
411        for edge in edges {
412            self.insert_graph_edge(edge)?;
413        }
414        Ok(())
415    }
416
417    // ── Query Helpers ───────────────────────────────────────────────
418
419    /// Find memories that have no embeddings yet. Returns (id, content) pairs.
420    fn find_unembedded_memories(&self) -> Result<Vec<(String, String)>, CodememError>;
421
422    /// Search graph nodes by label (case-insensitive LIKE). Returns matching nodes
423    /// sorted by centrality descending, limited to `limit` results.
424    fn search_graph_nodes(
425        &self,
426        query: &str,
427        namespace: Option<&str>,
428        limit: usize,
429    ) -> Result<Vec<GraphNode>, CodememError>;
430
431    /// List memories matching a specific tag, with optional namespace filter.
432    fn list_memories_by_tag(
433        &self,
434        tag: &str,
435        namespace: Option<&str>,
436        limit: usize,
437    ) -> Result<Vec<MemoryNode>, CodememError>;
438
439    /// List memories with optional namespace and memory_type filters.
440    fn list_memories_filtered(
441        &self,
442        namespace: Option<&str>,
443        memory_type: Option<&str>,
444    ) -> Result<Vec<MemoryNode>, CodememError>;
445
446    /// Fetch stale memories with access metadata for power-law decay.
447    /// Returns (id, importance, access_count, last_accessed_at).
448    fn get_stale_memories_for_decay(
449        &self,
450        threshold_ts: i64,
451    ) -> Result<Vec<(String, f64, u32, i64)>, CodememError>;
452
453    /// Batch-update importance values. Returns count of updated rows.
454    fn batch_update_importance(&self, updates: &[(String, f64)]) -> Result<usize, CodememError>;
455
456    /// Total session count, optionally filtered by namespace.
457    fn session_count(&self, namespace: Option<&str>) -> Result<usize, CodememError>;
458
459    // ── File Hash Tracking ──────────────────────────────────────────
460
461    /// Load all file hashes for incremental indexing. Returns path -> hash map.
462    fn load_file_hashes(&self) -> Result<HashMap<String, String>, CodememError>;
463
464    /// Save file hashes for incremental indexing.
465    fn save_file_hashes(&self, hashes: &HashMap<String, String>) -> Result<(), CodememError>;
466
467    // ── Session Activity Tracking ─────────────────────────────────
468
469    /// Record a session activity event (tool use with context).
470    fn record_session_activity(
471        &self,
472        session_id: &str,
473        tool_name: &str,
474        file_path: Option<&str>,
475        directory: Option<&str>,
476        pattern: Option<&str>,
477    ) -> Result<(), CodememError>;
478
479    /// Get a summary of session activity counts.
480    fn get_session_activity_summary(
481        &self,
482        session_id: &str,
483    ) -> Result<crate::SessionActivitySummary, CodememError>;
484
485    /// Get the most active directories in a session. Returns (directory, count) pairs.
486    fn get_session_hot_directories(
487        &self,
488        session_id: &str,
489        limit: usize,
490    ) -> Result<Vec<(String, usize)>, CodememError>;
491
492    /// Check whether a particular auto-insight dedup tag already exists for a session.
493    fn has_auto_insight(&self, session_id: &str, dedup_tag: &str) -> Result<bool, CodememError>;
494
495    /// Count how many Read events occurred in a directory during a session.
496    fn count_directory_reads(
497        &self,
498        session_id: &str,
499        directory: &str,
500    ) -> Result<usize, CodememError>;
501
502    /// Check if a file was read in the current session.
503    fn was_file_read_in_session(
504        &self,
505        session_id: &str,
506        file_path: &str,
507    ) -> Result<bool, CodememError>;
508
509    /// Count how many times a search pattern was used in a session.
510    fn count_search_pattern_in_session(
511        &self,
512        session_id: &str,
513        pattern: &str,
514    ) -> Result<usize, CodememError>;
515
516    // ── Repository Management ────────────────────────────────────────
517
518    /// List all registered repositories.
519    fn list_repos(&self) -> Result<Vec<Repository>, CodememError>;
520
521    /// Add a new repository.
522    fn add_repo(&self, repo: &Repository) -> Result<(), CodememError>;
523
524    /// Get a repository by ID.
525    fn get_repo(&self, id: &str) -> Result<Option<Repository>, CodememError>;
526
527    /// Remove a repository by ID. Returns true if it existed.
528    fn remove_repo(&self, id: &str) -> Result<bool, CodememError>;
529
530    /// Update a repository's status and optionally its last-indexed timestamp.
531    fn update_repo_status(
532        &self,
533        id: &str,
534        status: &str,
535        indexed_at: Option<&str>,
536    ) -> Result<(), CodememError>;
537
538    // ── Stats ───────────────────────────────────────────────────────
539
540    /// Get database statistics.
541    fn stats(&self) -> Result<StorageStats, CodememError>;
542
543    // ── Transaction Control ────────────────────────────────────────
544
545    /// Begin an explicit transaction.
546    ///
547    /// While a transaction is active, individual storage methods (e.g.
548    /// `insert_memory`, `insert_graph_node`) participate in it instead of
549    /// starting their own. Call `commit_transaction` to persist or
550    /// `rollback_transaction` to discard.
551    ///
552    /// Default implementation is a no-op for backends that don't support
553    /// explicit transaction control.
554    fn begin_transaction(&self) -> Result<(), CodememError> {
555        Ok(())
556    }
557
558    /// Commit the active transaction started by `begin_transaction`.
559    ///
560    /// Default implementation is a no-op.
561    fn commit_transaction(&self) -> Result<(), CodememError> {
562        Ok(())
563    }
564
565    /// Roll back the active transaction started by `begin_transaction`.
566    ///
567    /// Default implementation is a no-op.
568    fn rollback_transaction(&self) -> Result<(), CodememError> {
569        Ok(())
570    }
571}