// codemem_engine/memory_ops.rs
1use crate::scoring;
2use crate::CodememEngine;
3use crate::SplitPart;
4use codemem_core::{CodememError, Edge, MemoryNode, MemoryType, RelationshipType};
5use std::collections::HashMap;
6use std::sync::atomic::Ordering;
7
8impl CodememEngine {
9    // ── Persistence ─────────────────────────────────────────────────────
10
11    /// Persist a memory through the full pipeline: storage → BM25 → graph → embedding → vector.
12    pub fn persist_memory(&self, memory: &MemoryNode) -> Result<(), CodememError> {
13        self.persist_memory_inner(memory, true)
14    }
15
16    /// Persist a memory without saving the vector index to disk.
17    /// Use this in batch operations, then call `save_index()` once at the end.
18    pub(crate) fn persist_memory_no_save(&self, memory: &MemoryNode) -> Result<(), CodememError> {
19        self.persist_memory_inner(memory, false)
20    }
21
    /// Inner persist implementation with optional index save.
    ///
    /// Pipeline: auto-populate scope fields (session, TTL, repo) → embed if
    /// the provider is already loaded → one SQLite transaction for all
    /// database writes → update in-memory BM25/graph/vector structures →
    /// save the vector index or mark it dirty.
    ///
    /// H3: Lock ordering is enforced to prevent deadlocks:
    /// 1. Embeddings lock (acquire, embed, drop)
    /// 2. BM25 lock
    /// 3. Graph lock
    /// 4. Vector lock
    fn persist_memory_inner(&self, memory: &MemoryNode, save: bool) -> Result<(), CodememError> {
        // Auto-populate session_id from the engine's active session if not already set.
        // Cow keeps the common path allocation-free: the node is only cloned when a
        // field actually needs to be filled in.
        let memory = if memory.session_id.is_none() {
            if let Some(active_sid) = self.active_session_id() {
                let mut m = memory.clone();
                m.session_id = Some(active_sid);
                std::borrow::Cow::Owned(m)
            } else {
                std::borrow::Cow::Borrowed(memory)
            }
        } else {
            std::borrow::Cow::Borrowed(memory)
        };

        // Auto-set expires_at for session memories if not explicitly set.
        // A configured TTL of 0 is treated as "no expiry": expires_at stays None.
        let memory = if memory.expires_at.is_none() && memory.session_id.is_some() {
            let ttl_hours = self.config.memory.default_session_ttl_hours;
            if ttl_hours > 0 {
                let mut m = memory.into_owned();
                m.expires_at = Some(chrono::Utc::now() + chrono::Duration::hours(ttl_hours as i64));
                std::borrow::Cow::Owned(m)
            } else {
                memory
            }
        } else {
            memory
        };

        // Auto-populate repo/git_ref from engine scope if not already set
        let memory = if memory.repo.is_none() {
            if let Some(scope) = self.scope() {
                let mut m = memory.into_owned();
                m.repo = Some(scope.repo.clone());
                m.git_ref = Some(scope.git_ref.clone());
                std::borrow::Cow::Owned(m)
            } else {
                memory
            }
        } else {
            memory
        };
        // From here on, work with a plain reference to the (possibly enriched) node.
        let memory = memory.as_ref();

        // H3: Step 1 — Embed if the provider is already loaded (don't trigger lazy init).
        // Lifecycle hooks skip embedding for speed; the provider gets initialized on
        // first recall/search, and backfill_embeddings() picks up any gaps.
        // Embedding failures are soft: the memory still persists, just without a vector.
        let embedding_result = if self.embeddings_ready() {
            match self.lock_embeddings() {
                Ok(Some(emb)) => {
                    let enriched = self.enrich_memory_text(
                        &memory.content,
                        memory.memory_type,
                        &memory.tags,
                        memory.namespace.as_deref(),
                        Some(&memory.id),
                    );
                    let result = emb.embed(&enriched).ok();
                    // H3: release the embeddings guard before any other lock is taken.
                    drop(emb);
                    result
                }
                Ok(None) => None,
                Err(e) => {
                    tracing::warn!("Embeddings lock failed during persist: {e}");
                    None
                }
            }
        } else {
            None
        };

        // 2F: Wrap all SQLite mutations in a single transaction so that the
        // database cannot be left in an inconsistent state if one step fails.
        // The HNSW vector index is NOT in SQLite, so vector insertion happens
        // after commit — if it fails, the memory is still persisted without
        // its embedding, which is recoverable.
        self.storage.begin_transaction()?;

        let result = self.persist_memory_sqlite(memory, &embedding_result);

        match result {
            Ok(()) => {
                self.storage.commit_transaction()?;
            }
            Err(e) => {
                // Best-effort rollback; the original persist error is surfaced either way.
                if let Err(rb_err) = self.storage.rollback_transaction() {
                    tracing::error!("Failed to rollback transaction after persist error: {rb_err}");
                }
                return Err(e);
            }
        }

        // 2. Update BM25 index if already loaded (don't trigger lazy init).
        // The BM25 index rebuilds from all memories on first access anyway.
        if self.bm25_ready() {
            match self.lock_bm25() {
                Ok(mut bm25) => {
                    bm25.add_document(&memory.id, &memory.content);
                }
                Err(e) => tracing::warn!("BM25 lock failed during persist: {e}"),
            }
        }

        // 3. Add memory node to in-memory graph (already persisted to SQLite above)
        match self.lock_graph() {
            Ok(mut graph) => {
                let node = codemem_core::GraphNode {
                    id: memory.id.clone(),
                    kind: codemem_core::NodeKind::Memory,
                    label: scoring::truncate_content(&memory.content, 80),
                    payload: std::collections::HashMap::new(),
                    centrality: 0.0,
                    memory_id: Some(memory.id.clone()),
                    namespace: memory.namespace.clone(),
                };
                if let Err(e) = graph.add_node(node) {
                    tracing::warn!(
                        "Failed to add graph node in-memory for memory {}: {e}",
                        memory.id
                    );
                }
            }
            Err(e) => tracing::warn!("Graph lock failed during persist: {e}"),
        }

        // 3b. Auto-link to memories with shared tags (session co-membership, topic overlap)
        self.auto_link_by_tags(memory);

        // H3: Step 4 — Insert embedding into HNSW vector index if already loaded.
        if let Some(vec) = &embedding_result {
            if self.vector_ready() {
                if let Ok(mut vi) = self.lock_vector() {
                    if let Err(e) = vi.insert(&memory.id, vec) {
                        tracing::warn!("Failed to insert into vector index for {}: {e}", memory.id);
                    }
                }
            }
        }

        // C5: Set dirty flag instead of calling save_index() after each persist.
        // Callers should use flush_if_dirty() to batch save the index.
        if save {
            self.save_index(); // save_index() clears dirty flag
        } else {
            self.dirty.store(true, Ordering::Release);
        }

        Ok(())
    }
177
178    /// Execute all SQLite mutations for a memory persist.
179    ///
180    /// Called within the transaction opened by `persist_memory_inner`.
181    /// Inserts the memory row, graph node, and embedding (if available).
182    fn persist_memory_sqlite(
183        &self,
184        memory: &MemoryNode,
185        embedding: &Option<Vec<f32>>,
186    ) -> Result<(), CodememError> {
187        // 1. Store memory in SQLite
188        self.storage.insert_memory(memory)?;
189
190        // 2. Insert graph node in SQLite
191        let node = codemem_core::GraphNode {
192            id: memory.id.clone(),
193            kind: codemem_core::NodeKind::Memory,
194            label: scoring::truncate_content(&memory.content, 80),
195            payload: std::collections::HashMap::new(),
196            centrality: 0.0,
197            memory_id: Some(memory.id.clone()),
198            namespace: memory.namespace.clone(),
199        };
200        if let Err(e) = self.storage.insert_graph_node(&node) {
201            tracing::warn!("Failed to insert graph node for memory {}: {e}", memory.id);
202        }
203
204        // 3. Store embedding in SQLite (vector blob, not HNSW index)
205        if let Some(vec) = embedding {
206            if let Err(e) = self.storage.store_embedding(&memory.id, vec) {
207                tracing::warn!("Failed to store embedding for {}: {e}", memory.id);
208            }
209        }
210
211        Ok(())
212    }
213
214    // ── Store with Links ──────────────────────────────────────────────────
215
216    /// Store a memory with optional explicit link IDs.
217    ///
218    /// Runs the full pipeline: persist → explicit RELATES_TO edges → auto-link
219    /// to code nodes → save index. This consolidates domain logic that was
220    /// previously spread across the MCP transport layer.
221    pub fn store_memory_with_links(
222        &self,
223        memory: &MemoryNode,
224        links: &[String],
225    ) -> Result<(), CodememError> {
226        self.persist_memory(memory)?;
227
228        // Create RELATES_TO edges for explicit links
229        if !links.is_empty() {
230            let now = chrono::Utc::now();
231            let mut graph = self.lock_graph()?;
232            for link_id in links {
233                let edge = Edge {
234                    id: format!("{}-RELATES_TO-{link_id}", memory.id),
235                    src: memory.id.clone(),
236                    dst: link_id.clone(),
237                    relationship: RelationshipType::RelatesTo,
238                    weight: 1.0,
239                    properties: HashMap::new(),
240                    created_at: now,
241                    valid_from: None,
242                    valid_to: None,
243                };
244                if let Err(e) = self.storage.insert_graph_edge(&edge) {
245                    tracing::warn!("Failed to persist link edge to {link_id}: {e}");
246                }
247                if let Err(e) = graph.add_edge(edge) {
248                    tracing::warn!("Failed to add link edge to {link_id}: {e}");
249                }
250            }
251        }
252
253        // Auto-link to code nodes mentioned in content
254        self.auto_link_to_code_nodes(&memory.id, &memory.content, links);
255
256        Ok(())
257    }
258
259    // ── Edge Helpers ─────────────────────────────────────────────────────
260
261    /// Add an edge to both storage and in-memory graph.
262    pub fn add_edge(&self, edge: Edge) -> Result<(), CodememError> {
263        self.storage.insert_graph_edge(&edge)?;
264        let mut graph = self.lock_graph()?;
265        graph.add_edge(edge)?;
266        Ok(())
267    }
268
269    // ── Self-Editing ────────────────────────────────────────────────────
270
271    /// Refine a memory: create a new version with an EVOLVED_INTO edge from old to new.
272    pub fn refine_memory(
273        &self,
274        old_id: &str,
275        content: Option<&str>,
276        tags: Option<Vec<String>>,
277        importance: Option<f64>,
278    ) -> Result<(MemoryNode, String), CodememError> {
279        let old_memory = self
280            .storage
281            .get_memory(old_id)?
282            .ok_or_else(|| CodememError::NotFound(format!("Memory not found: {old_id}")))?;
283
284        let new_content = content.unwrap_or(&old_memory.content);
285        let new_tags = tags.unwrap_or_else(|| old_memory.tags.clone());
286        let new_importance = importance.unwrap_or(old_memory.importance);
287
288        let mut memory = MemoryNode::new(new_content, old_memory.memory_type);
289        let new_id = memory.id.clone();
290        memory.importance = new_importance;
291        memory.confidence = old_memory.confidence;
292        memory.tags = new_tags;
293        memory.metadata = old_memory.metadata.clone();
294        memory.namespace = old_memory.namespace.clone();
295
296        self.persist_memory(&memory)?;
297
298        // Create EVOLVED_INTO edge from old -> new
299        let now = chrono::Utc::now();
300        let edge = Edge {
301            id: format!("{old_id}-EVOLVED_INTO-{new_id}"),
302            src: old_id.to_string(),
303            dst: new_id.clone(),
304            relationship: RelationshipType::EvolvedInto,
305            weight: 1.0,
306            properties: std::collections::HashMap::new(),
307            created_at: now,
308            valid_from: Some(now),
309            valid_to: None,
310        };
311        if let Err(e) = self.add_edge(edge) {
312            tracing::warn!("Failed to add EVOLVED_INTO edge: {e}");
313        }
314
315        Ok((memory, new_id))
316    }
317
318    /// Split a memory into multiple parts, each linked via PART_OF edges.
319    pub fn split_memory(
320        &self,
321        source_id: &str,
322        parts: &[SplitPart],
323    ) -> Result<Vec<String>, CodememError> {
324        let source_memory = self
325            .storage
326            .get_memory(source_id)?
327            .ok_or_else(|| CodememError::NotFound(format!("Memory not found: {source_id}")))?;
328
329        if parts.is_empty() {
330            return Err(CodememError::InvalidInput(
331                "'parts' array must not be empty".to_string(),
332            ));
333        }
334
335        // Validate all parts upfront before persisting anything
336        for part in parts {
337            if part.content.is_empty() {
338                return Err(CodememError::InvalidInput(
339                    "Each part must have a non-empty 'content' field".to_string(),
340                ));
341            }
342        }
343
344        let now = chrono::Utc::now();
345        let mut child_ids: Vec<String> = Vec::new();
346
347        for part in parts {
348            let tags = part
349                .tags
350                .clone()
351                .unwrap_or_else(|| source_memory.tags.clone());
352            let importance = part.importance.unwrap_or(source_memory.importance);
353
354            let mut memory = MemoryNode::new(part.content.clone(), source_memory.memory_type);
355            let child_id = memory.id.clone();
356            memory.importance = importance;
357            memory.confidence = source_memory.confidence;
358            memory.tags = tags;
359            memory.namespace = source_memory.namespace.clone();
360
361            if let Err(e) = self.persist_memory_no_save(&memory) {
362                // Clean up already-created child memories
363                for created_id in &child_ids {
364                    if let Err(del_err) = self.delete_memory(created_id) {
365                        tracing::warn!(
366                            "Failed to clean up child memory {created_id} after split failure: {del_err}"
367                        );
368                    }
369                }
370                return Err(e);
371            }
372
373            // Create PART_OF edge: child -> source
374            let edge = Edge {
375                id: format!("{child_id}-PART_OF-{source_id}"),
376                src: child_id.clone(),
377                dst: source_id.to_string(),
378                relationship: RelationshipType::PartOf,
379                weight: 1.0,
380                properties: std::collections::HashMap::new(),
381                created_at: now,
382                valid_from: Some(now),
383                valid_to: None,
384            };
385            if let Err(e) = self.add_edge(edge) {
386                tracing::warn!("Failed to add PART_OF edge: {e}");
387            }
388
389            child_ids.push(child_id);
390        }
391
392        self.save_index();
393        Ok(child_ids)
394    }
395
396    /// Merge multiple memories into one, linked via SUMMARIZES edges.
397    pub fn merge_memories(
398        &self,
399        source_ids: &[String],
400        content: &str,
401        memory_type: MemoryType,
402        importance: f64,
403        tags: Vec<String>,
404    ) -> Result<String, CodememError> {
405        if source_ids.len() < 2 {
406            return Err(CodememError::InvalidInput(
407                "'source_ids' must contain at least 2 IDs".to_string(),
408            ));
409        }
410
411        // Verify all sources exist
412        let id_refs: Vec<&str> = source_ids.iter().map(|s| s.as_str()).collect();
413        let found = self.storage.get_memories_batch(&id_refs)?;
414        if found.len() != source_ids.len() {
415            let found_ids: std::collections::HashSet<&str> =
416                found.iter().map(|m| m.id.as_str()).collect();
417            let missing: Vec<&str> = id_refs
418                .iter()
419                .filter(|id| !found_ids.contains(**id))
420                .copied()
421                .collect();
422            return Err(CodememError::NotFound(format!(
423                "Source memories not found: {}",
424                missing.join(", ")
425            )));
426        }
427
428        let mut memory = MemoryNode::new(content, memory_type);
429        let merged_id = memory.id.clone();
430        memory.importance = importance;
431        memory.confidence = found.iter().map(|m| m.confidence).sum::<f64>() / found.len() as f64;
432        memory.tags = tags;
433        memory.namespace = found.iter().find_map(|m| m.namespace.clone());
434
435        self.persist_memory_no_save(&memory)?;
436
437        // Create SUMMARIZES edges: merged -> each source
438        let now = chrono::Utc::now();
439        for source_id in source_ids {
440            let edge = Edge {
441                id: format!("{merged_id}-SUMMARIZES-{source_id}"),
442                src: merged_id.clone(),
443                dst: source_id.clone(),
444                relationship: RelationshipType::Summarizes,
445                weight: 1.0,
446                properties: std::collections::HashMap::new(),
447                created_at: now,
448                valid_from: Some(now),
449                valid_to: None,
450            };
451            if let Err(e) = self.add_edge(edge) {
452                tracing::warn!("Failed to add SUMMARIZES edge to {source_id}: {e}");
453            }
454        }
455
456        self.save_index();
457        Ok(merged_id)
458    }
459
    /// Update a memory's content and/or importance, re-embedding if needed.
    ///
    /// Refreshes, in order: the SQLite row, the BM25 document, the graph
    /// node label, and the stored embedding plus vector-index entry. The
    /// embedding refresh is best-effort: failures are logged, not returned.
    ///
    /// # Errors
    /// Returns an error if the storage update fails or one of the
    /// BM25/embeddings/vector locks cannot be acquired.
    pub fn update_memory(
        &self,
        id: &str,
        content: &str,
        importance: Option<f64>,
    ) -> Result<(), CodememError> {
        self.storage.update_memory(id, content, importance)?;

        // Update BM25 index.
        // NOTE(review): assumes add_document upserts when the id already
        // exists — confirm against the BM25 implementation.
        self.lock_bm25()?.add_document(id, content);

        // Update graph node label so graph views show the new content preview.
        if let Ok(mut graph) = self.lock_graph() {
            if let Ok(Some(mut node)) = graph.get_node(id) {
                node.label = scoring::truncate_content(content, 80);
                if let Err(e) = graph.add_node(node) {
                    tracing::warn!("Failed to update graph node for {id}: {e}");
                }
            }
        }

        // Re-embed with contextual enrichment
        // H3: Acquire embeddings lock, embed, drop lock before acquiring vector lock.
        if let Some(emb_guard) = self.lock_embeddings()? {
            // Pull type/tags/namespace for enrichment without bumping access
            // stats; fall back to neutral defaults if the row can't be read.
            let (mem_type, tags, namespace) =
                if let Ok(Some(mem)) = self.storage.get_memory_no_touch(id) {
                    (mem.memory_type, mem.tags, mem.namespace)
                } else {
                    (MemoryType::Context, vec![], None)
                };
            let enriched =
                self.enrich_memory_text(content, mem_type, &tags, namespace.as_deref(), Some(id));
            let emb_result = emb_guard.embed(&enriched);
            drop(emb_guard);
            if let Ok(embedding) = emb_result {
                if let Err(e) = self.storage.store_embedding(id, &embedding) {
                    tracing::warn!("Failed to store embedding for {id}: {e}");
                }
                // Replace the vector: remove the stale entry, insert the new one.
                let mut vec = self.lock_vector()?;
                if let Err(e) = vec.remove(id) {
                    tracing::warn!("Failed to remove old vector for {id}: {e}");
                }
                if let Err(e) = vec.insert(id, &embedding) {
                    tracing::warn!("Failed to insert new vector for {id}: {e}");
                }
            }
        }

        self.save_index();
        Ok(())
    }
512
513    /// Update only the importance of a memory.
514    /// Routes through the engine to maintain the transport → engine → storage boundary.
515    pub fn update_importance(&self, id: &str, importance: f64) -> Result<(), CodememError> {
516        self.storage
517            .batch_update_importance(&[(id.to_string(), importance)])?;
518        Ok(())
519    }
520
521    /// Delete a memory from all subsystems.
522    ///
523    /// M1: Uses `delete_memory_cascade` on the storage backend to wrap all
524    /// SQLite deletes (memory + graph nodes/edges + embedding) in a single
525    /// transaction when the backend supports it. In-memory structures
526    /// (vector, graph, BM25) are cleaned up separately with proper lock ordering.
527    pub fn delete_memory(&self, id: &str) -> Result<bool, CodememError> {
528        // Use cascade delete for all storage-side operations in a single transaction.
529        let deleted = self.storage.delete_memory_cascade(id)?;
530        if !deleted {
531            return Ok(false);
532        }
533
534        // Clean up in-memory structures with proper lock ordering:
535        // vector first, then graph, then BM25.
536        let mut vec = self.lock_vector()?;
537        if let Err(e) = vec.remove(id) {
538            tracing::warn!("Failed to remove {id} from vector index: {e}");
539        }
540        drop(vec);
541
542        let mut graph = self.lock_graph()?;
543        if let Err(e) = graph.remove_node(id) {
544            tracing::warn!("Failed to remove {id} from in-memory graph: {e}");
545        }
546        drop(graph);
547
548        self.lock_bm25()?.remove_document(id);
549
550        // Persist vector index to disk
551        self.save_index();
552        Ok(true)
553    }
554
555    /// Opportunistic sweep of expired memories. Rate-limited to once per 60 seconds
556    /// to avoid adding overhead to every recall call.
557    pub(crate) fn sweep_expired_memories(&self) {
558        let now = chrono::Utc::now().timestamp();
559        let last = self.last_expiry_sweep.load(Ordering::Relaxed);
560        if now - last < 60 {
561            return;
562        }
563        // CAS to avoid concurrent sweeps
564        if self
565            .last_expiry_sweep
566            .compare_exchange(last, now, Ordering::Relaxed, Ordering::Relaxed)
567            .is_err()
568        {
569            return;
570        }
571        match self.storage.delete_expired_memories() {
572            Ok(0) => {}
573            Ok(n) => tracing::debug!("Swept {n} expired memories"),
574            Err(e) => tracing::warn!("Expired memory sweep failed: {e}"),
575        }
576    }
577}