1use codemem_core::{
14 CodememConfig, CodememError, DetectedPattern, Edge, GraphBackend, MemoryNode, MemoryType,
15 NodeKind, NodeMemoryResult, RelationshipType, ScoringWeights, StorageBackend, VectorBackend,
16};
17use codemem_storage::graph::GraphEngine;
18use codemem_storage::HnswIndex;
19use codemem_storage::Storage;
20use std::collections::HashSet;
21use std::path::{Path, PathBuf};
22use std::sync::atomic::{AtomicBool, Ordering};
23use std::sync::{Arc, Mutex, RwLock};
24
25pub mod analysis;
26pub mod bm25;
27pub mod compress;
28pub mod consolidation;
29pub mod enrichment;
30pub mod hooks;
31pub mod index;
32pub mod metrics;
33pub mod patterns;
34pub mod persistence;
35pub mod recall;
36pub mod scoring;
37pub mod search;
38pub mod watch;
39
40#[cfg(test)]
41#[path = "tests/engine_integration_tests.rs"]
42mod integration_tests;
43
44pub use index::{
46 ChunkConfig, CodeChunk, CodeParser, Dependency, IndexAndResolveResult, IndexProgress,
47 IndexResult, Indexer, ManifestResult, ParseResult, Reference, ReferenceKind, ReferenceResolver,
48 ResolvedEdge, Symbol, SymbolKind, Visibility, Workspace,
49};
50
51pub use bm25::Bm25Index;
53pub use metrics::InMemoryMetrics;
54
55pub use enrichment::EnrichResult;
57
58pub use persistence::{edge_weight_for, IndexPersistResult};
60
61pub use recall::{ExpandedResult, NamespaceStats};
63
64pub use search::{CodeSearchResult, SummaryTreeNode, SymbolSearchResult};
66
67pub use analysis::{
69 DecisionChain, DecisionConnection, DecisionEntry, ImpactResult, SessionCheckpointReport,
70};
71
/// One piece of a memory being split by `CodememEngine::split_memory`.
///
/// Optional fields left as `None` are inherited from the source memory.
72#[derive(Debug, Clone)]
74pub struct SplitPart {
    /// Text of the child memory; `split_memory` rejects an empty string.
75 pub content: String,
    /// Tags for the child; `None` copies the source memory's tags.
76 pub tags: Option<Vec<String>>,
    /// Importance for the child; `None` copies the source memory's importance.
77 pub importance: Option<f64>,
78}
79
/// Symbols and chunks kept together with the root path they were collected from.
///
/// Stored behind `CodememEngine::index_cache` and reached through
/// `CodememEngine::lock_index_cache`.
// NOTE(review): nothing in this section reads or writes the cache; presumably the
// index/search modules populate it after an indexing run — confirm.
80pub struct IndexCache {
    /// Cached symbols.
84 pub symbols: Vec<Symbol>,
    /// Cached code chunks.
85 pub chunks: Vec<CodeChunk>,
    /// Root path associated with the cached entries.
86 pub root_path: String,
87}
88
/// Facade that ties together persistent storage, the vector index, the in-memory
/// graph, the optional embedding provider and the BM25 index.
///
/// The mutable components sit behind `Mutex`/`RwLock` so the engine can be used
/// through `&self`; prefer the `lock_*` helpers, which map lock poisoning to
/// `CodememError::LockPoisoned`.
89pub struct CodememEngine {
    /// Persistent storage backend (memories, embeddings, graph nodes and edges).
105 pub storage: Box<dyn StorageBackend>,
    /// Vector index used for embedding lookups.
110 pub vector: Mutex<HnswIndex>,
    /// In-memory graph mirrored from storage.
111 pub graph: Mutex<GraphEngine>,
    /// Embedding provider; `None` when no provider is available.
112 pub embeddings: Option<Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>>,
    /// Database path; `save_index` derives the `.idx` and `.bm25` file paths from it
    /// and writes nothing to disk when it is `None`.
114 pub db_path: Option<PathBuf>,
    /// Optional cache of indexing results.
116 pub index_cache: Mutex<Option<IndexCache>>,
    /// Scoring weights, initialised from `config.scoring`.
118 pub scoring_weights: RwLock<ScoringWeights>,
    /// BM25 index over memory content.
120 pub bm25_index: Mutex<Bm25Index>,
    /// Engine configuration.
122 pub config: CodememConfig,
    /// Shared in-memory metrics collector.
124 pub metrics: Arc<InMemoryMetrics>,
    /// Set when a memory was persisted without saving the indexes; cleared by `save_index`.
126 dirty: AtomicBool,
129}
130
131impl CodememEngine {
132 pub fn new(
134 storage: Box<dyn StorageBackend>,
135 vector: HnswIndex,
136 graph: GraphEngine,
137 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
138 ) -> Self {
139 let config = CodememConfig::load_or_default();
140 Self {
141 storage,
142 vector: Mutex::new(vector),
143 graph: Mutex::new(graph),
144 embeddings: embeddings.map(Mutex::new),
145 db_path: None,
146 index_cache: Mutex::new(None),
147 scoring_weights: RwLock::new(config.scoring.clone()),
148 bm25_index: Mutex::new(Bm25Index::new()),
149 config,
150 metrics: Arc::new(InMemoryMetrics::new()),
151 dirty: AtomicBool::new(false),
152 }
153 }
154
155 pub fn from_db_path(db_path: &Path) -> Result<Self, CodememError> {
157 let storage = Storage::open(db_path)?;
158 let mut vector = HnswIndex::with_defaults()?;
159
160 let index_path = db_path.with_extension("idx");
162 if index_path.exists() {
163 vector.load(&index_path)?;
164 }
165
166 let vector_count = vector.stats().count;
169 let db_stats = storage.stats()?;
170 let db_embed_count = db_stats.embedding_count;
171 if vector_count != db_embed_count {
172 tracing::warn!(
173 "Vector index ({vector_count}) out of sync with DB ({db_embed_count}), rebuilding..."
174 );
175 let mut fresh_vector = HnswIndex::with_defaults()?;
177 if let Ok(embeddings) = storage.list_all_embeddings() {
178 for (id, embedding) in &embeddings {
179 if let Err(e) = fresh_vector.insert(id, embedding) {
180 tracing::warn!("Failed to re-insert embedding {id}: {e}");
181 }
182 }
183 }
184 vector = fresh_vector;
185 if let Err(e) = vector.save(&index_path) {
187 tracing::warn!("Failed to save rebuilt vector index: {e}");
188 }
189 }
190
191 let graph = GraphEngine::from_storage(&storage)?;
193
194 let embeddings = codemem_embeddings::from_env().ok();
196
197 let mut engine = Self::new(Box::new(storage), vector, graph, embeddings);
198 engine.db_path = Some(db_path.to_path_buf());
199
200 engine
203 .lock_graph()?
204 .recompute_centrality_with_options(false);
205
206 let bm25_path = db_path.with_extension("bm25");
208 let mut bm25_loaded = false;
209 if bm25_path.exists() {
210 match std::fs::read(&bm25_path) {
211 Ok(data) => match Bm25Index::deserialize(&data) {
212 Ok(index) => {
213 let mut bm25 = engine.lock_bm25()?;
214 *bm25 = index;
215 bm25_loaded = true;
216 tracing::info!(
217 "Loaded BM25 index from disk ({} documents)",
218 bm25.doc_count
219 );
220 }
221 Err(e) => {
222 tracing::warn!("Failed to deserialize BM25 index, rebuilding: {e}");
223 }
224 },
225 Err(e) => {
226 tracing::warn!("Failed to read BM25 index file, rebuilding: {e}");
227 }
228 }
229 }
230
231 if !bm25_loaded {
232 if let Ok(ids) = engine.storage.list_memory_ids() {
234 let id_refs: Vec<&str> = ids.iter().map(|s| s.as_str()).collect();
235 if let Ok(memories) = engine.storage.get_memories_batch(&id_refs) {
236 let mut bm25 = engine.lock_bm25()?;
237 for memory in &memories {
238 bm25.add_document(&memory.id, &memory.content);
239 }
240 tracing::info!("Rebuilt BM25 index from {} memories", bm25.doc_count);
241 }
242 }
243 }
244
245 Ok(engine)
246 }
247
248 pub fn for_testing() -> Self {
250 let storage = Storage::open_in_memory().unwrap();
251 let vector = HnswIndex::with_defaults().unwrap();
252 let graph = GraphEngine::new();
253 let config = CodememConfig::default();
254 Self {
255 storage: Box::new(storage),
256 vector: Mutex::new(vector),
257 graph: Mutex::new(graph),
258 embeddings: None,
259 db_path: None,
260 index_cache: Mutex::new(None),
261 scoring_weights: RwLock::new(config.scoring.clone()),
262 bm25_index: Mutex::new(Bm25Index::new()),
263 config,
264 metrics: Arc::new(InMemoryMetrics::new()),
265 dirty: AtomicBool::new(false),
266 }
267 }
268
269 pub fn lock_vector(&self) -> Result<std::sync::MutexGuard<'_, HnswIndex>, CodememError> {
272 self.vector
273 .lock()
274 .map_err(|e| CodememError::LockPoisoned(format!("vector: {e}")))
275 }
276
277 pub fn lock_graph(&self) -> Result<std::sync::MutexGuard<'_, GraphEngine>, CodememError> {
278 self.graph
279 .lock()
280 .map_err(|e| CodememError::LockPoisoned(format!("graph: {e}")))
281 }
282
283 pub fn lock_bm25(&self) -> Result<std::sync::MutexGuard<'_, Bm25Index>, CodememError> {
284 self.bm25_index
285 .lock()
286 .map_err(|e| CodememError::LockPoisoned(format!("bm25: {e}")))
287 }
288
289 pub fn lock_embeddings(
290 &self,
291 ) -> Result<
292 Option<std::sync::MutexGuard<'_, Box<dyn codemem_embeddings::EmbeddingProvider>>>,
293 CodememError,
294 > {
295 match &self.embeddings {
296 Some(m) => Ok(Some(m.lock().map_err(|e| {
297 CodememError::LockPoisoned(format!("embeddings: {e}"))
298 })?)),
299 None => Ok(None),
300 }
301 }
302
303 pub fn lock_index_cache(
304 &self,
305 ) -> Result<std::sync::MutexGuard<'_, Option<IndexCache>>, CodememError> {
306 self.index_cache
307 .lock()
308 .map_err(|e| CodememError::LockPoisoned(format!("index_cache: {e}")))
309 }
310
311 pub fn scoring_weights(
312 &self,
313 ) -> Result<std::sync::RwLockReadGuard<'_, ScoringWeights>, CodememError> {
314 self.scoring_weights
315 .read()
316 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights read: {e}")))
317 }
318
319 pub fn scoring_weights_mut(
320 &self,
321 ) -> Result<std::sync::RwLockWriteGuard<'_, ScoringWeights>, CodememError> {
322 self.scoring_weights
323 .write()
324 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights write: {e}")))
325 }
326
327 pub fn storage(&self) -> &dyn StorageBackend {
334 &*self.storage
335 }
336
337 pub fn graph(&self) -> &Mutex<GraphEngine> {
339 &self.graph
340 }
341
342 pub fn vector(&self) -> &Mutex<HnswIndex> {
344 &self.vector
345 }
346
347 pub fn embeddings(&self) -> Option<&Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>> {
349 self.embeddings.as_ref()
350 }
351
352 pub fn bm25_index(&self) -> &Mutex<Bm25Index> {
354 &self.bm25_index
355 }
356
357 pub fn db_path(&self) -> Option<&Path> {
359 self.db_path.as_deref()
360 }
361
362 pub fn config(&self) -> &CodememConfig {
364 &self.config
365 }
366
367 pub fn metrics(&self) -> &Arc<InMemoryMetrics> {
369 &self.metrics
370 }
371
/// Test-only view of the dirty flag: `true` after a memory was persisted
/// without saving the indexes, `false` once `save_index` has run.
#[cfg(test)]
pub(crate) fn is_dirty(&self) -> bool {
    let Self { dirty, .. } = self;
    dirty.load(Ordering::Acquire)
}
377
378 pub fn enrich_memory_text(
385 &self,
386 content: &str,
387 memory_type: MemoryType,
388 tags: &[String],
389 namespace: Option<&str>,
390 node_id: Option<&str>,
391 ) -> String {
392 let mut ctx = String::new();
393 ctx.push_str(&format!("[{}]", memory_type));
394
395 if let Some(ns) = namespace {
396 ctx.push_str(&format!(" [namespace:{}]", ns));
397 }
398
399 if !tags.is_empty() {
400 ctx.push_str(&format!(" [tags:{}]", tags.join(",")));
401 }
402
403 if let Some(nid) = node_id {
404 let graph = match self.lock_graph() {
405 Ok(g) => g,
406 Err(_) => return format!("{ctx}\n{content}"),
407 };
408 if let Ok(edges) = graph.get_edges(nid) {
409 let mut rels: Vec<String> = Vec::new();
410 for edge in edges.iter().take(8) {
411 let other = if edge.src == nid {
412 &edge.dst
413 } else {
414 &edge.src
415 };
416 let label = graph
417 .get_node(other)
418 .ok()
419 .flatten()
420 .map(|n| n.label.clone())
421 .unwrap_or_else(|| other.to_string());
422 let dir = if edge.src == nid { "->" } else { "<-" };
423 rels.push(format!("{dir} {} ({})", label, edge.relationship));
424 }
425 if !rels.is_empty() {
426 ctx.push_str(&format!("\nRelated: {}", rels.join("; ")));
427 }
428 }
429 }
430
431 format!("{ctx}\n{content}")
432 }
433
434 pub fn enrich_symbol_text(&self, sym: &Symbol, edges: &[ResolvedEdge]) -> String {
436 let mut ctx = String::new();
437 ctx.push_str(&format!("[{} {}]", sym.visibility, sym.kind));
438 ctx.push_str(&format!(" File: {}", sym.file_path));
439
440 if let Some(ref parent) = sym.parent {
441 ctx.push_str(&format!(" Parent: {}", parent));
442 }
443
444 let related: Vec<String> = edges
445 .iter()
446 .filter(|e| {
447 e.source_qualified_name == sym.qualified_name
448 || e.target_qualified_name == sym.qualified_name
449 })
450 .take(8)
451 .map(|e| {
452 if e.source_qualified_name == sym.qualified_name {
453 format!("-> {} ({})", e.target_qualified_name, e.relationship)
454 } else {
455 format!("<- {} ({})", e.source_qualified_name, e.relationship)
456 }
457 })
458 .collect();
459 if !related.is_empty() {
460 ctx.push_str(&format!("\nRelated: {}", related.join("; ")));
461 }
462
463 let mut body = format!("{}: {}", sym.qualified_name, sym.signature);
464 if let Some(ref doc) = sym.doc_comment {
465 body.push('\n');
466 body.push_str(doc);
467 }
468
469 format!("{ctx}\n{body}")
470 }
471
472 pub fn enrich_chunk_text(&self, chunk: &CodeChunk) -> String {
474 let mut ctx = String::new();
475 ctx.push_str(&format!("[chunk:{}]", chunk.node_kind));
476 ctx.push_str(&format!(" File: {}", chunk.file_path));
477 ctx.push_str(&format!(" Lines: {}-{}", chunk.line_start, chunk.line_end));
478 if let Some(ref parent) = chunk.parent_symbol {
479 ctx.push_str(&format!(" Parent: {}", parent));
480 }
481
482 let body = scoring::truncate_content(&chunk.text, 4000);
483
484 format!("{ctx}\n{body}")
485 }
486
487 pub fn auto_link_to_code_nodes(
492 &self,
493 memory_id: &str,
494 content: &str,
495 existing_links: &[String],
496 ) -> usize {
497 let mut graph = match self.lock_graph() {
498 Ok(g) => g,
499 Err(_) => return 0,
500 };
501
502 let existing_set: std::collections::HashSet<&str> =
503 existing_links.iter().map(|s| s.as_str()).collect();
504
505 let mut candidates: Vec<String> = Vec::new();
506
507 for word in content.split_whitespace() {
508 let cleaned = word.trim_matches(|c: char| {
509 !c.is_alphanumeric() && c != '/' && c != '.' && c != '_' && c != '-' && c != ':'
510 });
511 if cleaned.is_empty() {
512 continue;
513 }
514 if cleaned.contains('/') || cleaned.contains('.') {
515 let file_id = format!("file:{cleaned}");
516 if !existing_set.contains(file_id.as_str()) {
517 candidates.push(file_id);
518 }
519 }
520 if cleaned.contains("::") {
521 let sym_id = format!("sym:{cleaned}");
522 if !existing_set.contains(sym_id.as_str()) {
523 candidates.push(sym_id);
524 }
525 }
526 }
527
528 let now = chrono::Utc::now();
529 let mut created = 0;
530 let mut seen = std::collections::HashSet::new();
531
532 for candidate_id in &candidates {
533 if !seen.insert(candidate_id.clone()) {
534 continue;
535 }
536 if graph.get_node(candidate_id).ok().flatten().is_none() {
537 continue;
538 }
539 let edge = Edge {
540 id: format!("{memory_id}-RELATES_TO-{candidate_id}"),
541 src: memory_id.to_string(),
542 dst: candidate_id.clone(),
543 relationship: RelationshipType::RelatesTo,
544 weight: 0.5,
545 properties: std::collections::HashMap::from([(
546 "auto_linked".to_string(),
547 serde_json::json!(true),
548 )]),
549 created_at: now,
550 valid_from: None,
551 valid_to: None,
552 };
553 if self.storage.insert_graph_edge(&edge).is_ok() && graph.add_edge(edge).is_ok() {
554 created += 1;
555 }
556 }
557
558 created
559 }
560
561 pub fn get_node_memories(
569 &self,
570 node_id: &str,
571 max_depth: usize,
572 include_relationships: Option<&[RelationshipType]>,
573 ) -> Result<Vec<NodeMemoryResult>, CodememError> {
574 let graph = self.lock_graph()?;
575
576 let mut results: Vec<NodeMemoryResult> = Vec::new();
578 let mut seen_memory_ids = HashSet::new();
579 let mut visited = HashSet::new();
580 let mut queue: std::collections::VecDeque<(String, usize, String)> =
581 std::collections::VecDeque::new();
582
583 visited.insert(node_id.to_string());
584 queue.push_back((node_id.to_string(), 0, String::new()));
585
586 while let Some((current_id, depth, parent_rel)) = queue.pop_front() {
587 if current_id != node_id {
589 if let Some(node) = graph.get_node_ref(¤t_id) {
590 if node.kind == NodeKind::Memory {
591 let memory_id = node.memory_id.as_deref().unwrap_or(&node.id);
592 if seen_memory_ids.insert(memory_id.to_string()) {
593 if let Ok(Some(memory)) = self.storage.get_memory_no_touch(memory_id) {
594 results.push(NodeMemoryResult {
595 memory,
596 relationship: parent_rel.clone(),
597 depth,
598 });
599 }
600 }
601 }
602 }
603 }
604
605 if depth >= max_depth {
606 continue;
607 }
608
609 for edge in graph.get_edges_ref(¤t_id) {
611 let neighbor_id = if edge.src == current_id {
612 &edge.dst
613 } else {
614 &edge.src
615 };
616
617 if visited.contains(neighbor_id.as_str()) {
618 continue;
619 }
620
621 if let Some(allowed) = include_relationships {
623 if !allowed.contains(&edge.relationship) {
624 continue;
625 }
626 }
627
628 if let Some(neighbor) = graph.get_node_ref(neighbor_id) {
630 if neighbor.kind == NodeKind::Chunk {
631 continue;
632 }
633 }
634
635 visited.insert(neighbor_id.clone());
636 queue.push_back((
637 neighbor_id.clone(),
638 depth + 1,
639 edge.relationship.to_string(),
640 ));
641 }
642 }
643
644 Ok(results)
645 }
646
647 pub fn persist_memory(&self, memory: &MemoryNode) -> Result<(), CodememError> {
651 self.persist_memory_inner(memory, true)
652 }
653
654 pub(crate) fn persist_memory_no_save(&self, memory: &MemoryNode) -> Result<(), CodememError> {
657 self.persist_memory_inner(memory, false)
658 }
659
660 fn persist_memory_inner(&self, memory: &MemoryNode, save: bool) -> Result<(), CodememError> {
668 self.storage.insert_memory(memory)?;
670
671 let embedding_result = match self.lock_embeddings() {
674 Ok(Some(emb)) => {
675 let enriched = self.enrich_memory_text(
676 &memory.content,
677 memory.memory_type,
678 &memory.tags,
679 memory.namespace.as_deref(),
680 Some(&memory.id),
681 );
682 let result = emb.embed(&enriched).ok();
683 drop(emb);
684 result
685 }
686 Ok(None) => None,
687 Err(e) => {
688 tracing::warn!("Embeddings lock failed during persist: {e}");
689 None
690 }
691 };
692
693 match self.lock_bm25() {
695 Ok(mut bm25) => {
696 bm25.add_document(&memory.id, &memory.content);
697 }
698 Err(e) => tracing::warn!("BM25 lock failed during persist: {e}"),
699 }
700
701 match self.lock_graph() {
703 Ok(mut graph) => {
704 let node = codemem_core::GraphNode {
705 id: memory.id.clone(),
706 kind: codemem_core::NodeKind::Memory,
707 label: scoring::truncate_content(&memory.content, 80),
708 payload: std::collections::HashMap::new(),
709 centrality: 0.0,
710 memory_id: Some(memory.id.clone()),
711 namespace: memory.namespace.clone(),
712 };
713 if let Err(e) = self.storage.insert_graph_node(&node) {
714 tracing::warn!("Failed to insert graph node for memory {}: {e}", memory.id);
715 }
716 if let Err(e) = graph.add_node(node) {
717 tracing::warn!(
718 "Failed to add graph node in-memory for memory {}: {e}",
719 memory.id
720 );
721 }
722 }
723 Err(e) => tracing::warn!("Graph lock failed during persist: {e}"),
724 }
725
726 if let Some(vec) = &embedding_result {
728 if let Ok(mut vi) = self.lock_vector() {
729 if let Err(e) = vi.insert(&memory.id, vec) {
730 tracing::warn!("Failed to insert into vector index for {}: {e}", memory.id);
731 }
732 }
733 if let Err(e) = self.storage.store_embedding(&memory.id, vec) {
734 tracing::warn!("Failed to store embedding for {}: {e}", memory.id);
735 }
736 }
737
738 if save {
741 self.save_index(); } else {
743 self.dirty.store(true, Ordering::Release);
744 }
745
746 Ok(())
747 }
748
749 pub fn add_edge(&self, edge: Edge) -> Result<(), CodememError> {
753 self.storage.insert_graph_edge(&edge)?;
754 let mut graph = self.lock_graph()?;
755 graph.add_edge(edge)?;
756 Ok(())
757 }
758
759 pub fn refine_memory(
763 &self,
764 old_id: &str,
765 content: Option<&str>,
766 tags: Option<Vec<String>>,
767 importance: Option<f64>,
768 ) -> Result<(MemoryNode, String), CodememError> {
769 let old_memory = self
770 .storage
771 .get_memory(old_id)?
772 .ok_or_else(|| CodememError::NotFound(format!("Memory not found: {old_id}")))?;
773
774 let new_content = content.unwrap_or(&old_memory.content);
775 let new_tags = tags.unwrap_or_else(|| old_memory.tags.clone());
776 let new_importance = importance.unwrap_or(old_memory.importance);
777
778 let now = chrono::Utc::now();
779 let new_id = uuid::Uuid::new_v4().to_string();
780 let hash = codemem_storage::Storage::content_hash(new_content);
781
782 let memory = MemoryNode {
783 id: new_id.clone(),
784 content: new_content.to_string(),
785 memory_type: old_memory.memory_type,
786 importance: new_importance,
787 confidence: old_memory.confidence,
788 access_count: 0,
789 content_hash: hash,
790 tags: new_tags,
791 metadata: old_memory.metadata.clone(),
792 namespace: old_memory.namespace.clone(),
793 created_at: now,
794 updated_at: now,
795 last_accessed_at: now,
796 };
797
798 self.persist_memory(&memory)?;
799
800 let edge = Edge {
802 id: format!("{old_id}-EVOLVED_INTO-{new_id}"),
803 src: old_id.to_string(),
804 dst: new_id.clone(),
805 relationship: RelationshipType::EvolvedInto,
806 weight: 1.0,
807 properties: std::collections::HashMap::new(),
808 created_at: now,
809 valid_from: Some(now),
810 valid_to: None,
811 };
812 if let Err(e) = self.add_edge(edge) {
813 tracing::warn!("Failed to add EVOLVED_INTO edge: {e}");
814 }
815
816 Ok((memory, new_id))
817 }
818
819 pub fn split_memory(
821 &self,
822 source_id: &str,
823 parts: &[SplitPart],
824 ) -> Result<Vec<String>, CodememError> {
825 let source_memory = self
826 .storage
827 .get_memory(source_id)?
828 .ok_or_else(|| CodememError::NotFound(format!("Memory not found: {source_id}")))?;
829
830 if parts.is_empty() {
831 return Err(CodememError::InvalidInput(
832 "'parts' array must not be empty".to_string(),
833 ));
834 }
835
836 for part in parts {
838 if part.content.is_empty() {
839 return Err(CodememError::InvalidInput(
840 "Each part must have a non-empty 'content' field".to_string(),
841 ));
842 }
843 }
844
845 let now = chrono::Utc::now();
846 let mut child_ids: Vec<String> = Vec::new();
847
848 for part in parts {
849 let tags = part
850 .tags
851 .clone()
852 .unwrap_or_else(|| source_memory.tags.clone());
853 let importance = part.importance.unwrap_or(source_memory.importance);
854
855 let child_id = uuid::Uuid::new_v4().to_string();
856 let hash = codemem_storage::Storage::content_hash(&part.content);
857
858 let memory = MemoryNode {
859 id: child_id.clone(),
860 content: part.content.clone(),
861 memory_type: source_memory.memory_type,
862 importance,
863 confidence: source_memory.confidence,
864 access_count: 0,
865 content_hash: hash,
866 tags,
867 metadata: std::collections::HashMap::new(),
868 namespace: source_memory.namespace.clone(),
869 created_at: now,
870 updated_at: now,
871 last_accessed_at: now,
872 };
873
874 if let Err(e) = self.persist_memory_no_save(&memory) {
875 for created_id in &child_ids {
877 if let Err(del_err) = self.delete_memory(created_id) {
878 tracing::warn!(
879 "Failed to clean up child memory {created_id} after split failure: {del_err}"
880 );
881 }
882 }
883 return Err(e);
884 }
885
886 let edge = Edge {
888 id: format!("{child_id}-PART_OF-{source_id}"),
889 src: child_id.clone(),
890 dst: source_id.to_string(),
891 relationship: RelationshipType::PartOf,
892 weight: 1.0,
893 properties: std::collections::HashMap::new(),
894 created_at: now,
895 valid_from: Some(now),
896 valid_to: None,
897 };
898 if let Err(e) = self.add_edge(edge) {
899 tracing::warn!("Failed to add PART_OF edge: {e}");
900 }
901
902 child_ids.push(child_id);
903 }
904
905 self.save_index();
906 Ok(child_ids)
907 }
908
909 pub fn merge_memories(
911 &self,
912 source_ids: &[String],
913 content: &str,
914 memory_type: MemoryType,
915 importance: f64,
916 tags: Vec<String>,
917 ) -> Result<String, CodememError> {
918 if source_ids.len() < 2 {
919 return Err(CodememError::InvalidInput(
920 "'source_ids' must contain at least 2 IDs".to_string(),
921 ));
922 }
923
924 let id_refs: Vec<&str> = source_ids.iter().map(|s| s.as_str()).collect();
926 let found = self.storage.get_memories_batch(&id_refs)?;
927 if found.len() != source_ids.len() {
928 let found_ids: std::collections::HashSet<&str> =
929 found.iter().map(|m| m.id.as_str()).collect();
930 let missing: Vec<&str> = id_refs
931 .iter()
932 .filter(|id| !found_ids.contains(**id))
933 .copied()
934 .collect();
935 return Err(CodememError::NotFound(format!(
936 "Source memories not found: {}",
937 missing.join(", ")
938 )));
939 }
940
941 let now = chrono::Utc::now();
942 let merged_id = uuid::Uuid::new_v4().to_string();
943 let hash = codemem_storage::Storage::content_hash(content);
944
945 let memory = MemoryNode {
946 id: merged_id.clone(),
947 content: content.to_string(),
948 memory_type,
949 importance,
950 confidence: found.iter().map(|m| m.confidence).sum::<f64>() / found.len() as f64,
951 access_count: 0,
952 content_hash: hash,
953 tags,
954 metadata: std::collections::HashMap::new(),
955 namespace: found.iter().find_map(|m| m.namespace.clone()),
956 created_at: now,
957 updated_at: now,
958 last_accessed_at: now,
959 };
960
961 self.persist_memory_no_save(&memory)?;
962
963 for source_id in source_ids {
965 let edge = Edge {
966 id: format!("{merged_id}-SUMMARIZES-{source_id}"),
967 src: merged_id.clone(),
968 dst: source_id.clone(),
969 relationship: RelationshipType::Summarizes,
970 weight: 1.0,
971 properties: std::collections::HashMap::new(),
972 created_at: now,
973 valid_from: Some(now),
974 valid_to: None,
975 };
976 if let Err(e) = self.add_edge(edge) {
977 tracing::warn!("Failed to add SUMMARIZES edge to {source_id}: {e}");
978 }
979 }
980
981 self.save_index();
982 Ok(merged_id)
983 }
984
985 pub fn update_memory(
987 &self,
988 id: &str,
989 content: &str,
990 importance: Option<f64>,
991 ) -> Result<(), CodememError> {
992 self.storage.update_memory(id, content, importance)?;
993
994 self.lock_bm25()?.add_document(id, content);
996
997 if let Ok(mut graph) = self.lock_graph() {
999 if let Ok(Some(mut node)) = graph.get_node(id) {
1000 node.label = scoring::truncate_content(content, 80);
1001 if let Err(e) = graph.add_node(node) {
1002 tracing::warn!("Failed to update graph node for {id}: {e}");
1003 }
1004 }
1005 }
1006
1007 if let Some(emb_guard) = self.lock_embeddings()? {
1010 let (mem_type, tags, namespace) =
1011 if let Ok(Some(mem)) = self.storage.get_memory_no_touch(id) {
1012 (mem.memory_type, mem.tags, mem.namespace)
1013 } else {
1014 (MemoryType::Context, vec![], None)
1015 };
1016 let enriched =
1017 self.enrich_memory_text(content, mem_type, &tags, namespace.as_deref(), Some(id));
1018 let emb_result = emb_guard.embed(&enriched);
1019 drop(emb_guard);
1020 if let Ok(embedding) = emb_result {
1021 if let Err(e) = self.storage.store_embedding(id, &embedding) {
1022 tracing::warn!("Failed to store embedding for {id}: {e}");
1023 }
1024 let mut vec = self.lock_vector()?;
1025 if let Err(e) = vec.remove(id) {
1026 tracing::warn!("Failed to remove old vector for {id}: {e}");
1027 }
1028 if let Err(e) = vec.insert(id, &embedding) {
1029 tracing::warn!("Failed to insert new vector for {id}: {e}");
1030 }
1031 }
1032 }
1033
1034 self.save_index();
1035 Ok(())
1036 }
1037
1038 pub fn delete_memory(&self, id: &str) -> Result<bool, CodememError> {
1045 let deleted = self.storage.delete_memory_cascade(id)?;
1047 if !deleted {
1048 return Ok(false);
1049 }
1050
1051 let mut vec = self.lock_vector()?;
1054 if let Err(e) = vec.remove(id) {
1055 tracing::warn!("Failed to remove {id} from vector index: {e}");
1056 }
1057 drop(vec);
1058
1059 let mut graph = self.lock_graph()?;
1060 if let Err(e) = graph.remove_node(id) {
1061 tracing::warn!("Failed to remove {id} from in-memory graph: {e}");
1062 }
1063 drop(graph);
1064
1065 self.lock_bm25()?.remove_document(id);
1066
1067 self.save_index();
1069 Ok(true)
1070 }
1071
1072 pub fn save_index(&self) {
1078 if let Some(ref db_path) = self.db_path {
1079 let idx_path = db_path.with_extension("idx");
1080 if let Ok(mut vi) = self.lock_vector() {
1081 if vi.needs_compaction() {
1083 let ghost = vi.ghost_count();
1084 let live = vi.stats().count;
1085 tracing::info!(
1086 "HNSW ghost compaction: {ghost} ghosts / {live} live entries, rebuilding..."
1087 );
1088 if let Ok(embeddings) = self.storage.list_all_embeddings() {
1089 if let Err(e) = vi.rebuild_from_entries(&embeddings) {
1090 tracing::warn!("HNSW compaction failed: {e}");
1091 }
1092 }
1093 }
1094 if let Err(e) = vi.save(&idx_path) {
1095 tracing::warn!("Failed to save vector index: {e}");
1096 }
1097 }
1098
1099 let bm25_path = db_path.with_extension("bm25");
1101 if let Ok(bm25) = self.lock_bm25() {
1102 if bm25.needs_save() {
1103 let data = bm25.serialize();
1104 let tmp_path = db_path.with_extension("bm25.tmp");
1105 if let Err(e) = std::fs::write(&tmp_path, &data)
1106 .and_then(|_| std::fs::rename(&tmp_path, &bm25_path))
1107 {
1108 tracing::warn!("Failed to save BM25 index: {e}");
1109 }
1110 }
1111 }
1112 }
1113 self.dirty.store(false, Ordering::Release);
1114 }
1115
1116 pub fn reload_graph(&self) -> Result<(), CodememError> {
1118 let new_graph = GraphEngine::from_storage(&*self.storage)?;
1119 let mut graph = self.lock_graph()?;
1120 *graph = new_graph;
1121 graph.recompute_centrality();
1122 Ok(())
1123 }
1124
1125 pub fn process_watch_event(
1137 &self,
1138 event: &watch::WatchEvent,
1139 namespace: Option<&str>,
1140 ) -> Result<(), CodememError> {
1141 match event {
1142 watch::WatchEvent::FileChanged(path) | watch::WatchEvent::FileCreated(path) => {
1143 self.index_single_file(path, namespace)?;
1144 }
1145 watch::WatchEvent::FileDeleted(path) => {
1146 let path_str = path.to_string_lossy().to_string();
1147 self.cleanup_file_nodes(&path_str)?;
1148 }
1149 }
1150 Ok(())
1151 }
1152
1153 fn index_single_file(&self, path: &Path, namespace: Option<&str>) -> Result<(), CodememError> {
1156 let content = std::fs::read(path)?;
1157
1158 let path_str = path.to_string_lossy().to_string();
1159 let parser = index::CodeParser::new();
1160
1161 let parse_result = match parser.parse_file(&path_str, &content) {
1162 Some(pr) => pr,
1163 None => return Ok(()), };
1165
1166 let mut file_paths = HashSet::new();
1168 file_paths.insert(parse_result.file_path.clone());
1169
1170 let mut resolver = index::ReferenceResolver::new();
1171 resolver.add_symbols(&parse_result.symbols);
1172 let edges = resolver.resolve_all(&parse_result.references);
1173
1174 let results = IndexAndResolveResult {
1175 index: index::IndexResult {
1176 files_scanned: 1,
1177 files_parsed: 1,
1178 files_skipped: 0,
1179 total_symbols: parse_result.symbols.len(),
1180 total_references: parse_result.references.len(),
1181 total_chunks: parse_result.chunks.len(),
1182 parse_results: Vec::new(),
1183 },
1184 symbols: parse_result.symbols,
1185 references: parse_result.references,
1186 chunks: parse_result.chunks,
1187 file_paths,
1188 edges,
1189 };
1190
1191 self.persist_index_results(&results, namespace)?;
1192 Ok(())
1193 }
1194
1195 fn cleanup_file_nodes(&self, file_path: &str) -> Result<(), CodememError> {
1199 let file_node_id = format!("file:{file_path}");
1200
1201 let chunk_prefix = format!("chunk:{file_path}:");
1203 if let Err(e) = self.storage.delete_graph_nodes_by_prefix(&chunk_prefix) {
1204 tracing::warn!("Failed to delete chunk nodes for {file_path}: {e}");
1205 }
1206
1207 let graph = self.lock_graph()?;
1209 let sym_ids: Vec<String> = graph
1210 .get_all_nodes()
1211 .into_iter()
1212 .filter(|n| {
1213 n.id.starts_with("sym:")
1214 && n.payload.get("file_path").and_then(|v| v.as_str()) == Some(file_path)
1215 })
1216 .map(|n| n.id.clone())
1217 .collect();
1218 drop(graph);
1219
1220 for sym_id in &sym_ids {
1221 if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
1222 tracing::warn!("Failed to delete graph edges for {sym_id}: {e}");
1223 }
1224 if let Err(e) = self.storage.delete_graph_node(sym_id) {
1225 tracing::warn!("Failed to delete graph node {sym_id}: {e}");
1226 }
1227 if let Err(e) = self.storage.delete_embedding(sym_id) {
1228 tracing::warn!("Failed to delete embedding {sym_id}: {e}");
1229 }
1230 }
1231
1232 if let Err(e) = self.storage.delete_graph_edges_for_node(&file_node_id) {
1234 tracing::warn!("Failed to delete graph edges for {file_node_id}: {e}");
1235 }
1236 if let Err(e) = self.storage.delete_graph_node(&file_node_id) {
1237 tracing::warn!("Failed to delete graph node {file_node_id}: {e}");
1238 }
1239
1240 let mut graph = self.lock_graph()?;
1242 for sym_id in &sym_ids {
1243 if let Err(e) = graph.remove_node(sym_id) {
1244 tracing::warn!("Failed to remove {sym_id} from in-memory graph: {e}");
1245 }
1246 }
1247 let chunk_ids: Vec<String> = graph
1249 .get_all_nodes()
1250 .into_iter()
1251 .filter(|n| n.id.starts_with(&format!("chunk:{file_path}:")))
1252 .map(|n| n.id.clone())
1253 .collect();
1254 for chunk_id in &chunk_ids {
1255 if let Err(e) = graph.remove_node(chunk_id) {
1256 tracing::warn!("Failed to remove {chunk_id} from in-memory graph: {e}");
1257 }
1258 }
1259 if let Err(e) = graph.remove_node(&file_node_id) {
1260 tracing::warn!("Failed to remove {file_node_id} from in-memory graph: {e}");
1261 }
1262 drop(graph);
1263
1264 let mut vec = self.lock_vector()?;
1266 for sym_id in &sym_ids {
1267 if let Err(e) = vec.remove(sym_id) {
1268 tracing::warn!("Failed to remove {sym_id} from vector index: {e}");
1269 }
1270 }
1271 for chunk_id in &chunk_ids {
1272 if let Err(e) = vec.remove(chunk_id) {
1273 tracing::warn!("Failed to remove {chunk_id} from vector index: {e}");
1274 }
1275 }
1276 drop(vec);
1277
1278 self.save_index();
1279 Ok(())
1280 }
1281
1282 pub fn cleanup_deleted_files(&self, dir_path: &str) -> Result<usize, CodememError> {
1285 let dir = Path::new(dir_path);
1286 if !dir.is_dir() {
1287 return Ok(0);
1288 }
1289
1290 let graph = self.lock_graph()?;
1292 let file_nodes: Vec<String> = graph
1293 .get_all_nodes()
1294 .into_iter()
1295 .filter(|n| n.kind == NodeKind::File && n.label.starts_with(dir_path))
1296 .map(|n| n.label.clone())
1297 .collect();
1298 drop(graph);
1299
1300 let mut cleaned = 0usize;
1301 for file_path in &file_nodes {
1302 if !Path::new(file_path).exists() {
1303 self.cleanup_file_nodes(file_path)?;
1304 cleaned += 1;
1305 }
1306 }
1307
1308 if cleaned > 0 {
1309 self.lock_graph()?.recompute_centrality();
1310 }
1311
1312 Ok(cleaned)
1313 }
1314
1315 pub fn index_and_enrich(
1320 &self,
1321 path: &str,
1322 namespace: Option<&str>,
1323 git_days: u64,
1324 ) -> Result<IndexEnrichResult, CodememError> {
1325 let mut indexer = Indexer::new();
1327 let index_results = indexer.index_and_resolve(Path::new(path))?;
1328 let persist = self.persist_index_results(&index_results, namespace)?;
1329
1330 let git_result = match self.enrich_git_history(path, git_days, namespace) {
1332 Ok(r) => Some(r),
1333 Err(e) => {
1334 tracing::warn!("Git history enrichment failed: {e}");
1335 None
1336 }
1337 };
1338
1339 let security_result = match self.enrich_security(namespace) {
1341 Ok(r) => Some(r),
1342 Err(e) => {
1343 tracing::warn!("Security enrichment failed: {e}");
1344 None
1345 }
1346 };
1347
1348 let perf_result = match self.enrich_performance(10, namespace) {
1350 Ok(r) => Some(r),
1351 Err(e) => {
1352 tracing::warn!("Performance enrichment failed: {e}");
1353 None
1354 }
1355 };
1356
1357 self.lock_graph()?.recompute_centrality();
1359
1360 let total_insights = git_result.as_ref().map_or(0, |r| r.insights_stored)
1361 + security_result.as_ref().map_or(0, |r| r.insights_stored)
1362 + perf_result.as_ref().map_or(0, |r| r.insights_stored);
1363
1364 Ok(IndexEnrichResult {
1365 files_indexed: persist.files_created,
1366 symbols_stored: persist.symbols_stored,
1367 chunks_stored: persist.chunks_stored,
1368 edges_resolved: persist.edges_resolved,
1369 symbols_embedded: persist.symbols_embedded,
1370 chunks_embedded: persist.chunks_embedded,
1371 total_insights,
1372 })
1373 }
1374
1375 pub fn session_context(&self, namespace: Option<&str>) -> Result<SessionContext, CodememError> {
1380 let now = chrono::Utc::now();
1381 let cutoff_24h = now - chrono::Duration::hours(24);
1382
1383 let ids = match namespace {
1385 Some(ns) => self.storage.list_memory_ids_for_namespace(ns)?,
1386 None => self.storage.list_memory_ids()?,
1387 };
1388
1389 let mut recent_memories = Vec::new();
1390 let mut pending_analyses = Vec::new();
1391
1392 for id in ids.iter().rev().take(200) {
1393 if let Ok(Some(m)) = self.storage.get_memory_no_touch(id) {
1394 if m.tags.contains(&"pending-analysis".to_string()) {
1396 pending_analyses.push(m.clone());
1397 }
1398 if m.created_at >= cutoff_24h {
1400 recent_memories.push(m);
1401 }
1402 if recent_memories.len() >= 50 && pending_analyses.len() >= 10 {
1403 break;
1404 }
1405 }
1406 }
1407
1408 let session_count = self.storage.session_count(namespace).unwrap_or(1).max(1);
1410 let active_patterns = patterns::detect_patterns(
1411 &*self.storage,
1412 namespace,
1413 2, session_count,
1415 )
1416 .unwrap_or_default();
1417
1418 let last_session_summary = self
1420 .storage
1421 .list_sessions(namespace, 1)?
1422 .into_iter()
1423 .next()
1424 .and_then(|s| s.summary);
1425
1426 Ok(SessionContext {
1427 recent_memories,
1428 pending_analyses,
1429 active_patterns,
1430 last_session_summary,
1431 })
1432 }
1433}
1434
/// Summary returned by `CodememEngine::index_and_enrich`.
///
/// All counters except `total_insights` are copied from the result of
/// persisting the index.
1435#[derive(Debug)]
1439pub struct IndexEnrichResult {
    /// Taken from the persist result's `files_created`.
1440 pub files_indexed: usize,
    /// Taken from the persist result's `symbols_stored`.
1441 pub symbols_stored: usize,
    /// Taken from the persist result's `chunks_stored`.
1442 pub chunks_stored: usize,
    /// Taken from the persist result's `edges_resolved`.
1443 pub edges_resolved: usize,
    /// Taken from the persist result's `symbols_embedded`.
1444 pub symbols_embedded: usize,
    /// Taken from the persist result's `chunks_embedded`.
1445 pub chunks_embedded: usize,
    /// Sum of the insights stored by the git-history, security and performance
    /// enrichment passes; a pass that failed counts as 0.
1446 pub total_insights: usize,
1447}
1448
/// Context assembled by `CodememEngine::session_context`.
1449#[derive(Debug)]
1451pub struct SessionContext {
    /// Memories created within the last 24 hours, among the memory IDs that were scanned.
1452 pub recent_memories: Vec<MemoryNode>,
    /// Scanned memories carrying the `pending-analysis` tag.
1454 pub pending_analyses: Vec<MemoryNode>,
    /// Patterns returned by `patterns::detect_patterns`; empty when detection failed.
1456 pub active_patterns: Vec<DetectedPattern>,
    /// Summary of the first session returned by `list_sessions(namespace, 1)`, if it has one.
1458 pub last_session_summary: Option<String>,
1460}