1use crate::index::{self, IndexAndResolveResult, Indexer};
2use crate::patterns;
3use crate::CodememEngine;
4use codemem_core::{CodememError, DetectedPattern, GraphBackend, MemoryNode, VectorBackend};
5use std::collections::HashSet;
6use std::path::Path;
7use std::sync::atomic::Ordering;
8
9impl CodememEngine {
10 pub fn save_index(&self) {
16 if let Some(ref db_path) = self.db_path {
17 if self.vector_ready() {
19 let idx_path = db_path.with_extension("idx");
20 if let Ok(mut vi) = self.lock_vector() {
21 if vi.needs_compaction() {
23 let ghost = vi.ghost_count();
24 let live = vi.stats().count;
25 tracing::info!(
26 "HNSW ghost compaction: {ghost} ghosts / {live} live entries, rebuilding..."
27 );
28 if let Ok(embeddings) = self.storage.list_all_embeddings() {
29 if let Err(e) = vi.rebuild_from_entries(&embeddings) {
30 tracing::warn!("HNSW compaction failed: {e}");
31 }
32 }
33 }
34 if let Err(e) = vi.save(&idx_path) {
35 tracing::warn!("Failed to save vector index: {e}");
36 }
37 }
38 }
39
40 if self.bm25_ready() {
42 let bm25_path = db_path.with_extension("bm25");
43 if let Ok(bm25) = self.lock_bm25() {
44 if bm25.needs_save() {
45 let data = bm25.serialize();
46 let tmp_path = db_path.with_extension("bm25.tmp");
47 if let Err(e) = std::fs::write(&tmp_path, &data)
48 .and_then(|_| std::fs::rename(&tmp_path, &bm25_path))
49 {
50 tracing::warn!("Failed to save BM25 index: {e}");
51 }
52 }
53 }
54 }
55 }
56 self.dirty.store(false, Ordering::Release);
57 }
58
59 pub fn reload_graph(&self) -> Result<(), CodememError> {
61 let new_graph = codemem_storage::graph::GraphEngine::from_storage(&*self.storage)?;
62 let mut graph = self.lock_graph()?;
63 *graph = new_graph;
64 graph.recompute_centrality();
65 Ok(())
66 }
67
68 pub fn process_watch_event(
80 &self,
81 event: &crate::watch::WatchEvent,
82 namespace: Option<&str>,
83 project_root: Option<&Path>,
84 ) -> Result<(), CodememError> {
85 match event {
86 crate::watch::WatchEvent::FileChanged(path)
87 | crate::watch::WatchEvent::FileCreated(path) => {
88 self.index_single_file(path, namespace, project_root)?;
89 }
90 crate::watch::WatchEvent::FileDeleted(path) => {
91 let rel = if let Some(root) = project_root {
93 path.strip_prefix(root)
94 .unwrap_or(path)
95 .to_string_lossy()
96 .to_string()
97 } else {
98 path.to_string_lossy().to_string()
99 };
100 self.cleanup_file_nodes(&rel)?;
101 }
102 }
103 Ok(())
104 }
105
106 fn index_single_file(
116 &self,
117 path: &Path,
118 namespace: Option<&str>,
119 project_root: Option<&Path>,
120 ) -> Result<(), CodememError> {
121 let content = std::fs::read(path)?;
122
123 let path_str = if let Some(root) = project_root {
124 path.strip_prefix(root)
125 .unwrap_or(path)
126 .to_string_lossy()
127 .to_string()
128 } else {
129 path.to_string_lossy().to_string()
130 };
131
132 let hash = {
135 let mut cd_guard = self
136 .change_detector
137 .lock()
138 .map_err(|_| CodememError::LockPoisoned("change_detector".into()))?;
139 let ns = namespace.unwrap_or("");
140 let cd = cd_guard.get_or_insert_with(|| {
141 let mut cd = index::incremental::ChangeDetector::new();
142 cd.load_from_storage(&*self.storage, ns);
143 cd
144 });
145 let (changed, hash) = cd.check_changed(&path_str, &content);
146 if !changed {
147 tracing::debug!("Skipping unchanged file: {path_str}");
148 return Ok(());
149 }
150 hash
151 };
152
153 let parser = index::CodeParser::new();
154
155 let parse_result = match parser.parse_file(&path_str, &content) {
156 Some(pr) => pr,
157 None => return Ok(()), };
159
160 let mut file_paths = HashSet::new();
162 file_paths.insert(parse_result.file_path.clone());
163
164 let mut resolver = index::ReferenceResolver::new();
165 resolver.add_symbols(&parse_result.symbols);
166 let resolve_result = resolver.resolve_all_with_unresolved(&parse_result.references);
167
168 let results = IndexAndResolveResult {
169 index: index::IndexResult {
170 files_scanned: 1,
171 files_parsed: 1,
172 files_skipped: 0,
173 total_symbols: parse_result.symbols.len(),
174 total_references: parse_result.references.len(),
175 total_chunks: parse_result.chunks.len(),
176 parse_results: Vec::new(),
177 },
178 symbols: parse_result.symbols,
179 references: parse_result.references,
180 chunks: parse_result.chunks,
181 file_paths,
182 edges: resolve_result.edges,
183 unresolved: resolve_result.unresolved,
184 root_path: project_root
185 .map(|p| p.to_path_buf())
186 .unwrap_or_else(|| path.to_path_buf()),
187 scip_build: None,
188 };
189
190 self.persist_index_results(&results, namespace)?;
191
192 if let Ok(mut cd_guard) = self.change_detector.lock() {
194 if let Some(cd) = cd_guard.as_mut() {
195 cd.record_hash(&path_str, hash);
196 if let Err(e) = cd.save_to_storage(&*self.storage, namespace.unwrap_or("")) {
197 tracing::warn!("Failed to save file hash for {path_str}: {e}");
198 }
199 }
200 }
201
202 Ok(())
203 }
204
205 pub fn cleanup_stale_symbols(
220 &self,
221 file_path: &str,
222 old_symbol_ids: &HashSet<String>,
223 new_symbol_ids: &HashSet<String>,
224 ) -> Result<usize, CodememError> {
225 let stale_ids: Vec<&String> = old_symbol_ids
227 .iter()
228 .filter(|id| !new_symbol_ids.contains(*id))
229 .collect();
230
231 if stale_ids.is_empty() {
232 return Ok(0);
233 }
234
235 let count = stale_ids.len();
236 tracing::info!(
237 "Cleaning up {count} stale symbols for {file_path}: {:?}",
238 stale_ids
239 );
240
241 let file_node_id = format!("file:{file_path}");
242 let mut redirected_pairs: std::collections::HashSet<(String, String)> =
243 std::collections::HashSet::new();
244 let mut redirected_edges: Vec<codemem_core::Edge> = Vec::new();
245 for sym_id in &stale_ids {
246 let edges = self.storage.get_edges_for_node(sym_id.as_str())?;
250 for edge in &edges {
251 let other = if edge.src.as_str() == sym_id.as_str() {
252 &edge.dst
253 } else {
254 &edge.src
255 };
256 let is_code_node = other.starts_with("sym:")
257 || other.starts_with("file:")
258 || other.starts_with("chunk:")
259 || other.starts_with("pkg:");
260 if !is_code_node {
261 let pair = (other.to_string(), file_node_id.clone());
263 if !redirected_pairs.insert(pair) {
264 continue;
265 }
266 let mut redirected = edge.clone();
267 if redirected.src.as_str() == sym_id.as_str() {
268 redirected.src = file_node_id.clone();
269 } else {
270 redirected.dst = file_node_id.clone();
271 }
272 redirected.id = format!("{}-redirected", edge.id);
275 if let Err(e) = self.storage.insert_graph_edge(&redirected) {
276 tracing::warn!("Failed to redirect memory edge {}: {e}", edge.id);
277 }
278 redirected_edges.push(redirected);
279 }
280 }
281
282 if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
284 tracing::warn!("Failed to delete edges for stale symbol {sym_id}: {e}");
285 }
286 if let Err(e) = self.storage.delete_graph_node(sym_id) {
287 tracing::warn!("Failed to delete stale symbol node {sym_id}: {e}");
288 }
289 if let Err(e) = self.storage.delete_embedding(sym_id) {
290 tracing::warn!("Failed to delete embedding for stale symbol {sym_id}: {e}");
291 }
292 }
293
294 {
296 let mut graph = self.lock_graph()?;
297 for sym_id in &stale_ids {
298 if let Err(e) = graph.remove_node(sym_id.as_str()) {
299 tracing::warn!("Failed to remove stale {sym_id} from in-memory graph: {e}");
300 }
301 }
302 for edge in redirected_edges {
305 let _ = graph.add_edge(edge);
306 }
307 }
308 {
309 let mut vec = self.lock_vector()?;
310 for sym_id in &stale_ids {
311 if let Err(e) = vec.remove(sym_id.as_str()) {
312 tracing::warn!("Failed to remove stale {sym_id} from vector index: {e}");
313 }
314 }
315 }
316
317 Ok(count)
318 }
319
320 fn cleanup_file_nodes(&self, file_path: &str) -> Result<(), CodememError> {
324 let file_node_id = format!("file:{file_path}");
325
326 let chunk_prefix = format!("chunk:{file_path}:");
328 if let Err(e) = self.storage.delete_graph_nodes_by_prefix(&chunk_prefix) {
329 tracing::warn!("Failed to delete chunk nodes for {file_path}: {e}");
330 }
331
332 let graph = self.lock_graph()?;
334 let sym_ids: Vec<String> = graph
335 .get_all_nodes()
336 .into_iter()
337 .filter(|n| {
338 n.id.starts_with("sym:")
339 && n.payload.get("file_path").and_then(|v| v.as_str()) == Some(file_path)
340 })
341 .map(|n| n.id.clone())
342 .collect();
343 drop(graph);
344
345 for sym_id in &sym_ids {
346 if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
347 tracing::warn!("Failed to delete graph edges for {sym_id}: {e}");
348 }
349 if let Err(e) = self.storage.delete_graph_node(sym_id) {
350 tracing::warn!("Failed to delete graph node {sym_id}: {e}");
351 }
352 if let Err(e) = self.storage.delete_embedding(sym_id) {
353 tracing::warn!("Failed to delete embedding {sym_id}: {e}");
354 }
355 }
356
357 if let Err(e) = self.storage.delete_graph_edges_for_node(&file_node_id) {
359 tracing::warn!("Failed to delete graph edges for {file_node_id}: {e}");
360 }
361 if let Err(e) = self.storage.delete_graph_node(&file_node_id) {
362 tracing::warn!("Failed to delete graph node {file_node_id}: {e}");
363 }
364
365 let mut graph = self.lock_graph()?;
367 for sym_id in &sym_ids {
368 if let Err(e) = graph.remove_node(sym_id) {
369 tracing::warn!("Failed to remove {sym_id} from in-memory graph: {e}");
370 }
371 }
372 let chunk_ids: Vec<String> = graph
374 .get_all_nodes()
375 .into_iter()
376 .filter(|n| n.id.starts_with(&format!("chunk:{file_path}:")))
377 .map(|n| n.id.clone())
378 .collect();
379 for chunk_id in &chunk_ids {
380 if let Err(e) = graph.remove_node(chunk_id) {
381 tracing::warn!("Failed to remove {chunk_id} from in-memory graph: {e}");
382 }
383 }
384 if let Err(e) = graph.remove_node(&file_node_id) {
385 tracing::warn!("Failed to remove {file_node_id} from in-memory graph: {e}");
386 }
387 drop(graph);
388
389 let mut vec = self.lock_vector()?;
391 for sym_id in &sym_ids {
392 if let Err(e) = vec.remove(sym_id) {
393 tracing::warn!("Failed to remove {sym_id} from vector index: {e}");
394 }
395 }
396 for chunk_id in &chunk_ids {
397 if let Err(e) = vec.remove(chunk_id) {
398 tracing::warn!("Failed to remove {chunk_id} from vector index: {e}");
399 }
400 }
401 drop(vec);
402
403 self.save_index();
404 Ok(())
405 }
406
407 pub fn detect_orphans(
417 &self,
418 project_root: Option<&Path>,
419 ) -> Result<(usize, usize), CodememError> {
420 let all_nodes = self.storage.all_graph_nodes()?;
422 let node_ids: HashSet<String> = all_nodes.iter().map(|n| n.id.clone()).collect();
423
424 let mut orphan_sym_ids: Vec<String> = Vec::new();
425
426 if let Some(root) = project_root {
429 for node in &all_nodes {
430 if !node.id.starts_with("sym:") && !node.id.starts_with("chunk:") {
431 continue;
432 }
433 let file_path = match node.payload.get("file_path").and_then(|v| v.as_str()) {
434 Some(fp) => fp,
435 None => continue,
436 };
437 let abs_path = root.join(file_path);
438 if !abs_path.exists() {
439 orphan_sym_ids.push(node.id.clone());
440 }
441 }
442 }
443
444 let all_edges = self.storage.all_graph_edges()?;
446 let mut dangling_edge_ids: Vec<String> = Vec::new();
447 for edge in &all_edges {
448 if !node_ids.contains(&edge.src) || !node_ids.contains(&edge.dst) {
449 dangling_edge_ids.push(edge.id.clone());
450 }
451 }
452
453 let symbols_cleaned = orphan_sym_ids.len();
454
455 for sym_id in &orphan_sym_ids {
457 if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
458 tracing::warn!("Orphan cleanup: failed to delete edges for {sym_id}: {e}");
459 }
460 if let Err(e) = self.storage.delete_graph_node(sym_id) {
461 tracing::warn!("Orphan cleanup: failed to delete node {sym_id}: {e}");
462 }
463 if let Err(e) = self.storage.delete_embedding(sym_id) {
464 tracing::warn!("Orphan cleanup: failed to delete embedding {sym_id}: {e}");
465 }
466 }
467
468 if !orphan_sym_ids.is_empty() {
470 if let Ok(mut graph) = self.lock_graph() {
471 for sym_id in &orphan_sym_ids {
472 let _ = graph.remove_node(sym_id);
473 }
474 }
475 if let Ok(mut vec) = self.lock_vector() {
476 for sym_id in &orphan_sym_ids {
477 let _ = vec.remove(sym_id);
478 }
479 }
480 }
481
482 let mut edges_cleaned = 0usize;
484 for edge_id in &dangling_edge_ids {
485 match self.storage.delete_graph_edge(edge_id) {
486 Ok(true) => edges_cleaned += 1,
487 Ok(false) => {} Err(e) => {
489 tracing::warn!("Orphan cleanup: failed to delete dangling edge {edge_id}: {e}");
490 }
491 }
492 }
493
494 if symbols_cleaned > 0 || edges_cleaned > 0 {
495 tracing::info!(
496 "Orphan scan: cleaned {symbols_cleaned} symbol/chunk nodes, {edges_cleaned} dangling edges"
497 );
498 }
499
500 Ok((symbols_cleaned, edges_cleaned))
501 }
502
503 pub fn analyze(&self, options: AnalyzeOptions<'_>) -> Result<AnalyzeResult, CodememError> {
511 let root = options.path;
512
513 if !options.skip_embed {
517 drop(self.lock_embeddings());
518 drop(self.lock_vector());
519 drop(self.lock_bm25());
520 }
521
522 let (scip_covered, scip_build) = if !options.skip_scip && self.config.scip.enabled {
525 match self.run_scip_phase(root, options.namespace) {
526 Ok((covered, build)) => (Some(covered), Some(build)),
527 Err(e) => {
528 tracing::warn!("SCIP phase failed, falling back to ast-grep only: {e}");
529 (None, None)
530 }
531 }
532 } else {
533 (None, None)
534 };
535
536 let scip_nodes_created = scip_build.as_ref().map_or(0, |b| b.nodes.len());
537 let scip_edges_created = scip_build.as_ref().map_or(0, |b| b.edges.len());
538 let scip_files_covered = scip_covered.as_ref().map_or(0, |s| s.len());
539
540 let mut indexer = match options.change_detector {
543 Some(cd) if !options.force => Indexer::with_change_detector(cd),
544 _ => Indexer::new(),
545 };
546 let resolved =
547 indexer.index_and_resolve_with_scip(root, scip_covered.as_ref(), scip_build)?;
548
549 let persist = if options.skip_embed {
551 self.persist_graph_only(&resolved, Some(options.namespace))?
552 } else if let Some(ref on_progress) = options.progress {
553 self.persist_index_results_with_progress(
554 &resolved,
555 Some(options.namespace),
556 |done, total| {
557 on_progress(AnalyzeProgress::Embedding { done, total });
558 },
559 )?
560 } else {
561 self.persist_index_results(&resolved, Some(options.namespace))?
562 };
563
564 {
566 if let Ok(mut cache) = self.lock_index_cache() {
567 *cache = Some(crate::IndexCache {
568 symbols: resolved.symbols,
569 chunks: resolved.chunks,
570 root_path: root.to_string_lossy().to_string(),
571 });
572 }
573 }
574
575 let enrichment = if options.skip_enrich {
577 crate::enrichment::EnrichmentPipelineResult {
578 results: serde_json::json!({}),
579 total_insights: 0,
580 }
581 } else {
582 let path_str = root.to_str().unwrap_or("");
583 self.run_enrichments(
584 path_str,
585 &[],
586 options.git_days,
587 Some(options.namespace),
588 None,
589 )
590 };
591
592 self.lock_graph()?.recompute_centrality();
594
595 let top_nodes = self.find_important_nodes(10, 0.85).unwrap_or_default();
597 let community_count = self.louvain_communities(1.0).map(|c| c.len()).unwrap_or(0);
598
599 self.save_index();
601
602 indexer
604 .change_detector()
605 .save_to_storage(self.storage(), options.namespace)?;
606
607 Ok(AnalyzeResult {
608 files_parsed: resolved.index.files_parsed,
609 files_skipped: resolved.index.files_skipped,
610 symbols_found: resolved.index.total_symbols,
611 edges_resolved: persist.edges_resolved,
612 chunks_stored: persist.chunks_stored,
613 symbols_embedded: persist.symbols_embedded,
614 chunks_embedded: persist.chunks_embedded,
615 chunks_pruned: persist.chunks_pruned,
616 symbols_pruned: persist.symbols_pruned,
617 enrichment_results: enrichment.results,
618 total_insights: enrichment.total_insights,
619 top_nodes,
620 community_count,
621 scip_nodes_created,
622 scip_edges_created,
623 scip_files_covered,
624 })
625 }
626
627 fn run_scip_phase(
629 &self,
630 root: &Path,
631 namespace: &str,
632 ) -> Result<(HashSet<String>, index::scip::graph_builder::ScipBuildResult), CodememError> {
633 let orchestrator =
634 index::scip::orchestrate::ScipOrchestrator::new(self.config.scip.clone());
635 let orch_result = orchestrator.run(root, namespace)?;
636
637 if orch_result.scip_result.covered_files.is_empty() {
638 return Ok((
639 HashSet::new(),
640 index::scip::graph_builder::ScipBuildResult::default(),
641 ));
642 }
643
644 for (lang, err) in &orch_result.failed_languages {
645 tracing::warn!("SCIP indexer for {:?} failed: {}", lang, err);
646 }
647 for lang in &orch_result.indexed_languages {
648 tracing::info!("SCIP indexed {:?} successfully", lang);
649 }
650
651 let build = index::scip::graph_builder::build_graph(
652 &orch_result.scip_result,
653 Some(namespace),
654 &self.config.scip,
655 );
656 let covered: HashSet<String> = build.files_covered.clone();
657
658 tracing::info!(
659 "SCIP phase: {} nodes, {} edges, {} ext nodes, {} files covered, {} doc memories",
660 build.nodes.len(),
661 build.edges.len(),
662 build.ext_nodes_created,
663 covered.len(),
664 build.doc_memories_created,
665 );
666
667 Ok((covered, build))
668 }
669
670 pub fn session_context(&self, namespace: Option<&str>) -> Result<SessionContext, CodememError> {
675 let now = chrono::Utc::now();
676 let cutoff_24h = now - chrono::Duration::hours(24);
677
678 let ids = match namespace {
680 Some(ns) => self.storage.list_memory_ids_for_namespace(ns)?,
681 None => self.storage.list_memory_ids()?,
682 };
683
684 let mut recent_memories = Vec::new();
685 let mut pending_analyses = Vec::new();
686
687 for id in ids.iter().rev().take(200) {
688 if let Ok(Some(m)) = self.storage.get_memory_no_touch(id) {
689 if m.tags.contains(&"pending-analysis".to_string()) {
691 pending_analyses.push(m.clone());
692 }
693 if m.created_at >= cutoff_24h {
695 recent_memories.push(m);
696 }
697 if recent_memories.len() >= 50 && pending_analyses.len() >= 10 {
698 break;
699 }
700 }
701 }
702
703 let session_count = self.storage.session_count(namespace).unwrap_or(1).max(1);
705 let active_patterns = patterns::detect_patterns(
706 &*self.storage,
707 namespace,
708 2, session_count,
710 )
711 .unwrap_or_default();
712
713 let last_session_summary = self
715 .storage
716 .list_sessions(namespace, 1)?
717 .into_iter()
718 .next()
719 .and_then(|s| s.summary);
720
721 Ok(SessionContext {
722 recent_memories,
723 pending_analyses,
724 active_patterns,
725 last_session_summary,
726 })
727 }
728}
729
/// Options for a single `CodememEngine::analyze` run.
730pub struct AnalyzeOptions<'a> {
    /// Root path to index; also used as the index cache root and passed to enrichments.
734 pub path: &'a Path,
    /// Namespace passed to the SCIP, persist, enrichment and change-detector steps.
735 pub namespace: &'a str,
    /// Forwarded to `run_enrichments` unchanged.
    /// NOTE(review): presumably the git history window in days — confirm.
736 pub git_days: u64,
    /// Change detector handed to the indexer; ignored when `force` is set.
737 pub change_detector: Option<index::incremental::ChangeDetector>,
    /// Optional callback invoked with `AnalyzeProgress::Embedding` while persisting
    /// (not used when `skip_embed` is set).
738 pub progress: Option<Box<dyn Fn(AnalyzeProgress) + Send + 'a>>,
    /// Skip the SCIP phase even when it is enabled in the engine config.
739 pub skip_scip: bool,
    /// Persist via `persist_graph_only` and skip the up-front embeddings/vector/BM25 lock step.
741 pub skip_embed: bool,
    /// Skip enrichments; the result then carries an empty JSON object and zero insights.
743 pub skip_enrich: bool,
    /// Index with a fresh `Indexer`, discarding `change_detector`.
745 pub force: bool,
747}
748
/// Progress events reported through `AnalyzeOptions::progress`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AnalyzeProgress {
    /// Emitted while persisting with embeddings: `done` items out of `total`.
    Embedding { done: usize, total: usize },
}
754
/// Summary returned by `CodememEngine::analyze`.
755#[derive(Debug)]
757pub struct AnalyzeResult {
    /// `files_parsed` from the index result.
758 pub files_parsed: usize,
    /// `files_skipped` from the index result.
759 pub files_skipped: usize,
    /// `total_symbols` from the index result.
760 pub symbols_found: usize,
    /// The fields from `edges_resolved` through `symbols_pruned` are copied
    /// from the persist step's result.
761 pub edges_resolved: usize,
762 pub chunks_stored: usize,
763 pub symbols_embedded: usize,
764 pub chunks_embedded: usize,
765 pub chunks_pruned: usize,
766 pub symbols_pruned: usize,
    /// Enrichment output; an empty JSON object when enrichment was skipped.
767 pub enrichment_results: serde_json::Value,
    /// Insight count from enrichment; zero when enrichment was skipped.
768 pub total_insights: usize,
    /// Result of `find_important_nodes(10, 0.85)`; empty if that call fails.
769 pub top_nodes: Vec<crate::graph_ops::RankedNode>,
    /// Number of communities from `louvain_communities(1.0)`; zero if that call fails.
770 pub community_count: usize,
    /// Number of nodes in the SCIP build result; zero when SCIP did not run or failed.
771 pub scip_nodes_created: usize,
    /// Number of edges in the SCIP build result; zero when SCIP did not run or failed.
773 pub scip_edges_created: usize,
    /// Number of files covered by SCIP; zero when SCIP did not run or failed.
775 pub scip_files_covered: usize,
777}
778
/// Context assembled by `CodememEngine::session_context`.
779#[derive(Debug)]
781pub struct SessionContext {
    /// Scanned memories whose `created_at` falls within the past 24 hours.
782 pub recent_memories: Vec<MemoryNode>,
    /// Scanned memories carrying the `pending-analysis` tag.
784 pub pending_analyses: Vec<MemoryNode>,
    /// Output of `patterns::detect_patterns`; empty if detection fails.
786 pub active_patterns: Vec<DetectedPattern>,
    /// Summary of the first session returned by `list_sessions(namespace, 1)`, if any.
    /// NOTE(review): presumably the most recent session — confirm the listing order.
788 pub last_session_summary: Option<String>,
790}
791
792#[cfg(test)]
793mod tests {
794 use super::*;
795 use codemem_core::{Edge, GraphBackend, GraphNode, NodeKind, RelationshipType};
796 use std::collections::{HashMap, HashSet};
797
798 fn test_engine() -> CodememEngine {
800 let dir = tempfile::tempdir().unwrap();
801 let db_path = dir.path().join("test.db");
802 let _ = Box::leak(Box::new(dir));
804 CodememEngine::from_db_path(&db_path).unwrap()
805 }
806
807 fn graph_node(id: &str, kind: NodeKind, file_path: Option<&str>) -> GraphNode {
808 let mut payload = HashMap::new();
809 if let Some(fp) = file_path {
810 payload.insert(
811 "file_path".to_string(),
812 serde_json::Value::String(fp.to_string()),
813 );
814 }
815 GraphNode {
816 id: id.to_string(),
817 kind,
818 label: id.to_string(),
819 payload,
820 centrality: 0.0,
821 memory_id: None,
822 namespace: None,
823 }
824 }
825
826 fn edge(src: &str, dst: &str, rel: RelationshipType) -> Edge {
827 Edge {
828 id: format!("{rel}:{src}->{dst}"),
829 src: src.to_string(),
830 dst: dst.to_string(),
831 relationship: rel,
832 weight: 1.0,
833 properties: HashMap::new(),
834 created_at: chrono::Utc::now(),
835 valid_from: None,
836 valid_to: None,
837 }
838 }
839
840 #[test]
843 fn cleanup_stale_symbols_deletes_stale_nodes() {
844 let engine = test_engine();
845
846 let file = graph_node("file:src/a.rs", NodeKind::File, None);
848 let sym_keep = graph_node("sym:a::keep", NodeKind::Function, Some("src/a.rs"));
849 let sym_stale = graph_node("sym:a::stale", NodeKind::Function, Some("src/a.rs"));
850
851 {
852 let mut g = engine.lock_graph().unwrap();
853 g.add_node(file).unwrap();
854 g.add_node(sym_keep.clone()).unwrap();
855 g.add_node(sym_stale.clone()).unwrap();
856 g.add_edge(edge(
857 "file:src/a.rs",
858 "sym:a::keep",
859 RelationshipType::Contains,
860 ))
861 .unwrap();
862 g.add_edge(edge(
863 "file:src/a.rs",
864 "sym:a::stale",
865 RelationshipType::Contains,
866 ))
867 .unwrap();
868 }
869 let _ =
871 engine
872 .storage
873 .insert_graph_node(&graph_node("file:src/a.rs", NodeKind::File, None));
874 let _ = engine.storage.insert_graph_node(&sym_keep);
875 let _ = engine.storage.insert_graph_node(&sym_stale);
876 let _ = engine.storage.insert_graph_edge(&edge(
877 "file:src/a.rs",
878 "sym:a::keep",
879 RelationshipType::Contains,
880 ));
881 let _ = engine.storage.insert_graph_edge(&edge(
882 "file:src/a.rs",
883 "sym:a::stale",
884 RelationshipType::Contains,
885 ));
886
887 let old_ids: HashSet<String> = ["sym:a::keep", "sym:a::stale"]
888 .iter()
889 .map(|s| s.to_string())
890 .collect();
891 let new_ids: HashSet<String> = ["sym:a::keep"].iter().map(|s| s.to_string()).collect();
892
893 let cleaned = engine
894 .cleanup_stale_symbols("src/a.rs", &old_ids, &new_ids)
895 .unwrap();
896 assert_eq!(cleaned, 1);
897
898 let g = engine.lock_graph().unwrap();
900 assert!(g.get_node("sym:a::stale").unwrap().is_none());
901 assert!(g.get_node("sym:a::keep").unwrap().is_some());
902 }
903
904 #[test]
905 fn cleanup_stale_symbols_redirects_memory_edges_to_graph() {
906 let engine = test_engine();
907
908 let file = graph_node("file:src/a.rs", NodeKind::File, None);
909 let sym_stale = graph_node("sym:a::old_fn", NodeKind::Function, Some("src/a.rs"));
910 let mem = graph_node("mem-uuid-123", NodeKind::Memory, None);
911
912 {
913 let mut g = engine.lock_graph().unwrap();
914 g.add_node(file.clone()).unwrap();
915 g.add_node(sym_stale.clone()).unwrap();
916 g.add_node(mem.clone()).unwrap();
917 g.add_edge(edge(
918 "file:src/a.rs",
919 "sym:a::old_fn",
920 RelationshipType::Contains,
921 ))
922 .unwrap();
923 g.add_edge(edge(
924 "mem-uuid-123",
925 "sym:a::old_fn",
926 RelationshipType::RelatesTo,
927 ))
928 .unwrap();
929 }
930 let _ = engine.storage.insert_graph_node(&file);
931 let _ = engine.storage.insert_graph_node(&sym_stale);
932 let _ = engine.storage.insert_graph_node(&mem);
933 let _ = engine.storage.insert_graph_edge(&edge(
934 "file:src/a.rs",
935 "sym:a::old_fn",
936 RelationshipType::Contains,
937 ));
938 let _ = engine.storage.insert_graph_edge(&edge(
939 "mem-uuid-123",
940 "sym:a::old_fn",
941 RelationshipType::RelatesTo,
942 ));
943
944 let old_ids: HashSet<String> = ["sym:a::old_fn"].iter().map(|s| s.to_string()).collect();
945 let new_ids: HashSet<String> = HashSet::new();
946
947 engine
948 .cleanup_stale_symbols("src/a.rs", &old_ids, &new_ids)
949 .unwrap();
950
951 let g = engine.lock_graph().unwrap();
953 let file_edges = g.get_edges("file:src/a.rs").unwrap();
954 let has_redirect = file_edges.iter().any(|e| {
955 (e.src == "mem-uuid-123" || e.dst == "mem-uuid-123") && e.id.contains("-redirected")
956 });
957 assert!(
958 has_redirect,
959 "redirected memory→file edge should be in the in-memory graph"
960 );
961 }
962
963 #[test]
964 fn cleanup_stale_symbols_deduplicates_redirects() {
965 let engine = test_engine();
966
967 let file = graph_node("file:src/a.rs", NodeKind::File, None);
968 let sym1 = graph_node("sym:a::fn1", NodeKind::Function, Some("src/a.rs"));
969 let sym2 = graph_node("sym:a::fn2", NodeKind::Function, Some("src/a.rs"));
970 let mem = graph_node("mem-uuid-456", NodeKind::Memory, None);
971
972 let _ = engine.storage.insert_graph_node(&file);
974 let _ = engine.storage.insert_graph_node(&sym1);
975 let _ = engine.storage.insert_graph_node(&sym2);
976 let _ = engine.storage.insert_graph_node(&mem);
977 let _ = engine.storage.insert_graph_edge(&edge(
978 "mem-uuid-456",
979 "sym:a::fn1",
980 RelationshipType::RelatesTo,
981 ));
982 let _ = engine.storage.insert_graph_edge(&edge(
983 "mem-uuid-456",
984 "sym:a::fn2",
985 RelationshipType::RelatesTo,
986 ));
987
988 {
989 let mut g = engine.lock_graph().unwrap();
990 g.add_node(file).unwrap();
991 g.add_node(sym1).unwrap();
992 g.add_node(sym2).unwrap();
993 g.add_node(mem).unwrap();
994 }
995
996 let old_ids: HashSet<String> = ["sym:a::fn1", "sym:a::fn2"]
997 .iter()
998 .map(|s| s.to_string())
999 .collect();
1000 let new_ids: HashSet<String> = HashSet::new();
1001
1002 engine
1003 .cleanup_stale_symbols("src/a.rs", &old_ids, &new_ids)
1004 .unwrap();
1005
1006 let g = engine.lock_graph().unwrap();
1008 let file_edges = g.get_edges("file:src/a.rs").unwrap();
1009 let redirect_count = file_edges
1010 .iter()
1011 .filter(|e| e.id.contains("-redirected"))
1012 .count();
1013 assert_eq!(
1014 redirect_count, 1,
1015 "should have exactly 1 redirected edge, got {redirect_count}"
1016 );
1017 }
1018
1019 #[test]
1022 fn detect_orphans_skips_file_check_when_no_root() {
1023 let engine = test_engine();
1024
1025 let sym = graph_node(
1027 "sym:nonexistent::fn",
1028 NodeKind::Function,
1029 Some("does/not/exist.rs"),
1030 );
1031 let _ = engine.storage.insert_graph_node(&sym);
1032 {
1033 let mut g = engine.lock_graph().unwrap();
1034 g.add_node(sym).unwrap();
1035 }
1036
1037 let (symbols_cleaned, _) = engine.detect_orphans(None).unwrap();
1039 assert_eq!(
1040 symbols_cleaned, 0,
1041 "detect_orphans(None) should not delete nodes based on file existence"
1042 );
1043 }
1044
1045 #[test]
1046 fn detect_orphans_removes_missing_files_with_root() {
1047 let dir = tempfile::tempdir().unwrap();
1048 let db_path = dir.path().join("test.db");
1049 let engine = CodememEngine::from_db_path(&db_path).unwrap();
1050
1051 let sym = graph_node(
1053 "sym:missing::fn",
1054 NodeKind::Function,
1055 Some("src/missing.rs"),
1056 );
1057 let _ = engine.storage.insert_graph_node(&sym);
1058 {
1059 let mut g = engine.lock_graph().unwrap();
1060 g.add_node(sym).unwrap();
1061 }
1062
1063 let (symbols_cleaned, _) = engine.detect_orphans(Some(dir.path())).unwrap();
1064 assert_eq!(symbols_cleaned, 1);
1065 }
1066
1067 #[test]
1068 fn detect_orphans_keeps_existing_files() {
1069 let dir = tempfile::tempdir().unwrap();
1070 let db_path = dir.path().join("test.db");
1071 let engine = CodememEngine::from_db_path(&db_path).unwrap();
1072
1073 let src_dir = dir.path().join("src");
1075 std::fs::create_dir_all(&src_dir).unwrap();
1076 std::fs::write(src_dir.join("exists.rs"), "fn main() {}").unwrap();
1077
1078 let sym = graph_node(
1079 "sym:exists::main",
1080 NodeKind::Function,
1081 Some("src/exists.rs"),
1082 );
1083 let _ = engine.storage.insert_graph_node(&sym);
1084 {
1085 let mut g = engine.lock_graph().unwrap();
1086 g.add_node(sym).unwrap();
1087 }
1088
1089 let (symbols_cleaned, _) = engine.detect_orphans(Some(dir.path())).unwrap();
1090 assert_eq!(symbols_cleaned, 0);
1091 }
1092
1093 }