1use crate::index::{self, IndexAndResolveResult, Indexer};
2use crate::patterns;
3use crate::CodememEngine;
4use codemem_core::{CodememError, DetectedPattern, MemoryNode};
5use std::collections::HashSet;
6use std::path::Path;
7use std::sync::atomic::Ordering;
8
/// Returns `true` when the file looks like an OpenAPI, Swagger, or AsyncAPI spec.
///
/// Detection runs in two stages:
///
/// 1. The last `/`-separated component of `path` is lower-cased and compared against the
///    conventional spec file names (`openapi.yaml`, `swagger.json`, `asyncapi.yml`, ...).
/// 2. Otherwise, for files ending in `.yaml`, `.yml`, or `.json`, the first 300 bytes of
///    `content` are lower-cased and searched for a spec marker key, either quoted
///    (`"openapi"`) or YAML-style (`openapi:`).
///
/// Any other extension yields `false` without inspecting `content`.
fn is_spec_file_with_content(path: &str, content: &[u8]) -> bool {
    /// Number of leading bytes of `content` that are inspected for a marker.
    const PEEK_BYTES: usize = 300;
    const WELL_KNOWN_NAMES: [&str; 9] = [
        "openapi.yaml",
        "openapi.yml",
        "openapi.json",
        "swagger.yaml",
        "swagger.yml",
        "swagger.json",
        "asyncapi.yaml",
        "asyncapi.yml",
        "asyncapi.json",
    ];
    const SPEC_EXTENSIONS: [&str; 3] = [".yaml", ".yml", ".json"];
    const MARKERS: [&str; 6] = [
        "\"openapi\"",
        "\"swagger\"",
        "\"asyncapi\"",
        "openapi:",
        "swagger:",
        "asyncapi:",
    ];

    let filename_lower = path.rsplit('/').next().unwrap_or(path).to_lowercase();

    if WELL_KNOWN_NAMES.contains(&filename_lower.as_str()) {
        return true;
    }

    if !SPEC_EXTENSIONS
        .iter()
        .any(|ext| filename_lower.ends_with(*ext))
    {
        return false;
    }

    // The byte cut-off can land in the middle of a multi-byte UTF-8 character. Strict
    // decoding would then reject the entire peek and miss the marker, so decode lossily:
    // a broken trailing character just becomes U+FFFD and the rest stays searchable.
    let peek = &content[..content.len().min(PEEK_BYTES)];
    let peek_lower = String::from_utf8_lossy(peek).to_lowercase();

    MARKERS.iter().any(|marker| peek_lower.contains(*marker))
}
50
51impl CodememEngine {
52 pub fn save_index(&self) {
58 if let Some(ref db_path) = self.db_path {
59 if self.vector_ready() {
61 let idx_path = db_path.with_extension("idx");
62 if let Ok(mut vi) = self.lock_vector() {
63 if vi.needs_compaction() {
65 let ghost = vi.ghost_count();
66 let live = vi.stats().count;
67 tracing::info!(
68 "HNSW ghost compaction: {ghost} ghosts / {live} live entries, rebuilding..."
69 );
70 if let Ok(embeddings) = self.storage.list_all_embeddings() {
71 if let Err(e) = vi.rebuild_from_entries(&embeddings) {
72 tracing::warn!("HNSW compaction failed: {e}");
73 }
74 }
75 }
76 if let Err(e) = vi.save(&idx_path) {
77 tracing::warn!("Failed to save vector index: {e}");
78 }
79 }
80 }
81
82 if self.bm25_ready() {
84 let bm25_path = db_path.with_extension("bm25");
85 if let Ok(bm25) = self.lock_bm25() {
86 if bm25.needs_save() {
87 let data = bm25.serialize();
88 let tmp_path = db_path.with_extension("bm25.tmp");
89 if let Err(e) = std::fs::write(&tmp_path, &data)
90 .and_then(|_| std::fs::rename(&tmp_path, &bm25_path))
91 {
92 tracing::warn!("Failed to save BM25 index: {e}");
93 }
94 }
95 }
96 }
97 }
98 self.dirty.store(false, Ordering::Release);
99 }
100
101 pub fn reload_graph(&self) -> Result<(), CodememError> {
103 let new_graph = codemem_storage::graph::GraphEngine::from_storage(&*self.storage)?;
104 let mut graph = self.lock_graph()?;
105 *graph = Box::new(new_graph);
106 graph.recompute_centrality();
107 Ok(())
108 }
109
110 pub fn process_watch_event(
122 &self,
123 event: &crate::watch::WatchEvent,
124 namespace: Option<&str>,
125 project_root: Option<&Path>,
126 ) -> Result<(), CodememError> {
127 match event {
128 crate::watch::WatchEvent::FileChanged(path)
129 | crate::watch::WatchEvent::FileCreated(path) => {
130 self.index_single_file(path, namespace, project_root)?;
131 }
132 crate::watch::WatchEvent::FileDeleted(path) => {
133 let rel = if let Some(root) = project_root {
135 path.strip_prefix(root)
136 .unwrap_or(path)
137 .to_string_lossy()
138 .to_string()
139 } else {
140 path.to_string_lossy().to_string()
141 };
142 self.cleanup_file_nodes(&rel)?;
143 }
144 }
145 Ok(())
146 }
147
148 fn index_single_file(
158 &self,
159 path: &Path,
160 namespace: Option<&str>,
161 project_root: Option<&Path>,
162 ) -> Result<(), CodememError> {
163 let content = std::fs::read(path)?;
164
165 let path_str = if let Some(root) = project_root {
166 path.strip_prefix(root)
167 .unwrap_or(path)
168 .to_string_lossy()
169 .to_string()
170 } else {
171 path.to_string_lossy().to_string()
172 };
173
174 let hash = {
177 let mut cd_guard = self
178 .change_detector
179 .lock()
180 .map_err(|_| CodememError::LockPoisoned("change_detector".into()))?;
181 let ns = namespace.unwrap_or("");
182 let cd = cd_guard.get_or_insert_with(|| {
183 let mut cd = index::incremental::ChangeDetector::new();
184 cd.load_from_storage(&*self.storage, ns);
185 cd
186 });
187 let (changed, hash) = cd.check_changed(&path_str, &content);
188 if !changed {
189 tracing::debug!("Skipping unchanged file: {path_str}");
190 return Ok(());
191 }
192 if self.config.memory.expire_enrichments_on_reindex {
194 match self.storage.expire_memories_for_file(&path_str) {
195 Ok(0) => {}
196 Ok(n) => tracing::debug!("Expired {n} enrichment memories for {path_str}"),
197 Err(e) => tracing::warn!("Failed to expire memories for {path_str}: {e}"),
198 }
199 }
200 hash
201 };
202
203 if is_spec_file_with_content(&path_str, &content) {
206 self.reparse_spec_file(path, namespace.unwrap_or(""))?;
207 if let Ok(mut cd_guard) = self.change_detector.lock() {
209 if let Some(cd) = cd_guard.as_mut() {
210 cd.record_hash(&path_str, hash);
211 let _ = cd.save_to_storage(&*self.storage, namespace.unwrap_or(""));
212 }
213 }
214 return Ok(());
215 }
216
217 let parser = index::CodeParser::new();
218
219 let parse_result = match parser.parse_file(&path_str, &content) {
220 Some(pr) => pr,
221 None => return Ok(()), };
223
224 let mut file_paths = HashSet::new();
226 file_paths.insert(parse_result.file_path.clone());
227
228 let mut resolver = index::ReferenceResolver::new();
234 resolver.add_symbols(&parse_result.symbols);
235 if let Ok(graph) = self.lock_graph() {
236 let graph_symbols: Vec<index::Symbol> = graph
237 .get_all_nodes()
238 .iter()
239 .filter(|n| n.id.starts_with("sym:"))
240 .filter_map(index::symbol::symbol_from_graph_node)
241 .collect();
242 resolver.add_symbols(&graph_symbols);
243 }
244 let resolve_result = resolver.resolve_all_with_unresolved(&parse_result.references);
245
246 let results = IndexAndResolveResult {
247 index: index::IndexResult {
248 files_scanned: 1,
249 files_parsed: 1,
250 files_skipped: 0,
251 total_symbols: parse_result.symbols.len(),
252 total_references: parse_result.references.len(),
253 total_chunks: parse_result.chunks.len(),
254 parse_results: Vec::new(),
255 },
256 symbols: parse_result.symbols,
257 references: parse_result.references,
258 chunks: parse_result.chunks,
259 file_paths,
260 edges: resolve_result.edges,
261 unresolved: resolve_result.unresolved,
262 root_path: project_root
263 .map(|p| p.to_path_buf())
264 .unwrap_or_else(|| path.to_path_buf()),
265 scip_build: None,
266 };
267
268 self.persist_index_results(&results, namespace)?;
269
270 if let Ok(mut cd_guard) = self.change_detector.lock() {
272 if let Some(cd) = cd_guard.as_mut() {
273 cd.record_hash(&path_str, hash);
274 if let Err(e) = cd.save_to_storage(&*self.storage, namespace.unwrap_or("")) {
275 tracing::warn!("Failed to save file hash for {path_str}: {e}");
276 }
277 }
278 }
279
280 Ok(())
281 }
282
283 pub fn cleanup_stale_symbols(
298 &self,
299 file_path: &str,
300 old_symbol_ids: &HashSet<String>,
301 new_symbol_ids: &HashSet<String>,
302 ) -> Result<usize, CodememError> {
303 let stale_ids: Vec<&String> = old_symbol_ids
305 .iter()
306 .filter(|id| !new_symbol_ids.contains(*id))
307 .collect();
308
309 if stale_ids.is_empty() {
310 return Ok(0);
311 }
312
313 let count = stale_ids.len();
314 tracing::info!(
315 "Cleaning up {count} stale symbols for {file_path}: {:?}",
316 stale_ids
317 );
318
319 let file_node_id = format!("file:{file_path}");
320 let mut redirected_pairs: std::collections::HashSet<(String, String)> =
321 std::collections::HashSet::new();
322 let mut redirected_edges: Vec<codemem_core::Edge> = Vec::new();
323 for sym_id in &stale_ids {
324 let edges = self.storage.get_edges_for_node(sym_id.as_str())?;
328 for edge in &edges {
329 let other = if edge.src.as_str() == sym_id.as_str() {
330 &edge.dst
331 } else {
332 &edge.src
333 };
334 let is_code_node = other.starts_with("sym:")
335 || other.starts_with("file:")
336 || other.starts_with("chunk:")
337 || other.starts_with("pkg:");
338 if !is_code_node {
339 let pair = (other.to_string(), file_node_id.clone());
341 if !redirected_pairs.insert(pair) {
342 continue;
343 }
344 let mut redirected = edge.clone();
345 if redirected.src.as_str() == sym_id.as_str() {
346 redirected.src = file_node_id.clone();
347 } else {
348 redirected.dst = file_node_id.clone();
349 }
350 redirected.id = format!("{}-redirected", edge.id);
353 if let Err(e) = self.storage.insert_graph_edge(&redirected) {
354 tracing::warn!("Failed to redirect memory edge {}: {e}", edge.id);
355 }
356 redirected_edges.push(redirected);
357 }
358 }
359
360 if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
362 tracing::warn!("Failed to delete edges for stale symbol {sym_id}: {e}");
363 }
364 if let Err(e) = self.storage.delete_graph_node(sym_id) {
365 tracing::warn!("Failed to delete stale symbol node {sym_id}: {e}");
366 }
367 if let Err(e) = self.storage.delete_embedding(sym_id) {
368 tracing::warn!("Failed to delete embedding for stale symbol {sym_id}: {e}");
369 }
370 }
371
372 {
374 let mut graph = self.lock_graph()?;
375 for sym_id in &stale_ids {
376 if let Err(e) = graph.remove_node(sym_id.as_str()) {
377 tracing::warn!("Failed to remove stale {sym_id} from in-memory graph: {e}");
378 }
379 }
380 for edge in redirected_edges {
383 let _ = graph.add_edge(edge);
384 }
385 }
386 {
387 let mut vec = self.lock_vector()?;
388 for sym_id in &stale_ids {
389 if let Err(e) = vec.remove(sym_id.as_str()) {
390 tracing::warn!("Failed to remove stale {sym_id} from vector index: {e}");
391 }
392 }
393 }
394 if let Ok(mut bm25) = self.lock_bm25() {
397 for sym_id in &stale_ids {
398 bm25.remove_document(sym_id);
399 }
400 }
401
402 Ok(count)
403 }
404
405 fn cleanup_file_nodes(&self, file_path: &str) -> Result<(), CodememError> {
409 let file_node_id = format!("file:{file_path}");
410
411 let chunk_prefix = format!("chunk:{file_path}:");
413 if let Err(e) = self.storage.delete_graph_nodes_by_prefix(&chunk_prefix) {
414 tracing::warn!("Failed to delete chunk nodes for {file_path}: {e}");
415 }
416
417 let graph = self.lock_graph()?;
419 let sym_ids: Vec<String> = graph
420 .get_all_nodes()
421 .into_iter()
422 .filter(|n| {
423 n.id.starts_with("sym:")
424 && n.payload.get("file_path").and_then(|v| v.as_str()) == Some(file_path)
425 })
426 .map(|n| n.id.clone())
427 .collect();
428 drop(graph);
429
430 for sym_id in &sym_ids {
431 if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
432 tracing::warn!("Failed to delete graph edges for {sym_id}: {e}");
433 }
434 if let Err(e) = self.storage.delete_graph_node(sym_id) {
435 tracing::warn!("Failed to delete graph node {sym_id}: {e}");
436 }
437 if let Err(e) = self.storage.delete_embedding(sym_id) {
438 tracing::warn!("Failed to delete embedding {sym_id}: {e}");
439 }
440 }
441
442 if let Err(e) = self.storage.delete_graph_edges_for_node(&file_node_id) {
444 tracing::warn!("Failed to delete graph edges for {file_node_id}: {e}");
445 }
446 if let Err(e) = self.storage.delete_graph_node(&file_node_id) {
447 tracing::warn!("Failed to delete graph node {file_node_id}: {e}");
448 }
449
450 let mut graph = self.lock_graph()?;
452 for sym_id in &sym_ids {
453 if let Err(e) = graph.remove_node(sym_id) {
454 tracing::warn!("Failed to remove {sym_id} from in-memory graph: {e}");
455 }
456 }
457 let chunk_ids: Vec<String> = graph
459 .get_all_nodes()
460 .into_iter()
461 .filter(|n| n.id.starts_with(&format!("chunk:{file_path}:")))
462 .map(|n| n.id.clone())
463 .collect();
464 for chunk_id in &chunk_ids {
465 if let Err(e) = graph.remove_node(chunk_id) {
466 tracing::warn!("Failed to remove {chunk_id} from in-memory graph: {e}");
467 }
468 }
469 if let Err(e) = graph.remove_node(&file_node_id) {
470 tracing::warn!("Failed to remove {file_node_id} from in-memory graph: {e}");
471 }
472 drop(graph);
473
474 let mut vec = self.lock_vector()?;
476 for sym_id in &sym_ids {
477 if let Err(e) = vec.remove(sym_id) {
478 tracing::warn!("Failed to remove {sym_id} from vector index: {e}");
479 }
480 }
481 for chunk_id in &chunk_ids {
482 if let Err(e) = vec.remove(chunk_id) {
483 tracing::warn!("Failed to remove {chunk_id} from vector index: {e}");
484 }
485 }
486 drop(vec);
487
488 if let Ok(mut bm25) = self.lock_bm25() {
490 for sym_id in &sym_ids {
491 bm25.remove_document(sym_id);
492 }
493 for chunk_id in &chunk_ids {
494 bm25.remove_document(chunk_id);
495 }
496 }
497
498 self.save_index();
499 Ok(())
500 }
501
502 pub fn detect_orphans(
512 &self,
513 project_root: Option<&Path>,
514 ) -> Result<(usize, usize), CodememError> {
515 let all_nodes = self.storage.all_graph_nodes()?;
517 let node_ids: HashSet<String> = all_nodes.iter().map(|n| n.id.clone()).collect();
518
519 let mut orphan_sym_ids: Vec<String> = Vec::new();
520
521 if let Some(root) = project_root {
524 for node in &all_nodes {
525 if node.id.starts_with("sym:") || node.id.starts_with("chunk:") {
527 let file_path = match node.payload.get("file_path").and_then(|v| v.as_str()) {
528 Some(fp) => fp,
529 None => continue,
530 };
531 let abs_path = root.join(file_path);
532 if !abs_path.exists() {
533 orphan_sym_ids.push(node.id.clone());
534 }
535 }
536 else if let Some(fp) = node.id.strip_prefix("file:") {
538 let abs_path = root.join(fp);
539 if !abs_path.exists() {
540 orphan_sym_ids.push(node.id.clone());
541 }
542 }
543 else if let Some(dir) = node.id.strip_prefix("pkg:") {
545 let dir_trimmed = dir.trim_end_matches('/');
546 let abs_path = root.join(dir_trimmed);
547 if !abs_path.exists() {
548 orphan_sym_ids.push(node.id.clone());
549 }
550 }
551 }
552 }
553
554 let all_edges = self.storage.all_graph_edges()?;
556 let mut dangling_edge_ids: Vec<String> = Vec::new();
557 for edge in &all_edges {
558 if !node_ids.contains(&edge.src) || !node_ids.contains(&edge.dst) {
559 dangling_edge_ids.push(edge.id.clone());
560 }
561 }
562
563 let symbols_cleaned = orphan_sym_ids.len();
564
565 for sym_id in &orphan_sym_ids {
567 if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
568 tracing::warn!("Orphan cleanup: failed to delete edges for {sym_id}: {e}");
569 }
570 if let Err(e) = self.storage.delete_graph_node(sym_id) {
571 tracing::warn!("Orphan cleanup: failed to delete node {sym_id}: {e}");
572 }
573 if let Err(e) = self.storage.delete_embedding(sym_id) {
574 tracing::warn!("Orphan cleanup: failed to delete embedding {sym_id}: {e}");
575 }
576 }
577
578 if !orphan_sym_ids.is_empty() {
580 if let Ok(mut graph) = self.lock_graph() {
581 for sym_id in &orphan_sym_ids {
582 let _ = graph.remove_node(sym_id);
583 }
584 }
585 if let Ok(mut vec) = self.lock_vector() {
586 for sym_id in &orphan_sym_ids {
587 let _ = vec.remove(sym_id);
588 }
589 }
590 }
591
592 let mut edges_cleaned = 0usize;
594 for edge_id in &dangling_edge_ids {
595 match self.storage.delete_graph_edge(edge_id) {
596 Ok(true) => edges_cleaned += 1,
597 Ok(false) => {} Err(e) => {
599 tracing::warn!("Orphan cleanup: failed to delete dangling edge {edge_id}: {e}");
600 }
601 }
602 }
603
604 if symbols_cleaned > 0 || edges_cleaned > 0 {
605 tracing::info!(
606 "Orphan scan: cleaned {symbols_cleaned} symbol/chunk nodes, {edges_cleaned} dangling edges"
607 );
608 }
609
610 Ok((symbols_cleaned, edges_cleaned))
611 }
612
613 fn reparse_spec_file(&self, path: &Path, namespace: &str) -> Result<(), CodememError> {
617 use crate::index::spec_parser::{parse_asyncapi, parse_openapi, SpecFileResult};
618
619 let result = if let Some(openapi) = parse_openapi(path) {
620 Some(SpecFileResult::OpenApi(openapi))
621 } else {
622 parse_asyncapi(path).map(SpecFileResult::AsyncApi)
623 };
624
625 match result {
626 Some(SpecFileResult::OpenApi(spec)) => {
627 for ep in &spec.endpoints {
628 let _ = self.storage.store_api_endpoint(
629 &ep.method,
630 &ep.path,
631 ep.operation_id.as_deref().unwrap_or(""),
632 namespace,
633 );
634 }
635 tracing::info!(
636 "Re-parsed OpenAPI spec: {} endpoints from {}",
637 spec.endpoints.len(),
638 path.display()
639 );
640 }
641 Some(SpecFileResult::AsyncApi(spec)) => {
642 for ch in &spec.channels {
643 let _ = self.storage.store_event_channel(
644 &ch.channel,
645 &ch.direction,
646 ch.protocol.as_deref().unwrap_or(""),
647 ch.operation_id.as_deref().unwrap_or(""),
648 namespace,
649 ch.description.as_deref().unwrap_or(""),
650 );
651 }
652 tracing::info!(
653 "Re-parsed AsyncAPI spec: {} channels from {}",
654 spec.channels.len(),
655 path.display()
656 );
657 }
658 None => {
659 tracing::debug!("Not a recognized spec file: {}", path.display());
660 }
661 }
662 Ok(())
663 }
664
665 pub fn analyze(&self, options: AnalyzeOptions<'_>) -> Result<AnalyzeResult, CodememError> {
673 let root = options.path;
674
675 if !options.skip_embed {
679 drop(self.lock_embeddings());
680 drop(self.lock_vector());
681 drop(self.lock_bm25());
682 }
683
684 let (scip_covered, scip_build) = if !options.skip_scip && self.config.scip.enabled {
687 match self.run_scip_phase(root, options.namespace) {
688 Ok((covered, build)) => (Some(covered), Some(build)),
689 Err(e) => {
690 tracing::warn!("SCIP phase failed, falling back to ast-grep only: {e}");
691 (None, None)
692 }
693 }
694 } else {
695 (None, None)
696 };
697
698 let scip_nodes_created = scip_build.as_ref().map_or(0, |b| b.nodes.len());
699 let scip_edges_created = scip_build.as_ref().map_or(0, |b| b.edges.len());
700 let scip_files_covered = scip_covered.as_ref().map_or(0, |s| s.len());
701
702 let mut indexer = match options.change_detector {
705 Some(cd) if !options.force => Indexer::with_change_detector(cd),
706 _ => Indexer::new(),
707 };
708 let resolved =
709 indexer.index_and_resolve_with_scip(root, scip_covered.as_ref(), scip_build)?;
710
711 let persist = if options.skip_embed {
713 self.persist_graph_only(&resolved, Some(options.namespace))?
714 } else if let Some(ref on_progress) = options.progress {
715 self.persist_index_results_with_progress(
716 &resolved,
717 Some(options.namespace),
718 |done, total| {
719 on_progress(AnalyzeProgress::Embedding { done, total });
720 },
721 )?
722 } else {
723 self.persist_index_results(&resolved, Some(options.namespace))?
724 };
725
726 {
728 if let Ok(mut cache) = self.lock_index_cache() {
729 *cache = Some(crate::IndexCache {
730 symbols: resolved.symbols,
731 chunks: resolved.chunks,
732 root_path: root.to_string_lossy().to_string(),
733 });
734 }
735 }
736
737 let enrichment = if options.skip_enrich {
739 crate::enrichment::EnrichmentPipelineResult {
740 results: serde_json::json!({}),
741 total_insights: 0,
742 }
743 } else {
744 let path_str = root.to_str().unwrap_or("");
745 self.run_enrichments(
746 path_str,
747 &[],
748 options.git_days,
749 Some(options.namespace),
750 None,
751 )
752 };
753
754 self.lock_graph()?.recompute_centrality();
756
757 let top_nodes = self.find_important_nodes(10, 0.85).unwrap_or_default();
759 let community_count = self.louvain_communities(1.0).map(|c| c.len()).unwrap_or(0);
760
761 self.save_index();
763
764 indexer
766 .change_detector()
767 .save_to_storage(self.storage(), options.namespace)?;
768
769 Ok(AnalyzeResult {
770 files_parsed: resolved.index.files_parsed,
771 files_skipped: resolved.index.files_skipped,
772 symbols_found: resolved.index.total_symbols,
773 edges_resolved: persist.edges_resolved,
774 chunks_stored: persist.chunks_stored,
775 symbols_embedded: persist.symbols_embedded,
776 chunks_embedded: persist.chunks_embedded,
777 chunks_pruned: persist.chunks_pruned,
778 symbols_pruned: persist.symbols_pruned,
779 enrichment_results: enrichment.results,
780 total_insights: enrichment.total_insights,
781 top_nodes,
782 community_count,
783 scip_nodes_created,
784 scip_edges_created,
785 scip_files_covered,
786 })
787 }
788
789 fn run_scip_phase(
791 &self,
792 root: &Path,
793 namespace: &str,
794 ) -> Result<(HashSet<String>, index::scip::graph_builder::ScipBuildResult), CodememError> {
795 let orchestrator =
796 index::scip::orchestrate::ScipOrchestrator::new(self.config.scip.clone());
797 let orch_result = orchestrator.run(root, namespace)?;
798
799 if orch_result.scip_result.covered_files.is_empty() {
800 return Ok((
801 HashSet::new(),
802 index::scip::graph_builder::ScipBuildResult::default(),
803 ));
804 }
805
806 for (lang, err) in &orch_result.failed_languages {
807 tracing::warn!("SCIP indexer for {:?} failed: {}", lang, err);
808 }
809 for lang in &orch_result.indexed_languages {
810 tracing::info!("SCIP indexed {:?} successfully", lang);
811 }
812
813 let build = index::scip::graph_builder::build_graph(
814 &orch_result.scip_result,
815 Some(namespace),
816 &self.config.scip,
817 );
818 let covered: HashSet<String> = build.files_covered.clone();
819
820 tracing::info!(
821 "SCIP phase: {} nodes, {} edges, {} ext nodes, {} files covered, {} doc memories",
822 build.nodes.len(),
823 build.edges.len(),
824 build.ext_nodes_created,
825 covered.len(),
826 build.doc_memories_created,
827 );
828
829 Ok((covered, build))
830 }
831
832 pub fn session_context(&self, namespace: Option<&str>) -> Result<SessionContext, CodememError> {
837 let now = chrono::Utc::now();
838 let cutoff_24h = now - chrono::Duration::hours(24);
839
840 let ids = match namespace {
842 Some(ns) => self.storage.list_memory_ids_for_namespace(ns)?,
843 None => self.storage.list_memory_ids()?,
844 };
845
846 let mut recent_memories = Vec::new();
847 let mut pending_analyses = Vec::new();
848
849 for id in ids.iter().rev().take(200) {
850 if let Ok(Some(m)) = self.storage.get_memory_no_touch(id) {
851 if m.tags.contains(&"pending-analysis".to_string()) {
853 pending_analyses.push(m.clone());
854 }
855 if m.created_at >= cutoff_24h {
857 recent_memories.push(m);
858 }
859 if recent_memories.len() >= 50 && pending_analyses.len() >= 10 {
860 break;
861 }
862 }
863 }
864
865 let session_count = self.storage.session_count(namespace).unwrap_or(1).max(1);
867 let active_patterns = patterns::detect_patterns(
868 &*self.storage,
869 namespace,
870 2, session_count,
872 )
873 .unwrap_or_default();
874
875 let last_session_summary = self
877 .storage
878 .list_sessions(namespace, 1)?
879 .into_iter()
880 .next()
881 .and_then(|s| s.summary);
882
883 Ok(SessionContext {
884 recent_memories,
885 pending_analyses,
886 active_patterns,
887 last_session_summary,
888 })
889 }
890}
891
/// Inputs for [`CodememEngine::analyze`].
pub struct AnalyzeOptions<'a> {
    /// Root directory to analyze.
    pub path: &'a Path,
    /// Namespace under which results are persisted.
    pub namespace: &'a str,
    /// Passed through to the enrichment step.
    // NOTE(review): presumably the git-history window in days — confirm against `run_enrichments`.
    pub git_days: u64,
    /// Change detector handed to the indexer; ignored when `force` is set.
    pub change_detector: Option<index::incremental::ChangeDetector>,
    /// Optional callback invoked with [`AnalyzeProgress`] updates while persisting.
    /// Not used when `skip_embed` is set.
    pub progress: Option<Box<dyn Fn(AnalyzeProgress) + Send + 'a>>,
    /// Skip the SCIP phase even if it is enabled in the config.
    pub skip_scip: bool,
    /// Persist the graph only, without the embedding-producing persist variants.
    pub skip_embed: bool,
    /// Skip the enrichment step.
    pub skip_enrich: bool,
    /// Index with a fresh indexer instead of the supplied `change_detector`.
    pub force: bool,
}
910
/// Progress notifications emitted by [`CodememEngine::analyze`] through
/// `AnalyzeOptions::progress`.
#[derive(Debug, Clone)]
pub enum AnalyzeProgress {
    /// Embedding progress: `done` out of `total` items, as reported by the persist step.
    Embedding { done: usize, total: usize },
}
916
/// Summary returned by [`CodememEngine::analyze`].
#[derive(Debug)]
pub struct AnalyzeResult {
    /// Files parsed, as reported by the indexer.
    pub files_parsed: usize,
    /// Files skipped, as reported by the indexer.
    pub files_skipped: usize,
    /// Total symbols reported by the indexer.
    pub symbols_found: usize,
    /// Edges resolved, as reported by the persist step.
    pub edges_resolved: usize,
    /// Chunks stored, as reported by the persist step.
    pub chunks_stored: usize,
    /// Symbols embedded, as reported by the persist step.
    pub symbols_embedded: usize,
    /// Chunks embedded, as reported by the persist step.
    pub chunks_embedded: usize,
    /// Chunks pruned, as reported by the persist step.
    pub chunks_pruned: usize,
    /// Symbols pruned, as reported by the persist step.
    pub symbols_pruned: usize,
    /// Raw enrichment output; an empty JSON object when enrichment was skipped.
    pub enrichment_results: serde_json::Value,
    /// Insight count from enrichment; `0` when enrichment was skipped.
    pub total_insights: usize,
    /// Result of `find_important_nodes(10, 0.85)`; empty if that call failed.
    pub top_nodes: Vec<crate::graph_ops::RankedNode>,
    /// Number of communities from `louvain_communities(1.0)`; `0` if that call failed.
    pub community_count: usize,
    /// Nodes in the SCIP build result; `0` when SCIP did not run or failed.
    pub scip_nodes_created: usize,
    /// Edges in the SCIP build result; `0` when SCIP did not run or failed.
    pub scip_edges_created: usize,
    /// Files covered by SCIP; `0` when SCIP did not run or failed.
    pub scip_files_covered: usize,
}
940
/// Context returned by [`CodememEngine::session_context`].
#[derive(Debug)]
pub struct SessionContext {
    /// Scanned memories created within the last 24 hours.
    pub recent_memories: Vec<MemoryNode>,
    /// Scanned memories tagged `pending-analysis`.
    pub pending_analyses: Vec<MemoryNode>,
    /// Patterns detected for the namespace; empty when detection failed.
    pub active_patterns: Vec<DetectedPattern>,
    /// Summary of the first session returned by `list_sessions(namespace, 1)`, if any.
    pub last_session_summary: Option<String>,
}
953
954#[cfg(test)]
955mod tests {
956 use super::*;
957 use codemem_core::{Edge, GraphNode, NodeKind, RelationshipType};
958 use std::collections::{HashMap, HashSet};
959
960 fn test_engine() -> CodememEngine {
962 let dir = tempfile::tempdir().unwrap();
963 let db_path = dir.path().join("test.db");
964 let _ = Box::leak(Box::new(dir));
966 CodememEngine::from_db_path(&db_path).unwrap()
967 }
968
969 fn graph_node(id: &str, kind: NodeKind, file_path: Option<&str>) -> GraphNode {
970 let mut payload = HashMap::new();
971 if let Some(fp) = file_path {
972 payload.insert(
973 "file_path".to_string(),
974 serde_json::Value::String(fp.to_string()),
975 );
976 }
977 GraphNode {
978 id: id.to_string(),
979 kind,
980 label: id.to_string(),
981 payload,
982 centrality: 0.0,
983 memory_id: None,
984 namespace: None,
985 valid_from: None,
986 valid_to: None,
987 }
988 }
989
990 fn edge(src: &str, dst: &str, rel: RelationshipType) -> Edge {
991 Edge {
992 id: format!("{rel}:{src}->{dst}"),
993 src: src.to_string(),
994 dst: dst.to_string(),
995 relationship: rel,
996 weight: 1.0,
997 properties: HashMap::new(),
998 created_at: chrono::Utc::now(),
999 valid_from: None,
1000 valid_to: None,
1001 }
1002 }
1003
1004 #[test]
1007 fn cleanup_stale_symbols_deletes_stale_nodes() {
1008 let engine = test_engine();
1009
1010 let file = graph_node("file:src/a.rs", NodeKind::File, None);
1012 let sym_keep = graph_node("sym:a::keep", NodeKind::Function, Some("src/a.rs"));
1013 let sym_stale = graph_node("sym:a::stale", NodeKind::Function, Some("src/a.rs"));
1014
1015 {
1016 let mut g = engine.lock_graph().unwrap();
1017 g.add_node(file).unwrap();
1018 g.add_node(sym_keep.clone()).unwrap();
1019 g.add_node(sym_stale.clone()).unwrap();
1020 g.add_edge(edge(
1021 "file:src/a.rs",
1022 "sym:a::keep",
1023 RelationshipType::Contains,
1024 ))
1025 .unwrap();
1026 g.add_edge(edge(
1027 "file:src/a.rs",
1028 "sym:a::stale",
1029 RelationshipType::Contains,
1030 ))
1031 .unwrap();
1032 }
1033 let _ =
1035 engine
1036 .storage
1037 .insert_graph_node(&graph_node("file:src/a.rs", NodeKind::File, None));
1038 let _ = engine.storage.insert_graph_node(&sym_keep);
1039 let _ = engine.storage.insert_graph_node(&sym_stale);
1040 let _ = engine.storage.insert_graph_edge(&edge(
1041 "file:src/a.rs",
1042 "sym:a::keep",
1043 RelationshipType::Contains,
1044 ));
1045 let _ = engine.storage.insert_graph_edge(&edge(
1046 "file:src/a.rs",
1047 "sym:a::stale",
1048 RelationshipType::Contains,
1049 ));
1050
1051 let old_ids: HashSet<String> = ["sym:a::keep", "sym:a::stale"]
1052 .iter()
1053 .map(|s| s.to_string())
1054 .collect();
1055 let new_ids: HashSet<String> = ["sym:a::keep"].iter().map(|s| s.to_string()).collect();
1056
1057 let cleaned = engine
1058 .cleanup_stale_symbols("src/a.rs", &old_ids, &new_ids)
1059 .unwrap();
1060 assert_eq!(cleaned, 1);
1061
1062 let g = engine.lock_graph().unwrap();
1064 assert!(g.get_node("sym:a::stale").unwrap().is_none());
1065 assert!(g.get_node("sym:a::keep").unwrap().is_some());
1066 }
1067
1068 #[test]
1069 fn cleanup_stale_symbols_redirects_memory_edges_to_graph() {
1070 let engine = test_engine();
1071
1072 let file = graph_node("file:src/a.rs", NodeKind::File, None);
1073 let sym_stale = graph_node("sym:a::old_fn", NodeKind::Function, Some("src/a.rs"));
1074 let mem = graph_node("mem-uuid-123", NodeKind::Memory, None);
1075
1076 {
1077 let mut g = engine.lock_graph().unwrap();
1078 g.add_node(file.clone()).unwrap();
1079 g.add_node(sym_stale.clone()).unwrap();
1080 g.add_node(mem.clone()).unwrap();
1081 g.add_edge(edge(
1082 "file:src/a.rs",
1083 "sym:a::old_fn",
1084 RelationshipType::Contains,
1085 ))
1086 .unwrap();
1087 g.add_edge(edge(
1088 "mem-uuid-123",
1089 "sym:a::old_fn",
1090 RelationshipType::RelatesTo,
1091 ))
1092 .unwrap();
1093 }
1094 let _ = engine.storage.insert_graph_node(&file);
1095 let _ = engine.storage.insert_graph_node(&sym_stale);
1096 let _ = engine.storage.insert_graph_node(&mem);
1097 let _ = engine.storage.insert_graph_edge(&edge(
1098 "file:src/a.rs",
1099 "sym:a::old_fn",
1100 RelationshipType::Contains,
1101 ));
1102 let _ = engine.storage.insert_graph_edge(&edge(
1103 "mem-uuid-123",
1104 "sym:a::old_fn",
1105 RelationshipType::RelatesTo,
1106 ));
1107
1108 let old_ids: HashSet<String> = ["sym:a::old_fn"].iter().map(|s| s.to_string()).collect();
1109 let new_ids: HashSet<String> = HashSet::new();
1110
1111 engine
1112 .cleanup_stale_symbols("src/a.rs", &old_ids, &new_ids)
1113 .unwrap();
1114
1115 let g = engine.lock_graph().unwrap();
1117 let file_edges = g.get_edges("file:src/a.rs").unwrap();
1118 let has_redirect = file_edges.iter().any(|e| {
1119 (e.src == "mem-uuid-123" || e.dst == "mem-uuid-123") && e.id.contains("-redirected")
1120 });
1121 assert!(
1122 has_redirect,
1123 "redirected memory→file edge should be in the in-memory graph"
1124 );
1125 }
1126
1127 #[test]
1128 fn cleanup_stale_symbols_deduplicates_redirects() {
1129 let engine = test_engine();
1130
1131 let file = graph_node("file:src/a.rs", NodeKind::File, None);
1132 let sym1 = graph_node("sym:a::fn1", NodeKind::Function, Some("src/a.rs"));
1133 let sym2 = graph_node("sym:a::fn2", NodeKind::Function, Some("src/a.rs"));
1134 let mem = graph_node("mem-uuid-456", NodeKind::Memory, None);
1135
1136 let _ = engine.storage.insert_graph_node(&file);
1138 let _ = engine.storage.insert_graph_node(&sym1);
1139 let _ = engine.storage.insert_graph_node(&sym2);
1140 let _ = engine.storage.insert_graph_node(&mem);
1141 let _ = engine.storage.insert_graph_edge(&edge(
1142 "mem-uuid-456",
1143 "sym:a::fn1",
1144 RelationshipType::RelatesTo,
1145 ));
1146 let _ = engine.storage.insert_graph_edge(&edge(
1147 "mem-uuid-456",
1148 "sym:a::fn2",
1149 RelationshipType::RelatesTo,
1150 ));
1151
1152 {
1153 let mut g = engine.lock_graph().unwrap();
1154 g.add_node(file).unwrap();
1155 g.add_node(sym1).unwrap();
1156 g.add_node(sym2).unwrap();
1157 g.add_node(mem).unwrap();
1158 }
1159
1160 let old_ids: HashSet<String> = ["sym:a::fn1", "sym:a::fn2"]
1161 .iter()
1162 .map(|s| s.to_string())
1163 .collect();
1164 let new_ids: HashSet<String> = HashSet::new();
1165
1166 engine
1167 .cleanup_stale_symbols("src/a.rs", &old_ids, &new_ids)
1168 .unwrap();
1169
1170 let g = engine.lock_graph().unwrap();
1172 let file_edges = g.get_edges("file:src/a.rs").unwrap();
1173 let redirect_count = file_edges
1174 .iter()
1175 .filter(|e| e.id.contains("-redirected"))
1176 .count();
1177 assert_eq!(
1178 redirect_count, 1,
1179 "should have exactly 1 redirected edge, got {redirect_count}"
1180 );
1181 }
1182
1183 #[test]
1186 fn detect_orphans_skips_file_check_when_no_root() {
1187 let engine = test_engine();
1188
1189 let sym = graph_node(
1191 "sym:nonexistent::fn",
1192 NodeKind::Function,
1193 Some("does/not/exist.rs"),
1194 );
1195 let _ = engine.storage.insert_graph_node(&sym);
1196 {
1197 let mut g = engine.lock_graph().unwrap();
1198 g.add_node(sym).unwrap();
1199 }
1200
1201 let (symbols_cleaned, _) = engine.detect_orphans(None).unwrap();
1203 assert_eq!(
1204 symbols_cleaned, 0,
1205 "detect_orphans(None) should not delete nodes based on file existence"
1206 );
1207 }
1208
1209 #[test]
1210 fn detect_orphans_removes_missing_files_with_root() {
1211 let dir = tempfile::tempdir().unwrap();
1212 let db_path = dir.path().join("test.db");
1213 let engine = CodememEngine::from_db_path(&db_path).unwrap();
1214
1215 let sym = graph_node(
1217 "sym:missing::fn",
1218 NodeKind::Function,
1219 Some("src/missing.rs"),
1220 );
1221 let _ = engine.storage.insert_graph_node(&sym);
1222 {
1223 let mut g = engine.lock_graph().unwrap();
1224 g.add_node(sym).unwrap();
1225 }
1226
1227 let (symbols_cleaned, _) = engine.detect_orphans(Some(dir.path())).unwrap();
1228 assert_eq!(symbols_cleaned, 1);
1229 }
1230
1231 #[test]
1232 fn detect_orphans_keeps_existing_files() {
1233 let dir = tempfile::tempdir().unwrap();
1234 let db_path = dir.path().join("test.db");
1235 let engine = CodememEngine::from_db_path(&db_path).unwrap();
1236
1237 let src_dir = dir.path().join("src");
1239 std::fs::create_dir_all(&src_dir).unwrap();
1240 std::fs::write(src_dir.join("exists.rs"), "fn main() {}").unwrap();
1241
1242 let sym = graph_node(
1243 "sym:exists::main",
1244 NodeKind::Function,
1245 Some("src/exists.rs"),
1246 );
1247 let _ = engine.storage.insert_graph_node(&sym);
1248 {
1249 let mut g = engine.lock_graph().unwrap();
1250 g.add_node(sym).unwrap();
1251 }
1252
1253 let (symbols_cleaned, _) = engine.detect_orphans(Some(dir.path())).unwrap();
1254 assert_eq!(symbols_cleaned, 0);
1255 }
1256
1257 }