1use crate::index::{self, IndexAndResolveResult, Indexer};
2use crate::patterns;
3use crate::CodememEngine;
4use codemem_core::{CodememError, DetectedPattern, MemoryNode};
5use std::collections::HashSet;
6use std::path::Path;
7use std::sync::atomic::Ordering;
8
/// Decides whether a file looks like an OpenAPI, Swagger or AsyncAPI spec.
///
/// The check has two stages:
/// 1. A well-known file name (`openapi`, `swagger` or `asyncapi` with a
///    `.yaml`, `.yml` or `.json` extension, compared case-insensitively)
///    is accepted without looking at the content.
/// 2. Any other `.yaml` / `.yml` / `.json` file is accepted when one of the
///    top-level spec keys appears, in JSON or YAML form, within the first
///    few hundred bytes of `content`.
///
/// `path` is split on `/` to obtain the file name; `content` is the raw
/// file bytes and may be empty.
fn is_spec_file_with_content(path: &str, content: &[u8]) -> bool {
    // How many leading bytes are inspected when sniffing for a spec key.
    const PEEK_BYTES: usize = 300;

    let filename = path.rsplit('/').next().unwrap_or(path);
    let filename_lower = filename.to_lowercase();

    if matches!(
        filename_lower.as_str(),
        "openapi.yaml"
            | "openapi.yml"
            | "openapi.json"
            | "swagger.yaml"
            | "swagger.yml"
            | "swagger.json"
            | "asyncapi.yaml"
            | "asyncapi.yml"
            | "asyncapi.json"
    ) {
        return true;
    }

    let is_yaml_json = filename_lower.ends_with(".yaml")
        || filename_lower.ends_with(".yml")
        || filename_lower.ends_with(".json");
    if !is_yaml_json {
        return false;
    }

    // Decode lossily: cutting at a fixed byte offset can split a multi-byte
    // UTF-8 character, and a strict decode would then reject the whole peek
    // and miss a perfectly valid spec file. The markers are pure ASCII, so
    // replacement characters elsewhere cannot affect the match.
    let peek = String::from_utf8_lossy(&content[..content.len().min(PEEK_BYTES)]);
    let peek_lower = peek.to_lowercase();
    peek_lower.contains("\"openapi\"")
        || peek_lower.contains("\"swagger\"")
        || peek_lower.contains("\"asyncapi\"")
        || peek_lower.contains("openapi:")
        || peek_lower.contains("swagger:")
        || peek_lower.contains("asyncapi:")
}
50
51impl CodememEngine {
52 pub fn save_index(&self) {
58 if let Some(ref db_path) = self.db_path {
59 if self.vector_ready() {
61 let idx_path = db_path.with_extension("idx");
62 if let Ok(mut vi) = self.lock_vector() {
63 if vi.needs_compaction() {
65 let ghost = vi.ghost_count();
66 let live = vi.stats().count;
67 tracing::info!(
68 "HNSW ghost compaction: {ghost} ghosts / {live} live entries, rebuilding..."
69 );
70 if let Ok(embeddings) = self.storage.list_all_embeddings() {
71 if let Err(e) = vi.rebuild_from_entries(&embeddings) {
72 tracing::warn!("HNSW compaction failed: {e}");
73 }
74 }
75 }
76 if let Err(e) = vi.save(&idx_path) {
77 tracing::warn!("Failed to save vector index: {e}");
78 }
79 }
80 }
81
82 if self.bm25_ready() {
84 let bm25_path = db_path.with_extension("bm25");
85 if let Ok(bm25) = self.lock_bm25() {
86 if bm25.needs_save() {
87 let data = bm25.serialize();
88 let tmp_path = db_path.with_extension("bm25.tmp");
89 if let Err(e) = std::fs::write(&tmp_path, &data)
90 .and_then(|_| std::fs::rename(&tmp_path, &bm25_path))
91 {
92 tracing::warn!("Failed to save BM25 index: {e}");
93 }
94 }
95 }
96 }
97 }
98 self.dirty.store(false, Ordering::Release);
99 }
100
101 pub fn reload_graph(&self) -> Result<(), CodememError> {
107 let new_graph = codemem_storage::graph::GraphEngine::from_storage(&*self.storage)?;
108 let mut graph = self.lock_graph()?;
109 *graph = Box::new(new_graph);
110 Ok(())
111 }
112
113 pub fn process_watch_event(
125 &self,
126 event: &crate::watch::WatchEvent,
127 namespace: Option<&str>,
128 project_root: Option<&Path>,
129 ) -> Result<(), CodememError> {
130 match event {
131 crate::watch::WatchEvent::FileChanged(path)
132 | crate::watch::WatchEvent::FileCreated(path) => {
133 self.index_single_file(path, namespace, project_root)?;
134 }
135 crate::watch::WatchEvent::FileDeleted(path) => {
136 let rel = if let Some(root) = project_root {
138 path.strip_prefix(root)
139 .unwrap_or(path)
140 .to_string_lossy()
141 .to_string()
142 } else {
143 path.to_string_lossy().to_string()
144 };
145 self.cleanup_file_nodes(&rel)?;
146 }
147 }
148 Ok(())
149 }
150
151 fn index_single_file(
161 &self,
162 path: &Path,
163 namespace: Option<&str>,
164 project_root: Option<&Path>,
165 ) -> Result<(), CodememError> {
166 let content = std::fs::read(path)?;
167
168 let path_str = if let Some(root) = project_root {
169 path.strip_prefix(root)
170 .unwrap_or(path)
171 .to_string_lossy()
172 .to_string()
173 } else {
174 path.to_string_lossy().to_string()
175 };
176
177 let hash = {
180 let mut cd_guard = self
181 .change_detector
182 .lock()
183 .map_err(|_| CodememError::LockPoisoned("change_detector".into()))?;
184 let ns = namespace.unwrap_or("");
185 let cd = cd_guard.get_or_insert_with(|| {
186 let mut cd = index::incremental::ChangeDetector::new();
187 cd.load_from_storage(&*self.storage, ns);
188 cd
189 });
190 let (changed, hash) = cd.check_changed(&path_str, &content);
191 if !changed {
192 tracing::debug!("Skipping unchanged file: {path_str}");
193 return Ok(());
194 }
195 if self.config.memory.expire_enrichments_on_reindex {
197 match self.storage.expire_memories_for_file(&path_str) {
198 Ok(0) => {}
199 Ok(n) => tracing::debug!("Expired {n} enrichment memories for {path_str}"),
200 Err(e) => tracing::warn!("Failed to expire memories for {path_str}: {e}"),
201 }
202 }
203 hash
204 };
205
206 if is_spec_file_with_content(&path_str, &content) {
209 self.reparse_spec_file(path, namespace.unwrap_or(""))?;
210 if let Ok(mut cd_guard) = self.change_detector.lock() {
212 if let Some(cd) = cd_guard.as_mut() {
213 cd.record_hash(&path_str, hash);
214 let _ = cd.save_to_storage(&*self.storage, namespace.unwrap_or(""));
215 }
216 }
217 return Ok(());
218 }
219
220 let parser = index::CodeParser::new();
221
222 let parse_result = match parser.parse_file(&path_str, &content) {
223 Some(pr) => pr,
224 None => return Ok(()), };
226
227 let mut file_paths = HashSet::new();
229 file_paths.insert(parse_result.file_path.clone());
230
231 let mut resolver = index::ReferenceResolver::new();
237 resolver.add_symbols(&parse_result.symbols);
238 if let Ok(graph) = self.lock_graph() {
239 let graph_symbols: Vec<index::Symbol> = graph
240 .get_all_nodes()
241 .iter()
242 .filter(|n| n.id.starts_with("sym:"))
243 .filter_map(index::symbol::symbol_from_graph_node)
244 .collect();
245 resolver.add_symbols(&graph_symbols);
246 }
247 let resolve_result = resolver.resolve_all_with_unresolved(&parse_result.references);
248
249 let results = IndexAndResolveResult {
250 index: index::IndexResult {
251 files_scanned: 1,
252 files_parsed: 1,
253 files_skipped: 0,
254 total_symbols: parse_result.symbols.len(),
255 total_references: parse_result.references.len(),
256 total_chunks: parse_result.chunks.len(),
257 parse_results: Vec::new(),
258 },
259 symbols: parse_result.symbols,
260 references: parse_result.references,
261 chunks: parse_result.chunks,
262 file_paths,
263 edges: resolve_result.edges,
264 unresolved: resolve_result.unresolved,
265 root_path: project_root
266 .map(|p| p.to_path_buf())
267 .unwrap_or_else(|| path.to_path_buf()),
268 scip_build: None,
269 };
270
271 self.persist_index_results(&results, namespace)?;
272
273 if let Ok(mut cd_guard) = self.change_detector.lock() {
275 if let Some(cd) = cd_guard.as_mut() {
276 cd.record_hash(&path_str, hash);
277 if let Err(e) = cd.save_to_storage(&*self.storage, namespace.unwrap_or("")) {
278 tracing::warn!("Failed to save file hash for {path_str}: {e}");
279 }
280 }
281 }
282
283 Ok(())
284 }
285
286 pub fn cleanup_stale_symbols(
301 &self,
302 file_path: &str,
303 old_symbol_ids: &HashSet<String>,
304 new_symbol_ids: &HashSet<String>,
305 ) -> Result<usize, CodememError> {
306 let stale_ids: Vec<&String> = old_symbol_ids
308 .iter()
309 .filter(|id| !new_symbol_ids.contains(*id))
310 .collect();
311
312 if stale_ids.is_empty() {
313 return Ok(0);
314 }
315
316 let count = stale_ids.len();
317 tracing::info!(
318 "Cleaning up {count} stale symbols for {file_path}: {:?}",
319 stale_ids
320 );
321
322 let file_node_id = format!("file:{file_path}");
323 let mut redirected_pairs: std::collections::HashSet<(String, String)> =
324 std::collections::HashSet::new();
325 let mut redirected_edges: Vec<codemem_core::Edge> = Vec::new();
326 for sym_id in &stale_ids {
327 let edges = self.storage.get_edges_for_node(sym_id.as_str())?;
331 for edge in &edges {
332 let other = if edge.src.as_str() == sym_id.as_str() {
333 &edge.dst
334 } else {
335 &edge.src
336 };
337 let is_code_node = other.starts_with("sym:")
338 || other.starts_with("file:")
339 || other.starts_with("chunk:")
340 || other.starts_with("pkg:");
341 if !is_code_node {
342 let pair = (other.to_string(), file_node_id.clone());
344 if !redirected_pairs.insert(pair) {
345 continue;
346 }
347 let mut redirected = edge.clone();
348 if redirected.src.as_str() == sym_id.as_str() {
349 redirected.src = file_node_id.clone();
350 } else {
351 redirected.dst = file_node_id.clone();
352 }
353 redirected.id = format!("{}-redirected", edge.id);
356 if let Err(e) = self.storage.insert_graph_edge(&redirected) {
357 tracing::warn!("Failed to redirect memory edge {}: {e}", edge.id);
358 }
359 redirected_edges.push(redirected);
360 }
361 }
362
363 if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
365 tracing::warn!("Failed to delete edges for stale symbol {sym_id}: {e}");
366 }
367 if let Err(e) = self.storage.delete_graph_node(sym_id) {
368 tracing::warn!("Failed to delete stale symbol node {sym_id}: {e}");
369 }
370 if let Err(e) = self.storage.delete_embedding(sym_id) {
371 tracing::warn!("Failed to delete embedding for stale symbol {sym_id}: {e}");
372 }
373 }
374
375 {
377 let mut graph = self.lock_graph()?;
378 for sym_id in &stale_ids {
379 if let Err(e) = graph.remove_node(sym_id.as_str()) {
380 tracing::warn!("Failed to remove stale {sym_id} from in-memory graph: {e}");
381 }
382 }
383 for edge in redirected_edges {
386 let _ = graph.add_edge(edge);
387 }
388 }
389 {
390 let mut vec = self.lock_vector()?;
391 for sym_id in &stale_ids {
392 if let Err(e) = vec.remove(sym_id.as_str()) {
393 tracing::warn!("Failed to remove stale {sym_id} from vector index: {e}");
394 }
395 }
396 }
397 if let Ok(mut bm25) = self.lock_bm25() {
400 for sym_id in &stale_ids {
401 bm25.remove_document(sym_id);
402 }
403 }
404
405 Ok(count)
406 }
407
408 fn cleanup_file_nodes(&self, file_path: &str) -> Result<(), CodememError> {
412 let file_node_id = format!("file:{file_path}");
413
414 let chunk_prefix = format!("chunk:{file_path}:");
416 if let Err(e) = self.storage.delete_graph_nodes_by_prefix(&chunk_prefix) {
417 tracing::warn!("Failed to delete chunk nodes for {file_path}: {e}");
418 }
419
420 let graph = self.lock_graph()?;
422 let sym_ids: Vec<String> = graph
423 .get_all_nodes()
424 .into_iter()
425 .filter(|n| {
426 n.id.starts_with("sym:")
427 && n.payload.get("file_path").and_then(|v| v.as_str()) == Some(file_path)
428 })
429 .map(|n| n.id.clone())
430 .collect();
431 drop(graph);
432
433 for sym_id in &sym_ids {
434 if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
435 tracing::warn!("Failed to delete graph edges for {sym_id}: {e}");
436 }
437 if let Err(e) = self.storage.delete_graph_node(sym_id) {
438 tracing::warn!("Failed to delete graph node {sym_id}: {e}");
439 }
440 if let Err(e) = self.storage.delete_embedding(sym_id) {
441 tracing::warn!("Failed to delete embedding {sym_id}: {e}");
442 }
443 }
444
445 if let Err(e) = self.storage.delete_graph_edges_for_node(&file_node_id) {
447 tracing::warn!("Failed to delete graph edges for {file_node_id}: {e}");
448 }
449 if let Err(e) = self.storage.delete_graph_node(&file_node_id) {
450 tracing::warn!("Failed to delete graph node {file_node_id}: {e}");
451 }
452
453 let mut graph = self.lock_graph()?;
455 for sym_id in &sym_ids {
456 if let Err(e) = graph.remove_node(sym_id) {
457 tracing::warn!("Failed to remove {sym_id} from in-memory graph: {e}");
458 }
459 }
460 let chunk_ids: Vec<String> = graph
462 .get_all_nodes()
463 .into_iter()
464 .filter(|n| n.id.starts_with(&format!("chunk:{file_path}:")))
465 .map(|n| n.id.clone())
466 .collect();
467 for chunk_id in &chunk_ids {
468 if let Err(e) = graph.remove_node(chunk_id) {
469 tracing::warn!("Failed to remove {chunk_id} from in-memory graph: {e}");
470 }
471 }
472 if let Err(e) = graph.remove_node(&file_node_id) {
473 tracing::warn!("Failed to remove {file_node_id} from in-memory graph: {e}");
474 }
475 drop(graph);
476
477 let mut vec = self.lock_vector()?;
479 for sym_id in &sym_ids {
480 if let Err(e) = vec.remove(sym_id) {
481 tracing::warn!("Failed to remove {sym_id} from vector index: {e}");
482 }
483 }
484 for chunk_id in &chunk_ids {
485 if let Err(e) = vec.remove(chunk_id) {
486 tracing::warn!("Failed to remove {chunk_id} from vector index: {e}");
487 }
488 }
489 drop(vec);
490
491 if let Ok(mut bm25) = self.lock_bm25() {
493 for sym_id in &sym_ids {
494 bm25.remove_document(sym_id);
495 }
496 for chunk_id in &chunk_ids {
497 bm25.remove_document(chunk_id);
498 }
499 }
500
501 self.save_index();
502 Ok(())
503 }
504
505 pub fn detect_orphans(
515 &self,
516 project_root: Option<&Path>,
517 ) -> Result<(usize, usize), CodememError> {
518 let all_nodes = self.storage.all_graph_nodes()?;
520 let node_ids: HashSet<String> = all_nodes.iter().map(|n| n.id.clone()).collect();
521
522 let mut orphan_sym_ids: Vec<String> = Vec::new();
523
524 if let Some(root) = project_root {
527 for node in &all_nodes {
528 if node.id.starts_with("sym:") || node.id.starts_with("chunk:") {
530 let file_path = match node.payload.get("file_path").and_then(|v| v.as_str()) {
531 Some(fp) => fp,
532 None => continue,
533 };
534 let abs_path = root.join(file_path);
535 if !abs_path.exists() {
536 orphan_sym_ids.push(node.id.clone());
537 }
538 }
539 else if let Some(fp) = node.id.strip_prefix("file:") {
541 let abs_path = root.join(fp);
542 if !abs_path.exists() {
543 orphan_sym_ids.push(node.id.clone());
544 }
545 }
546 else if let Some(dir) = node.id.strip_prefix("pkg:") {
548 let dir_trimmed = dir.trim_end_matches('/');
549 let abs_path = root.join(dir_trimmed);
550 if !abs_path.exists() {
551 orphan_sym_ids.push(node.id.clone());
552 }
553 }
554 }
555 }
556
557 let all_edges = self.storage.all_graph_edges()?;
559 let mut dangling_edge_ids: Vec<String> = Vec::new();
560 for edge in &all_edges {
561 if !node_ids.contains(&edge.src) || !node_ids.contains(&edge.dst) {
562 dangling_edge_ids.push(edge.id.clone());
563 }
564 }
565
566 let symbols_cleaned = orphan_sym_ids.len();
567
568 for sym_id in &orphan_sym_ids {
570 if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
571 tracing::warn!("Orphan cleanup: failed to delete edges for {sym_id}: {e}");
572 }
573 if let Err(e) = self.storage.delete_graph_node(sym_id) {
574 tracing::warn!("Orphan cleanup: failed to delete node {sym_id}: {e}");
575 }
576 if let Err(e) = self.storage.delete_embedding(sym_id) {
577 tracing::warn!("Orphan cleanup: failed to delete embedding {sym_id}: {e}");
578 }
579 }
580
581 if !orphan_sym_ids.is_empty() {
583 if let Ok(mut graph) = self.lock_graph() {
584 for sym_id in &orphan_sym_ids {
585 let _ = graph.remove_node(sym_id);
586 }
587 }
588 if let Ok(mut vec) = self.lock_vector() {
589 for sym_id in &orphan_sym_ids {
590 let _ = vec.remove(sym_id);
591 }
592 }
593 }
594
595 let mut edges_cleaned = 0usize;
597 for edge_id in &dangling_edge_ids {
598 match self.storage.delete_graph_edge(edge_id) {
599 Ok(true) => edges_cleaned += 1,
600 Ok(false) => {} Err(e) => {
602 tracing::warn!("Orphan cleanup: failed to delete dangling edge {edge_id}: {e}");
603 }
604 }
605 }
606
607 if symbols_cleaned > 0 || edges_cleaned > 0 {
608 tracing::info!(
609 "Orphan scan: cleaned {symbols_cleaned} symbol/chunk nodes, {edges_cleaned} dangling edges"
610 );
611 }
612
613 Ok((symbols_cleaned, edges_cleaned))
614 }
615
616 fn reparse_spec_file(&self, path: &Path, namespace: &str) -> Result<(), CodememError> {
620 use crate::index::spec_parser::{parse_asyncapi, parse_openapi, SpecFileResult};
621
622 let result = if let Some(openapi) = parse_openapi(path) {
623 Some(SpecFileResult::OpenApi(openapi))
624 } else {
625 parse_asyncapi(path).map(SpecFileResult::AsyncApi)
626 };
627
628 match result {
629 Some(SpecFileResult::OpenApi(spec)) => {
630 for ep in &spec.endpoints {
631 let _ = self.storage.store_api_endpoint(
632 &ep.method,
633 &ep.path,
634 ep.operation_id.as_deref().unwrap_or(""),
635 namespace,
636 );
637 }
638 tracing::info!(
639 "Re-parsed OpenAPI spec: {} endpoints from {}",
640 spec.endpoints.len(),
641 path.display()
642 );
643 }
644 Some(SpecFileResult::AsyncApi(spec)) => {
645 for ch in &spec.channels {
646 let _ = self.storage.store_event_channel(
647 &ch.channel,
648 &ch.direction,
649 ch.protocol.as_deref().unwrap_or(""),
650 ch.operation_id.as_deref().unwrap_or(""),
651 namespace,
652 ch.description.as_deref().unwrap_or(""),
653 );
654 }
655 tracing::info!(
656 "Re-parsed AsyncAPI spec: {} channels from {}",
657 spec.channels.len(),
658 path.display()
659 );
660 }
661 None => {
662 tracing::debug!("Not a recognized spec file: {}", path.display());
663 }
664 }
665 Ok(())
666 }
667
668 pub fn analyze(&self, options: AnalyzeOptions<'_>) -> Result<AnalyzeResult, CodememError> {
676 let root = options.path;
677
678 if !options.skip_embed {
682 drop(self.lock_embeddings());
683 drop(self.lock_vector());
684 drop(self.lock_bm25());
685 }
686
687 let (scip_covered, scip_build) = if !options.skip_scip && self.config.scip.enabled {
690 match self.run_scip_phase(root, options.namespace) {
691 Ok((covered, build)) => (Some(covered), Some(build)),
692 Err(e) => {
693 tracing::warn!("SCIP phase failed, falling back to ast-grep only: {e}");
694 (None, None)
695 }
696 }
697 } else {
698 (None, None)
699 };
700
701 let scip_nodes_created = scip_build.as_ref().map_or(0, |b| b.nodes.len());
702 let scip_edges_created = scip_build.as_ref().map_or(0, |b| b.edges.len());
703 let scip_files_covered = scip_covered.as_ref().map_or(0, |s| s.len());
704
705 let mut indexer = match options.change_detector {
708 Some(cd) if !options.force => Indexer::with_change_detector(cd),
709 _ => Indexer::new(),
710 };
711 let resolved =
712 indexer.index_and_resolve_with_scip(root, scip_covered.as_ref(), scip_build)?;
713
714 let persist = if options.skip_embed {
716 self.persist_graph_only(&resolved, Some(options.namespace))?
717 } else if let Some(ref on_progress) = options.progress {
718 self.persist_index_results_with_progress(
719 &resolved,
720 Some(options.namespace),
721 |done, total| {
722 on_progress(AnalyzeProgress::Embedding { done, total });
723 },
724 )?
725 } else {
726 self.persist_index_results(&resolved, Some(options.namespace))?
727 };
728
729 {
731 if let Ok(mut cache) = self.lock_index_cache() {
732 *cache = Some(crate::IndexCache {
733 symbols: resolved.symbols,
734 chunks: resolved.chunks,
735 root_path: root.to_string_lossy().to_string(),
736 });
737 }
738 }
739
740 let enrichment = if options.skip_enrich {
742 crate::enrichment::EnrichmentPipelineResult {
743 results: serde_json::json!({}),
744 total_insights: 0,
745 }
746 } else {
747 let path_str = root.to_str().unwrap_or("");
748 self.run_enrichments(
749 path_str,
750 &[],
751 options.git_days,
752 Some(options.namespace),
753 None,
754 )
755 };
756
757 self.lock_graph()?
763 .recompute_centrality_for_namespace(options.namespace);
764
765 let top_nodes = self
767 .find_important_nodes(10, 0.85, Some(options.namespace))
768 .unwrap_or_default();
769 let community_count = self.louvain_communities(1.0).map(|c| c.len()).unwrap_or(0);
770
771 self.save_index();
773
774 indexer
776 .change_detector()
777 .save_to_storage(self.storage(), options.namespace)?;
778
779 Ok(AnalyzeResult {
780 files_parsed: resolved.index.files_parsed,
781 files_skipped: resolved.index.files_skipped,
782 symbols_found: resolved.index.total_symbols,
783 edges_resolved: persist.edges_resolved,
784 chunks_stored: persist.chunks_stored,
785 symbols_embedded: persist.symbols_embedded,
786 chunks_embedded: persist.chunks_embedded,
787 chunks_pruned: persist.chunks_pruned,
788 symbols_pruned: persist.symbols_pruned,
789 enrichment_results: enrichment.results,
790 total_insights: enrichment.total_insights,
791 top_nodes,
792 community_count,
793 scip_nodes_created,
794 scip_edges_created,
795 scip_files_covered,
796 })
797 }
798
799 fn run_scip_phase(
801 &self,
802 root: &Path,
803 namespace: &str,
804 ) -> Result<(HashSet<String>, index::scip::graph_builder::ScipBuildResult), CodememError> {
805 let orchestrator =
806 index::scip::orchestrate::ScipOrchestrator::new(self.config.scip.clone());
807 let orch_result = orchestrator.run(root, namespace)?;
808
809 if orch_result.scip_result.covered_files.is_empty() {
810 return Ok((
811 HashSet::new(),
812 index::scip::graph_builder::ScipBuildResult::default(),
813 ));
814 }
815
816 for (lang, err) in &orch_result.failed_languages {
817 tracing::warn!("SCIP indexer for {:?} failed: {}", lang, err);
818 }
819 for lang in &orch_result.indexed_languages {
820 tracing::info!("SCIP indexed {:?} successfully", lang);
821 }
822
823 let build = index::scip::graph_builder::build_graph(
824 &orch_result.scip_result,
825 Some(namespace),
826 &self.config.scip,
827 );
828 let covered: HashSet<String> = build.files_covered.clone();
829
830 tracing::info!(
831 "SCIP phase: {} nodes, {} edges, {} ext nodes, {} files covered, {} doc memories",
832 build.nodes.len(),
833 build.edges.len(),
834 build.ext_nodes_created,
835 covered.len(),
836 build.doc_memories_created,
837 );
838
839 Ok((covered, build))
840 }
841
842 pub fn session_context(&self, namespace: Option<&str>) -> Result<SessionContext, CodememError> {
847 let now = chrono::Utc::now();
848 let cutoff_24h = now - chrono::Duration::hours(24);
849
850 let ids = match namespace {
852 Some(ns) => self.storage.list_memory_ids_for_namespace(ns)?,
853 None => self.storage.list_memory_ids()?,
854 };
855
856 let mut recent_memories = Vec::new();
857 let mut pending_analyses = Vec::new();
858
859 for id in ids.iter().rev().take(200) {
860 if let Ok(Some(m)) = self.storage.get_memory_no_touch(id) {
861 if m.tags.contains(&"pending-analysis".to_string()) {
863 pending_analyses.push(m.clone());
864 }
865 if m.created_at >= cutoff_24h {
867 recent_memories.push(m);
868 }
869 if recent_memories.len() >= 50 && pending_analyses.len() >= 10 {
870 break;
871 }
872 }
873 }
874
875 let session_count = self.storage.session_count(namespace).unwrap_or(1).max(1);
877 let active_patterns = patterns::detect_patterns(
878 &*self.storage,
879 namespace,
880 2, session_count,
882 )
883 .unwrap_or_default();
884
885 let last_session_summary = self
887 .storage
888 .list_sessions(namespace, 1)?
889 .into_iter()
890 .next()
891 .and_then(|s| s.summary);
892
893 Ok(SessionContext {
894 recent_memories,
895 pending_analyses,
896 active_patterns,
897 last_session_summary,
898 })
899 }
900}
901
/// Inputs for [`CodememEngine::analyze`].
pub struct AnalyzeOptions<'a> {
    /// Root directory to analyze.
    pub path: &'a Path,
    /// Namespace under which results are persisted and ranked.
    pub namespace: &'a str,
    /// Passed through to the enrichment step.
    /// NOTE(review): presumably the git-history window in days; confirm
    /// against `run_enrichments`.
    pub git_days: u64,
    /// Change detector handed to the indexer; ignored when `force` is set.
    pub change_detector: Option<index::incremental::ChangeDetector>,
    /// Optional callback that receives [`AnalyzeProgress`] events while
    /// embeddings are being persisted.
    pub progress: Option<Box<dyn Fn(AnalyzeProgress) + Send + 'a>>,
    /// Skip the SCIP phase even when it is enabled in the configuration.
    pub skip_scip: bool,
    /// Persist only the graph; the embedding-aware persistence is not run.
    pub skip_embed: bool,
    /// Skip the enrichment step; the result then reports no insights.
    pub skip_enrich: bool,
    /// Index with a fresh indexer instead of the supplied change detector.
    pub force: bool,
}
920
/// Progress events reported through `AnalyzeOptions::progress`.
///
/// `PartialEq`/`Eq` are derived so callers and tests can compare events
/// directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AnalyzeProgress {
    /// Embedding persistence progress: `done` items out of `total`.
    Embedding { done: usize, total: usize },
}
926
/// Summary returned by [`CodememEngine::analyze`].
#[derive(Debug)]
pub struct AnalyzeResult {
    /// Files parsed, as reported by the indexer.
    pub files_parsed: usize,
    /// Files skipped, as reported by the indexer.
    pub files_skipped: usize,
    /// Total symbols, as reported by the indexer.
    pub symbols_found: usize,
    /// Edges resolved, as reported by the persistence step.
    pub edges_resolved: usize,
    /// Chunks stored, as reported by the persistence step.
    pub chunks_stored: usize,
    /// Symbols embedded, as reported by the persistence step.
    pub symbols_embedded: usize,
    /// Chunks embedded, as reported by the persistence step.
    pub chunks_embedded: usize,
    /// Chunks pruned, as reported by the persistence step.
    pub chunks_pruned: usize,
    /// Symbols pruned, as reported by the persistence step.
    pub symbols_pruned: usize,
    /// Enrichment output; an empty JSON object when enrichment was skipped.
    pub enrichment_results: serde_json::Value,
    /// Insight count from enrichment; 0 when enrichment was skipped.
    pub total_insights: usize,
    /// Highest-ranked nodes for the namespace (up to 10 are requested);
    /// empty when the ranking fails.
    pub top_nodes: Vec<crate::graph_ops::RankedNode>,
    /// Number of communities found; 0 when community detection fails.
    pub community_count: usize,
    /// Nodes in the SCIP build result; 0 when SCIP did not run or failed.
    pub scip_nodes_created: usize,
    /// Edges in the SCIP build result; 0 when SCIP did not run or failed.
    pub scip_edges_created: usize,
    /// Files covered by SCIP; 0 when SCIP did not run or failed.
    pub scip_files_covered: usize,
}
950
/// Context assembled by [`CodememEngine::session_context`].
#[derive(Debug)]
pub struct SessionContext {
    /// Memories created within the last 24 hours, from the scanned id window.
    pub recent_memories: Vec<MemoryNode>,
    /// Memories tagged `pending-analysis`, from the scanned id window.
    pub pending_analyses: Vec<MemoryNode>,
    /// Patterns detected for the namespace; empty when detection fails.
    pub active_patterns: Vec<DetectedPattern>,
    /// Summary of the first session returned by storage, if it has one.
    pub last_session_summary: Option<String>,
}
963
964#[cfg(test)]
965mod tests {
966 use super::*;
967 use codemem_core::{Edge, GraphNode, NodeKind, RelationshipType};
968 use std::collections::{HashMap, HashSet};
969
970 fn test_engine() -> CodememEngine {
972 let dir = tempfile::tempdir().unwrap();
973 let db_path = dir.path().join("test.db");
974 let _ = Box::leak(Box::new(dir));
976 CodememEngine::from_db_path(&db_path).unwrap()
977 }
978
979 fn graph_node(id: &str, kind: NodeKind, file_path: Option<&str>) -> GraphNode {
980 let mut payload = HashMap::new();
981 if let Some(fp) = file_path {
982 payload.insert(
983 "file_path".to_string(),
984 serde_json::Value::String(fp.to_string()),
985 );
986 }
987 GraphNode {
988 id: id.to_string(),
989 kind,
990 label: id.to_string(),
991 payload,
992 centrality: 0.0,
993 memory_id: None,
994 namespace: None,
995 valid_from: None,
996 valid_to: None,
997 }
998 }
999
1000 fn edge(src: &str, dst: &str, rel: RelationshipType) -> Edge {
1001 Edge {
1002 id: format!("{rel}:{src}->{dst}"),
1003 src: src.to_string(),
1004 dst: dst.to_string(),
1005 relationship: rel,
1006 weight: 1.0,
1007 properties: HashMap::new(),
1008 created_at: chrono::Utc::now(),
1009 valid_from: None,
1010 valid_to: None,
1011 }
1012 }
1013
1014 #[test]
1017 fn cleanup_stale_symbols_deletes_stale_nodes() {
1018 let engine = test_engine();
1019
1020 let file = graph_node("file:src/a.rs", NodeKind::File, None);
1022 let sym_keep = graph_node("sym:a::keep", NodeKind::Function, Some("src/a.rs"));
1023 let sym_stale = graph_node("sym:a::stale", NodeKind::Function, Some("src/a.rs"));
1024
1025 {
1026 let mut g = engine.lock_graph().unwrap();
1027 g.add_node(file).unwrap();
1028 g.add_node(sym_keep.clone()).unwrap();
1029 g.add_node(sym_stale.clone()).unwrap();
1030 g.add_edge(edge(
1031 "file:src/a.rs",
1032 "sym:a::keep",
1033 RelationshipType::Contains,
1034 ))
1035 .unwrap();
1036 g.add_edge(edge(
1037 "file:src/a.rs",
1038 "sym:a::stale",
1039 RelationshipType::Contains,
1040 ))
1041 .unwrap();
1042 }
1043 let _ =
1045 engine
1046 .storage
1047 .insert_graph_node(&graph_node("file:src/a.rs", NodeKind::File, None));
1048 let _ = engine.storage.insert_graph_node(&sym_keep);
1049 let _ = engine.storage.insert_graph_node(&sym_stale);
1050 let _ = engine.storage.insert_graph_edge(&edge(
1051 "file:src/a.rs",
1052 "sym:a::keep",
1053 RelationshipType::Contains,
1054 ));
1055 let _ = engine.storage.insert_graph_edge(&edge(
1056 "file:src/a.rs",
1057 "sym:a::stale",
1058 RelationshipType::Contains,
1059 ));
1060
1061 let old_ids: HashSet<String> = ["sym:a::keep", "sym:a::stale"]
1062 .iter()
1063 .map(|s| s.to_string())
1064 .collect();
1065 let new_ids: HashSet<String> = ["sym:a::keep"].iter().map(|s| s.to_string()).collect();
1066
1067 let cleaned = engine
1068 .cleanup_stale_symbols("src/a.rs", &old_ids, &new_ids)
1069 .unwrap();
1070 assert_eq!(cleaned, 1);
1071
1072 let g = engine.lock_graph().unwrap();
1074 assert!(g.get_node("sym:a::stale").unwrap().is_none());
1075 assert!(g.get_node("sym:a::keep").unwrap().is_some());
1076 }
1077
1078 #[test]
1079 fn cleanup_stale_symbols_redirects_memory_edges_to_graph() {
1080 let engine = test_engine();
1081
1082 let file = graph_node("file:src/a.rs", NodeKind::File, None);
1083 let sym_stale = graph_node("sym:a::old_fn", NodeKind::Function, Some("src/a.rs"));
1084 let mem = graph_node("mem-uuid-123", NodeKind::Memory, None);
1085
1086 {
1087 let mut g = engine.lock_graph().unwrap();
1088 g.add_node(file.clone()).unwrap();
1089 g.add_node(sym_stale.clone()).unwrap();
1090 g.add_node(mem.clone()).unwrap();
1091 g.add_edge(edge(
1092 "file:src/a.rs",
1093 "sym:a::old_fn",
1094 RelationshipType::Contains,
1095 ))
1096 .unwrap();
1097 g.add_edge(edge(
1098 "mem-uuid-123",
1099 "sym:a::old_fn",
1100 RelationshipType::RelatesTo,
1101 ))
1102 .unwrap();
1103 }
1104 let _ = engine.storage.insert_graph_node(&file);
1105 let _ = engine.storage.insert_graph_node(&sym_stale);
1106 let _ = engine.storage.insert_graph_node(&mem);
1107 let _ = engine.storage.insert_graph_edge(&edge(
1108 "file:src/a.rs",
1109 "sym:a::old_fn",
1110 RelationshipType::Contains,
1111 ));
1112 let _ = engine.storage.insert_graph_edge(&edge(
1113 "mem-uuid-123",
1114 "sym:a::old_fn",
1115 RelationshipType::RelatesTo,
1116 ));
1117
1118 let old_ids: HashSet<String> = ["sym:a::old_fn"].iter().map(|s| s.to_string()).collect();
1119 let new_ids: HashSet<String> = HashSet::new();
1120
1121 engine
1122 .cleanup_stale_symbols("src/a.rs", &old_ids, &new_ids)
1123 .unwrap();
1124
1125 let g = engine.lock_graph().unwrap();
1127 let file_edges = g.get_edges("file:src/a.rs").unwrap();
1128 let has_redirect = file_edges.iter().any(|e| {
1129 (e.src == "mem-uuid-123" || e.dst == "mem-uuid-123") && e.id.contains("-redirected")
1130 });
1131 assert!(
1132 has_redirect,
1133 "redirected memory→file edge should be in the in-memory graph"
1134 );
1135 }
1136
1137 #[test]
1138 fn cleanup_stale_symbols_deduplicates_redirects() {
1139 let engine = test_engine();
1140
1141 let file = graph_node("file:src/a.rs", NodeKind::File, None);
1142 let sym1 = graph_node("sym:a::fn1", NodeKind::Function, Some("src/a.rs"));
1143 let sym2 = graph_node("sym:a::fn2", NodeKind::Function, Some("src/a.rs"));
1144 let mem = graph_node("mem-uuid-456", NodeKind::Memory, None);
1145
1146 let _ = engine.storage.insert_graph_node(&file);
1148 let _ = engine.storage.insert_graph_node(&sym1);
1149 let _ = engine.storage.insert_graph_node(&sym2);
1150 let _ = engine.storage.insert_graph_node(&mem);
1151 let _ = engine.storage.insert_graph_edge(&edge(
1152 "mem-uuid-456",
1153 "sym:a::fn1",
1154 RelationshipType::RelatesTo,
1155 ));
1156 let _ = engine.storage.insert_graph_edge(&edge(
1157 "mem-uuid-456",
1158 "sym:a::fn2",
1159 RelationshipType::RelatesTo,
1160 ));
1161
1162 {
1163 let mut g = engine.lock_graph().unwrap();
1164 g.add_node(file).unwrap();
1165 g.add_node(sym1).unwrap();
1166 g.add_node(sym2).unwrap();
1167 g.add_node(mem).unwrap();
1168 }
1169
1170 let old_ids: HashSet<String> = ["sym:a::fn1", "sym:a::fn2"]
1171 .iter()
1172 .map(|s| s.to_string())
1173 .collect();
1174 let new_ids: HashSet<String> = HashSet::new();
1175
1176 engine
1177 .cleanup_stale_symbols("src/a.rs", &old_ids, &new_ids)
1178 .unwrap();
1179
1180 let g = engine.lock_graph().unwrap();
1182 let file_edges = g.get_edges("file:src/a.rs").unwrap();
1183 let redirect_count = file_edges
1184 .iter()
1185 .filter(|e| e.id.contains("-redirected"))
1186 .count();
1187 assert_eq!(
1188 redirect_count, 1,
1189 "should have exactly 1 redirected edge, got {redirect_count}"
1190 );
1191 }
1192
1193 #[test]
1196 fn detect_orphans_skips_file_check_when_no_root() {
1197 let engine = test_engine();
1198
1199 let sym = graph_node(
1201 "sym:nonexistent::fn",
1202 NodeKind::Function,
1203 Some("does/not/exist.rs"),
1204 );
1205 let _ = engine.storage.insert_graph_node(&sym);
1206 {
1207 let mut g = engine.lock_graph().unwrap();
1208 g.add_node(sym).unwrap();
1209 }
1210
1211 let (symbols_cleaned, _) = engine.detect_orphans(None).unwrap();
1213 assert_eq!(
1214 symbols_cleaned, 0,
1215 "detect_orphans(None) should not delete nodes based on file existence"
1216 );
1217 }
1218
1219 #[test]
1220 fn detect_orphans_removes_missing_files_with_root() {
1221 let dir = tempfile::tempdir().unwrap();
1222 let db_path = dir.path().join("test.db");
1223 let engine = CodememEngine::from_db_path(&db_path).unwrap();
1224
1225 let sym = graph_node(
1227 "sym:missing::fn",
1228 NodeKind::Function,
1229 Some("src/missing.rs"),
1230 );
1231 let _ = engine.storage.insert_graph_node(&sym);
1232 {
1233 let mut g = engine.lock_graph().unwrap();
1234 g.add_node(sym).unwrap();
1235 }
1236
1237 let (symbols_cleaned, _) = engine.detect_orphans(Some(dir.path())).unwrap();
1238 assert_eq!(symbols_cleaned, 1);
1239 }
1240
1241 #[test]
1242 fn detect_orphans_keeps_existing_files() {
1243 let dir = tempfile::tempdir().unwrap();
1244 let db_path = dir.path().join("test.db");
1245 let engine = CodememEngine::from_db_path(&db_path).unwrap();
1246
1247 let src_dir = dir.path().join("src");
1249 std::fs::create_dir_all(&src_dir).unwrap();
1250 std::fs::write(src_dir.join("exists.rs"), "fn main() {}").unwrap();
1251
1252 let sym = graph_node(
1253 "sym:exists::main",
1254 NodeKind::Function,
1255 Some("src/exists.rs"),
1256 );
1257 let _ = engine.storage.insert_graph_node(&sym);
1258 {
1259 let mut g = engine.lock_graph().unwrap();
1260 g.add_node(sym).unwrap();
1261 }
1262
1263 let (symbols_cleaned, _) = engine.detect_orphans(Some(dir.path())).unwrap();
1264 assert_eq!(symbols_cleaned, 0);
1265 }
1266
1267 }