Skip to main content

codemem_engine/
file_indexing.rs

1use crate::index::{self, IndexAndResolveResult, Indexer};
2use crate::patterns;
3use crate::CodememEngine;
4use codemem_core::{CodememError, DetectedPattern, GraphBackend, MemoryNode, VectorBackend};
5use std::collections::HashSet;
6use std::path::Path;
7use std::sync::atomic::Ordering;
8
9impl CodememEngine {
10    // ── Index Persistence ────────────────────────────────────────────────
11
12    /// Save the vector and BM25 indexes to disk if a db_path is configured.
13    /// Compacts the HNSW index if ghost entries exceed 20% of live entries.
14    /// Always clears the dirty flag so `flush_if_dirty()` won't double-save.
15    pub fn save_index(&self) {
16        if let Some(ref db_path) = self.db_path {
17            // Only save vector index if it has been lazily initialized.
18            if self.vector_ready() {
19                let idx_path = db_path.with_extension("idx");
20                if let Ok(mut vi) = self.lock_vector() {
21                    // Compact HNSW if ghost entries exceed threshold
22                    if vi.needs_compaction() {
23                        let ghost = vi.ghost_count();
24                        let live = vi.stats().count;
25                        tracing::info!(
26                            "HNSW ghost compaction: {ghost} ghosts / {live} live entries, rebuilding..."
27                        );
28                        if let Ok(embeddings) = self.storage.list_all_embeddings() {
29                            if let Err(e) = vi.rebuild_from_entries(&embeddings) {
30                                tracing::warn!("HNSW compaction failed: {e}");
31                            }
32                        }
33                    }
34                    if let Err(e) = vi.save(&idx_path) {
35                        tracing::warn!("Failed to save vector index: {e}");
36                    }
37                }
38            }
39
40            // Only save BM25 index if it has been lazily initialized.
41            if self.bm25_ready() {
42                let bm25_path = db_path.with_extension("bm25");
43                if let Ok(bm25) = self.lock_bm25() {
44                    if bm25.needs_save() {
45                        let data = bm25.serialize();
46                        let tmp_path = db_path.with_extension("bm25.tmp");
47                        if let Err(e) = std::fs::write(&tmp_path, &data)
48                            .and_then(|_| std::fs::rename(&tmp_path, &bm25_path))
49                        {
50                            tracing::warn!("Failed to save BM25 index: {e}");
51                        }
52                    }
53                }
54            }
55        }
56        self.dirty.store(false, Ordering::Release);
57    }
58
59    /// Reload the in-memory graph from the database.
60    pub fn reload_graph(&self) -> Result<(), CodememError> {
61        let new_graph = codemem_storage::graph::GraphEngine::from_storage(&*self.storage)?;
62        let mut graph = self.lock_graph()?;
63        *graph = new_graph;
64        graph.recompute_centrality();
65        Ok(())
66    }
67
68    // ── A2: File Watcher Event Processing ───────────────────────────────
69
70    /// Process a single file watcher event by re-indexing changed/created files
71    /// or cleaning up deleted file nodes.
72    ///
73    /// Call this from a watcher event loop:
74    /// ```ignore
75    /// while let Ok(event) = watcher.receiver().recv() {
76    ///     engine.process_watch_event(&event, namespace, Some(root));
77    /// }
78    /// ```
79    pub fn process_watch_event(
80        &self,
81        event: &crate::watch::WatchEvent,
82        namespace: Option<&str>,
83        project_root: Option<&Path>,
84    ) -> Result<(), CodememError> {
85        match event {
86            crate::watch::WatchEvent::FileChanged(path)
87            | crate::watch::WatchEvent::FileCreated(path) => {
88                self.index_single_file(path, namespace, project_root)?;
89            }
90            crate::watch::WatchEvent::FileDeleted(path) => {
91                // Relativize the deleted path so the node ID matches what was indexed.
92                let rel = if let Some(root) = project_root {
93                    path.strip_prefix(root)
94                        .unwrap_or(path)
95                        .to_string_lossy()
96                        .to_string()
97                } else {
98                    path.to_string_lossy().to_string()
99                };
100                self.cleanup_file_nodes(&rel)?;
101            }
102        }
103        Ok(())
104    }
105
106    /// Index (or re-index) a single file: parse it, persist nodes/edges/embeddings,
107    /// and update the index cache.
108    ///
109    /// `project_root` is used to relativize the absolute `path` so node IDs are
110    /// portable. If `None`, the path is stored as-is (absolute).
111    ///
112    /// Uses SHA-256 hash dedup to skip re-indexing when content is unchanged.
113    /// This prevents duplicate work when both the PostToolUse hook and the
114    /// background file watcher fire for the same edit.
115    fn index_single_file(
116        &self,
117        path: &Path,
118        namespace: Option<&str>,
119        project_root: Option<&Path>,
120    ) -> Result<(), CodememError> {
121        let content = std::fs::read(path)?;
122
123        let path_str = if let Some(root) = project_root {
124            path.strip_prefix(root)
125                .unwrap_or(path)
126                .to_string_lossy()
127                .to_string()
128        } else {
129            path.to_string_lossy().to_string()
130        };
131
132        // SHA-256 dedup: skip if content unchanged since last index.
133        // Uses cached ChangeDetector to avoid reloading all hashes from storage per file.
134        let hash = {
135            let mut cd_guard = self
136                .change_detector
137                .lock()
138                .map_err(|_| CodememError::LockPoisoned("change_detector".into()))?;
139            let cd = cd_guard.get_or_insert_with(|| {
140                let mut cd = index::incremental::ChangeDetector::new();
141                cd.load_from_storage(&*self.storage);
142                cd
143            });
144            let (changed, hash) = cd.check_changed(&path_str, &content);
145            if !changed {
146                tracing::debug!("Skipping unchanged file: {path_str}");
147                return Ok(());
148            }
149            hash
150        };
151
152        let parser = index::CodeParser::new();
153
154        let parse_result = match parser.parse_file(&path_str, &content) {
155            Some(pr) => pr,
156            None => return Ok(()), // Unsupported file type or parse failure
157        };
158
159        // Build a minimal IndexAndResolveResult for this single file
160        let mut file_paths = HashSet::new();
161        file_paths.insert(parse_result.file_path.clone());
162
163        let mut resolver = index::ReferenceResolver::new();
164        resolver.add_symbols(&parse_result.symbols);
165        let edges = resolver.resolve_all(&parse_result.references);
166
167        let results = IndexAndResolveResult {
168            index: index::IndexResult {
169                files_scanned: 1,
170                files_parsed: 1,
171                files_skipped: 0,
172                total_symbols: parse_result.symbols.len(),
173                total_references: parse_result.references.len(),
174                total_chunks: parse_result.chunks.len(),
175                parse_results: Vec::new(),
176            },
177            symbols: parse_result.symbols,
178            references: parse_result.references,
179            chunks: parse_result.chunks,
180            file_paths,
181            edges,
182            root_path: project_root
183                .map(|p| p.to_path_buf())
184                .unwrap_or_else(|| path.to_path_buf()),
185        };
186
187        self.persist_index_results(&results, namespace)?;
188
189        // Record new hash in the cached detector after successful persist
190        if let Ok(mut cd_guard) = self.change_detector.lock() {
191            if let Some(cd) = cd_guard.as_mut() {
192                cd.record_hash(&path_str, hash);
193                if let Err(e) = cd.save_to_storage(&*self.storage) {
194                    tracing::warn!("Failed to save file hash for {path_str}: {e}");
195                }
196            }
197        }
198
199        Ok(())
200    }
201
202    // ── A2b: Symbol-Level Diff on Re-index ────────────────────────────
203
204    /// Remove symbols that existed for a file before re-indexing but are no
205    /// longer present in the new parse results. Returns count of cleaned symbols.
206    ///
207    /// For code→code edges (CALLS, IMPORTS, etc.), performs a hard delete.
208    /// For memory→symbol edges, creates a live redirected edge pointing to the
209    /// parent file node, preserving the memory→file connection so recall can
210    /// still traverse it. The original edge is then deleted along with the
211    /// stale symbol node.
212    ///
213    /// `old_symbol_ids` should be the set of symbol IDs that existed for this
214    /// file before re-indexing (collected from the in-memory graph by the caller
215    /// in a single pass across all files).
216    pub fn cleanup_stale_symbols(
217        &self,
218        file_path: &str,
219        old_symbol_ids: &HashSet<String>,
220        new_symbol_ids: &HashSet<String>,
221    ) -> Result<usize, CodememError> {
222        // Compute stale set: symbols that existed before but are not in the new parse
223        let stale_ids: Vec<&String> = old_symbol_ids
224            .iter()
225            .filter(|id| !new_symbol_ids.contains(*id))
226            .collect();
227
228        if stale_ids.is_empty() {
229            return Ok(0);
230        }
231
232        let count = stale_ids.len();
233        tracing::info!(
234            "Cleaning up {count} stale symbols for {file_path}: {:?}",
235            stale_ids
236        );
237
238        let file_node_id = format!("file:{file_path}");
239        let mut redirected_pairs: std::collections::HashSet<(String, String)> =
240            std::collections::HashSet::new();
241        let mut redirected_edges: Vec<codemem_core::Edge> = Vec::new();
242        for sym_id in &stale_ids {
243            // Before deleting the symbol, redirect memory→symbol edges to the
244            // parent file node, preserving historical context.
245            // Memory node IDs are UUIDs (no known prefix like sym:/file:/chunk:).
246            let edges = self.storage.get_edges_for_node(sym_id.as_str())?;
247            for edge in &edges {
248                let other = if edge.src.as_str() == sym_id.as_str() {
249                    &edge.dst
250                } else {
251                    &edge.src
252                };
253                let is_code_node = other.starts_with("sym:")
254                    || other.starts_with("file:")
255                    || other.starts_with("chunk:")
256                    || other.starts_with("pkg:");
257                if !is_code_node {
258                    // Skip if we already redirected this memory→file pair
259                    let pair = (other.to_string(), file_node_id.clone());
260                    if !redirected_pairs.insert(pair) {
261                        continue;
262                    }
263                    let mut redirected = edge.clone();
264                    if redirected.src.as_str() == sym_id.as_str() {
265                        redirected.src = file_node_id.clone();
266                    } else {
267                        redirected.dst = file_node_id.clone();
268                    }
269                    // Don't set valid_to — the redirect should be a live,
270                    // queryable edge so recall can still traverse memory→file.
271                    redirected.id = format!("{}-redirected", edge.id);
272                    if let Err(e) = self.storage.insert_graph_edge(&redirected) {
273                        tracing::warn!("Failed to redirect memory edge {}: {e}", edge.id);
274                    }
275                    redirected_edges.push(redirected);
276                }
277            }
278
279            // Delete all edges and the node itself
280            if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
281                tracing::warn!("Failed to delete edges for stale symbol {sym_id}: {e}");
282            }
283            if let Err(e) = self.storage.delete_graph_node(sym_id) {
284                tracing::warn!("Failed to delete stale symbol node {sym_id}: {e}");
285            }
286            if let Err(e) = self.storage.delete_embedding(sym_id) {
287                tracing::warn!("Failed to delete embedding for stale symbol {sym_id}: {e}");
288            }
289        }
290
291        // Clean up in-memory graph and vector index
292        {
293            let mut graph = self.lock_graph()?;
294            for sym_id in &stale_ids {
295                if let Err(e) = graph.remove_node(sym_id.as_str()) {
296                    tracing::warn!("Failed to remove stale {sym_id} from in-memory graph: {e}");
297                }
298            }
299            // Add redirected memory→file edges so they're visible to
300            // in-memory traversal (BFS, PageRank, recall) during this session.
301            for edge in redirected_edges {
302                let _ = graph.add_edge(edge);
303            }
304        }
305        {
306            let mut vec = self.lock_vector()?;
307            for sym_id in &stale_ids {
308                if let Err(e) = vec.remove(sym_id.as_str()) {
309                    tracing::warn!("Failed to remove stale {sym_id} from vector index: {e}");
310                }
311            }
312        }
313
314        Ok(count)
315    }
316
317    // ── A3: File Deletion Cleanup ───────────────────────────────────────
318
319    /// Remove graph nodes, edges, and embeddings for a single deleted file.
320    fn cleanup_file_nodes(&self, file_path: &str) -> Result<(), CodememError> {
321        let file_node_id = format!("file:{file_path}");
322
323        // Remove all chunk nodes for this file
324        let chunk_prefix = format!("chunk:{file_path}:");
325        if let Err(e) = self.storage.delete_graph_nodes_by_prefix(&chunk_prefix) {
326            tracing::warn!("Failed to delete chunk nodes for {file_path}: {e}");
327        }
328
329        // Remove symbol nodes for this file by checking graph
330        let graph = self.lock_graph()?;
331        let sym_ids: Vec<String> = graph
332            .get_all_nodes()
333            .into_iter()
334            .filter(|n| {
335                n.id.starts_with("sym:")
336                    && n.payload.get("file_path").and_then(|v| v.as_str()) == Some(file_path)
337            })
338            .map(|n| n.id.clone())
339            .collect();
340        drop(graph);
341
342        for sym_id in &sym_ids {
343            if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
344                tracing::warn!("Failed to delete graph edges for {sym_id}: {e}");
345            }
346            if let Err(e) = self.storage.delete_graph_node(sym_id) {
347                tracing::warn!("Failed to delete graph node {sym_id}: {e}");
348            }
349            if let Err(e) = self.storage.delete_embedding(sym_id) {
350                tracing::warn!("Failed to delete embedding {sym_id}: {e}");
351            }
352        }
353
354        // Remove file node itself
355        if let Err(e) = self.storage.delete_graph_edges_for_node(&file_node_id) {
356            tracing::warn!("Failed to delete graph edges for {file_node_id}: {e}");
357        }
358        if let Err(e) = self.storage.delete_graph_node(&file_node_id) {
359            tracing::warn!("Failed to delete graph node {file_node_id}: {e}");
360        }
361
362        // Clean up in-memory graph
363        let mut graph = self.lock_graph()?;
364        for sym_id in &sym_ids {
365            if let Err(e) = graph.remove_node(sym_id) {
366                tracing::warn!("Failed to remove {sym_id} from in-memory graph: {e}");
367            }
368        }
369        // Remove chunk nodes from in-memory graph
370        let chunk_ids: Vec<String> = graph
371            .get_all_nodes()
372            .into_iter()
373            .filter(|n| n.id.starts_with(&format!("chunk:{file_path}:")))
374            .map(|n| n.id.clone())
375            .collect();
376        for chunk_id in &chunk_ids {
377            if let Err(e) = graph.remove_node(chunk_id) {
378                tracing::warn!("Failed to remove {chunk_id} from in-memory graph: {e}");
379            }
380        }
381        if let Err(e) = graph.remove_node(&file_node_id) {
382            tracing::warn!("Failed to remove {file_node_id} from in-memory graph: {e}");
383        }
384        drop(graph);
385
386        // Remove stale embeddings from vector index
387        let mut vec = self.lock_vector()?;
388        for sym_id in &sym_ids {
389            if let Err(e) = vec.remove(sym_id) {
390                tracing::warn!("Failed to remove {sym_id} from vector index: {e}");
391            }
392        }
393        for chunk_id in &chunk_ids {
394            if let Err(e) = vec.remove(chunk_id) {
395                tracing::warn!("Failed to remove {chunk_id} from vector index: {e}");
396            }
397        }
398        drop(vec);
399
400        self.save_index();
401        Ok(())
402    }
403
404    // ── A3b: Orphan Detection ─────────────────────────────────────────
405
406    /// Scan for orphaned symbol/chunk nodes whose files no longer exist on disk.
407    /// Also cleans up dangling edges (src or dst node doesn't exist).
408    /// Returns `(symbols_cleaned, edges_cleaned)`.
409    ///
410    /// When `project_root` is `None`, file-existence checks are skipped
411    /// (only dangling edge cleanup runs) to avoid CWD-dependent path
412    /// resolution that could cause mass deletion.
413    pub fn detect_orphans(
414        &self,
415        project_root: Option<&Path>,
416    ) -> Result<(usize, usize), CodememError> {
417        // Use storage for both nodes and edges to avoid in-memory/storage sync races.
418        let all_nodes = self.storage.all_graph_nodes()?;
419        let node_ids: HashSet<String> = all_nodes.iter().map(|n| n.id.clone()).collect();
420
421        let mut orphan_sym_ids: Vec<String> = Vec::new();
422
423        // Only check file existence when we have a known project root.
424        // Without it, relative paths resolve against CWD which may be wrong.
425        if let Some(root) = project_root {
426            for node in &all_nodes {
427                if !node.id.starts_with("sym:") && !node.id.starts_with("chunk:") {
428                    continue;
429                }
430                let file_path = match node.payload.get("file_path").and_then(|v| v.as_str()) {
431                    Some(fp) => fp,
432                    None => continue,
433                };
434                let abs_path = root.join(file_path);
435                if !abs_path.exists() {
436                    orphan_sym_ids.push(node.id.clone());
437                }
438            }
439        }
440
441        // Also find dangling edges (src or dst doesn't exist in graph)
442        let all_edges = self.storage.all_graph_edges()?;
443        let mut dangling_edge_ids: Vec<String> = Vec::new();
444        for edge in &all_edges {
445            if !node_ids.contains(&edge.src) || !node_ids.contains(&edge.dst) {
446                dangling_edge_ids.push(edge.id.clone());
447            }
448        }
449
450        let symbols_cleaned = orphan_sym_ids.len();
451
452        // Clean up orphan nodes
453        for sym_id in &orphan_sym_ids {
454            if let Err(e) = self.storage.delete_graph_edges_for_node(sym_id) {
455                tracing::warn!("Orphan cleanup: failed to delete edges for {sym_id}: {e}");
456            }
457            if let Err(e) = self.storage.delete_graph_node(sym_id) {
458                tracing::warn!("Orphan cleanup: failed to delete node {sym_id}: {e}");
459            }
460            if let Err(e) = self.storage.delete_embedding(sym_id) {
461                tracing::warn!("Orphan cleanup: failed to delete embedding {sym_id}: {e}");
462            }
463        }
464
465        // Clean up orphan nodes from in-memory graph + vector
466        if !orphan_sym_ids.is_empty() {
467            if let Ok(mut graph) = self.lock_graph() {
468                for sym_id in &orphan_sym_ids {
469                    let _ = graph.remove_node(sym_id);
470                }
471            }
472            if let Ok(mut vec) = self.lock_vector() {
473                for sym_id in &orphan_sym_ids {
474                    let _ = vec.remove(sym_id);
475                }
476            }
477        }
478
479        // Delete dangling edges that weren't already removed by node cleanup
480        let mut edges_cleaned = 0usize;
481        for edge_id in &dangling_edge_ids {
482            match self.storage.delete_graph_edge(edge_id) {
483                Ok(true) => edges_cleaned += 1,
484                Ok(false) => {} // Already deleted by node cleanup above
485                Err(e) => {
486                    tracing::warn!("Orphan cleanup: failed to delete dangling edge {edge_id}: {e}");
487                }
488            }
489        }
490
491        if symbols_cleaned > 0 || edges_cleaned > 0 {
492            tracing::info!(
493                "Orphan scan: cleaned {symbols_cleaned} symbol/chunk nodes, {edges_cleaned} dangling edges"
494            );
495        }
496
497        Ok((symbols_cleaned, edges_cleaned))
498    }
499
500    // ── A4: Unified Analyze Pipeline ────────────────────────────────────
501
502    /// Full analysis pipeline: index → persist → enrich → recompute centrality.
503    ///
504    /// This is the single entry point for all callers (CLI, MCP, API).
505    /// Supports incremental indexing via `ChangeDetector`, progress callbacks,
506    /// and returns comprehensive results.
507    pub fn analyze(&self, options: AnalyzeOptions<'_>) -> Result<AnalyzeResult, CodememError> {
508        let root = options.path;
509
510        // Eagerly initialize embeddings/vector/BM25 for the full analysis pipeline.
511        // This triggers lazy init so that embed_and_persist() finds them ready.
512        drop(self.lock_embeddings());
513        drop(self.lock_vector());
514        drop(self.lock_bm25());
515
516        // 1. Index
517        let mut indexer = match options.change_detector {
518            Some(cd) => Indexer::with_change_detector(cd),
519            None => Indexer::new(),
520        };
521        let resolved = indexer.index_and_resolve(root)?;
522
523        // 2. Persist (with or without progress callback)
524        let persist = if let Some(ref on_progress) = options.progress {
525            self.persist_index_results_with_progress(
526                &resolved,
527                Some(options.namespace),
528                |done, total| {
529                    on_progress(AnalyzeProgress::Embedding { done, total });
530                },
531            )?
532        } else {
533            self.persist_index_results(&resolved, Some(options.namespace))?
534        };
535
536        // Cache results for structural queries
537        {
538            if let Ok(mut cache) = self.lock_index_cache() {
539                *cache = Some(crate::IndexCache {
540                    symbols: resolved.symbols,
541                    chunks: resolved.chunks,
542                    root_path: root.to_string_lossy().to_string(),
543                });
544            }
545        }
546
547        // 3. Enrich
548        let path_str = root.to_str().unwrap_or("");
549        let enrichment = self.run_enrichments(
550            path_str,
551            &[],
552            options.git_days,
553            Some(options.namespace),
554            None,
555        );
556
557        // 4. Recompute centrality
558        self.lock_graph()?.recompute_centrality();
559
560        // 5. Compute summary stats
561        let top_nodes = self.find_important_nodes(10, 0.85).unwrap_or_default();
562        let community_count = self.louvain_communities(1.0).map(|c| c.len()).unwrap_or(0);
563
564        // 6. Save indexes
565        self.save_index();
566
567        // Save incremental state
568        indexer.change_detector().save_to_storage(self.storage())?;
569
570        Ok(AnalyzeResult {
571            files_parsed: resolved.index.files_parsed,
572            files_skipped: resolved.index.files_skipped,
573            symbols_found: resolved.index.total_symbols,
574            edges_resolved: persist.edges_resolved,
575            chunks_stored: persist.chunks_stored,
576            symbols_embedded: persist.symbols_embedded,
577            chunks_embedded: persist.chunks_embedded,
578            chunks_pruned: persist.chunks_pruned,
579            symbols_pruned: persist.symbols_pruned,
580            enrichment_results: enrichment.results,
581            total_insights: enrichment.total_insights,
582            top_nodes,
583            community_count,
584        })
585    }
586
587    // ── A8: Session Context Synthesis ───────────────────────────────────
588
589    /// Synthesize context for a new session: recent memories, pending analyses,
590    /// active patterns, and last session summary.
591    pub fn session_context(&self, namespace: Option<&str>) -> Result<SessionContext, CodememError> {
592        let now = chrono::Utc::now();
593        let cutoff_24h = now - chrono::Duration::hours(24);
594
595        // 1. Recent memories (last 24h)
596        let ids = match namespace {
597            Some(ns) => self.storage.list_memory_ids_for_namespace(ns)?,
598            None => self.storage.list_memory_ids()?,
599        };
600
601        let mut recent_memories = Vec::new();
602        let mut pending_analyses = Vec::new();
603
604        for id in ids.iter().rev().take(200) {
605            if let Ok(Some(m)) = self.storage.get_memory_no_touch(id) {
606                // Collect pending analyses
607                if m.tags.contains(&"pending-analysis".to_string()) {
608                    pending_analyses.push(m.clone());
609                }
610                // Collect recent memories from last 24h
611                if m.created_at >= cutoff_24h {
612                    recent_memories.push(m);
613                }
614                if recent_memories.len() >= 50 && pending_analyses.len() >= 10 {
615                    break;
616                }
617            }
618        }
619
620        // 2. Active patterns
621        let session_count = self.storage.session_count(namespace).unwrap_or(1).max(1);
622        let active_patterns = patterns::detect_patterns(
623            &*self.storage,
624            namespace,
625            2, // min_frequency
626            session_count,
627        )
628        .unwrap_or_default();
629
630        // 3. Last session summary
631        let last_session_summary = self
632            .storage
633            .list_sessions(namespace, 1)?
634            .into_iter()
635            .next()
636            .and_then(|s| s.summary);
637
638        Ok(SessionContext {
639            recent_memories,
640            pending_analyses,
641            active_patterns,
642            last_session_summary,
643        })
644    }
645}
646
647// ── Result Types ────────────────────────────────────────────────────────────
648
649/// Options for the unified `analyze()` pipeline.
650pub struct AnalyzeOptions<'a> {
651    pub path: &'a Path,
652    pub namespace: &'a str,
653    pub git_days: u64,
654    pub change_detector: Option<index::incremental::ChangeDetector>,
655    pub progress: Option<Box<dyn Fn(AnalyzeProgress) + Send + 'a>>,
656}
657
/// Progress events emitted during analysis.
///
/// Events are plain values and can be compared, which makes them easy to
/// assert on in callbacks and tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AnalyzeProgress {
    /// Embedding progress: `done` items processed out of `total`.
    Embedding { done: usize, total: usize },
}
663
664/// Result of the unified `analyze()` pipeline.
665#[derive(Debug)]
666pub struct AnalyzeResult {
667    pub files_parsed: usize,
668    pub files_skipped: usize,
669    pub symbols_found: usize,
670    pub edges_resolved: usize,
671    pub chunks_stored: usize,
672    pub symbols_embedded: usize,
673    pub chunks_embedded: usize,
674    pub chunks_pruned: usize,
675    pub symbols_pruned: usize,
676    pub enrichment_results: serde_json::Value,
677    pub total_insights: usize,
678    pub top_nodes: Vec<crate::graph_ops::RankedNode>,
679    pub community_count: usize,
680}
681
682/// Session context synthesized at session start.
683#[derive(Debug)]
684pub struct SessionContext {
685    /// Memories created in the last 24 hours.
686    pub recent_memories: Vec<MemoryNode>,
687    /// Memories tagged `pending-analysis` awaiting code-mapper review.
688    pub pending_analyses: Vec<MemoryNode>,
689    /// Cross-session patterns detected with sufficient frequency.
690    pub active_patterns: Vec<DetectedPattern>,
691    /// Summary text from the most recent session (if any).
692    pub last_session_summary: Option<String>,
693}
694
695#[cfg(test)]
696mod tests {
697    use super::*;
698    use codemem_core::{Edge, GraphBackend, GraphNode, NodeKind, RelationshipType};
699    use std::collections::{HashMap, HashSet};
700
701    /// Create a test engine backed by a temporary database.
702    fn test_engine() -> CodememEngine {
703        let dir = tempfile::tempdir().unwrap();
704        let db_path = dir.path().join("test.db");
705        // Keep the tempdir alive by leaking it (tests are short-lived).
706        let _ = Box::leak(Box::new(dir));
707        CodememEngine::from_db_path(&db_path).unwrap()
708    }
709
710    fn graph_node(id: &str, kind: NodeKind, file_path: Option<&str>) -> GraphNode {
711        let mut payload = HashMap::new();
712        if let Some(fp) = file_path {
713            payload.insert(
714                "file_path".to_string(),
715                serde_json::Value::String(fp.to_string()),
716            );
717        }
718        GraphNode {
719            id: id.to_string(),
720            kind,
721            label: id.to_string(),
722            payload,
723            centrality: 0.0,
724            memory_id: None,
725            namespace: None,
726        }
727    }
728
729    fn edge(src: &str, dst: &str, rel: RelationshipType) -> Edge {
730        Edge {
731            id: format!("{rel}:{src}->{dst}"),
732            src: src.to_string(),
733            dst: dst.to_string(),
734            relationship: rel,
735            weight: 1.0,
736            properties: HashMap::new(),
737            created_at: chrono::Utc::now(),
738            valid_from: None,
739            valid_to: None,
740        }
741    }
742
743    // ── cleanup_stale_symbols tests ──────────────────────────────────────
744
/// A symbol listed in `old_ids` but missing from `new_ids` is removed from the
/// in-memory graph, while a symbol present in both sets is kept.
#[test]
fn cleanup_stale_symbols_deletes_stale_nodes() {
    const FILE_ID: &str = "file:src/a.rs";
    const KEEP_ID: &str = "sym:a::keep";
    const STALE_ID: &str = "sym:a::stale";

    let engine = test_engine();

    // One file with two symbols; only `KEEP_ID` appears in the new id set.
    let file = graph_node(FILE_ID, NodeKind::File, None);
    let sym_keep = graph_node(KEEP_ID, NodeKind::Function, Some("src/a.rs"));
    let sym_stale = graph_node(STALE_ID, NodeKind::Function, Some("src/a.rs"));

    // Scoped so the graph handle is released before `cleanup_stale_symbols` runs.
    {
        let mut graph = engine.lock_graph().unwrap();
        for node in [&file, &sym_keep, &sym_stale] {
            graph.add_node(node.clone()).unwrap();
        }
        for symbol_id in [KEEP_ID, STALE_ID] {
            graph
                .add_edge(edge(FILE_ID, symbol_id, RelationshipType::Contains))
                .unwrap();
        }
    }

    // Mirror the same nodes and edges into storage so cleanup can find edges.
    // Insert results are intentionally ignored here.
    for node in [&file, &sym_keep, &sym_stale] {
        let _ = engine.storage.insert_graph_node(node);
    }
    for symbol_id in [KEEP_ID, STALE_ID] {
        let _ = engine
            .storage
            .insert_graph_edge(&edge(FILE_ID, symbol_id, RelationshipType::Contains));
    }

    let old_ids: HashSet<String> = HashSet::from([KEEP_ID.to_string(), STALE_ID.to_string()]);
    let new_ids: HashSet<String> = HashSet::from([KEEP_ID.to_string()]);

    let cleaned = engine
        .cleanup_stale_symbols("src/a.rs", &old_ids, &new_ids)
        .unwrap();
    assert_eq!(cleaned, 1);

    // Only the stale symbol should have disappeared from the in-memory graph.
    let graph = engine.lock_graph().unwrap();
    assert!(graph.get_node(STALE_ID).unwrap().is_none());
    assert!(graph.get_node(KEEP_ID).unwrap().is_some());
}
806
/// After a stale symbol linked to a memory node is cleaned up, the file node's
/// edges in the in-memory graph include a `-redirected` edge that touches the
/// memory node.
#[test]
fn cleanup_stale_symbols_redirects_memory_edges_to_graph() {
    const FILE_ID: &str = "file:src/a.rs";
    const STALE_ID: &str = "sym:a::old_fn";
    const MEMORY_ID: &str = "mem-uuid-123";

    let engine = test_engine();

    let file = graph_node(FILE_ID, NodeKind::File, None);
    let sym_stale = graph_node(STALE_ID, NodeKind::Function, Some("src/a.rs"));
    let mem = graph_node(MEMORY_ID, NodeKind::Memory, None);

    // Fresh edges are built per destination (graph, then storage): the file
    // contains the symbol, and the memory relates to the symbol.
    let build_edges = || {
        [
            edge(FILE_ID, STALE_ID, RelationshipType::Contains),
            edge(MEMORY_ID, STALE_ID, RelationshipType::RelatesTo),
        ]
    };

    // Scoped so the graph handle is released before `cleanup_stale_symbols` runs.
    {
        let mut graph = engine.lock_graph().unwrap();
        for node in [&file, &sym_stale, &mem] {
            graph.add_node(node.clone()).unwrap();
        }
        for link in build_edges() {
            graph.add_edge(link).unwrap();
        }
    }

    // Mirror nodes and edges into storage; insert results are intentionally ignored.
    for node in [&file, &sym_stale, &mem] {
        let _ = engine.storage.insert_graph_node(node);
    }
    for link in build_edges() {
        let _ = engine.storage.insert_graph_edge(&link);
    }

    let old_ids: HashSet<String> = HashSet::from([STALE_ID.to_string()]);
    let new_ids: HashSet<String> = HashSet::new();

    engine
        .cleanup_stale_symbols("src/a.rs", &old_ids, &new_ids)
        .unwrap();

    // Look for a redirected edge on the file node that involves the memory node.
    let graph = engine.lock_graph().unwrap();
    let file_edges = graph.get_edges(FILE_ID).unwrap();
    let has_redirect = file_edges
        .iter()
        .filter(|e| e.src == MEMORY_ID || e.dst == MEMORY_ID)
        .any(|e| e.id.contains("-redirected"));
    assert!(
        has_redirect,
        "redirected memory→file edge should be in the in-memory graph"
    );
}
865
/// When one memory node is linked to two stale symbols of the same file, the
/// file node ends up with exactly one `-redirected` edge in the in-memory graph.
#[test]
fn cleanup_stale_symbols_deduplicates_redirects() {
    const FILE_ID: &str = "file:src/a.rs";
    const MEMORY_ID: &str = "mem-uuid-456";
    const SYMBOL_IDS: [&str; 2] = ["sym:a::fn1", "sym:a::fn2"];

    let engine = test_engine();

    // Order matters for insertion below: file, fn1, fn2, memory.
    let nodes = vec![
        graph_node(FILE_ID, NodeKind::File, None),
        graph_node(SYMBOL_IDS[0], NodeKind::Function, Some("src/a.rs")),
        graph_node(SYMBOL_IDS[1], NodeKind::Function, Some("src/a.rs")),
        graph_node(MEMORY_ID, NodeKind::Memory, None),
    ];

    // Storage first: the same memory is linked to both symbols of the file.
    // Insert results are intentionally ignored.
    for node in &nodes {
        let _ = engine.storage.insert_graph_node(node);
    }
    for symbol_id in SYMBOL_IDS {
        let _ = engine.storage.insert_graph_edge(&edge(
            MEMORY_ID,
            symbol_id,
            RelationshipType::RelatesTo,
        ));
    }

    // Then the in-memory graph receives the nodes only (no edges).
    {
        let mut graph = engine.lock_graph().unwrap();
        for node in nodes {
            graph.add_node(node).unwrap();
        }
    }

    let old_ids: HashSet<String> = HashSet::from(SYMBOL_IDS.map(String::from));
    let new_ids: HashSet<String> = HashSet::new();

    engine
        .cleanup_stale_symbols("src/a.rs", &old_ids, &new_ids)
        .unwrap();

    // Exactly one redirect edge is expected, not one per stale symbol.
    let graph = engine.lock_graph().unwrap();
    let file_edges = graph.get_edges(FILE_ID).unwrap();
    let mut redirect_count = 0;
    for e in file_edges.iter() {
        if e.id.contains("-redirected") {
            redirect_count += 1;
        }
    }
    assert_eq!(
        redirect_count, 1,
        "should have exactly 1 redirected edge, got {redirect_count}"
    );
}
921
922    // ── detect_orphans tests ─────────────────────────────────────────────
923
/// With no project root (`None`), `detect_orphans` reports zero cleaned symbols
/// even though the symbol's `file_path` points at a file that does not exist.
#[test]
fn detect_orphans_skips_file_check_when_no_root() {
    let engine = test_engine();

    // A symbol whose recorded file path definitely doesn't exist on disk.
    let orphan_candidate = graph_node(
        "sym:nonexistent::fn",
        NodeKind::Function,
        Some("does/not/exist.rs"),
    );
    let _ = engine.storage.insert_graph_node(&orphan_candidate);
    // The graph handle is a temporary and is released at the end of this statement.
    engine
        .lock_graph()
        .unwrap()
        .add_node(orphan_candidate)
        .unwrap();

    // Without a root there is nothing to resolve paths against, so the
    // file-existence check must be skipped and nothing deleted.
    let symbols_cleaned = engine.detect_orphans(None).unwrap().0;
    assert_eq!(
        symbols_cleaned, 0,
        "detect_orphans(None) should not delete nodes based on file existence"
    );
}
947
/// With a project root supplied, a symbol whose `file_path` does not exist
/// under that root is counted as cleaned.
#[test]
fn detect_orphans_removes_missing_files_with_root() {
    // `dir` must stay alive for the whole test: it is passed as the project root.
    let dir = tempfile::tempdir().unwrap();
    let engine = CodememEngine::from_db_path(&dir.path().join("test.db")).unwrap();

    // `src/missing.rs` is never created under the temporary root.
    let missing_symbol = graph_node(
        "sym:missing::fn",
        NodeKind::Function,
        Some("src/missing.rs"),
    );
    let _ = engine.storage.insert_graph_node(&missing_symbol);
    // The graph handle is a temporary and is released at the end of this statement.
    engine
        .lock_graph()
        .unwrap()
        .add_node(missing_symbol)
        .unwrap();

    let symbols_cleaned = engine.detect_orphans(Some(dir.path())).unwrap().0;
    assert_eq!(symbols_cleaned, 1);
}
969
/// With a project root supplied, a symbol whose `file_path` exists under that
/// root is not cleaned.
#[test]
fn detect_orphans_keeps_existing_files() {
    // `dir` must stay alive for the whole test: it holds the source file and
    // is passed as the project root.
    let dir = tempfile::tempdir().unwrap();
    let root = dir.path();
    let engine = CodememEngine::from_db_path(&root.join("test.db")).unwrap();

    // Materialize `src/exists.rs` on disk so the symbol is not an orphan.
    let src_dir = root.join("src");
    std::fs::create_dir_all(&src_dir).unwrap();
    let source_file = src_dir.join("exists.rs");
    std::fs::write(source_file, "fn main() {}").unwrap();

    let existing_symbol = graph_node(
        "sym:exists::main",
        NodeKind::Function,
        Some("src/exists.rs"),
    );
    let _ = engine.storage.insert_graph_node(&existing_symbol);
    // The graph handle is a temporary and is released at the end of this statement.
    engine
        .lock_graph()
        .unwrap()
        .add_node(existing_symbol)
        .unwrap();

    let symbols_cleaned = engine.detect_orphans(Some(root)).unwrap().0;
    assert_eq!(symbols_cleaned, 0);
}
995
996    // Note: dangling edge cleanup in detect_orphans is a defensive no-op
997    // because graph_edges has ON DELETE CASCADE foreign keys on src/dst.
998    // Deleting a node automatically cascades to its edges in SQLite.
999}