Skip to main content

codemem_engine/enrichment/
mod.rs

1//! Enrichment logic: store_insight, git history, security, performance,
2//! complexity, architecture, test mapping, API surface, doc coverage,
3//! change impact, code smells, hot+complex correlation, blame/ownership,
4//! enhanced security scanning, and quality stratification.
5
6mod api_surface;
7mod architecture;
8mod blame;
9mod change_impact;
10mod code_smells;
11mod complexity;
12pub(crate) mod dead_code;
13mod doc_coverage;
14mod git;
15mod hot_complex;
16mod performance;
17mod quality;
18mod security;
19mod security_scan;
20pub(crate) mod temporal;
21mod test_mapping;
22
23use crate::CodememEngine;
24use codemem_core::{Edge, MemoryNode, MemoryType, RelationshipType};
25use serde_json::json;
26use std::collections::HashMap;
27use std::path::{Path, PathBuf};
28
/// Turn a file path recorded for a project into the location to read it from.
///
/// * With `Some(project_root)` the result is `project_root.join(rel_path)`.
///   `Path::join` semantics apply, so an absolute `rel_path` replaces the root
///   instead of being appended to it.
/// * With `None` the path is returned unchanged, just converted to a `PathBuf`.
pub(crate) fn resolve_path(rel_path: &str, project_root: Option<&Path>) -> PathBuf {
    project_root.map_or_else(|| PathBuf::from(rel_path), |base| base.join(rel_path))
}
38
39/// Result from an enrichment operation.
40pub struct EnrichResult {
41    pub insights_stored: usize,
42    pub details: serde_json::Value,
43}
44
45/// Result from running multiple enrichment analyses.
46pub struct EnrichmentPipelineResult {
47    /// JSON object with one key per analysis (e.g. "git", "security", etc.).
48    pub results: serde_json::Value,
49    /// Total number of insights stored across all analyses.
50    pub total_insights: usize,
51}
52
53impl CodememEngine {
54    /// Store an Insight memory through a 3-phase pipeline:
55    /// 1. Semantic dedup check (reject near-duplicates before persisting)
56    /// 2. Core persist via `persist_memory_no_save` (storage, BM25, graph node, embedding)
57    /// 3. Post-step: RELATES_TO edges to linked nodes + auto-link to code nodes
58    ///
59    /// Returns the memory ID if inserted, or None if it was a duplicate.
60    /// Does NOT call `save_index()` -- callers should batch that at the end.
61    pub fn store_insight(
62        &self,
63        content: &str,
64        track: &str,
65        tags: &[&str],
66        importance: f64,
67        namespace: Option<&str>,
68        links: &[String],
69    ) -> Option<String> {
70        let now = chrono::Utc::now();
71        let id = uuid::Uuid::new_v4().to_string();
72        let mut all_tags: Vec<String> =
73            vec![format!("track:{track}"), "static-analysis".to_string()];
74        all_tags.extend(tags.iter().map(|t| t.to_string()));
75
76        // ── Phase 1: Semantic dedup check ────────────────────────────────
77        // Compute enriched embedding and check for near-duplicates BEFORE persisting.
78        let enriched = self.enrich_memory_text(
79            content,
80            MemoryType::Insight,
81            &all_tags,
82            namespace,
83            Some(&id),
84        );
85        if let Ok(Some(emb_guard)) = self.lock_embeddings() {
86            if let Ok(embedding) = emb_guard.embed(&enriched) {
87                drop(emb_guard);
88                if let Ok(vec) = self.lock_vector() {
89                    let neighbors = vec.search(&embedding, 3).unwrap_or_default();
90                    for (neighbor_id, similarity) in &neighbors {
91                        if *neighbor_id == id {
92                            continue;
93                        }
94                        if (*similarity as f64) > self.config.enrichment.dedup_similarity_threshold
95                        {
96                            return None; // Too similar — reject before persisting
97                        }
98                    }
99                }
100            }
101        }
102
103        // ── Phase 2: Core persist via persist_memory_no_save ─────────────
104        let mut memory = MemoryNode::new(content, MemoryType::Insight);
105        memory.id = id.clone();
106        memory.importance = importance.clamp(0.0, 1.0);
107        memory.confidence = self.config.enrichment.insight_confidence;
108        memory.tags = all_tags;
109        memory.metadata = HashMap::from([
110            ("track".into(), json!(track)),
111            ("generated_by".into(), json!("enrichment_pipeline")),
112        ]);
113        memory.namespace = namespace.map(String::from);
114
115        if self.persist_memory_no_save(&memory).is_err() {
116            return None; // duplicate or error -- skip silently
117        }
118
119        // ── Phase 3: Post-step — RELATES_TO edges to linked nodes ────────
120        if !links.is_empty() {
121            if let Ok(mut graph) = self.lock_graph() {
122                for link_id in links {
123                    let edge = Edge {
124                        id: format!("{id}-RELATES_TO-{link_id}"),
125                        src: id.clone(),
126                        dst: link_id.clone(),
127                        relationship: RelationshipType::RelatesTo,
128                        weight: 0.3,
129                        properties: HashMap::new(),
130                        created_at: now,
131                        valid_from: None,
132                        valid_to: None,
133                    };
134                    let _ = self.storage.insert_graph_edge(&edge);
135                    let _ = graph.add_edge(edge);
136                }
137            }
138        }
139
140        // Auto-link to code nodes mentioned in content
141        self.auto_link_to_code_nodes(&id, content, links);
142
143        Some(id)
144    }
145
146    /// Run selected enrichment analyses (or all 14 if `analyses` is empty).
147    ///
148    /// Parameters:
149    /// - `path`: project root (needed for git, blame, change_impact, complexity, code_smells, security_scan)
150    /// - `analyses`: which analyses to run; empty = all (except change_impact which needs file_path)
151    /// - `days`: git history lookback days
152    /// - `namespace`: optional namespace filter
153    /// - `file_path`: optional, needed only for change_impact
154    pub fn run_enrichments(
155        &self,
156        path: &str,
157        analyses: &[String],
158        days: u64,
159        namespace: Option<&str>,
160        file_path: Option<&str>,
161    ) -> EnrichmentPipelineResult {
162        let run_all = analyses.is_empty();
163        let mut results = json!({});
164        let mut total_insights: usize = 0;
165
166        let root = Path::new(path);
167        let project_root = Some(root);
168
169        macro_rules! run_analysis {
170            ($name:expr, $call:expr) => {
171                if run_all || analyses.iter().any(|a| a == $name) {
172                    match $call {
173                        Ok(r) => {
174                            total_insights += r.insights_stored;
175                            results[$name] = r.details;
176                        }
177                        Err(e) => {
178                            results[$name] = json!({"error": format!("{e}")});
179                        }
180                    }
181                }
182            };
183        }
184
185        run_analysis!("git", self.enrich_git_history(path, days, namespace));
186        run_analysis!("security", self.enrich_security(namespace));
187        run_analysis!("performance", self.enrich_performance(10, namespace));
188        run_analysis!(
189            "complexity",
190            self.enrich_complexity(namespace, project_root)
191        );
192        run_analysis!(
193            "code_smells",
194            self.enrich_code_smells(namespace, project_root)
195        );
196        run_analysis!(
197            "security_scan",
198            self.enrich_security_scan(namespace, project_root)
199        );
200        run_analysis!("architecture", self.enrich_architecture(namespace));
201        run_analysis!("test_mapping", self.enrich_test_mapping(namespace));
202        run_analysis!("api_surface", self.enrich_api_surface(namespace));
203        run_analysis!("doc_coverage", self.enrich_doc_coverage(namespace));
204        run_analysis!("hot_complex", self.enrich_hot_complex(namespace));
205        run_analysis!("blame", self.enrich_blame(path, namespace));
206        run_analysis!("quality", self.enrich_quality_stratification(namespace));
207
208        // change_impact requires a file_path, so it is not included in run_all
209        if analyses.iter().any(|a| a == "change_impact") {
210            let fp = file_path.unwrap_or("");
211            if fp.is_empty() {
212                results["change_impact"] =
213                    json!({"error": "change_impact requires 'file_path' parameter"});
214            } else {
215                match self.enrich_change_impact(fp, namespace) {
216                    Ok(r) => {
217                        total_insights += r.insights_stored;
218                        results["change_impact"] = r.details;
219                    }
220                    Err(e) => {
221                        results["change_impact"] = json!({"error": format!("{e}")});
222                    }
223                }
224            }
225        }
226
227        EnrichmentPipelineResult {
228            results,
229            total_insights,
230        }
231    }
232
233    /// Store a Pattern memory for code smell detection (E7).
234    /// Importance is fixed at 0.5 for code smells.
235    /// Uses the full persist pipeline (storage → BM25 → graph → embedding → vector).
236    pub(super) fn store_pattern_memory(
237        &self,
238        content: &str,
239        namespace: Option<&str>,
240        links: &[String],
241    ) -> Option<String> {
242        let id = uuid::Uuid::new_v4().to_string();
243        let now = chrono::Utc::now();
244        let tags = vec![
245            "static-analysis".to_string(),
246            "track:code-smell".to_string(),
247        ];
248
249        let mut memory = MemoryNode::new(content, MemoryType::Pattern);
250        memory.id = id.clone();
251        memory.confidence = self.config.enrichment.insight_confidence;
252        memory.tags = tags;
253        memory.metadata = HashMap::from([
254            ("track".into(), json!("code-smell")),
255            ("generated_by".into(), json!("enrichment_pipeline")),
256        ]);
257        memory.namespace = namespace.map(String::from);
258
259        if self.persist_memory_no_save(&memory).is_err() {
260            return None;
261        }
262
263        // Post-step: RELATES_TO edges to linked nodes
264        if !links.is_empty() {
265            if let Ok(mut graph) = self.lock_graph() {
266                for link_id in links {
267                    let edge = Edge {
268                        id: format!("{id}-RELATES_TO-{link_id}"),
269                        src: id.clone(),
270                        dst: link_id.clone(),
271                        relationship: RelationshipType::RelatesTo,
272                        weight: 0.3,
273                        properties: HashMap::new(),
274                        created_at: now,
275                        valid_from: None,
276                        valid_to: None,
277                    };
278                    let _ = self.storage.insert_graph_edge(&edge);
279                    let _ = graph.add_edge(edge);
280                }
281            }
282        }
283
284        self.auto_link_to_code_nodes(&id, content, links);
285
286        Some(id)
287    }
288}