Skip to main content

codemem_engine/enrichment/
mod.rs

1//! Enrichment logic: store_insight, git history, security, performance,
2//! complexity, architecture, test mapping, API surface, doc coverage,
3//! change impact, code smells, hot+complex correlation, blame/ownership,
4//! enhanced security scanning, and quality stratification.
5
6mod api_surface;
7mod architecture;
8mod blame;
9mod change_impact;
10mod code_smells;
11mod complexity;
12mod doc_coverage;
13mod git;
14mod hot_complex;
15mod performance;
16mod quality;
17mod security;
18mod security_scan;
19mod test_mapping;
20
21use crate::CodememEngine;
22use codemem_core::{Edge, GraphBackend, MemoryNode, MemoryType, RelationshipType, VectorBackend};
23use serde_json::json;
24use std::collections::HashMap;
25use std::path::{Path, PathBuf};
26
/// Build the on-disk path for `rel_path`, optionally anchored at a project root.
///
/// * `project_root = Some(root)` — returns `root.join(rel_path)`. Per
///   [`Path::join`], an absolute `rel_path` replaces `root` entirely.
/// * `project_root = None` — returns `rel_path` unchanged, as a [`PathBuf`].
pub(crate) fn resolve_path(rel_path: &str, project_root: Option<&Path>) -> PathBuf {
    project_root.map_or_else(|| PathBuf::from(rel_path), |base| base.join(rel_path))
}
36
37/// Result from an enrichment operation.
38pub struct EnrichResult {
39    pub insights_stored: usize,
40    pub details: serde_json::Value,
41}
42
43/// Result from running multiple enrichment analyses.
44pub struct EnrichmentPipelineResult {
45    /// JSON object with one key per analysis (e.g. "git", "security", etc.).
46    pub results: serde_json::Value,
47    /// Total number of insights stored across all analyses.
48    pub total_insights: usize,
49}
50
51impl CodememEngine {
52    /// Store an Insight memory through a 3-phase pipeline:
53    /// 1. Semantic dedup check (reject near-duplicates before persisting)
54    /// 2. Core persist via `persist_memory_no_save` (storage, BM25, graph node, embedding)
55    /// 3. Post-step: RELATES_TO edges to linked nodes + auto-link to code nodes
56    ///
57    /// Returns the memory ID if inserted, or None if it was a duplicate.
58    /// Does NOT call `save_index()` -- callers should batch that at the end.
59    pub fn store_insight(
60        &self,
61        content: &str,
62        track: &str,
63        tags: &[&str],
64        importance: f64,
65        namespace: Option<&str>,
66        links: &[String],
67    ) -> Option<String> {
68        let now = chrono::Utc::now();
69        let id = uuid::Uuid::new_v4().to_string();
70        let mut all_tags: Vec<String> =
71            vec![format!("track:{track}"), "static-analysis".to_string()];
72        all_tags.extend(tags.iter().map(|t| t.to_string()));
73
74        // ── Phase 1: Semantic dedup check ────────────────────────────────
75        // Compute enriched embedding and check for near-duplicates BEFORE persisting.
76        let enriched = self.enrich_memory_text(
77            content,
78            MemoryType::Insight,
79            &all_tags,
80            namespace,
81            Some(&id),
82        );
83        if let Ok(Some(emb_guard)) = self.lock_embeddings() {
84            if let Ok(embedding) = emb_guard.embed(&enriched) {
85                drop(emb_guard);
86                if let Ok(vec) = self.lock_vector() {
87                    let neighbors = vec.search(&embedding, 3).unwrap_or_default();
88                    for (neighbor_id, similarity) in &neighbors {
89                        if *neighbor_id == id {
90                            continue;
91                        }
92                        if (*similarity as f64) > self.config.enrichment.dedup_similarity_threshold
93                        {
94                            return None; // Too similar — reject before persisting
95                        }
96                    }
97                }
98            }
99        }
100
101        // ── Phase 2: Core persist via persist_memory_no_save ─────────────
102        let mut memory = MemoryNode::new(content, MemoryType::Insight);
103        memory.id = id.clone();
104        memory.importance = importance.clamp(0.0, 1.0);
105        memory.confidence = self.config.enrichment.insight_confidence;
106        memory.tags = all_tags;
107        memory.metadata = HashMap::from([
108            ("track".into(), json!(track)),
109            ("generated_by".into(), json!("enrichment_pipeline")),
110        ]);
111        memory.namespace = namespace.map(String::from);
112
113        if self.persist_memory_no_save(&memory).is_err() {
114            return None; // duplicate or error -- skip silently
115        }
116
117        // ── Phase 3: Post-step — RELATES_TO edges to linked nodes ────────
118        if !links.is_empty() {
119            if let Ok(mut graph) = self.lock_graph() {
120                for link_id in links {
121                    let edge = Edge {
122                        id: format!("{id}-RELATES_TO-{link_id}"),
123                        src: id.clone(),
124                        dst: link_id.clone(),
125                        relationship: RelationshipType::RelatesTo,
126                        weight: 0.3,
127                        properties: HashMap::new(),
128                        created_at: now,
129                        valid_from: None,
130                        valid_to: None,
131                    };
132                    let _ = self.storage.insert_graph_edge(&edge);
133                    let _ = graph.add_edge(edge);
134                }
135            }
136        }
137
138        // Auto-link to code nodes mentioned in content
139        self.auto_link_to_code_nodes(&id, content, links);
140
141        Some(id)
142    }
143
144    /// Run selected enrichment analyses (or all 14 if `analyses` is empty).
145    ///
146    /// Parameters:
147    /// - `path`: project root (needed for git, blame, change_impact, complexity, code_smells, security_scan)
148    /// - `analyses`: which analyses to run; empty = all (except change_impact which needs file_path)
149    /// - `days`: git history lookback days
150    /// - `namespace`: optional namespace filter
151    /// - `file_path`: optional, needed only for change_impact
152    pub fn run_enrichments(
153        &self,
154        path: &str,
155        analyses: &[String],
156        days: u64,
157        namespace: Option<&str>,
158        file_path: Option<&str>,
159    ) -> EnrichmentPipelineResult {
160        let run_all = analyses.is_empty();
161        let mut results = json!({});
162        let mut total_insights: usize = 0;
163
164        let root = Path::new(path);
165        let project_root = Some(root);
166
167        macro_rules! run_analysis {
168            ($name:expr, $call:expr) => {
169                if run_all || analyses.iter().any(|a| a == $name) {
170                    match $call {
171                        Ok(r) => {
172                            total_insights += r.insights_stored;
173                            results[$name] = r.details;
174                        }
175                        Err(e) => {
176                            results[$name] = json!({"error": format!("{e}")});
177                        }
178                    }
179                }
180            };
181        }
182
183        run_analysis!("git", self.enrich_git_history(path, days, namespace));
184        run_analysis!("security", self.enrich_security(namespace));
185        run_analysis!("performance", self.enrich_performance(10, namespace));
186        run_analysis!(
187            "complexity",
188            self.enrich_complexity(namespace, project_root)
189        );
190        run_analysis!(
191            "code_smells",
192            self.enrich_code_smells(namespace, project_root)
193        );
194        run_analysis!(
195            "security_scan",
196            self.enrich_security_scan(namespace, project_root)
197        );
198        run_analysis!("architecture", self.enrich_architecture(namespace));
199        run_analysis!("test_mapping", self.enrich_test_mapping(namespace));
200        run_analysis!("api_surface", self.enrich_api_surface(namespace));
201        run_analysis!("doc_coverage", self.enrich_doc_coverage(namespace));
202        run_analysis!("hot_complex", self.enrich_hot_complex(namespace));
203        run_analysis!("blame", self.enrich_blame(path, namespace));
204        run_analysis!("quality", self.enrich_quality_stratification(namespace));
205
206        // change_impact requires a file_path, so it is not included in run_all
207        if analyses.iter().any(|a| a == "change_impact") {
208            let fp = file_path.unwrap_or("");
209            if fp.is_empty() {
210                results["change_impact"] =
211                    json!({"error": "change_impact requires 'file_path' parameter"});
212            } else {
213                match self.enrich_change_impact(fp, namespace) {
214                    Ok(r) => {
215                        total_insights += r.insights_stored;
216                        results["change_impact"] = r.details;
217                    }
218                    Err(e) => {
219                        results["change_impact"] = json!({"error": format!("{e}")});
220                    }
221                }
222            }
223        }
224
225        EnrichmentPipelineResult {
226            results,
227            total_insights,
228        }
229    }
230
231    /// Store a Pattern memory for code smell detection (E7).
232    /// Importance is fixed at 0.5 for code smells.
233    /// Uses the full persist pipeline (storage → BM25 → graph → embedding → vector).
234    pub(super) fn store_pattern_memory(
235        &self,
236        content: &str,
237        namespace: Option<&str>,
238        links: &[String],
239    ) -> Option<String> {
240        let id = uuid::Uuid::new_v4().to_string();
241        let now = chrono::Utc::now();
242        let tags = vec![
243            "static-analysis".to_string(),
244            "track:code-smell".to_string(),
245        ];
246
247        let mut memory = MemoryNode::new(content, MemoryType::Pattern);
248        memory.id = id.clone();
249        memory.confidence = self.config.enrichment.insight_confidence;
250        memory.tags = tags;
251        memory.metadata = HashMap::from([
252            ("track".into(), json!("code-smell")),
253            ("generated_by".into(), json!("enrichment_pipeline")),
254        ]);
255        memory.namespace = namespace.map(String::from);
256
257        if self.persist_memory_no_save(&memory).is_err() {
258            return None;
259        }
260
261        // Post-step: RELATES_TO edges to linked nodes
262        if !links.is_empty() {
263            if let Ok(mut graph) = self.lock_graph() {
264                for link_id in links {
265                    let edge = Edge {
266                        id: format!("{id}-RELATES_TO-{link_id}"),
267                        src: id.clone(),
268                        dst: link_id.clone(),
269                        relationship: RelationshipType::RelatesTo,
270                        weight: 0.3,
271                        properties: HashMap::new(),
272                        created_at: now,
273                        valid_from: None,
274                        valid_to: None,
275                    };
276                    let _ = self.storage.insert_graph_edge(&edge);
277                    let _ = graph.add_edge(edge);
278                }
279            }
280        }
281
282        self.auto_link_to_code_nodes(&id, content, links);
283
284        Some(id)
285    }
286}