Skip to main content

codemem_engine/enrichment/
mod.rs

1//! Enrichment logic: store_insight, git history, security, performance,
2//! complexity, architecture, test mapping, API surface, doc coverage,
3//! change impact, code smells, hot+complex correlation, blame/ownership,
4//! enhanced security scanning, and quality stratification.
5
6mod api_surface;
7mod architecture;
8mod blame;
9mod change_impact;
10mod code_smells;
11mod complexity;
12mod doc_coverage;
13mod git;
14mod hot_complex;
15mod performance;
16mod quality;
17mod security;
18mod security_scan;
19pub(crate) mod temporal;
20mod test_mapping;
21
22use crate::CodememEngine;
23use codemem_core::{Edge, MemoryNode, MemoryType, RelationshipType};
24use serde_json::json;
25use std::collections::HashMap;
26use std::path::{Path, PathBuf};
27
/// Turn a file path recorded relative to the project into a usable `PathBuf`.
///
/// With a `project_root`, the result is `project_root.join(rel_path)`; per
/// `Path::join`, an absolute `rel_path` therefore replaces the root entirely.
/// Without a root, `rel_path` is returned unchanged as a `PathBuf`.
pub(crate) fn resolve_path(rel_path: &str, project_root: Option<&Path>) -> PathBuf {
    if let Some(base) = project_root {
        return base.join(rel_path);
    }
    PathBuf::from(rel_path)
}
37
38/// Result from an enrichment operation.
39pub struct EnrichResult {
40    pub insights_stored: usize,
41    pub details: serde_json::Value,
42}
43
44/// Result from running multiple enrichment analyses.
45pub struct EnrichmentPipelineResult {
46    /// JSON object with one key per analysis (e.g. "git", "security", etc.).
47    pub results: serde_json::Value,
48    /// Total number of insights stored across all analyses.
49    pub total_insights: usize,
50}
51
52impl CodememEngine {
53    /// Store an Insight memory through a 3-phase pipeline:
54    /// 1. Semantic dedup check (reject near-duplicates before persisting)
55    /// 2. Core persist via `persist_memory_no_save` (storage, BM25, graph node, embedding)
56    /// 3. Post-step: RELATES_TO edges to linked nodes + auto-link to code nodes
57    ///
58    /// Returns the memory ID if inserted, or None if it was a duplicate.
59    /// Does NOT call `save_index()` -- callers should batch that at the end.
60    pub fn store_insight(
61        &self,
62        content: &str,
63        track: &str,
64        tags: &[&str],
65        importance: f64,
66        namespace: Option<&str>,
67        links: &[String],
68    ) -> Option<String> {
69        let now = chrono::Utc::now();
70        let id = uuid::Uuid::new_v4().to_string();
71        let mut all_tags: Vec<String> =
72            vec![format!("track:{track}"), "static-analysis".to_string()];
73        all_tags.extend(tags.iter().map(|t| t.to_string()));
74
75        // ── Phase 1: Semantic dedup check ────────────────────────────────
76        // Compute enriched embedding and check for near-duplicates BEFORE persisting.
77        let enriched = self.enrich_memory_text(
78            content,
79            MemoryType::Insight,
80            &all_tags,
81            namespace,
82            Some(&id),
83        );
84        if let Ok(Some(emb_guard)) = self.lock_embeddings() {
85            if let Ok(embedding) = emb_guard.embed(&enriched) {
86                drop(emb_guard);
87                if let Ok(vec) = self.lock_vector() {
88                    let neighbors = vec.search(&embedding, 3).unwrap_or_default();
89                    for (neighbor_id, similarity) in &neighbors {
90                        if *neighbor_id == id {
91                            continue;
92                        }
93                        if (*similarity as f64) > self.config.enrichment.dedup_similarity_threshold
94                        {
95                            return None; // Too similar — reject before persisting
96                        }
97                    }
98                }
99            }
100        }
101
102        // ── Phase 2: Core persist via persist_memory_no_save ─────────────
103        let mut memory = MemoryNode::new(content, MemoryType::Insight);
104        memory.id = id.clone();
105        memory.importance = importance.clamp(0.0, 1.0);
106        memory.confidence = self.config.enrichment.insight_confidence;
107        memory.tags = all_tags;
108        memory.metadata = HashMap::from([
109            ("track".into(), json!(track)),
110            ("generated_by".into(), json!("enrichment_pipeline")),
111        ]);
112        memory.namespace = namespace.map(String::from);
113
114        if self.persist_memory_no_save(&memory).is_err() {
115            return None; // duplicate or error -- skip silently
116        }
117
118        // ── Phase 3: Post-step — RELATES_TO edges to linked nodes ────────
119        if !links.is_empty() {
120            if let Ok(mut graph) = self.lock_graph() {
121                for link_id in links {
122                    let edge = Edge {
123                        id: format!("{id}-RELATES_TO-{link_id}"),
124                        src: id.clone(),
125                        dst: link_id.clone(),
126                        relationship: RelationshipType::RelatesTo,
127                        weight: 0.3,
128                        properties: HashMap::new(),
129                        created_at: now,
130                        valid_from: None,
131                        valid_to: None,
132                    };
133                    let _ = self.storage.insert_graph_edge(&edge);
134                    let _ = graph.add_edge(edge);
135                }
136            }
137        }
138
139        // Auto-link to code nodes mentioned in content
140        self.auto_link_to_code_nodes(&id, content, links);
141
142        Some(id)
143    }
144
145    /// Run selected enrichment analyses (or all 14 if `analyses` is empty).
146    ///
147    /// Parameters:
148    /// - `path`: project root (needed for git, blame, change_impact, complexity, code_smells, security_scan)
149    /// - `analyses`: which analyses to run; empty = all (except change_impact which needs file_path)
150    /// - `days`: git history lookback days
151    /// - `namespace`: optional namespace filter
152    /// - `file_path`: optional, needed only for change_impact
153    pub fn run_enrichments(
154        &self,
155        path: &str,
156        analyses: &[String],
157        days: u64,
158        namespace: Option<&str>,
159        file_path: Option<&str>,
160    ) -> EnrichmentPipelineResult {
161        let run_all = analyses.is_empty();
162        let mut results = json!({});
163        let mut total_insights: usize = 0;
164
165        let root = Path::new(path);
166        let project_root = Some(root);
167
168        macro_rules! run_analysis {
169            ($name:expr, $call:expr) => {
170                if run_all || analyses.iter().any(|a| a == $name) {
171                    match $call {
172                        Ok(r) => {
173                            total_insights += r.insights_stored;
174                            results[$name] = r.details;
175                        }
176                        Err(e) => {
177                            results[$name] = json!({"error": format!("{e}")});
178                        }
179                    }
180                }
181            };
182        }
183
184        run_analysis!("git", self.enrich_git_history(path, days, namespace));
185        run_analysis!("security", self.enrich_security(namespace));
186        run_analysis!("performance", self.enrich_performance(10, namespace));
187        run_analysis!(
188            "complexity",
189            self.enrich_complexity(namespace, project_root)
190        );
191        run_analysis!(
192            "code_smells",
193            self.enrich_code_smells(namespace, project_root)
194        );
195        run_analysis!(
196            "security_scan",
197            self.enrich_security_scan(namespace, project_root)
198        );
199        run_analysis!("architecture", self.enrich_architecture(namespace));
200        run_analysis!("test_mapping", self.enrich_test_mapping(namespace));
201        run_analysis!("api_surface", self.enrich_api_surface(namespace));
202        run_analysis!("doc_coverage", self.enrich_doc_coverage(namespace));
203        run_analysis!("hot_complex", self.enrich_hot_complex(namespace));
204        run_analysis!("blame", self.enrich_blame(path, namespace));
205        run_analysis!("quality", self.enrich_quality_stratification(namespace));
206
207        // change_impact requires a file_path, so it is not included in run_all
208        if analyses.iter().any(|a| a == "change_impact") {
209            let fp = file_path.unwrap_or("");
210            if fp.is_empty() {
211                results["change_impact"] =
212                    json!({"error": "change_impact requires 'file_path' parameter"});
213            } else {
214                match self.enrich_change_impact(fp, namespace) {
215                    Ok(r) => {
216                        total_insights += r.insights_stored;
217                        results["change_impact"] = r.details;
218                    }
219                    Err(e) => {
220                        results["change_impact"] = json!({"error": format!("{e}")});
221                    }
222                }
223            }
224        }
225
226        EnrichmentPipelineResult {
227            results,
228            total_insights,
229        }
230    }
231
232    /// Store a Pattern memory for code smell detection (E7).
233    /// Importance is fixed at 0.5 for code smells.
234    /// Uses the full persist pipeline (storage → BM25 → graph → embedding → vector).
235    pub(super) fn store_pattern_memory(
236        &self,
237        content: &str,
238        namespace: Option<&str>,
239        links: &[String],
240    ) -> Option<String> {
241        let id = uuid::Uuid::new_v4().to_string();
242        let now = chrono::Utc::now();
243        let tags = vec![
244            "static-analysis".to_string(),
245            "track:code-smell".to_string(),
246        ];
247
248        let mut memory = MemoryNode::new(content, MemoryType::Pattern);
249        memory.id = id.clone();
250        memory.confidence = self.config.enrichment.insight_confidence;
251        memory.tags = tags;
252        memory.metadata = HashMap::from([
253            ("track".into(), json!("code-smell")),
254            ("generated_by".into(), json!("enrichment_pipeline")),
255        ]);
256        memory.namespace = namespace.map(String::from);
257
258        if self.persist_memory_no_save(&memory).is_err() {
259            return None;
260        }
261
262        // Post-step: RELATES_TO edges to linked nodes
263        if !links.is_empty() {
264            if let Ok(mut graph) = self.lock_graph() {
265                for link_id in links {
266                    let edge = Edge {
267                        id: format!("{id}-RELATES_TO-{link_id}"),
268                        src: id.clone(),
269                        dst: link_id.clone(),
270                        relationship: RelationshipType::RelatesTo,
271                        weight: 0.3,
272                        properties: HashMap::new(),
273                        created_at: now,
274                        valid_from: None,
275                        valid_to: None,
276                    };
277                    let _ = self.storage.insert_graph_edge(&edge);
278                    let _ = graph.add_edge(edge);
279                }
280            }
281        }
282
283        self.auto_link_to_code_nodes(&id, content, links);
284
285        Some(id)
286    }
287}