Skip to main content

codemem_engine/enrichment/
mod.rs

1//! Enrichment logic: store_insight, git history, security, performance,
2//! complexity, architecture, test mapping, API surface, doc coverage,
3//! change impact, code smells, hot+complex correlation, blame/ownership,
4//! enhanced security scanning, and quality stratification.
5
6mod api_surface;
7mod architecture;
8mod blame;
9mod change_impact;
10mod code_smells;
11mod complexity;
12mod doc_coverage;
13mod git;
14mod hot_complex;
15mod performance;
16mod quality;
17mod security;
18mod security_scan;
19mod test_mapping;
20
21use crate::CodememEngine;
22use codemem_core::{Edge, GraphBackend, MemoryNode, MemoryType, RelationshipType, VectorBackend};
23use serde_json::json;
24use std::collections::HashMap;
25use std::path::{Path, PathBuf};
26
/// Turn a file path into the path that should actually be opened.
///
/// With a `project_root`, the result is `project_root.join(rel_path)`; per
/// [`Path::join`] semantics an already-absolute `rel_path` replaces the root.
/// Without a root, `rel_path` is returned unchanged as a `PathBuf`.
pub(crate) fn resolve_path(rel_path: &str, project_root: Option<&Path>) -> PathBuf {
    project_root.map_or_else(|| PathBuf::from(rel_path), |base| base.join(rel_path))
}
36
37/// Result from an enrichment operation.
38pub struct EnrichResult {
39    pub insights_stored: usize,
40    pub details: serde_json::Value,
41}
42
43/// Result from running multiple enrichment analyses.
44pub struct EnrichmentPipelineResult {
45    /// JSON object with one key per analysis (e.g. "git", "security", etc.).
46    pub results: serde_json::Value,
47    /// Total number of insights stored across all analyses.
48    pub total_insights: usize,
49}
50
51impl CodememEngine {
52    /// Store an Insight memory through a 3-phase pipeline:
53    /// 1. Semantic dedup check (reject near-duplicates before persisting)
54    /// 2. Core persist via `persist_memory_no_save` (storage, BM25, graph node, embedding)
55    /// 3. Post-step: RELATES_TO edges to linked nodes + auto-link to code nodes
56    ///
57    /// Returns the memory ID if inserted, or None if it was a duplicate.
58    /// Does NOT call `save_index()` -- callers should batch that at the end.
59    pub fn store_insight(
60        &self,
61        content: &str,
62        track: &str,
63        tags: &[&str],
64        importance: f64,
65        namespace: Option<&str>,
66        links: &[String],
67    ) -> Option<String> {
68        let now = chrono::Utc::now();
69        let id = uuid::Uuid::new_v4().to_string();
70        let mut all_tags: Vec<String> =
71            vec![format!("track:{track}"), "static-analysis".to_string()];
72        all_tags.extend(tags.iter().map(|t| t.to_string()));
73
74        // ── Phase 1: Semantic dedup check ────────────────────────────────
75        // Compute enriched embedding and check for near-duplicates BEFORE persisting.
76        let enriched = self.enrich_memory_text(
77            content,
78            MemoryType::Insight,
79            &all_tags,
80            namespace,
81            Some(&id),
82        );
83        if let Ok(Some(emb_guard)) = self.lock_embeddings() {
84            if let Ok(embedding) = emb_guard.embed(&enriched) {
85                drop(emb_guard);
86                if let Ok(vec) = self.lock_vector() {
87                    let neighbors = vec.search(&embedding, 3).unwrap_or_default();
88                    for (neighbor_id, similarity) in &neighbors {
89                        if *neighbor_id == id {
90                            continue;
91                        }
92                        if (*similarity as f64) > self.config.enrichment.dedup_similarity_threshold
93                        {
94                            return None; // Too similar — reject before persisting
95                        }
96                    }
97                }
98            }
99        }
100
101        // ── Phase 2: Core persist via persist_memory_no_save ─────────────
102        let hash = codemem_storage::Storage::content_hash(content);
103        let memory = MemoryNode {
104            id: id.clone(),
105            content: content.to_string(),
106            memory_type: MemoryType::Insight,
107            importance: importance.clamp(0.0, 1.0),
108            confidence: self.config.enrichment.insight_confidence,
109            access_count: 0,
110            content_hash: hash,
111            tags: all_tags,
112            metadata: HashMap::from([
113                ("track".into(), json!(track)),
114                ("generated_by".into(), json!("enrichment_pipeline")),
115            ]),
116            namespace: namespace.map(String::from),
117            session_id: None,
118            created_at: now,
119            updated_at: now,
120            last_accessed_at: now,
121        };
122
123        if self.persist_memory_no_save(&memory).is_err() {
124            return None; // duplicate or error -- skip silently
125        }
126
127        // ── Phase 3: Post-step — RELATES_TO edges to linked nodes ────────
128        if !links.is_empty() {
129            if let Ok(mut graph) = self.lock_graph() {
130                for link_id in links {
131                    let edge = Edge {
132                        id: format!("{id}-RELATES_TO-{link_id}"),
133                        src: id.clone(),
134                        dst: link_id.clone(),
135                        relationship: RelationshipType::RelatesTo,
136                        weight: 0.3,
137                        properties: HashMap::new(),
138                        created_at: now,
139                        valid_from: None,
140                        valid_to: None,
141                    };
142                    let _ = self.storage.insert_graph_edge(&edge);
143                    let _ = graph.add_edge(edge);
144                }
145            }
146        }
147
148        // Auto-link to code nodes mentioned in content
149        self.auto_link_to_code_nodes(&id, content, links);
150
151        Some(id)
152    }
153
154    /// Run selected enrichment analyses (or all 14 if `analyses` is empty).
155    ///
156    /// Parameters:
157    /// - `path`: project root (needed for git, blame, change_impact, complexity, code_smells, security_scan)
158    /// - `analyses`: which analyses to run; empty = all (except change_impact which needs file_path)
159    /// - `days`: git history lookback days
160    /// - `namespace`: optional namespace filter
161    /// - `file_path`: optional, needed only for change_impact
162    pub fn run_enrichments(
163        &self,
164        path: &str,
165        analyses: &[String],
166        days: u64,
167        namespace: Option<&str>,
168        file_path: Option<&str>,
169    ) -> EnrichmentPipelineResult {
170        let run_all = analyses.is_empty();
171        let mut results = json!({});
172        let mut total_insights: usize = 0;
173
174        let root = Path::new(path);
175        let project_root = Some(root);
176
177        macro_rules! run_analysis {
178            ($name:expr, $call:expr) => {
179                if run_all || analyses.iter().any(|a| a == $name) {
180                    match $call {
181                        Ok(r) => {
182                            total_insights += r.insights_stored;
183                            results[$name] = r.details;
184                        }
185                        Err(e) => {
186                            results[$name] = json!({"error": format!("{e}")});
187                        }
188                    }
189                }
190            };
191        }
192
193        run_analysis!("git", self.enrich_git_history(path, days, namespace));
194        run_analysis!("security", self.enrich_security(namespace));
195        run_analysis!("performance", self.enrich_performance(10, namespace));
196        run_analysis!(
197            "complexity",
198            self.enrich_complexity(namespace, project_root)
199        );
200        run_analysis!(
201            "code_smells",
202            self.enrich_code_smells(namespace, project_root)
203        );
204        run_analysis!(
205            "security_scan",
206            self.enrich_security_scan(namespace, project_root)
207        );
208        run_analysis!("architecture", self.enrich_architecture(namespace));
209        run_analysis!("test_mapping", self.enrich_test_mapping(namespace));
210        run_analysis!("api_surface", self.enrich_api_surface(namespace));
211        run_analysis!("doc_coverage", self.enrich_doc_coverage(namespace));
212        run_analysis!("hot_complex", self.enrich_hot_complex(namespace));
213        run_analysis!("blame", self.enrich_blame(path, namespace));
214        run_analysis!("quality", self.enrich_quality_stratification(namespace));
215
216        // change_impact requires a file_path, so it is not included in run_all
217        if analyses.iter().any(|a| a == "change_impact") {
218            let fp = file_path.unwrap_or("");
219            if fp.is_empty() {
220                results["change_impact"] =
221                    json!({"error": "change_impact requires 'file_path' parameter"});
222            } else {
223                match self.enrich_change_impact(fp, namespace) {
224                    Ok(r) => {
225                        total_insights += r.insights_stored;
226                        results["change_impact"] = r.details;
227                    }
228                    Err(e) => {
229                        results["change_impact"] = json!({"error": format!("{e}")});
230                    }
231                }
232            }
233        }
234
235        EnrichmentPipelineResult {
236            results,
237            total_insights,
238        }
239    }
240
241    /// Store a Pattern memory for code smell detection (E7).
242    /// Importance is fixed at 0.5 for code smells.
243    /// Uses the full persist pipeline (storage → BM25 → graph → embedding → vector).
244    pub(super) fn store_pattern_memory(
245        &self,
246        content: &str,
247        namespace: Option<&str>,
248        links: &[String],
249    ) -> Option<String> {
250        let hash = codemem_storage::Storage::content_hash(content);
251        let now = chrono::Utc::now();
252        let id = uuid::Uuid::new_v4().to_string();
253        let tags = vec![
254            "static-analysis".to_string(),
255            "track:code-smell".to_string(),
256        ];
257
258        let memory = MemoryNode {
259            id: id.clone(),
260            content: content.to_string(),
261            memory_type: MemoryType::Pattern,
262            importance: 0.5,
263            confidence: self.config.enrichment.insight_confidence,
264            access_count: 0,
265            content_hash: hash,
266            tags,
267            metadata: HashMap::from([
268                ("track".into(), json!("code-smell")),
269                ("generated_by".into(), json!("enrichment_pipeline")),
270            ]),
271            namespace: namespace.map(String::from),
272            session_id: None,
273            created_at: now,
274            updated_at: now,
275            last_accessed_at: now,
276        };
277
278        if self.persist_memory_no_save(&memory).is_err() {
279            return None;
280        }
281
282        // Post-step: RELATES_TO edges to linked nodes
283        if !links.is_empty() {
284            if let Ok(mut graph) = self.lock_graph() {
285                for link_id in links {
286                    let edge = Edge {
287                        id: format!("{id}-RELATES_TO-{link_id}"),
288                        src: id.clone(),
289                        dst: link_id.clone(),
290                        relationship: RelationshipType::RelatesTo,
291                        weight: 0.3,
292                        properties: HashMap::new(),
293                        created_at: now,
294                        valid_from: None,
295                        valid_to: None,
296                    };
297                    let _ = self.storage.insert_graph_edge(&edge);
298                    let _ = graph.add_edge(edge);
299                }
300            }
301        }
302
303        self.auto_link_to_code_nodes(&id, content, links);
304
305        Some(id)
306    }
307}