Skip to main content

codemem_engine/enrichment/
quality.rs

1//! Quality stratification: categorize enrichment insights by signal strength.
2
3use super::EnrichResult;
4use crate::CodememEngine;
5use codemem_core::CodememError;
6use serde_json::json;
7
8impl CodememEngine {
9    /// Categorize existing enrichment insights by signal strength and adjust importance.
10    ///
11    /// - Noise (< 0.3): basic counts, minor observations
12    /// - Signal (0.5-0.7): moderate complexity, useful patterns
13    /// - Critical (0.8-1.0): high-risk findings, security issues
14    pub fn enrich_quality_stratification(
15        &self,
16        namespace: Option<&str>,
17    ) -> Result<EnrichResult, CodememError> {
18        // Query all static-analysis memories
19        let all_ids = self.storage.list_memory_ids().unwrap_or_default();
20        let id_refs: Vec<&str> = all_ids.iter().map(|s| s.as_str()).collect();
21        let memories = self
22            .storage
23            .get_memories_batch(&id_refs)
24            .unwrap_or_default();
25
26        let mut noise_count = 0usize;
27        let mut signal_count = 0usize;
28        let mut critical_count = 0usize;
29        let mut reclassified = 0usize;
30
31        for memory in &memories {
32            if !memory.tags.contains(&"static-analysis".to_string()) {
33                continue;
34            }
35            // Apply namespace filter if specified
36            if let Some(ns) = namespace {
37                if memory.namespace.as_deref() != Some(ns) {
38                    continue;
39                }
40            }
41
42            let current_importance = memory.importance;
43            let content_lower = memory.content.to_lowercase();
44
45            // Determine signal strength based on content analysis
46            let is_critical = content_lower.contains("security")
47                || content_lower.contains("credential")
48                || content_lower.contains("sql injection")
49                || content_lower.contains("high-risk")
50                || content_lower.contains("critical")
51                || memory.tags.iter().any(|t| t.contains("severity:critical"));
52
53            let is_signal = content_lower.contains("complexity")
54                || content_lower.contains("untested")
55                || content_lower.contains("coupling")
56                || content_lower.contains("co-change")
57                || content_lower.contains("architecture")
58                || content_lower.contains("code smell");
59
60            let target_importance = if is_critical {
61                critical_count += 1;
62                current_importance.max(0.8)
63            } else if is_signal {
64                signal_count += 1;
65                current_importance.clamp(0.5, 0.7)
66            } else {
67                noise_count += 1;
68                current_importance.min(0.3)
69            };
70
71            // Only update if importance actually changed
72            if (target_importance - current_importance).abs() > 0.01 {
73                let _ = self.storage.update_memory(
74                    &memory.id,
75                    &memory.content,
76                    Some(target_importance),
77                );
78                reclassified += 1;
79            }
80        }
81
82        let total = noise_count + signal_count + critical_count;
83
84        Ok(EnrichResult {
85            insights_stored: 0,
86            details: json!({
87                "total_analyzed": total,
88                "noise": noise_count,
89                "signal": signal_count,
90                "critical": critical_count,
91                "reclassified": reclassified,
92            }),
93        })
94    }
95}