Skip to main content

codemem_engine/consolidation/
summarize.rs

1use super::ConsolidationResult;
2use crate::CodememEngine;
3use codemem_core::{CodememError, Edge, GraphBackend, MemoryNode, MemoryType, RelationshipType};
4use serde_json::json;
5use std::collections::HashMap;
6
7impl CodememEngine {
8    /// Consolidate summarize: LLM-powered consolidation that finds
9    /// connected components, summarizes large clusters into Insight memories
10    /// linked via SUMMARIZES edges.
11    pub fn consolidate_summarize(
12        &self,
13        min_cluster_size: Option<usize>,
14    ) -> Result<ConsolidationResult, CodememError> {
15        let min_cluster_size = min_cluster_size.unwrap_or(5);
16
17        let provider = crate::compress::CompressProvider::from_env();
18        if !provider.is_enabled() {
19            return Err(CodememError::Config(
20                "CODEMEM_COMPRESS_PROVIDER env var not set. \
21                 Set it to 'ollama', 'openai', or 'anthropic' to enable LLM-powered consolidation."
22                    .to_string(),
23            ));
24        }
25
26        // Find connected components via the graph
27        let graph = self.lock_graph()?;
28        let components = graph.connected_components();
29        drop(graph);
30
31        let large_clusters: Vec<&Vec<String>> = components
32            .iter()
33            .filter(|c| c.len() >= min_cluster_size)
34            .collect();
35
36        if large_clusters.is_empty() {
37            return Ok(ConsolidationResult {
38                cycle: "summarize".to_string(),
39                affected: 0,
40                details: json!({
41                    "clusters_found": 0,
42                    "min_cluster_size": min_cluster_size,
43                    "message": format!("No clusters with {} or more members found", min_cluster_size),
44                }),
45            });
46        }
47
48        let mut summarized_count = 0usize;
49        let mut created_ids: Vec<String> = Vec::new();
50
51        for cluster in &large_clusters {
52            let mut contents: Vec<String> = Vec::new();
53            let mut source_ids: Vec<String> = Vec::new();
54            let mut all_tags: Vec<String> = Vec::new();
55
56            // M12: Acquire graph lock once before the inner loop, collect all memory IDs,
57            // then drop the lock before batch-fetching memories from storage.
58            let memory_ids: Vec<String> = {
59                let graph = self.lock_graph()?;
60                cluster
61                    .iter()
62                    .filter_map(|node_id| {
63                        graph
64                            .get_node(node_id)
65                            .ok()
66                            .flatten()
67                            .and_then(|node| node.memory_id.clone())
68                    })
69                    .collect()
70            };
71
72            for mid in &memory_ids {
73                if let Ok(Some(mem)) = self.storage.get_memory_no_touch(mid) {
74                    contents.push(mem.content.clone());
75                    source_ids.push(mid.clone());
76                    all_tags.extend(mem.tags.clone());
77                }
78            }
79
80            if contents.len() < 2 {
81                continue;
82            }
83
84            let combined = contents.join("\n---\n");
85            let summary = match provider.compress(&combined, "consolidate_summarize", None) {
86                Some(s) => s,
87                None => continue,
88            };
89
90            all_tags.sort();
91            all_tags.dedup();
92
93            let now = chrono::Utc::now();
94            let new_id = uuid::Uuid::new_v4().to_string();
95            let hash = codemem_storage::Storage::content_hash(&summary);
96
97            let mem = MemoryNode {
98                id: new_id.clone(),
99                content: summary,
100                memory_type: MemoryType::Insight,
101                importance: 0.7,
102                confidence: 1.0,
103                access_count: 0,
104                content_hash: hash,
105                tags: all_tags,
106                metadata: HashMap::new(),
107                namespace: None,
108                session_id: None,
109                created_at: now,
110                updated_at: now,
111                last_accessed_at: now,
112            };
113
114            // M4: Use persist_memory_no_save to skip per-memory index save,
115            // then call save_index() once after the entire loop.
116            if self.persist_memory_no_save(&mem).is_err() {
117                tracing::warn!("Failed to persist summary memory: {new_id}");
118                continue;
119            }
120
121            if let Ok(mut graph) = self.lock_graph() {
122                for sid in &source_ids {
123                    let edge = Edge {
124                        id: format!("{new_id}-SUMMARIZES-{sid}"),
125                        src: new_id.clone(),
126                        dst: sid.clone(),
127                        relationship: RelationshipType::Summarizes,
128                        weight: 1.0,
129                        properties: HashMap::new(),
130                        created_at: now,
131                        valid_from: Some(now),
132                        valid_to: None,
133                    };
134                    if let Err(e) = self.storage.insert_graph_edge(&edge) {
135                        tracing::warn!("Failed to persist SUMMARIZES edge: {e}");
136                    }
137                    let _ = graph.add_edge(edge);
138                }
139            }
140
141            summarized_count += 1;
142            created_ids.push(new_id);
143        }
144
145        // M4: Save vector index once after all summaries are persisted
146        if summarized_count > 0 {
147            self.save_index();
148        }
149
150        if let Err(e) = self
151            .storage
152            .insert_consolidation_log("summarize", summarized_count)
153        {
154            tracing::warn!("Failed to log summarize consolidation: {e}");
155        }
156
157        Ok(ConsolidationResult {
158            cycle: "summarize".to_string(),
159            affected: summarized_count,
160            details: json!({
161                "clusters_found": large_clusters.len(),
162                "summarized": summarized_count,
163                "created_ids": created_ids,
164                "min_cluster_size": min_cluster_size,
165            }),
166        })
167    }
168}