ceylon_next/memory/advanced/
summarization.rs

//! Conversation Summarization and Compression
//!
//! This module provides automatic conversation summarization, memory compression,
//! and important moment extraction. Summaries are currently produced by rule-based
//! heuristics; the LLM-based processing path is planned but not wired in yet.
5
6use super::{EnhancedMemoryEntry, ImportanceLevel, semantic::EntityType};
7use crate::llm::types::Message;
8use serde::{Deserialize, Serialize};
9
10/// Strategy for summarizing conversations
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
12pub enum SummaryStrategy {
13    /// Brief one-sentence summary
14    Brief,
15    /// Detailed multi-paragraph summary
16    Detailed,
17    /// Key points extraction
18    KeyPoints,
19    /// Hybrid: summary + key points
20    Hybrid,
21}
22
23/// Result of summarization
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct SummaryResult {
26    /// Main summary text
27    pub summary: String,
28    /// Extracted key points
29    pub key_points: Vec<String>,
30    /// Extracted entities
31    pub entities: Vec<ExtractedEntity>,
32    /// Extracted facts
33    pub facts: Vec<ExtractedFact>,
34    /// Importance level assessment
35    pub importance: ImportanceLevel,
36    /// Important moments/quotes
37    pub important_moments: Vec<ImportantMoment>,
38}
39
40/// An extracted entity from conversation
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct ExtractedEntity {
43    pub name: String,
44    pub entity_type: EntityType,
45    pub context: String,
46}
47
48/// An extracted fact from conversation
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct ExtractedFact {
51    pub subject: String,
52    pub predicate: String,
53    pub object: String,
54    pub confidence: f32,
55}
56
57/// An important moment or quote from conversation
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct ImportantMoment {
60    pub content: String,
61    pub speaker: String,
62    pub reason: String,
63}
64
/// Conversation summarizer.
///
/// The type currently holds no state: summaries are produced by rule-based
/// heuristics only. `Default` is derived so that `Summarizer::default()` and
/// `Summarizer::new()` are interchangeable.
#[derive(Debug, Clone, Default)]
pub struct Summarizer {
    // Note: LLM integration can be added when needed
    // For now, using rule-based summarization
}
70
71impl Summarizer {
72    /// Create a new summarizer
73    pub fn new() -> Self {
74        Self {}
75    }
76
77    /// Summarize a conversation
78    pub async fn summarize(
79        &self,
80        entry: &EnhancedMemoryEntry,
81        strategy: SummaryStrategy,
82    ) -> Result<SummaryResult, String> {
83        if entry.entry.messages.is_empty() {
84            return Ok(SummaryResult {
85                summary: String::from("Empty conversation"),
86                key_points: Vec::new(),
87                entities: Vec::new(),
88                facts: Vec::new(),
89                importance: ImportanceLevel::Low,
90                important_moments: Vec::new(),
91            });
92        }
93
94        // Use rule-based summarization
95        // TODO: Add LLM integration when trait is made object-safe
96        self.rule_based_summarize(entry, strategy)
97    }
98
99    // LLM-based summarization - commented out until LLMClient trait is object-safe
100    // TODO: Re-enable when Ceylon's LLMClient trait supports dynamic dispatch
101    /*
102    async fn llm_summarize(
103        &self,
104        llm: &Arc<dyn LLMClient>,
105        entry: &EnhancedMemoryEntry,
106        strategy: SummaryStrategy,
107    ) -> Result<SummaryResult, String> {
108        // Implementation would go here
109        unimplemented!("LLM summarization not yet implemented")
110    }
111    */
112
113    /// Rule-based summarization (fallback when no LLM available)
114    fn rule_based_summarize(
115        &self,
116        entry: &EnhancedMemoryEntry,
117        strategy: SummaryStrategy,
118    ) -> Result<SummaryResult, String> {
119        let messages = &entry.entry.messages;
120
121        // Extract basic information
122        let user_messages: Vec<&String> = messages
123            .iter()
124            .filter(|m| m.role == "user")
125            .map(|m| &m.content)
126            .collect();
127
128        let assistant_messages: Vec<&String> = messages
129            .iter()
130            .filter(|m| m.role == "assistant")
131            .map(|m| &m.content)
132            .collect();
133
134        let summary = match strategy {
135            SummaryStrategy::Brief => {
136                format!(
137                    "Conversation with {} user messages and {} assistant responses",
138                    user_messages.len(),
139                    assistant_messages.len()
140                )
141            }
142            SummaryStrategy::Detailed => {
143                let mut details = String::from("Conversation summary:\n");
144                for (i, msg) in user_messages.iter().enumerate().take(3) {
145                    details.push_str(&format!("User query {}: {}\n", i + 1, Self::truncate(msg, 100)));
146                }
147                details
148            }
149            SummaryStrategy::KeyPoints | SummaryStrategy::Hybrid => {
150                format!(
151                    "Key points:\n- {} messages exchanged\n- Topics discussed: {}",
152                    messages.len(),
153                    "Multiple topics"
154                )
155            }
156        };
157
158        // Simple entity extraction (look for capitalized words)
159        let entities = self.extract_entities_simple(messages);
160
161        // Determine importance based on message length and count
162        let importance = self.assess_importance_simple(messages);
163
164        Ok(SummaryResult {
165            summary,
166            key_points: self.extract_key_points_simple(messages),
167            entities,
168            facts: Vec::new(), // Rule-based fact extraction is complex
169            importance,
170            important_moments: Vec::new(),
171        })
172    }
173
174    /// Parse LLM response into structured summary
175    #[allow(dead_code)]
176    fn parse_llm_response(
177        &self,
178        response: &str,
179        _strategy: SummaryStrategy,
180        _entry: &EnhancedMemoryEntry,
181    ) -> Result<SummaryResult, String> {
182        // For now, simple parsing
183        // In a real implementation, we'd use more sophisticated parsing or structured output
184
185        let lines: Vec<&str> = response.lines().collect();
186        let mut summary = String::new();
187        let mut key_points = Vec::new();
188        let mut entities = Vec::new();
189        let mut importance = ImportanceLevel::Medium;
190
191        let mut in_key_points = false;
192        let mut in_entities = false;
193
194        for line in lines {
195            let trimmed = line.trim();
196
197            if trimmed.is_empty() {
198                continue;
199            }
200
201            // Detect sections
202            if trimmed.to_lowercase().contains("key points") {
203                in_key_points = true;
204                in_entities = false;
205                continue;
206            } else if trimmed.to_lowercase().contains("entities") || trimmed.to_lowercase().contains("people") {
207                in_entities = true;
208                in_key_points = false;
209                continue;
210            } else if trimmed.to_lowercase().contains("importance") {
211                if trimmed.to_lowercase().contains("critical") {
212                    importance = ImportanceLevel::Critical;
213                } else if trimmed.to_lowercase().contains("high") {
214                    importance = ImportanceLevel::High;
215                } else if trimmed.to_lowercase().contains("low") {
216                    importance = ImportanceLevel::Low;
217                } else {
218                    importance = ImportanceLevel::Medium;
219                }
220                continue;
221            }
222
223            // Parse content
224            if in_key_points {
225                if trimmed.starts_with('-') || trimmed.starts_with('•') || trimmed.starts_with('*') {
226                    key_points.push(trimmed[1..].trim().to_string());
227                }
228            } else if in_entities {
229                if trimmed.starts_with('-') || trimmed.starts_with('•') || trimmed.starts_with('*') {
230                    let entity_name = trimmed[1..].trim().to_string();
231                    entities.push(ExtractedEntity {
232                        name: entity_name,
233                        entity_type: EntityType::Other("unknown".to_string()),
234                        context: String::new(),
235                    });
236                }
237            } else if summary.is_empty() {
238                summary = trimmed.to_string();
239            }
240        }
241
242        Ok(SummaryResult {
243            summary: if summary.is_empty() {
244                response.to_string()
245            } else {
246                summary
247            },
248            key_points,
249            entities,
250            facts: Vec::new(),
251            importance,
252            important_moments: Vec::new(),
253        })
254    }
255
256    /// Format conversation for LLM processing
257    fn format_conversation(&self, messages: &[Message]) -> String {
258        let mut formatted = String::new();
259
260        for msg in messages {
261            match msg.role.as_str() {
262                "user" => formatted.push_str(&format!("User: {}\n", msg.content)),
263                "assistant" => formatted.push_str(&format!("Assistant: {}\n", msg.content)),
264                "system" => {} // Skip system messages
265                _ => formatted.push_str(&format!("{}: {}\n", msg.role, msg.content)),
266            }
267        }
268
269        formatted
270    }
271
272    /// Simple entity extraction (capitalized words)
273    fn extract_entities_simple(&self, messages: &[Message]) -> Vec<ExtractedEntity> {
274        let mut entities = Vec::new();
275        let common_words = ["I", "The", "A", "An", "In", "On", "At", "To", "For", "User", "Assistant"];
276
277        for msg in messages {
278            let words: Vec<&str> = msg.content.split_whitespace().collect();
279            for word in words {
280                // Simple heuristic: capitalized word not at sentence start
281                if word.len() > 2
282                    && word.chars().next().unwrap().is_uppercase()
283                    && !common_words.contains(&word)
284                {
285                    entities.push(ExtractedEntity {
286                        name: word.trim_matches(|c: char| !c.is_alphanumeric()).to_string(),
287                        entity_type: EntityType::Other("unknown".to_string()),
288                        context: msg.content.clone(),
289                    });
290                }
291            }
292        }
293
294        // Deduplicate
295        entities.sort_by(|a, b| a.name.cmp(&b.name));
296        entities.dedup_by(|a, b| a.name == b.name);
297        entities.truncate(10); // Keep top 10
298
299        entities
300    }
301
302    /// Extract key points from messages
303    fn extract_key_points_simple(&self, messages: &[Message]) -> Vec<String> {
304        // Take first sentence of each user message as a key point
305        let mut points = Vec::new();
306
307        for msg in messages.iter().filter(|m| m.role == "user") {
308            if let Some(first_sentence) = msg.content.split('.').next() {
309                if !first_sentence.trim().is_empty() {
310                    points.push(first_sentence.trim().to_string());
311                }
312            }
313        }
314
315        points.truncate(5); // Keep top 5
316        points
317    }
318
319    /// Assess importance based on simple heuristics
320    fn assess_importance_simple(&self, messages: &[Message]) -> ImportanceLevel {
321        let total_length: usize = messages.iter().map(|m| m.content.len()).sum();
322        let message_count = messages.len();
323
324        if total_length > 2000 || message_count > 10 {
325            ImportanceLevel::High
326        } else if total_length > 500 || message_count > 5 {
327            ImportanceLevel::Medium
328        } else {
329            ImportanceLevel::Low
330        }
331    }
332
333    /// Truncate text to max length
334    fn truncate(text: &str, max_len: usize) -> String {
335        if text.len() <= max_len {
336            text.to_string()
337        } else {
338            format!("{}...", &text[..max_len])
339        }
340    }
341}
342
#[cfg(test)]
mod tests {
    use super::*;
    use crate::memory::MemoryEntry;

    /// Build a message with the given role and content.
    fn message(role: &str, content: &str) -> Message {
        Message {
            role: role.to_string(),
            content: content.to_string(),
        }
    }

    /// Wrap `messages` in a working-memory entry for a fixed agent and task.
    fn working_entry(messages: Vec<Message>) -> EnhancedMemoryEntry {
        let entry = MemoryEntry::new("agent-1".to_string(), "task-1".to_string(), messages);
        EnhancedMemoryEntry::new(entry, super::super::MemoryType::Working)
    }

    #[tokio::test]
    async fn test_rule_based_summarization() {
        let summarizer = Summarizer::new();
        let enhanced = working_entry(vec![
            message("user", "Hello, I need help with Rust programming."),
            message("assistant", "I'd be happy to help with Rust!"),
        ]);

        let result = summarizer
            .summarize(&enhanced, SummaryStrategy::Brief)
            .await
            .unwrap();

        assert_eq!(
            result.summary,
            "Conversation with 1 user messages and 1 assistant responses"
        );
        // The key point is the user's first sentence, without the trailing period.
        assert_eq!(
            result.key_points,
            vec!["Hello, I need help with Rust programming".to_string()]
        );
    }

    #[tokio::test]
    async fn test_entity_extraction() {
        let summarizer = Summarizer::new();
        let enhanced = working_entry(vec![message(
            "user",
            "I'm working on a project with Alice and Bob in Paris.",
        )]);

        let result = summarizer
            .summarize(&enhanced, SummaryStrategy::Hybrid)
            .await
            .unwrap();

        let names: Vec<&str> = result.entities.iter().map(|e| e.name.as_str()).collect();
        for expected in &["Alice", "Bob", "Paris"] {
            assert!(
                names.contains(expected),
                "expected entity {:?} in {:?}",
                expected,
                names
            );
        }
    }

    #[tokio::test]
    async fn test_empty_conversation() {
        let summarizer = Summarizer::new();
        let enhanced = working_entry(Vec::new());

        let result = summarizer
            .summarize(&enhanced, SummaryStrategy::Detailed)
            .await
            .unwrap();

        assert_eq!(result.summary, "Empty conversation");
        assert!(result.key_points.is_empty());
        assert!(result.entities.is_empty());
    }
}