// aster/memory/compressor.rs

//! Memory compressor.
//!
//! Condenses multiple conversation summaries into a single, more compact form.

use std::cmp::Reverse;
use std::collections::HashMap;

use chrono::{DateTime, Datelike, Utc};

use super::types::{ConversationSummary, MemoryEmotion, MemoryImportance, Timestamp};

11/// 压缩结果
12#[derive(Debug, Clone)]
13pub struct CompressionResult {
14    /// 压缩后的摘要
15    pub compressed_summary: String,
16    /// 保留的核心话题
17    pub preserved_topics: Vec<String>,
18    /// 保留的重要文件
19    pub preserved_files: Vec<String>,
20    /// 原始消息数量
21    pub original_count: usize,
22    /// 时间范围
23    pub time_range: (Timestamp, Timestamp),
24    /// 整体情感
25    pub dominant_emotion: MemoryEmotion,
26    /// 重要性评分
27    pub importance: MemoryImportance,
28}
29
/// Tunable limits for the memory compressor.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompressorConfig {
    /// Maximum length of a generated summary, in characters.
    pub max_summary_length: usize,
    /// Maximum number of topics to keep.
    pub max_topics: usize,
    /// Maximum number of files to keep.
    pub max_files: usize,
}

41impl Default for CompressorConfig {
42    fn default() -> Self {
43        Self {
44            max_summary_length: 500,
45            max_topics: 5,
46            max_files: 10,
47        }
48    }
49}
50
51/// 记忆压缩器
52pub struct MemoryCompressor {
53    config: CompressorConfig,
54}
55
56impl MemoryCompressor {
57    pub fn new(config: Option<CompressorConfig>) -> Self {
58        Self {
59            config: config.unwrap_or_default(),
60        }
61    }
62
63    /// 压缩多条对话摘要为一条
64    pub fn compress(&self, summaries: &[ConversationSummary]) -> Result<CompressionResult, String> {
65        if summaries.is_empty() {
66            return Err("Cannot compress empty summaries".to_string());
67        }
68
69        if summaries.len() == 1 {
70            return Ok(self.single_to_result(&summaries[0]));
71        }
72
73        let all_topics = self.collect_topics(summaries);
74        let all_files = self.collect_files(summaries);
75        let time_range = self.calculate_time_range(summaries);
76        let dominant_emotion = self.calculate_dominant_emotion(summaries);
77        let importance = self.calculate_importance(summaries);
78        let compressed_summary = self.generate_summary(summaries);
79
80        Ok(CompressionResult {
81            compressed_summary,
82            preserved_topics: all_topics
83                .into_iter()
84                .take(self.config.max_topics)
85                .collect(),
86            preserved_files: all_files.into_iter().take(self.config.max_files).collect(),
87            original_count: summaries.len(),
88            time_range,
89            dominant_emotion,
90            importance,
91        })
92    }
93
94    /// 判断是否应该压缩
95    pub fn should_compress(&self, summaries: &[ConversationSummary], threshold: usize) -> bool {
96        summaries.len() >= threshold
97    }
98
99    /// 按时间分组摘要
100    pub fn group_by_period<'a>(
101        &self,
102        summaries: &'a [ConversationSummary],
103        period: Period,
104    ) -> HashMap<String, Vec<&'a ConversationSummary>> {
105        let mut groups: HashMap<String, Vec<&ConversationSummary>> = HashMap::new();
106
107        for summary in summaries {
108            if let Ok(date) = DateTime::parse_from_rfc3339(&summary.start_time) {
109                let key = match period {
110                    Period::Day => date.format("%Y-%m-%d").to_string(),
111                    Period::Week => {
112                        let week_start = date.date_naive()
113                            - chrono::Duration::days(date.weekday().num_days_from_sunday() as i64);
114                        week_start.format("%Y-%m-%d").to_string()
115                    }
116                    Period::Month => date.format("%Y-%m").to_string(),
117                };
118
119                groups.entry(key).or_default().push(summary);
120            }
121        }
122
123        groups
124    }
125
126    /// 评估摘要的重要性
127    pub fn evaluate_importance(&self, summary: &ConversationSummary) -> MemoryImportance {
128        let mut score = 0;
129
130        if summary.emotion == MemoryEmotion::Meaningful {
131            score += 2;
132        }
133        if summary.emotion == MemoryEmotion::Positive {
134            score += 1;
135        }
136        if summary.topics.len() >= 3 {
137            score += 1;
138        }
139        if summary.files_discussed.len() >= 5 {
140            score += 1;
141        }
142        if summary.message_count >= 20 {
143            score += 1;
144        }
145
146        match score {
147            4.. => MemoryImportance::High,
148            2..=3 => MemoryImportance::Medium,
149            1 => MemoryImportance::Low,
150            _ => MemoryImportance::Ephemeral,
151        }
152    }
153
154    // === 私有方法 ===
155
156    fn single_to_result(&self, summary: &ConversationSummary) -> CompressionResult {
157        CompressionResult {
158            compressed_summary: summary.summary.clone(),
159            preserved_topics: summary.topics.clone(),
160            preserved_files: summary.files_discussed.clone(),
161            original_count: 1,
162            time_range: (summary.start_time.clone(), summary.end_time.clone()),
163            dominant_emotion: summary.emotion,
164            importance: summary.importance,
165        }
166    }
167
168    fn collect_topics(&self, summaries: &[ConversationSummary]) -> Vec<String> {
169        let mut topic_count: HashMap<&str, usize> = HashMap::new();
170
171        for summary in summaries {
172            for topic in &summary.topics {
173                *topic_count.entry(topic.as_str()).or_default() += 1;
174            }
175        }
176
177        let mut topics: Vec<_> = topic_count.into_iter().collect();
178        topics.sort_by(|a, b| b.1.cmp(&a.1));
179        topics.into_iter().map(|(t, _)| t.to_string()).collect()
180    }
181
182    fn collect_files(&self, summaries: &[ConversationSummary]) -> Vec<String> {
183        let mut file_count: HashMap<&str, usize> = HashMap::new();
184
185        for summary in summaries {
186            for file in &summary.files_discussed {
187                *file_count.entry(file.as_str()).or_default() += 1;
188            }
189        }
190
191        let mut files: Vec<_> = file_count.into_iter().collect();
192        files.sort_by(|a, b| b.1.cmp(&a.1));
193        files.into_iter().map(|(f, _)| f.to_string()).collect()
194    }
195
196    fn calculate_time_range(&self, summaries: &[ConversationSummary]) -> (Timestamp, Timestamp) {
197        let times: Vec<_> = summaries
198            .iter()
199            .flat_map(|s| {
200                vec![
201                    DateTime::parse_from_rfc3339(&s.start_time).ok(),
202                    DateTime::parse_from_rfc3339(&s.end_time).ok(),
203                ]
204            })
205            .flatten()
206            .collect();
207
208        if times.is_empty() {
209            let now = Utc::now().to_rfc3339();
210            return (now.clone(), now);
211        }
212
213        let min = times.iter().min().unwrap();
214        let max = times.iter().max().unwrap();
215
216        (min.to_rfc3339(), max.to_rfc3339())
217    }
218
219    fn calculate_dominant_emotion(&self, summaries: &[ConversationSummary]) -> MemoryEmotion {
220        let mut emotion_count: HashMap<MemoryEmotion, usize> = HashMap::new();
221
222        for summary in summaries {
223            *emotion_count.entry(summary.emotion).or_default() += 1;
224        }
225
226        emotion_count
227            .into_iter()
228            .max_by_key(|(_, count)| *count)
229            .map(|(emotion, _)| emotion)
230            .unwrap_or(MemoryEmotion::Neutral)
231    }
232
233    fn calculate_importance(&self, summaries: &[ConversationSummary]) -> MemoryImportance {
234        summaries
235            .iter()
236            .map(|s| s.importance)
237            .max()
238            .unwrap_or(MemoryImportance::Medium)
239    }
240
241    fn generate_summary(&self, summaries: &[ConversationSummary]) -> String {
242        let topics: Vec<_> = self.collect_topics(summaries).into_iter().take(5).collect();
243        let files: Vec<_> = self.collect_files(summaries).into_iter().take(3).collect();
244        let (start, end) = self.calculate_time_range(summaries);
245
246        let mut parts = Vec::new();
247
248        // 时间范围
249        let start_date = start.get(..10).unwrap_or(&start);
250        let end_date = end.get(..10).unwrap_or(&end);
251        if start_date == end_date {
252            parts.push(format!("{}:", start_date));
253        } else {
254            parts.push(format!("{} 至 {}:", start_date, end_date));
255        }
256
257        parts.push(format!("共 {} 次对话。", summaries.len()));
258
259        if !topics.is_empty() {
260            parts.push(format!("主要话题:{}。", topics.join("、")));
261        }
262
263        if !files.is_empty() {
264            parts.push(format!("涉及文件:{}。", files.join("、")));
265        }
266
267        let mut result = parts.join(" ");
268
269        if result.len() > self.config.max_summary_length {
270            result.truncate(self.config.max_summary_length - 3);
271            result.push_str("...");
272        }
273
274        result
275    }
276}
277
/// Calendar period used to group summaries.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Period {
    /// One calendar day.
    Day,
    /// One week.
    Week,
    /// One calendar month.
    Month,
}

286impl Default for MemoryCompressor {
287    fn default() -> Self {
288        Self::new(None)
289    }
290}