forja_memory/
compressor.rs1use chrono::{DateTime, Local};
2use forja_core::types::{Content, Message, Role};
3use std::collections::{BTreeSet, HashMap};
4
5#[derive(Debug, Clone, PartialEq, Eq)]
6pub struct CompressedEntry {
7 pub timestamp: DateTime<Local>,
8 pub summary: String,
9 pub keywords: Vec<String>,
10 pub original_count: usize,
11 pub code_snippets: Vec<String>,
12}
13
14#[derive(Debug, Clone, Default)]
15pub struct Compressor;
16
17impl Compressor {
18 pub fn new() -> Self {
19 Self
20 }
21
22 pub fn compress(&self, messages: Vec<Message>) -> CompressedEntry {
23 let texts = message_texts(&messages);
24 let summary = build_summary(&texts);
25 let keywords = extract_keywords(&texts);
26 let code_snippets = extract_code_snippets(&texts);
27 let timestamp = Local::now();
28
29 CompressedEntry {
30 timestamp,
31 summary,
32 keywords,
33 original_count: messages.len(),
34 code_snippets,
35 }
36 }
37}
38
39fn message_texts(messages: &[Message]) -> Vec<String> {
40 messages
41 .iter()
42 .filter_map(|message| {
43 let Content::Text { text, .. } = &message.content else {
44 return None;
45 };
46 let role = match message.role {
47 Role::User => "user",
48 Role::Assistant => "assistant",
49 Role::System => "system",
50 Role::Tool => "tool",
51 };
52 Some(format!("{role}: {}", text.trim()))
53 })
54 .filter(|text| !text.trim().is_empty())
55 .collect()
56}
57
58fn build_summary(texts: &[String]) -> String {
59 let mut selected = texts
60 .iter()
61 .filter(|text| is_key_fact(text) || looks_like_action_item(text) || contains_code_reference(text))
62 .take(4)
63 .cloned()
64 .collect::<Vec<_>>();
65
66 if selected.is_empty() {
67 selected = texts.iter().take(3).cloned().collect();
68 }
69
70 let summary = selected
71 .into_iter()
72 .map(|text| truncate_text(&text, 160))
73 .collect::<Vec<_>>()
74 .join(" | ");
75
76 if summary.trim().is_empty() {
77 "Conversation summary unavailable.".to_string()
78 } else {
79 summary
80 }
81}
82
83fn extract_keywords(texts: &[String]) -> Vec<String> {
84 let mut frequencies = HashMap::<String, usize>::new();
85 for token in texts.iter().flat_map(|text| tokenize(text)) {
86 if is_stopword(&token) || token.len() < 3 {
87 continue;
88 }
89 *frequencies.entry(token).or_default() += 1;
90 }
91
92 let mut weighted = frequencies.into_iter().collect::<Vec<_>>();
93 weighted.sort_by(|left, right| right.1.cmp(&left.1).then_with(|| left.0.cmp(&right.0)));
94
95 let mut keywords = weighted
96 .into_iter()
97 .map(|(token, _)| token)
98 .take(8)
99 .collect::<Vec<_>>();
100
101 if keywords.is_empty() {
102 keywords.push("memory".to_string());
103 }
104
105 keywords
106}
107
108fn extract_code_snippets(texts: &[String]) -> Vec<String> {
109 let snippets = texts
110 .iter()
111 .filter(|text| contains_code_reference(text))
112 .map(|text| truncate_text(text, 200))
113 .collect::<BTreeSet<_>>();
114
115 snippets.into_iter().take(3).collect()
116}
117
/// Splits `text` into lowercase tokens.
///
/// A token is a run of alphanumeric characters, underscores and dots, so
/// identifiers such as `snake_case` and file names such as `auth.rs` stay in
/// one piece. Leading and trailing dots are stripped so that sentence
/// punctuation does not produce distinct tokens (`"deploy."` and `"deploy"`
/// both yield `deploy`); tokens consisting only of dots are dropped.
fn tokenize(text: &str) -> Vec<String> {
    text.split(|character: char| {
        !character.is_alphanumeric() && character != '_' && character != '.'
    })
    // Dots are only meaningful inside a token (file extensions, versions).
    .map(|token| token.trim_matches('.'))
    .filter(|token| !token.is_empty())
    .map(str::to_lowercase)
    .collect()
}
125
/// Reports whether `text` mentions a decision, preference or completed work.
///
/// Matching is a case-insensitive substring search, so a marker also matches
/// inside longer words (for example `ship` inside `relationship`).
fn is_key_fact(text: &str) -> bool {
    const MARKERS: [&str; 9] = [
        "decide",
        "decision",
        "prefer",
        "remember",
        "implemented",
        "fixed",
        "resolved",
        "deploy",
        "ship",
    ];

    let lowered = text.to_lowercase();
    for marker in &MARKERS {
        if lowered.contains(*marker) {
            return true;
        }
    }
    false
}
142
/// Reports whether `text` reads like a pending task or follow-up.
///
/// Uses a case-insensitive substring search over a fixed list of phrases.
fn looks_like_action_item(text: &str) -> bool {
    const PHRASES: [&str; 7] = [
        "todo",
        "next",
        "follow up",
        "need to",
        "should",
        "must",
        "action",
    ];

    let lowered = text.to_lowercase();
    PHRASES
        .iter()
        .copied()
        .any(|phrase| lowered.contains(phrase))
}
157
/// Reports whether `text` appears to mention source code.
///
/// Looks, case-insensitively, for common file extensions, Rust keywords
/// followed by a space, `cargo `/`git ` commands, code fences and `::` paths.
fn contains_code_reference(text: &str) -> bool {
    const CODE_HINTS: [&str; 11] = [
        ".rs", ".py", ".ts", ".js", "fn ", "struct ", "impl ", "cargo ", "git ", "```", "::",
    ];

    let lowered = text.to_lowercase();
    let first_hit = CODE_HINTS.iter().find(|hint| lowered.contains(**hint));
    first_hit.is_some()
}
176
/// Limits `text` to at most `max_len` characters.
///
/// Text that already fits is returned unchanged; otherwise the first
/// `max_len` characters are kept and `"..."` is appended. Lengths are counted
/// in `char`s, so multi-byte characters are never split.
fn truncate_text(text: &str, max_len: usize) -> String {
    // The character at index `max_len` exists only when the text is too long;
    // its byte offset is exactly where the kept prefix ends.
    match text.char_indices().nth(max_len) {
        Some((cut, _)) => format!("{}...", &text[..cut]),
        None => text.to_string(),
    }
}
185
/// Reports whether `token` is too common to be a useful keyword.
///
/// The list covers frequent English function words plus the role labels
/// (`user`, `assistant`, `system`, `tool`). The comparison is exact and
/// case-sensitive.
fn is_stopword(token: &str) -> bool {
    const STOPWORDS: [&str; 15] = [
        "the",
        "and",
        "are",
        "but",
        "for",
        "from",
        "have",
        "into",
        "that",
        "this",
        "with",
        "user",
        "assistant",
        "system",
        "tool",
    ];

    STOPWORDS.iter().any(|word| *word == token)
}
206
#[cfg(test)]
mod tests {
    use super::Compressor;
    use forja_core::types::{Message, Role};

    /// A short two-message exchange must yield a populated entry: a summary,
    /// at least one keyword, the right message count and a code snippet.
    #[test]
    fn compressor_creates_non_empty_summary_and_keywords() {
        let conversation = vec![
            Message::text(Role::User, "We decided to deploy after fixing auth.rs.", None),
            Message::text(
                Role::Assistant,
                "Next action: run cargo test and then deploy.",
                None,
            ),
        ];

        let entry = Compressor::new().compress(conversation);

        assert!(!entry.summary.is_empty());
        assert!(!entry.keywords.is_empty());
        assert_eq!(entry.original_count, 2);
        assert!(!entry.code_snippets.is_empty());
    }
}
229}