Skip to main content

forja_memory/
compressor.rs

1use chrono::{DateTime, Local};
2use forja_core::types::{Content, Message, Role};
3use std::collections::{BTreeSet, HashMap};
4
5#[derive(Debug, Clone, PartialEq, Eq)]
6pub struct CompressedEntry {
7    pub timestamp: DateTime<Local>,
8    pub summary: String,
9    pub keywords: Vec<String>,
10    pub original_count: usize,
11    pub code_snippets: Vec<String>,
12}
13
14#[derive(Debug, Clone, Default)]
15pub struct Compressor;
16
17impl Compressor {
18    pub fn new() -> Self {
19        Self
20    }
21
22    pub fn compress(&self, messages: Vec<Message>) -> CompressedEntry {
23        let texts = message_texts(&messages);
24        let summary = build_summary(&texts);
25        let keywords = extract_keywords(&texts);
26        let code_snippets = extract_code_snippets(&texts);
27        let timestamp = Local::now();
28
29        CompressedEntry {
30            timestamp,
31            summary,
32            keywords,
33            original_count: messages.len(),
34            code_snippets,
35        }
36    }
37}
38
39fn message_texts(messages: &[Message]) -> Vec<String> {
40    messages
41        .iter()
42        .filter_map(|message| {
43            let Content::Text { text, .. } = &message.content else {
44                return None;
45            };
46            let role = match message.role {
47                Role::User => "user",
48                Role::Assistant => "assistant",
49                Role::System => "system",
50                Role::Tool => "tool",
51            };
52            Some(format!("{role}: {}", text.trim()))
53        })
54        .filter(|text| !text.trim().is_empty())
55        .collect()
56}
57
58fn build_summary(texts: &[String]) -> String {
59    let mut selected = texts
60        .iter()
61        .filter(|text| is_key_fact(text) || looks_like_action_item(text) || contains_code_reference(text))
62        .take(4)
63        .cloned()
64        .collect::<Vec<_>>();
65
66    if selected.is_empty() {
67        selected = texts.iter().take(3).cloned().collect();
68    }
69
70    let summary = selected
71        .into_iter()
72        .map(|text| truncate_text(&text, 160))
73        .collect::<Vec<_>>()
74        .join(" | ");
75
76    if summary.trim().is_empty() {
77        "Conversation summary unavailable.".to_string()
78    } else {
79        summary
80    }
81}
82
83fn extract_keywords(texts: &[String]) -> Vec<String> {
84    let mut frequencies = HashMap::<String, usize>::new();
85    for token in texts.iter().flat_map(|text| tokenize(text)) {
86        if is_stopword(&token) || token.len() < 3 {
87            continue;
88        }
89        *frequencies.entry(token).or_default() += 1;
90    }
91
92    let mut weighted = frequencies.into_iter().collect::<Vec<_>>();
93    weighted.sort_by(|left, right| right.1.cmp(&left.1).then_with(|| left.0.cmp(&right.0)));
94
95    let mut keywords = weighted
96        .into_iter()
97        .map(|(token, _)| token)
98        .take(8)
99        .collect::<Vec<_>>();
100
101    if keywords.is_empty() {
102        keywords.push("memory".to_string());
103    }
104
105    keywords
106}
107
108fn extract_code_snippets(texts: &[String]) -> Vec<String> {
109    let snippets = texts
110        .iter()
111        .filter(|text| contains_code_reference(text))
112        .map(|text| truncate_text(text, 200))
113        .collect::<BTreeSet<_>>();
114
115    snippets.into_iter().take(3).collect()
116}
117
/// Splits `text` into lowercase tokens.
///
/// Any character that is not alphanumeric, `_` or `.` acts as a separator, so
/// identifiers such as `snake_case` and dotted names such as `auth.rs` stay in one
/// piece. Trailing dots are stripped from every token so that sentence punctuation
/// does not produce distinct tokens (`"deploy."` and `"deploy"` both yield
/// `"deploy"`). Tokens left empty by that stripping, such as `"..."`, are dropped.
/// Leading dots are kept, so names like `.env` survive.
fn tokenize(text: &str) -> Vec<String> {
    text.split(|character: char| {
        !(character.is_alphanumeric() || character == '_' || character == '.')
    })
    .map(|token| token.trim_end_matches('.'))
    .filter(|token| !token.is_empty())
    .map(str::to_lowercase)
    .collect()
}
125
/// Reports whether `text` mentions a decision, preference or completed piece of work.
///
/// The check is a case-insensitive substring search, so a marker also matches inside
/// longer words (`"ship"` matches `"shipping"` but also `"relationship"`).
fn is_key_fact(text: &str) -> bool {
    const MARKERS: [&str; 9] = [
        "decide",
        "decision",
        "prefer",
        "remember",
        "implemented",
        "fixed",
        "resolved",
        "deploy",
        "ship",
    ];

    let normalized = text.to_lowercase();
    MARKERS.iter().any(|&marker| normalized.contains(marker))
}
142
/// Reports whether `text` reads like a pending task or follow-up.
///
/// The check is a case-insensitive substring search, so a marker also matches inside
/// longer words (`"action"` matches `"transaction"`).
fn looks_like_action_item(text: &str) -> bool {
    const MARKERS: [&str; 7] = [
        "todo",
        "next",
        "follow up",
        "need to",
        "should",
        "must",
        "action",
    ];

    let normalized = text.to_lowercase();
    MARKERS.iter().any(|&marker| normalized.contains(marker))
}
157
/// Reports whether `text` appears to mention source code, a source file or a
/// developer command.
///
/// The check is a case-insensitive substring search. Markers that end in a space
/// (`"fn "`, `"cargo "`, `"git "`, ...) only match when that space is present, so a
/// bare trailing `"cargo"` does not count.
fn contains_code_reference(text: &str) -> bool {
    const MARKERS: [&str; 11] = [
        ".rs",
        ".py",
        ".ts",
        ".js",
        "fn ",
        "struct ",
        "impl ",
        "cargo ",
        "git ",
        "```",
        "::",
    ];

    let normalized = text.to_lowercase();
    MARKERS.iter().any(|&marker| normalized.contains(marker))
}
176
/// Limits `text` to `max_len` characters.
///
/// Text that already fits is returned unchanged. Longer text is cut after `max_len`
/// characters (Unicode scalar values, never in the middle of one) and `"..."` is
/// appended, so a truncated result is `max_len + 3` characters long.
fn truncate_text(text: &str, max_len: usize) -> String {
    // The character at index `max_len` exists only when the text is too long, and its
    // byte offset is exactly where the kept prefix ends. This needs a single pass
    // that stops early instead of counting every character first.
    match text.char_indices().nth(max_len) {
        Some((cut, _)) => format!("{}...", &text[..cut]),
        None => text.to_string(),
    }
}
185
/// Reports whether `token` should be ignored during keyword extraction.
///
/// Besides common English function words, the list contains the role labels
/// (`user`, `assistant`, `system`, `tool`). The comparison is exact and
/// case-sensitive, so callers are expected to pass lowercase tokens.
fn is_stopword(token: &str) -> bool {
    matches!(
        token,
        "the"
            | "and"
            | "are"
            | "but"
            | "for"
            | "from"
            | "have"
            | "into"
            | "that"
            | "this"
            | "with"
            | "user"
            | "assistant"
            | "system"
            | "tool"
    )
}
206
#[cfg(test)]
mod tests {
    use super::Compressor;
    use forja_core::types::{Message, Role};

    /// A short two-message exchange containing a decision, an action item and a code
    /// reference must produce a populated entry.
    #[test]
    fn compressor_creates_non_empty_summary_and_keywords() {
        let compressor = Compressor::new();
        let entry = compressor.compress(vec![
            Message::text(Role::User, "We decided to deploy after fixing auth.rs.", None),
            Message::text(
                Role::Assistant,
                "Next action: run cargo test and then deploy.",
                None,
            ),
        ]);

        assert!(!entry.summary.is_empty());
        assert!(!entry.keywords.is_empty());
        assert_eq!(entry.original_count, 2);
        assert!(!entry.code_snippets.is_empty());
    }
}
229}