1use std::time::Duration;
15
/// Wall-clock timeout applied to every compression HTTP request
/// (set on the `reqwest` client built in `from_env`).
const COMPRESS_TIMEOUT: Duration = Duration::from_secs(30);

/// Minimum observation length in bytes before `compress` will call the LLM;
/// shorter content is kept verbatim (`compress` returns `None`).
const MIN_COMPRESS_LEN: usize = 200;
20
/// System prompt sent with every `call_llm` request: instructs the model to
/// turn a raw tool observation into a plain-text summary under 200 words.
/// Passed as the `system` message (OpenAI/Ollama) or `system` field (Anthropic).
const SYSTEM_PROMPT: &str = "\
You are a code observation compressor for a memory engine. \
Given a raw tool observation from an AI coding session, produce a concise summary (under 200 words) that captures:\n\
1. What: the key structures, functions, types, and patterns observed\n\
2. Why it matters: dependencies, relationships, design decisions, purpose\n\
3. Details worth remembering: important names, signatures, constants\n\n\
Rules:\n\
- Be specific — use actual function/type/file names\n\
- Skip boilerplate and obvious information\n\
- Focus on structural and behavioral insights\n\
- For file reads: what is this file's role and key exports?\n\
- For edits: what changed and why does it matter?\n\
- For searches: what patterns were found and where?\n\
- Output plain text, no markdown formatting";
35
/// Backend used to compress tool observations into short summaries.
///
/// Selected from environment variables by [`CompressProvider::from_env`];
/// the `None` variant disables compression entirely.
pub enum CompressProvider {
    /// Local Ollama server, called via its `/api/chat` endpoint.
    Ollama {
        base_url: String,
        model: String,
        client: reqwest::blocking::Client,
    },
    /// OpenAI-compatible server, called via `/chat/completions` with a
    /// `Bearer` API key.
    OpenAi {
        base_url: String,
        model: String,
        api_key: String,
        client: reqwest::blocking::Client,
    },
    /// Anthropic Messages API (fixed `api.anthropic.com` endpoint).
    Anthropic {
        api_key: String,
        model: String,
        client: reqwest::blocking::Client,
    },
    /// Compression disabled; all operations return `None`/`Err`.
    None,
}
55
56impl CompressProvider {
57 pub fn from_env() -> Self {
63 let provider = std::env::var("CODEMEM_COMPRESS_PROVIDER").unwrap_or_default();
64
65 let client = || {
66 reqwest::blocking::Client::builder()
67 .timeout(COMPRESS_TIMEOUT)
68 .build()
69 .unwrap_or_default()
70 };
71
72 match provider.to_lowercase().as_str() {
73 "ollama" => {
74 let base_url = std::env::var("CODEMEM_COMPRESS_URL")
75 .unwrap_or_else(|_| "http://localhost:11434".to_string());
76 let model = std::env::var("CODEMEM_COMPRESS_MODEL")
77 .unwrap_or_else(|_| "llama3.2".to_string());
78 CompressProvider::Ollama {
79 base_url,
80 model,
81 client: client(),
82 }
83 }
84 "openai" => {
85 let base_url = std::env::var("CODEMEM_COMPRESS_URL")
86 .unwrap_or_else(|_| "https://api.openai.com/v1".to_string());
87 let model = std::env::var("CODEMEM_COMPRESS_MODEL")
88 .unwrap_or_else(|_| "gpt-4o-mini".to_string());
89 let api_key = std::env::var("CODEMEM_API_KEY")
90 .or_else(|_| std::env::var("OPENAI_API_KEY"))
91 .unwrap_or_default();
92 CompressProvider::OpenAi {
93 base_url,
94 model,
95 api_key,
96 client: client(),
97 }
98 }
99 "anthropic" => {
100 let api_key = std::env::var("CODEMEM_API_KEY")
101 .or_else(|_| std::env::var("ANTHROPIC_API_KEY"))
102 .unwrap_or_default();
103 let model = std::env::var("CODEMEM_COMPRESS_MODEL")
104 .unwrap_or_else(|_| "claude-haiku-4-5-20251001".to_string());
105 CompressProvider::Anthropic {
106 api_key,
107 model,
108 client: client(),
109 }
110 }
111 _ => CompressProvider::None,
112 }
113 }
114
115 pub fn is_enabled(&self) -> bool {
117 !matches!(self, CompressProvider::None)
118 }
119
120 pub fn compress(&self, content: &str, tool: &str, file_path: Option<&str>) -> Option<String> {
125 if !self.is_enabled() || content.len() < MIN_COMPRESS_LEN {
126 return None;
127 }
128
129 let user_prompt = build_user_prompt(content, tool, file_path);
130
131 match self.call_llm(&user_prompt) {
132 Ok(compressed) if compressed.trim().is_empty() => {
133 tracing::warn!("Compression returned empty output, using raw content");
134 None
135 }
136 Ok(compressed) => {
137 tracing::info!(
138 "Compressed observation: {} → {} chars ({:.0}% reduction)",
139 content.len(),
140 compressed.len(),
141 (1.0 - compressed.len() as f64 / content.len() as f64) * 100.0
142 );
143 Some(compressed)
144 }
145 Err(e) => {
146 tracing::warn!("Compression failed, using raw content: {e}");
147 None
148 }
149 }
150 }
151
152 pub fn summarize_batch(&self, raw_summary: &str) -> Option<String> {
156 if !self.is_enabled() {
157 return None;
158 }
159
160 let prompt = format!(
161 "Summarize this batch of file changes in one sentence describing the likely developer intent \
162 (e.g. 'Refactoring auth module error handling', 'Adding new API endpoints for user management'). \
163 Be specific about what was changed. Output only the summary sentence, nothing else.\n\n{raw_summary}"
164 );
165
166 match self.call_llm(&prompt) {
167 Ok(summary) if summary.trim().is_empty() => {
168 tracing::warn!("Batch summarization returned empty output");
169 None
170 }
171 Ok(summary) => {
172 tracing::info!("Batch summary: {}", summary.trim());
173 Some(summary.trim().to_string())
174 }
175 Err(e) => {
176 tracing::warn!("Batch summarization failed, using raw summary: {e}");
177 None
178 }
179 }
180 }
181
182 fn call_llm(&self, user_prompt: &str) -> anyhow::Result<String> {
183 match self {
184 CompressProvider::Ollama {
185 base_url,
186 model,
187 client,
188 } => {
189 let url = format!("{}/api/chat", base_url);
190 let body = serde_json::json!({
191 "model": model,
192 "messages": [
193 {"role": "system", "content": SYSTEM_PROMPT},
194 {"role": "user", "content": user_prompt}
195 ],
196 "stream": false,
197 });
198 let response = client.post(&url).json(&body).send()?;
199 if !response.status().is_success() {
200 anyhow::bail!("Ollama returned {}", response.status());
201 }
202 let json: serde_json::Value = response.json()?;
203 json.get("message")
204 .and_then(|m| m.get("content"))
205 .and_then(|c| c.as_str())
206 .map(|s| s.trim().to_string())
207 .ok_or_else(|| anyhow::anyhow!("Unexpected Ollama response format"))
208 }
209 CompressProvider::OpenAi {
210 base_url,
211 model,
212 api_key,
213 client,
214 } => {
215 let url = format!("{}/chat/completions", base_url);
216 let body = serde_json::json!({
217 "model": model,
218 "messages": [
219 {"role": "system", "content": SYSTEM_PROMPT},
220 {"role": "user", "content": user_prompt}
221 ],
222 "max_tokens": 512,
223 "temperature": 0.3,
224 });
225 let response = client
226 .post(&url)
227 .header("Authorization", format!("Bearer {}", api_key))
228 .json(&body)
229 .send()?;
230 if !response.status().is_success() {
231 let status = response.status();
232 let text = response.text().unwrap_or_default();
233 anyhow::bail!("OpenAI returned {}: {}", status, text);
234 }
235 let json: serde_json::Value = response.json()?;
236 json.get("choices")
237 .and_then(|c| c.as_array())
238 .and_then(|arr| arr.first())
239 .and_then(|choice| choice.get("message"))
240 .and_then(|m| m.get("content"))
241 .and_then(|c| c.as_str())
242 .map(|s| s.trim().to_string())
243 .ok_or_else(|| anyhow::anyhow!("Unexpected OpenAI response format"))
244 }
245 CompressProvider::Anthropic {
246 api_key,
247 model,
248 client,
249 } => {
250 let body = serde_json::json!({
251 "model": model,
252 "max_tokens": 512,
253 "system": SYSTEM_PROMPT,
254 "messages": [
255 {"role": "user", "content": user_prompt}
256 ],
257 });
258 let response = client
259 .post("https://api.anthropic.com/v1/messages")
260 .header("x-api-key", api_key.as_str())
261 .header("anthropic-version", "2023-06-01")
262 .header("content-type", "application/json")
263 .json(&body)
264 .send()?;
265 if !response.status().is_success() {
266 let status = response.status();
267 let text = response.text().unwrap_or_default();
268 anyhow::bail!("Anthropic returned {}: {}", status, text);
269 }
270 let json: serde_json::Value = response.json()?;
271 json.get("content")
272 .and_then(|c| c.as_array())
273 .and_then(|arr| arr.first())
274 .and_then(|block| block.get("text"))
275 .and_then(|t| t.as_str())
276 .map(|s| s.trim().to_string())
277 .ok_or_else(|| anyhow::anyhow!("Unexpected Anthropic response format"))
278 }
279 CompressProvider::None => {
280 anyhow::bail!("No compression provider configured")
281 }
282 }
283 }
284}
285
/// Assembles the user-facing prompt for one observation: tool name, optional
/// file path, then the observation text capped at 8000 bytes. The cap backs
/// up to the nearest UTF-8 character boundary so slicing never panics.
pub(crate) fn build_user_prompt(content: &str, tool: &str, file_path: Option<&str>) -> String {
    const MAX_CONTENT_BYTES: usize = 8000;

    let file_info = match file_path {
        Some(path) => format!("File: {path}\n"),
        None => String::new(),
    };

    let body = if content.len() <= MAX_CONTENT_BYTES {
        content
    } else {
        // Walk backwards from the byte cap until we sit on a char boundary
        // (boundary 0 always holds, so this terminates).
        let mut cut = MAX_CONTENT_BYTES;
        while !content.is_char_boundary(cut) {
            cut -= 1;
        }
        &content[..cut]
    };

    format!("Tool: {tool}\n{file_info}\nObservation:\n{body}")
}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// A disabled provider must refuse to compress even long content.
    #[test]
    fn none_provider_returns_none() {
        let provider = CompressProvider::None;
        assert!(!provider.is_enabled());
        // Well above MIN_COMPRESS_LEN, so only the disabled provider
        // short-circuits compression here.
        let long_content = "blah ".repeat(2 * MIN_COMPRESS_LEN);
        assert!(provider
            .compress(&long_content, "Read", Some("src/main.rs"))
            .is_none());
    }

    /// Content below MIN_COMPRESS_LEN must return before any HTTP request
    /// is attempted — hence the deliberately unreachable URL.
    #[test]
    fn short_content_skips_compression() {
        let provider = CompressProvider::Ollama {
            base_url: "http://localhost:99999".to_string(),
            model: "test".to_string(),
            client: reqwest::blocking::Client::new(),
        };
        assert!(provider.compress("short", "Read", None).is_none());
    }

    #[test]
    fn build_user_prompt_with_file() {
        let prompt = build_user_prompt("content here", "Read", Some("src/lib.rs"));
        assert!(prompt.contains("Tool: Read"));
        assert!(prompt.contains("File: src/lib.rs"));
        assert!(prompt.contains("content here"));
    }

    #[test]
    fn build_user_prompt_without_file() {
        let prompt = build_user_prompt("content here", "Grep", None);
        assert!(prompt.contains("Tool: Grep"));
        assert!(!prompt.contains("File:"));
    }

    #[test]
    fn build_user_prompt_truncates_long_content() {
        let long = "x".repeat(10000);
        let prompt = build_user_prompt(&long, "Read", None);
        assert!(prompt.len() < 8200);
    }

    /// New: a 3-byte char makes the 8000-byte cut land mid-character,
    /// exercising the boundary back-off loop (the only panic path in
    /// build_user_prompt, previously untested).
    #[test]
    fn build_user_prompt_respects_char_boundaries() {
        let long = "€".repeat(4000); // 12_000 bytes
        let prompt = build_user_prompt(&long, "Read", None);
        assert!(prompt.len() < 8200);
        assert!(prompt.ends_with('€'));
    }

    /// Fix: the old test asserted from_env() is disabled, which failed
    /// whenever CODEMEM_COMPRESS_PROVIDER happened to be exported in the
    /// developer's or CI environment. Derive the expectation from the
    /// actual environment instead of assuming it is clean.
    #[test]
    fn from_env_agrees_with_environment() {
        let expected = matches!(
            std::env::var("CODEMEM_COMPRESS_PROVIDER")
                .unwrap_or_default()
                .to_lowercase()
                .as_str(),
            "ollama" | "openai" | "anthropic"
        );
        assert_eq!(CompressProvider::from_env().is_enabled(), expected);
    }
}