// codemem_engine/compress.rs
//! LLM-powered observation compression for Codemem.
//!
//! Compresses raw tool observations into concise structural summaries
//! using a configured LLM provider (Ollama, OpenAI-compatible, or Anthropic).
//! Falls back to raw content on failure or when not configured.
//!
//! # Configuration (environment variables)
//!
//! - `CODEMEM_COMPRESS_PROVIDER`: `ollama` | `openai` | `anthropic` (default: disabled)
//! - `CODEMEM_COMPRESS_MODEL`: model name (defaults: `llama3.2`, `gpt-4o-mini`, `claude-haiku-4-5-20251001`)
//! - `CODEMEM_COMPRESS_URL`: base URL override (defaults: `http://localhost:11434`, `https://api.openai.com/v1`)
//! - `CODEMEM_API_KEY` / `OPENAI_API_KEY` / `ANTHROPIC_API_KEY`: API key for cloud providers
use std::time::Duration;

/// Per-request timeout applied to every compression HTTP call.
const COMPRESS_TIMEOUT: Duration = Duration::from_secs(30);

/// Minimum content length worth compressing. Shorter observations are already concise.
const MIN_COMPRESS_LEN: usize = 200;

/// System prompt shared by all providers: instructs the model to emit a
/// plain-text structural summary (what / why / details) under 200 words.
const SYSTEM_PROMPT: &str = "\
You are a code observation compressor for a memory engine. \
Given a raw tool observation from an AI coding session, produce a concise summary (under 200 words) that captures:\n\
1. What: the key structures, functions, types, and patterns observed\n\
2. Why it matters: dependencies, relationships, design decisions, purpose\n\
3. Details worth remembering: important names, signatures, constants\n\n\
Rules:\n\
- Be specific — use actual function/type/file names\n\
- Skip boilerplate and obvious information\n\
- Focus on structural and behavioral insights\n\
- For file reads: what is this file's role and key exports?\n\
- For edits: what changed and why does it matter?\n\
- For searches: what patterns were found and where?\n\
- Output plain text, no markdown formatting";
35
36pub enum CompressProvider {
37    Ollama {
38        base_url: String,
39        model: String,
40        client: reqwest::blocking::Client,
41    },
42    OpenAi {
43        base_url: String,
44        model: String,
45        api_key: String,
46        client: reqwest::blocking::Client,
47    },
48    Anthropic {
49        api_key: String,
50        model: String,
51        client: reqwest::blocking::Client,
52    },
53    None,
54}
55
56impl CompressProvider {
57    /// Create a provider from environment variables.
58    ///
59    /// Note: each call constructs a new `reqwest::blocking::Client`. Callers
60    /// should invoke this once and cache the returned provider rather than
61    /// calling it repeatedly.
62    pub fn from_env() -> Self {
63        let provider = std::env::var("CODEMEM_COMPRESS_PROVIDER").unwrap_or_default();
64
65        let client = || {
66            reqwest::blocking::Client::builder()
67                .timeout(COMPRESS_TIMEOUT)
68                .build()
69                .unwrap_or_default()
70        };
71
72        match provider.to_lowercase().as_str() {
73            "ollama" => {
74                let base_url = std::env::var("CODEMEM_COMPRESS_URL")
75                    .unwrap_or_else(|_| "http://localhost:11434".to_string());
76                let model = std::env::var("CODEMEM_COMPRESS_MODEL")
77                    .unwrap_or_else(|_| "llama3.2".to_string());
78                CompressProvider::Ollama {
79                    base_url,
80                    model,
81                    client: client(),
82                }
83            }
84            "openai" => {
85                let base_url = std::env::var("CODEMEM_COMPRESS_URL")
86                    .unwrap_or_else(|_| "https://api.openai.com/v1".to_string());
87                let model = std::env::var("CODEMEM_COMPRESS_MODEL")
88                    .unwrap_or_else(|_| "gpt-4o-mini".to_string());
89                let api_key = std::env::var("CODEMEM_API_KEY")
90                    .or_else(|_| std::env::var("OPENAI_API_KEY"))
91                    .unwrap_or_default();
92                CompressProvider::OpenAi {
93                    base_url,
94                    model,
95                    api_key,
96                    client: client(),
97                }
98            }
99            "anthropic" => {
100                let api_key = std::env::var("CODEMEM_API_KEY")
101                    .or_else(|_| std::env::var("ANTHROPIC_API_KEY"))
102                    .unwrap_or_default();
103                let model = std::env::var("CODEMEM_COMPRESS_MODEL")
104                    .unwrap_or_else(|_| "claude-haiku-4-5-20251001".to_string());
105                CompressProvider::Anthropic {
106                    api_key,
107                    model,
108                    client: client(),
109                }
110            }
111            _ => CompressProvider::None,
112        }
113    }
114
115    /// Whether compression is enabled.
116    pub fn is_enabled(&self) -> bool {
117        !matches!(self, CompressProvider::None)
118    }
119
120    /// Compress a tool observation into a concise summary.
121    ///
122    /// Returns `None` if compression is disabled, content is too short,
123    /// or the LLM call fails (caller should use raw content as fallback).
124    pub fn compress(&self, content: &str, tool: &str, file_path: Option<&str>) -> Option<String> {
125        if !self.is_enabled() || content.len() < MIN_COMPRESS_LEN {
126            return None;
127        }
128
129        let user_prompt = build_user_prompt(content, tool, file_path);
130
131        match self.call_llm(&user_prompt) {
132            Ok(compressed) if compressed.trim().is_empty() => {
133                tracing::warn!("Compression returned empty output, using raw content");
134                None
135            }
136            Ok(compressed) => {
137                tracing::info!(
138                    "Compressed observation: {} → {} chars ({:.0}% reduction)",
139                    content.len(),
140                    compressed.len(),
141                    (1.0 - compressed.len() as f64 / content.len() as f64) * 100.0
142                );
143                Some(compressed)
144            }
145            Err(e) => {
146                tracing::warn!("Compression failed, using raw content: {e}");
147                None
148            }
149        }
150    }
151
152    /// Summarize a batch of file changes into a one-sentence developer intent summary.
153    ///
154    /// Returns `None` if compression is disabled or the LLM call fails.
155    pub fn summarize_batch(&self, raw_summary: &str) -> Option<String> {
156        if !self.is_enabled() {
157            return None;
158        }
159
160        let prompt = format!(
161            "Summarize this batch of file changes in one sentence describing the likely developer intent \
162             (e.g. 'Refactoring auth module error handling', 'Adding new API endpoints for user management'). \
163             Be specific about what was changed. Output only the summary sentence, nothing else.\n\n{raw_summary}"
164        );
165
166        match self.call_llm(&prompt) {
167            Ok(summary) if summary.trim().is_empty() => {
168                tracing::warn!("Batch summarization returned empty output");
169                None
170            }
171            Ok(summary) => {
172                tracing::info!("Batch summary: {}", summary.trim());
173                Some(summary.trim().to_string())
174            }
175            Err(e) => {
176                tracing::warn!("Batch summarization failed, using raw summary: {e}");
177                None
178            }
179        }
180    }
181
182    fn call_llm(&self, user_prompt: &str) -> anyhow::Result<String> {
183        match self {
184            CompressProvider::Ollama {
185                base_url,
186                model,
187                client,
188            } => {
189                let url = format!("{}/api/chat", base_url);
190                let body = serde_json::json!({
191                    "model": model,
192                    "messages": [
193                        {"role": "system", "content": SYSTEM_PROMPT},
194                        {"role": "user", "content": user_prompt}
195                    ],
196                    "stream": false,
197                });
198                let response = client.post(&url).json(&body).send()?;
199                if !response.status().is_success() {
200                    anyhow::bail!("Ollama returned {}", response.status());
201                }
202                let json: serde_json::Value = response.json()?;
203                json.get("message")
204                    .and_then(|m| m.get("content"))
205                    .and_then(|c| c.as_str())
206                    .map(|s| s.trim().to_string())
207                    .ok_or_else(|| anyhow::anyhow!("Unexpected Ollama response format"))
208            }
209            CompressProvider::OpenAi {
210                base_url,
211                model,
212                api_key,
213                client,
214            } => {
215                let url = format!("{}/chat/completions", base_url);
216                let body = serde_json::json!({
217                    "model": model,
218                    "messages": [
219                        {"role": "system", "content": SYSTEM_PROMPT},
220                        {"role": "user", "content": user_prompt}
221                    ],
222                    "max_tokens": 512,
223                    "temperature": 0.3,
224                });
225                let response = client
226                    .post(&url)
227                    .header("Authorization", format!("Bearer {}", api_key))
228                    .json(&body)
229                    .send()?;
230                if !response.status().is_success() {
231                    let status = response.status();
232                    let text = response.text().unwrap_or_default();
233                    anyhow::bail!("OpenAI returned {}: {}", status, text);
234                }
235                let json: serde_json::Value = response.json()?;
236                json.get("choices")
237                    .and_then(|c| c.as_array())
238                    .and_then(|arr| arr.first())
239                    .and_then(|choice| choice.get("message"))
240                    .and_then(|m| m.get("content"))
241                    .and_then(|c| c.as_str())
242                    .map(|s| s.trim().to_string())
243                    .ok_or_else(|| anyhow::anyhow!("Unexpected OpenAI response format"))
244            }
245            CompressProvider::Anthropic {
246                api_key,
247                model,
248                client,
249            } => {
250                let body = serde_json::json!({
251                    "model": model,
252                    "max_tokens": 512,
253                    "system": SYSTEM_PROMPT,
254                    "messages": [
255                        {"role": "user", "content": user_prompt}
256                    ],
257                });
258                let response = client
259                    .post("https://api.anthropic.com/v1/messages")
260                    .header("x-api-key", api_key.as_str())
261                    .header("anthropic-version", "2023-06-01")
262                    .header("content-type", "application/json")
263                    .json(&body)
264                    .send()?;
265                if !response.status().is_success() {
266                    let status = response.status();
267                    let text = response.text().unwrap_or_default();
268                    anyhow::bail!("Anthropic returned {}: {}", status, text);
269                }
270                let json: serde_json::Value = response.json()?;
271                json.get("content")
272                    .and_then(|c| c.as_array())
273                    .and_then(|arr| arr.first())
274                    .and_then(|block| block.get("text"))
275                    .and_then(|t| t.as_str())
276                    .map(|s| s.trim().to_string())
277                    .ok_or_else(|| anyhow::anyhow!("Unexpected Anthropic response format"))
278            }
279            CompressProvider::None => {
280                anyhow::bail!("No compression provider configured")
281            }
282        }
283    }
284}
285
/// Assemble the user-role prompt: tool name, optional file path, and the
/// (possibly truncated) raw observation text.
pub(crate) fn build_user_prompt(content: &str, tool: &str, file_path: Option<&str>) -> String {
    // Cap on observation bytes forwarded to the LLM, to bound input cost.
    const MAX_OBSERVATION_BYTES: usize = 8000;

    // Optional "File: …" header line.
    let file_info = match file_path {
        Some(path) => format!("File: {path}\n"),
        None => String::new(),
    };

    // Clamp to the cap, then walk back to a valid UTF-8 char boundary so the
    // slice below cannot panic on multi-byte characters. `is_char_boundary`
    // is always true at 0 and at `content.len()`, so this terminates.
    let mut cut = content.len().min(MAX_OBSERVATION_BYTES);
    while !content.is_char_boundary(cut) {
        cut -= 1;
    }
    let truncated = &content[..cut];

    format!("Tool: {tool}\n{file_info}\nObservation:\n{truncated}")
}
303
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn none_provider_returns_none() {
        let provider = CompressProvider::None;
        assert!(!provider.is_enabled());
        let observation = "some content here that is long enough to compress blah blah blah blah blah blah blah blah blah blah blah blah blah blah blah blah blah blah blah blah";
        assert!(provider
            .compress(observation, "Read", Some("src/main.rs"))
            .is_none());
    }

    #[test]
    fn short_content_skips_compression() {
        // No request is ever made: the length check rejects the input before
        // the (unreachable) server would be contacted.
        let provider = CompressProvider::Ollama {
            base_url: "http://localhost:99999".to_string(),
            model: "test".to_string(),
            client: reqwest::blocking::Client::new(),
        };
        assert!(provider.compress("short", "Read", None).is_none());
    }

    #[test]
    fn build_user_prompt_with_file() {
        let prompt = build_user_prompt("content here", "Read", Some("src/lib.rs"));
        for expected in ["Tool: Read", "File: src/lib.rs", "content here"] {
            assert!(prompt.contains(expected));
        }
    }

    #[test]
    fn build_user_prompt_without_file() {
        let prompt = build_user_prompt("content here", "Grep", None);
        assert!(prompt.contains("Tool: Grep"));
        assert!(!prompt.contains("File:"));
    }

    #[test]
    fn build_user_prompt_truncates_long_content() {
        let oversized = "x".repeat(10000);
        let prompt = build_user_prompt(&oversized, "Read", None);
        assert!(prompt.len() < 8200);
    }

    #[test]
    fn from_env_defaults_to_none() {
        // CODEMEM_COMPRESS_PROVIDER is unset in the test environment.
        let provider = CompressProvider::from_env();
        assert!(!provider.is_enabled());
    }
}