Skip to main content

lean_ctx/core/
transcript_compact.rs

1//! Transcript/conversation compaction for agent session JSONL files.
2//!
3//! Compresses tool_result blocks in JSONL conversation transcripts (e.g.
4//! `~/.claude/projects/*.jsonl`, `~/.cursor/agent-transcripts/*.jsonl`)
5//! by replacing large tool outputs with compact summaries.
6//!
7//! Analogous to ContextZip's approach: 85.8% of transcript bytes are tool I/O.
8
9use std::path::Path;
10
/// Size threshold above which a string value is replaced by a summary.
/// Compared against `str::len`, so despite the name this is a byte count.
11const MAX_TOOL_OUTPUT_CHARS: usize = 500;
/// JSONL lines shorter than this (again measured with `str::len`, i.e. bytes)
/// are copied through untouched without being parsed.
12const MIN_COMPRESS_CHARS: usize = 200;
13
/// Counters describing the outcome of one compaction run.
#[derive(Debug, Default)]
pub struct CompactionStats {
    /// Number of lines that were read.
    pub lines_processed: usize,
    /// Number of lines that were rewritten in compacted form.
    pub lines_compacted: usize,
    /// Size in bytes of the content before compaction.
    pub original_bytes: usize,
    /// Size in bytes of the content after compaction.
    pub compacted_bytes: usize,
}

impl CompactionStats {
    /// Percentage of bytes saved, computed as
    /// `(1 - compacted_bytes / original_bytes) * 100`.
    ///
    /// Returns `0.0` when `original_bytes` is zero.
    pub fn savings_pct(&self) -> f64 {
        match self.original_bytes {
            0 => 0.0,
            original => {
                let remaining = self.compacted_bytes as f64 / original as f64;
                (1.0 - remaining) * 100.0
            }
        }
    }
}

impl std::fmt::Display for CompactionStats {
    /// Renders a one-line human-readable summary of the counters.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            lines_processed,
            lines_compacted,
            original_bytes,
            compacted_bytes,
        } = self;
        let pct = self.savings_pct();

        write!(
            f,
            "{} lines ({} compacted), {:.0}% savings ({} → {} bytes)",
            lines_processed, lines_compacted, pct, original_bytes, compacted_bytes,
        )
    }
}
44
45/// Compact a single JSONL transcript file in-place.
46/// Returns stats about what was compacted.
47pub fn compact_file(path: &Path) -> Result<CompactionStats, String> {
48    let content = std::fs::read_to_string(path).map_err(|e| format!("read: {e}"))?;
49    let mut stats = CompactionStats {
50        original_bytes: content.len(),
51        ..Default::default()
52    };
53
54    let mut output_lines = Vec::new();
55
56    for line in content.lines() {
57        stats.lines_processed += 1;
58
59        if line.len() < MIN_COMPRESS_CHARS || !line.contains("tool_result") {
60            output_lines.push(line.to_string());
61            continue;
62        }
63
64        match compact_jsonl_line(line) {
65            Some(compacted) => {
66                stats.lines_compacted += 1;
67                output_lines.push(compacted);
68            }
69            None => {
70                output_lines.push(line.to_string());
71            }
72        }
73    }
74
75    let result = output_lines.join("\n");
76    stats.compacted_bytes = result.len();
77
78    if stats.lines_compacted > 0 {
79        std::fs::write(path, &result).map_err(|e| format!("write: {e}"))?;
80    }
81
82    Ok(stats)
83}
84
85/// Compact all JSONL files in a directory.
86pub fn compact_directory(dir: &Path) -> Result<CompactionStats, String> {
87    if !dir.is_dir() {
88        return Err(format!("not a directory: {}", dir.display()));
89    }
90
91    let mut total = CompactionStats::default();
92
93    let entries = std::fs::read_dir(dir).map_err(|e| format!("readdir: {e}"))?;
94    for entry in entries.flatten() {
95        let path = entry.path();
96        if path.extension().is_some_and(|e| e == "jsonl") {
97            match compact_file(&path) {
98                Ok(s) => {
99                    total.lines_processed += s.lines_processed;
100                    total.lines_compacted += s.lines_compacted;
101                    total.original_bytes += s.original_bytes;
102                    total.compacted_bytes += s.compacted_bytes;
103                }
104                Err(e) => {
105                    tracing::warn!("skip {}: {e}", path.display());
106                }
107            }
108        }
109    }
110
111    Ok(total)
112}
113
114fn compact_jsonl_line(line: &str) -> Option<String> {
115    let mut doc: serde_json::Value = serde_json::from_str(line).ok()?;
116
117    let mut modified = false;
118
119    if let Some(content) = doc.get_mut("content") {
120        if let Some(arr) = content.as_array_mut() {
121            for item in arr.iter_mut() {
122                if compact_content_block(item) {
123                    modified = true;
124                }
125            }
126        } else if let Some(s) = content.as_str() {
127            if s.len() > MAX_TOOL_OUTPUT_CHARS && has_tool_markers(s) {
128                let summary = summarize_content(s);
129                *content = serde_json::Value::String(summary);
130                modified = true;
131            }
132        }
133    }
134
135    if let Some(result) = doc.get_mut("result") {
136        if compact_content_block(result) {
137            modified = true;
138        }
139    }
140
141    if modified {
142        Some(serde_json::to_string(&doc).ok()?)
143    } else {
144        None
145    }
146}
147
148fn compact_content_block(block: &mut serde_json::Value) -> bool {
149    if let Some(text) = block.get_mut("text") {
150        if let Some(s) = text.as_str() {
151            if s.len() > MAX_TOOL_OUTPUT_CHARS && has_tool_markers(s) {
152                let summary = summarize_content(s);
153                *text = serde_json::Value::String(summary);
154                return true;
155            }
156        }
157    }
158
159    if let Some(content) = block.get_mut("content") {
160        if let Some(s) = content.as_str() {
161            if s.len() > MAX_TOOL_OUTPUT_CHARS {
162                let summary = summarize_content(s);
163                *content = serde_json::Value::String(summary);
164                return true;
165            }
166        }
167        if let Some(arr) = content.as_array_mut() {
168            let mut any_modified = false;
169            for item in arr.iter_mut() {
170                if compact_content_block(item) {
171                    any_modified = true;
172                }
173            }
174            return any_modified;
175        }
176    }
177
178    false
179}
180
/// Heuristic for "this string looks like tool output": it is either longer
/// than 2000 bytes or contains one of a few marker substrings.
fn has_tool_markers(s: &str) -> bool {
    const MARKERS: [&str; 3] = ["tool_result", "ctx_", "```"];

    s.len() > 2000 || MARKERS.iter().any(|&marker| s.contains(marker))
}
184
/// Build a short, human-readable stand-in for a large block of text.
///
/// The summary has the shape
///
/// ```text
/// [compacted: <line count>L, <byte count>ch]
/// <up to three first non-blank lines>
/// ...
/// <last non-blank line>
/// ```
///
/// * Blank lines are skipped *before* the first three lines are picked, so
///   leading empty lines do not leave the head of the summary empty.
/// * The `...` / last-line part is only emitted when the last non-blank line
///   is not already part of the head, so short inputs are not shown twice.
/// * Every quoted line is clipped to 100 bytes (on a UTF-8 character
///   boundary) plus a `...` suffix. With a worst-case header this bounds the
///   summary at 478 bytes, which is below the 500-byte
///   `MAX_TOOL_OUTPUT_CHARS` threshold, so a summary is never picked up for
///   compaction again on a later run.
fn summarize_content(text: &str) -> String {
    const HEAD_LINES: usize = 3;
    const MAX_LINE_BYTES: usize = 100;

    let clip = |line: &str| -> String {
        if line.len() <= MAX_LINE_BYTES {
            return line.to_string();
        }
        // Walk back to the nearest character boundary so slicing cannot
        // panic in the middle of a multi-byte character. Index 0 is always a
        // boundary, so the loop terminates.
        let mut end = MAX_LINE_BYTES;
        while !line.is_char_boundary(end) {
            end -= 1;
        }
        format!("{}...", &line[..end])
    };

    let total_lines = text.lines().count();
    let meaningful: Vec<&str> = text.lines().filter(|l| !l.trim().is_empty()).collect();

    let mut summary = format!("[compacted: {total_lines}L, {}ch]", text.len());

    for line in meaningful.iter().take(HEAD_LINES) {
        summary.push('\n');
        summary.push_str(&clip(line));
    }

    // Only add a tail when it is a line the head has not already shown.
    if meaningful.len() > HEAD_LINES {
        if let Some(last) = meaningful.last() {
            summary.push_str("\n...\n");
            summary.push_str(&clip(last));
        }
    }

    summary
}
215
#[cfg(test)]
mod tests {
    use super::*;

    /// The summary keeps the header plus the first and last input lines.
    #[test]
    fn summarize_preserves_first_and_last() {
        let summary = summarize_content("line 1\nline 2\nline 3\nline 4\nline 5\nline 6");
        for expected in ["line 1", "line 6", "[compacted:"] {
            assert!(summary.contains(expected));
        }
    }

    /// A record with a small payload is left alone.
    #[test]
    fn compact_skips_short_lines() {
        let outcome = compact_jsonl_line(r#"{"type":"text","content":"hello"}"#);
        assert!(outcome.is_none());
    }

    /// A file holding one oversized tool result shrinks after compaction.
    #[test]
    fn compact_file_roundtrip() {
        let tmp = tempfile::tempdir().unwrap();
        let file = tmp.path().join("test.jsonl");
        let record = serde_json::json!({
            "type": "tool_result",
            "content": "x".repeat(3000)
        });
        let serialized = serde_json::to_string(&record).unwrap();
        std::fs::write(&file, serialized).unwrap();

        let stats = compact_file(&file).unwrap();

        assert_eq!(stats.lines_processed, 1);
        assert!(stats.compacted_bytes < stats.original_bytes);
    }

    /// With no input bytes the savings percentage is defined as zero.
    #[test]
    fn savings_pct_empty() {
        assert_eq!(CompactionStats::default().savings_pct(), 0.0);
    }

    /// 200 bytes remaining out of 1000 is an 80% saving.
    #[test]
    fn savings_pct_calculation() {
        let stats = CompactionStats {
            original_bytes: 1000,
            compacted_bytes: 200,
            ..Default::default()
        };
        let delta = (stats.savings_pct() - 80.0).abs();
        assert!(delta < 0.1);
    }
}