Skip to main content

harness_compactor/
lib.rs

1//! Five-stage progressive compaction (DESIGN.md §9), borrowed from Claude Code.
2//!
3//! `DefaultCompactor` is purely structural — it doesn't call a model. Stage 3
4//! (Microcompact) and Stage 5 (AutoCompact) would normally invoke a cheap LLM;
5//! here we collapse content into terse summaries so the framework can run
6//! offline. Wire a `ModelBackedCompactor` later if you want semantic summaries.
7
8use async_trait::async_trait;
9use harness_core::{
10    Block, Budget, CompactError, CompactionStage, Compactor, Context, Model, Policy, Task, Turn,
11    TurnRole,
12};
13use std::sync::Arc;
14
/// Heuristic compactor — operates on the structure of the context only and
/// never calls a model.
#[derive(Debug, Clone, PartialEq)]
pub struct DefaultCompactor {
    /// Approximate tokens per char. 0.30 ≈ 3.3 chars/token (a generous English
    /// upper bound for non-Asian content).
    pub tokens_per_char: f32,
}

impl Default for DefaultCompactor {
    /// Builds a compactor with the stock ratio of 0.30 tokens per char.
    fn default() -> Self {
        Self {
            tokens_per_char: 0.30,
        }
    }
}
29
30impl DefaultCompactor {
31    pub fn new() -> Self {
32        Self::default()
33    }
34
35    fn estimate_tokens(&self, ctx: &Context) -> u32 {
36        let mut chars: usize = 0;
37        for b in ctx.system.iter().chain(ctx.guides.iter()) {
38            chars += block_chars(b);
39        }
40        for turn in &ctx.history {
41            for b in &turn.blocks {
42                chars += block_chars(b);
43            }
44        }
45        chars += ctx.task.description.len();
46        (chars as f32 * self.tokens_per_char) as u32
47    }
48}
49
50#[async_trait]
51impl Compactor for DefaultCompactor {
52    fn budget(&self, ctx: &Context) -> Budget {
53        Budget {
54            used: self.estimate_tokens(ctx),
55            window: ctx.policy.max_input_tokens,
56        }
57    }
58
59    async fn compact(&self, stage: CompactionStage, ctx: &mut Context) -> Result<(), CompactError> {
60        tracing::debug!(?stage, "compaction stage running");
61        match stage {
62            CompactionStage::BudgetReduce => budget_reduce(ctx),
63            CompactionStage::Snip => snip_file_reads(ctx),
64            CompactionStage::Microcompact => microcompact_old(ctx),
65            CompactionStage::ContextCollapse => context_collapse(ctx),
66            CompactionStage::AutoCompact => auto_compact(ctx),
67            // Forward-compat: ignore stages this version doesn't recognise.
68            _ => tracing::warn!(?stage, "unknown compaction stage — ignoring"),
69        }
70        Ok(())
71    }
72}
73
74// ============================================================
75// ModelBackedCompactor — uses a (typically cheap) Model to do real semantic
76// summarisation for Microcompact and AutoCompact stages.
77// ============================================================
78
79/// Compactor that calls an LLM for the inferential stages and falls back to
80/// `DefaultCompactor`'s structural strategies for the computational ones.
81///
82/// Typical wiring:
83/// ```ignore
84/// let cheap = OpenAiCompat::with_key(providers::DEEPSEEK, "deepseek-v4-flash", key);
85/// let compactor = ModelBackedCompactor::new(Arc::new(cheap));
86/// let loop_ = AgentLoop::new(main_model).with_compactor(Arc::new(compactor));
87/// ```
88pub struct ModelBackedCompactor {
89    pub model: Arc<dyn Model>,
90    pub tokens_per_char: f32,
91    /// Keep the most recent N turns intact during semantic compaction.
92    pub keep_recent: usize,
93    /// Hard cap on the summary length the model is asked to produce.
94    pub summary_max_tokens: u32,
95}
96
97impl ModelBackedCompactor {
98    pub fn new(model: Arc<dyn Model>) -> Self {
99        Self {
100            model,
101            tokens_per_char: 0.30,
102            keep_recent: 6,
103            summary_max_tokens: 600,
104        }
105    }
106}
107
108#[async_trait]
109impl Compactor for ModelBackedCompactor {
110    fn budget(&self, ctx: &Context) -> Budget {
111        DefaultCompactor {
112            tokens_per_char: self.tokens_per_char,
113        }
114        .budget(ctx)
115    }
116
117    async fn compact(&self, stage: CompactionStage, ctx: &mut Context) -> Result<(), CompactError> {
118        match stage {
119            CompactionStage::BudgetReduce => {
120                budget_reduce(ctx);
121                Ok(())
122            }
123            CompactionStage::Snip => {
124                snip_file_reads(ctx);
125                Ok(())
126            }
127            CompactionStage::ContextCollapse => {
128                context_collapse(ctx);
129                Ok(())
130            }
131            CompactionStage::Microcompact => {
132                self.model_summarise(ctx, "microcompact-summary").await
133            }
134            CompactionStage::AutoCompact => self.model_summarise(ctx, "auto-compact-summary").await,
135            _ => Ok(()),
136        }
137    }
138}
139
140impl ModelBackedCompactor {
141    /// Ask the model to produce a tight summary of the older history; replace
142    /// `0..split` with the resulting [`Block::Text`] in a synthetic system turn.
143    async fn model_summarise(&self, ctx: &mut Context, tag: &str) -> Result<(), CompactError> {
144        if ctx.history.len() <= self.keep_recent {
145            return Ok(());
146        }
147        let split = ctx.history.len() - self.keep_recent;
148        let mut dump = String::new();
149        for turn in ctx.history.iter().take(split) {
150            dump.push_str(&format_turn_for_summary(turn));
151        }
152        if dump.trim().is_empty() {
153            return Ok(());
154        }
155
156        let prompt = format!(
157            "You are compacting an in-progress agent conversation for downstream replay. \
158             Produce a terse summary (≤ 200 words) of the conversation below. Preserve: \
159             concrete file paths, decisions made, sensor outcomes, and the current goal. \
160             Drop: chit-chat, redundant tool reads, verbose stack traces.\n\n\
161             ---- TRANSCRIPT ----\n{dump}\n---- END ----\n\n\
162             Reply with the summary text only, no preamble."
163        );
164
165        let mut summary_ctx = Context::new(Task {
166            description: prompt,
167            source: None,
168            deadline: None,
169        });
170        summary_ctx.policy = Policy {
171            max_iters: 1,
172            max_input_tokens: 100_000,
173            max_output_tokens: self.summary_max_tokens,
174            self_correct_rounds: 0,
175        };
176        summary_ctx.history.push(Turn {
177            role: TurnRole::User,
178            blocks: vec![Block::Text(summary_ctx.task.description.clone())],
179        });
180
181        let out = self
182            .model
183            .complete(&summary_ctx)
184            .await
185            .map_err(|e| CompactError::Failed {
186                stage: tag.into(),
187                reason: format!("model: {e}"),
188            })?;
189
190        let summary = out.text.unwrap_or_else(|| "(empty summary)".into());
191        let mut new_history = vec![Turn {
192            role: TurnRole::System,
193            blocks: vec![Block::Text(format!("[{tag}]\n{summary}"))],
194        }];
195        new_history.extend(ctx.history.drain(split..));
196        ctx.history = new_history;
197        Ok(())
198    }
199}
200
201fn format_turn_for_summary(turn: &Turn) -> String {
202    let role = match turn.role {
203        TurnRole::User => "user",
204        TurnRole::Assistant => "assistant",
205        TurnRole::Tool => "tool",
206        TurnRole::System => "system",
207        _ => "unknown",
208    };
209    let mut s = format!("[{role}]\n");
210    for b in &turn.blocks {
211        match b {
212            Block::Text(t) => {
213                s.push_str(t);
214                s.push('\n');
215            }
216            Block::ToolCall { name, args, .. } => {
217                s.push_str(&format!("(tool-call {name} {args})\n"));
218            }
219            Block::ToolResult { call_id, content } => {
220                let preview = content.to_string();
221                let preview = preview.chars().take(160).collect::<String>();
222                s.push_str(&format!("(tool-result {call_id}: {preview}…)\n"));
223            }
224            Block::FileRef { path, .. } => {
225                s.push_str(&format!("(file-ref {path})\n"));
226            }
227            _ => {}
228        }
229    }
230    s.push('\n');
231    s
232}
233
234fn block_chars(b: &Block) -> usize {
235    match b {
236        Block::Text(s) => s.len(),
237        Block::FileRef {
238            path,
239            hash: _,
240            excerpt,
241        } => path.len() + excerpt.as_ref().map_or(0, String::len),
242        Block::Skill { name, body } => name.len() + body.len(),
243        Block::ToolCall {
244            call_id,
245            name,
246            args,
247        } => call_id.len() + name.len() + args.to_string().len(),
248        Block::ToolResult { call_id, content } => call_id.len() + content.to_string().len(),
249        Block::Feedback(signals) => signals
250            .iter()
251            .map(|s| s.message.len() + s.agent_hint.as_ref().map_or(0, String::len))
252            .sum(),
253        Block::Reasoning(s) => s.len(),
254        _ => 0,
255    }
256}
257
258// ---------- Stage 1: BudgetReduce ----------
259
260/// Trim redundant content: keep the most recent N turns intact, summarise older.
261/// Conservative — only collapses big tool results, leaves text alone.
262fn budget_reduce(ctx: &mut Context) {
263    let keep_recent = 8;
264    if ctx.history.len() <= keep_recent {
265        return;
266    }
267    let split = ctx.history.len() - keep_recent;
268    for turn in ctx.history.iter_mut().take(split) {
269        for b in turn.blocks.iter_mut() {
270            if let Block::ToolResult { call_id, content } = b
271                && content.to_string().len() > 800
272            {
273                let preview = content.to_string().chars().take(200).collect::<String>();
274                *b = Block::Text(format!("[tool-result:{call_id} (trimmed)] {preview}…"));
275            }
276        }
277    }
278}
279
280// ---------- Stage 2: Snip ----------
281
282/// Replace old `Block::FileRef { excerpt }` with hash-only references.
283fn snip_file_reads(ctx: &mut Context) {
284    let keep_recent = 4;
285    if ctx.history.len() <= keep_recent {
286        return;
287    }
288    let split = ctx.history.len() - keep_recent;
289    for turn in ctx.history.iter_mut().take(split) {
290        for b in turn.blocks.iter_mut() {
291            if let Block::FileRef {
292                path,
293                hash,
294                excerpt,
295            } = b
296                && excerpt.is_some()
297            {
298                *b = Block::FileRef {
299                    path: path.clone(),
300                    hash: hash.clone(),
301                    excerpt: None,
302                };
303            }
304        }
305    }
306}
307
308// ---------- Stage 3: Microcompact ----------
309
310/// Summarise older conversation segments. In `DefaultCompactor` we just
311/// rewrite the older half of the history into a single text block tagged
312/// `[microcompact-summary]`. Real provider-backed implementations should
313/// replace this with a cheap-model summarisation call.
314fn microcompact_old(ctx: &mut Context) {
315    if ctx.history.len() < 12 {
316        return;
317    }
318    let keep_recent = 6;
319    let split = ctx.history.len() - keep_recent;
320
321    // Build a textual summary of `0..split`.
322    let mut summary = String::from("[microcompact-summary]\n");
323    for turn in ctx.history.iter().take(split) {
324        let role = match turn.role {
325            TurnRole::User => "user",
326            TurnRole::Assistant => "assistant",
327            TurnRole::Tool => "tool",
328            TurnRole::System => "system",
329            _ => "unknown",
330        };
331        summary.push_str(&format!("- {role}: "));
332        for b in &turn.blocks {
333            match b {
334                Block::Text(t) => {
335                    summary.push_str(&t.chars().take(80).collect::<String>());
336                    summary.push(' ');
337                }
338                Block::ToolCall { name, .. } => summary.push_str(&format!("(call:{name}) ")),
339                Block::ToolResult { call_id, .. } => {
340                    summary.push_str(&format!("(result:{call_id}) "))
341                }
342                Block::FileRef { path, .. } => summary.push_str(&format!("(file:{path}) ")),
343                _ => {}
344            }
345        }
346        summary.push('\n');
347    }
348
349    let mut new_history = vec![Turn {
350        role: TurnRole::System,
351        blocks: vec![Block::Text(summary)],
352    }];
353    new_history.extend(ctx.history.drain(split..));
354    ctx.history = new_history;
355}
356
357// ---------- Stage 4: ContextCollapse ----------
358
359/// Collapse all FileRefs into a single inventory at the top, plus key excerpts.
360fn context_collapse(ctx: &mut Context) {
361    // Walk all history, collect file paths.
362    let mut files = std::collections::BTreeSet::new();
363    for turn in &ctx.history {
364        for b in &turn.blocks {
365            if let Block::FileRef { path, .. } = b {
366                files.insert(path.clone());
367            }
368        }
369    }
370    if files.is_empty() {
371        return;
372    }
373
374    let mut inv = String::from("[file-inventory]\n");
375    for f in &files {
376        inv.push_str(&format!("- {f}\n"));
377    }
378
379    // Remove file-ref blocks from history (inventory replaces them).
380    for turn in ctx.history.iter_mut() {
381        turn.blocks.retain(|b| !matches!(b, Block::FileRef { .. }));
382    }
383
384    // Insert inventory as the first system turn.
385    ctx.history.insert(
386        0,
387        Turn {
388            role: TurnRole::System,
389            blocks: vec![Block::Text(inv)],
390        },
391    );
392}
393
394// ---------- Stage 5: AutoCompact ----------
395
396/// Last resort: rewrite the whole history into a single condensed summary block.
397fn auto_compact(ctx: &mut Context) {
398    let keep_recent = 2;
399    if ctx.history.len() <= keep_recent {
400        return;
401    }
402    let split = ctx.history.len() - keep_recent;
403    let mut combined =
404        String::from("[auto-compact-summary]\nCondensed history of earlier turns:\n");
405    let mut counts = std::collections::BTreeMap::new();
406    for turn in ctx.history.iter().take(split) {
407        for b in &turn.blocks {
408            let key = match b {
409                Block::Text(_) => "text",
410                Block::ToolCall { .. } => "tool_call",
411                Block::ToolResult { .. } => "tool_result",
412                Block::FileRef { .. } => "file_ref",
413                Block::Skill { .. } => "skill",
414                Block::Feedback(_) => "feedback",
415                Block::Reasoning(_) => "reasoning",
416                _ => "unknown",
417            };
418            *counts.entry(key).or_insert(0u32) += 1;
419        }
420    }
421    for (k, v) in counts {
422        combined.push_str(&format!("- {v} × {k} block(s)\n"));
423    }
424
425    let mut new_history = vec![Turn {
426        role: TurnRole::System,
427        blocks: vec![Block::Text(combined)],
428    }];
429    new_history.extend(ctx.history.drain(split..));
430    ctx.history = new_history;
431}
432
#[cfg(test)]
mod tests {
    use super::*;
    use harness_core::{Block, Policy, Task, Turn, TurnRole};
    use std::collections::BTreeMap;

    /// Builds a context whose history has `turns` alternating user/assistant
    /// turns, each holding one short text block.
    fn mk_ctx(turns: usize) -> Context {
        let mut ctx = Context {
            system: vec![],
            guides: vec![],
            history: Vec::new(),
            task: Task {
                description: "t".into(),
                source: None,
                deadline: None,
            },
            policy: Policy::default(),
            metadata: BTreeMap::new(),
            tools: Vec::new(),
            response_format: harness_core::ResponseFormat::Free,
        };
        for i in 0..turns {
            ctx.history.push(Turn {
                role: if i % 2 == 0 {
                    TurnRole::User
                } else {
                    TurnRole::Assistant
                },
                blocks: vec![Block::Text(format!("turn {i}: {}", "x".repeat(50)))],
            });
        }
        ctx
    }

    #[tokio::test]
    async fn budget_reduce_keeps_recent() {
        let c = DefaultCompactor::new();
        let mut ctx = mk_ctx(20);
        // Inject a big tool result into an early turn.
        ctx.history[0].blocks.push(Block::ToolResult {
            call_id: "c1".into(),
            content: serde_json::Value::String("y".repeat(2000)),
        });
        c.compact(CompactionStage::BudgetReduce, &mut ctx)
            .await
            .unwrap();
        // First turn's big tool result should be trimmed.
        let has_trim = ctx.history[0]
            .blocks
            .iter()
            .any(|b| matches!(b, Block::Text(t) if t.contains("trimmed")));
        assert!(has_trim);
    }

    #[tokio::test]
    async fn microcompact_collapses_old_turns() {
        let c = DefaultCompactor::new();
        let mut ctx = mk_ctx(20);
        c.compact(CompactionStage::Microcompact, &mut ctx)
            .await
            .unwrap();
        // First turn should be the synthetic system summary.
        assert!(matches!(ctx.history[0].role, TurnRole::System));
        let first_text = match &ctx.history[0].blocks[0] {
            Block::Text(t) => t.clone(),
            _ => String::new(),
        };
        assert!(first_text.starts_with("[microcompact-summary]"));
    }

    #[tokio::test]
    async fn model_backed_compactor_replaces_old_turns_with_summary() {
        use harness_models::{MockModel, MockResponse};

        let model = Arc::new(MockModel::new().script(MockResponse::text("CONCISE-SUMMARY-OK")))
            as Arc<dyn Model>;
        let c = ModelBackedCompactor::new(model);

        let mut ctx = mk_ctx(20);
        let original_len = ctx.history.len();
        c.compact(CompactionStage::Microcompact, &mut ctx)
            .await
            .unwrap();
        // First turn is now the summary; total shrinks to keep_recent (6) + 1 summary = 7.
        assert_eq!(ctx.history.len(), c.keep_recent + 1);
        assert!(original_len > ctx.history.len());
        let first = match &ctx.history[0].blocks[0] {
            Block::Text(t) => t.clone(),
            _ => String::new(),
        };
        assert!(first.starts_with("[microcompact-summary]"));
        assert!(first.contains("CONCISE-SUMMARY-OK"));
    }

    #[tokio::test]
    async fn model_backed_compactor_noop_when_history_short() {
        use harness_models::{MockModel, MockResponse};
        let mock = Arc::new(MockModel::new().script(MockResponse::text("never called")));
        let c = ModelBackedCompactor::new(mock.clone() as Arc<dyn Model>);
        let mut ctx = mk_ctx(4); // < keep_recent
        c.compact(CompactionStage::Microcompact, &mut ctx)
            .await
            .unwrap();
        assert_eq!(ctx.history.len(), 4);
        assert_eq!(
            mock.call_count(),
            0,
            "model must not be called when history is short"
        );
    }

    // Purely synchronous check — no async runtime needed.
    #[test]
    fn budget_required_stages_escalates() {
        // 95% triggers four of the five stages.
        let b = Budget {
            used: 95,
            window: 100,
        };
        assert_eq!(b.required_stages().len(), 4);
        // 99% triggers all five.
        let b = Budget {
            used: 99,
            window: 100,
        };
        assert_eq!(b.required_stages().len(), 5);
    }
}