// harn_vm/orchestration/compaction.rs

1//! Auto-compaction — transcript size management strategies.
2
3use std::collections::BTreeMap;
4use std::rc::Rc;
5
6use crate::llm::{vm_call_llm_full, vm_value_to_json};
7use crate::value::{VmError, VmValue};
8
/// Strategy used to shrink an oversized transcript.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompactStrategy {
    /// Summarize archived messages with an LLM call.
    Llm,
    /// Abbreviate archived messages deterministically (no LLM).
    Truncate,
    /// Delegate summarization to a user-supplied Harn closure.
    Custom,
    /// Mask verbose tool results while preserving assistant prose.
    ObservationMask,
}
16
17pub fn parse_compact_strategy(value: &str) -> Result<CompactStrategy, VmError> {
18    match value {
19        "llm" => Ok(CompactStrategy::Llm),
20        "truncate" => Ok(CompactStrategy::Truncate),
21        "custom" => Ok(CompactStrategy::Custom),
22        "observation_mask" => Ok(CompactStrategy::ObservationMask),
23        other => Err(VmError::Runtime(format!(
24            "unknown compact_strategy '{other}' (expected 'llm', 'truncate', 'custom', or 'observation_mask')"
25        ))),
26    }
27}
28
29pub fn compact_strategy_name(strategy: &CompactStrategy) -> &'static str {
30    match strategy {
31        CompactStrategy::Llm => "llm",
32        CompactStrategy::Truncate => "truncate",
33        CompactStrategy::Custom => "custom",
34        CompactStrategy::ObservationMask => "observation_mask",
35    }
36}
37
/// Configuration for automatic transcript compaction in agent loops.
///
/// Two-tier compaction:
///   Tier 1 (`token_threshold` / `compact_strategy`): lightweight, deterministic
///     observation masking that fires early. Masks verbose tool results while
///     preserving assistant prose and error output.
///   Tier 2 (`hard_limit_tokens` / `hard_limit_strategy`): aggressive LLM-powered
///     summarization that fires when tier-1 alone isn't enough, typically as the
///     transcript approaches the model's actual context window.
#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    /// Number of earliest messages to keep verbatim before the compacted
    /// summary. The system prompt is not part of this list and is always
    /// preserved separately by the caller. (default: 0)
    pub keep_first: usize,
    /// Tier-1 threshold: estimated tokens before lightweight compaction.
    /// (default: 48_000)
    pub token_threshold: usize,
    /// Maximum character length for a single tool result before microcompaction.
    /// (default: 16_000)
    pub tool_output_max_chars: usize,
    /// Number of recent messages to keep during compaction. (default: 12)
    pub keep_last: usize,
    /// Tier-1 strategy (default: ObservationMask).
    pub compact_strategy: CompactStrategy,
    /// Tier-2 threshold: fires when tier-1 result still exceeds this.
    /// Typically set to ~75% of the model's actual context window.
    /// When `None`, tier-2 is disabled. (default: None)
    pub hard_limit_tokens: Option<usize>,
    /// Tier-2 strategy (default: Llm).
    pub hard_limit_strategy: CompactStrategy,
    /// Optional Harn callback used when a strategy is `custom`.
    pub custom_compactor: Option<VmValue>,
    /// Optional callback for domain-specific per-message masking during
    /// observation mask compaction. Called with a list of archived messages,
    /// returns a list of `Option<String>` — `Some(masked)` to override the
    /// default mask for that message, `None` to use the default.
    /// This lets the host (e.g. an IDE or cloud runner) inject AST outlines,
    /// file summaries, etc. without putting language-specific logic in Harn.
    pub mask_callback: Option<VmValue>,
    /// Optional callback for per-tool-result compression. Called with
    /// `{tool_name, output, max_chars}` and returns compressed output string.
    /// When set, used INSTEAD of the built-in `microcompact_tool_output`.
    /// This allows the pipeline to use LLM-based compression rather than
    /// keyword heuristics.
    pub compress_callback: Option<VmValue>,
    /// Optional prompt-template asset path used when LLM compaction is
    /// selected. The rendered template becomes the user message sent to
    /// the summarizer.
    pub summarize_prompt: Option<String>,
    /// User-facing policy label for replay and observability. This can be
    /// broader than the engine strategy, e.g. `hybrid` lowers to LLM
    /// summarization plus truncate fallback. (default: tier-1 strategy name)
    pub policy_strategy: String,
}
91
92impl Default for AutoCompactConfig {
93    fn default() -> Self {
94        Self {
95            keep_first: 0,
96            token_threshold: 48_000,
97            tool_output_max_chars: 16_000,
98            keep_last: 12,
99            compact_strategy: CompactStrategy::ObservationMask,
100            hard_limit_tokens: None,
101            hard_limit_strategy: CompactStrategy::Llm,
102            custom_compactor: None,
103            mask_callback: None,
104            compress_callback: None,
105            summarize_prompt: None,
106            policy_strategy: compact_strategy_name(&CompactStrategy::ObservationMask).to_string(),
107        }
108    }
109}
110
111/// Estimate token count from a list of JSON messages (chars / 4 heuristic).
112pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
113    messages
114        .iter()
115        .map(|m| {
116            m.get("content")
117                .and_then(|c| c.as_str())
118                .map(|s| s.len())
119                .unwrap_or(0)
120        })
121        .sum::<usize>()
122        / 4
123}
124
125fn is_reasoning_or_tool_turn_message(message: &serde_json::Value) -> bool {
126    let role = message
127        .get("role")
128        .and_then(|value| value.as_str())
129        .unwrap_or_default();
130    role == "tool"
131        || message.get("tool_calls").is_some()
132        || message
133            .get("reasoning")
134            .map(|value| !value.is_null())
135            .unwrap_or(false)
136}
137
138fn find_prev_user_boundary(messages: &[serde_json::Value], start: usize) -> Option<usize> {
139    (0..=start)
140        .rev()
141        .find(|idx| messages[*idx].get("role").and_then(|value| value.as_str()) == Some("user"))
142}
143
/// Microcompact a tool result: if it exceeds `max_chars`, keep the first and
/// last portions with a snip marker in between.
///
/// Likely diagnostic lines (compiler errors, test failures, panics) are
/// detected heuristically and, when the budget allows, preserved verbatim
/// between the head and tail so error context survives compaction.
pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
    // Pass through when already small enough; budgets under 200 chars would
    // yield useless fragments, so those also pass through unchanged.
    if output.len() <= max_chars || max_chars < 200 {
        return output.to_string();
    }
    // Heuristic scan for diagnostic lines, capped at 32 to bound the space
    // spent preserving them.
    let diagnostic_lines = output
        .lines()
        .filter(|line| {
            let trimmed = line.trim();
            let lower = trimmed.to_lowercase();
            // `path:NN`-style source location: the first ':' on the line is
            // immediately followed by a digit.
            let has_file_line = {
                let bytes = trimmed.as_bytes();
                let mut i = 0;
                let mut found_colon = false;
                while i < bytes.len() {
                    if bytes[i] == b':' {
                        found_colon = true;
                        break;
                    }
                    i += 1;
                }
                found_colon && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit()
            };
            // Strong keywords count on their own; weak keywords only count
            // when paired with a file:line location, to limit false positives
            // on ordinary prose.
            let has_strong_keyword =
                trimmed.contains("FAIL") || trimmed.contains("panic") || trimmed.contains("Panic");
            let has_weak_keyword = trimmed.contains("error")
                || trimmed.contains("undefined")
                || trimmed.contains("expected")
                || trimmed.contains("got")
                || lower.contains("cannot find")
                || lower.contains("not found")
                || lower.contains("no such")
                || lower.contains("unresolved")
                || lower.contains("missing")
                || lower.contains("declared but not used")
                || lower.contains("unused")
                || lower.contains("mismatch");
            // Positional markers: compiler-style `error:`/`warning:`/`note:`
            // prefixes, or " error " / "panic:" anywhere in the line.
            let positional = lower.contains(" error ")
                || lower.starts_with("error:")
                || lower.starts_with("warning:")
                || lower.starts_with("note:")
                || lower.contains("panic:");
            has_strong_keyword || (has_file_line && has_weak_keyword) || positional
        })
        .take(32)
        .collect::<Vec<_>>();
    if !diagnostic_lines.is_empty() {
        let diagnostics = diagnostic_lines.join("\n");
        // Reserve room for the diagnostics plus ~64 chars of marker text,
        // then split the remaining budget evenly between head and tail.
        let budget = max_chars.saturating_sub(diagnostics.len() + 64);
        let keep = budget / 2;
        // Only use the diagnostic layout when enough budget remains for a
        // meaningful head and tail (>= 80 chars each).
        if keep >= 80 && output.len() > keep * 2 {
            let head = snap_to_line_end(output, keep);
            let tail = snap_to_line_start(output, output.len().saturating_sub(keep));
            return format!(
                "{head}\n\n[diagnostic lines preserved]\n{diagnostics}\n\n[... output compacted ...]\n\n{tail}"
            );
        }
    }
    // Plain snip: half the budget for the head, half for the tail, with a
    // marker recording how many characters were dropped.
    let keep = max_chars / 2;
    let head = snap_to_line_end(output, keep);
    let tail = snap_to_line_start(output, output.len().saturating_sub(keep));
    let snipped = output.len().saturating_sub(head.len() + tail.len());
    format!("{head}\n\n[... {snipped} characters snipped ...]\n\n{tail}")
}
209
210/// Invoke the compress_callback to compress a tool result via pipeline-defined
211/// logic (typically an LLM call). Returns the compressed output, or falls back
212/// to `microcompact_tool_output` on error.
213pub(crate) async fn invoke_compress_callback(
214    callback: &VmValue,
215    tool_name: &str,
216    output: &str,
217    max_chars: usize,
218) -> String {
219    let VmValue::Closure(closure) = callback.clone() else {
220        return microcompact_tool_output(output, max_chars);
221    };
222    let mut vm = match crate::vm::clone_async_builtin_child_vm() {
223        Some(vm) => vm,
224        None => return microcompact_tool_output(output, max_chars),
225    };
226    let args_dict = VmValue::Dict(Rc::new({
227        let mut dict = std::collections::BTreeMap::new();
228        dict.insert(
229            "tool_name".to_string(),
230            VmValue::String(Rc::from(tool_name)),
231        );
232        dict.insert("output".to_string(), VmValue::String(Rc::from(output)));
233        dict.insert("max_chars".to_string(), VmValue::Int(max_chars as i64));
234        dict
235    }));
236    match vm.call_closure_pub(&closure, &[args_dict]).await {
237        Ok(VmValue::String(s)) if !s.is_empty() => s.to_string(),
238        _ => microcompact_tool_output(output, max_chars),
239    }
240}
241
/// Snap a byte offset to the nearest preceding line boundary (end of a complete line).
/// Returns the substring from the start up to and including the last complete line
/// that fits within `max_bytes`. Never cuts mid-line.
fn snap_to_line_end(s: &str, max_bytes: usize) -> &str {
    if max_bytes >= s.len() {
        return s;
    }
    // Stable replacement for the unstable (nightly-only) `str::floor_char_boundary`:
    // walk back to the nearest UTF-8 character boundary at or before `max_bytes`.
    // Terminates because index 0 is always a boundary.
    let mut search_end = max_bytes;
    while !s.is_char_boundary(search_end) {
        search_end -= 1;
    }
    match s[..search_end].rfind('\n') {
        Some(pos) => &s[..pos + 1],
        None => &s[..search_end], // single long line — fall back to char boundary
    }
}
255
/// Snap a byte offset to the nearest following line boundary (start of a complete line).
/// Returns the substring from the first complete line at or after `start_byte`.
/// Never cuts mid-line.
fn snap_to_line_start(s: &str, start_byte: usize) -> &str {
    if start_byte == 0 {
        return s;
    }
    // Stable replacement for the unstable (nightly-only) `str::ceil_char_boundary`:
    // walk forward to the nearest UTF-8 character boundary at or after
    // `start_byte`. Clamping to `s.len()` also avoids the panic the unstable
    // API documents for out-of-range indices.
    let mut search_start = start_byte.min(s.len());
    while search_start < s.len() && !s.is_char_boundary(search_start) {
        search_start += 1;
    }
    if search_start >= s.len() {
        return "";
    }
    match s[search_start..].find('\n') {
        Some(pos) => {
            let line_start = search_start + pos + 1;
            if line_start < s.len() {
                &s[line_start..]
            } else {
                // The only newline is the final byte — keep the partial line
                // rather than returning nothing.
                &s[search_start..]
            }
        }
        None => &s[search_start..], // already at start of last line
    }
}
279
280fn format_compaction_messages(messages: &[serde_json::Value]) -> String {
281    messages
282        .iter()
283        .map(|msg| {
284            let role = msg
285                .get("role")
286                .and_then(|v| v.as_str())
287                .unwrap_or("user")
288                .to_uppercase();
289            let content = msg
290                .get("content")
291                .and_then(|v| v.as_str())
292                .unwrap_or_default();
293            format!("{role}: {content}")
294        })
295        .collect::<Vec<_>>()
296        .join("\n")
297}
298
/// Deterministic truncate-strategy summary of archived messages.
///
/// Thin wrapper over `truncate_compaction_summary_with_context` with the
/// LLM-fallback flag off, so the header reports a plain truncate compaction.
fn truncate_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
) -> String {
    truncate_compaction_summary_with_context(old_messages, archived_count, false)
}
305
306fn truncate_compaction_summary_with_context(
307    old_messages: &[serde_json::Value],
308    archived_count: usize,
309    is_llm_fallback: bool,
310) -> String {
311    let per_msg_limit = 500_usize;
312    let summary_parts: Vec<String> = old_messages
313        .iter()
314        .filter_map(|m| {
315            let role = m.get("role")?.as_str()?;
316            let content = m.get("content")?.as_str()?;
317            if content.is_empty() {
318                return None;
319            }
320            let truncated = if content.len() > per_msg_limit {
321                format!(
322                    "{}... [truncated from {} chars]",
323                    &content[..content.floor_char_boundary(per_msg_limit)],
324                    content.len()
325                )
326            } else {
327                content.to_string()
328            };
329            Some(format!("[{role}] {truncated}"))
330        })
331        .take(15)
332        .collect();
333    let header = if is_llm_fallback {
334        format!(
335            "[auto-compact fallback: LLM summarizer returned empty; {archived_count} older messages abbreviated to ~{per_msg_limit} chars each]"
336        )
337    } else {
338        format!("[auto-compacted {archived_count} older messages via truncate strategy]")
339    };
340    format!(
341        "{header}\n{}{}",
342        summary_parts.join("\n"),
343        if archived_count > 15 {
344            format!("\n... and {} more", archived_count - 15)
345        } else {
346            String::new()
347        }
348    )
349}
350
351fn compact_summary_text_from_value(value: &VmValue) -> Result<String, VmError> {
352    if let Some(map) = value.as_dict() {
353        if let Some(summary) = map.get("summary").or_else(|| map.get("text")) {
354            return Ok(summary.display());
355        }
356    }
357    match value {
358        VmValue::String(text) => Ok(text.to_string()),
359        VmValue::Nil => Ok(String::new()),
360        _ => serde_json::to_string_pretty(&vm_value_to_json(value))
361            .map_err(|e| VmError::Runtime(format!("custom compactor encode error: {e}"))),
362    }
363}
364
/// Summarize archived messages with an LLM call derived from the live call
/// options.
///
/// Clones `llm_opts` and strips everything unrelated to summarization so the
/// summarizer sees only the rendered prompt as a single user message. An
/// empty model response falls back to the deterministic truncate summary so
/// compaction always makes progress.
async fn llm_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
    llm_opts: &crate::llm::api::LlmCallOptions,
    summarize_prompt: Option<&str>,
) -> Result<String, VmError> {
    let mut compact_opts = llm_opts.clone();
    let formatted = format_compaction_messages(old_messages);
    // The summarizer call must not inherit the agent's system prompt, prior
    // transcript summary, tool definitions, or structured-output settings.
    compact_opts.system = None;
    compact_opts.transcript_summary = None;
    compact_opts.native_tools = None;
    compact_opts.tool_choice = None;
    compact_opts.output_format = crate::llm::api::OutputFormat::Text;
    compact_opts.response_format = None;
    compact_opts.json_schema = None;
    compact_opts.output_schema = None;
    let prompt = render_llm_compaction_prompt(summarize_prompt, &formatted, archived_count)?;
    compact_opts.messages = vec![serde_json::json!({
        "role": "user",
        "content": prompt,
    })];
    let result = vm_call_llm_full(&compact_opts).await?;
    let summary = result.text.trim();
    if summary.is_empty() {
        // Degrade to the deterministic strategy rather than inserting an
        // empty summary message.
        Ok(truncate_compaction_summary_with_context(
            old_messages,
            archived_count,
            true,
        ))
    } else {
        Ok(format!(
            "[auto-compacted {archived_count} older messages]\n{summary}"
        ))
    }
}
400
/// Build the user message sent to the LLM summarizer.
///
/// With no `summarize_prompt` (or a blank one), a built-in instruction plus
/// the formatted transcript is used. Otherwise the named asset is read and
/// rendered as a template with `formatted_messages` and `archived_count`
/// bindings; read or render failures surface as runtime errors.
fn render_llm_compaction_prompt(
    summarize_prompt: Option<&str>,
    formatted: &str,
    archived_count: usize,
) -> Result<String, VmError> {
    // Blank/whitespace paths are treated as "no template configured".
    let Some(path) = summarize_prompt.filter(|path| !path.trim().is_empty()) else {
        return Ok(format!(
            "Summarize these archived conversation messages for a follow-on agent. Preserve goals, constraints, decisions, completed tool work, unresolved issues, and next actions. Output only the summary text.\n\nArchived message count: {archived_count}\n\nConversation:\n{formatted}"
        ));
    };

    let resolved = crate::stdlib::process::resolve_source_asset_path(path);
    let template = std::fs::read_to_string(&resolved).map_err(|error| {
        VmError::Runtime(format!(
            "failed to read compaction summarize_prompt {}: {error}",
            resolved.display()
        ))
    })?;
    // Template bindings available to the asset.
    let mut bindings = BTreeMap::new();
    bindings.insert(
        "formatted_messages".to_string(),
        VmValue::String(Rc::from(formatted.to_string())),
    );
    bindings.insert(
        "archived_count".to_string(),
        VmValue::Int(archived_count as i64),
    );
    // Render relative to the asset's own directory so includes resolve.
    crate::stdlib::template::render_template_result(
        &template,
        Some(&bindings),
        resolved.parent(),
        Some(&resolved),
    )
    .map_err(|error| {
        VmError::Runtime(format!(
            "compaction summarize_prompt render error: {error:?}"
        ))
    })
}
440
441async fn custom_compaction_summary(
442    old_messages: &[serde_json::Value],
443    archived_count: usize,
444    callback: &VmValue,
445) -> Result<String, VmError> {
446    let Some(VmValue::Closure(closure)) = Some(callback.clone()) else {
447        return Err(VmError::Runtime(
448            "compact_callback must be a closure when compact_strategy is 'custom'".to_string(),
449        ));
450    };
451    let mut vm = crate::vm::clone_async_builtin_child_vm().ok_or_else(|| {
452        VmError::Runtime(
453            "custom transcript compaction requires an async builtin VM context".to_string(),
454        )
455    })?;
456    let messages_vm = VmValue::List(Rc::new(
457        old_messages
458            .iter()
459            .map(crate::stdlib::json_to_vm_value)
460            .collect(),
461    ));
462    let result = vm.call_closure_pub(&closure, &[messages_vm]).await;
463    let summary = compact_summary_text_from_value(&result?)?;
464    if summary.trim().is_empty() {
465        Ok(truncate_compaction_summary(old_messages, archived_count))
466    } else {
467        Ok(format!(
468            "[auto-compacted {archived_count} older messages]\n{summary}"
469        ))
470    }
471}
472
/// Check whether a tool-result string should be preserved verbatim during
/// observation masking. Uses content length as the primary heuristic:
/// short results (< 500 chars) are kept since they're typically error messages,
/// status lines, or concise answers that are cheap to retain and risky to mask.
/// Long results are masked to save context budget.
fn content_should_preserve(content: &str) -> bool {
    const PRESERVE_LIMIT: usize = 500;
    content.len() < PRESERVE_LIMIT
}
481
/// Default per-message masking for tool results.
///
/// Results of up to three lines are kept verbatim (tagged with their role);
/// longer results are replaced by a preview of the first line plus a
/// line/char count so the transcript records that output existed.
fn default_mask_tool_result(role: &str, content: &str) -> String {
    let first_line = content.lines().next().unwrap_or(content);
    let line_count = content.lines().count();
    let char_count = content.len();
    if line_count <= 3 {
        format!("[{role}] {content}")
    } else {
        // Clip the preview at 120 bytes, snapped back to a UTF-8 character
        // boundary — a raw `&first_line[..120]` slice panics on multibyte
        // content when byte 120 falls inside a character.
        let mut cut = first_line.len().min(120);
        while !first_line.is_char_boundary(cut) {
            cut -= 1;
        }
        let preview = &first_line[..cut];
        format!("[{role}] {preview}... [{line_count} lines, {char_count} chars masked]")
    }
}
494
/// Deterministic observation-mask compaction.
///
/// Test-only convenience wrapper over
/// `observation_mask_compaction_with_callback` with no custom mask results.
#[cfg(test)]
pub(crate) fn observation_mask_compaction(
    old_messages: &[serde_json::Value],
    archived_count: usize,
) -> String {
    observation_mask_compaction_with_callback(old_messages, archived_count, None)
}
503
504fn observation_mask_compaction_with_callback(
505    old_messages: &[serde_json::Value],
506    archived_count: usize,
507    mask_results: Option<&[Option<String>]>,
508) -> String {
509    let mut parts = Vec::new();
510    parts.push(format!(
511        "[auto-compacted {archived_count} older messages via observation masking]"
512    ));
513    for (idx, msg) in old_messages.iter().enumerate() {
514        let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("user");
515        let content = msg
516            .get("content")
517            .and_then(|v| v.as_str())
518            .unwrap_or_default();
519        if content.is_empty() {
520            continue;
521        }
522        if role == "assistant" {
523            parts.push(format!("[assistant] {content}"));
524            continue;
525        }
526        if content_should_preserve(content) {
527            parts.push(format!("[{role}] {content}"));
528        } else if let Some(Some(custom)) = mask_results.and_then(|r| r.get(idx)) {
529            parts.push(custom.clone());
530        } else {
531            parts.push(default_mask_tool_result(role, content));
532        }
533    }
534    parts.join("\n")
535}
536
537/// Invoke the mask_callback to get per-message custom masks.
538async fn invoke_mask_callback(
539    callback: &VmValue,
540    old_messages: &[serde_json::Value],
541) -> Result<Vec<Option<String>>, VmError> {
542    let VmValue::Closure(closure) = callback.clone() else {
543        return Err(VmError::Runtime(
544            "mask_callback must be a closure".to_string(),
545        ));
546    };
547    let mut vm = crate::vm::clone_async_builtin_child_vm().ok_or_else(|| {
548        VmError::Runtime("mask_callback requires an async builtin VM context".to_string())
549    })?;
550    let messages_vm = VmValue::List(Rc::new(
551        old_messages
552            .iter()
553            .map(crate::stdlib::json_to_vm_value)
554            .collect(),
555    ));
556    let result = vm.call_closure_pub(&closure, &[messages_vm]).await?;
557    let list = match result {
558        VmValue::List(items) => items,
559        _ => return Ok(vec![None; old_messages.len()]),
560    };
561    Ok(list
562        .iter()
563        .map(|v| match v {
564            VmValue::String(s) => Some(s.to_string()),
565            VmValue::Nil => None,
566            _ => None,
567        })
568        .collect())
569}
570
/// Apply a single compaction strategy to a list of archived messages.
///
/// Pure dispatch: `llm_opts` is required for `Llm`, `custom_compactor` for
/// `Custom`, and `mask_callback` optionally customizes `ObservationMask`.
/// Returns the summary text that replaces the archived messages.
async fn apply_compaction_strategy(
    strategy: &CompactStrategy,
    old_messages: &[serde_json::Value],
    archived_count: usize,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
    custom_compactor: Option<&VmValue>,
    mask_callback: Option<&VmValue>,
    summarize_prompt: Option<&str>,
) -> Result<String, VmError> {
    match strategy {
        CompactStrategy::Truncate => Ok(truncate_compaction_summary(old_messages, archived_count)),
        CompactStrategy::Llm => {
            llm_compaction_summary(
                old_messages,
                archived_count,
                // LLM summarization clones the live call options.
                llm_opts.ok_or_else(|| {
                    VmError::Runtime(
                        "LLM transcript compaction requires active LLM call options".to_string(),
                    )
                })?,
                summarize_prompt,
            )
            .await
        }
        CompactStrategy::Custom => {
            custom_compaction_summary(
                old_messages,
                archived_count,
                custom_compactor.ok_or_else(|| {
                    VmError::Runtime(
                        "compact_callback is required when compact_strategy is 'custom'"
                            .to_string(),
                    )
                })?,
            )
            .await
        }
        CompactStrategy::ObservationMask => {
            // Resolve custom per-message masks first (may call back into the VM).
            let mask_results = if let Some(cb) = mask_callback {
                Some(invoke_mask_callback(cb, old_messages).await?)
            } else {
                None
            };
            Ok(observation_mask_compaction_with_callback(
                old_messages,
                archived_count,
                mask_results.as_deref(),
            ))
        }
    }
}
623
/// Auto-compact a message list in place using two-tier compaction.
///
/// Archives the middle of the transcript — after `config.keep_first`, before
/// a suffix of roughly `config.keep_last` messages — and replaces it with a
/// single user-role summary message produced by `config.compact_strategy`.
/// If the combined result still exceeds `config.hard_limit_tokens`, the
/// summary itself is re-compacted with `config.hard_limit_strategy`.
/// Returns the inserted summary text, or `None` when nothing was compacted.
pub(crate) async fn auto_compact_messages(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<Option<String>, VmError> {
    // Nothing to archive if the transcript fits within the kept regions.
    if messages.len() <= config.keep_first.saturating_add(config.keep_last) {
        return Ok(None);
    }
    let compact_start = config.keep_first.min(messages.len());
    let original_split = messages.len().saturating_sub(config.keep_last);
    let mut split_at = original_split;
    // Snap back to a user-role boundary so the kept suffix begins at a clean
    // turn. OpenAI-compatible APIs reject tool results orphaned from their
    // assistant request, so splitting mid-turn corrupts the transcript.
    while split_at > compact_start
        && split_at < messages.len()
        && messages[split_at]
            .get("role")
            .and_then(|r| r.as_str())
            .is_none_or(|r| r != "user")
    {
        split_at -= 1;
    }
    // Fall back to the naive split (e.g. tool-heavy transcripts with the sole
    // user message at index 0) rather than skipping compaction entirely.
    if split_at == compact_start {
        split_at = original_split;
    }
    // If the kept suffix would still start inside a reasoning/tool turn, pull
    // the split back to the user message that opened that turn so the whole
    // turn survives intact.
    if let Some(volatile_start) = messages[split_at..]
        .iter()
        .position(is_reasoning_or_tool_turn_message)
        .map(|offset| split_at + offset)
    {
        if let Some(boundary) = volatile_start
            .checked_sub(1)
            .and_then(|idx| find_prev_user_boundary(messages, idx))
            .filter(|boundary| *boundary > compact_start)
        {
            split_at = boundary;
        }
    }
    if split_at <= compact_start {
        return Ok(None);
    }
    // Remove the middle of the transcript; these become the archive input.
    let old_messages: Vec<_> = messages.drain(compact_start..split_at).collect();
    let archived_count = old_messages.len();

    // Tier 1: the configured (typically lightweight) strategy.
    let mut summary = apply_compaction_strategy(
        &config.compact_strategy,
        &old_messages,
        archived_count,
        llm_opts,
        config.custom_compactor.as_ref(),
        config.mask_callback.as_ref(),
        config.summarize_prompt.as_deref(),
    )
    .await?;

    // Tier 2: if the tier-1 summary plus the kept messages still exceed the
    // hard limit, re-compact the summary itself with the aggressive strategy.
    if let Some(hard_limit) = config.hard_limit_tokens {
        let summary_msg = serde_json::json!({"role": "user", "content": &summary});
        let mut estimate_msgs = vec![summary_msg];
        estimate_msgs.extend_from_slice(messages.as_slice());
        let estimated = estimate_message_tokens(&estimate_msgs);
        if estimated > hard_limit {
            let tier1_as_messages = vec![serde_json::json!({
                "role": "user",
                "content": summary,
            })];
            summary = apply_compaction_strategy(
                &config.hard_limit_strategy,
                &tier1_as_messages,
                archived_count,
                llm_opts,
                config.custom_compactor.as_ref(),
                None,
                config.summarize_prompt.as_deref(),
            )
            .await?;
        }
    }

    // Splice the summary back in where the archived messages were removed.
    messages.insert(
        compact_start,
        serde_json::json!({
            "role": "user",
            "content": summary,
        }),
    );
    Ok(Some(summary))
}
715
#[cfg(test)]
mod tests {
    use super::*;

    // Output within budget passes through untouched.
    #[test]
    fn microcompact_short_output_unchanged() {
        let output = "line1\nline2\nline3\n";
        assert_eq!(microcompact_tool_output(output, 1000), output);
    }

    // The head kept before the snip marker never ends mid-line.
    #[test]
    fn microcompact_snaps_to_line_boundaries() {
        let lines: Vec<String> = (0..20)
            .map(|i| format!("line {:02} content here", i))
            .collect();
        let output = lines.join("\n");
        let result = microcompact_tool_output(&output, 200);
        assert!(result.contains("[... "), "should have snip marker");
        let parts: Vec<&str> = result.split("\n\n[... ").collect();
        assert!(parts.len() >= 2, "should split at marker");
        let head = parts[0];
        for line in head.lines() {
            assert!(
                line.starts_with("line "),
                "head line should be complete: {line}"
            );
        }
    }

    // A `path:NN: error:` line buried mid-output survives compaction via the
    // diagnostic-preservation layout.
    #[test]
    fn microcompact_preserves_diagnostic_lines_with_line_boundaries() {
        let mut lines = Vec::new();
        for i in 0..50 {
            lines.push(format!("verbose output line {i}"));
        }
        lines.push("src/main.rs:42: error: cannot find value".to_string());
        for i in 50..100 {
            lines.push(format!("verbose output line {i}"));
        }
        let output = lines.join("\n");
        let result = microcompact_tool_output(&output, 600);
        assert!(result.contains("cannot find value"), "diagnostic preserved");
        assert!(
            result.contains("[diagnostic lines preserved]"),
            "has diagnostic marker"
        );
    }

    // Head snapping backs up to the previous newline.
    #[test]
    fn snap_to_line_end_finds_newline() {
        let s = "line1\nline2\nline3\nline4\n";
        let head = snap_to_line_end(s, 12);
        assert!(head.ends_with('\n'), "should end at newline");
        assert!(head.contains("line1"));
    }

    // Tail snapping advances to the next line start.
    #[test]
    fn snap_to_line_start_finds_newline() {
        let s = "line1\nline2\nline3\nline4\n";
        let tail = snap_to_line_start(s, 12);
        assert!(
            tail.starts_with("line"),
            "should start at line boundary: {tail}"
        );
    }

    // The kept suffix must not begin inside a reasoning/tool turn: the split
    // is pulled back to the user message that opened the turn, keeping the
    // assistant tool_calls message adjacent to its tool result.
    #[test]
    fn auto_compact_preserves_reasoning_tool_suffix() {
        let mut messages = vec![
            serde_json::json!({"role": "user", "content": "old task"}),
            serde_json::json!({"role": "assistant", "content": "old reply"}),
            serde_json::json!({"role": "user", "content": "new task"}),
            serde_json::json!({
                "role": "assistant",
                "content": "",
                "reasoning": "think first",
                "tool_calls": [{
                    "id": "call_1",
                    "type": "function",
                    "function": {"name": "read", "arguments": "{\"path\":\"foo.rs\"}"}
                }],
            }),
            serde_json::json!({"role": "tool", "tool_call_id": "call_1", "content": "file"}),
        ];
        let config = AutoCompactConfig {
            keep_last: 2,
            ..Default::default()
        };

        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("runtime");
        let summary = runtime
            .block_on(auto_compact_messages(&mut messages, &config, None))
            .expect("compaction succeeds");

        // Summary replaces the two oldest messages; the full tool turn
        // (user -> assistant tool_calls -> tool result) stays intact.
        assert!(summary.is_some());
        assert_eq!(messages[1]["role"], "user");
        assert_eq!(messages[2]["role"], "assistant");
        assert_eq!(messages[2]["tool_calls"][0]["id"], "call_1");
        assert_eq!(messages[3]["role"], "tool");
        assert_eq!(messages[3]["tool_call_id"], "call_1");
    }
}