// harn_vm/orchestration/compaction.rs
1//! Auto-compaction — transcript size management strategies.
2
3use std::collections::BTreeMap;
4use std::rc::Rc;
5
6use crate::llm::{vm_call_llm_full, vm_value_to_json};
7use crate::value::{VmError, VmValue};
8
/// Strategy used to collapse archived transcript messages into a summary.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompactStrategy {
    /// Summarize archived messages with a dedicated LLM call.
    Llm,
    /// Deterministically abbreviate each archived message (no LLM).
    Truncate,
    /// Delegate summarization to a host-provided Harn closure.
    Custom,
    /// Mask verbose tool results while keeping assistant prose intact.
    ObservationMask,
}
16
17pub fn parse_compact_strategy(value: &str) -> Result<CompactStrategy, VmError> {
18    match value {
19        "llm" => Ok(CompactStrategy::Llm),
20        "truncate" => Ok(CompactStrategy::Truncate),
21        "custom" => Ok(CompactStrategy::Custom),
22        "observation_mask" => Ok(CompactStrategy::ObservationMask),
23        other => Err(VmError::Runtime(format!(
24            "unknown compact_strategy '{other}' (expected 'llm', 'truncate', 'custom', or 'observation_mask')"
25        ))),
26    }
27}
28
29pub fn compact_strategy_name(strategy: &CompactStrategy) -> &'static str {
30    match strategy {
31        CompactStrategy::Llm => "llm",
32        CompactStrategy::Truncate => "truncate",
33        CompactStrategy::Custom => "custom",
34        CompactStrategy::ObservationMask => "observation_mask",
35    }
36}
37
/// Configuration for automatic transcript compaction in agent loops.
///
/// Two-tier compaction:
///   Tier 1 (`token_threshold` / `compact_strategy`): lightweight, deterministic
///     observation masking that fires early. Masks verbose tool results while
///     preserving assistant prose and error output.
///   Tier 2 (`hard_limit_tokens` / `hard_limit_strategy`): aggressive LLM-powered
///     summarization that fires when tier-1 alone isn't enough, typically as the
///     transcript approaches the model's actual context window.
#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    /// Tier-1 threshold: estimated tokens before lightweight compaction.
    pub token_threshold: usize,
    /// Maximum character length for a single tool result before microcompaction.
    pub tool_output_max_chars: usize,
    /// Number of recent messages to keep during compaction (approximate:
    /// the split point is snapped to clean user-turn boundaries).
    pub keep_last: usize,
    /// Tier-1 strategy (default: ObservationMask).
    pub compact_strategy: CompactStrategy,
    /// Tier-2 threshold: fires when tier-1 result still exceeds this.
    /// Typically set to ~75% of the model's actual context window.
    /// When `None`, tier-2 is disabled.
    pub hard_limit_tokens: Option<usize>,
    /// Tier-2 strategy (default: Llm).
    pub hard_limit_strategy: CompactStrategy,
    /// Optional Harn callback used when a strategy is `custom`.
    pub custom_compactor: Option<VmValue>,
    /// Optional callback for domain-specific per-message masking during
    /// observation mask compaction. Called with a list of archived messages,
    /// returns a list of `Option<String>` — `Some(masked)` to override the
    /// default mask for that message, `None` to use the default.
    /// This lets the host (e.g. burin-code) inject AST outlines, file
    /// summaries, etc. without putting language-specific logic in Harn.
    pub mask_callback: Option<VmValue>,
    /// Optional callback for per-tool-result compression. Called with
    /// `{tool_name, output, max_chars}` and returns compressed output string.
    /// When set, used INSTEAD of the built-in `microcompact_tool_output`.
    /// This allows the pipeline to use LLM-based compression rather than
    /// keyword heuristics.
    pub compress_callback: Option<VmValue>,
    /// Optional prompt-template asset path used when LLM compaction is
    /// selected. The rendered template becomes the user message sent to
    /// the summarizer.
    pub summarize_prompt: Option<String>,
}
83
84impl Default for AutoCompactConfig {
85    fn default() -> Self {
86        Self {
87            token_threshold: 48_000,
88            tool_output_max_chars: 16_000,
89            keep_last: 12,
90            compact_strategy: CompactStrategy::ObservationMask,
91            hard_limit_tokens: None,
92            hard_limit_strategy: CompactStrategy::Llm,
93            custom_compactor: None,
94            mask_callback: None,
95            compress_callback: None,
96            summarize_prompt: None,
97        }
98    }
99}
100
101/// Estimate token count from a list of JSON messages (chars / 4 heuristic).
102pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
103    messages
104        .iter()
105        .map(|m| {
106            m.get("content")
107                .and_then(|c| c.as_str())
108                .map(|s| s.len())
109                .unwrap_or(0)
110        })
111        .sum::<usize>()
112        / 4
113}
114
115fn is_reasoning_or_tool_turn_message(message: &serde_json::Value) -> bool {
116    let role = message
117        .get("role")
118        .and_then(|value| value.as_str())
119        .unwrap_or_default();
120    role == "tool"
121        || message.get("tool_calls").is_some()
122        || message
123            .get("reasoning")
124            .map(|value| !value.is_null())
125            .unwrap_or(false)
126}
127
128fn find_prev_user_boundary(messages: &[serde_json::Value], start: usize) -> Option<usize> {
129    (0..=start)
130        .rev()
131        .find(|idx| messages[*idx].get("role").and_then(|value| value.as_str()) == Some("user"))
132}
133
134/// Microcompact a tool result: if it exceeds `max_chars`, keep the first and
135/// last portions with a snip marker in between.
136pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
137    if output.len() <= max_chars || max_chars < 200 {
138        return output.to_string();
139    }
140    let diagnostic_lines = output
141        .lines()
142        .filter(|line| {
143            let trimmed = line.trim();
144            let lower = trimmed.to_lowercase();
145            let has_file_line = {
146                let bytes = trimmed.as_bytes();
147                let mut i = 0;
148                let mut found_colon = false;
149                while i < bytes.len() {
150                    if bytes[i] == b':' {
151                        found_colon = true;
152                        break;
153                    }
154                    i += 1;
155                }
156                found_colon && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit()
157            };
158            let has_strong_keyword =
159                trimmed.contains("FAIL") || trimmed.contains("panic") || trimmed.contains("Panic");
160            let has_weak_keyword = trimmed.contains("error")
161                || trimmed.contains("undefined")
162                || trimmed.contains("expected")
163                || trimmed.contains("got")
164                || lower.contains("cannot find")
165                || lower.contains("not found")
166                || lower.contains("no such")
167                || lower.contains("unresolved")
168                || lower.contains("missing")
169                || lower.contains("declared but not used")
170                || lower.contains("unused")
171                || lower.contains("mismatch");
172            let positional = lower.contains(" error ")
173                || lower.starts_with("error:")
174                || lower.starts_with("warning:")
175                || lower.starts_with("note:")
176                || lower.contains("panic:");
177            has_strong_keyword || (has_file_line && has_weak_keyword) || positional
178        })
179        .take(32)
180        .collect::<Vec<_>>();
181    if !diagnostic_lines.is_empty() {
182        let diagnostics = diagnostic_lines.join("\n");
183        let budget = max_chars.saturating_sub(diagnostics.len() + 64);
184        let keep = budget / 2;
185        if keep >= 80 && output.len() > keep * 2 {
186            let head = snap_to_line_end(output, keep);
187            let tail = snap_to_line_start(output, output.len().saturating_sub(keep));
188            return format!(
189                "{head}\n\n[diagnostic lines preserved]\n{diagnostics}\n\n[... output compacted ...]\n\n{tail}"
190            );
191        }
192    }
193    let keep = max_chars / 2;
194    let head = snap_to_line_end(output, keep);
195    let tail = snap_to_line_start(output, output.len().saturating_sub(keep));
196    let snipped = output.len().saturating_sub(head.len() + tail.len());
197    format!("{head}\n\n[... {snipped} characters snipped ...]\n\n{tail}")
198}
199
200/// Invoke the compress_callback to compress a tool result via pipeline-defined
201/// logic (typically an LLM call). Returns the compressed output, or falls back
202/// to `microcompact_tool_output` on error.
203pub(crate) async fn invoke_compress_callback(
204    callback: &VmValue,
205    tool_name: &str,
206    output: &str,
207    max_chars: usize,
208) -> String {
209    let VmValue::Closure(closure) = callback.clone() else {
210        return microcompact_tool_output(output, max_chars);
211    };
212    let mut vm = match crate::vm::clone_async_builtin_child_vm() {
213        Some(vm) => vm,
214        None => return microcompact_tool_output(output, max_chars),
215    };
216    let args_dict = VmValue::Dict(Rc::new({
217        let mut dict = std::collections::BTreeMap::new();
218        dict.insert(
219            "tool_name".to_string(),
220            VmValue::String(Rc::from(tool_name)),
221        );
222        dict.insert("output".to_string(), VmValue::String(Rc::from(output)));
223        dict.insert("max_chars".to_string(), VmValue::Int(max_chars as i64));
224        dict
225    }));
226    match vm.call_closure_pub(&closure, &[args_dict]).await {
227        Ok(VmValue::String(s)) if !s.is_empty() => s.to_string(),
228        _ => microcompact_tool_output(output, max_chars),
229    }
230}
231
232/// Snap a byte offset to the nearest preceding line boundary (end of a complete line).
233/// Returns the substring from the start up to and including the last complete line
234/// that fits within `max_bytes`. Never cuts mid-line.
235fn snap_to_line_end(s: &str, max_bytes: usize) -> &str {
236    if max_bytes >= s.len() {
237        return s;
238    }
239    let search_end = s.floor_char_boundary(max_bytes);
240    match s[..search_end].rfind('\n') {
241        Some(pos) => &s[..pos + 1],
242        None => &s[..search_end], // single long line — fall back to char boundary
243    }
244}
245
246/// Snap a byte offset to the nearest following line boundary (start of a complete line).
247/// Returns the substring from the first complete line at or after `start_byte`.
248/// Never cuts mid-line.
249fn snap_to_line_start(s: &str, start_byte: usize) -> &str {
250    if start_byte == 0 {
251        return s;
252    }
253    let search_start = s.ceil_char_boundary(start_byte);
254    if search_start >= s.len() {
255        return "";
256    }
257    match s[search_start..].find('\n') {
258        Some(pos) => {
259            let line_start = search_start + pos + 1;
260            if line_start < s.len() {
261                &s[line_start..]
262            } else {
263                &s[search_start..]
264            }
265        }
266        None => &s[search_start..], // already at start of last line
267    }
268}
269
270fn format_compaction_messages(messages: &[serde_json::Value]) -> String {
271    messages
272        .iter()
273        .map(|msg| {
274            let role = msg
275                .get("role")
276                .and_then(|v| v.as_str())
277                .unwrap_or("user")
278                .to_uppercase();
279            let content = msg
280                .get("content")
281                .and_then(|v| v.as_str())
282                .unwrap_or_default();
283            format!("{role}: {content}")
284        })
285        .collect::<Vec<_>>()
286        .join("\n")
287}
288
/// Abbreviate archived messages deterministically (no LLM involved).
///
/// Thin delegate to [`truncate_compaction_summary_with_context`] with the
/// LLM-fallback header disabled.
fn truncate_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
) -> String {
    truncate_compaction_summary_with_context(old_messages, archived_count, false)
}
295
296fn truncate_compaction_summary_with_context(
297    old_messages: &[serde_json::Value],
298    archived_count: usize,
299    is_llm_fallback: bool,
300) -> String {
301    let per_msg_limit = 500_usize;
302    let summary_parts: Vec<String> = old_messages
303        .iter()
304        .filter_map(|m| {
305            let role = m.get("role")?.as_str()?;
306            let content = m.get("content")?.as_str()?;
307            if content.is_empty() {
308                return None;
309            }
310            let truncated = if content.len() > per_msg_limit {
311                format!(
312                    "{}... [truncated from {} chars]",
313                    &content[..content.floor_char_boundary(per_msg_limit)],
314                    content.len()
315                )
316            } else {
317                content.to_string()
318            };
319            Some(format!("[{role}] {truncated}"))
320        })
321        .take(15)
322        .collect();
323    let header = if is_llm_fallback {
324        format!(
325            "[auto-compact fallback: LLM summarizer returned empty; {archived_count} older messages abbreviated to ~{per_msg_limit} chars each]"
326        )
327    } else {
328        format!("[auto-compacted {archived_count} older messages via truncate strategy]")
329    };
330    format!(
331        "{header}\n{}{}",
332        summary_parts.join("\n"),
333        if archived_count > 15 {
334            format!("\n... and {} more", archived_count - 15)
335        } else {
336            String::new()
337        }
338    )
339}
340
341fn compact_summary_text_from_value(value: &VmValue) -> Result<String, VmError> {
342    if let Some(map) = value.as_dict() {
343        if let Some(summary) = map.get("summary").or_else(|| map.get("text")) {
344            return Ok(summary.display());
345        }
346    }
347    match value {
348        VmValue::String(text) => Ok(text.to_string()),
349        VmValue::Nil => Ok(String::new()),
350        _ => serde_json::to_string_pretty(&vm_value_to_json(value))
351            .map_err(|e| VmError::Runtime(format!("custom compactor encode error: {e}"))),
352    }
353}
354
355async fn llm_compaction_summary(
356    old_messages: &[serde_json::Value],
357    archived_count: usize,
358    llm_opts: &crate::llm::api::LlmCallOptions,
359    summarize_prompt: Option<&str>,
360) -> Result<String, VmError> {
361    let mut compact_opts = llm_opts.clone();
362    let formatted = format_compaction_messages(old_messages);
363    compact_opts.system = None;
364    compact_opts.transcript_summary = None;
365    compact_opts.native_tools = None;
366    compact_opts.tool_choice = None;
367    compact_opts.response_format = None;
368    compact_opts.json_schema = None;
369    let prompt = render_llm_compaction_prompt(summarize_prompt, &formatted, archived_count)?;
370    compact_opts.messages = vec![serde_json::json!({
371        "role": "user",
372        "content": prompt,
373    })];
374    let result = vm_call_llm_full(&compact_opts).await?;
375    let summary = result.text.trim();
376    if summary.is_empty() {
377        Ok(truncate_compaction_summary_with_context(
378            old_messages,
379            archived_count,
380            true,
381        ))
382    } else {
383        Ok(format!(
384            "[auto-compacted {archived_count} older messages]\n{summary}"
385        ))
386    }
387}
388
389fn render_llm_compaction_prompt(
390    summarize_prompt: Option<&str>,
391    formatted: &str,
392    archived_count: usize,
393) -> Result<String, VmError> {
394    let Some(path) = summarize_prompt.filter(|path| !path.trim().is_empty()) else {
395        return Ok(format!(
396            "Summarize these archived conversation messages for a follow-on coding agent. Preserve goals, constraints, decisions, completed tool work, unresolved issues, and next actions. Output only the summary text.\n\nArchived message count: {archived_count}\n\nConversation:\n{formatted}"
397        ));
398    };
399
400    let resolved = crate::stdlib::process::resolve_source_asset_path(path);
401    let template = std::fs::read_to_string(&resolved).map_err(|error| {
402        VmError::Runtime(format!(
403            "failed to read compaction summarize_prompt {}: {error}",
404            resolved.display()
405        ))
406    })?;
407    let mut bindings = BTreeMap::new();
408    bindings.insert(
409        "formatted_messages".to_string(),
410        VmValue::String(Rc::from(formatted.to_string())),
411    );
412    bindings.insert(
413        "archived_count".to_string(),
414        VmValue::Int(archived_count as i64),
415    );
416    crate::stdlib::template::render_template_result(
417        &template,
418        Some(&bindings),
419        resolved.parent(),
420        Some(&resolved),
421    )
422    .map_err(|error| {
423        VmError::Runtime(format!(
424            "compaction summarize_prompt render error: {error:?}"
425        ))
426    })
427}
428
429async fn custom_compaction_summary(
430    old_messages: &[serde_json::Value],
431    archived_count: usize,
432    callback: &VmValue,
433) -> Result<String, VmError> {
434    let Some(VmValue::Closure(closure)) = Some(callback.clone()) else {
435        return Err(VmError::Runtime(
436            "compact_callback must be a closure when compact_strategy is 'custom'".to_string(),
437        ));
438    };
439    let mut vm = crate::vm::clone_async_builtin_child_vm().ok_or_else(|| {
440        VmError::Runtime(
441            "custom transcript compaction requires an async builtin VM context".to_string(),
442        )
443    })?;
444    let messages_vm = VmValue::List(Rc::new(
445        old_messages
446            .iter()
447            .map(crate::stdlib::json_to_vm_value)
448            .collect(),
449    ));
450    let result = vm.call_closure_pub(&closure, &[messages_vm]).await;
451    let summary = compact_summary_text_from_value(&result?)?;
452    if summary.trim().is_empty() {
453        Ok(truncate_compaction_summary(old_messages, archived_count))
454    } else {
455        Ok(format!(
456            "[auto-compacted {archived_count} older messages]\n{summary}"
457        ))
458    }
459}
460
/// Whether a tool-result string should be kept verbatim during observation
/// masking. Content length is the primary heuristic: short results (under
/// `PRESERVE_LIMIT` bytes) are typically error messages, status lines, or
/// concise answers — cheap to retain and risky to mask. Longer results are
/// masked to save context budget.
fn content_should_preserve(content: &str) -> bool {
    const PRESERVE_LIMIT: usize = 500;
    content.len() < PRESERVE_LIMIT
}
469
/// Default per-message masking for tool results.
///
/// Messages of three lines or fewer are kept whole; longer ones are reduced
/// to a preview of the first line plus a line/char tally.
fn default_mask_tool_result(role: &str, content: &str) -> String {
    let first_line = content.lines().next().unwrap_or(content);
    let line_count = content.lines().count();
    // NOTE(review): this is a byte count though the mask says "chars" — kept
    // as an approximation.
    let char_count = content.len();
    if line_count <= 3 {
        format!("[{role}] {content}")
    } else {
        // Snap the 120-byte preview cut to a char boundary: slicing at the
        // raw byte offset panicked when byte 120 fell inside a multi-byte
        // UTF-8 character.
        let cut = if first_line.len() <= 120 {
            first_line.len()
        } else {
            (0..=120)
                .rev()
                .find(|&idx| first_line.is_char_boundary(idx))
                .unwrap_or(0)
        };
        let preview = &first_line[..cut];
        format!("[{role}] {preview}... [{line_count} lines, {char_count} chars masked]")
    }
}
482
/// Deterministic observation-mask compaction.
///
/// Test-only convenience wrapper over
/// `observation_mask_compaction_with_callback` with no custom masks.
#[cfg(test)]
pub(crate) fn observation_mask_compaction(
    old_messages: &[serde_json::Value],
    archived_count: usize,
) -> String {
    observation_mask_compaction_with_callback(old_messages, archived_count, None)
}
491
492fn observation_mask_compaction_with_callback(
493    old_messages: &[serde_json::Value],
494    archived_count: usize,
495    mask_results: Option<&[Option<String>]>,
496) -> String {
497    let mut parts = Vec::new();
498    parts.push(format!(
499        "[auto-compacted {archived_count} older messages via observation masking]"
500    ));
501    for (idx, msg) in old_messages.iter().enumerate() {
502        let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("user");
503        let content = msg
504            .get("content")
505            .and_then(|v| v.as_str())
506            .unwrap_or_default();
507        if content.is_empty() {
508            continue;
509        }
510        if role == "assistant" {
511            parts.push(format!("[assistant] {content}"));
512            continue;
513        }
514        if content_should_preserve(content) {
515            parts.push(format!("[{role}] {content}"));
516        } else if let Some(Some(custom)) = mask_results.and_then(|r| r.get(idx)) {
517            parts.push(custom.clone());
518        } else {
519            parts.push(default_mask_tool_result(role, content));
520        }
521    }
522    parts.join("\n")
523}
524
525/// Invoke the mask_callback to get per-message custom masks.
526async fn invoke_mask_callback(
527    callback: &VmValue,
528    old_messages: &[serde_json::Value],
529) -> Result<Vec<Option<String>>, VmError> {
530    let VmValue::Closure(closure) = callback.clone() else {
531        return Err(VmError::Runtime(
532            "mask_callback must be a closure".to_string(),
533        ));
534    };
535    let mut vm = crate::vm::clone_async_builtin_child_vm().ok_or_else(|| {
536        VmError::Runtime("mask_callback requires an async builtin VM context".to_string())
537    })?;
538    let messages_vm = VmValue::List(Rc::new(
539        old_messages
540            .iter()
541            .map(crate::stdlib::json_to_vm_value)
542            .collect(),
543    ));
544    let result = vm.call_closure_pub(&closure, &[messages_vm]).await?;
545    let list = match result {
546        VmValue::List(items) => items,
547        _ => return Ok(vec![None; old_messages.len()]),
548    };
549    Ok(list
550        .iter()
551        .map(|v| match v {
552            VmValue::String(s) => Some(s.to_string()),
553            VmValue::Nil => None,
554            _ => None,
555        })
556        .collect())
557}
558
559/// Apply a single compaction strategy to a list of archived messages.
560async fn apply_compaction_strategy(
561    strategy: &CompactStrategy,
562    old_messages: &[serde_json::Value],
563    archived_count: usize,
564    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
565    custom_compactor: Option<&VmValue>,
566    mask_callback: Option<&VmValue>,
567    summarize_prompt: Option<&str>,
568) -> Result<String, VmError> {
569    match strategy {
570        CompactStrategy::Truncate => Ok(truncate_compaction_summary(old_messages, archived_count)),
571        CompactStrategy::Llm => {
572            llm_compaction_summary(
573                old_messages,
574                archived_count,
575                llm_opts.ok_or_else(|| {
576                    VmError::Runtime(
577                        "LLM transcript compaction requires active LLM call options".to_string(),
578                    )
579                })?,
580                summarize_prompt,
581            )
582            .await
583        }
584        CompactStrategy::Custom => {
585            custom_compaction_summary(
586                old_messages,
587                archived_count,
588                custom_compactor.ok_or_else(|| {
589                    VmError::Runtime(
590                        "compact_callback is required when compact_strategy is 'custom'"
591                            .to_string(),
592                    )
593                })?,
594            )
595            .await
596        }
597        CompactStrategy::ObservationMask => {
598            let mask_results = if let Some(cb) = mask_callback {
599                Some(invoke_mask_callback(cb, old_messages).await?)
600            } else {
601                None
602            };
603            Ok(observation_mask_compaction_with_callback(
604                old_messages,
605                archived_count,
606                mask_results.as_deref(),
607            ))
608        }
609    }
610}
611
/// Auto-compact a message list in place using two-tier compaction.
///
/// Tier 1 archives everything before a computed split point (keeping roughly
/// the last `config.keep_last` messages) and replaces it with one summary
/// message produced by `config.compact_strategy`. If the summary plus the
/// kept suffix still exceed `config.hard_limit_tokens`, tier 2 re-summarizes
/// the tier-1 summary with `config.hard_limit_strategy`.
///
/// Returns `Ok(Some(summary))` when compaction ran, `Ok(None)` when the
/// transcript was already short enough or no safe split point existed.
pub(crate) async fn auto_compact_messages(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<Option<String>, VmError> {
    // Nothing to archive if the transcript fits in the kept suffix.
    if messages.len() <= config.keep_last {
        return Ok(None);
    }
    let original_split = messages.len().saturating_sub(config.keep_last);
    let mut split_at = original_split;
    // Snap back to a user-role boundary so the kept suffix begins at a clean
    // turn. OpenAI-compatible APIs reject tool results orphaned from their
    // assistant request, so splitting mid-turn corrupts the transcript.
    while split_at > 0
        && messages[split_at]
            .get("role")
            .and_then(|r| r.as_str())
            .is_none_or(|r| r != "user")
    {
        split_at -= 1;
    }
    // Fall back to the naive split (e.g. tool-heavy transcripts with the sole
    // user message at index 0) rather than skipping compaction entirely.
    if split_at == 0 {
        split_at = original_split;
    }
    // If the kept suffix opens with a reasoning/tool-call turn, pull the
    // split back to the user turn that initiated it so an assistant request
    // and its tool results stay together in the kept suffix.
    if let Some(volatile_start) = messages[split_at..]
        .iter()
        .position(is_reasoning_or_tool_turn_message)
        .map(|offset| split_at + offset)
    {
        if let Some(boundary) = volatile_start
            .checked_sub(1)
            .and_then(|idx| find_prev_user_boundary(messages, idx))
            .filter(|boundary| *boundary > 0)
        {
            split_at = boundary;
        }
    }
    // No archivable prefix remained after boundary adjustment.
    if split_at == 0 {
        return Ok(None);
    }
    let old_messages: Vec<_> = messages.drain(..split_at).collect();
    let archived_count = old_messages.len();

    // Tier 1: configured (typically lightweight/deterministic) strategy.
    let mut summary = apply_compaction_strategy(
        &config.compact_strategy,
        &old_messages,
        archived_count,
        llm_opts,
        config.custom_compactor.as_ref(),
        config.mask_callback.as_ref(),
        config.summarize_prompt.as_deref(),
    )
    .await?;

    // Tier 2: re-compact the tier-1 summary when summary + kept suffix still
    // exceed the hard token limit. The mask callback is passed as None here —
    // the input is a single synthetic summary message, not tool results.
    if let Some(hard_limit) = config.hard_limit_tokens {
        let summary_msg = serde_json::json!({"role": "user", "content": &summary});
        let mut estimate_msgs = vec![summary_msg];
        estimate_msgs.extend_from_slice(messages.as_slice());
        let estimated = estimate_message_tokens(&estimate_msgs);
        if estimated > hard_limit {
            let tier1_as_messages = vec![serde_json::json!({
                "role": "user",
                "content": summary,
            })];
            summary = apply_compaction_strategy(
                &config.hard_limit_strategy,
                &tier1_as_messages,
                archived_count,
                llm_opts,
                config.custom_compactor.as_ref(),
                None,
                config.summarize_prompt.as_deref(),
            )
            .await?;
        }
    }

    // The summary becomes the new transcript head, replacing the archive.
    messages.insert(
        0,
        serde_json::json!({
            "role": "user",
            "content": summary,
        }),
    );
    Ok(Some(summary))
}
701
#[cfg(test)]
mod tests {
    use super::*;

    // Output shorter than the budget passes through untouched.
    #[test]
    fn microcompact_short_output_unchanged() {
        let output = "line1\nline2\nline3\n";
        assert_eq!(microcompact_tool_output(output, 1000), output);
    }

    // The head of a compacted result must consist only of complete lines.
    #[test]
    fn microcompact_snaps_to_line_boundaries() {
        let lines: Vec<String> = (0..20)
            .map(|i| format!("line {:02} content here", i))
            .collect();
        let output = lines.join("\n");
        let result = microcompact_tool_output(&output, 200);
        assert!(result.contains("[... "), "should have snip marker");
        let parts: Vec<&str> = result.split("\n\n[... ").collect();
        assert!(parts.len() >= 2, "should split at marker");
        let head = parts[0];
        for line in head.lines() {
            assert!(
                line.starts_with("line "),
                "head line should be complete: {line}"
            );
        }
    }

    // A compiler-style diagnostic buried in verbose output must survive
    // compaction via the [diagnostic lines preserved] section.
    #[test]
    fn microcompact_preserves_diagnostic_lines_with_line_boundaries() {
        let mut lines = Vec::new();
        for i in 0..50 {
            lines.push(format!("verbose output line {i}"));
        }
        lines.push("src/main.rs:42: error: cannot find value".to_string());
        for i in 50..100 {
            lines.push(format!("verbose output line {i}"));
        }
        let output = lines.join("\n");
        let result = microcompact_tool_output(&output, 600);
        assert!(result.contains("cannot find value"), "diagnostic preserved");
        assert!(
            result.contains("[diagnostic lines preserved]"),
            "has diagnostic marker"
        );
    }

    // Head snapping cuts just past a newline, never mid-line.
    #[test]
    fn snap_to_line_end_finds_newline() {
        let s = "line1\nline2\nline3\nline4\n";
        let head = snap_to_line_end(s, 12);
        assert!(head.ends_with('\n'), "should end at newline");
        assert!(head.contains("line1"));
    }

    // Tail snapping begins at a line start, never mid-line.
    #[test]
    fn snap_to_line_start_finds_newline() {
        let s = "line1\nline2\nline3\nline4\n";
        let tail = snap_to_line_start(s, 12);
        assert!(
            tail.starts_with("line"),
            "should start at line boundary: {tail}"
        );
    }

    // Splitting must not orphan a tool result from the assistant message
    // that issued the tool call: the whole turn stays in the kept suffix.
    #[test]
    fn auto_compact_preserves_reasoning_tool_suffix() {
        let mut messages = vec![
            serde_json::json!({"role": "user", "content": "old task"}),
            serde_json::json!({"role": "assistant", "content": "old reply"}),
            serde_json::json!({"role": "user", "content": "new task"}),
            serde_json::json!({
                "role": "assistant",
                "content": "",
                "reasoning": "think first",
                "tool_calls": [{
                    "id": "call_1",
                    "type": "function",
                    "function": {"name": "read", "arguments": "{\"path\":\"foo.rs\"}"}
                }],
            }),
            serde_json::json!({"role": "tool", "tool_call_id": "call_1", "content": "file"}),
        ];
        let config = AutoCompactConfig {
            keep_last: 2,
            ..Default::default()
        };

        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("runtime");
        let summary = runtime
            .block_on(auto_compact_messages(&mut messages, &config, None))
            .expect("compaction succeeds");

        assert!(summary.is_some());
        assert_eq!(messages[1]["role"], "user");
        assert_eq!(messages[2]["role"], "assistant");
        assert_eq!(messages[2]["tool_calls"][0]["id"], "call_1");
        assert_eq!(messages[3]["role"], "tool");
        assert_eq!(messages[3]["tool_call_id"], "call_1");
    }
}