harn_vm/orchestration/
compaction.rs

1//! Auto-compaction — transcript size management strategies.
2
3use std::collections::BTreeMap;
4
5use serde::{Deserialize, Serialize};
6
7use crate::llm::{vm_call_llm_full, vm_value_to_json};
8use crate::value::{VmError, VmValue};
9use crate::vm::AsyncBuiltinCtx;
10
/// Engine-level strategy used to shrink a transcript.
///
/// The textual spelling of each variant is defined by
/// `parse_compact_strategy` and `compact_strategy_name`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompactStrategy {
    /// Spelled `"llm"`: summarize archived messages with an LLM call.
    Llm,
    /// Spelled `"truncate"`: abbreviate archived messages without an LLM.
    Truncate,
    /// Spelled `"custom"`: delegate to a caller-supplied callback.
    Custom,
    /// Spelled `"observation_mask"`: mask verbose tool results.
    ObservationMask,
}
18
19pub fn parse_compact_strategy(value: &str) -> Result<CompactStrategy, VmError> {
20    match value {
21        "llm" => Ok(CompactStrategy::Llm),
22        "truncate" => Ok(CompactStrategy::Truncate),
23        "custom" => Ok(CompactStrategy::Custom),
24        "observation_mask" => Ok(CompactStrategy::ObservationMask),
25        other => Err(VmError::Runtime(format!(
26            "unknown compact_strategy '{other}' (expected 'llm', 'truncate', 'custom', or 'observation_mask')"
27        ))),
28    }
29}
30
31pub fn compact_strategy_name(strategy: &CompactStrategy) -> &'static str {
32    match strategy {
33        CompactStrategy::Llm => "llm",
34        CompactStrategy::Truncate => "truncate",
35        CompactStrategy::Custom => "custom",
36        CompactStrategy::ObservationMask => "observation_mask",
37    }
38}
39
/// Option keys that map onto `CompactionPolicy` fields.
///
/// Each entry is the external key name; note that `"drop"` corresponds to the
/// `drop_items` field. Exposed to callers via `compaction_policy_option_keys`.
const COMPACTION_POLICY_KEYS: &[&str] = &[
    "instructions",
    "mode",
    "scope",
    "preserve",
    "drop",
    "extend_default_instructions",
    "author",
];
49
/// Host/user-supplied guidance for how a transcript should be compacted.
///
/// Every field is optional; with `#[serde(default)]` missing fields
/// deserialize to their `Default` value.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)]
pub struct CompactionPolicy {
    /// Free-form instructions added to the summarizer prompt.
    pub instructions: Option<String>,
    /// Caller-defined mode label; carried through as metadata by this type.
    pub mode: Option<String>,
    /// Scope label. `model_visible`, `summary`, and `transcript` are the
    /// values treated as model-visible by `is_model_visible_scope`.
    pub scope: Option<String>,
    /// Items rendered as a `Preserve: ...` directive in the prompt.
    pub preserve: Vec<String>,
    /// Items rendered as a `Drop: ...` directive in the prompt.
    /// Serialized under the key `drop`.
    #[serde(rename = "drop")]
    pub drop_items: Vec<String>,
    /// `Some(false)` makes the directives replace the default summarizer
    /// instructions; any other value extends them.
    pub extend_default_instructions: Option<bool>,
    /// Who authored the policy; reported as `instruction_source` when it is
    /// not blank.
    pub author: Option<String>,
}
62
/// A compaction request: an optional mode label plus the policy to apply.
///
/// NOTE(review): the consumers of this type are outside this part of the
/// file; the meaning of `mode` values should be confirmed against them.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)]
pub struct CompactionRequest {
    /// Optional request-level mode label.
    pub mode: Option<String>,
    /// Policy guiding the compaction; defaults to an empty policy.
    pub policy: CompactionPolicy,
}
69
70impl CompactionPolicy {
71    pub fn has_metadata(&self) -> bool {
72        self.instructions.is_some()
73            || self.mode.is_some()
74            || self.scope.is_some()
75            || !self.preserve.is_empty()
76            || !self.drop_items.is_empty()
77            || self.extend_default_instructions.is_some()
78            || self.author.is_some()
79    }
80
81    fn has_prompt_directives(&self) -> bool {
82        self.instructions
83            .as_deref()
84            .is_some_and(|value| !value.trim().is_empty())
85            || !self.preserve.is_empty()
86            || !self.drop_items.is_empty()
87    }
88
89    pub fn instruction_mode(&self) -> &'static str {
90        if !self.has_prompt_directives() {
91            "default"
92        } else if self.extend_default_instructions == Some(false) {
93            "replace"
94        } else {
95            "extend"
96        }
97    }
98
99    pub fn instruction_source(&self) -> Option<&str> {
100        self.author
101            .as_deref()
102            .filter(|author| !author.trim().is_empty())
103    }
104
105    pub fn metadata_json(&self) -> Option<serde_json::Value> {
106        if !self.has_metadata() {
107            return None;
108        }
109        let mut map = serde_json::Map::new();
110        if let Some(instructions) = self.instructions.as_ref() {
111            map.insert(
112                "instructions".to_string(),
113                serde_json::Value::String(instructions.clone()),
114            );
115        }
116        if let Some(mode) = self.mode.as_ref() {
117            map.insert("mode".to_string(), serde_json::Value::String(mode.clone()));
118        }
119        if let Some(scope) = self.scope.as_ref() {
120            map.insert(
121                "scope".to_string(),
122                serde_json::Value::String(scope.clone()),
123            );
124        }
125        if !self.preserve.is_empty() {
126            map.insert(
127                "preserve".to_string(),
128                serde_json::to_value(&self.preserve).unwrap_or_default(),
129            );
130        }
131        if !self.drop_items.is_empty() {
132            map.insert(
133                "drop".to_string(),
134                serde_json::to_value(&self.drop_items).unwrap_or_default(),
135            );
136        }
137        if let Some(extend_default_instructions) = self.extend_default_instructions {
138            map.insert(
139                "extend_default_instructions".to_string(),
140                serde_json::Value::Bool(extend_default_instructions),
141            );
142        }
143        if let Some(author) = self.author.as_ref() {
144            map.insert(
145                "author".to_string(),
146                serde_json::Value::String(author.clone()),
147            );
148        }
149        map.insert(
150            "instruction_mode".to_string(),
151            serde_json::Value::String(self.instruction_mode().to_string()),
152        );
153        if let Some(source) = self.instruction_source() {
154            map.insert(
155                "instruction_source".to_string(),
156                serde_json::Value::String(source.to_string()),
157            );
158        }
159        Some(serde_json::Value::Object(map))
160    }
161
162    fn prompt_directives(&self) -> Option<String> {
163        if !self.has_prompt_directives() {
164            return None;
165        }
166        let mut parts = Vec::new();
167        if let Some(instructions) = self
168            .instructions
169            .as_deref()
170            .map(str::trim)
171            .filter(|value| !value.is_empty())
172        {
173            parts.push(instructions.to_string());
174        }
175        if !self.preserve.is_empty() {
176            parts.push(format!("Preserve: {}.", self.preserve.join("; ")));
177        }
178        if !self.drop_items.is_empty() {
179            parts.push(format!("Drop: {}.", self.drop_items.join("; ")));
180        }
181        Some(parts.join("\n"))
182    }
183
184    fn is_model_visible_scope(&self) -> bool {
185        matches!(
186            self.scope.as_deref(),
187            Some("model_visible" | "summary" | "transcript")
188        )
189    }
190}
191
/// Returns the option keys recognised as compaction policy fields
/// (the contents of `COMPACTION_POLICY_KEYS`).
pub fn compaction_policy_option_keys() -> &'static [&'static str] {
    COMPACTION_POLICY_KEYS
}
195
196pub fn compaction_policy_to_vm_value(policy: &CompactionPolicy) -> VmValue {
197    let mut map = BTreeMap::new();
198    if let Some(instructions) = policy.instructions.as_ref() {
199        map.insert(
200            "instructions".to_string(),
201            VmValue::String(std::sync::Arc::from(instructions.clone())),
202        );
203    }
204    if let Some(mode) = policy.mode.as_ref() {
205        map.insert(
206            "mode".to_string(),
207            VmValue::String(std::sync::Arc::from(mode.clone())),
208        );
209    }
210    if let Some(scope) = policy.scope.as_ref() {
211        map.insert(
212            "scope".to_string(),
213            VmValue::String(std::sync::Arc::from(scope.clone())),
214        );
215    }
216    map.insert(
217        "preserve".to_string(),
218        VmValue::List(std::sync::Arc::new(
219            policy
220                .preserve
221                .iter()
222                .map(|item| VmValue::String(std::sync::Arc::from(item.clone())))
223                .collect(),
224        )),
225    );
226    map.insert(
227        "drop".to_string(),
228        VmValue::List(std::sync::Arc::new(
229            policy
230                .drop_items
231                .iter()
232                .map(|item| VmValue::String(std::sync::Arc::from(item.clone())))
233                .collect(),
234        )),
235    );
236    if let Some(extend_default_instructions) = policy.extend_default_instructions {
237        map.insert(
238            "extend_default_instructions".to_string(),
239            VmValue::Bool(extend_default_instructions),
240        );
241    }
242    if let Some(author) = policy.author.as_ref() {
243        map.insert(
244            "author".to_string(),
245            VmValue::String(std::sync::Arc::from(author.clone())),
246        );
247    }
248    VmValue::Dict(std::sync::Arc::new(map))
249}
250
251pub fn parse_compaction_policy_options(
252    options: Option<&BTreeMap<String, VmValue>>,
253    builtin: &str,
254) -> Result<CompactionPolicy, VmError> {
255    let mut policy = options
256        .and_then(|map| {
257            map.get("policy")
258                .or_else(|| map.get("compaction_policy"))
259                .or_else(|| map.get("compaction_request"))
260        })
261        .map(|value| parse_compaction_policy_value(value, builtin))
262        .transpose()?
263        .unwrap_or_default();
264    if let Some(options) = options {
265        apply_compaction_policy_fields(&mut policy, options, builtin)?;
266    }
267    Ok(policy)
268}
269
270fn parse_compaction_policy_value(
271    value: &VmValue,
272    builtin: &str,
273) -> Result<CompactionPolicy, VmError> {
274    match value {
275        VmValue::Nil => Ok(CompactionPolicy::default()),
276        VmValue::Dict(map) => {
277            if let Some(nested) = map
278                .get("policy")
279                .or_else(|| map.get("compaction_policy"))
280                .or_else(|| map.get("compaction_request"))
281            {
282                let mut policy = parse_compaction_policy_value(nested, builtin)?;
283                apply_compaction_policy_fields(&mut policy, map, builtin)?;
284                Ok(policy)
285            } else {
286                let mut policy = CompactionPolicy::default();
287                apply_compaction_policy_fields(&mut policy, map, builtin)?;
288                Ok(policy)
289            }
290        }
291        other => Err(VmError::Runtime(format!(
292            "{builtin}: compaction policy must be a dict or nil, got {}",
293            other.type_name()
294        ))),
295    }
296}
297
298fn apply_compaction_policy_fields(
299    policy: &mut CompactionPolicy,
300    map: &BTreeMap<String, VmValue>,
301    builtin: &str,
302) -> Result<(), VmError> {
303    if let Some(value) = optional_policy_string(map, "instructions", builtin)? {
304        policy.instructions = Some(value);
305    }
306    if let Some(value) = optional_policy_string(map, "mode", builtin)? {
307        policy.mode = Some(value);
308    }
309    if let Some(value) = optional_policy_string(map, "scope", builtin)? {
310        policy.scope = Some(value);
311    }
312    if map.contains_key("preserve") {
313        policy.preserve = policy_string_list(map.get("preserve"), builtin, "preserve")?;
314    }
315    if map.contains_key("drop") {
316        policy.drop_items = policy_string_list(map.get("drop"), builtin, "drop")?;
317    }
318    if let Some(value) = optional_policy_bool(map, "extend_default_instructions", builtin)? {
319        policy.extend_default_instructions = Some(value);
320    }
321    if let Some(value) = optional_policy_string(map, "author", builtin)? {
322        policy.author = Some(value);
323    }
324    Ok(())
325}
326
327fn optional_policy_string(
328    map: &BTreeMap<String, VmValue>,
329    key: &str,
330    builtin: &str,
331) -> Result<Option<String>, VmError> {
332    match map.get(key) {
333        None | Some(VmValue::Nil) => Ok(None),
334        Some(VmValue::String(text)) => {
335            let trimmed = text.trim();
336            if trimmed.is_empty() {
337                Ok(None)
338            } else {
339                Ok(Some(trimmed.to_string()))
340            }
341        }
342        Some(other) => Err(VmError::Runtime(format!(
343            "{builtin}: compaction policy `{key}` must be a string, got {}",
344            other.type_name()
345        ))),
346    }
347}
348
349fn optional_policy_bool(
350    map: &BTreeMap<String, VmValue>,
351    key: &str,
352    builtin: &str,
353) -> Result<Option<bool>, VmError> {
354    match map.get(key) {
355        None | Some(VmValue::Nil) => Ok(None),
356        Some(VmValue::Bool(value)) => Ok(Some(*value)),
357        Some(other) => Err(VmError::Runtime(format!(
358            "{builtin}: compaction policy `{key}` must be a bool, got {}",
359            other.type_name()
360        ))),
361    }
362}
363
364fn policy_string_list(
365    value: Option<&VmValue>,
366    builtin: &str,
367    key: &str,
368) -> Result<Vec<String>, VmError> {
369    match value {
370        None | Some(VmValue::Nil) => Ok(Vec::new()),
371        Some(VmValue::String(text)) => {
372            let trimmed = text.trim();
373            if trimmed.is_empty() {
374                Ok(Vec::new())
375            } else {
376                Ok(vec![trimmed.to_string()])
377            }
378        }
379        Some(VmValue::List(items)) => items
380            .iter()
381            .map(|item| match item {
382                VmValue::String(text) => Ok(text.trim().to_string()),
383                other => Err(VmError::Runtime(format!(
384                    "{builtin}: compaction policy `{key}` entries must be strings, got {}",
385                    other.type_name()
386                ))),
387            })
388            .filter_map(|result| match result {
389                Ok(value) if value.is_empty() => None,
390                other => Some(other),
391            })
392            .collect(),
393        Some(other) => Err(VmError::Runtime(format!(
394            "{builtin}: compaction policy `{key}` must be a string or list, got {}",
395            other.type_name()
396        ))),
397    }
398}
399
400pub fn compaction_policy_metadata_fields(
401    policy: &CompactionPolicy,
402) -> Vec<(&'static str, serde_json::Value)> {
403    let mut fields = vec![(
404        "instruction_mode",
405        serde_json::Value::String(policy.instruction_mode().to_string()),
406    )];
407    if let Some(source) = policy.instruction_source() {
408        fields.push((
409            "instruction_source",
410            serde_json::Value::String(source.to_string()),
411        ));
412    }
413    if let Some(policy_json) = policy.metadata_json() {
414        fields.push(("compaction_policy", policy_json));
415    }
416    fields
417}
418
/// Configuration for automatic transcript compaction in agent loops.
///
/// Two-tier compaction:
///   Tier 1 (`token_threshold` / `compact_strategy`): lightweight, deterministic
///     observation masking that fires early. Masks verbose tool results while
///     preserving assistant prose and error output.
///   Tier 2 (`hard_limit_tokens` / `hard_limit_strategy`): aggressive LLM-powered
///     summarization that fires when tier-1 alone isn't enough, typically as the
///     transcript approaches the model's actual context window.
///
/// The defaults quoted on each field are the values produced by the
/// `Default` implementation.
#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    /// Number of earliest messages to keep verbatim before the compacted
    /// summary. The system prompt is not part of this list and is always
    /// preserved separately by the caller. Default: `0`.
    pub keep_first: usize,
    /// Tier-1 threshold: estimated tokens before lightweight compaction.
    /// Default: `48_000`.
    pub token_threshold: usize,
    /// Maximum character length for a single tool result before microcompaction.
    /// Default: `16_000`.
    pub tool_output_max_chars: usize,
    /// Number of recent messages to keep during compaction. Default: `12`.
    pub keep_last: usize,
    /// Tier-1 strategy (default: ObservationMask).
    pub compact_strategy: CompactStrategy,
    /// Tier-2 threshold: fires when tier-1 result still exceeds this.
    /// Typically set to ~75% of the model's actual context window.
    /// When `None` (the default), tier-2 is disabled.
    pub hard_limit_tokens: Option<usize>,
    /// Tier-2 strategy (default: Llm).
    pub hard_limit_strategy: CompactStrategy,
    /// Optional Harn callback used when a strategy is `custom`.
    pub custom_compactor: Option<VmValue>,
    /// Pending reminders supplied to `custom_compactor` as a second
    /// argument. Built-in compaction strategies decide reminder retention
    /// before rebuilding the transcript, so they do not consume this list.
    pub custom_compactor_reminders: Vec<VmValue>,
    /// Optional callback for domain-specific per-message masking during
    /// observation mask compaction. Called with a list of archived messages,
    /// returns a list of `Option<String>` — `Some(masked)` to override the
    /// default mask for that message, `None` to use the default.
    /// This lets the host (e.g. an IDE or cloud runner) inject AST outlines,
    /// file summaries, etc. without putting language-specific logic in Harn.
    pub mask_callback: Option<VmValue>,
    /// Optional callback for per-tool-result compression. Called with
    /// `{tool_name, output, max_chars}` and returns compressed output string.
    /// When set, used INSTEAD of the built-in `microcompact_tool_output`.
    /// This allows the pipeline to use LLM-based compression rather than
    /// keyword heuristics.
    pub compress_callback: Option<VmValue>,
    /// Optional prompt-template asset path used when LLM compaction is
    /// selected. The rendered template becomes the user message sent to
    /// the summarizer. When unset or blank, the stdlib compaction summary
    /// prompt is rendered instead.
    pub summarize_prompt: Option<String>,
    /// User-facing policy label for replay and observability. This can be
    /// broader than the engine strategy, e.g. `hybrid` lowers to LLM
    /// summarization plus truncate fallback. Default: `"observation_mask"`.
    pub policy_strategy: String,
    /// Strategy to try when the primary strategy fails. Budget-pressure
    /// compaction uses this to keep the session within its hard cap even when
    /// an LLM summarizer is unavailable. Default: `None`.
    pub fallback_strategy: Option<CompactStrategy>,
    /// Host/user-supplied instructions that guide compaction without
    /// becoming part of the compacted transcript unless `scope` explicitly
    /// asks for model-visible policy text.
    pub policy: CompactionPolicy,
}
484
impl Default for AutoCompactConfig {
    /// Builds the default configuration: tier-1 observation masking at an
    /// estimated 48k tokens, tier-2 disabled, no callbacks, and an empty
    /// policy.
    fn default() -> Self {
        Self {
            keep_first: 0,
            token_threshold: 48_000,
            tool_output_max_chars: 16_000,
            keep_last: 12,
            compact_strategy: CompactStrategy::ObservationMask,
            // `None` disables tier-2 compaction entirely.
            hard_limit_tokens: None,
            hard_limit_strategy: CompactStrategy::Llm,
            custom_compactor: None,
            custom_compactor_reminders: Vec::new(),
            mask_callback: None,
            compress_callback: None,
            summarize_prompt: None,
            // Keep the user-facing label in sync with the tier-1 strategy above.
            policy_strategy: compact_strategy_name(&CompactStrategy::ObservationMask).to_string(),
            fallback_strategy: None,
            policy: CompactionPolicy::default(),
        }
    }
}
506
507/// Estimate token count from a list of JSON messages (chars / 4 heuristic).
508pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
509    messages.iter().map(estimate_message_chars).sum::<usize>() / 4
510}
511
512fn estimate_message_chars(message: &serde_json::Value) -> usize {
513    let mut total = message
514        .get("content")
515        .map(estimate_content_chars)
516        .unwrap_or_default();
517    if let Some(reasoning) = message.get("reasoning") {
518        total += estimate_content_chars(reasoning);
519    }
520    if let Some(tool_calls) = message.get("tool_calls") {
521        total += estimate_content_chars(tool_calls);
522    }
523    total
524}
525
526fn estimate_content_chars(value: &serde_json::Value) -> usize {
527    match value {
528        serde_json::Value::String(text) => text.len(),
529        serde_json::Value::Array(items) => items.iter().map(estimate_content_chars).sum(),
530        serde_json::Value::Object(map) => map.values().map(estimate_content_chars).sum(),
531        serde_json::Value::Null => 0,
532        other => other.to_string().len(),
533    }
534}
535
536fn is_reasoning_or_tool_turn_message(message: &serde_json::Value) -> bool {
537    let role = message
538        .get("role")
539        .and_then(|value| value.as_str())
540        .unwrap_or_default();
541    role == "tool"
542        || message.get("tool_calls").is_some()
543        || message
544            .get("reasoning")
545            .map(|value| !value.is_null())
546            .unwrap_or(false)
547}
548
549fn find_prev_user_boundary(messages: &[serde_json::Value], start: usize) -> Option<usize> {
550    (0..=start)
551        .rev()
552        .find(|idx| messages[*idx].get("role").and_then(|value| value.as_str()) == Some("user"))
553}
554
555/// Microcompact a tool result: if it exceeds `max_chars`, keep the first and
556/// last portions with a snip marker in between.
557pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
558    if output.len() <= max_chars || max_chars < 200 {
559        return output.to_string();
560    }
561    let diagnostic_lines = output
562        .lines()
563        .filter(|line| {
564            let trimmed = line.trim();
565            let lower = trimmed.to_lowercase();
566            let has_file_line = {
567                let bytes = trimmed.as_bytes();
568                let mut i = 0;
569                let mut found_colon = false;
570                while i < bytes.len() {
571                    if bytes[i] == b':' {
572                        found_colon = true;
573                        break;
574                    }
575                    i += 1;
576                }
577                found_colon && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit()
578            };
579            let has_strong_keyword =
580                trimmed.contains("FAIL") || trimmed.contains("panic") || trimmed.contains("Panic");
581            let has_weak_keyword = trimmed.contains("error")
582                || trimmed.contains("undefined")
583                || trimmed.contains("expected")
584                || trimmed.contains("got")
585                || lower.contains("cannot find")
586                || lower.contains("not found")
587                || lower.contains("no such")
588                || lower.contains("unresolved")
589                || lower.contains("missing")
590                || lower.contains("declared but not used")
591                || lower.contains("unused")
592                || lower.contains("mismatch");
593            let positional = lower.contains(" error ")
594                || lower.starts_with("error:")
595                || lower.starts_with("warning:")
596                || lower.starts_with("note:")
597                || lower.contains("panic:");
598            has_strong_keyword || (has_file_line && has_weak_keyword) || positional
599        })
600        .take(32)
601        .collect::<Vec<_>>();
602    if !diagnostic_lines.is_empty() {
603        let diagnostics = diagnostic_lines.join("\n");
604        let budget = max_chars.saturating_sub(diagnostics.len() + 64);
605        let keep = budget / 2;
606        if keep >= 80 && output.len() > keep * 2 {
607            let head = snap_to_line_end(output, keep);
608            let tail = snap_to_line_start(output, output.len().saturating_sub(keep));
609            return format!(
610                "{head}\n\n[diagnostic lines preserved]\n{diagnostics}\n\n[... output compacted ...]\n\n{tail}"
611            );
612        }
613    }
614    let keep = max_chars / 2;
615    let head = snap_to_line_end(output, keep);
616    let tail = snap_to_line_start(output, output.len().saturating_sub(keep));
617    let snipped = output.len().saturating_sub(head.len() + tail.len());
618    format!("{head}\n\n[... {snipped} characters snipped ...]\n\n{tail}")
619}
620
/// Returns the longest prefix of `s` that fits in `max_bytes` and ends on a
/// line boundary.
///
/// The prefix runs up to and including the last newline found within the
/// first `max_bytes` bytes. If that window contains no newline (a single
/// long line), the prefix is cut at the nearest preceding char boundary
/// instead. When `max_bytes` covers the whole string, `s` is returned as is.
fn snap_to_line_end(s: &str, max_bytes: usize) -> &str {
    if max_bytes >= s.len() {
        return s;
    }
    // Largest char boundary at or below `max_bytes`; index 0 always qualifies.
    let limit = (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    let cut = s[..limit].rfind('\n').map_or(limit, |newline| newline + 1);
    &s[..cut]
}
634
/// Returns the suffix of `s` beginning at the first complete line at or after
/// `start_byte`.
///
/// * `start_byte == 0` returns `s` unchanged.
/// * If `start_byte` already sits at the start of a line, the suffix begins
///   right there, so that line is kept rather than skipped.
/// * Otherwise the partial line containing `start_byte` is dropped and the
///   suffix begins after its terminating newline.
/// * If no further line follows (no newline, or the newline is the last
///   byte), the remainder from `start_byte` is returned even though it
///   starts mid-line, so the tail is never lost entirely.
///
/// `start_byte` is rounded up to a char boundary first; an offset at or past
/// the end of `s` yields `""`.
fn snap_to_line_start(s: &str, start_byte: usize) -> &str {
    if start_byte == 0 {
        return s;
    }
    // Smallest char boundary at or above `start_byte`.
    let search_start = (start_byte..s.len())
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(s.len());
    if search_start >= s.len() {
        return "";
    }
    // `search_start >= start_byte >= 1`, so the preceding byte exists. A
    // preceding newline means we are already on a line boundary.
    if s.as_bytes()[search_start - 1] == b'\n' {
        return &s[search_start..];
    }
    match s[search_start..].find('\n') {
        Some(pos) if search_start + pos + 1 < s.len() => &s[search_start + pos + 1..],
        // No later line exists: fall back to the (partial) remainder.
        _ => &s[search_start..],
    }
}
658
659fn format_compaction_messages(messages: &[serde_json::Value]) -> String {
660    messages
661        .iter()
662        .map(|msg| {
663            let role = msg
664                .get("role")
665                .and_then(|v| v.as_str())
666                .unwrap_or("user")
667                .to_uppercase();
668            let content = msg
669                .get("content")
670                .and_then(|v| v.as_str())
671                .unwrap_or_default();
672            format!("{role}: {content}")
673        })
674        .collect::<Vec<_>>()
675        .join("\n")
676}
677
/// Builds the summary for the plain truncate strategy.
///
/// Delegates to `truncate_compaction_summary_with_context` with
/// `is_llm_fallback = false`, which selects the "via truncate strategy"
/// header.
fn truncate_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
) -> String {
    truncate_compaction_summary_with_context(old_messages, archived_count, false)
}
684
685fn truncate_compaction_summary_with_context(
686    old_messages: &[serde_json::Value],
687    archived_count: usize,
688    is_llm_fallback: bool,
689) -> String {
690    let per_msg_limit = 500_usize;
691    let summary_parts: Vec<String> = old_messages
692        .iter()
693        .filter_map(|m| {
694            let role = m.get("role")?.as_str()?;
695            let content = m.get("content")?.as_str()?;
696            if content.is_empty() {
697                return None;
698            }
699            let truncated = if content.len() > per_msg_limit {
700                format!(
701                    "{}... [truncated from {} chars]",
702                    &content[..content.floor_char_boundary(per_msg_limit)],
703                    content.len()
704                )
705            } else {
706                content.to_string()
707            };
708            Some(format!("[{role}] {truncated}"))
709        })
710        .take(15)
711        .collect();
712    let header = if is_llm_fallback {
713        format!(
714            "[auto-compact fallback: LLM summarizer returned empty; {archived_count} older messages abbreviated to ~{per_msg_limit} chars each]"
715        )
716    } else {
717        format!("[auto-compacted {archived_count} older messages via truncate strategy]")
718    };
719    format!(
720        "{header}\n{}{}",
721        summary_parts.join("\n"),
722        if archived_count > 15 {
723            format!("\n... and {} more", archived_count - 15)
724        } else {
725            String::new()
726        }
727    )
728}
729
730fn compact_summary_text_from_value(value: &VmValue) -> Result<String, VmError> {
731    if let Some(map) = value.as_dict() {
732        if let Some(summary) = map.get("summary").or_else(|| map.get("text")) {
733            return Ok(summary.display());
734        }
735    }
736    match value {
737        VmValue::String(text) => Ok(text.to_string()),
738        VmValue::Nil => Ok(String::new()),
739        _ => serde_json::to_string_pretty(&vm_value_to_json(value))
740            .map_err(|e| VmError::Runtime(format!("custom compactor encode error: {e}"))),
741    }
742}
743
744async fn llm_compaction_summary(
745    old_messages: &[serde_json::Value],
746    archived_count: usize,
747    llm_opts: &crate::llm::api::LlmCallOptions,
748    summarize_prompt: Option<&str>,
749    policy: &CompactionPolicy,
750) -> Result<String, VmError> {
751    let mut compact_opts = llm_opts.clone();
752    let formatted = format_compaction_messages(old_messages);
753    compact_opts.system = None;
754    compact_opts.transcript_summary = None;
755    compact_opts.native_tools = None;
756    compact_opts.tool_choice = None;
757    compact_opts.output_format = crate::llm::api::OutputFormat::Text;
758    compact_opts.response_format = None;
759    compact_opts.json_schema = None;
760    compact_opts.output_schema = None;
761    let prompt =
762        render_llm_compaction_prompt(summarize_prompt, &formatted, archived_count, policy)?;
763    compact_opts.messages = vec![serde_json::json!({
764        "role": "user",
765        "content": prompt,
766    })];
767    let result = vm_call_llm_full(&compact_opts).await?;
768    let summary = result.text.trim();
769    if summary.is_empty() {
770        Ok(truncate_compaction_summary_with_context(
771            old_messages,
772            archived_count,
773            true,
774        ))
775    } else {
776        Ok(format!(
777            "[auto-compacted {archived_count} older messages]\n{summary}"
778        ))
779    }
780}
781
782fn render_llm_compaction_prompt(
783    summarize_prompt: Option<&str>,
784    formatted: &str,
785    archived_count: usize,
786    policy: &CompactionPolicy,
787) -> Result<String, VmError> {
788    if policy.has_prompt_directives() && policy.extend_default_instructions == Some(false) {
789        return render_replacement_compaction_prompt(policy, formatted, archived_count);
790    }
791    let mut bindings = BTreeMap::new();
792    bindings.insert(
793        "formatted_messages".to_string(),
794        VmValue::String(std::sync::Arc::from(formatted.to_string())),
795    );
796    bindings.insert(
797        "archived_count".to_string(),
798        VmValue::Int(archived_count as i64),
799    );
800    let Some(path) = summarize_prompt.filter(|path| !path.trim().is_empty()) else {
801        let prompt = crate::stdlib::template::render_stdlib_prompt_asset(
802            "orchestration/prompts/compaction_summary.harn.prompt",
803            Some(&bindings),
804        )?;
805        return Ok(extend_compaction_prompt(prompt, policy));
806    };
807
808    let asset = crate::stdlib::template::TemplateAsset::render_target(path)
809        .map_err(|error| VmError::Runtime(format!("compaction summarize_prompt: {error}")))?;
810    let prompt = crate::stdlib::template::render_asset_result(&asset, Some(&bindings))
811        .map_err(VmError::from)?;
812    Ok(extend_compaction_prompt(prompt, policy))
813}
814
815fn render_replacement_compaction_prompt(
816    policy: &CompactionPolicy,
817    formatted: &str,
818    archived_count: usize,
819) -> Result<String, VmError> {
820    let directives = policy.prompt_directives().unwrap_or_default();
821    let mut bindings = BTreeMap::new();
822    bindings.insert(
823        "directives".to_string(),
824        VmValue::String(std::sync::Arc::from(directives)),
825    );
826    bindings.insert(
827        "formatted_messages".to_string(),
828        VmValue::String(std::sync::Arc::from(formatted.to_string())),
829    );
830    bindings.insert(
831        "archived_count".to_string(),
832        VmValue::Int(archived_count as i64),
833    );
834    crate::stdlib::template::render_stdlib_prompt_asset(
835        "orchestration/prompts/compaction_policy_replacement.harn.prompt",
836        Some(&bindings),
837    )
838}
839
840fn extend_compaction_prompt(mut prompt: String, policy: &CompactionPolicy) -> String {
841    let Some(directives) = policy.prompt_directives() else {
842        return prompt;
843    };
844    prompt.push_str(
845        "\n\nAdditional compaction instructions: use these directives to shape the summary, but do not quote this section unless it explicitly requests a model-visible note.\n",
846    );
847    prompt.push_str(&directives);
848    prompt
849}
850
851async fn custom_compaction_summary(
852    ctx: Option<&AsyncBuiltinCtx>,
853    old_messages: &[serde_json::Value],
854    archived_count: usize,
855    callback: &VmValue,
856    reminders: &[VmValue],
857    policy: &CompactionPolicy,
858) -> Result<String, VmError> {
859    let Some(VmValue::Closure(closure)) = Some(callback.clone()) else {
860        return Err(VmError::Runtime(
861            "compact_callback must be a closure when compact_strategy is 'custom'".to_string(),
862        ));
863    };
864    let Some(ctx) = ctx else {
865        return Err(VmError::Runtime(
866            "custom transcript compaction requires an async builtin VM context".to_string(),
867        ));
868    };
869    let mut vm = ctx.child_vm();
870    let messages_vm = VmValue::List(std::sync::Arc::new(
871        old_messages
872            .iter()
873            .map(crate::stdlib::json_to_vm_value)
874            .collect(),
875    ));
876    let result = if policy.has_metadata()
877        && (closure.func.params.len() >= 3 || closure.func.has_rest_param)
878    {
879        let reminders_vm = VmValue::List(std::sync::Arc::new(reminders.to_vec()));
880        let policy_vm = compaction_policy_to_vm_value(policy);
881        vm.call_closure_pub(&closure, &[messages_vm, reminders_vm, policy_vm])
882            .await
883    } else if closure.func.params.len() >= 2 || closure.func.has_rest_param {
884        let reminders_vm = VmValue::List(std::sync::Arc::new(reminders.to_vec()));
885        vm.call_closure_pub(&closure, &[messages_vm, reminders_vm])
886            .await
887    } else {
888        vm.call_closure_pub(&closure, &[messages_vm]).await
889    };
890    let summary = compact_summary_text_from_value(&result?)?;
891    ctx.forward_output(&vm.take_output());
892    if summary.trim().is_empty() {
893        Ok(truncate_compaction_summary(old_messages, archived_count))
894    } else {
895        Ok(format!(
896            "[auto-compacted {archived_count} older messages]\n{summary}"
897        ))
898    }
899}
900
/// Marker the host emits inside a tool-output (or message) body to pin its
/// live grounding — the current file view and just-edited window — so it
/// survives a compaction pass. Burin renders this literal substring inside
/// markdown headings (e.g. `## Exact current file text [no-compact]`,
/// `## Edited region now reads (...) [no-compact]`) in
/// `lib/tools/result-format.harn`. Compaction matches the substring; it does
/// not invent a new vocabulary.
///
/// Matching is plain, case-sensitive substring containment (see
/// [`is_pinned_content`]), so the marker is honored wherever it appears in a
/// body, not only inside a heading.
pub(crate) const NO_COMPACT_MARKER: &str = "[no-compact]";
909
/// Upper bound on how many of the most-recent pinned segments survive a
/// compaction pass verbatim. A pin that could never be evicted would let a
/// long session accumulate unbounded pinned snapshots (e.g. one edited-window
/// per edit) and eventually overflow the context window — defeating the
/// purpose of compaction. Keeping only the latest few preserves the agent's
/// *current* grounding (the file it is editing now, emitted as the exact-text
/// block plus the numbered-lines block in one or two adjacent outputs) while
/// letting stale duplicates from earlier in the session compact normally.
///
/// The bound is applied separately by each pass that honors pins: observation
/// masking counts pinned bodies among the archived messages, and tool-output
/// clamping counts pinned `tool` messages among the messages it is handed.
pub(crate) const MAX_PINNED_SEGMENTS: usize = 3;
919
/// Whether a content body carries the host's `[no-compact]` pin marker.
///
/// This is a case-sensitive substring test against [`NO_COMPACT_MARKER`]; the
/// marker may appear anywhere in `content`.
fn is_pinned_content(content: &str) -> bool {
    content.contains(NO_COMPACT_MARKER)
}
924
925/// Compute the set of message indices into `messages` that are pinned AND fall
926/// within the most-recent [`MAX_PINNED_SEGMENTS`] pinned bodies. Older pinned
927/// bodies are intentionally excluded so they compact normally (the bound).
928/// `content_of` extracts the body text to inspect for each message.
929fn latest_pinned_indices<'a, F>(
930    messages: impl Iterator<Item = &'a serde_json::Value>,
931    content_of: F,
932) -> std::collections::HashSet<usize>
933where
934    F: Fn(&serde_json::Value) -> Option<&str>,
935{
936    // Walk newest-first, collecting up to MAX_PINNED_SEGMENTS pinned indices.
937    let pinned: Vec<usize> = messages
938        .enumerate()
939        .filter(|(_, msg)| content_of(msg).is_some_and(is_pinned_content))
940        .map(|(idx, _)| idx)
941        .collect();
942    pinned.into_iter().rev().take(MAX_PINNED_SEGMENTS).collect()
943}
944
/// Decide whether a tool-result body is kept verbatim during observation
/// masking.
///
/// Size is the only heuristic: bodies shorter than 500 bytes (UTF-8 length,
/// not character count) are kept, since they are typically error messages,
/// status lines, or concise answers that are cheap to retain and risky to
/// mask. Anything longer is masked to save context budget.
fn content_should_preserve(content: &str) -> bool {
    // Bodies of this many bytes or more are masked.
    const MASK_FROM_BYTES: usize = 500;
    content.len() < MASK_FROM_BYTES
}
953
/// Default per-message masking for tool results.
///
/// Bodies of three lines or fewer are returned whole as `[role] content`.
/// Longer bodies are reduced to a preview of their first line (at most 120
/// bytes) followed by a `[N lines, M chars masked]` note, where `M` is the
/// body's UTF-8 byte length.
///
/// The preview is cut on a character boundary: slicing a `&str` in the middle
/// of a multi-byte character panics, so the cut point is moved back to the
/// nearest boundary at or before byte 120.
fn default_mask_tool_result(role: &str, content: &str) -> String {
    const PREVIEW_MAX_BYTES: usize = 120;

    let line_count = content.lines().count();
    if line_count <= 3 {
        return format!("[{role}] {content}");
    }

    let first_line = content.lines().next().unwrap_or(content);
    let mut preview_end = first_line.len().min(PREVIEW_MAX_BYTES);
    // Index 0 is always a boundary, so this loop terminates.
    while !first_line.is_char_boundary(preview_end) {
        preview_end -= 1;
    }
    let preview = &first_line[..preview_end];
    let char_count = content.len();
    format!("[{role}] {preview}... [{line_count} lines, {char_count} chars masked]")
}
966
/// Deterministic observation-mask compaction.
///
/// Test-only convenience wrapper around
/// [`observation_mask_compaction_with_callback`] that supplies no custom
/// per-message masks, so every masked body uses the default mask.
#[cfg(test)]
pub(crate) fn observation_mask_compaction(
    old_messages: &[serde_json::Value],
    archived_count: usize,
) -> String {
    observation_mask_compaction_with_callback(old_messages, archived_count, None)
}
975
976fn observation_mask_compaction_with_callback(
977    old_messages: &[serde_json::Value],
978    archived_count: usize,
979    mask_results: Option<&[Option<String>]>,
980) -> String {
981    let mut parts = Vec::new();
982    parts.push(format!(
983        "[auto-compacted {archived_count} older messages via observation masking]"
984    ));
985    // Pin the agent's most-recent live grounding: any archived body carrying
986    // the host's `[no-compact]` marker (the current file view / edited window)
987    // survives masking verbatim, bounded to the latest MAX_PINNED_SEGMENTS so a
988    // long session's stale snapshots still compact.
989    let pinned = latest_pinned_indices(old_messages.iter(), |msg| {
990        msg.get("content").and_then(|v| v.as_str())
991    });
992    for (idx, msg) in old_messages.iter().enumerate() {
993        let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("user");
994        let content = msg
995            .get("content")
996            .and_then(|v| v.as_str())
997            .unwrap_or_default();
998        if content.is_empty() {
999            continue;
1000        }
1001        if pinned.contains(&idx) {
1002            parts.push(format!("[{role}] {content}"));
1003            continue;
1004        }
1005        if role == "assistant" {
1006            parts.push(format!("[assistant] {content}"));
1007            continue;
1008        }
1009        if content_should_preserve(content) {
1010            parts.push(format!("[{role}] {content}"));
1011        } else if let Some(Some(custom)) = mask_results.and_then(|r| r.get(idx)) {
1012            parts.push(custom.clone());
1013        } else {
1014            parts.push(default_mask_tool_result(role, content));
1015        }
1016    }
1017    parts.join("\n")
1018}
1019
1020/// Invoke the mask_callback to get per-message custom masks.
1021async fn invoke_mask_callback(
1022    ctx: Option<&AsyncBuiltinCtx>,
1023    callback: &VmValue,
1024    old_messages: &[serde_json::Value],
1025) -> Result<Vec<Option<String>>, VmError> {
1026    let VmValue::Closure(closure) = callback.clone() else {
1027        return Err(VmError::Runtime(
1028            "mask_callback must be a closure".to_string(),
1029        ));
1030    };
1031    let Some(ctx) = ctx else {
1032        return Err(VmError::Runtime(
1033            "mask_callback requires an async builtin VM context".to_string(),
1034        ));
1035    };
1036    let mut vm = ctx.child_vm();
1037    let messages_vm = VmValue::List(std::sync::Arc::new(
1038        old_messages
1039            .iter()
1040            .map(crate::stdlib::json_to_vm_value)
1041            .collect(),
1042    ));
1043    let result = vm.call_closure_pub(&closure, &[messages_vm]).await?;
1044    ctx.forward_output(&vm.take_output());
1045    let list = match result {
1046        VmValue::List(items) => items,
1047        _ => return Ok(vec![None; old_messages.len()]),
1048    };
1049    Ok(list
1050        .iter()
1051        .map(|v| match v {
1052            VmValue::String(s) => Some(s.to_string()),
1053            VmValue::Nil => None,
1054            _ => None,
1055        })
1056        .collect())
1057}
1058
1059/// Rewrite each tool-result message in `messages` whose content exceeds
1060/// `config.tool_output_max_chars`, using `config.compress_callback` when set
1061/// (and a VM context is available) else the deterministic
1062/// [`microcompact_tool_output`]. Only the `content` text is replaced; the
1063/// message's `role`/`tool_call_id` are left untouched so tool-call pairing is
1064/// preserved. A `tool_output_max_chars` of 0 disables the pass.
1065async fn clamp_tool_outputs(
1066    ctx: Option<&AsyncBuiltinCtx>,
1067    messages: &mut [serde_json::Value],
1068    config: &AutoCompactConfig,
1069) -> Result<(), VmError> {
1070    if config.tool_output_max_chars == 0 {
1071        return Ok(());
1072    }
1073    // Exempt the most-recent pinned tool-outputs (those carrying the host's
1074    // `[no-compact]` marker) from length-clamping so the agent's live file view
1075    // stays intact. Bounded to the latest MAX_PINNED_SEGMENTS so older pinned
1076    // snapshots in the kept window still clamp and can't blow the budget.
1077    let pinned = latest_pinned_indices(messages.iter(), |msg| {
1078        if msg.get("role").and_then(|role| role.as_str()) == Some("tool") {
1079            msg.get("content").and_then(|content| content.as_str())
1080        } else {
1081            None
1082        }
1083    });
1084    for (idx, message) in messages.iter_mut().enumerate() {
1085        if message.get("role").and_then(|role| role.as_str()) != Some("tool") {
1086            continue;
1087        }
1088        let Some(content) = message.get("content").and_then(|content| content.as_str()) else {
1089            continue;
1090        };
1091        if content.len() <= config.tool_output_max_chars {
1092            continue;
1093        }
1094        if pinned.contains(&idx) {
1095            continue;
1096        }
1097        let content = content.to_string();
1098        let replacement = match (config.compress_callback.as_ref(), ctx) {
1099            (Some(callback), Some(ctx)) => {
1100                invoke_compress_callback(ctx, callback, &content, config.tool_output_max_chars)
1101                    .await?
1102            }
1103            _ => microcompact_tool_output(&content, config.tool_output_max_chars),
1104        };
1105        message["content"] = serde_json::Value::String(replacement);
1106    }
1107    Ok(())
1108}
1109
1110/// Invoke `compress_callback(content, max_chars)` to replace one oversized
1111/// tool-output body, mirroring [`invoke_mask_callback`]'s child-VM closure
1112/// invocation. A non-string return falls back to the deterministic primitive.
1113async fn invoke_compress_callback(
1114    ctx: &AsyncBuiltinCtx,
1115    callback: &VmValue,
1116    content: &str,
1117    max_chars: usize,
1118) -> Result<String, VmError> {
1119    let VmValue::Closure(closure) = callback.clone() else {
1120        return Err(VmError::Runtime(
1121            "compress_callback must be a closure".to_string(),
1122        ));
1123    };
1124    let mut vm = ctx.child_vm();
1125    let args = [
1126        VmValue::String(std::sync::Arc::from(content)),
1127        VmValue::Int(max_chars as i64),
1128    ];
1129    let result = vm.call_closure_pub(&closure, &args).await?;
1130    ctx.forward_output(&vm.take_output());
1131    match result {
1132        VmValue::String(text) => Ok(text.to_string()),
1133        _ => Ok(microcompact_tool_output(content, max_chars)),
1134    }
1135}
1136
/// Everything a single compaction strategy run may need, bundled so the same
/// inputs can be replayed against a fallback strategy. The struct is `Copy`,
/// so it can be passed by value and still reused afterwards.
#[derive(Clone, Copy)]
struct CompactionStrategyInputs<'a> {
    /// VM context used to run user callbacks in a child VM; the custom and
    /// mask-callback paths fail without it.
    ctx: Option<&'a AsyncBuiltinCtx>,
    /// Strategy to apply.
    strategy: &'a CompactStrategy,
    /// Messages being archived (summarized or masked).
    old_messages: &'a [serde_json::Value],
    /// Number of archived messages, reported in the summary header.
    archived_count: usize,
    /// LLM call options; required by the `Llm` strategy.
    llm_opts: Option<&'a crate::llm::api::LlmCallOptions>,
    /// User closure; required by the `Custom` strategy.
    custom_compactor: Option<&'a VmValue>,
    /// Reminders handed to the custom compactor when it accepts them.
    custom_compactor_reminders: &'a [VmValue],
    /// Optional closure producing per-message masks for `ObservationMask`.
    mask_callback: Option<&'a VmValue>,
    /// Optional prompt override forwarded to the `Llm` strategy.
    summarize_prompt: Option<&'a str>,
    /// Compaction policy forwarded to the `Llm` and `Custom` strategies.
    policy: &'a CompactionPolicy,
}
1150
1151/// Apply a single compaction strategy to a list of archived messages.
1152async fn apply_compaction_strategy(input: CompactionStrategyInputs<'_>) -> Result<String, VmError> {
1153    let CompactionStrategyInputs {
1154        strategy,
1155        old_messages,
1156        archived_count,
1157        llm_opts,
1158        custom_compactor,
1159        custom_compactor_reminders,
1160        mask_callback,
1161        summarize_prompt,
1162        policy,
1163        ctx,
1164    } = input;
1165    match strategy {
1166        CompactStrategy::Truncate => Ok(truncate_compaction_summary(old_messages, archived_count)),
1167        CompactStrategy::Llm => {
1168            llm_compaction_summary(
1169                old_messages,
1170                archived_count,
1171                llm_opts.ok_or_else(|| {
1172                    VmError::Runtime(
1173                        "LLM transcript compaction requires active LLM call options".to_string(),
1174                    )
1175                })?,
1176                summarize_prompt,
1177                policy,
1178            )
1179            .await
1180        }
1181        CompactStrategy::Custom => {
1182            custom_compaction_summary(
1183                ctx,
1184                old_messages,
1185                archived_count,
1186                custom_compactor.ok_or_else(|| {
1187                    VmError::Runtime(
1188                        "compact_callback is required when compact_strategy is 'custom'"
1189                            .to_string(),
1190                    )
1191                })?,
1192                custom_compactor_reminders,
1193                policy,
1194            )
1195            .await
1196        }
1197        CompactStrategy::ObservationMask => {
1198            let mask_results = if let Some(cb) = mask_callback {
1199                Some(invoke_mask_callback(ctx, cb, old_messages).await?)
1200            } else {
1201                None
1202            };
1203            Ok(observation_mask_compaction_with_callback(
1204                old_messages,
1205                archived_count,
1206                mask_results.as_deref(),
1207            ))
1208        }
1209    }
1210}
1211
1212async fn apply_compaction_strategy_with_fallback(
1213    input: CompactionStrategyInputs<'_>,
1214    fallback_strategy: Option<&CompactStrategy>,
1215) -> Result<(String, CompactStrategy), VmError> {
1216    match apply_compaction_strategy(input).await {
1217        Ok(summary) => Ok((summary, input.strategy.clone())),
1218        Err(primary_error) => {
1219            let Some(fallback) = fallback_strategy.filter(|fallback| *fallback != input.strategy)
1220            else {
1221                return Err(primary_error);
1222            };
1223            let fallback_input = CompactionStrategyInputs {
1224                strategy: fallback,
1225                ..input
1226            };
1227            apply_compaction_strategy(fallback_input)
1228                .await
1229                .map(|summary| (summary, fallback.clone()))
1230        }
1231    }
1232}
1233
/// Outcome of an auto-compaction pass that archived at least one message.
pub(crate) struct AutoCompactResult {
    /// Summary text that was inserted into the transcript as a `user` message.
    pub summary: String,
    /// Strategy that produced `summary`: the configured strategy, the fallback
    /// strategy, or the hard-limit strategy when the second tier ran.
    pub strategy: CompactStrategy,
}
1238
/// Auto-compact a message list in place using two-tier compaction.
///
/// Test-only wrapper around [`auto_compact_messages_with_result_with_ctx`]
/// that runs without a VM context, so callback-based strategies that need a
/// child VM are unavailable.
#[cfg(test)]
pub(crate) async fn auto_compact_messages_with_result(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<Option<AutoCompactResult>, VmError> {
    auto_compact_messages_with_result_with_ctx(None, messages, config, llm_opts).await
}
1248
1249pub(crate) async fn auto_compact_messages_with_result_with_ctx(
1250    ctx: Option<&AsyncBuiltinCtx>,
1251    messages: &mut Vec<serde_json::Value>,
1252    config: &AutoCompactConfig,
1253    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
1254) -> Result<Option<AutoCompactResult>, VmError> {
1255    if config.token_threshold > 0 && estimate_message_tokens(messages) <= config.token_threshold {
1256        return Ok(None);
1257    }
1258    if messages.len() <= config.keep_first.saturating_add(config.keep_last) {
1259        return Ok(None);
1260    }
1261    let compact_start = config.keep_first.min(messages.len());
1262    let original_split = messages.len().saturating_sub(config.keep_last);
1263    let mut split_at = original_split;
1264    // Snap back to a user-role boundary so the kept suffix begins at a clean
1265    // turn. OpenAI-compatible APIs reject tool results orphaned from their
1266    // assistant request, so splitting mid-turn corrupts the transcript.
1267    while split_at > compact_start
1268        && split_at < messages.len()
1269        && messages[split_at]
1270            .get("role")
1271            .and_then(|r| r.as_str())
1272            .is_none_or(|r| r != "user")
1273    {
1274        split_at -= 1;
1275    }
1276    // Fall back to the naive split (e.g. tool-heavy transcripts with the sole
1277    // user message at index 0) rather than skipping compaction entirely.
1278    if split_at == compact_start {
1279        split_at = original_split;
1280    }
1281    if let Some(volatile_start) = messages[split_at..]
1282        .iter()
1283        .position(is_reasoning_or_tool_turn_message)
1284        .map(|offset| split_at + offset)
1285    {
1286        if let Some(boundary) = volatile_start
1287            .checked_sub(1)
1288            .and_then(|idx| find_prev_user_boundary(messages, idx))
1289            .filter(|boundary| *boundary > compact_start)
1290        {
1291            split_at = boundary;
1292        }
1293    }
1294    if split_at <= compact_start {
1295        return Ok(None);
1296    }
1297    let old_messages: Vec<_> = messages.drain(compact_start..split_at).collect();
1298    let archived_count = old_messages.len();
1299
1300    // Clamp oversized tool-result bodies in the *kept* window so the live
1301    // context honors the policy's `tool_output_max_chars` (and the
1302    // `compress_callback` override), not just the archived/summarized window —
1303    // the two config fields were previously parsed and defaulted but never
1304    // applied here. Runs before the hard-limit estimate so tier-2 escalation
1305    // keys off the post-clamp size. Only the text body is rewritten; `role`
1306    // and `tool_call_id` are preserved so tool_call/tool_result pairing stays
1307    // intact.
1308    clamp_tool_outputs(ctx, messages, config).await?;
1309
1310    let (mut summary, mut strategy) = apply_compaction_strategy_with_fallback(
1311        CompactionStrategyInputs {
1312            ctx,
1313            strategy: &config.compact_strategy,
1314            old_messages: &old_messages,
1315            archived_count,
1316            llm_opts,
1317            custom_compactor: config.custom_compactor.as_ref(),
1318            custom_compactor_reminders: &config.custom_compactor_reminders,
1319            mask_callback: config.mask_callback.as_ref(),
1320            summarize_prompt: config.summarize_prompt.as_deref(),
1321            policy: &config.policy,
1322        },
1323        config.fallback_strategy.as_ref(),
1324    )
1325    .await?;
1326
1327    if let Some(hard_limit) = config.hard_limit_tokens {
1328        let summary_msg = serde_json::json!({"role": "user", "content": &summary});
1329        let mut estimate_msgs = vec![summary_msg];
1330        estimate_msgs.extend_from_slice(messages.as_slice());
1331        let estimated = estimate_message_tokens(&estimate_msgs);
1332        if estimated > hard_limit {
1333            let tier1_as_messages = vec![serde_json::json!({
1334                "role": "user",
1335                "content": summary,
1336            })];
1337            let (hard_limit_summary, hard_limit_strategy) =
1338                apply_compaction_strategy_with_fallback(
1339                    CompactionStrategyInputs {
1340                        ctx,
1341                        strategy: &config.hard_limit_strategy,
1342                        old_messages: &tier1_as_messages,
1343                        archived_count,
1344                        llm_opts,
1345                        custom_compactor: config.custom_compactor.as_ref(),
1346                        custom_compactor_reminders: &config.custom_compactor_reminders,
1347                        mask_callback: None,
1348                        summarize_prompt: config.summarize_prompt.as_deref(),
1349                        policy: &config.policy,
1350                    },
1351                    config.fallback_strategy.as_ref(),
1352                )
1353                .await?;
1354            summary = hard_limit_summary;
1355            strategy = hard_limit_strategy;
1356        }
1357    }
1358
1359    summary = apply_model_visible_policy(summary, &config.policy);
1360
1361    messages.insert(
1362        compact_start,
1363        serde_json::json!({
1364            "role": "user",
1365            "content": summary,
1366        }),
1367    );
1368    Ok(Some(AutoCompactResult { summary, strategy }))
1369}
1370
/// Auto-compact a message list in place using two-tier compaction, returning
/// only the summary text.
///
/// Test-only convenience over [`auto_compact_messages_with_result`] for
/// callers that do not care which strategy produced the summary.
#[cfg(test)]
pub(crate) async fn auto_compact_messages(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<Option<String>, VmError> {
    let outcome = auto_compact_messages_with_result(messages, config, llm_opts).await?;
    Ok(outcome.map(|AutoCompactResult { summary, .. }| summary))
}
1384
1385fn apply_model_visible_policy(mut summary: String, policy: &CompactionPolicy) -> String {
1386    if !policy.is_model_visible_scope() {
1387        return summary;
1388    }
1389    let Some(directives) = policy.prompt_directives() else {
1390        return summary;
1391    };
1392    summary.push_str("\n\n[compaction instructions]\n");
1393    summary.push_str(&directives);
1394    summary
1395}
1396
1397#[cfg(test)]
1398mod tests {
1399    use super::*;
1400
1401    #[test]
1402    fn microcompact_short_output_unchanged() {
1403        let output = "line1\nline2\nline3\n";
1404        assert_eq!(microcompact_tool_output(output, 1000), output);
1405    }
1406
1407    #[test]
1408    fn microcompact_snaps_to_line_boundaries() {
1409        let lines: Vec<String> = (0..20)
1410            .map(|i| format!("line {i:02} content here"))
1411            .collect();
1412        let output = lines.join("\n");
1413        let result = microcompact_tool_output(&output, 200);
1414        assert!(result.contains("[... "), "should have snip marker");
1415        let parts: Vec<&str> = result.split("\n\n[... ").collect();
1416        assert!(parts.len() >= 2, "should split at marker");
1417        let head = parts[0];
1418        for line in head.lines() {
1419            assert!(
1420                line.starts_with("line "),
1421                "head line should be complete: {line}"
1422            );
1423        }
1424    }
1425
1426    #[test]
1427    fn microcompact_preserves_diagnostic_lines_with_line_boundaries() {
1428        let mut lines = Vec::new();
1429        for i in 0..50 {
1430            lines.push(format!("verbose output line {i}"));
1431        }
1432        lines.push("src/main.rs:42: error: cannot find value".to_string());
1433        for i in 50..100 {
1434            lines.push(format!("verbose output line {i}"));
1435        }
1436        let output = lines.join("\n");
1437        let result = microcompact_tool_output(&output, 600);
1438        assert!(result.contains("cannot find value"), "diagnostic preserved");
1439        assert!(
1440            result.contains("[diagnostic lines preserved]"),
1441            "has diagnostic marker"
1442        );
1443    }
1444
1445    #[test]
1446    fn token_estimate_counts_structured_message_content() {
1447        let text = "x".repeat(400);
1448        let messages = vec![serde_json::json!({
1449            "role": "user",
1450            "content": [
1451                {"type": "text", "text": text},
1452                {"type": "input_text", "text": "tail"},
1453            ],
1454            "reasoning": {"text": "scratch"},
1455            "tool_calls": [{
1456                "id": "call_1",
1457                "type": "function",
1458                "function": {"name": "read", "arguments": "{\"path\":\"src/main.rs\"}"}
1459            }],
1460        })];
1461
1462        assert!(
1463            estimate_message_tokens(&messages) >= 100,
1464            "structured content must not count as zero"
1465        );
1466    }
1467
1468    #[test]
1469    fn compaction_policy_instructions_extend_by_default() {
1470        let policy = CompactionPolicy {
1471            instructions: Some("Keep the failing test names.".to_string()),
1472            ..Default::default()
1473        };
1474        let prompt = render_llm_compaction_prompt(None, "[user] old context", 1, &policy)
1475            .expect("prompt renders");
1476
1477        assert_eq!(policy.instruction_mode(), "extend");
1478        assert!(prompt.contains("Preserve goals, constraints"));
1479        assert!(prompt.contains("Additional compaction instructions"));
1480        assert!(prompt.contains("Keep the failing test names."));
1481    }
1482
1483    #[test]
1484    fn compaction_policy_can_replace_default_instructions() {
1485        let policy = CompactionPolicy {
1486            instructions: Some("Only keep repro steps.".to_string()),
1487            extend_default_instructions: Some(false),
1488            ..Default::default()
1489        };
1490        let prompt = render_llm_compaction_prompt(None, "[user] old context", 1, &policy)
1491            .expect("prompt renders");
1492
1493        assert_eq!(policy.instruction_mode(), "replace");
1494        assert!(prompt.contains("according to these instructions"));
1495        assert!(prompt.contains("Only keep repro steps."));
1496        assert!(!prompt.contains("Preserve goals, constraints"));
1497    }
1498
1499    #[test]
1500    fn snap_to_line_end_finds_newline() {
1501        let s = "line1\nline2\nline3\nline4\n";
1502        let head = snap_to_line_end(s, 12);
1503        assert!(head.ends_with('\n'), "should end at newline");
1504        assert!(head.contains("line1"));
1505    }
1506
1507    #[test]
1508    fn snap_to_line_start_finds_newline() {
1509        let s = "line1\nline2\nline3\nline4\n";
1510        let tail = snap_to_line_start(s, 12);
1511        assert!(
1512            tail.starts_with("line"),
1513            "should start at line boundary: {tail}"
1514        );
1515    }
1516
1517    #[test]
1518    fn auto_compact_preserves_reasoning_tool_suffix() {
1519        let mut messages = vec![
1520            serde_json::json!({"role": "user", "content": "old task"}),
1521            serde_json::json!({"role": "assistant", "content": "old reply"}),
1522            serde_json::json!({"role": "user", "content": "new task"}),
1523            serde_json::json!({
1524                "role": "assistant",
1525                "content": "",
1526                "reasoning": "think first",
1527                "tool_calls": [{
1528                    "id": "call_1",
1529                    "type": "function",
1530                    "function": {"name": "read", "arguments": "{\"path\":\"foo.rs\"}"}
1531                }],
1532            }),
1533            serde_json::json!({"role": "tool", "tool_call_id": "call_1", "content": "file"}),
1534        ];
1535        let config = AutoCompactConfig {
1536            token_threshold: 1,
1537            keep_last: 2,
1538            ..Default::default()
1539        };
1540
1541        let runtime = tokio::runtime::Builder::new_current_thread()
1542            .enable_all()
1543            .build()
1544            .expect("runtime");
1545        let summary = runtime
1546            .block_on(auto_compact_messages(&mut messages, &config, None))
1547            .expect("compaction succeeds");
1548
1549        assert!(summary.is_some());
1550        assert_eq!(messages[1]["role"], "user");
1551        assert_eq!(messages[2]["role"], "assistant");
1552        assert_eq!(messages[2]["tool_calls"][0]["id"], "call_1");
1553        assert_eq!(messages[3]["role"], "tool");
1554        assert_eq!(messages[3]["tool_call_id"], "call_1");
1555    }
1556
1557    #[test]
1558    fn auto_compact_clamps_oversized_tool_output_to_max_chars() {
1559        // A large tool result in the *kept* window must be clamped to honor
1560        // `tool_output_max_chars` — the engine previously ignored that config.
1561        let big = "x".repeat(4000);
1562        let big_len = big.len();
1563        let mut messages = vec![
1564            serde_json::json!({"role": "user", "content": "old task"}),
1565            serde_json::json!({"role": "assistant", "content": "old reply"}),
1566            serde_json::json!({"role": "user", "content": "new task"}),
1567            serde_json::json!({"role": "assistant", "content": "calling tool"}),
1568            serde_json::json!({"role": "tool", "tool_call_id": "call_1", "content": big}),
1569        ];
1570        let config = AutoCompactConfig {
1571            token_threshold: 1,
1572            keep_last: 2,
1573            tool_output_max_chars: 500,
1574            ..Default::default()
1575        };
1576
1577        let runtime = tokio::runtime::Builder::new_current_thread()
1578            .enable_all()
1579            .build()
1580            .expect("runtime");
1581        let result = runtime
1582            .block_on(auto_compact_messages(&mut messages, &config, None))
1583            .expect("compaction succeeds");
1584        assert!(result.is_some(), "compaction should trigger");
1585
1586        let tool_msg = messages
1587            .iter()
1588            .find(|message| message["role"] == "tool")
1589            .expect("tool message kept in window");
1590        // Pairing preserved...
1591        assert_eq!(tool_msg["tool_call_id"], "call_1");
1592        // ...and the oversized body was clamped well below its original size.
1593        let content = tool_msg["content"].as_str().expect("string content");
1594        assert!(
1595            content.len() < big_len,
1596            "tool output should be clamped: {} vs {}",
1597            content.len(),
1598            big_len
1599        );
1600        assert!(content.len() < 2000, "clamped near tool_output_max_chars");
1601    }
1602
/// (1) A message carrying the no-compact marker comes through an
/// observation-mask pass verbatim, while the verbose, unmarked message next
/// to it is replaced by a mask placeholder.
#[test]
fn observation_mask_preserves_pinned_live_file_view() {
    // Forty numbered lines standing in for the freshly edited file region.
    let file_view_lines: Vec<String> = (0..40)
        .map(|line_no| format!("   {line_no}  let x = compute({line_no});"))
        .collect();
    let pinned_view = format!(
        "## Edited region now reads (line 42, ±6 context) {NO_COMPACT_MARKER}\n```\n{}\n```",
        file_view_lines.join("\n")
    );

    // Sixty lines of unmarked noise: long enough for the mask pass to act on.
    let noisy_scan = (0..60)
        .map(|line_no| format!("verbose scan output line {line_no}"))
        .collect::<Vec<_>>()
        .join("\n");

    // Both messages are handed to the mask pass as already-archived history.
    let archived = vec![
        serde_json::json!({"role": "user", "content": noisy_scan}),
        serde_json::json!({"role": "user", "content": pinned_view}),
    ];
    let summary = observation_mask_compaction(&archived, archived.len());

    // The pinned view must be present in full: heading and last body line.
    let heading_kept = summary.contains("Edited region now reads");
    assert!(heading_kept, "pinned heading survived: {summary}");
    let body_kept = summary.contains("let x = compute(39);");
    assert!(body_kept, "pinned body survived verbatim");

    // The unmarked neighbour must have been masked, losing its middle lines.
    let mask_present = summary.contains("masked]");
    assert!(mask_present, "unpinned output was masked");
    assert!(!summary.contains("verbose scan output line 30"));
}
1638
/// (2) Clamping respects the pin: an oversized tool output tagged with the
/// no-compact marker keeps its full length after compaction, whereas an
/// equally oversized untagged tool output in the same window is shortened.
#[test]
fn clamp_exempts_pinned_tool_output() {
    /// Finds the message whose `tool_call_id` equals `call_id`, if any.
    fn by_call_id<'a>(
        messages: &'a [serde_json::Value],
        call_id: &str,
    ) -> Option<&'a serde_json::Value> {
        messages
            .iter()
            .find(|message| message["tool_call_id"] == call_id)
    }

    // Two tool payloads far above the 500-char clamp configured below.
    let unpinned_payload = "y".repeat(4000);
    let unpinned_original_len = unpinned_payload.len();
    let pinned_payload = format!(
        "## Exact current file text {NO_COMPACT_MARKER}\n{}",
        "x".repeat(4000)
    );
    let pinned_original_len = pinned_payload.len();

    let mut messages = vec![
        serde_json::json!({"role": "user", "content": "old task"}),
        serde_json::json!({"role": "assistant", "content": "reply"}),
        serde_json::json!({"role": "user", "content": "new task"}),
        serde_json::json!({"role": "assistant", "content": "calling tools"}),
        serde_json::json!({"role": "tool", "tool_call_id": "c0", "content": unpinned_payload}),
        serde_json::json!({"role": "tool", "tool_call_id": "c1", "content": pinned_payload}),
        serde_json::json!({"role": "user", "content": "continue"}),
    ];

    // A threshold of 1 makes the transcript exceed the budget immediately.
    let config = AutoCompactConfig {
        tool_output_max_chars: 500,
        keep_last: 4,
        token_threshold: 1,
        ..Default::default()
    };

    let rt = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
        .expect("runtime");
    rt.block_on(auto_compact_messages(&mut messages, &config, None))
        .expect("compaction succeeds");

    // Pinned output: byte length must be exactly what went in.
    let pinned_after = by_call_id(&messages, "c1").expect("pinned tool message kept");
    let pinned_after_len = pinned_after["content"].as_str().map(str::len);
    assert_eq!(
        pinned_after_len,
        Some(pinned_original_len),
        "pinned output must be intact (unclamped)"
    );

    // Unpinned output: must have shrunk.
    let unpinned_after = by_call_id(&messages, "c0").expect("unpinned tool message kept");
    let unpinned_after_len = unpinned_after["content"].as_str().map(str::len).unwrap();
    assert!(
        unpinned_after_len < unpinned_original_len,
        "unpinned output of the same size must be clamped"
    );
}
1692
/// (3) Bounded policy: with MANY pinned outputs, only the latest
/// `MAX_PINNED_SEGMENTS` survive verbatim; older pinned duplicates compact —
/// so the pin can't prevent all compaction (and can't overflow the window
/// on a very long session).
///
/// The number of snapshots and the expected kept/dropped index ranges are
/// derived from `MAX_PINNED_SEGMENTS` rather than hard-coded, so the test
/// keeps checking the bound if the constant is retuned.
#[test]
fn pin_bound_keeps_only_latest_segments() {
    // How many snapshots to build beyond the bound; these are the ones that
    // must lose their pin. With a bound of 3 this yields the 6 snapshots
    // (generation 0 = oldest .. 5 = newest) the test has always used.
    const EXTRA_OLD_SNAPSHOTS: usize = 3;
    let total = MAX_PINNED_SEGMENTS + EXTRA_OLD_SNAPSHOTS;
    // Generations below this index are expected to be masked.
    let first_kept = total - MAX_PINNED_SEGMENTS;

    // Each snapshot is a distinct, marker-tagged edited-window view long
    // enough that masking would otherwise truncate it. The parameter is
    // deliberately not called `gen`: that identifier is a reserved keyword
    // from the Rust 2024 edition onwards.
    let snapshot = |generation: usize| {
        let body = (0..40)
            .map(|i| format!("marker-gen-{generation} body line {i}"))
            .collect::<Vec<_>>()
            .join("\n");
        serde_json::json!({
            "role": "user",
            "content": format!(
                "## Edited region now reads (gen {generation}) {}\n{}",
                NO_COMPACT_MARKER, body
            ),
        })
    };
    let archived: Vec<_> = (0..total).map(snapshot).collect();

    // Unit-level: the index selection keeps exactly the latest N.
    let pinned = latest_pinned_indices(archived.iter(), |m| {
        m.get("content").and_then(|c| c.as_str())
    });
    assert_eq!(
        pinned.len(),
        MAX_PINNED_SEGMENTS,
        "only the latest MAX_PINNED_SEGMENTS are pinned"
    );
    for index in first_kept..total {
        assert!(
            pinned.contains(&index),
            "newest snapshot {index} must stay pinned"
        );
    }
    for index in 0..first_kept {
        assert!(
            !pinned.contains(&index),
            "older snapshot {index} must fall outside the pin bound"
        );
    }

    // End-to-end through the mask pass: the newest snapshots survive
    // verbatim; the oldest are masked, proving the pin cannot defeat all
    // compaction. The probe includes the trailing " body" so that, for
    // example, generation 1 can never match inside generation 10.
    let summary = observation_mask_compaction(&archived, archived.len());
    let body_probe = |generation: usize| format!("marker-gen-{generation} body");
    for generation in first_kept..total {
        assert!(
            summary.contains(body_probe(generation).as_str()),
            "latest {MAX_PINNED_SEGMENTS} pinned snapshots survive verbatim: {summary}"
        );
    }
    for generation in 0..first_kept {
        assert!(
            !summary.contains(body_probe(generation).as_str()),
            "older pinned snapshots are masked (bound enforced)"
        );
    }
    assert!(summary.contains("masked]"), "older snapshots were masked");
}
1747
/// (4) Regression guard: when no message carries the no-compact marker, an
/// oversized tool output kept in the window is still clamped.
#[test]
fn no_pins_preserves_prior_clamp_behavior() {
    // Builds a plain `{role, content}` chat message.
    let chat = |role: &str, content: &str| {
        serde_json::json!({"role": role, "content": content})
    };

    // A tool payload well above the 500-char limit configured below.
    let oversized = "x".repeat(4000);
    let original_len = oversized.len();

    let mut messages = vec![
        chat("user", "old task"),
        chat("assistant", "old reply"),
        chat("user", "new task"),
        chat("assistant", "calling tool"),
        serde_json::json!({"role": "tool", "tool_call_id": "call_1", "content": oversized}),
    ];

    // A threshold of 1 makes the transcript exceed the budget immediately.
    let config = AutoCompactConfig {
        tool_output_max_chars: 500,
        keep_last: 2,
        token_threshold: 1,
        ..Default::default()
    };

    let rt = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
        .expect("runtime");
    let outcome = rt
        .block_on(auto_compact_messages(&mut messages, &config, None))
        .expect("compaction succeeds");
    assert!(outcome.is_some());

    // The tool message must still be present, with a shortened body.
    let kept_tool = messages
        .iter()
        .find(|message| message["role"] == "tool")
        .expect("tool kept");
    let clamped_len = kept_tool["content"]
        .as_str()
        .expect("string content")
        .len();
    assert!(clamped_len < original_len, "unpinned output clamped as before");
    assert!(clamped_len < 2000, "clamped near tool_output_max_chars");
}
1782}
harn_vm/orchestration/compaction.rs

harn_vm/orchestration/
compaction.rs