Skip to main content

harn_vm/orchestration/
compaction.rs

1//! Auto-compaction — transcript size management strategies.
2
3use crate::value::VmDictExt;
4
5use serde::{Deserialize, Serialize};
6
7use crate::llm::{vm_call_llm_full, vm_value_to_json};
8use crate::value::{VmError, VmValue};
9use crate::vm::AsyncBuiltinCtx;
10
/// Engine-level strategy used to shrink a transcript during compaction.
///
/// The textual spelling of each variant is defined by
/// `parse_compact_strategy` and `compact_strategy_name`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompactStrategy {
    /// Spelled `"llm"`.
    Llm,
    /// Spelled `"truncate"`.
    Truncate,
    /// Spelled `"custom"`.
    Custom,
    /// Spelled `"observation_mask"`.
    ObservationMask,
}
18
19pub fn parse_compact_strategy(value: &str) -> Result<CompactStrategy, VmError> {
20    match value {
21        "llm" => Ok(CompactStrategy::Llm),
22        "truncate" => Ok(CompactStrategy::Truncate),
23        "custom" => Ok(CompactStrategy::Custom),
24        "observation_mask" => Ok(CompactStrategy::ObservationMask),
25        other => Err(VmError::Runtime(format!(
26            "unknown compact_strategy '{other}' (expected 'llm', 'truncate', 'custom', or 'observation_mask')"
27        ))),
28    }
29}
30
31pub fn compact_strategy_name(strategy: &CompactStrategy) -> &'static str {
32    match strategy {
33        CompactStrategy::Llm => "llm",
34        CompactStrategy::Truncate => "truncate",
35        CompactStrategy::Custom => "custom",
36        CompactStrategy::ObservationMask => "observation_mask",
37    }
38}
39
/// Option keys that name compaction-policy fields.
///
/// Exposed to callers through `compaction_policy_option_keys`. The entries
/// match the keys read by `apply_compaction_policy_fields`.
const COMPACTION_POLICY_KEYS: &[&str] = &[
    "instructions",
    "mode",
    "scope",
    "preserve",
    "drop",
    "extend_default_instructions",
    "author",
];
49
/// Host/user-supplied policy that guides compaction.
///
/// Every field is optional; missing fields take their `Default` value when
/// deserialized (`#[serde(default)]`).
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)]
pub struct CompactionPolicy {
    /// Free-form instruction text. Whitespace-only text does not count as a
    /// prompt directive.
    pub instructions: Option<String>,
    /// Optional mode label; copied into the policy metadata as-is.
    pub mode: Option<String>,
    /// Optional scope label. `model_visible`, `summary`, and `transcript`
    /// are the values `is_model_visible_scope` recognizes.
    pub scope: Option<String>,
    /// Items to preserve; rendered as a `Preserve: …` prompt directive.
    pub preserve: Vec<String>,
    /// Items to drop; serialized under the key `drop` and rendered as a
    /// `Drop: …` prompt directive.
    #[serde(rename = "drop")]
    pub drop_items: Vec<String>,
    /// When directives are present, `Some(false)` selects the `replace`
    /// instruction mode; any other value selects `extend`.
    pub extend_default_instructions: Option<bool>,
    /// Author label; reported as the instruction source when non-blank.
    pub author: Option<String>,
}
62
/// A compaction request: an optional mode label plus the policy to apply.
///
/// Missing fields take their `Default` value when deserialized
/// (`#[serde(default)]`).
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)]
pub struct CompactionRequest {
    /// Optional mode label for the request.
    pub mode: Option<String>,
    /// Policy carried by the request.
    pub policy: CompactionPolicy,
}
69
70impl CompactionPolicy {
71    pub fn has_metadata(&self) -> bool {
72        self.instructions.is_some()
73            || self.mode.is_some()
74            || self.scope.is_some()
75            || !self.preserve.is_empty()
76            || !self.drop_items.is_empty()
77            || self.extend_default_instructions.is_some()
78            || self.author.is_some()
79    }
80
81    fn has_prompt_directives(&self) -> bool {
82        self.instructions
83            .as_deref()
84            .is_some_and(|value| !value.trim().is_empty())
85            || !self.preserve.is_empty()
86            || !self.drop_items.is_empty()
87    }
88
89    pub fn instruction_mode(&self) -> &'static str {
90        if !self.has_prompt_directives() {
91            "default"
92        } else if self.extend_default_instructions == Some(false) {
93            "replace"
94        } else {
95            "extend"
96        }
97    }
98
99    pub fn instruction_source(&self) -> Option<&str> {
100        self.author
101            .as_deref()
102            .filter(|author| !author.trim().is_empty())
103    }
104
105    pub fn metadata_json(&self) -> Option<serde_json::Value> {
106        if !self.has_metadata() {
107            return None;
108        }
109        let mut map = serde_json::Map::new();
110        if let Some(instructions) = self.instructions.as_ref() {
111            map.insert(
112                "instructions".to_string(),
113                serde_json::Value::String(instructions.clone()),
114            );
115        }
116        if let Some(mode) = self.mode.as_ref() {
117            map.insert("mode".to_string(), serde_json::Value::String(mode.clone()));
118        }
119        if let Some(scope) = self.scope.as_ref() {
120            map.insert(
121                "scope".to_string(),
122                serde_json::Value::String(scope.clone()),
123            );
124        }
125        if !self.preserve.is_empty() {
126            map.insert(
127                "preserve".to_string(),
128                serde_json::to_value(&self.preserve).unwrap_or_default(),
129            );
130        }
131        if !self.drop_items.is_empty() {
132            map.insert(
133                "drop".to_string(),
134                serde_json::to_value(&self.drop_items).unwrap_or_default(),
135            );
136        }
137        if let Some(extend_default_instructions) = self.extend_default_instructions {
138            map.insert(
139                "extend_default_instructions".to_string(),
140                serde_json::Value::Bool(extend_default_instructions),
141            );
142        }
143        if let Some(author) = self.author.as_ref() {
144            map.insert(
145                "author".to_string(),
146                serde_json::Value::String(author.clone()),
147            );
148        }
149        map.insert(
150            "instruction_mode".to_string(),
151            serde_json::Value::String(self.instruction_mode().to_string()),
152        );
153        if let Some(source) = self.instruction_source() {
154            map.insert(
155                "instruction_source".to_string(),
156                serde_json::Value::String(source.to_string()),
157            );
158        }
159        Some(serde_json::Value::Object(map))
160    }
161
162    fn prompt_directives(&self) -> Option<String> {
163        if !self.has_prompt_directives() {
164            return None;
165        }
166        let mut parts = Vec::new();
167        if let Some(instructions) = self
168            .instructions
169            .as_deref()
170            .map(str::trim)
171            .filter(|value| !value.is_empty())
172        {
173            parts.push(instructions.to_string());
174        }
175        if !self.preserve.is_empty() {
176            parts.push(format!("Preserve: {}.", self.preserve.join("; ")));
177        }
178        if !self.drop_items.is_empty() {
179            parts.push(format!("Drop: {}.", self.drop_items.join("; ")));
180        }
181        Some(parts.join("\n"))
182    }
183
184    fn is_model_visible_scope(&self) -> bool {
185        matches!(
186            self.scope.as_deref(),
187            Some("model_visible" | "summary" | "transcript")
188        )
189    }
190}
191
/// Returns the option keys that name compaction-policy fields
/// (the contents of `COMPACTION_POLICY_KEYS`).
pub fn compaction_policy_option_keys() -> &'static [&'static str] {
    COMPACTION_POLICY_KEYS
}
195
196pub fn compaction_policy_to_vm_value(policy: &CompactionPolicy) -> VmValue {
197    let mut map = crate::value::DictMap::new();
198    if let Some(instructions) = policy.instructions.as_ref() {
199        map.put_str("instructions", instructions.clone());
200    }
201    if let Some(mode) = policy.mode.as_ref() {
202        map.put_str("mode", mode.clone());
203    }
204    if let Some(scope) = policy.scope.as_ref() {
205        map.put_str("scope", scope.clone());
206    }
207    map.insert(
208        crate::value::intern_key("preserve"),
209        VmValue::List(std::sync::Arc::new(
210            policy
211                .preserve
212                .iter()
213                .map(|item| VmValue::String(arcstr::ArcStr::from(item.clone())))
214                .collect(),
215        )),
216    );
217    map.insert(
218        crate::value::intern_key("drop"),
219        VmValue::List(std::sync::Arc::new(
220            policy
221                .drop_items
222                .iter()
223                .map(|item| VmValue::String(arcstr::ArcStr::from(item.clone())))
224                .collect(),
225        )),
226    );
227    if let Some(extend_default_instructions) = policy.extend_default_instructions {
228        map.insert(
229            crate::value::intern_key("extend_default_instructions"),
230            VmValue::Bool(extend_default_instructions),
231        );
232    }
233    if let Some(author) = policy.author.as_ref() {
234        map.put_str("author", author.clone());
235    }
236    VmValue::dict(map)
237}
238
239pub fn parse_compaction_policy_options(
240    options: Option<&crate::value::DictMap>,
241    builtin: &str,
242) -> Result<CompactionPolicy, VmError> {
243    let mut policy = options
244        .and_then(|map| {
245            map.get("policy")
246                .or_else(|| map.get("compaction_policy"))
247                .or_else(|| map.get("compaction_request"))
248        })
249        .map(|value| parse_compaction_policy_value(value, builtin))
250        .transpose()?
251        .unwrap_or_default();
252    if let Some(options) = options {
253        apply_compaction_policy_fields(&mut policy, options, builtin)?;
254    }
255    Ok(policy)
256}
257
258fn parse_compaction_policy_value(
259    value: &VmValue,
260    builtin: &str,
261) -> Result<CompactionPolicy, VmError> {
262    match value {
263        VmValue::Nil => Ok(CompactionPolicy::default()),
264        VmValue::Dict(map) => {
265            if let Some(nested) = map
266                .get("policy")
267                .or_else(|| map.get("compaction_policy"))
268                .or_else(|| map.get("compaction_request"))
269            {
270                let mut policy = parse_compaction_policy_value(nested, builtin)?;
271                apply_compaction_policy_fields(&mut policy, map, builtin)?;
272                Ok(policy)
273            } else {
274                let mut policy = CompactionPolicy::default();
275                apply_compaction_policy_fields(&mut policy, map, builtin)?;
276                Ok(policy)
277            }
278        }
279        other => Err(VmError::Runtime(format!(
280            "{builtin}: compaction policy must be a dict or nil, got {}",
281            other.type_name()
282        ))),
283    }
284}
285
286fn apply_compaction_policy_fields(
287    policy: &mut CompactionPolicy,
288    map: &crate::value::DictMap,
289    builtin: &str,
290) -> Result<(), VmError> {
291    if let Some(value) = optional_policy_string(map, "instructions", builtin)? {
292        policy.instructions = Some(value);
293    }
294    if let Some(value) = optional_policy_string(map, "mode", builtin)? {
295        policy.mode = Some(value);
296    }
297    if let Some(value) = optional_policy_string(map, "scope", builtin)? {
298        policy.scope = Some(value);
299    }
300    if map.contains_key("preserve") {
301        policy.preserve = policy_string_list(map.get("preserve"), builtin, "preserve")?;
302    }
303    if map.contains_key("drop") {
304        policy.drop_items = policy_string_list(map.get("drop"), builtin, "drop")?;
305    }
306    if let Some(value) = optional_policy_bool(map, "extend_default_instructions", builtin)? {
307        policy.extend_default_instructions = Some(value);
308    }
309    if let Some(value) = optional_policy_string(map, "author", builtin)? {
310        policy.author = Some(value);
311    }
312    Ok(())
313}
314
315fn optional_policy_string(
316    map: &crate::value::DictMap,
317    key: &str,
318    builtin: &str,
319) -> Result<Option<String>, VmError> {
320    match map.get(key) {
321        None | Some(VmValue::Nil) => Ok(None),
322        Some(VmValue::String(text)) => {
323            let trimmed = text.trim();
324            if trimmed.is_empty() {
325                Ok(None)
326            } else {
327                Ok(Some(trimmed.to_string()))
328            }
329        }
330        Some(other) => Err(VmError::Runtime(format!(
331            "{builtin}: compaction policy `{key}` must be a string, got {}",
332            other.type_name()
333        ))),
334    }
335}
336
337fn optional_policy_bool(
338    map: &crate::value::DictMap,
339    key: &str,
340    builtin: &str,
341) -> Result<Option<bool>, VmError> {
342    match map.get(key) {
343        None | Some(VmValue::Nil) => Ok(None),
344        Some(VmValue::Bool(value)) => Ok(Some(*value)),
345        Some(other) => Err(VmError::Runtime(format!(
346            "{builtin}: compaction policy `{key}` must be a bool, got {}",
347            other.type_name()
348        ))),
349    }
350}
351
352fn policy_string_list(
353    value: Option<&VmValue>,
354    builtin: &str,
355    key: &str,
356) -> Result<Vec<String>, VmError> {
357    match value {
358        None | Some(VmValue::Nil) => Ok(Vec::new()),
359        Some(VmValue::String(text)) => {
360            let trimmed = text.trim();
361            if trimmed.is_empty() {
362                Ok(Vec::new())
363            } else {
364                Ok(vec![trimmed.to_string()])
365            }
366        }
367        Some(VmValue::List(items)) => items
368            .iter()
369            .map(|item| match item {
370                VmValue::String(text) => Ok(text.trim().to_string()),
371                other => Err(VmError::Runtime(format!(
372                    "{builtin}: compaction policy `{key}` entries must be strings, got {}",
373                    other.type_name()
374                ))),
375            })
376            .filter_map(|result| match result {
377                Ok(value) if value.is_empty() => None,
378                other => Some(other),
379            })
380            .collect(),
381        Some(other) => Err(VmError::Runtime(format!(
382            "{builtin}: compaction policy `{key}` must be a string or list, got {}",
383            other.type_name()
384        ))),
385    }
386}
387
388pub fn compaction_policy_metadata_fields(
389    policy: &CompactionPolicy,
390) -> Vec<(&'static str, serde_json::Value)> {
391    let mut fields = vec![(
392        "instruction_mode",
393        serde_json::Value::String(policy.instruction_mode().to_string()),
394    )];
395    if let Some(source) = policy.instruction_source() {
396        fields.push((
397            "instruction_source",
398            serde_json::Value::String(source.to_string()),
399        ));
400    }
401    if let Some(policy_json) = policy.metadata_json() {
402        fields.push(("compaction_policy", policy_json));
403    }
404    fields
405}
406
/// Configuration for automatic transcript compaction in agent loops.
///
/// Two-tier compaction:
///   Tier 1 (`token_threshold` / `compact_strategy`): lightweight, deterministic
///     observation masking that fires early. Masks verbose tool results while
///     preserving assistant prose and error output.
///   Tier 2 (`hard_limit_tokens` / `hard_limit_strategy`): aggressive LLM-powered
///     summarization that fires when tier-1 alone isn't enough, typically as the
///     transcript approaches the model's actual context window.
///
/// The defaults quoted on the fields below are the values set by this type's
/// `Default` implementation.
#[derive(Clone, Debug)]
pub struct AutoCompactConfig {
    /// Number of earliest messages to keep verbatim before the compacted
    /// summary. The system prompt is not part of this list and is always
    /// preserved separately by the caller. Default: `0`.
    pub keep_first: usize,
    /// Tier-1 threshold: estimated tokens before lightweight compaction.
    /// Default: `48_000`.
    pub token_threshold: usize,
    /// Maximum character length for a single tool result before microcompaction.
    /// Default: `16_000`.
    pub tool_output_max_chars: usize,
    /// Number of recent messages to keep during compaction. Default: `12`.
    pub keep_last: usize,
    /// Tier-1 strategy (default: ObservationMask).
    pub compact_strategy: CompactStrategy,
    /// Tier-2 threshold: fires when tier-1 result still exceeds this.
    /// Typically set to ~75% of the model's actual context window.
    /// When `None`, tier-2 is disabled. Default: `None`.
    pub hard_limit_tokens: Option<usize>,
    /// Tier-2 strategy (default: Llm).
    pub hard_limit_strategy: CompactStrategy,
    /// Optional Harn callback used when a strategy is `custom`.
    pub custom_compactor: Option<VmValue>,
    /// Pending reminders supplied to `custom_compactor` as a second
    /// argument. Built-in compaction strategies decide reminder retention
    /// before rebuilding the transcript, so they do not consume this list.
    pub custom_compactor_reminders: Vec<VmValue>,
    /// Optional callback for domain-specific per-message masking during
    /// observation mask compaction. Called with a list of archived messages,
    /// returns a list of `Option<String>` — `Some(masked)` to override the
    /// default mask for that message, `None` to use the default.
    /// This lets the host (e.g. an IDE or cloud runner) inject AST outlines,
    /// file summaries, etc. without putting language-specific logic in Harn.
    pub mask_callback: Option<VmValue>,
    /// Optional callback for per-tool-result compression. Called with
    /// `{tool_name, output, max_chars}` and returns compressed output string.
    /// When set, used INSTEAD of the built-in `microcompact_tool_output`.
    /// This allows the pipeline to use LLM-based compression rather than
    /// keyword heuristics.
    pub compress_callback: Option<VmValue>,
    /// Optional prompt-template asset path used when LLM compaction is
    /// selected. The rendered template becomes the user message sent to
    /// the summarizer.
    pub summarize_prompt: Option<String>,
    /// User-facing policy label for replay and observability. This can be
    /// broader than the engine strategy, e.g. `hybrid` lowers to LLM
    /// summarization plus truncate fallback. Default: `"observation_mask"`.
    pub policy_strategy: String,
    /// Strategy to try when the primary strategy fails. Budget-pressure
    /// compaction uses this to keep the session within its hard cap even when
    /// an LLM summarizer is unavailable. Default: `None`.
    pub fallback_strategy: Option<CompactStrategy>,
    /// Host/user-supplied instructions that guide compaction without
    /// becoming part of the compacted transcript unless `scope` explicitly
    /// asks for model-visible policy text.
    pub policy: CompactionPolicy,
}
472
473impl Default for AutoCompactConfig {
474    fn default() -> Self {
475        Self {
476            keep_first: 0,
477            token_threshold: 48_000,
478            tool_output_max_chars: 16_000,
479            keep_last: 12,
480            compact_strategy: CompactStrategy::ObservationMask,
481            hard_limit_tokens: None,
482            hard_limit_strategy: CompactStrategy::Llm,
483            custom_compactor: None,
484            custom_compactor_reminders: Vec::new(),
485            mask_callback: None,
486            compress_callback: None,
487            summarize_prompt: None,
488            policy_strategy: compact_strategy_name(&CompactStrategy::ObservationMask).to_string(),
489            fallback_strategy: None,
490            policy: CompactionPolicy::default(),
491        }
492    }
493}
494
495/// Estimate token count from a list of JSON messages (chars / 4 heuristic).
496pub fn estimate_message_tokens(messages: &[serde_json::Value]) -> usize {
497    messages.iter().map(estimate_message_chars).sum::<usize>() / 4
498}
499
500fn estimate_message_chars(message: &serde_json::Value) -> usize {
501    let mut total = message
502        .get("content")
503        .map(estimate_content_chars)
504        .unwrap_or_default();
505    if let Some(reasoning) = message.get("reasoning") {
506        total += estimate_content_chars(reasoning);
507    }
508    if let Some(tool_calls) = message.get("tool_calls") {
509        total += estimate_content_chars(tool_calls);
510    }
511    total
512}
513
514fn estimate_content_chars(value: &serde_json::Value) -> usize {
515    match value {
516        serde_json::Value::String(text) => text.len(),
517        serde_json::Value::Array(items) => items.iter().map(estimate_content_chars).sum(),
518        serde_json::Value::Object(map) => map.values().map(estimate_content_chars).sum(),
519        serde_json::Value::Null => 0,
520        other => other.to_string().len(),
521    }
522}
523
524fn is_reasoning_or_tool_turn_message(message: &serde_json::Value) -> bool {
525    let role = message
526        .get("role")
527        .and_then(|value| value.as_str())
528        .unwrap_or_default();
529    role == "tool"
530        || message.get("tool_calls").is_some()
531        || message
532            .get("reasoning")
533            .map(|value| !value.is_null())
534            .unwrap_or(false)
535}
536
537fn find_prev_user_boundary(messages: &[serde_json::Value], start: usize) -> Option<usize> {
538    (0..=start)
539        .rev()
540        .find(|idx| messages[*idx].get("role").and_then(|value| value.as_str()) == Some("user"))
541}
542
543/// True when the message carries a tool result: the OpenAI durable shape
544/// (`role: "tool"`), the Anthropic durable shape (`role: "tool_result"`), or
545/// a user message whose content blocks include a `tool_result`. Text-channel
546/// results are ordinary user strings and intentionally don't match — they
547/// have no provider-level pairing to protect.
548fn is_tool_result_message(message: &serde_json::Value) -> bool {
549    match message.get("role").and_then(|role| role.as_str()) {
550        Some("tool") | Some("tool_result") => true,
551        Some("user") => message
552            .get("content")
553            .and_then(|content| content.as_array())
554            .is_some_and(|blocks| {
555                blocks.iter().any(|block| {
556                    block.get("type").and_then(|value| value.as_str()) == Some("tool_result")
557                })
558            }),
559        _ => false,
560    }
561}
562
563/// A compaction split must never land between an assistant tool-use message
564/// and its tool_result message(s): a kept window that begins with a
565/// tool_result whose request was drained is rejected by providers as an
566/// orphaned result. Results always immediately follow their request, so a
567/// split index is unsafe exactly when it points AT a tool-result message.
568/// Walk backward to the message that initiated the result run (keeping the
569/// request together with its results); when that would consume the whole
570/// compactable window, walk forward past the run instead so compaction still
571/// makes progress.
572fn snap_split_off_tool_results(
573    messages: &[serde_json::Value],
574    split_at: usize,
575    compact_start: usize,
576) -> usize {
577    if split_at >= messages.len() || !is_tool_result_message(&messages[split_at]) {
578        return split_at;
579    }
580    let mut backward = split_at;
581    while backward > compact_start && is_tool_result_message(&messages[backward]) {
582        backward -= 1;
583    }
584    if backward > compact_start {
585        return backward;
586    }
587    let mut forward = split_at;
588    while forward < messages.len() && is_tool_result_message(&messages[forward]) {
589        forward += 1;
590    }
591    forward
592}
593
/// True when the first `:` in `trimmed` (an already-trimmed line) is
/// immediately followed by an ASCII digit, as in `src/main.rs:12`.
///
/// Used as a strong signal that a line carries a located diagnostic. Only
/// the first colon is examined, and the text before it is not inspected —
/// in particular, whitespace before the colon does not disqualify a line.
fn line_has_file_line_prefix(trimmed: &str) -> bool {
    match trimmed.split_once(':') {
        Some((_, after_colon)) => after_colon
            .as_bytes()
            .first()
            .is_some_and(u8::is_ascii_digit),
        None => false,
    }
}
605
606/// True when a single line carries failure signal worth preserving verbatim
607/// when we shrink a large tool output. This is the ONE filter shared by both
608/// the microcompact path (`microcompact_tool_output`) and the observation-mask
609/// path (`default_mask_tool_result`). Keep these paths on one filter so
610/// assertion values, rustc continuation lines, and structured failing-line
611/// markers survive in the detail the model re-reads to fix the bug.
612///
613/// In addition to keyword/positional error lines and `file:line` diagnostics,
614/// this keeps three structured kinds that are easy to drop accidentally:
615///   - assertion value lines with no `file:line` (`left:`, `right:`,
616///     `expected:`, `actual:`, `got`, `want`) — the values the model needs to
617///     see the actual-vs-expected mismatch.
618///   - rustc continuation lines (`-->` location pointers, `= help:`/`= note:`,
619///     numbered source rows like `12 |`, and `^^^` carets).
620///   - `Lnnn:` failing-line structure some test harnesses emit.
621fn is_failure_signal_line(line: &str) -> bool {
622    let trimmed = line.trim();
623    if trimmed.is_empty() {
624        return false;
625    }
626    let lower = trimmed.to_lowercase();
627
628    let has_file_line = line_has_file_line_prefix(trimmed);
629    let has_strong_keyword =
630        trimmed.contains("FAIL") || trimmed.contains("panic") || trimmed.contains("Panic");
631    let has_weak_keyword = trimmed.contains("error")
632        || trimmed.contains("undefined")
633        || trimmed.contains("expected")
634        || trimmed.contains("got")
635        || lower.contains("cannot find")
636        || lower.contains("not found")
637        || lower.contains("no such")
638        || lower.contains("unresolved")
639        || lower.contains("missing")
640        || lower.contains("declared but not used")
641        || lower.contains("unused")
642        || lower.contains("mismatch");
643    let positional = lower.contains(" error ")
644        || lower.starts_with("error:")
645        || lower.starts_with("warning:")
646        || lower.starts_with("note:")
647        || lower.contains("panic:");
648
649    // Assertion value lines (no file:line) — the actual-vs-expected values.
650    let assertion_value = lower.starts_with("left:")
651        || lower.starts_with("right:")
652        || lower.starts_with("expected:")
653        || lower.starts_with("actual:")
654        || lower.starts_with("got:")
655        || lower.starts_with("want:")
656        || lower.starts_with("got ")
657        || lower.starts_with("want ")
658        || lower.starts_with("assertion")
659        || lower.contains("assertionerror");
660
661    // rustc continuation lines: location pointer, help/note, numbered source
662    // rows (`12 | ...`), and caret underlines (`^^^`).
663    let rustc_continuation = trimmed.starts_with("-->")
664        || trimmed.starts_with("= help:")
665        || trimmed.starts_with("= note:")
666        || trimmed.contains('^')
667        || {
668            // `<digits> |` numbered source row from rustc's snippet rendering.
669            let mut chars = trimmed.chars();
670            let mut saw_digit = false;
671            let mut rest = trimmed;
672            while let Some(c) = chars.clone().next() {
673                if c.is_ascii_digit() {
674                    saw_digit = true;
675                    chars.next();
676                    rest = chars.as_str();
677                } else {
678                    break;
679                }
680            }
681            saw_digit && rest.trim_start().starts_with('|')
682        };
683
684    // `Lnnn:` failing-line structure (`L42:`), some harnesses emit this.
685    let failing_line_marker = {
686        if let Some(rest) = trimmed.strip_prefix('L') {
687            let digits: String = rest.chars().take_while(|c| c.is_ascii_digit()).collect();
688            !digits.is_empty() && rest[digits.len()..].starts_with(':')
689        } else {
690            false
691        }
692    };
693
694    has_strong_keyword
695        || (has_file_line && has_weak_keyword)
696        || positional
697        || assertion_value
698        || rustc_continuation
699        || failing_line_marker
700}
701
702/// Microcompact a tool result: if it exceeds `max_chars`, keep the first and
703/// last portions with a snip marker in between.
704pub fn microcompact_tool_output(output: &str, max_chars: usize) -> String {
705    if output.len() <= max_chars || max_chars < 200 {
706        return output.to_string();
707    }
708    let diagnostic_lines = output
709        .lines()
710        .filter(|line| is_failure_signal_line(line))
711        .take(32)
712        .collect::<Vec<_>>();
713    if !diagnostic_lines.is_empty() {
714        let diagnostics = diagnostic_lines.join("\n");
715        let budget = max_chars.saturating_sub(diagnostics.len() + 64);
716        let keep = budget / 2;
717        if keep >= 80 && output.len() > keep * 2 {
718            let head = snap_to_line_end(output, keep);
719            let tail = snap_to_line_start(output, output.len().saturating_sub(keep));
720            return format!(
721                "{head}\n\n[diagnostic lines preserved]\n{diagnostics}\n\n[... output compacted ...]\n\n{tail}"
722            );
723        }
724    }
725    let keep = max_chars / 2;
726    let head = snap_to_line_end(output, keep);
727    let tail = snap_to_line_start(output, output.len().saturating_sub(keep));
728    let snipped = output.len().saturating_sub(head.len() + tail.len());
729    format!("{head}\n\n[... {snipped} characters snipped ...]\n\n{tail}")
730}
731
732/// Snap a byte offset to the nearest preceding line boundary (end of a complete line).
733/// Returns the substring from the start up to and including the last complete line
734/// that fits within `max_bytes`. Never cuts mid-line.
735fn snap_to_line_end(s: &str, max_bytes: usize) -> &str {
736    if max_bytes >= s.len() {
737        return s;
738    }
739    let search_end = s.floor_char_boundary(max_bytes);
740    match s[..search_end].rfind('\n') {
741        Some(pos) => &s[..pos + 1],
742        None => &s[..search_end], // single long line — fall back to char boundary
743    }
744}
745
746/// Snap a byte offset to the nearest following line boundary (start of a complete line).
747/// Returns the substring from the first complete line at or after `start_byte`.
748/// Never cuts mid-line.
749fn snap_to_line_start(s: &str, start_byte: usize) -> &str {
750    if start_byte == 0 {
751        return s;
752    }
753    let search_start = s.ceil_char_boundary(start_byte);
754    if search_start >= s.len() {
755        return "";
756    }
757    match s[search_start..].find('\n') {
758        Some(pos) => {
759            let line_start = search_start + pos + 1;
760            if line_start < s.len() {
761                &s[line_start..]
762            } else {
763                &s[search_start..]
764            }
765        }
766        None => &s[search_start..], // already at start of last line
767    }
768}
769
770fn format_compaction_messages(messages: &[serde_json::Value]) -> String {
771    messages
772        .iter()
773        .map(|msg| {
774            let role = msg
775                .get("role")
776                .and_then(|v| v.as_str())
777                .unwrap_or("user")
778                .to_uppercase();
779            let content = msg
780                .get("content")
781                .and_then(|v| v.as_str())
782                .unwrap_or_default();
783            format!("{role}: {content}")
784        })
785        .collect::<Vec<_>>()
786        .join("\n")
787}
788
/// Builds the truncate-strategy summary for `old_messages`.
///
/// Convenience wrapper around `truncate_compaction_summary_with_context`
/// with `is_llm_fallback` set to `false`.
fn truncate_compaction_summary(
    old_messages: &[serde_json::Value],
    archived_count: usize,
) -> String {
    truncate_compaction_summary_with_context(old_messages, archived_count, false)
}
795
796fn truncate_compaction_summary_with_context(
797    old_messages: &[serde_json::Value],
798    archived_count: usize,
799    is_llm_fallback: bool,
800) -> String {
801    let per_msg_limit = 500_usize;
802    let summary_parts: Vec<String> = old_messages
803        .iter()
804        .filter_map(|m| {
805            let role = m.get("role")?.as_str()?;
806            let content = m.get("content")?.as_str()?;
807            if content.is_empty() {
808                return None;
809            }
810            let truncated = if content.len() > per_msg_limit {
811                format!(
812                    "{}... [truncated from {} chars]",
813                    &content[..content.floor_char_boundary(per_msg_limit)],
814                    content.len()
815                )
816            } else {
817                content.to_string()
818            };
819            Some(format!("[{role}] {truncated}"))
820        })
821        .take(15)
822        .collect();
823    let header = if is_llm_fallback {
824        format!(
825            "[auto-compact fallback: LLM summarizer returned empty; {archived_count} older messages abbreviated to ~{per_msg_limit} chars each]"
826        )
827    } else {
828        format!("[auto-compacted {archived_count} older messages via truncate strategy]")
829    };
830    format!(
831        "{header}\n{}{}",
832        summary_parts.join("\n"),
833        if archived_count > 15 {
834            format!("\n... and {} more", archived_count - 15)
835        } else {
836            String::new()
837        }
838    )
839}
840
841fn compact_summary_text_from_value(value: &VmValue) -> Result<String, VmError> {
842    if let Some(map) = value.as_dict() {
843        if let Some(summary) = map.get("summary").or_else(|| map.get("text")) {
844            return Ok(summary.display());
845        }
846    }
847    match value {
848        VmValue::String(text) => Ok(text.to_string()),
849        VmValue::Nil => Ok(String::new()),
850        _ => serde_json::to_string_pretty(&vm_value_to_json(value))
851            .map_err(|e| VmError::Runtime(format!("custom compactor encode error: {e}"))),
852    }
853}
854
855async fn llm_compaction_summary(
856    old_messages: &[serde_json::Value],
857    archived_count: usize,
858    llm_opts: &crate::llm::api::LlmCallOptions,
859    summarize_prompt: Option<&str>,
860    policy: &CompactionPolicy,
861) -> Result<String, VmError> {
862    let mut compact_opts = llm_opts.clone();
863    let formatted = format_compaction_messages(old_messages);
864    compact_opts.system = None;
865    compact_opts.transcript_summary = None;
866    compact_opts.native_tools = None;
867    compact_opts.tool_choice = None;
868    compact_opts.output_format = crate::llm::api::OutputFormat::Text;
869    compact_opts.response_format = None;
870    compact_opts.json_schema = None;
871    compact_opts.output_schema = None;
872    let prompt =
873        render_llm_compaction_prompt(summarize_prompt, &formatted, archived_count, policy)?;
874    compact_opts.messages = vec![serde_json::json!({
875        "role": "user",
876        "content": prompt,
877    })];
878    let result = vm_call_llm_full(&compact_opts).await?;
879    let summary = result.text.trim();
880    if summary.is_empty() {
881        Ok(truncate_compaction_summary_with_context(
882            old_messages,
883            archived_count,
884            true,
885        ))
886    } else {
887        Ok(format!(
888            "[auto-compacted {archived_count} older messages]\n{summary}"
889        ))
890    }
891}
892
893fn render_llm_compaction_prompt(
894    summarize_prompt: Option<&str>,
895    formatted: &str,
896    archived_count: usize,
897    policy: &CompactionPolicy,
898) -> Result<String, VmError> {
899    if policy.has_prompt_directives() && policy.extend_default_instructions == Some(false) {
900        return render_replacement_compaction_prompt(policy, formatted, archived_count);
901    }
902    let mut bindings = crate::value::DictMap::new();
903    bindings.put_str("formatted_messages", formatted);
904    bindings.insert(
905        crate::value::intern_key("archived_count"),
906        VmValue::Int(archived_count as i64),
907    );
908    let Some(path) = summarize_prompt.filter(|path| !path.trim().is_empty()) else {
909        let prompt = crate::stdlib::template::render_stdlib_prompt_asset(
910            "orchestration/prompts/compaction_summary.harn.prompt",
911            Some(&bindings),
912        )?;
913        return Ok(extend_compaction_prompt(prompt, policy));
914    };
915
916    let asset = crate::stdlib::template::TemplateAsset::render_target(path)
917        .map_err(|error| VmError::Runtime(format!("compaction summarize_prompt: {error}")))?;
918    let prompt = crate::stdlib::template::render_asset_result(&asset, Some(&bindings))
919        .map_err(VmError::from)?;
920    Ok(extend_compaction_prompt(prompt, policy))
921}
922
923fn render_replacement_compaction_prompt(
924    policy: &CompactionPolicy,
925    formatted: &str,
926    archived_count: usize,
927) -> Result<String, VmError> {
928    let directives = policy.prompt_directives().unwrap_or_default();
929    let mut bindings = crate::value::DictMap::new();
930    bindings.put_str("directives", directives);
931    bindings.put_str("formatted_messages", formatted);
932    bindings.insert(
933        crate::value::intern_key("archived_count"),
934        VmValue::Int(archived_count as i64),
935    );
936    crate::stdlib::template::render_stdlib_prompt_asset(
937        "orchestration/prompts/compaction_policy_replacement.harn.prompt",
938        Some(&bindings),
939    )
940}
941
942fn extend_compaction_prompt(mut prompt: String, policy: &CompactionPolicy) -> String {
943    let Some(directives) = policy.prompt_directives() else {
944        return prompt;
945    };
946    prompt.push_str(
947        "\n\nAdditional compaction instructions: use these directives to shape the summary, but do not quote this section unless it explicitly requests a model-visible note.\n",
948    );
949    prompt.push_str(&directives);
950    prompt
951}
952
953async fn custom_compaction_summary(
954    ctx: Option<&AsyncBuiltinCtx>,
955    old_messages: &[serde_json::Value],
956    archived_count: usize,
957    callback: &VmValue,
958    reminders: &[VmValue],
959    policy: &CompactionPolicy,
960) -> Result<String, VmError> {
961    let Some(VmValue::Closure(closure)) = Some(callback.clone()) else {
962        return Err(VmError::Runtime(
963            "compact_callback must be a closure when compact_strategy is 'custom'".to_string(),
964        ));
965    };
966    let Some(ctx) = ctx else {
967        return Err(VmError::Runtime(
968            "custom transcript compaction requires an async builtin VM context".to_string(),
969        ));
970    };
971    let mut vm = ctx.child_vm();
972    let messages_vm = VmValue::List(std::sync::Arc::new(
973        old_messages
974            .iter()
975            .map(crate::stdlib::json_to_vm_value)
976            .collect(),
977    ));
978    let result = if policy.has_metadata()
979        && (closure.func.params.len() >= 3 || closure.func.has_rest_param)
980    {
981        let reminders_vm = VmValue::List(std::sync::Arc::new(reminders.to_vec()));
982        let policy_vm = compaction_policy_to_vm_value(policy);
983        vm.call_closure_pub(&closure, &[messages_vm, reminders_vm, policy_vm])
984            .await
985    } else if closure.func.params.len() >= 2 || closure.func.has_rest_param {
986        let reminders_vm = VmValue::List(std::sync::Arc::new(reminders.to_vec()));
987        vm.call_closure_pub(&closure, &[messages_vm, reminders_vm])
988            .await
989    } else {
990        vm.call_closure_pub(&closure, &[messages_vm]).await
991    };
992    let summary = compact_summary_text_from_value(&result?)?;
993    ctx.forward_output(&vm.take_output());
994    if summary.trim().is_empty() {
995        Ok(truncate_compaction_summary(old_messages, archived_count))
996    } else {
997        Ok(format!(
998            "[auto-compacted {archived_count} older messages]\n{summary}"
999        ))
1000    }
1001}
1002
/// Marker the host emits inside a tool-output (or message) body to pin its
/// live grounding — the current file view and just-edited window — so it
/// survives a compaction pass. The host renders this literal substring inside
/// markdown headings (e.g. `## Exact current file text [no-compact]`,
/// `## Edited region now reads (...) [no-compact]`) in
/// `lib/tools/result-format.harn`. Compaction matches the substring; it does
/// not invent a new vocabulary.
///
/// Detection is a plain substring test on the body text, so the marker pins
/// a body wherever it appears in it, not only inside a heading.
pub(crate) const NO_COMPACT_MARKER: &str = "[no-compact]";
1011
/// Upper bound on how many of the most-recent pinned segments survive a
/// compaction pass verbatim. A pin that could never be evicted would let a
/// long session accumulate unbounded pinned snapshots (e.g. one edited-window
/// per edit) and eventually overflow the context window — defeating the
/// purpose of compaction. Keeping only the latest few preserves the agent's
/// *current* grounding (the file it is editing now, emitted as the exact-text
/// block plus the numbered-lines block in one or two adjacent outputs) while
/// letting stale duplicates from earlier in the session compact normally.
///
/// The bound is counted in pinned message bodies: one message counts once,
/// however many times the marker appears in its text.
pub(crate) const MAX_PINNED_SEGMENTS: usize = 3;
1021
1022/// Whether a content body carries the host's `[no-compact]` pin marker.
1023fn is_pinned_content(content: &str) -> bool {
1024    content.contains(NO_COMPACT_MARKER)
1025}
1026
1027/// Compute the set of message indices into `messages` that are pinned AND fall
1028/// within the most-recent [`MAX_PINNED_SEGMENTS`] pinned bodies. Older pinned
1029/// bodies are intentionally excluded so they compact normally (the bound).
1030/// `content_of` extracts the body text to inspect for each message.
1031fn latest_pinned_indices<'a, F>(
1032    messages: impl Iterator<Item = &'a serde_json::Value>,
1033    content_of: F,
1034) -> std::collections::HashSet<usize>
1035where
1036    F: Fn(&serde_json::Value) -> Option<&str>,
1037{
1038    // Walk newest-first, collecting up to MAX_PINNED_SEGMENTS pinned indices.
1039    let pinned: Vec<usize> = messages
1040        .enumerate()
1041        .filter(|(_, msg)| content_of(msg).is_some_and(is_pinned_content))
1042        .map(|(idx, _)| idx)
1043        .collect();
1044    pinned.into_iter().rev().take(MAX_PINNED_SEGMENTS).collect()
1045}
1046
/// Decide whether a tool-result body is short enough to keep verbatim during
/// observation masking.
///
/// Bodies under 500 bytes (`str::len`) are kept: they tend to be error
/// messages, status lines, or concise answers that are cheap to retain and
/// risky to mask. Anything longer is handed to the masking path.
fn content_should_preserve(content: &str) -> bool {
    const PRESERVE_BELOW_BYTES: usize = 500;
    content.len() < PRESERVE_BELOW_BYTES
}
1055
1056/// Default per-message masking for tool results.
1057///
1058/// Beyond the first-line preview, this KEEPS any failure-signal lines
1059/// (assertion values, located diagnostics, rustc help/caret/source rows,
1060/// `Lnnn:` markers) via the shared [`is_failure_signal_line`] filter, so the
1061/// model re-reads the actual-vs-expected detail it needs to fix the bug. The
1062/// previous mask dropped everything but the first line, silently shredding the
1063/// structured failure that the strong microcompact filter preserves at the
1064/// emission side.
1065fn default_mask_tool_result(role: &str, content: &str) -> String {
1066    let first_line = content.lines().next().unwrap_or(content);
1067    let line_count = content.lines().count();
1068    let char_count = content.len();
1069    if line_count <= 3 {
1070        return format!("[{role}] {content}");
1071    }
1072    let preview = &first_line[..first_line.len().min(120)];
1073    // Preserve failure-signal lines (bounded so a huge log can't defeat the
1074    // mask). Skip the first line itself — it is already in the preview.
1075    let kept: Vec<&str> = content
1076        .lines()
1077        .skip(1)
1078        .filter(|line| is_failure_signal_line(line))
1079        .take(32)
1080        .collect();
1081    if kept.is_empty() {
1082        format!("[{role}] {preview}... [{line_count} lines, {char_count} chars masked]")
1083    } else {
1084        format!(
1085            "[{role}] {preview}... [{line_count} lines, {char_count} chars masked; \
1086             failure lines preserved]\n{}",
1087            kept.join("\n")
1088        )
1089    }
1090}
1091
/// Deterministic observation-mask compaction using only the default masks.
///
/// Test-only convenience wrapper: runs the masking pass with no per-message
/// mask-callback results.
#[cfg(test)]
pub(crate) fn observation_mask_compaction(
    old_messages: &[serde_json::Value],
    archived_count: usize,
) -> String {
    let no_custom_masks: Option<&[Option<String>]> = None;
    observation_mask_compaction_with_callback(old_messages, archived_count, no_custom_masks)
}
1100
1101fn observation_mask_compaction_with_callback(
1102    old_messages: &[serde_json::Value],
1103    archived_count: usize,
1104    mask_results: Option<&[Option<String>]>,
1105) -> String {
1106    let mut parts = Vec::new();
1107    parts.push(format!(
1108        "[auto-compacted {archived_count} older messages via observation masking]"
1109    ));
1110    // Pin the agent's most-recent live grounding: any archived body carrying
1111    // the host's `[no-compact]` marker (the current file view / edited window)
1112    // survives masking verbatim, bounded to the latest MAX_PINNED_SEGMENTS so a
1113    // long session's stale snapshots still compact.
1114    let pinned = latest_pinned_indices(old_messages.iter(), |msg| {
1115        msg.get("content").and_then(|v| v.as_str())
1116    });
1117    for (idx, msg) in old_messages.iter().enumerate() {
1118        let role = msg.get("role").and_then(|v| v.as_str()).unwrap_or("user");
1119        let content = msg
1120            .get("content")
1121            .and_then(|v| v.as_str())
1122            .unwrap_or_default();
1123        if content.is_empty() {
1124            continue;
1125        }
1126        if pinned.contains(&idx) {
1127            parts.push(format!("[{role}] {content}"));
1128            continue;
1129        }
1130        if role == "assistant" {
1131            parts.push(format!("[assistant] {content}"));
1132            continue;
1133        }
1134        if content_should_preserve(content) {
1135            parts.push(format!("[{role}] {content}"));
1136        } else if let Some(Some(custom)) = mask_results.and_then(|r| r.get(idx)) {
1137            parts.push(custom.clone());
1138        } else {
1139            parts.push(default_mask_tool_result(role, content));
1140        }
1141    }
1142    parts.join("\n")
1143}
1144
1145/// Invoke the mask_callback to get per-message custom masks.
1146async fn invoke_mask_callback(
1147    ctx: Option<&AsyncBuiltinCtx>,
1148    callback: &VmValue,
1149    old_messages: &[serde_json::Value],
1150) -> Result<Vec<Option<String>>, VmError> {
1151    let VmValue::Closure(closure) = callback.clone() else {
1152        return Err(VmError::Runtime(
1153            "mask_callback must be a closure".to_string(),
1154        ));
1155    };
1156    let Some(ctx) = ctx else {
1157        return Err(VmError::Runtime(
1158            "mask_callback requires an async builtin VM context".to_string(),
1159        ));
1160    };
1161    let mut vm = ctx.child_vm();
1162    let messages_vm = VmValue::List(std::sync::Arc::new(
1163        old_messages
1164            .iter()
1165            .map(crate::stdlib::json_to_vm_value)
1166            .collect(),
1167    ));
1168    let result = vm.call_closure_pub(&closure, &[messages_vm]).await?;
1169    ctx.forward_output(&vm.take_output());
1170    let list = match result {
1171        VmValue::List(items) => items,
1172        _ => return Ok(vec![None; old_messages.len()]),
1173    };
1174    Ok(list
1175        .iter()
1176        .map(|v| match v {
1177            VmValue::String(s) => Some(s.to_string()),
1178            VmValue::Nil => None,
1179            _ => None,
1180        })
1181        .collect())
1182}
1183
1184/// Rewrite each tool-result message in `messages` whose content exceeds
1185/// `config.tool_output_max_chars`, using `config.compress_callback` when set
1186/// (and a VM context is available) else the deterministic
1187/// [`microcompact_tool_output`]. Only the `content` text is replaced; the
1188/// message's `role`/`tool_call_id` are left untouched so tool-call pairing is
1189/// preserved. A `tool_output_max_chars` of 0 disables the pass.
1190async fn clamp_tool_outputs(
1191    ctx: Option<&AsyncBuiltinCtx>,
1192    messages: &mut [serde_json::Value],
1193    config: &AutoCompactConfig,
1194) -> Result<(), VmError> {
1195    if config.tool_output_max_chars == 0 {
1196        return Ok(());
1197    }
1198    // Exempt the most-recent pinned tool-outputs (those carrying the host's
1199    // `[no-compact]` marker) from length-clamping so the agent's live file view
1200    // stays intact. Bounded to the latest MAX_PINNED_SEGMENTS so older pinned
1201    // snapshots in the kept window still clamp and can't blow the budget.
1202    let pinned = latest_pinned_indices(messages.iter(), |msg| {
1203        if msg.get("role").and_then(|role| role.as_str()) == Some("tool") {
1204            msg.get("content").and_then(|content| content.as_str())
1205        } else {
1206            None
1207        }
1208    });
1209    for (idx, message) in messages.iter_mut().enumerate() {
1210        if message.get("role").and_then(|role| role.as_str()) != Some("tool") {
1211            continue;
1212        }
1213        let Some(content) = message.get("content").and_then(|content| content.as_str()) else {
1214            continue;
1215        };
1216        if content.len() <= config.tool_output_max_chars {
1217            continue;
1218        }
1219        if pinned.contains(&idx) {
1220            continue;
1221        }
1222        let content = content.to_string();
1223        let replacement = match (config.compress_callback.as_ref(), ctx) {
1224            (Some(callback), Some(ctx)) => {
1225                invoke_compress_callback(ctx, callback, &content, config.tool_output_max_chars)
1226                    .await?
1227            }
1228            _ => microcompact_tool_output(&content, config.tool_output_max_chars),
1229        };
1230        message["content"] = serde_json::Value::String(replacement);
1231    }
1232    Ok(())
1233}
1234
1235/// Invoke `compress_callback(content, max_chars)` to replace one oversized
1236/// tool-output body, mirroring [`invoke_mask_callback`]'s child-VM closure
1237/// invocation. A non-string return falls back to the deterministic primitive.
1238async fn invoke_compress_callback(
1239    ctx: &AsyncBuiltinCtx,
1240    callback: &VmValue,
1241    content: &str,
1242    max_chars: usize,
1243) -> Result<String, VmError> {
1244    let VmValue::Closure(closure) = callback.clone() else {
1245        return Err(VmError::Runtime(
1246            "compress_callback must be a closure".to_string(),
1247        ));
1248    };
1249    let mut vm = ctx.child_vm();
1250    let args = [
1251        VmValue::String(arcstr::ArcStr::from(content)),
1252        VmValue::Int(max_chars as i64),
1253    ];
1254    let result = vm.call_closure_pub(&closure, &args).await?;
1255    ctx.forward_output(&vm.take_output());
1256    match result {
1257        VmValue::String(text) => Ok(text.to_string()),
1258        _ => Ok(microcompact_tool_output(content, max_chars)),
1259    }
1260}
1261
/// Borrowed bundle of everything one compaction-strategy run needs.
///
/// The struct is `Copy`, so the same inputs can be reused for a retry with a
/// different `strategy` via struct-update syntax.
#[derive(Clone, Copy)]
struct CompactionStrategyInputs<'a> {
    /// VM context for running script callbacks. The custom compactor and the
    /// mask callback both fail with a runtime error when this is `None`.
    ctx: Option<&'a AsyncBuiltinCtx>,
    /// Strategy to apply to `old_messages`.
    strategy: &'a CompactStrategy,
    /// Messages to summarize.
    old_messages: &'a [serde_json::Value],
    /// Count quoted in the summary header text. It is passed separately and
    /// need not equal `old_messages.len()` (the hard-limit tier passes a
    /// single tier-1 summary message with the original count).
    archived_count: usize,
    /// LLM call options; required by the `Llm` strategy.
    llm_opts: Option<&'a crate::llm::api::LlmCallOptions>,
    /// User closure; required by the `Custom` strategy.
    custom_compactor: Option<&'a VmValue>,
    /// Reminder values handed to the custom compactor as its second argument
    /// when the closure accepts one.
    custom_compactor_reminders: &'a [VmValue],
    /// Optional closure supplying per-message masks for `ObservationMask`.
    mask_callback: Option<&'a VmValue>,
    /// Optional template target for the LLM summarizer prompt; a blank value
    /// selects the built-in prompt asset.
    summarize_prompt: Option<&'a str>,
    /// Compaction policy consulted when rendering prompts and when calling
    /// the custom compactor.
    policy: &'a CompactionPolicy,
}
1275
1276/// Apply a single compaction strategy to a list of archived messages.
1277async fn apply_compaction_strategy(input: CompactionStrategyInputs<'_>) -> Result<String, VmError> {
1278    let CompactionStrategyInputs {
1279        strategy,
1280        old_messages,
1281        archived_count,
1282        llm_opts,
1283        custom_compactor,
1284        custom_compactor_reminders,
1285        mask_callback,
1286        summarize_prompt,
1287        policy,
1288        ctx,
1289    } = input;
1290    match strategy {
1291        CompactStrategy::Truncate => Ok(truncate_compaction_summary(old_messages, archived_count)),
1292        CompactStrategy::Llm => {
1293            llm_compaction_summary(
1294                old_messages,
1295                archived_count,
1296                llm_opts.ok_or_else(|| {
1297                    VmError::Runtime(
1298                        "LLM transcript compaction requires active LLM call options".to_string(),
1299                    )
1300                })?,
1301                summarize_prompt,
1302                policy,
1303            )
1304            .await
1305        }
1306        CompactStrategy::Custom => {
1307            custom_compaction_summary(
1308                ctx,
1309                old_messages,
1310                archived_count,
1311                custom_compactor.ok_or_else(|| {
1312                    VmError::Runtime(
1313                        "compact_callback is required when compact_strategy is 'custom'"
1314                            .to_string(),
1315                    )
1316                })?,
1317                custom_compactor_reminders,
1318                policy,
1319            )
1320            .await
1321        }
1322        CompactStrategy::ObservationMask => {
1323            let mask_results = if let Some(cb) = mask_callback {
1324                Some(invoke_mask_callback(ctx, cb, old_messages).await?)
1325            } else {
1326                None
1327            };
1328            Ok(observation_mask_compaction_with_callback(
1329                old_messages,
1330                archived_count,
1331                mask_results.as_deref(),
1332            ))
1333        }
1334    }
1335}
1336
1337async fn apply_compaction_strategy_with_fallback(
1338    input: CompactionStrategyInputs<'_>,
1339    fallback_strategy: Option<&CompactStrategy>,
1340) -> Result<(String, CompactStrategy), VmError> {
1341    match apply_compaction_strategy(input).await {
1342        Ok(summary) => Ok((summary, input.strategy.clone())),
1343        Err(primary_error) => {
1344            let Some(fallback) = fallback_strategy.filter(|fallback| *fallback != input.strategy)
1345            else {
1346                return Err(primary_error);
1347            };
1348            let fallback_input = CompactionStrategyInputs {
1349                strategy: fallback,
1350                ..input
1351            };
1352            apply_compaction_strategy(fallback_input)
1353                .await
1354                .map(|summary| (summary, fallback.clone()))
1355        }
1356    }
1357}
1358
/// Outcome of an auto-compaction pass that rewrote the transcript.
pub(crate) struct AutoCompactResult {
    /// Text of the summary message that was inserted into the transcript.
    pub summary: String,
    /// Strategy that produced `summary`; this is the fallback or hard-limit
    /// strategy when one of those ended up generating the final text.
    pub strategy: CompactStrategy,
}
1363
/// Auto-compact a message list in place using two-tier compaction, without a
/// VM context.
///
/// Test-only wrapper: strategies that need a VM context (custom compactor,
/// mask callback) will fail when invoked through it.
#[cfg(test)]
pub(crate) async fn auto_compact_messages_with_result(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<Option<AutoCompactResult>, VmError> {
    let no_ctx: Option<&AsyncBuiltinCtx> = None;
    auto_compact_messages_with_result_with_ctx(no_ctx, messages, config, llm_opts).await
}
1373
1374pub(crate) async fn auto_compact_messages_with_result_with_ctx(
1375    ctx: Option<&AsyncBuiltinCtx>,
1376    messages: &mut Vec<serde_json::Value>,
1377    config: &AutoCompactConfig,
1378    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
1379) -> Result<Option<AutoCompactResult>, VmError> {
1380    if config.token_threshold > 0 && estimate_message_tokens(messages) <= config.token_threshold {
1381        return Ok(None);
1382    }
1383    if messages.len() <= config.keep_first.saturating_add(config.keep_last) {
1384        return Ok(None);
1385    }
1386    let compact_start = config.keep_first.min(messages.len());
1387    let original_split = messages.len().saturating_sub(config.keep_last);
1388    let mut split_at = original_split;
1389    // Snap back to a user-role boundary so the kept suffix begins at a clean
1390    // turn. OpenAI-compatible APIs reject tool results orphaned from their
1391    // assistant request, so splitting mid-turn corrupts the transcript.
1392    while split_at > compact_start
1393        && split_at < messages.len()
1394        && messages[split_at]
1395            .get("role")
1396            .and_then(|r| r.as_str())
1397            .is_none_or(|r| r != "user")
1398    {
1399        split_at -= 1;
1400    }
1401    // Fall back to the naive split (e.g. tool-heavy transcripts with the sole
1402    // user message at index 0) rather than skipping compaction entirely.
1403    if split_at == compact_start {
1404        split_at = original_split;
1405    }
1406    if let Some(volatile_start) = messages[split_at..]
1407        .iter()
1408        .position(is_reasoning_or_tool_turn_message)
1409        .map(|offset| split_at + offset)
1410    {
1411        if let Some(boundary) = volatile_start
1412            .checked_sub(1)
1413            .and_then(|idx| find_prev_user_boundary(messages, idx))
1414            .filter(|boundary| *boundary > compact_start)
1415        {
1416            split_at = boundary;
1417        }
1418    }
1419    // The naive fallback (and, in tool-heavy transcripts with no interior
1420    // user boundary, the volatile-start correction too) can still leave the
1421    // split pointing at a tool_result whose tool_use request would be
1422    // drained. Final pass: snap off any request/result pair.
1423    split_at = snap_split_off_tool_results(messages, split_at, compact_start);
1424    if split_at <= compact_start {
1425        return Ok(None);
1426    }
1427    let old_messages: Vec<_> = messages.drain(compact_start..split_at).collect();
1428    let archived_count = old_messages.len();
1429
1430    // Clamp oversized tool-result bodies in the *kept* window so the live
1431    // context honors the policy's `tool_output_max_chars` (and the
1432    // `compress_callback` override), not just the archived/summarized window.
1433    // Runs before the hard-limit estimate so tier-2 escalation
1434    // keys off the post-clamp size. Only the text body is rewritten; `role`
1435    // and `tool_call_id` are preserved so tool_call/tool_result pairing stays
1436    // intact.
1437    clamp_tool_outputs(ctx, messages, config).await?;
1438
1439    let (mut summary, mut strategy) = apply_compaction_strategy_with_fallback(
1440        CompactionStrategyInputs {
1441            ctx,
1442            strategy: &config.compact_strategy,
1443            old_messages: &old_messages,
1444            archived_count,
1445            llm_opts,
1446            custom_compactor: config.custom_compactor.as_ref(),
1447            custom_compactor_reminders: &config.custom_compactor_reminders,
1448            mask_callback: config.mask_callback.as_ref(),
1449            summarize_prompt: config.summarize_prompt.as_deref(),
1450            policy: &config.policy,
1451        },
1452        config.fallback_strategy.as_ref(),
1453    )
1454    .await?;
1455
1456    if let Some(hard_limit) = config.hard_limit_tokens {
1457        let summary_msg = serde_json::json!({"role": "user", "content": &summary});
1458        let mut estimate_msgs = vec![summary_msg];
1459        estimate_msgs.extend_from_slice(messages.as_slice());
1460        let estimated = estimate_message_tokens(&estimate_msgs);
1461        if estimated > hard_limit {
1462            let tier1_as_messages = vec![serde_json::json!({
1463                "role": "user",
1464                "content": summary,
1465            })];
1466            let (hard_limit_summary, hard_limit_strategy) =
1467                apply_compaction_strategy_with_fallback(
1468                    CompactionStrategyInputs {
1469                        ctx,
1470                        strategy: &config.hard_limit_strategy,
1471                        old_messages: &tier1_as_messages,
1472                        archived_count,
1473                        llm_opts,
1474                        custom_compactor: config.custom_compactor.as_ref(),
1475                        custom_compactor_reminders: &config.custom_compactor_reminders,
1476                        mask_callback: None,
1477                        summarize_prompt: config.summarize_prompt.as_deref(),
1478                        policy: &config.policy,
1479                    },
1480                    config.fallback_strategy.as_ref(),
1481                )
1482                .await?;
1483            summary = hard_limit_summary;
1484            strategy = hard_limit_strategy;
1485        }
1486    }
1487
1488    summary = apply_model_visible_policy(summary, &config.policy);
1489
1490    messages.insert(
1491        compact_start,
1492        serde_json::json!({
1493            "role": "user",
1494            "content": summary,
1495        }),
1496    );
1497    Ok(Some(AutoCompactResult { summary, strategy }))
1498}
1499
/// Auto-compact a message list in place using two-tier compaction, returning
/// only the summary text.
///
/// Test-only wrapper over [`auto_compact_messages_with_result`]: yields
/// `Ok(None)` when no compaction happened.
#[cfg(test)]
pub(crate) async fn auto_compact_messages(
    messages: &mut Vec<serde_json::Value>,
    config: &AutoCompactConfig,
    llm_opts: Option<&crate::llm::api::LlmCallOptions>,
) -> Result<Option<String>, VmError> {
    let outcome = auto_compact_messages_with_result(messages, config, llm_opts).await?;
    match outcome {
        Some(compacted) => Ok(Some(compacted.summary)),
        None => Ok(None),
    }
}
1513
1514fn apply_model_visible_policy(mut summary: String, policy: &CompactionPolicy) -> String {
1515    if !policy.is_model_visible_scope() {
1516        return summary;
1517    }
1518    let Some(directives) = policy.prompt_directives() else {
1519        return summary;
1520    };
1521    summary.push_str("\n\n[compaction instructions]\n");
1522    summary.push_str(&directives);
1523    summary
1524}
1525
1526#[cfg(test)]
1527mod tests {
1528    use super::*;
1529
1530    #[test]
1531    fn microcompact_short_output_unchanged() {
1532        let output = "line1\nline2\nline3\n";
1533        assert_eq!(microcompact_tool_output(output, 1000), output);
1534    }
1535
1536    #[test]
1537    fn microcompact_snaps_to_line_boundaries() {
1538        let lines: Vec<String> = (0..20)
1539            .map(|i| format!("line {i:02} content here"))
1540            .collect();
1541        let output = lines.join("\n");
1542        let result = microcompact_tool_output(&output, 200);
1543        assert!(result.contains("[... "), "should have snip marker");
1544        let parts: Vec<&str> = result.split("\n\n[... ").collect();
1545        assert!(parts.len() >= 2, "should split at marker");
1546        let head = parts[0];
1547        for line in head.lines() {
1548            assert!(
1549                line.starts_with("line "),
1550                "head line should be complete: {line}"
1551            );
1552        }
1553    }
1554
1555    #[test]
1556    fn microcompact_preserves_diagnostic_lines_with_line_boundaries() {
1557        let mut lines = Vec::new();
1558        for i in 0..50 {
1559            lines.push(format!("verbose output line {i}"));
1560        }
1561        lines.push("src/main.rs:42: error: cannot find value".to_string());
1562        for i in 50..100 {
1563            lines.push(format!("verbose output line {i}"));
1564        }
1565        let output = lines.join("\n");
1566        let result = microcompact_tool_output(&output, 600);
1567        assert!(result.contains("cannot find value"), "diagnostic preserved");
1568        assert!(
1569            result.contains("[diagnostic lines preserved]"),
1570            "has diagnostic marker"
1571        );
1572    }
1573
1574    // D1-durable: the shared failure-signal filter must keep the structured
1575    // failure kinds the old mask path dropped — assertion values, rustc
1576    // help/caret/source rows, and `Lnnn:` markers — not just keyword lines.
1577    #[test]
1578    fn failure_signal_filter_keeps_structured_failure_lines() {
1579        for keep in [
1580            "left: 3",
1581            "right: 4",
1582            "expected: foo",
1583            "actual: bar",
1584            "  --> src/main.rs:4:9",
1585            "= help: add `use std::fmt;`",
1586            "12 | let x = bad();",
1587            "   | ^^^^^^^ not found",
1588            "L42: assertion failed",
1589            "src/main.rs:42: error: cannot find value",
1590            "FAIL: TestThing",
1591            "panic: index out of range",
1592        ] {
1593            assert!(
1594                is_failure_signal_line(keep),
1595                "should keep failure-signal line: {keep:?}"
1596            );
1597        }
1598        for drop in [
1599            "verbose output line 7",
1600            "compiling crate foo",
1601            "    let y = ok();",
1602            "",
1603        ] {
1604            assert!(
1605                !is_failure_signal_line(drop),
1606                "should drop ordinary line: {drop:?}"
1607            );
1608        }
1609    }
1610
1611    // D1-durable: masking a large tool output must preserve the assertion
1612    // values and rustc detail (not just the first line) so the model can fix
1613    // the bug instead of re-reading a shredded summary.
1614    #[test]
1615    fn default_mask_preserves_failure_detail() {
1616        let mut lines = vec!["running 1 test".to_string()];
1617        for i in 0..40 {
1618            lines.push(format!("noise line {i}"));
1619        }
1620        lines.push("assertion `left == right` failed".to_string());
1621        lines.push("  left: 3".to_string());
1622        lines.push(" right: 4".to_string());
1623        lines.push("  --> src/lib.rs:10:5".to_string());
1624        for i in 40..80 {
1625            lines.push(format!("more noise {i}"));
1626        }
1627        let content = lines.join("\n");
1628        let masked = default_mask_tool_result("tool", &content);
1629        assert!(
1630            masked.contains("masked"),
1631            "still reports it masked: {masked}"
1632        );
1633        assert!(
1634            masked.contains("failure lines preserved"),
1635            "should flag preserved lines: {masked}"
1636        );
1637        assert!(masked.contains("left: 3"), "keeps left value: {masked}");
1638        assert!(masked.contains("right: 4"), "keeps right value: {masked}");
1639        assert!(
1640            masked.contains("--> src/lib.rs:10:5"),
1641            "keeps rustc location: {masked}"
1642        );
1643        assert!(
1644            !masked.contains("noise line 7"),
1645            "drops ordinary noise: {masked}"
1646        );
1647    }
1648
1649    // Verbose output with NO failure signal still masks down to the preview.
1650    #[test]
1651    fn default_mask_without_failure_lines_stays_terse() {
1652        let lines: Vec<String> = (0..40).map(|i| format!("plain line {i}")).collect();
1653        let content = lines.join("\n");
1654        let masked = default_mask_tool_result("tool", &content);
1655        assert!(masked.contains("masked]"), "should mask: {masked}");
1656        assert!(
1657            !masked.contains("failure lines preserved"),
1658            "no failure lines to preserve: {masked}"
1659        );
1660    }
1661
1662    #[test]
1663    fn token_estimate_counts_structured_message_content() {
1664        let text = "x".repeat(400);
1665        let messages = vec![serde_json::json!({
1666            "role": "user",
1667            "content": [
1668                {"type": "text", "text": text},
1669                {"type": "input_text", "text": "tail"},
1670            ],
1671            "reasoning": {"text": "scratch"},
1672            "tool_calls": [{
1673                "id": "call_1",
1674                "type": "function",
1675                "function": {"name": "read", "arguments": "{\"path\":\"src/main.rs\"}"}
1676            }],
1677        })];
1678
1679        assert!(
1680            estimate_message_tokens(&messages) >= 100,
1681            "structured content must not count as zero"
1682        );
1683    }
1684
1685    #[test]
1686    fn compaction_policy_instructions_extend_by_default() {
1687        let policy = CompactionPolicy {
1688            instructions: Some("Keep the failing test names.".to_string()),
1689            ..Default::default()
1690        };
1691        let prompt = render_llm_compaction_prompt(None, "[user] old context", 1, &policy)
1692            .expect("prompt renders");
1693
1694        assert_eq!(policy.instruction_mode(), "extend");
1695        assert!(prompt.contains("Preserve goals, constraints"));
1696        assert!(prompt.contains("Additional compaction instructions"));
1697        assert!(prompt.contains("Keep the failing test names."));
1698    }
1699
1700    #[test]
1701    fn compaction_policy_can_replace_default_instructions() {
1702        let policy = CompactionPolicy {
1703            instructions: Some("Only keep repro steps.".to_string()),
1704            extend_default_instructions: Some(false),
1705            ..Default::default()
1706        };
1707        let prompt = render_llm_compaction_prompt(None, "[user] old context", 1, &policy)
1708            .expect("prompt renders");
1709
1710        assert_eq!(policy.instruction_mode(), "replace");
1711        assert!(prompt.contains("according to these instructions"));
1712        assert!(prompt.contains("Only keep repro steps."));
1713        assert!(!prompt.contains("Preserve goals, constraints"));
1714    }
1715
1716    #[test]
1717    fn snap_to_line_end_finds_newline() {
1718        let s = "line1\nline2\nline3\nline4\n";
1719        let head = snap_to_line_end(s, 12);
1720        assert!(head.ends_with('\n'), "should end at newline");
1721        assert!(head.contains("line1"));
1722    }
1723
1724    #[test]
1725    fn snap_to_line_start_finds_newline() {
1726        let s = "line1\nline2\nline3\nline4\n";
1727        let tail = snap_to_line_start(s, 12);
1728        assert!(
1729            tail.starts_with("line"),
1730            "should start at line boundary: {tail}"
1731        );
1732    }
1733
1734    #[test]
1735    fn auto_compact_preserves_reasoning_tool_suffix() {
1736        let mut messages = vec![
1737            serde_json::json!({"role": "user", "content": "old task"}),
1738            serde_json::json!({"role": "assistant", "content": "old reply"}),
1739            serde_json::json!({"role": "user", "content": "new task"}),
1740            serde_json::json!({
1741                "role": "assistant",
1742                "content": "",
1743                "reasoning": "think first",
1744                "tool_calls": [{
1745                    "id": "call_1",
1746                    "type": "function",
1747                    "function": {"name": "read", "arguments": "{\"path\":\"foo.rs\"}"}
1748                }],
1749            }),
1750            serde_json::json!({"role": "tool", "tool_call_id": "call_1", "content": "file"}),
1751        ];
1752        let config = AutoCompactConfig {
1753            token_threshold: 1,
1754            keep_last: 2,
1755            ..Default::default()
1756        };
1757
1758        let runtime = tokio::runtime::Builder::new_current_thread()
1759            .enable_all()
1760            .build()
1761            .expect("runtime");
1762        let summary = runtime
1763            .block_on(auto_compact_messages(&mut messages, &config, None))
1764            .expect("compaction succeeds");
1765
1766        assert!(summary.is_some());
1767        assert_eq!(messages[1]["role"], "user");
1768        assert_eq!(messages[2]["role"], "assistant");
1769        assert_eq!(messages[2]["tool_calls"][0]["id"], "call_1");
1770        assert_eq!(messages[3]["role"], "tool");
1771        assert_eq!(messages[3]["tool_call_id"], "call_1");
1772    }
1773
1774    /// Regression (transcript integrity): a tool-heavy transcript whose only
1775    /// user message is the pinned head has no interior user boundary, so the
1776    /// split falls back to the naive `len - keep_last` index — which can land
1777    /// BETWEEN an assistant tool_use message and its tool_result, orphaning
1778    /// the result at the kept-window head. The split must snap to the start
1779    /// of the request/result pair instead.
1780    #[test]
1781    fn auto_compact_never_splits_assistant_tool_use_from_its_result() {
1782        let tool_call = |id: &str| {
1783            serde_json::json!({
1784                "id": id,
1785                "type": "function",
1786                "function": {"name": "run", "arguments": "{}"}
1787            })
1788        };
1789        let mut messages = vec![
1790            serde_json::json!({"role": "user", "content": "task"}),
1791            serde_json::json!({"role": "assistant", "content": "", "tool_calls": [tool_call("c0")]}),
1792            serde_json::json!({"role": "tool", "tool_call_id": "c0", "content": "r0"}),
1793            serde_json::json!({"role": "assistant", "content": "", "tool_calls": [tool_call("c1")]}),
1794            serde_json::json!({"role": "tool", "tool_call_id": "c1", "content": "r1"}),
1795            serde_json::json!({"role": "assistant", "content": "", "tool_calls": [tool_call("c2")]}),
1796            serde_json::json!({"role": "tool", "tool_call_id": "c2", "content": "r2"}),
1797        ];
1798        // keep_last: 3 puts the naive split at index 4 — the tool_result for
1799        // c1 — exactly mid-pair.
1800        let config = AutoCompactConfig {
1801            token_threshold: 1,
1802            keep_first: 0,
1803            keep_last: 3,
1804            ..Default::default()
1805        };
1806
1807        let runtime = tokio::runtime::Builder::new_current_thread()
1808            .enable_all()
1809            .build()
1810            .expect("runtime");
1811        let summary = runtime
1812            .block_on(auto_compact_messages(&mut messages, &config, None))
1813            .expect("compaction succeeds");
1814        assert!(summary.is_some(), "compaction should trigger");
1815
1816        // Kept window: summary, then the INTACT c1 pair, then the c2 pair.
1817        assert_eq!(messages[0]["role"], "user", "summary head");
1818        assert_eq!(messages[1]["role"], "assistant");
1819        assert_eq!(messages[1]["tool_calls"][0]["id"], "c1");
1820        assert_eq!(messages[2]["role"], "tool");
1821        assert_eq!(messages[2]["tool_call_id"], "c1");
1822        assert_eq!(messages[3]["tool_calls"][0]["id"], "c2");
1823        assert_eq!(messages[4]["tool_call_id"], "c2");
1824        // No kept tool_result may reference a drained (missing) request.
1825        for (idx, message) in messages.iter().enumerate() {
1826            if message["role"] == "tool" {
1827                let id = message["tool_call_id"].as_str().expect("tool_call_id");
1828                let paired = messages[..idx].iter().any(|prev| {
1829                    prev["tool_calls"]
1830                        .as_array()
1831                        .is_some_and(|calls| calls.iter().any(|call| call["id"] == id))
1832                });
1833                assert!(paired, "tool_result {id} orphaned in kept window");
1834            }
1835        }
1836    }
1837
1838    #[test]
1839    fn snap_split_off_tool_results_handles_all_result_shapes() {
1840        // A split pointing at any tool-result shape walks back to the
1841        // request that initiated the run. OpenAI durable shape
1842        // (`role: "tool"`):
1843        let openai = vec![
1844            serde_json::json!({"role": "user", "content": "task"}),
1845            serde_json::json!({"role": "assistant", "content": "", "tool_calls": []}),
1846            serde_json::json!({"role": "tool", "tool_call_id": "c0", "content": "r0"}),
1847        ];
1848        assert_eq!(snap_split_off_tool_results(&openai, 2, 0), 1);
1849        // Anthropic durable shape (`role: "tool_result"`).
1850        let anthropic = vec![
1851            serde_json::json!({"role": "user", "content": "task"}),
1852            serde_json::json!({"role": "assistant", "content": ""}),
1853            serde_json::json!({"role": "tool_result", "tool_use_id": "c0", "content": "r0"}),
1854        ];
1855        assert_eq!(snap_split_off_tool_results(&anthropic, 2, 0), 1);
1856        // User message carrying tool_result blocks.
1857        let user_blocks = vec![
1858            serde_json::json!({"role": "user", "content": "task"}),
1859            serde_json::json!({"role": "assistant", "content": ""}),
1860            serde_json::json!({
1861                "role": "user",
1862                "content": [{"type": "tool_result", "tool_use_id": "c0", "content": "r0"}],
1863            }),
1864        ];
1865        assert_eq!(snap_split_off_tool_results(&user_blocks, 2, 0), 1);
1866        // Plain user text is a safe boundary — untouched.
1867        let text = vec![
1868            serde_json::json!({"role": "assistant", "content": ""}),
1869            serde_json::json!({"role": "user", "content": "plain"}),
1870        ];
1871        assert_eq!(snap_split_off_tool_results(&text, 1, 0), 1);
1872        // Backward walk pinned at compact_start: fall forward past the run
1873        // so compaction still makes progress (the whole pair is drained
1874        // together rather than split).
1875        let pinned = vec![
1876            serde_json::json!({"role": "tool", "tool_call_id": "c0", "content": "r0"}),
1877            serde_json::json!({"role": "tool", "tool_call_id": "c1", "content": "r1"}),
1878            serde_json::json!({"role": "assistant", "content": "done"}),
1879        ];
1880        assert_eq!(snap_split_off_tool_results(&pinned, 1, 0), 2);
1881    }
1882
1883    #[test]
1884    fn auto_compact_clamps_oversized_tool_output_to_max_chars() {
1885        // A large tool result in the *kept* window must be clamped to honor
1886        // `tool_output_max_chars`.
1887        let big = "x".repeat(4000);
1888        let big_len = big.len();
1889        let mut messages = vec![
1890            serde_json::json!({"role": "user", "content": "old task"}),
1891            serde_json::json!({"role": "assistant", "content": "old reply"}),
1892            serde_json::json!({"role": "user", "content": "new task"}),
1893            serde_json::json!({"role": "assistant", "content": "calling tool"}),
1894            serde_json::json!({"role": "tool", "tool_call_id": "call_1", "content": big}),
1895        ];
1896        let config = AutoCompactConfig {
1897            token_threshold: 1,
1898            keep_last: 2,
1899            tool_output_max_chars: 500,
1900            ..Default::default()
1901        };
1902
1903        let runtime = tokio::runtime::Builder::new_current_thread()
1904            .enable_all()
1905            .build()
1906            .expect("runtime");
1907        let result = runtime
1908            .block_on(auto_compact_messages(&mut messages, &config, None))
1909            .expect("compaction succeeds");
1910        assert!(result.is_some(), "compaction should trigger");
1911
1912        let tool_msg = messages
1913            .iter()
1914            .find(|message| message["role"] == "tool")
1915            .expect("tool message kept in window");
1916        // Pairing preserved...
1917        assert_eq!(tool_msg["tool_call_id"], "call_1");
1918        // ...and the oversized body was clamped well below its original size.
1919        let content = tool_msg["content"].as_str().expect("string content");
1920        assert!(
1921            content.len() < big_len,
1922            "tool output should be clamped: {} vs {}",
1923            content.len(),
1924            big_len
1925        );
1926        assert!(content.len() < 2000, "clamped near tool_output_max_chars");
1927    }
1928
1929    /// (1) A pinned tool-output survives an observation-mask pass that evicts
1930    /// (masks) the unpinned verbose outputs around it.
1931    #[test]
1932    fn observation_mask_preserves_pinned_live_file_view() {
1933        let pinned_body = format!(
1934            "## Edited region now reads (line 42, ±6 context) {}\n```\n{}\n```",
1935            NO_COMPACT_MARKER,
1936            (0..40)
1937                .map(|i| format!("   {i}  let x = compute({i});"))
1938                .collect::<Vec<_>>()
1939                .join("\n")
1940        );
1941        let verbose_unpinned = (0..60)
1942            .map(|i| format!("verbose scan output line {i}"))
1943            .collect::<Vec<_>>()
1944            .join("\n");
1945        // These are the ARCHIVED messages handed to the mask pass.
1946        let archived = vec![
1947            serde_json::json!({"role": "user", "content": verbose_unpinned}),
1948            serde_json::json!({"role": "user", "content": pinned_body}),
1949        ];
1950        let summary = observation_mask_compaction(&archived, archived.len());
1951        // Pinned live file view survives verbatim.
1952        assert!(
1953            summary.contains("Edited region now reads"),
1954            "pinned heading survived: {summary}"
1955        );
1956        assert!(
1957            summary.contains("let x = compute(39);"),
1958            "pinned body survived verbatim"
1959        );
1960        // The unpinned verbose neighbor was masked.
1961        assert!(summary.contains("masked]"), "unpinned output was masked");
1962        assert!(!summary.contains("verbose scan output line 30"));
1963    }
1964
1965    /// (2) A pinned large tool-output is NOT clamped, while an unpinned one of
1966    /// the same size IS.
1967    #[test]
1968    fn clamp_exempts_pinned_tool_output() {
1969        let pinned_big = format!(
1970            "## Exact current file text {}\n{}",
1971            NO_COMPACT_MARKER,
1972            "x".repeat(4000)
1973        );
1974        let pinned_len = pinned_big.len();
1975        let unpinned_big = "y".repeat(4000);
1976        let unpinned_len = unpinned_big.len();
1977        let mut messages = vec![
1978            serde_json::json!({"role": "user", "content": "old task"}),
1979            serde_json::json!({"role": "assistant", "content": "reply"}),
1980            serde_json::json!({"role": "user", "content": "new task"}),
1981            serde_json::json!({"role": "assistant", "content": "calling tools"}),
1982            serde_json::json!({"role": "tool", "tool_call_id": "c0", "content": unpinned_big}),
1983            serde_json::json!({"role": "tool", "tool_call_id": "c1", "content": pinned_big}),
1984            serde_json::json!({"role": "user", "content": "continue"}),
1985        ];
1986        let config = AutoCompactConfig {
1987            token_threshold: 1,
1988            keep_last: 4,
1989            tool_output_max_chars: 500,
1990            ..Default::default()
1991        };
1992        let runtime = tokio::runtime::Builder::new_current_thread()
1993            .enable_all()
1994            .build()
1995            .expect("runtime");
1996        runtime
1997            .block_on(auto_compact_messages(&mut messages, &config, None))
1998            .expect("compaction succeeds");
1999
2000        let pinned_msg = messages
2001            .iter()
2002            .find(|m| m["tool_call_id"] == "c1")
2003            .expect("pinned tool message kept");
2004        assert_eq!(
2005            pinned_msg["content"].as_str().map(str::len),
2006            Some(pinned_len),
2007            "pinned output must be intact (unclamped)"
2008        );
2009        let unpinned_msg = messages
2010            .iter()
2011            .find(|m| m["tool_call_id"] == "c0")
2012            .expect("unpinned tool message kept");
2013        assert!(
2014            unpinned_msg["content"].as_str().map(str::len).unwrap() < unpinned_len,
2015            "unpinned output of the same size must be clamped"
2016        );
2017    }
2018
2019    /// (3) Bounded policy: with MANY pinned outputs, only the latest
2020    /// MAX_PINNED_SEGMENTS survive verbatim; older pinned duplicates compact —
2021    /// so the pin can't prevent all compaction (and can't overflow the window
2022    /// on a very long session).
2023    #[test]
2024    fn pin_bound_keeps_only_latest_segments() {
2025        // Build 6 distinct pinned, oversized edited-window snapshots
2026        // (gen 0 = oldest .. gen 5 = newest), each tagged with the marker and
2027        // long enough that masking would otherwise truncate it.
2028        let make = |gen: usize| {
2029            let body = (0..40)
2030                .map(|i| format!("marker-gen-{gen} body line {i}"))
2031                .collect::<Vec<_>>()
2032                .join("\n");
2033            serde_json::json!({
2034                "role": "user",
2035                "content": format!(
2036                    "## Edited region now reads (gen {gen}) {}\n{}",
2037                    NO_COMPACT_MARKER, body
2038                ),
2039            })
2040        };
2041        let archived: Vec<_> = (0..6).map(make).collect();
2042
2043        // Unit-level: the index selection keeps exactly the latest N.
2044        let pinned = latest_pinned_indices(archived.iter(), |m| {
2045            m.get("content").and_then(|c| c.as_str())
2046        });
2047        assert_eq!(
2048            pinned.len(),
2049            MAX_PINNED_SEGMENTS,
2050            "only the latest MAX_PINNED_SEGMENTS are pinned"
2051        );
2052        assert!(pinned.contains(&5) && pinned.contains(&4) && pinned.contains(&3));
2053        assert!(!pinned.contains(&0) && !pinned.contains(&1) && !pinned.contains(&2));
2054
2055        // End-to-end through the mask pass: the 3 newest snapshots survive
2056        // verbatim; the 3 oldest are masked, proving the pin cannot defeat all
2057        // compaction.
2058        let summary = observation_mask_compaction(&archived, archived.len());
2059        assert!(
2060            summary.contains("marker-gen-5")
2061                && summary.contains("marker-gen-4")
2062                && summary.contains("marker-gen-3"),
2063            "latest {MAX_PINNED_SEGMENTS} pinned snapshots survive verbatim: {summary}"
2064        );
2065        assert!(
2066            !summary.contains("marker-gen-0")
2067                && !summary.contains("marker-gen-1")
2068                && !summary.contains("marker-gen-2"),
2069            "older pinned snapshots are masked (bound enforced)"
2070        );
2071        assert!(summary.contains("masked]"), "older snapshots were masked");
2072    }
2073
2074    /// (4) Regression: with NO pins, compaction behaves exactly as before.
2075    #[test]
2076    fn no_pins_preserves_prior_clamp_behavior() {
2077        let big = "x".repeat(4000);
2078        let big_len = big.len();
2079        let mut messages = vec![
2080            serde_json::json!({"role": "user", "content": "old task"}),
2081            serde_json::json!({"role": "assistant", "content": "old reply"}),
2082            serde_json::json!({"role": "user", "content": "new task"}),
2083            serde_json::json!({"role": "assistant", "content": "calling tool"}),
2084            serde_json::json!({"role": "tool", "tool_call_id": "call_1", "content": big}),
2085        ];
2086        let config = AutoCompactConfig {
2087            token_threshold: 1,
2088            keep_last: 2,
2089            tool_output_max_chars: 500,
2090            ..Default::default()
2091        };
2092        let runtime = tokio::runtime::Builder::new_current_thread()
2093            .enable_all()
2094            .build()
2095            .expect("runtime");
2096        let result = runtime
2097            .block_on(auto_compact_messages(&mut messages, &config, None))
2098            .expect("compaction succeeds");
2099        assert!(result.is_some());
2100        let tool_msg = messages
2101            .iter()
2102            .find(|m| m["role"] == "tool")
2103            .expect("tool kept");
2104        let content = tool_msg["content"].as_str().expect("string content");
2105        assert!(content.len() < big_len, "unpinned output clamped as before");
2106        assert!(content.len() < 2000, "clamped near tool_output_max_chars");
2107    }
2108}