Skip to main content

deepstrike_core/context/
renderer.rs

1use super::partitions::ContextPartitions;
2use super::snapshot::stable_hash;
3use super::task_state::TaskState;
4use super::token_engine::ContextTokenEngine;
5use crate::mm::handle::{HandleTable, Residency};
6use crate::types::message::{Content, ContentPart, Message, Role};
7use serde::{Deserialize, Serialize};
8
/// Structured render output, laid out to match the slots of an LLM API request.
///
/// Slot 1 — system_stable:    Identity (system partition). Anthropic system[0] cache_control.
/// Slot 2 — system_knowledge: Knowledge partition. Anthropic system[1] cache_control.
/// Slot 3 — turns[0..N]:      History turns (stable, cacheable prefix).
/// Slot 4 — state_turn:       State (task_state + signals), rebuilt every call.
///
/// The State turn is kept OUT of `turns` so the history prefix stays byte-stable
/// across turns and can be prompt-cached. Providers place `state_turn` themselves:
/// Anthropic appends it AFTER the message-history cache breakpoint (so the volatile
/// state is the cheap uncached tail); OpenAI-family providers prepend it (preserving
/// the existing ordering). When this struct is produced by an older binding that has
/// not been rebuilt, `state_turn` is absent and `turns[0]` still carries the State
/// turn — providers handle both shapes.
///
/// system_text = system_stable + system_knowledge (for OpenAI, which has one system slot).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RenderedContext {
    /// Identity + Knowledge combined — for providers with a single system slot (OpenAI).
    pub system_text: String,
    /// Identity only (system partition). Anthropic system[0] with cache_control.
    pub system_stable: String,
    /// Knowledge (memory retrievals, skill definitions, artifacts). Anthropic system[1] with
    /// cache_control.
    pub system_knowledge: String,
    /// History turns only — the stable, cacheable message prefix.
    pub turns: Vec<Message>,
    /// Volatile State turn (task_state + signals), rebuilt every call. Rendered
    /// after the cacheable history. `None` when there is no task state or signals.
    /// Omitted from the serialized form when `None`, and defaults to `None` when the
    /// field is missing on deserialization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub state_turn: Option<Message>,
    /// P1-E: number of leading `turns` that form the **frozen prefix** — byte-stable until the
    /// next compaction. Providers that place explicit cache breakpoints (Anthropic) pin one *deep*
    /// breakpoint at this boundary (a long-lived cache that survives many turns and is immune to
    /// the 20-block lookback miss on heavy tool turns) and roll the other at the tail. `None` when
    /// there is no distinct frozen region yet (pre-first-compaction, or the whole render is hot) —
    /// providers then fall back to the rolling-pair placement. Providers clamp out-of-range values.
    /// Serialized with the same omit-when-`None` / default-when-missing rules as `state_turn`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub frozen_prefix_len: Option<usize>,
}
48
/// Fingerprint of one render's **cacheable prefix**: the two system segments plus the
/// history `turns`, i.e. everything a provider can cache as a stable prefix.
///
/// `state_turn` (the volatile, uncached tail) and `token_count` metadata (never on the
/// wire) are deliberately left out. This is the metrics-first instrument (P0-A) for the
/// optimization work: two renders can share a KV / prompt-cache prefix iff their system
/// hashes match *and* one `turn_hashes` vector is a prefix of the other. The value is
/// pure and derived — it is never stored in snapshots, session logs, or event logs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PrefixFingerprint {
    pub system_stable_hash: u64,
    pub system_knowledge_hash: u64,
    /// One stable hash per history turn, oldest first. The longest common prefix with a
    /// previous render's vector is the number of turns that stay cache-reusable.
    pub turn_hashes: Vec<u64>,
}

impl PrefixFingerprint {
    /// Returns `true` when `self` is a byte-stable *extension* of `prev`: both system
    /// hashes are equal and `prev.turn_hashes` is a leading sub-sequence of
    /// `self.turn_hashes`. That is the KV / prompt-cache reuse condition — nothing in
    /// the prefix drifted, the render only grew at the tail.
    pub fn extends(&self, prev: &PrefixFingerprint) -> bool {
        let same_system = self.system_stable_hash == prev.system_stable_hash
            && self.system_knowledge_hash == prev.system_knowledge_hash;
        // `starts_with` is false when `prev` is longer, so no separate length check is needed.
        same_system && self.turn_hashes.starts_with(&prev.turn_hashes)
    }

    /// Counts the leading turns whose hashes equal `prev`'s — the reusable turn-prefix
    /// length. System hashes are not consulted. A result below `prev.turn_hashes.len()`
    /// signals mid-prefix churn (a turn rewritten in place, e.g. an in-place collapse)
    /// that invalidates the cache from that turn onward.
    pub fn common_turn_prefix(&self, prev: &PrefixFingerprint) -> usize {
        let overlap = self.turn_hashes.len().min(prev.turn_hashes.len());
        self.turn_hashes
            .iter()
            .zip(&prev.turn_hashes)
            .position(|(mine, theirs)| mine != theirs)
            // No mismatch inside the overlap: the whole overlap is shared.
            .unwrap_or(overlap)
    }
}
88
89/// Wire-relevant hash of one turn: role + content + tool_calls, **excluding**
90/// `token_count` (kernel-only metadata that never reaches the provider). Serialised
91/// through serde so every content variant and tool-call argument is covered with a
92/// deterministic field order.
93fn hash_turn(msg: &Message) -> u64 {
94    let material =
95        serde_json::to_vec(&(&msg.role, &msg.content, &msg.tool_calls)).unwrap_or_default();
96    stable_hash(&material)
97}
98
99impl RenderedContext {
100    /// Compute the [`PrefixFingerprint`] for this render. See its docs for the
101    /// cache-reuse contract it certifies.
102    pub fn prefix_fingerprint(&self) -> PrefixFingerprint {
103        PrefixFingerprint {
104            system_stable_hash: stable_hash(self.system_stable.as_bytes()),
105            system_knowledge_hash: stable_hash(self.system_knowledge.as_bytes()),
106            turn_hashes: self.turns.iter().map(hash_turn).collect(),
107        }
108    }
109}
110
111fn build_system_stable(partitions: &ContextPartitions) -> String {
112    partitions.system.messages
113        .iter()
114        .filter_map(|m| m.content.as_text())
115        .collect::<Vec<_>>()
116        .join("\n\n")
117}
118
119fn build_system_knowledge(partitions: &ContextPartitions) -> String {
120    partitions.knowledge.messages
121        .iter()
122        .filter_map(|m| m.content.as_text())
123        .collect::<Vec<_>>()
124        .join("\n\n")
125}
126
127/// P1-F (+ 2b/2c): a one-line recency footer at the *last* content before the "Proceed." anchor —
128/// the highest-attention position in the prompt (the model attends most to the final tokens).
129///
130/// It LEADS WITH FORWARD MOTION (what just happened · what to do next · the standing directive), not
131/// a verbatim restatement of the goal. Re-injecting the bare goal at this peak-attention slot every
132/// turn primes the model to *re-narrate intent* ("好的,我来将<goal>…") instead of acting — an
133/// undamped repetition trap when there is no plan/progress to advance. The full goal still LEADS the
134/// TASK STATE block above (primacy + reference), so goal-adherence is preserved; the footer restates
135/// the goal only when nothing has happened yet (e.g. turn 1, no actions). `None` when there is no goal.
136///
137/// The "just did" clause is kernel-derived from `recent_actions` (real tool activity), and a trailing
138/// run of an identical action raises an explicit STOP — a cheap no-progress backstop that breaks the
139/// read→re-read→re-narrate loop in-band, at the position the model weights most.
140fn salience_footer(ts: &TaskState) -> Option<String> {
141    if ts.goal.is_empty() {
142        return None;
143    }
144    let mut clauses: Vec<String> = Vec::new();
145
146    // What just happened — display tool NAMES only. The full `name(args)` signatures are kept in
147    // `recent_actions` for the repeat check below, but rendering them every turn bloats the volatile
148    // footer; the names alone show motion at the peak-attention slot.
149    let recent = ts.recent_actions.as_slice();
150    let action_name = |entry: &str| entry.split('(').next().unwrap_or(entry).to_string();
151    if let Some(last) = recent.last() {
152        let start = recent.len().saturating_sub(3);
153        let names = recent[start..].iter().map(|e| action_name(e)).collect::<Vec<_>>().join(" → ");
154        clauses.push(format!("did: {names}"));
155
156        // No-progress backstop: the SAME call — name AND args — repeated on the last ≥2 turns is a
157        // stall (a legit loop varies its args, so it reads as distinct progress, not a repeat).
158        let trailing_repeat = recent.iter().rev().take_while(|a| *a == last).count();
159        if trailing_repeat >= 2 {
160            clauses.push(format!(
161                "STOP: `{}` repeated {trailing_repeat}× unchanged — do something different or report",
162                action_name(last)
163            ));
164        }
165    }
166
167    // What to do next — the active plan step if the model maintains one, else a short forward nudge.
168    let active_step = ts
169        .current_step
170        .and_then(|i| ts.plan.get(i).map(|s| (i, s)))
171        .filter(|(_, s)| !s.done);
172    if let Some((i, step)) = active_step {
173        clauses.push(format!("next: step {} — {}", i + 1, step.label));
174    } else if !recent.is_empty() {
175        clauses.push("next: advance the goal".to_string());
176    }
177
178    if let Some(d) = ts.directives.last() {
179        clauses.push(format!("must: {d}"));
180    }
181
182    // Lead with the goal only when no forward clause fills the footer (turn 1, nothing done yet);
183    // otherwise the forward clauses carry the salience and the goal stays in the block above.
184    let body = if clauses.is_empty() {
185        format!("→ focus: {}", ts.goal)
186    } else {
187        format!("→ {}", clauses.join(" · "))
188    };
189    Some(body)
190}
191
192/// Build the State turn (the volatile tail): task_state + signals + a recency focus footer +
193/// "Proceed." anchor. The footer sits last (just before "Proceed.") so the current goal/step/
194/// directive land in the prompt's highest-attention position (P1-F).
195fn build_state_turn(partitions: &ContextPartitions) -> Option<Message> {
196    let task = partitions.task_state.format_compact();
197    if task.is_empty() && partitions.signals.is_empty() {
198        return None;
199    }
200    let mut parts: Vec<String> = Vec::new();
201    if !task.is_empty() {
202        parts.push(task);
203    }
204    let signals_text = partitions.signals.join("\n");
205    if !signals_text.is_empty() {
206        parts.push(signals_text);
207    }
208    if let Some(footer) = salience_footer(&partitions.task_state) {
209        parts.push(footer);
210    }
211    let body = parts.join("\n\n");
212    Some(Message::user(format!("{body}\n\nProceed.")))
213}
214
215/// Ensure turns start with a user message.
216/// After AutoCompact the preserved tail may be all assistant/tool — insert an anchor.
217fn normalize_turn_prefix(turns: &mut Vec<Message>) {
218    if !turns.is_empty() && matches!(turns[0].role, Role::Assistant | Role::Tool) {
219        turns.insert(0, Message::user("[context resumed]"));
220    }
221}
222
/// Layer-4 read-time projection: replace the body of a `Collapsed` tool result with a short
/// preview followed by a marker. Non-destructive — the caller passes a borrowed `output` and
/// receives a fresh string, so the stored history is untouched and the projection reverses
/// when pressure drops.
///
/// The preview is the first `PREVIEW_BYTES` bytes of `output`, shortened to the nearest
/// preceding UTF-8 character boundary so a multi-byte character is never split. The marker
/// reports how many *characters* (not bytes) were left out.
///
/// The projection never grows the text: when the preview plus marker would be at least as
/// long as `output` itself (short or empty results), `output` is returned unchanged, with no
/// ellipsis and no marker, because nothing useful would be hidden.
fn collapse_preview(output: &str) -> String {
    const PREVIEW_BYTES: usize = 160;

    // Already within the preview window: nothing to drop.
    if output.len() <= PREVIEW_BYTES {
        return output.to_string();
    }

    // Back off to a char boundary; offset 0 is always a boundary, so this terminates.
    let mut end = PREVIEW_BYTES;
    while !output.is_char_boundary(end) {
        end -= 1;
    }

    let (preview, hidden) = output.split_at(end);
    // Count characters so the figure matches the "chars" label for non-ASCII output too.
    let dropped = hidden.chars().count();
    let projected = format!(
        "{preview}…\n[collapsed: {dropped} chars projected out of view; full result retained in history]"
    );

    // The marker itself costs bytes; keep the original if collapsing would not save any.
    if projected.len() >= output.len() {
        output.to_string()
    } else {
        projected
    }
}
238
/// Stub text substituted for a collapsed assistant preamble. It carries no goal text (that
/// would re-seed the very repetition the collapse removes) and points the model at the
/// authoritative State turn instead.
const NARRATION_STUB: &str = "[earlier narration collapsed; tool call(s) preserved below — current progress is in the TASK STATE block]";

/// Minimum narration length worth collapsing, measured in Unicode scalar values (`char`s)
/// rather than bytes, so CJK text is not over-counted. Short preambles aren't worth a stub
/// substitution (and the one-time cache churn it costs as the turn ages out of the window).
const NARRATION_COLLAPSE_MIN_CHARS: usize = 40;
246
247/// Method 1: read-time collapse of an OLD assistant turn's narration. Targets exactly the
248/// "preamble before action" turns — `Role::Assistant`, a `Content::Text` body, AND a non-empty
249/// `tool_calls` (the model narrated intent, then acted). Returns a projected copy whose text is
250/// replaced by [`NARRATION_STUB`] while `tool_calls` (and thus tool_use/tool_result pairing) are
251/// left intact; the original full text stays in `partitions.history`, so the projection reverses if
252/// the flag is turned off. `None` when the message isn't a collapsible narration turn or the flag is
253/// off. Caller restricts this to messages already past the protected recent window.
254fn project_assistant_narration(msg: &Message, enabled: bool) -> Option<Message> {
255    if !enabled || msg.role != Role::Assistant || msg.tool_calls.is_empty() {
256        return None;
257    }
258    let Content::Text(text) = &msg.content else {
259        return None;
260    };
261    if text == NARRATION_STUB || text.chars().count() < NARRATION_COLLAPSE_MIN_CHARS {
262        return None;
263    }
264    let mut projected = msg.clone();
265    projected.content = Content::Text(NARRATION_STUB.to_string());
266    projected.token_count = None; // recomputed against the smaller stub
267    Some(projected)
268}
269
270/// If any of `msg`'s tool-result parts is `Collapsed` per the handle table, return a projected
271/// copy with those parts previewed; `None` if nothing is collapsed (render the message as-is).
272fn project_message(msg: &Message, handles: &HandleTable) -> Option<Message> {
273    let Content::Parts(parts) = &msg.content else {
274        return None;
275    };
276    let mut changed = false;
277    let new_parts: Vec<ContentPart> = parts
278        .iter()
279        .map(|part| match part {
280            ContentPart::ToolResult { call_id, output, is_error }
281                if matches!(
282                    handles.residency_for_source(call_id),
283                    Some(Residency::Collapsed)
284                ) =>
285            {
286                changed = true;
287                ContentPart::ToolResult {
288                    call_id: call_id.clone(),
289                    output: collapse_preview(output),
290                    is_error: *is_error,
291                }
292            }
293            other => other.clone(),
294        })
295        .collect();
296    if changed {
297        let mut projected = msg.clone();
298        projected.content = Content::Parts(new_parts);
299        projected.token_count = None; // recomputed against the smaller projected body
300        Some(projected)
301    } else {
302        None
303    }
304}
305
306/// Render the context into a `RenderedContext` suitable for a provider API call.
307///
308/// Equivalent to [`render_projected`] with an empty handle table (no Layer-4 projection) and no
309/// frozen-prefix boundary (`frozen_history_len = 0` → `frozen_prefix_len` is always `None`).
310pub fn render(
311    partitions: &ContextPartitions,
312    budget: u32,
313    engine: &ContextTokenEngine,
314    preserve_recent_msgs: usize,
315) -> RenderedContext {
316    // The convenience wrapper renders history verbatim (no narration collapse) — callers that want
317    // Method-1 collapse drive `render_projected` with the flag (the kernel passes it from config).
318    render_projected(partitions, budget, engine, preserve_recent_msgs, &HandleTable::new(), 0, false)
319}
320
321/// Render with Layer-4 read-time projection driven by `handles`: tool results whose handle is
322/// `Collapsed` render as previews (originals untouched), freeing budget for more recent turns.
323///
324/// Token budget:
325///   system_stable + system_knowledge tokens are subtracted first.
326///   Remaining budget is allocated to history turns newest-first.
327///   The first `preserve_recent_msgs` history messages are always included.
328///   Text messages are truncated at the budget boundary; Parts messages are included whole.
329pub fn render_projected(
330    partitions: &ContextPartitions,
331    budget: u32,
332    engine: &ContextTokenEngine,
333    preserve_recent_msgs: usize,
334    handles: &HandleTable,
335    frozen_history_len: usize,
336    collapse_narration: bool,
337) -> RenderedContext {
338    let system_stable = build_system_stable(partitions);
339    let system_knowledge = build_system_knowledge(partitions);
340    let system_text = [system_stable.as_str(), system_knowledge.as_str()]
341        .iter()
342        .filter(|s| !s.is_empty())
343        .cloned()
344        .collect::<Vec<_>>()
345        .join("\n\n");
346
347    let system_tokens = engine.count(&system_text).min(budget);
348    let mut remaining = budget.saturating_sub(system_tokens);
349
350    // Fill history newest-first within remaining budget. Layer-4 projection is applied per
351    // message: a collapsed tool result renders as a preview and is costed at its reduced size.
352    let mut kept_rev: Vec<Message> = Vec::new();
353    for msg in partitions.history.messages.iter().rev() {
354        let is_protected = kept_rev.len() < preserve_recent_msgs;
355        // `projected` is `Some` only when read-time projection shrank the message. Two disjoint
356        // sources: a `Collapsed` tool-result preview (handle-driven, any age) OR — once the turn has
357        // aged past the protected recent window — an assistant-narration stub (Method 1). A message
358        // is either a tool-result Parts message or an assistant Text+tool_calls message, never both.
359        let projected = project_message(msg, handles).or_else(|| {
360            if is_protected { None } else { project_assistant_narration(msg, collapse_narration) }
361        });
362        let effective = projected.as_ref().unwrap_or(msg);
363        let tokens = match &projected {
364            Some(p) => engine.count_message(p),
365            None => msg.token_count.unwrap_or_else(|| engine.count_message(msg)),
366        };
367        if tokens == 0 { continue; }
368
369        if is_protected {
370            kept_rev.push(effective.clone());
371            remaining = remaining.saturating_sub(tokens);
372            continue;
373        }
374
375        if tokens <= remaining {
376            kept_rev.push(effective.clone());
377            remaining = remaining.saturating_sub(tokens);
378        } else if remaining > 0 {
379            match &effective.content {
380                // P0-B1: drop a Text boundary message **whole** rather than mid-truncate. A
381                // truncated body's bytes depend on `remaining`, which varies per turn — that churns
382                // turns[0] and invalidates the entire cached prefix. Compaction normally keeps
383                // history under budget, so this overflow path is a rare safety net; keeping every
384                // kept turn a complete message preserves prompt-cache reuse.
385                Content::Text(_) => {}
386                // A Parts message was already included whole (byte-stable) — unchanged.
387                Content::Parts(_) => kept_rev.push(effective.clone()),
388            }
389            break;
390        } else {
391            break;
392        }
393    }
394
395    kept_rev.reverse();
396    let mut turns = kept_rev;
397    normalize_turn_prefix(&mut turns);
398
399    // The State turn (task_state + signals) is volatile — keep it OUT of the
400    // cacheable history. Providers render it after the history (Anthropic) or
401    // prepended (OpenAI). See RenderedContext docs.
402    let state_turn = build_state_turn(partitions);
403
404    // P1-E: locate the frozen-prefix boundary in rendered turns. `frozen_history_len` is the
405    // history length as of the last compaction (0 before any) — messages beyond it are the hot
406    // tail that grows each turn. We count the hot tail from the END, which is robust to the leading
407    // anchor and to budget-dropping of OLD turns (the recent tail is never dropped). Emit `Some`
408    // only for a distinct, non-empty frozen region; otherwise providers use the rolling-pair
409    // fallback (deep == tail would waste a breakpoint).
410    let hot = partitions
411        .history
412        .messages
413        .len()
414        .saturating_sub(frozen_history_len);
415    let frozen_prefix_len = if frozen_history_len > 0 && hot > 0 && hot < turns.len() {
416        Some(turns.len() - hot)
417    } else {
418        None
419    };
420
421    RenderedContext { system_text, system_stable, system_knowledge, turns, state_turn, frozen_prefix_len }
422}
423
424#[cfg(test)]
425mod tests {
426    use super::*;
427    use crate::context::config::ContextConfig;
428    use crate::context::partitions::ContextPartitions;
429    use crate::context::task_state::{PlanStep, TaskState};
430    use crate::context::token_engine::ContextTokenEngine;
431    use crate::types::message::{Message, Role};
432
433    fn engine() -> ContextTokenEngine { ContextTokenEngine::char_approx() }
434    fn ctx() -> ContextPartitions { ContextPartitions::new(&ContextConfig::default()) }
435
436    #[test]
437    fn system_stable_contains_system_partition() {
438        let mut c = ctx();
439        c.system.push(Message::system("You are helpful."), 10);
440        let rc = render(&c, 10_000, &engine(), 4);
441        assert!(rc.system_stable.contains("You are helpful."));
442        assert!(rc.system_text.contains("You are helpful."));
443    }
444
445    #[test]
446    fn system_knowledge_contains_knowledge_partition() {
447        let mut c = ctx();
448        c.knowledge.push(Message::system("skill: debug"), 10);
449        let rc = render(&c, 10_000, &engine(), 4);
450        assert!(rc.system_knowledge.contains("skill: debug"));
451        assert!(rc.system_text.contains("skill: debug"));
452    }
453
454    #[test]
455    fn task_state_appears_in_state_turn() {
456        let mut c = ctx();
457        c.task_state = TaskState { goal: "find the bug".to_string(), ..Default::default() };
458        let rc = render(&c, 10_000, &engine(), 4);
459        assert!(!rc.system_text.contains("[TASK STATE]"), "task_state must not be in system_text");
460        let state = rc.state_turn.as_ref().expect("should have a state turn");
461        assert_eq!(state.role, Role::User);
462        assert!(state.content.as_text().unwrap().contains("[TASK STATE] goal: find the bug"));
463        // State is NOT in the cacheable history turns.
464        assert!(!rc.turns.iter().any(|m| m.content.as_text().map(|t| t.contains("[TASK STATE]")).unwrap_or(false)));
465    }
466
467    #[test]
468    fn signals_appear_in_state_turn() {
469        let mut c = ctx();
470        c.task_state = TaskState { goal: "g".to_string(), ..Default::default() };
471        c.signals.push("[ROLLBACK] tool failed".to_string());
472        let rc = render(&c, 10_000, &engine(), 4);
473        let state = rc.state_turn.as_ref().unwrap();
474        assert!(state.content.as_text().unwrap().contains("[ROLLBACK] tool failed"));
475    }
476
477    #[test]
478    fn empty_task_state_no_state_turn() {
479        let c = ctx();
480        let rc = render(&c, 10_000, &engine(), 4);
481        // No state turn when task_state is empty and no signals
482        assert!(rc.state_turn.is_none());
483        assert!(rc.turns.is_empty());
484    }
485
486    #[test]
487    fn history_excludes_state_turn() {
488        let mut c = ctx();
489        c.task_state = TaskState { goal: "g".to_string(), ..Default::default() };
490        c.history.push(Message::user("step 1"), 5);
491        c.history.push(Message::assistant("done"), 5);
492        let rc = render(&c, 10_000, &engine(), 4);
493        // turns is history only; state lives in state_turn.
494        assert!(rc.state_turn.as_ref().unwrap().content.as_text().unwrap().contains("[TASK STATE]"));
495        assert_eq!(rc.turns[0].role, Role::User);
496        assert_eq!(rc.turns[0].content.as_text(), Some("step 1"));
497        assert_eq!(rc.turns[1].role, Role::Assistant);
498    }
499
500    #[test]
501    fn all_assistant_tool_history_gets_anchor_user_turn() {
502        let mut c = ctx();
503        c.history.push(Message::assistant("reply"), 5);
504        let rc = render(&c, 10_000, &engine(), 4);
505        assert_eq!(rc.turns[0].role, Role::User);
506    }
507
508    #[test]
509    fn zero_token_messages_skipped() {
510        let mut c = ctx();
511        c.history.push(Message::user("zero"), 0);
512        c.history.push(Message::user("real"), 5);
513        let rc = render(&c, 10_000, &engine(), 4);
514        // Only "real" in history turns (state turn absent — no task_state)
515        assert!(rc.turns.iter().any(|m| m.content.as_text() == Some("real")));
516        assert!(!rc.turns.iter().any(|m| m.content.as_text() == Some("zero")));
517    }
518
519    #[test]
520    fn collapsed_tool_result_renders_as_preview_without_mutating_history() {
521        use crate::mm::handle::{Handle, HandleKind, HandleTable, Residency};
522
523        let mut c = ctx();
524        let long = "DATA ".repeat(200); // 1000 bytes
525        c.history.push(
526            Message::tool(vec![ContentPart::ToolResult {
527                call_id: "c1".into(),
528                output: long.clone(),
529                is_error: false,
530            }]),
531            250,
532        );
533
534        let mut handles = HandleTable::new();
535        let mut h = Handle::resident_for(1, HandleKind::ToolResult, 250, "c1");
536        h.residency = Residency::Collapsed;
537        handles.insert(h);
538
539        let rc = render_projected(&c, 10_000, &engine(), 4, &handles, 0, false);
540        let rendered: String = rc
541            .turns
542            .iter()
543            .flat_map(|m| match &m.content {
544                Content::Parts(parts) => parts.clone(),
545                _ => Vec::new(),
546            })
547            .find_map(|p| match p {
548                ContentPart::ToolResult { output, .. } => Some(output),
549                _ => None,
550            })
551            .expect("tool result rendered");
552        // Rendered copy is a preview; original full output is retained in history.
553        assert!(rendered.contains("[collapsed:"));
554        assert!(rendered.len() < long.len());
555        let stored = match &c.history.messages[0].content {
556            Content::Parts(parts) => match &parts[0] {
557                ContentPart::ToolResult { output, .. } => output.clone(),
558                _ => unreachable!(),
559            },
560            _ => unreachable!(),
561        };
562        assert_eq!(stored, long, "projection must not mutate stored history");
563    }
564
565    #[test]
566    fn resident_tool_result_renders_in_full() {
567        use crate::mm::handle::{Handle, HandleKind, HandleTable};
568
569        let mut c = ctx();
570        let body = "RESIDENT BODY ".repeat(20);
571        c.history.push(
572            Message::tool(vec![ContentPart::ToolResult {
573                call_id: "c2".into(),
574                output: body.clone(),
575                is_error: false,
576            }]),
577            60,
578        );
579        let mut handles = HandleTable::new();
580        handles.insert(Handle::resident_for(1, HandleKind::ToolResult, 60, "c2"));
581
582        let rc = render_projected(&c, 10_000, &engine(), 4, &handles, 0, false);
583        let rendered: String = rc
584            .turns
585            .iter()
586            .flat_map(|m| match &m.content {
587                Content::Parts(parts) => parts.clone(),
588                _ => Vec::new(),
589            })
590            .find_map(|p| match p {
591                ContentPart::ToolResult { output, .. } => Some(output),
592                _ => None,
593            })
594            .expect("tool result rendered");
595        assert_eq!(rendered, body);
596        assert!(!rendered.contains("[collapsed:"));
597    }
598
599    // ── P1-F: state-turn recency footer ───────────────────────────────────
600
601    #[test]
602    fn state_turn_footer_leads_with_next_step_not_bare_goal() {
603        let mut c = ctx();
604        c.task_state = TaskState {
605            goal: "ship the cache work".to_string(),
606            plan: vec![PlanStep { label: "do E".to_string(), done: false }],
607            current_step: Some(0),
608            ..Default::default()
609        };
610        c.task_state.record_directive("don't break ABI");
611        let rc = render(&c, 100_000, &engine(), 4);
612        let text = rc.state_turn.unwrap().content.as_text().unwrap().to_string();
613
614        // The full TASK STATE block still LEADS (primacy) — goal-adherence preserved ...
615        assert!(text.starts_with("[TASK STATE] goal: ship the cache work"));
616        // ... but the peak-attention footer leads with the forward action, not a goal restatement.
617        let before_proceed = text.rsplit_once("\n\nProceed.").expect("ends with Proceed").0;
618        let last_block = before_proceed.rsplit("\n\n").next().unwrap();
619        assert!(last_block.starts_with("→ next: step 1 — do E"), "got: {last_block}");
620        assert!(last_block.contains("must: don't break ABI"));
621        // The bare goal must NOT be re-injected at the peak-attention tail (the repetition fuel).
622        assert!(!last_block.contains("focus: ship the cache work"), "got: {last_block}");
623    }
624
625    #[test]
626    fn footer_falls_back_to_focus_goal_when_nothing_done_yet() {
627        // Turn 1: no actions, no plan — the footer surfaces the goal so the model knows the objective.
628        let mut c = ctx();
629        c.task_state = TaskState { goal: "build the thing".to_string(), ..Default::default() };
630        let rc = render(&c, 100_000, &engine(), 4);
631        let text = rc.state_turn.unwrap().content.as_text().unwrap().to_string();
632        let footer = text.rsplit_once("\n\nProceed.").unwrap().0.rsplit("\n\n").next().unwrap();
633        assert_eq!(footer, "→ focus: build the thing");
634    }
635
636    #[test]
637    fn footer_shows_recent_actions_and_forward_nudge_without_a_plan() {
638        // No curated plan, but real tool activity (2b) → the footer shows motion + a forward nudge,
639        // and the goal is NOT restated at the tail.
640        let mut c = ctx();
641        c.task_state = TaskState { goal: "rebuild §4.4 as SVG".to_string(), ..Default::default() };
642        c.task_state.note_actions("module_list");
643        c.task_state.note_actions("module_read");
644        let rc = render(&c, 100_000, &engine(), 4);
645        let footer = rc.state_turn.unwrap().content.as_text().unwrap()
646            .rsplit_once("\n\nProceed.").unwrap().0.rsplit("\n\n").next().unwrap().to_string();
647        assert!(footer.contains("did: module_list → module_read"), "got: {footer}");
648        assert!(footer.contains("next: advance the goal"), "got: {footer}");
649        assert!(!footer.contains("focus: rebuild §4.4 as SVG"), "goal must not lead the footer");
650    }
651
652    #[test]
653    fn footer_raises_stop_on_repeated_action() {
654        // Three identical actions in a row must surface the explicit STOP line in the footer.
655        let mut context = ctx();
656        context.task_state = TaskState { goal: String::from("g"), ..Default::default() };
657        for _ in 0..3 {
658            context.task_state.note_actions("document_read");
659        }
660        let state_turn = render(&context, 100_000, &engine(), 4).state_turn.unwrap();
661        let state_text = state_turn.content.as_text().unwrap();
662        let footer = state_text.rsplit_once("\n\nProceed.").unwrap().0.rsplit("\n\n").next().unwrap();
663        assert!(footer.contains("STOP: `document_read` repeated 3×"), "got: {footer}");
664    }
665
666    #[test]
667    fn no_salience_footer_without_a_goal() {
668        let mut context = ctx();
669        context.signals.push(String::from("[ROLLBACK] tool failed"));
670        let state_turn = render(&context, 100_000, &engine(), 4).state_turn.unwrap();
671        let state_text = state_turn.content.as_text().unwrap();
672        // No goal was set, so no focus footer may appear; the signal text must still be rendered.
673        assert!(!state_text.contains("→ focus:"), "no goal ⇒ no footer");
674        assert!(state_text.contains("[ROLLBACK] tool failed"));
675    }
676
677    // ── P0-A: prefix fingerprint (cache-drift instrument) ──────────────────
678
679    #[test]
680    fn prefix_fingerprint_is_stable_when_appending_history() {
681        let mut context = ctx();
682        context.system.push(Message::system("rules"), 5);
683        context.knowledge.push(Message::system("skill: debug"), 5);
684        context.history.push(Message::user("turn A"), 5);
685        context.history.push(Message::assistant("turn B"), 5);
686        let before = render(&context, 100_000, &engine(), 4).prefix_fingerprint();
687
688        // One more turn is appended; everything rendered before it has to hash the same.
689        context.history.push(Message::user("turn C"), 5);
690        let after = render(&context, 100_000, &engine(), 4).prefix_fingerprint();
691
692        assert!(after.extends(&before), "appending must only grow the tail, never drift the prefix");
693        assert_eq!(after.common_turn_prefix(&before), 2, "both prior turns stay cache-reusable");
694        assert_eq!(after.turn_hashes.len(), 3);
695    }
696
697    #[test]
698    fn prefix_fingerprint_ignores_state_turn() {
699        // Only task_state and signals change between the two renders; since state is kept out
700        // of `turns`, the prefix fingerprint has to come out equal.
701        let mut context = ctx();
702        context.history.push(Message::user("turn A"), 5);
703        context.task_state = TaskState { goal: String::from("first goal"), ..Default::default() };
704        let first = render(&context, 100_000, &engine(), 4).prefix_fingerprint();
705
706        context.task_state = TaskState { goal: String::from("totally different goal"), ..Default::default() };
707        context.signals.push(String::from("[ROLLBACK] whatever"));
708        let second = render(&context, 100_000, &engine(), 4).prefix_fingerprint();
709
710        assert_eq!(first, second, "volatile state must not perturb the cacheable prefix");
711    }
712
713    #[test]
714    fn prefix_fingerprint_detects_system_drift() {
715        let mut context = ctx();
716        context.system.push(Message::system("rules v1"), 5);
717        context.history.push(Message::user("turn A"), 5);
718        let original = render(&context, 100_000, &engine(), 4).prefix_fingerprint();
719        // Replace the system message with an edited one while history stays the same.
720        context.system.messages.clear();
721        context.system.push(Message::system("rules v2"), 5);
722        let edited = render(&context, 100_000, &engine(), 4).prefix_fingerprint();
723
724        assert_ne!(original.system_stable_hash, edited.system_stable_hash);
725        assert!(!edited.extends(&original), "a system-block edit invalidates the whole prefix");
726    }
727
728    #[test]
729    fn prefix_fingerprint_detects_in_place_collapse_churn() {
730        use crate::mm::handle::{Handle, HandleKind, HandleTable, Residency};
731
732        let mut context = ctx();
733        context.history.push(Message::user("start"), 5);
734        // A bulky tool result sits between two user turns; it is the one that gets collapsed.
735        let tool_result = ContentPart::ToolResult {
736            call_id: "c1".into(),
737            output: "DATA ".repeat(200),
738            is_error: false,
739        };
740        context.history.push(Message::tool(vec![tool_result]), 250);
741        context.history.push(Message::user("recent"), 5);
742
743        // Baseline fingerprint from plain `render`, before any handle is marked collapsed.
744        let resident = render(&context, 100_000, &engine(), 4).prefix_fingerprint();
745
746        // Register a handle for call `c1` with Collapsed residency and render again through the
747        // projection path; that turn is rewritten in place, so its hash in the prefix changes.
748        let mut handles = HandleTable::new();
749        let mut collapsed_handle = Handle::resident_for(1, HandleKind::ToolResult, 250, "c1");
750        collapsed_handle.residency = Residency::Collapsed;
751        handles.insert(collapsed_handle);
752        let collapsed =
753            render_projected(&context, 100_000, &engine(), 4, &handles, 0, false).prefix_fingerprint();
754
755        // Turn 0 ("start") hashes the same in both renders; the first difference is at turn 1.
756        assert_eq!(collapsed.common_turn_prefix(&resident), 1, "drift begins at the collapsed turn");
757        assert!(!collapsed.extends(&resident));
758    }
759
760    // ── Method 1: assistant-narration collapse ─────────────────────────────
761
762    /// Test helper: an assistant message holding `text` with exactly one tool call attached
763    /// (`module_read`, id `c1`, empty JSON-object arguments).
764    fn assistant_with_call(text: &str) -> Message {
765        let mut message = Message::assistant(text);
766        let arguments = serde_json::json!({});
767        let (id, name) = ("c1".into(), "module_read".into());
768        message.tool_calls = vec![crate::types::message::ToolCall { id, name, arguments }];
769        message
770    }
771
772    #[test]
773    fn old_assistant_narration_collapses_keeping_tool_calls() {
774        // Oldest turn = narrated preamble + tool call; five newer user turns push it past the window.
775        let mut context = ctx();
776        context.history.push(assistant_with_call("好的,我来将 §4.4 的 Mermaid 部署架构图重新构建为 SVG 版本。先找到当前 Mermaid 模块的位置。"), 60);
777        for n in 0..5 { context.history.push(Message::user(format!("recent {n}")), 5); }
778        let has_preamble = |turns: &[Message]| {
779            turns.iter().filter_map(|m| m.content.as_text()).any(|t| t.contains("先找到当前 Mermaid"))
780        };
781
782        // Collapse ON (preserve window = 4): the old narration is swapped for the stub, and the
783        // tool call stays attached to it so call/result pairing is intact.
784        let collapsed = render_projected(&context, 100_000, &engine(), 4, &HandleTable::new(), 0, true);
785        let stub_turn = collapsed.turns.iter().find(|m| m.content.as_text() == Some(NARRATION_STUB));
786        let stub_turn = stub_turn.expect("old narration replaced by stub");
787        assert_eq!(stub_turn.tool_calls.len(), 1, "tool call (pairing) preserved");
788        assert_eq!(stub_turn.tool_calls[0].name, "module_read");
789        assert!(!has_preamble(&collapsed.turns));
790        // Projection is non-destructive: the stored history still carries the original text.
791        assert!(context.history.messages[0].content.as_text().unwrap().contains("先找到当前 Mermaid"));
792
793        // Collapse OFF: the narration is rendered verbatim.
794        let verbatim = render_projected(&context, 100_000, &engine(), 4, &HandleTable::new(), 0, false);
795        assert!(has_preamble(&verbatim.turns));
796    }
797
798    #[test]
799    fn recent_assistant_narration_within_window_is_not_collapsed() {
800        // Two turns against a preserve window of 4: the narration turn is protected from collapse.
801        let mut context = ctx();
802        context.history.push(assistant_with_call("好的,我来将 §4.4 重新构建为 SVG。先定位模块位置确认范围读取内容。"), 60);
803        context.history.push(Message::user("ok"), 5);
804        let turns = render_projected(&context, 100_000, &engine(), 4, &HandleTable::new(), 0, true).turns;
805        assert!(turns.iter().filter_map(|m| m.content.as_text()).any(|t| t.contains("先定位模块位置")), "recent narration kept verbatim");
806    }
807
808    #[test]
809    fn assistant_without_tool_calls_is_never_collapsed() {
810        // An assistant turn with no tool calls is a real answer; it has to survive even once old.
811        let mut context = ctx();
812        context.history.push(Message::assistant("这是给用户的最终结论,包含实质内容,不应被折叠掉以免丢信息。"), 40);
813        for n in 0..5 { context.history.push(Message::user(format!("r{n}")), 5); }
814        let turns = render_projected(&context, 100_000, &engine(), 4, &HandleTable::new(), 0, true).turns;
815        assert!(turns.iter().filter_map(|m| m.content.as_text()).any(|t| t.contains("最终结论")), "answer-only turns are not narration");
816    }
817
818    #[test]
819    fn collapsing_narration_drifts_only_that_turn_in_the_cache_prefix() {
820        // Collapse off vs. on: turn 0 keeps its hash; drift starts at narration turn 1.
821        let mut context = ctx();
822        context.history.push(Message::user("start"), 5);
823        context.history.push(assistant_with_call("好的,我来将 §4.4 重新构建为 SVG 版本。先找到 Mermaid 模块的确切位置再读取其内容。"), 60);
824        for n in 0..4 { context.history.push(Message::user(format!("recent {n}")), 5); }
825        let fingerprint = |collapse: bool| {
826            render_projected(&context, 100_000, &engine(), 4, &HandleTable::new(), 0, collapse)
827                .prefix_fingerprint()
828        };
829        let (verbatim, collapsed) = (fingerprint(false), fingerprint(true));
830        assert_eq!(collapsed.common_turn_prefix(&verbatim), 1, "only the collapsed turn drifts");
831        assert!(!collapsed.extends(&verbatim));
832    }
833
834    #[test]
835    fn protected_recent_messages_kept_whole_over_budget() {
836        // Both turns sit inside the protected window of 4, so the 10-token budget must not
837        // evict the first one.
838        let mut context = ctx();
839        context.history.push(Message::user("first message"), 5);
840        context.history.push(Message::user("a".repeat(1000)), 250);
841        let turns = render(&context, 10, &engine(), 4).turns;
842        let mut texts = turns.iter().filter_map(|m| m.content.as_text());
843        assert!(texts.any(|t| t.contains("first message")));
844    }
845
846    #[test]
847    fn oversized_text_boundary_is_dropped_whole_not_truncated() {
848        // P0-B1: with nothing protected and a budget of 5, the oversized oldest turn cannot
849        // fit. It has to disappear entirely rather than leave a cut-off fragment behind.
850        let mut context = ctx();
851        // Oldest turn: far larger than the budget.
852        context.history.push(Message::user("a".repeat(1000)), 250);
853        // Newest turn: small enough to fit.
854        context.history.push(Message::user("recent"), 2);
855        // A preserve window of 0 protects no turn.
856        let turns = render(&context, 5, &engine(), 0).turns;
857        assert_eq!(turns.len(), 1, "only the fitting newest turn survives");
858        assert_eq!(turns[0].content.as_text(), Some("recent"));
859        let mut texts = turns.iter().filter_map(|m| m.content.as_text());
860        assert!(
861            texts.all(|t| !t.starts_with("aaaa")),
862            "no truncated body in the prefix"
863        );
864    }
865}