Skip to main content

deepstrike_core/context/
renderer.rs

1use super::partitions::ContextPartitions;
2use super::snapshot::stable_hash;
3use super::task_state::TaskState;
4use super::token_engine::ContextTokenEngine;
5use crate::mm::handle::{HandleTable, Residency};
6use crate::types::message::{Content, ContentPart, Message, Role};
7use serde::{Deserialize, Serialize};
8
/// Structured render output aligned with LLM API slots.
///
/// Slot 1 — system_stable:    Identity (system partition). Anthropic system[0] cache_control.
/// Slot 2 — system_knowledge: Knowledge partition. Anthropic system[1] cache_control.
/// Slot 3 — turns[0..N]:      History turns (stable, cacheable prefix).
/// Slot 4 — state_turn:       State (task_state + signals), rebuilt every call.
///
/// The State turn is kept OUT of `turns` so the history prefix stays byte-stable
/// across turns and can be prompt-cached. Providers place `state_turn` themselves:
/// Anthropic appends it AFTER the message-history cache breakpoint (so the volatile
/// state is the cheap uncached tail); OpenAI-family prepend it (preserving today's
/// ordering). When this struct is produced by an older binding that has not been
/// rebuilt, `state_turn` is absent and `turns[0]` still carries the State turn —
/// providers handle both shapes.
///
/// system_text = system_stable + system_knowledge (for OpenAI which has one system slot).
///
/// Both optional fields are `#[serde(default)]` and skipped when `None`, so a payload
/// that omits them still deserialises.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RenderedContext {
    /// Identity + Knowledge combined — for providers with a single system slot (OpenAI).
    /// The two segments are joined with a blank line; an empty segment is left out.
    pub system_text: String,
    /// Identity only (system partition). Anthropic system[0] with cache_control.
    pub system_stable: String,
    /// Knowledge (memory retrievals, skill definitions, artifacts). Anthropic system[1] with cache_control.
    pub system_knowledge: String,
    /// History turns only — the stable, cacheable message prefix. When the kept history would
    /// otherwise open with an assistant/tool message, a `[context resumed]` user anchor is
    /// inserted at index 0.
    pub turns: Vec<Message>,
    /// Volatile State turn (task_state + signals), rebuilt every call. Rendered
    /// after the cacheable history. `None` when the compact task state is empty and there
    /// are no signals.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub state_turn: Option<Message>,
    /// P1-E: number of leading `turns` that form the **frozen prefix** — byte-stable until the
    /// next compaction. Providers that place explicit cache breakpoints (Anthropic) pin one *deep*
    /// breakpoint at this boundary (a long-lived cache that survives many turns and is immune to
    /// the 20-block lookback miss on heavy tool turns) and roll the other at the tail. `None` when
    /// there is no distinct frozen region yet (pre-first-compaction, or the whole render is hot) —
    /// providers then fall back to the rolling-pair placement. Providers clamp out-of-range values.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub frozen_prefix_len: Option<usize>,
}
48
/// Fingerprint of one render's **cacheable prefix**: the two system segments plus
/// the ordered history `turns`. The volatile `state_turn` and the kernel-only
/// `token_count` metadata are deliberately left out because neither is part of the
/// cached wire prefix. This is the metrics-first instrument (P0-A): two renders can
/// share a KV / prompt-cache prefix iff their system hashes agree *and* one
/// `turn_hashes` vector is a prefix of the other. The value is pure and derived — it
/// is never persisted in snapshots, session logs, or event logs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PrefixFingerprint {
    /// Hash of the identity (system partition) text.
    pub system_stable_hash: u64,
    /// Hash of the knowledge partition text.
    pub system_knowledge_hash: u64,
    /// One stable hash per history turn, oldest first. The longest common prefix
    /// with an earlier render's vector is the number of turns still cache-reusable.
    pub turn_hashes: Vec<u64>,
}

impl PrefixFingerprint {
    /// Returns `true` when `self` is a byte-stable *extension* of `prev`: both system
    /// hashes are equal and `prev.turn_hashes` is a leading run of `self.turn_hashes`.
    /// That is the cache-reuse condition — nothing drifted, the tail only grew.
    pub fn extends(&self, prev: &PrefixFingerprint) -> bool {
        let same_system = self.system_stable_hash == prev.system_stable_hash
            && self.system_knowledge_hash == prev.system_knowledge_hash;
        // `starts_with` is false when `prev` is longer, and true for an empty `prev`.
        same_system && self.turn_hashes.starts_with(&prev.turn_hashes)
    }

    /// Counts the leading turns whose hashes match `prev` position-for-position.
    /// A result below `prev.turn_hashes.len()` means a turn inside the old prefix
    /// changed (e.g. an in-place collapse), which invalidates the cache from there.
    pub fn common_turn_prefix(&self, prev: &PrefixFingerprint) -> usize {
        let mut shared = 0;
        for (mine, theirs) in self.turn_hashes.iter().zip(&prev.turn_hashes) {
            if mine != theirs {
                break;
            }
            shared += 1;
        }
        shared
    }
}
88
89/// Wire-relevant hash of one turn: role + content + tool_calls, **excluding**
90/// `token_count` (kernel-only metadata that never reaches the provider). Serialised
91/// through serde so every content variant and tool-call argument is covered with a
92/// deterministic field order.
93fn hash_turn(msg: &Message) -> u64 {
94    let material =
95        serde_json::to_vec(&(&msg.role, &msg.content, &msg.tool_calls)).unwrap_or_default();
96    stable_hash(&material)
97}
98
99impl RenderedContext {
100    /// Compute the [`PrefixFingerprint`] for this render. See its docs for the
101    /// cache-reuse contract it certifies.
102    pub fn prefix_fingerprint(&self) -> PrefixFingerprint {
103        PrefixFingerprint {
104            system_stable_hash: stable_hash(self.system_stable.as_bytes()),
105            system_knowledge_hash: stable_hash(self.system_knowledge.as_bytes()),
106            turn_hashes: self.turns.iter().map(hash_turn).collect(),
107        }
108    }
109}
110
111fn build_system_stable(partitions: &ContextPartitions) -> String {
112    partitions.system.messages
113        .iter()
114        .filter_map(|m| m.content.as_text())
115        .collect::<Vec<_>>()
116        .join("\n\n")
117}
118
119fn build_system_knowledge(partitions: &ContextPartitions) -> String {
120    partitions.knowledge.messages
121        .iter()
122        .filter_map(|m| m.content.as_text())
123        .collect::<Vec<_>>()
124        .join("\n\n")
125}
126
127/// P1-F (+ 2b/2c): a one-line recency footer at the *last* content before the "Proceed." anchor —
128/// the highest-attention position in the prompt (the model attends most to the final tokens).
129///
130/// It LEADS WITH FORWARD MOTION (what just happened · what to do next · the standing directive), not
131/// a verbatim restatement of the goal. Re-injecting the bare goal at this peak-attention slot every
132/// turn primes the model to *re-narrate intent* ("好的,我来将<goal>…") instead of acting — an
133/// undamped repetition trap when there is no plan/progress to advance. The full goal still LEADS the
134/// TASK STATE block above (primacy + reference), so goal-adherence is preserved; the footer restates
135/// the goal only when nothing has happened yet (e.g. turn 1, no actions). `None` when there is no goal.
136///
137/// The "just did" clause is kernel-derived from `recent_actions` (real tool activity), and a trailing
138/// run of an identical action raises an explicit STOP — a cheap no-progress backstop that breaks the
139/// read→re-read→re-narrate loop in-band, at the position the model weights most.
140fn salience_footer(ts: &TaskState) -> Option<String> {
141    if ts.goal.is_empty() {
142        return None;
143    }
144    let mut clauses: Vec<String> = Vec::new();
145
146    // What just happened — so the peak-attention slot shows motion, not a blank restart.
147    let recent = ts.recent_actions.as_slice();
148    if let Some(last) = recent.last() {
149        let start = recent.len().saturating_sub(3);
150        clauses.push(format!("just did: {}", recent[start..].join(" → ")));
151
152        // No-progress backstop: the same action repeated on the last ≥2 turns is a stall. Surface an
153        // explicit stop so the model breaks the loop instead of re-narrating the same plan.
154        let trailing_repeat = recent.iter().rev().take_while(|a| *a == last).count();
155        if trailing_repeat >= 2 {
156            clauses.push(format!(
157                "STOP: `{last}` repeated {trailing_repeat}× with no progress — take a DIFFERENT \
158                 concrete action or report the blocker; do not repeat it"
159            ));
160        }
161    }
162
163    // What to do next — the active plan step if the model maintains one, else a forward nudge.
164    let active_step = ts
165        .current_step
166        .and_then(|i| ts.plan.get(i).map(|s| (i, s)))
167        .filter(|(_, s)| !s.done);
168    if let Some((i, step)) = active_step {
169        clauses.push(format!("next: step {} — {}", i + 1, step.label));
170    } else if !recent.is_empty() {
171        clauses.push(
172            "next: take the next concrete action toward the goal; do not re-state the plan".to_string(),
173        );
174    }
175
176    if let Some(d) = ts.directives.last() {
177        clauses.push(format!("must: {d}"));
178    }
179
180    // Lead with the goal only when no forward clause fills the footer (turn 1, nothing done yet);
181    // otherwise the forward clauses carry the salience and the goal stays in the block above.
182    let body = if clauses.is_empty() {
183        format!("→ focus: {}", ts.goal)
184    } else {
185        format!("→ {}", clauses.join(" · "))
186    };
187    Some(body)
188}
189
190/// Build the State turn (the volatile tail): task_state + signals + a recency focus footer +
191/// "Proceed." anchor. The footer sits last (just before "Proceed.") so the current goal/step/
192/// directive land in the prompt's highest-attention position (P1-F).
193fn build_state_turn(partitions: &ContextPartitions) -> Option<Message> {
194    let task = partitions.task_state.format_compact();
195    if task.is_empty() && partitions.signals.is_empty() {
196        return None;
197    }
198    let mut parts: Vec<String> = Vec::new();
199    if !task.is_empty() {
200        parts.push(task);
201    }
202    let signals_text = partitions.signals.join("\n");
203    if !signals_text.is_empty() {
204        parts.push(signals_text);
205    }
206    if let Some(footer) = salience_footer(&partitions.task_state) {
207        parts.push(footer);
208    }
209    let body = parts.join("\n\n");
210    Some(Message::user(format!("{body}\n\nProceed.")))
211}
212
213/// Ensure turns start with a user message.
214/// After AutoCompact the preserved tail may be all assistant/tool — insert an anchor.
215fn normalize_turn_prefix(turns: &mut Vec<Message>) {
216    if !turns.is_empty() && matches!(turns[0].role, Role::Assistant | Role::Tool) {
217        turns.insert(0, Message::user("[context resumed]"));
218    }
219}
220
/// Layer-4 read-time projection: replace the body of a `Collapsed` tool result with a short
/// preview followed by a marker line. Non-destructive — the caller keeps the full output in
/// history and only the rendered copy shrinks.
///
/// * An output of at most `PREVIEW_BYTES` bytes is returned unchanged: nothing would be hidden,
///   and appending the ellipsis and marker would make the "collapsed" copy *larger* than the
///   original, costing budget instead of freeing it.
/// * Otherwise the preview is the longest prefix of at most `PREVIEW_BYTES` bytes that ends on
///   a UTF-8 character boundary, followed by `…` and a marker stating how many **characters**
///   were hidden. The count is in characters (not bytes) so the figure matches the marker's
///   wording for multi-byte text such as CJK.
fn collapse_preview(output: &str) -> String {
    const PREVIEW_BYTES: usize = 160;

    // A projection must never enlarge the message it is meant to shrink.
    if output.len() <= PREVIEW_BYTES {
        return output.to_string();
    }

    // Back off from the byte limit to the nearest char boundary; index 0 always qualifies.
    let end = (0..=PREVIEW_BYTES)
        .rev()
        .find(|&idx| output.is_char_boundary(idx))
        .unwrap_or(0);
    let (preview, hidden) = output.split_at(end);
    let dropped = hidden.chars().count();
    format!(
        "{preview}…\n[collapsed: {dropped} chars projected out of view; full result retained in history]"
    )
}
236
/// Stub substituted for a collapsed assistant preamble. Carries no goal text (that would re-seed the
/// very repetition this removes) and points the model at the authoritative State turn instead.
/// A message whose text already equals this stub is never collapsed again.
const NARRATION_STUB: &str = "[earlier narration collapsed; tool call(s) preserved below — current progress is in the TASK STATE block]";

/// Minimum narration length (chars, CJK-aware) worth collapsing. Short preambles aren't worth a
/// stub substitution (and the one-time cache churn it costs as the turn ages out of the window).
/// The length is measured in Unicode scalar values (`chars().count()`), not bytes.
const NARRATION_COLLAPSE_MIN_CHARS: usize = 40;
244
245/// Method 1: read-time collapse of an OLD assistant turn's narration. Targets exactly the
246/// "preamble before action" turns — `Role::Assistant`, a `Content::Text` body, AND a non-empty
247/// `tool_calls` (the model narrated intent, then acted). Returns a projected copy whose text is
248/// replaced by [`NARRATION_STUB`] while `tool_calls` (and thus tool_use/tool_result pairing) are
249/// left intact; the original full text stays in `partitions.history`, so the projection reverses if
250/// the flag is turned off. `None` when the message isn't a collapsible narration turn or the flag is
251/// off. Caller restricts this to messages already past the protected recent window.
252fn project_assistant_narration(msg: &Message, enabled: bool) -> Option<Message> {
253    if !enabled || msg.role != Role::Assistant || msg.tool_calls.is_empty() {
254        return None;
255    }
256    let Content::Text(text) = &msg.content else {
257        return None;
258    };
259    if text == NARRATION_STUB || text.chars().count() < NARRATION_COLLAPSE_MIN_CHARS {
260        return None;
261    }
262    let mut projected = msg.clone();
263    projected.content = Content::Text(NARRATION_STUB.to_string());
264    projected.token_count = None; // recomputed against the smaller stub
265    Some(projected)
266}
267
268/// If any of `msg`'s tool-result parts is `Collapsed` per the handle table, return a projected
269/// copy with those parts previewed; `None` if nothing is collapsed (render the message as-is).
270fn project_message(msg: &Message, handles: &HandleTable) -> Option<Message> {
271    let Content::Parts(parts) = &msg.content else {
272        return None;
273    };
274    let mut changed = false;
275    let new_parts: Vec<ContentPart> = parts
276        .iter()
277        .map(|part| match part {
278            ContentPart::ToolResult { call_id, output, is_error }
279                if matches!(
280                    handles.residency_for_source(call_id),
281                    Some(Residency::Collapsed)
282                ) =>
283            {
284                changed = true;
285                ContentPart::ToolResult {
286                    call_id: call_id.clone(),
287                    output: collapse_preview(output),
288                    is_error: *is_error,
289                }
290            }
291            other => other.clone(),
292        })
293        .collect();
294    if changed {
295        let mut projected = msg.clone();
296        projected.content = Content::Parts(new_parts);
297        projected.token_count = None; // recomputed against the smaller projected body
298        Some(projected)
299    } else {
300        None
301    }
302}
303
304/// Render the context into a `RenderedContext` suitable for a provider API call.
305///
306/// Equivalent to [`render_projected`] with an empty handle table (no Layer-4 projection) and no
307/// frozen-prefix boundary (`frozen_history_len = 0` → `frozen_prefix_len` is always `None`).
308pub fn render(
309    partitions: &ContextPartitions,
310    budget: u32,
311    engine: &ContextTokenEngine,
312    preserve_recent_msgs: usize,
313) -> RenderedContext {
314    // The convenience wrapper renders history verbatim (no narration collapse) — callers that want
315    // Method-1 collapse drive `render_projected` with the flag (the kernel passes it from config).
316    render_projected(partitions, budget, engine, preserve_recent_msgs, &HandleTable::new(), 0, false)
317}
318
/// Render with Layer-4 read-time projection driven by `handles`: tool results whose handle is
/// `Collapsed` render as previews (originals untouched), freeing budget for more recent turns.
///
/// Parameters:
///   `budget`               — total token budget shared by the system text and the history turns.
///   `preserve_recent_msgs` — how many of the newest history messages are kept unconditionally.
///   `handles`              — residency table consulted per tool result for Layer-4 collapse.
///   `frozen_history_len`   — history length as of the last compaction (0 before any); used only
///                            to derive `frozen_prefix_len`.
///   `collapse_narration`   — enables Method-1 narration stubs for turns outside the protected
///                            recent window.
///
/// Token budget:
///   system_stable + system_knowledge tokens are subtracted first (capped at `budget`).
///   Remaining budget is allocated to history turns newest-first.
///   Messages that count as zero tokens are skipped and do not use a protected slot.
///   The `preserve_recent_msgs` most recent remaining messages are always included, even when
///   that exceeds the budget.
///   The first older message that does not fit ends the fill: a Text message is dropped whole
///   (never truncated); a Parts message is still included whole if any budget remains.
///   The State turn is built afterwards and is not charged against `budget`.
pub fn render_projected(
    partitions: &ContextPartitions,
    budget: u32,
    engine: &ContextTokenEngine,
    preserve_recent_msgs: usize,
    handles: &HandleTable,
    frozen_history_len: usize,
    collapse_narration: bool,
) -> RenderedContext {
    let system_stable = build_system_stable(partitions);
    let system_knowledge = build_system_knowledge(partitions);
    // Single-slot system text: the non-empty segments joined by a blank line.
    let system_text = [system_stable.as_str(), system_knowledge.as_str()]
        .iter()
        .filter(|s| !s.is_empty())
        .cloned()
        .collect::<Vec<_>>()
        .join("\n\n");

    // System text is charged first; capping at `budget` keeps `remaining` from underflowing.
    let system_tokens = engine.count(&system_text).min(budget);
    let mut remaining = budget.saturating_sub(system_tokens);

    // Fill history newest-first within remaining budget. Layer-4 projection is applied per
    // message: a collapsed tool result renders as a preview and is costed at its reduced size.
    let mut kept_rev: Vec<Message> = Vec::new();
    for msg in partitions.history.messages.iter().rev() {
        // Protection is decided by how many messages have been kept so far, so skipped
        // zero-token messages do not consume a protected slot.
        let is_protected = kept_rev.len() < preserve_recent_msgs;
        // `projected` is `Some` only when read-time projection shrank the message. Two disjoint
        // sources: a `Collapsed` tool-result preview (handle-driven, any age) OR — once the turn has
        // aged past the protected recent window — an assistant-narration stub (Method 1). A message
        // is either a tool-result Parts message or an assistant Text+tool_calls message, never both.
        let projected = project_message(msg, handles).or_else(|| {
            if is_protected { None } else { project_assistant_narration(msg, collapse_narration) }
        });
        let effective = projected.as_ref().unwrap_or(msg);
        // A projected copy is always recounted; an unprojected message reuses its cached
        // `token_count` when present.
        let tokens = match &projected {
            Some(p) => engine.count_message(p),
            None => msg.token_count.unwrap_or_else(|| engine.count_message(msg)),
        };
        if tokens == 0 { continue; }

        // Protected (most recent) messages are kept regardless of the remaining budget.
        if is_protected {
            kept_rev.push(effective.clone());
            remaining = remaining.saturating_sub(tokens);
            continue;
        }

        if tokens <= remaining {
            kept_rev.push(effective.clone());
            remaining = remaining.saturating_sub(tokens);
        } else if remaining > 0 {
            match &effective.content {
                // P0-B1: drop a Text boundary message **whole** rather than mid-truncate. A
                // truncated body's bytes depend on `remaining`, which varies per turn — that churns
                // turns[0] and invalidates the entire cached prefix. Compaction normally keeps
                // history under budget, so this overflow path is a rare safety net; keeping every
                // kept turn a complete message preserves prompt-cache reuse.
                Content::Text(_) => {}
                // A Parts message was already included whole (byte-stable) — unchanged.
                Content::Parts(_) => kept_rev.push(effective.clone()),
            }
            // Either way the boundary has been reached: nothing older is rendered.
            break;
        } else {
            break;
        }
    }

    // Collected newest-first; restore chronological order.
    kept_rev.reverse();
    let mut turns = kept_rev;
    normalize_turn_prefix(&mut turns);

    // The State turn (task_state + signals) is volatile — keep it OUT of the
    // cacheable history. Providers render it after the history (Anthropic) or
    // prepended (OpenAI). See RenderedContext docs.
    let state_turn = build_state_turn(partitions);

    // P1-E: locate the frozen-prefix boundary in rendered turns. `frozen_history_len` is the
    // history length as of the last compaction (0 before any) — messages beyond it are the hot
    // tail that grows each turn. We count the hot tail from the END, which is robust to the leading
    // anchor and to budget-dropping of OLD turns (the recent tail is never dropped). Emit `Some`
    // only for a distinct, non-empty frozen region; otherwise providers use the rolling-pair
    // fallback (deep == tail would waste a breakpoint).
    // NOTE(review): `hot` is counted in stored history messages, while `turns` omits zero-token
    // messages — confirm the boundary is still intended when the hot tail contains any.
    let hot = partitions
        .history
        .messages
        .len()
        .saturating_sub(frozen_history_len);
    let frozen_prefix_len = if frozen_history_len > 0 && hot > 0 && hot < turns.len() {
        Some(turns.len() - hot)
    } else {
        None
    };

    RenderedContext { system_text, system_stable, system_knowledge, turns, state_turn, frozen_prefix_len }
}
421
422#[cfg(test)]
423mod tests {
424    use super::*;
425    use crate::context::config::ContextConfig;
426    use crate::context::partitions::ContextPartitions;
427    use crate::context::task_state::{PlanStep, TaskState};
428    use crate::context::token_engine::ContextTokenEngine;
429    use crate::types::message::{Message, Role};
430
431    fn engine() -> ContextTokenEngine { ContextTokenEngine::char_approx() }
432    fn ctx() -> ContextPartitions { ContextPartitions::new(&ContextConfig::default()) }
433
434    #[test]
435    fn system_stable_contains_system_partition() {
436        let mut c = ctx();
437        c.system.push(Message::system("You are helpful."), 10);
438        let rc = render(&c, 10_000, &engine(), 4);
439        assert!(rc.system_stable.contains("You are helpful."));
440        assert!(rc.system_text.contains("You are helpful."));
441    }
442
443    #[test]
444    fn system_knowledge_contains_knowledge_partition() {
445        let mut c = ctx();
446        c.knowledge.push(Message::system("skill: debug"), 10);
447        let rc = render(&c, 10_000, &engine(), 4);
448        assert!(rc.system_knowledge.contains("skill: debug"));
449        assert!(rc.system_text.contains("skill: debug"));
450    }
451
452    #[test]
453    fn task_state_appears_in_state_turn() {
454        let mut c = ctx();
455        c.task_state = TaskState { goal: "find the bug".to_string(), ..Default::default() };
456        let rc = render(&c, 10_000, &engine(), 4);
457        assert!(!rc.system_text.contains("[TASK STATE]"), "task_state must not be in system_text");
458        let state = rc.state_turn.as_ref().expect("should have a state turn");
459        assert_eq!(state.role, Role::User);
460        assert!(state.content.as_text().unwrap().contains("[TASK STATE] goal: find the bug"));
461        // State is NOT in the cacheable history turns.
462        assert!(!rc.turns.iter().any(|m| m.content.as_text().map(|t| t.contains("[TASK STATE]")).unwrap_or(false)));
463    }
464
465    #[test]
466    fn signals_appear_in_state_turn() {
467        let mut c = ctx();
468        c.task_state = TaskState { goal: "g".to_string(), ..Default::default() };
469        c.signals.push("[ROLLBACK] tool failed".to_string());
470        let rc = render(&c, 10_000, &engine(), 4);
471        let state = rc.state_turn.as_ref().unwrap();
472        assert!(state.content.as_text().unwrap().contains("[ROLLBACK] tool failed"));
473    }
474
475    #[test]
476    fn empty_task_state_no_state_turn() {
477        let c = ctx();
478        let rc = render(&c, 10_000, &engine(), 4);
479        // No state turn when task_state is empty and no signals
480        assert!(rc.state_turn.is_none());
481        assert!(rc.turns.is_empty());
482    }
483
484    #[test]
485    fn history_excludes_state_turn() {
486        let mut c = ctx();
487        c.task_state = TaskState { goal: "g".to_string(), ..Default::default() };
488        c.history.push(Message::user("step 1"), 5);
489        c.history.push(Message::assistant("done"), 5);
490        let rc = render(&c, 10_000, &engine(), 4);
491        // turns is history only; state lives in state_turn.
492        assert!(rc.state_turn.as_ref().unwrap().content.as_text().unwrap().contains("[TASK STATE]"));
493        assert_eq!(rc.turns[0].role, Role::User);
494        assert_eq!(rc.turns[0].content.as_text(), Some("step 1"));
495        assert_eq!(rc.turns[1].role, Role::Assistant);
496    }
497
498    #[test]
499    fn all_assistant_tool_history_gets_anchor_user_turn() {
500        let mut c = ctx();
501        c.history.push(Message::assistant("reply"), 5);
502        let rc = render(&c, 10_000, &engine(), 4);
503        assert_eq!(rc.turns[0].role, Role::User);
504    }
505
506    #[test]
507    fn zero_token_messages_skipped() {
508        let mut c = ctx();
509        c.history.push(Message::user("zero"), 0);
510        c.history.push(Message::user("real"), 5);
511        let rc = render(&c, 10_000, &engine(), 4);
512        // Only "real" in history turns (state turn absent — no task_state)
513        assert!(rc.turns.iter().any(|m| m.content.as_text() == Some("real")));
514        assert!(!rc.turns.iter().any(|m| m.content.as_text() == Some("zero")));
515    }
516
517    #[test]
518    fn collapsed_tool_result_renders_as_preview_without_mutating_history() {
519        use crate::mm::handle::{Handle, HandleKind, HandleTable, Residency};
520
521        let mut c = ctx();
522        let long = "DATA ".repeat(200); // 1000 bytes
523        c.history.push(
524            Message::tool(vec![ContentPart::ToolResult {
525                call_id: "c1".into(),
526                output: long.clone(),
527                is_error: false,
528            }]),
529            250,
530        );
531
532        let mut handles = HandleTable::new();
533        let mut h = Handle::resident_for(1, HandleKind::ToolResult, 250, "c1");
534        h.residency = Residency::Collapsed;
535        handles.insert(h);
536
537        let rc = render_projected(&c, 10_000, &engine(), 4, &handles, 0, false);
538        let rendered: String = rc
539            .turns
540            .iter()
541            .flat_map(|m| match &m.content {
542                Content::Parts(parts) => parts.clone(),
543                _ => Vec::new(),
544            })
545            .find_map(|p| match p {
546                ContentPart::ToolResult { output, .. } => Some(output),
547                _ => None,
548            })
549            .expect("tool result rendered");
550        // Rendered copy is a preview; original full output is retained in history.
551        assert!(rendered.contains("[collapsed:"));
552        assert!(rendered.len() < long.len());
553        let stored = match &c.history.messages[0].content {
554            Content::Parts(parts) => match &parts[0] {
555                ContentPart::ToolResult { output, .. } => output.clone(),
556                _ => unreachable!(),
557            },
558            _ => unreachable!(),
559        };
560        assert_eq!(stored, long, "projection must not mutate stored history");
561    }
562
563    #[test]
564    fn resident_tool_result_renders_in_full() {
565        use crate::mm::handle::{Handle, HandleKind, HandleTable};
566
567        let mut c = ctx();
568        let body = "RESIDENT BODY ".repeat(20);
569        c.history.push(
570            Message::tool(vec![ContentPart::ToolResult {
571                call_id: "c2".into(),
572                output: body.clone(),
573                is_error: false,
574            }]),
575            60,
576        );
577        let mut handles = HandleTable::new();
578        handles.insert(Handle::resident_for(1, HandleKind::ToolResult, 60, "c2"));
579
580        let rc = render_projected(&c, 10_000, &engine(), 4, &handles, 0, false);
581        let rendered: String = rc
582            .turns
583            .iter()
584            .flat_map(|m| match &m.content {
585                Content::Parts(parts) => parts.clone(),
586                _ => Vec::new(),
587            })
588            .find_map(|p| match p {
589                ContentPart::ToolResult { output, .. } => Some(output),
590                _ => None,
591            })
592            .expect("tool result rendered");
593        assert_eq!(rendered, body);
594        assert!(!rendered.contains("[collapsed:"));
595    }
596
597    // ── P1-F: state-turn recency footer ───────────────────────────────────
598
599    #[test]
600    fn state_turn_footer_leads_with_next_step_not_bare_goal() {
601        let mut c = ctx();
602        c.task_state = TaskState {
603            goal: "ship the cache work".to_string(),
604            plan: vec![PlanStep { label: "do E".to_string(), done: false }],
605            current_step: Some(0),
606            ..Default::default()
607        };
608        c.task_state.record_directive("don't break ABI");
609        let rc = render(&c, 100_000, &engine(), 4);
610        let text = rc.state_turn.unwrap().content.as_text().unwrap().to_string();
611
612        // The full TASK STATE block still LEADS (primacy) — goal-adherence preserved ...
613        assert!(text.starts_with("[TASK STATE] goal: ship the cache work"));
614        // ... but the peak-attention footer leads with the forward action, not a goal restatement.
615        let before_proceed = text.rsplit_once("\n\nProceed.").expect("ends with Proceed").0;
616        let last_block = before_proceed.rsplit("\n\n").next().unwrap();
617        assert!(last_block.starts_with("→ next: step 1 — do E"), "got: {last_block}");
618        assert!(last_block.contains("must: don't break ABI"));
619        // The bare goal must NOT be re-injected at the peak-attention tail (the repetition fuel).
620        assert!(!last_block.contains("focus: ship the cache work"), "got: {last_block}");
621    }
622
623    #[test]
624    fn footer_falls_back_to_focus_goal_when_nothing_done_yet() {
625        // Turn 1: no actions, no plan — the footer surfaces the goal so the model knows the objective.
626        let mut c = ctx();
627        c.task_state = TaskState { goal: "build the thing".to_string(), ..Default::default() };
628        let rc = render(&c, 100_000, &engine(), 4);
629        let text = rc.state_turn.unwrap().content.as_text().unwrap().to_string();
630        let footer = text.rsplit_once("\n\nProceed.").unwrap().0.rsplit("\n\n").next().unwrap();
631        assert_eq!(footer, "→ focus: build the thing");
632    }
633
634    #[test]
635    fn footer_shows_recent_actions_and_forward_nudge_without_a_plan() {
636        // No curated plan, but real tool activity (2b) → the footer shows motion + a forward nudge,
637        // and the goal is NOT restated at the tail.
638        let mut c = ctx();
639        c.task_state = TaskState { goal: "rebuild §4.4 as SVG".to_string(), ..Default::default() };
640        c.task_state.note_actions("module_list");
641        c.task_state.note_actions("module_read");
642        let rc = render(&c, 100_000, &engine(), 4);
643        let footer = rc.state_turn.unwrap().content.as_text().unwrap()
644            .rsplit_once("\n\nProceed.").unwrap().0.rsplit("\n\n").next().unwrap().to_string();
645        assert!(footer.contains("just did: module_list → module_read"), "got: {footer}");
646        assert!(footer.contains("next: take the next concrete action"), "got: {footer}");
647        assert!(!footer.contains("focus: rebuild §4.4 as SVG"), "goal must not lead the footer");
648    }
649
650    #[test]
651    fn footer_raises_stop_on_repeated_action() {
652        // The same action on the last ≥2 turns ⇒ explicit STOP backstop (breaks the read-loop in-band).
653        let mut c = ctx();
654        c.task_state = TaskState { goal: "g".to_string(), ..Default::default() };
655        c.task_state.note_actions("document_read");
656        c.task_state.note_actions("document_read");
657        c.task_state.note_actions("document_read");
658        let rc = render(&c, 100_000, &engine(), 4);
659        let footer = rc.state_turn.unwrap().content.as_text().unwrap()
660            .rsplit_once("\n\nProceed.").unwrap().0.rsplit("\n\n").next().unwrap().to_string();
661        assert!(footer.contains("STOP: `document_read` repeated 3×"), "got: {footer}");
662    }
663
664    #[test]
665    fn no_salience_footer_without_a_goal() {
666        let mut c = ctx();
667        c.signals.push("[ROLLBACK] tool failed".to_string());
668        let rc = render(&c, 100_000, &engine(), 4);
669        let text = rc.state_turn.unwrap().content.as_text().unwrap().to_string();
670        assert!(!text.contains("→ focus:"), "no goal ⇒ no footer");
671        // signals remain the last content before the anchor.
672        assert!(text.contains("[ROLLBACK] tool failed"));
673    }
674
675    // ── P0-A: prefix fingerprint (cache-drift instrument) ──────────────────
676
677    #[test]
678    fn prefix_fingerprint_is_stable_when_appending_history() {
679        let mut c = ctx();
680        c.system.push(Message::system("rules"), 5);
681        c.knowledge.push(Message::system("skill: debug"), 5);
682        c.history.push(Message::user("turn A"), 5);
683        c.history.push(Message::assistant("turn B"), 5);
684        let fp1 = render(&c, 100_000, &engine(), 4).prefix_fingerprint();
685
686        // Append a new turn — the existing prefix must stay byte-identical.
687        c.history.push(Message::user("turn C"), 5);
688        let fp2 = render(&c, 100_000, &engine(), 4).prefix_fingerprint();
689
690        assert!(fp2.extends(&fp1), "appending must only grow the tail, never drift the prefix");
691        assert_eq!(fp2.common_turn_prefix(&fp1), 2, "both prior turns stay cache-reusable");
692        assert_eq!(fp2.turn_hashes.len(), 3);
693    }
694
695    #[test]
696    fn prefix_fingerprint_ignores_state_turn() {
697        // Same history, different task_state/signals → the cacheable prefix is
698        // identical (state lives in the uncached tail, out of `turns`).
699        let mut c = ctx();
700        c.history.push(Message::user("turn A"), 5);
701        c.task_state = TaskState { goal: "first goal".to_string(), ..Default::default() };
702        let fp1 = render(&c, 100_000, &engine(), 4).prefix_fingerprint();
703
704        c.task_state = TaskState { goal: "totally different goal".to_string(), ..Default::default() };
705        c.signals.push("[ROLLBACK] whatever".to_string());
706        let fp2 = render(&c, 100_000, &engine(), 4).prefix_fingerprint();
707
708        assert_eq!(fp1, fp2, "volatile state must not perturb the cacheable prefix");
709    }
710
711    #[test]
712    fn prefix_fingerprint_detects_system_drift() {
713        let mut c = ctx();
714        c.system.push(Message::system("rules v1"), 5);
715        c.history.push(Message::user("turn A"), 5);
716        let fp1 = render(&c, 100_000, &engine(), 4).prefix_fingerprint();
717
718        c.system.messages.clear();
719        c.system.push(Message::system("rules v2"), 5);
720        let fp2 = render(&c, 100_000, &engine(), 4).prefix_fingerprint();
721
722        assert_ne!(fp1.system_stable_hash, fp2.system_stable_hash);
723        assert!(!fp2.extends(&fp1), "a system-block edit invalidates the whole prefix");
724    }
725
726    #[test]
727    fn prefix_fingerprint_detects_in_place_collapse_churn() {
728        use crate::mm::handle::{Handle, HandleKind, HandleTable, Residency};
729
730        let mut c = ctx();
731        c.history.push(Message::user("start"), 5);
732        let long = "DATA ".repeat(200);
733        c.history.push(
734            Message::tool(vec![ContentPart::ToolResult {
735                call_id: "c1".into(),
736                output: long,
737                is_error: false,
738            }]),
739            250,
740        );
741        c.history.push(Message::user("recent"), 5);
742
743        let resident = render(&c, 100_000, &engine(), 4).prefix_fingerprint();
744
745        // Collapsing the old tool result rewrites that turn in place → the prefix
746        // hash at that position changes (the cache-cost of folding, made visible).
747        let mut handles = HandleTable::new();
748        let mut h = Handle::resident_for(1, HandleKind::ToolResult, 250, "c1");
749        h.residency = Residency::Collapsed;
750        handles.insert(h);
751        let collapsed = render_projected(&c, 100_000, &engine(), 4, &handles, 0, false).prefix_fingerprint();
752
753        // turn 0 ("start") is byte-stable; the collapsed tool result at turn 1 drifts.
754        assert_eq!(collapsed.common_turn_prefix(&resident), 1, "drift begins at the collapsed turn");
755        assert!(!collapsed.extends(&resident));
756    }
757
758    // ── Method 1: assistant-narration collapse ─────────────────────────────
759
760    fn assistant_with_call(text: &str) -> Message {
761        let mut m = Message::assistant(text);
762        m.tool_calls = vec![crate::types::message::ToolCall {
763            id: "c1".into(),
764            name: "module_read".into(),
765            arguments: serde_json::json!({}),
766        }];
767        m
768    }
769
770    #[test]
771    fn old_assistant_narration_collapses_keeping_tool_calls() {
772        let mut c = ctx();
773        // Oldest = a long preamble + a tool call; then enough recent turns to push it past the window.
774        c.history.push(assistant_with_call(&"好的,我来将 §4.4 的 Mermaid 部署架构图重新构建为 SVG 版本。先找到当前 Mermaid 模块的位置。".repeat(1)), 60);
775        for i in 0..5 { c.history.push(Message::user(format!("recent {i}")), 5); }
776
777        // collapse ON (preserve window = 4, so the oldest narration turn is past it)
778        let rc = render_projected(&c, 100_000, &engine(), 4, &HandleTable::new(), 0, true);
779        let narration = rc
780            .turns
781            .iter()
782            .find(|m| m.content.as_text() == Some(NARRATION_STUB))
783            .expect("old narration replaced by stub");
784        assert_eq!(narration.tool_calls.len(), 1, "tool call (pairing) preserved");
785        assert_eq!(narration.tool_calls[0].name, "module_read");
786        // No verbatim preamble survives in the rendered prefix.
787        assert!(!rc.turns.iter().any(|m| m.content.as_text().map(|t| t.contains("先找到当前 Mermaid")).unwrap_or(false)));
788        // Original history is untouched (non-destructive projection).
789        assert!(c.history.messages[0].content.as_text().unwrap().contains("先找到当前 Mermaid"));
790
791        // collapse OFF → verbatim narration survives.
792        let rc_off = render_projected(&c, 100_000, &engine(), 4, &HandleTable::new(), 0, false);
793        assert!(rc_off.turns.iter().any(|m| m.content.as_text().map(|t| t.contains("先找到当前 Mermaid")).unwrap_or(false)));
794    }
795
796    #[test]
797    fn recent_assistant_narration_within_window_is_not_collapsed() {
798        let mut c = ctx();
799        // Only 2 turns, preserve window = 4 → the narration turn is protected → never collapsed.
800        c.history.push(assistant_with_call(&"好的,我来将 §4.4 重新构建为 SVG。先定位模块位置确认范围读取内容。".to_string()), 60);
801        c.history.push(Message::user("ok"), 5);
802        let rc = render_projected(&c, 100_000, &engine(), 4, &HandleTable::new(), 0, true);
803        assert!(rc.turns.iter().any(|m| m.content.as_text().map(|t| t.contains("先定位模块位置")).unwrap_or(false)), "recent narration kept verbatim");
804    }
805
806    #[test]
807    fn assistant_without_tool_calls_is_never_collapsed() {
808        let mut c = ctx();
809        // A pure final answer (no tool calls) is substantive — must survive even when old.
810        c.history.push(Message::assistant("这是给用户的最终结论,包含实质内容,不应被折叠掉以免丢信息。"), 40);
811        for i in 0..5 { c.history.push(Message::user(format!("r{i}")), 5); }
812        let rc = render_projected(&c, 100_000, &engine(), 4, &HandleTable::new(), 0, true);
813        assert!(rc.turns.iter().any(|m| m.content.as_text().map(|t| t.contains("最终结论")).unwrap_or(false)), "answer-only turns are not narration");
814    }
815
816    #[test]
817    fn collapsing_narration_drifts_only_that_turn_in_the_cache_prefix() {
818        // The cost made visible: collapsing rewrites that one turn in place → the prefix hash drifts
819        // at its position (one-time, as it ages past the window), but earlier turns stay reusable.
820        let mut c = ctx();
821        c.history.push(Message::user("start"), 5);
822        c.history.push(assistant_with_call(&"好的,我来将 §4.4 重新构建为 SVG 版本。先找到 Mermaid 模块的确切位置再读取其内容。".to_string()), 60);
823        for i in 0..4 { c.history.push(Message::user(format!("recent {i}")), 5); }
824
825        let verbatim = render_projected(&c, 100_000, &engine(), 4, &HandleTable::new(), 0, false).prefix_fingerprint();
826        let collapsed = render_projected(&c, 100_000, &engine(), 4, &HandleTable::new(), 0, true).prefix_fingerprint();
827        // turn 0 ("start") is byte-stable; drift begins at the collapsed narration turn (index 1).
828        assert_eq!(collapsed.common_turn_prefix(&verbatim), 1, "only the collapsed turn drifts");
829        assert!(!collapsed.extends(&verbatim));
830    }
831
832    #[test]
833    fn protected_recent_messages_kept_whole_over_budget() {
834        let mut c = ctx();
835        c.history.push(Message::user("first message"), 5);
836        c.history.push(Message::user("a".repeat(1000)), 250);
837        // preserve_recent_msgs=4 protects both — kept whole regardless of the 10-token budget.
838        let rc = render(&c, 10, &engine(), 4);
839        assert!(rc.turns.iter().any(|m| {
840            m.content.as_text().map(|t| t.contains("first message")).unwrap_or(false)
841        }));
842    }
843
844    #[test]
845    fn oversized_text_boundary_is_dropped_whole_not_truncated() {
846        // P0-B1: an unprotected, over-budget Text boundary message is dropped whole — never
847        // mid-truncated — so no budget-dependent fragment lands in the cached prefix.
848        let mut c = ctx();
849        c.history.push(Message::user("a".repeat(1000)), 250); // oldest, oversized
850        c.history.push(Message::user("recent"), 2); // newest, fits
851        let rc = render(&c, 5, &engine(), 0); // nothing protected
852        assert_eq!(rc.turns.len(), 1, "only the fitting newest turn survives");
853        assert_eq!(rc.turns[0].content.as_text(), Some("recent"));
854        assert!(
855            !rc.turns.iter().any(|m| m
856                .content
857                .as_text()
858                .map(|t| t.starts_with("aaaa"))
859                .unwrap_or(false)),
860            "no truncated body in the prefix"
861        );
862    }
863}