Skip to main content

deepstrike_core/context/
renderer.rs

1use super::partitions::ContextPartitions;
2use super::snapshot::stable_hash;
3use super::task_state::TaskState;
4use super::token_engine::ContextTokenEngine;
5use crate::mm::handle::{HandleTable, Residency};
6use crate::types::message::{Content, ContentPart, Message, Role};
7use serde::{Deserialize, Serialize};
8
9/// Structured render output aligned with LLM API slots.
10///
11/// Slot 1 — system_stable:    Identity (system partition). Anthropic system[0] cache_control.
12/// Slot 2 — system_knowledge: Knowledge partition. Anthropic system[1] cache_control.
13/// Slot 3 — turns[0..N]:      History turns (stable, cacheable prefix).
14/// Slot 4 — state_turn:       State (task_state + signals), rebuilt every call.
15///
16/// The State turn is kept OUT of `turns` so the history prefix stays byte-stable
17/// across turns and can be prompt-cached. Providers place `state_turn` themselves:
18/// Anthropic appends it AFTER the message-history cache breakpoint (so the volatile
19/// state is the cheap uncached tail); OpenAI-family prepend it (preserving today's
20/// ordering). When this struct is produced by an older binding that has not been
21/// rebuilt, `state_turn` is absent and `turns[0]` still carries the State turn —
22/// providers handle both shapes.
23///
24/// system_text = system_stable + system_knowledge (for OpenAI which has one system slot).
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct RenderedContext {
27    /// Identity + Knowledge combined — for providers with a single system slot (OpenAI).
28    pub system_text: String,
29    /// Identity only (system partition). Anthropic system[0] with cache_control.
30    pub system_stable: String,
31    /// Knowledge (memory retrievals, skill definitions, artifacts). Anthropic system[1] with cache_control.
32    pub system_knowledge: String,
33    /// History turns only — the stable, cacheable message prefix.
34    pub turns: Vec<Message>,
35    /// Volatile State turn (task_state + signals), rebuilt every call. Rendered
36    /// after the cacheable history. `None` when there is no task state or signals.
37    #[serde(default, skip_serializing_if = "Option::is_none")]
38    pub state_turn: Option<Message>,
39    /// P1-E: number of leading `turns` that form the **frozen prefix** — byte-stable until the
40    /// next compaction. Providers that place explicit cache breakpoints (Anthropic) pin one *deep*
41    /// breakpoint at this boundary (a long-lived cache that survives many turns and is immune to
42    /// the 20-block lookback miss on heavy tool turns) and roll the other at the tail. `None` when
43    /// there is no distinct frozen region yet (pre-first-compaction, or the whole render is hot) —
44    /// providers then fall back to the rolling-pair placement. Providers clamp out-of-range values.
45    #[serde(default, skip_serializing_if = "Option::is_none")]
46    pub frozen_prefix_len: Option<usize>,
47}
48
/// Fingerprint of a render's **cacheable prefix**: the segments a provider caches as a
/// stable prefix, i.e. the two system blocks plus the history `turns`.
///
/// `state_turn` (the volatile, uncached tail) and `token_count` metadata (never on the
/// wire) are not part of it. This is the metrics-first instrument (P0-A) behind the
/// optimization work: two renders share a reusable KV / prompt-cache prefix iff their
/// system hashes are equal *and* the `turn_hashes` of one is a prefix of the other's.
/// The value is pure and derived; it is never stored in snapshots, session logs, or
/// event logs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PrefixFingerprint {
    pub system_stable_hash: u64,
    pub system_knowledge_hash: u64,
    /// One stable hash per history turn, in render order. The longest common prefix with
    /// a previous render's vector is the number of turns that stay cache-reusable.
    pub turn_hashes: Vec<u64>,
}

impl PrefixFingerprint {
    /// Returns `true` when this fingerprint is a byte-stable *extension* of `prev`: both
    /// system hashes are equal and `prev.turn_hashes` is a leading sub-sequence of
    /// `self.turn_hashes`. That is the KV / prompt-cache reuse condition — the prefix has
    /// not drifted anywhere and has only grown at the tail.
    pub fn extends(&self, prev: &PrefixFingerprint) -> bool {
        let same_system = self.system_stable_hash == prev.system_stable_hash
            && self.system_knowledge_hash == prev.system_knowledge_hash;
        // `starts_with` is false when `prev` has more turns than `self`.
        same_system && self.turn_hashes.starts_with(&prev.turn_hashes)
    }

    /// Counts the leading turns whose hashes equal those of `prev`: the reusable
    /// turn-prefix length. A result smaller than `prev.turn_hashes.len()` means a turn
    /// inside the prefix was rewritten (for example an in-place collapse), which
    /// invalidates the cache from that turn onward.
    pub fn common_turn_prefix(&self, prev: &PrefixFingerprint) -> usize {
        // With no mismatch the shared prefix is as long as the shorter vector.
        let shorter = self.turn_hashes.len().min(prev.turn_hashes.len());
        self.turn_hashes
            .iter()
            .zip(&prev.turn_hashes)
            .position(|(mine, theirs)| mine != theirs)
            .unwrap_or(shorter)
    }
}
88
89/// Wire-relevant hash of one turn: role + content + tool_calls, **excluding**
90/// `token_count` (kernel-only metadata that never reaches the provider). Serialised
91/// through serde so every content variant and tool-call argument is covered with a
92/// deterministic field order.
93fn hash_turn(msg: &Message) -> u64 {
94    let material =
95        serde_json::to_vec(&(&msg.role, &msg.content, &msg.tool_calls)).unwrap_or_default();
96    stable_hash(&material)
97}
98
99impl RenderedContext {
100    /// Compute the [`PrefixFingerprint`] for this render. See its docs for the
101    /// cache-reuse contract it certifies.
102    pub fn prefix_fingerprint(&self) -> PrefixFingerprint {
103        PrefixFingerprint {
104            system_stable_hash: stable_hash(self.system_stable.as_bytes()),
105            system_knowledge_hash: stable_hash(self.system_knowledge.as_bytes()),
106            turn_hashes: self.turns.iter().map(hash_turn).collect(),
107        }
108    }
109}
110
111fn build_system_stable(partitions: &ContextPartitions) -> String {
112    partitions.system.messages
113        .iter()
114        .filter_map(|m| m.content.as_text())
115        .collect::<Vec<_>>()
116        .join("\n\n")
117}
118
119fn build_system_knowledge(partitions: &ContextPartitions) -> String {
120    partitions.knowledge.messages
121        .iter()
122        .filter_map(|m| m.content.as_text())
123        .collect::<Vec<_>>()
124        .join("\n\n")
125}
126
127/// P1-F: a one-line recency footer restating the current focus — goal, active plan step, and the
128/// most recent standing directive. It is rendered as the *last* content before the "Proceed."
129/// anchor, the highest-attention position in the prompt (the model attends most to the final
130/// tokens). The full TASK STATE block still leads the turn for primacy + reference; this footer
131/// just re-surfaces "what to do right now" where attention peaks. `None` when there is no goal.
132fn salience_footer(ts: &TaskState) -> Option<String> {
133    if ts.goal.is_empty() {
134        return None;
135    }
136    let mut s = format!("→ focus: {}", ts.goal);
137    if let Some(i) = ts.current_step {
138        if let Some(step) = ts.plan.get(i) {
139            if !step.done {
140                s.push_str(&format!(" · step {}: {}", i + 1, step.label));
141            }
142        }
143    }
144    if let Some(d) = ts.directives.last() {
145        s.push_str(&format!(" · must: {d}"));
146    }
147    Some(s)
148}
149
150/// Build the State turn (the volatile tail): task_state + signals + a recency focus footer +
151/// "Proceed." anchor. The footer sits last (just before "Proceed.") so the current goal/step/
152/// directive land in the prompt's highest-attention position (P1-F).
153fn build_state_turn(partitions: &ContextPartitions) -> Option<Message> {
154    let task = partitions.task_state.format_compact();
155    if task.is_empty() && partitions.signals.is_empty() {
156        return None;
157    }
158    let mut parts: Vec<String> = Vec::new();
159    if !task.is_empty() {
160        parts.push(task);
161    }
162    let signals_text = partitions.signals.join("\n");
163    if !signals_text.is_empty() {
164        parts.push(signals_text);
165    }
166    if let Some(footer) = salience_footer(&partitions.task_state) {
167        parts.push(footer);
168    }
169    let body = parts.join("\n\n");
170    Some(Message::user(format!("{body}\n\nProceed.")))
171}
172
173/// Ensure turns start with a user message.
174/// After AutoCompact the preserved tail may be all assistant/tool — insert an anchor.
175fn normalize_turn_prefix(turns: &mut Vec<Message>) {
176    if !turns.is_empty() && matches!(turns[0].role, Role::Assistant | Role::Tool) {
177        turns.insert(0, Message::user("[context resumed]"));
178    }
179}
180
/// Layer-4 read-time projection: shrink the body of a `Collapsed` tool result to a short
/// preview followed by a marker line.
///
/// The preview keeps at most the first 160 bytes of `output`, cut back to the nearest
/// UTF-8 character boundary so a multi-byte character is never split. The marker reports
/// how many **characters** (not bytes) were left out, so the number matches its "chars"
/// label for non-ASCII output as well.
///
/// When `output` already fits inside the preview window nothing is dropped, and it is
/// returned unchanged: appending an ellipsis and a "0 chars" marker would make the
/// "collapsed" copy longer than the original and wrongly suggest truncation.
///
/// This function only builds the rendered string; it never modifies `output`.
fn collapse_preview(output: &str) -> String {
    const PREVIEW_BYTES: usize = 160;

    // Walk back to a char boundary; index 0 is always one, so this terminates.
    let mut end = PREVIEW_BYTES.min(output.len());
    while !output.is_char_boundary(end) {
        end -= 1;
    }

    let (head, tail) = output.split_at(end);
    if tail.is_empty() {
        // Everything is already visible — there is nothing to project out.
        return output.to_string();
    }

    let dropped = tail.chars().count();
    format!(
        "{head}…\n[collapsed: {dropped} chars projected out of view; full result retained in history]"
    )
}
196
197/// If any of `msg`'s tool-result parts is `Collapsed` per the handle table, return a projected
198/// copy with those parts previewed; `None` if nothing is collapsed (render the message as-is).
199fn project_message(msg: &Message, handles: &HandleTable) -> Option<Message> {
200    let Content::Parts(parts) = &msg.content else {
201        return None;
202    };
203    let mut changed = false;
204    let new_parts: Vec<ContentPart> = parts
205        .iter()
206        .map(|part| match part {
207            ContentPart::ToolResult { call_id, output, is_error }
208                if matches!(
209                    handles.residency_for_source(call_id),
210                    Some(Residency::Collapsed)
211                ) =>
212            {
213                changed = true;
214                ContentPart::ToolResult {
215                    call_id: call_id.clone(),
216                    output: collapse_preview(output),
217                    is_error: *is_error,
218                }
219            }
220            other => other.clone(),
221        })
222        .collect();
223    if changed {
224        let mut projected = msg.clone();
225        projected.content = Content::Parts(new_parts);
226        projected.token_count = None; // recomputed against the smaller projected body
227        Some(projected)
228    } else {
229        None
230    }
231}
232
233/// Render the context into a `RenderedContext` suitable for a provider API call.
234///
235/// Equivalent to [`render_projected`] with an empty handle table (no Layer-4 projection) and no
236/// frozen-prefix boundary (`frozen_history_len = 0` → `frozen_prefix_len` is always `None`).
237pub fn render(
238    partitions: &ContextPartitions,
239    budget: u32,
240    engine: &ContextTokenEngine,
241    preserve_recent_msgs: usize,
242) -> RenderedContext {
243    render_projected(partitions, budget, engine, preserve_recent_msgs, &HandleTable::new(), 0)
244}
245
246/// Render with Layer-4 read-time projection driven by `handles`: tool results whose handle is
247/// `Collapsed` render as previews (originals untouched), freeing budget for more recent turns.
248///
249/// Token budget:
250///   system_stable + system_knowledge tokens are subtracted first.
251///   Remaining budget is allocated to history turns newest-first.
252///   The first `preserve_recent_msgs` history messages are always included.
253///   Text messages are truncated at the budget boundary; Parts messages are included whole.
254pub fn render_projected(
255    partitions: &ContextPartitions,
256    budget: u32,
257    engine: &ContextTokenEngine,
258    preserve_recent_msgs: usize,
259    handles: &HandleTable,
260    frozen_history_len: usize,
261) -> RenderedContext {
262    let system_stable = build_system_stable(partitions);
263    let system_knowledge = build_system_knowledge(partitions);
264    let system_text = [system_stable.as_str(), system_knowledge.as_str()]
265        .iter()
266        .filter(|s| !s.is_empty())
267        .cloned()
268        .collect::<Vec<_>>()
269        .join("\n\n");
270
271    let system_tokens = engine.count(&system_text).min(budget);
272    let mut remaining = budget.saturating_sub(system_tokens);
273
274    // Fill history newest-first within remaining budget. Layer-4 projection is applied per
275    // message: a collapsed tool result renders as a preview and is costed at its reduced size.
276    let mut kept_rev: Vec<Message> = Vec::new();
277    for msg in partitions.history.messages.iter().rev() {
278        // `projected` is `Some` only when read-time projection shrank the message.
279        let projected = project_message(msg, handles);
280        let effective = projected.as_ref().unwrap_or(msg);
281        let tokens = match &projected {
282            Some(p) => engine.count_message(p),
283            None => msg.token_count.unwrap_or_else(|| engine.count_message(msg)),
284        };
285        if tokens == 0 { continue; }
286
287        let is_protected = kept_rev.len() < preserve_recent_msgs;
288        if is_protected {
289            kept_rev.push(effective.clone());
290            remaining = remaining.saturating_sub(tokens);
291            continue;
292        }
293
294        if tokens <= remaining {
295            kept_rev.push(effective.clone());
296            remaining = remaining.saturating_sub(tokens);
297        } else if remaining > 0 {
298            match &effective.content {
299                // P0-B1: drop a Text boundary message **whole** rather than mid-truncate. A
300                // truncated body's bytes depend on `remaining`, which varies per turn — that churns
301                // turns[0] and invalidates the entire cached prefix. Compaction normally keeps
302                // history under budget, so this overflow path is a rare safety net; keeping every
303                // kept turn a complete message preserves prompt-cache reuse.
304                Content::Text(_) => {}
305                // A Parts message was already included whole (byte-stable) — unchanged.
306                Content::Parts(_) => kept_rev.push(effective.clone()),
307            }
308            break;
309        } else {
310            break;
311        }
312    }
313
314    kept_rev.reverse();
315    let mut turns = kept_rev;
316    normalize_turn_prefix(&mut turns);
317
318    // The State turn (task_state + signals) is volatile — keep it OUT of the
319    // cacheable history. Providers render it after the history (Anthropic) or
320    // prepended (OpenAI). See RenderedContext docs.
321    let state_turn = build_state_turn(partitions);
322
323    // P1-E: locate the frozen-prefix boundary in rendered turns. `frozen_history_len` is the
324    // history length as of the last compaction (0 before any) — messages beyond it are the hot
325    // tail that grows each turn. We count the hot tail from the END, which is robust to the leading
326    // anchor and to budget-dropping of OLD turns (the recent tail is never dropped). Emit `Some`
327    // only for a distinct, non-empty frozen region; otherwise providers use the rolling-pair
328    // fallback (deep == tail would waste a breakpoint).
329    let hot = partitions
330        .history
331        .messages
332        .len()
333        .saturating_sub(frozen_history_len);
334    let frozen_prefix_len = if frozen_history_len > 0 && hot > 0 && hot < turns.len() {
335        Some(turns.len() - hot)
336    } else {
337        None
338    };
339
340    RenderedContext { system_text, system_stable, system_knowledge, turns, state_turn, frozen_prefix_len }
341}
342
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context::config::ContextConfig;
    use crate::context::partitions::ContextPartitions;
    use crate::context::task_state::{PlanStep, TaskState};
    use crate::context::token_engine::ContextTokenEngine;
    use crate::mm::handle::{Handle, HandleKind, HandleTable, Residency};
    use crate::types::message::{Message, Role};

    // ── fixtures ───────────────────────────────────────────────────────────

    /// Character-approximation token engine used by every test.
    fn engine() -> ContextTokenEngine {
        ContextTokenEngine::char_approx()
    }

    /// Empty partitions built from the default config.
    fn ctx() -> ContextPartitions {
        ContextPartitions::new(&ContextConfig::default())
    }

    /// A task state with only its goal set.
    fn goal_only(goal: &str) -> TaskState {
        TaskState { goal: goal.to_string(), ..Default::default() }
    }

    /// A tool message holding a single successful tool result.
    fn tool_result(call_id: &str, output: &str) -> Message {
        Message::tool(vec![ContentPart::ToolResult {
            call_id: call_id.into(),
            output: output.to_string(),
            is_error: false,
        }])
    }

    /// Handle table with one `Collapsed` tool-result handle for call id "c1".
    fn collapsed_c1_handles() -> HandleTable {
        let mut handle = Handle::resident_for(1, HandleKind::ToolResult, 250, "c1");
        handle.residency = Residency::Collapsed;
        let mut table = HandleTable::new();
        table.insert(handle);
        table
    }

    /// Output of the first tool result found in the rendered turns.
    fn first_tool_output(rc: &RenderedContext) -> String {
        rc.turns
            .iter()
            .filter_map(|m| match &m.content {
                Content::Parts(parts) => Some(parts),
                _ => None,
            })
            .flatten()
            .find_map(|part| match part {
                ContentPart::ToolResult { output, .. } => Some(output.clone()),
                _ => None,
            })
            .expect("tool result rendered")
    }

    /// True when some rendered turn has text content accepted by `pred`.
    fn any_turn_text(rc: &RenderedContext, pred: impl Fn(&str) -> bool) -> bool {
        rc.turns
            .iter()
            .any(|m| m.content.as_text().map_or(false, |t| pred(t)))
    }

    /// Text of the state turn; panics when there is none.
    fn state_text(rc: &RenderedContext) -> String {
        rc.state_turn
            .as_ref()
            .expect("should have a state turn")
            .content
            .as_text()
            .unwrap()
            .to_string()
    }

    // ── system slots ───────────────────────────────────────────────────────

    #[test]
    fn system_stable_contains_system_partition() {
        let mut parts = ctx();
        parts.system.push(Message::system("You are helpful."), 10);
        let rc = render(&parts, 10_000, &engine(), 4);
        assert!(rc.system_stable.contains("You are helpful."));
        assert!(rc.system_text.contains("You are helpful."));
    }

    #[test]
    fn system_knowledge_contains_knowledge_partition() {
        let mut parts = ctx();
        parts.knowledge.push(Message::system("skill: debug"), 10);
        let rc = render(&parts, 10_000, &engine(), 4);
        assert!(rc.system_knowledge.contains("skill: debug"));
        assert!(rc.system_text.contains("skill: debug"));
    }

    // ── state turn ─────────────────────────────────────────────────────────

    #[test]
    fn task_state_appears_in_state_turn() {
        let mut parts = ctx();
        parts.task_state = goal_only("find the bug");
        let rc = render(&parts, 10_000, &engine(), 4);

        assert!(!rc.system_text.contains("[TASK STATE]"), "task_state must not be in system_text");
        let state = rc.state_turn.as_ref().expect("should have a state turn");
        assert_eq!(state.role, Role::User);
        assert!(state.content.as_text().unwrap().contains("[TASK STATE] goal: find the bug"));
        // State is NOT in the cacheable history turns.
        assert!(!any_turn_text(&rc, |t| t.contains("[TASK STATE]")));
    }

    #[test]
    fn signals_appear_in_state_turn() {
        let mut parts = ctx();
        parts.task_state = goal_only("g");
        parts.signals.push("[ROLLBACK] tool failed".to_string());
        let rc = render(&parts, 10_000, &engine(), 4);
        assert!(state_text(&rc).contains("[ROLLBACK] tool failed"));
    }

    #[test]
    fn empty_task_state_no_state_turn() {
        let parts = ctx();
        let rc = render(&parts, 10_000, &engine(), 4);
        // Neither task state nor signals: no state turn, and no history either.
        assert!(rc.state_turn.is_none());
        assert!(rc.turns.is_empty());
    }

    // ── history turns ──────────────────────────────────────────────────────

    #[test]
    fn history_excludes_state_turn() {
        let mut parts = ctx();
        parts.task_state = goal_only("g");
        parts.history.push(Message::user("step 1"), 5);
        parts.history.push(Message::assistant("done"), 5);
        let rc = render(&parts, 10_000, &engine(), 4);

        // `turns` is history only; the state lives in `state_turn`.
        assert!(state_text(&rc).contains("[TASK STATE]"));
        assert_eq!(rc.turns[0].role, Role::User);
        assert_eq!(rc.turns[0].content.as_text(), Some("step 1"));
        assert_eq!(rc.turns[1].role, Role::Assistant);
    }

    #[test]
    fn all_assistant_tool_history_gets_anchor_user_turn() {
        let mut parts = ctx();
        parts.history.push(Message::assistant("reply"), 5);
        let rc = render(&parts, 10_000, &engine(), 4);
        assert_eq!(rc.turns[0].role, Role::User);
    }

    #[test]
    fn zero_token_messages_skipped() {
        let mut parts = ctx();
        parts.history.push(Message::user("zero"), 0);
        parts.history.push(Message::user("real"), 5);
        let rc = render(&parts, 10_000, &engine(), 4);
        // Only "real" makes it into the history turns (no state turn: no task_state).
        assert!(any_turn_text(&rc, |t| t == "real"));
        assert!(!any_turn_text(&rc, |t| t == "zero"));
    }

    // ── Layer-4 projection ─────────────────────────────────────────────────

    #[test]
    fn collapsed_tool_result_renders_as_preview_without_mutating_history() {
        let long = "DATA ".repeat(200); // 1000 bytes
        let mut parts = ctx();
        parts.history.push(tool_result("c1", &long), 250);

        let rc = render_projected(&parts, 10_000, &engine(), 4, &collapsed_c1_handles(), 0);
        let rendered = first_tool_output(&rc);

        // The rendered copy is a preview ...
        assert!(rendered.contains("[collapsed:"));
        assert!(rendered.len() < long.len());

        // ... while the stored history still holds the full output.
        let stored = match &parts.history.messages[0].content {
            Content::Parts(stored_parts) => match &stored_parts[0] {
                ContentPart::ToolResult { output, .. } => output.clone(),
                _ => unreachable!(),
            },
            _ => unreachable!(),
        };
        assert_eq!(stored, long, "projection must not mutate stored history");
    }

    #[test]
    fn resident_tool_result_renders_in_full() {
        let body = "RESIDENT BODY ".repeat(20);
        let mut parts = ctx();
        parts.history.push(tool_result("c2", &body), 60);

        let mut handles = HandleTable::new();
        handles.insert(Handle::resident_for(1, HandleKind::ToolResult, 60, "c2"));

        let rc = render_projected(&parts, 10_000, &engine(), 4, &handles, 0);
        let rendered = first_tool_output(&rc);
        assert_eq!(rendered, body);
        assert!(!rendered.contains("[collapsed:"));
    }

    // ── P1-F: state-turn recency footer ───────────────────────────────────

    #[test]
    fn state_turn_ends_with_salience_footer_before_proceed() {
        let mut parts = ctx();
        parts.task_state = TaskState {
            goal: "ship the cache work".to_string(),
            plan: vec![PlanStep { label: "do E".to_string(), done: false }],
            current_step: Some(0),
            ..Default::default()
        };
        parts.task_state.record_directive("don't break ABI");
        let rc = render(&parts, 100_000, &engine(), 4);
        let text = state_text(&rc);

        // The full TASK STATE block still leads (primacy) ...
        assert!(text.starts_with("[TASK STATE] goal: ship the cache work"));
        // ... and the very last block before "Proceed." is the focus footer (recency).
        let (before_proceed, _) = text.rsplit_once("\n\nProceed.").expect("ends with Proceed");
        let last_block = before_proceed.rsplit("\n\n").next().unwrap();
        assert!(last_block.starts_with("→ focus: ship the cache work"), "got: {last_block}");
        assert!(last_block.contains("step 1: do E"));
        assert!(last_block.contains("must: don't break ABI"));
    }

    #[test]
    fn no_salience_footer_without_a_goal() {
        let mut parts = ctx();
        parts.signals.push("[ROLLBACK] tool failed".to_string());
        let rc = render(&parts, 100_000, &engine(), 4);
        let text = state_text(&rc);
        assert!(!text.contains("→ focus:"), "no goal ⇒ no footer");
        // Signals remain the last content before the anchor.
        assert!(text.contains("[ROLLBACK] tool failed"));
    }

    // ── P0-A: prefix fingerprint (cache-drift instrument) ──────────────────

    #[test]
    fn prefix_fingerprint_is_stable_when_appending_history() {
        let mut parts = ctx();
        parts.system.push(Message::system("rules"), 5);
        parts.knowledge.push(Message::system("skill: debug"), 5);
        parts.history.push(Message::user("turn A"), 5);
        parts.history.push(Message::assistant("turn B"), 5);
        let before = render(&parts, 100_000, &engine(), 4).prefix_fingerprint();

        // Appending a new turn must leave the existing prefix byte-identical.
        parts.history.push(Message::user("turn C"), 5);
        let after = render(&parts, 100_000, &engine(), 4).prefix_fingerprint();

        assert!(after.extends(&before), "appending must only grow the tail, never drift the prefix");
        assert_eq!(after.common_turn_prefix(&before), 2, "both prior turns stay cache-reusable");
        assert_eq!(after.turn_hashes.len(), 3);
    }

    #[test]
    fn prefix_fingerprint_ignores_state_turn() {
        // Same history, different task_state/signals → identical cacheable prefix
        // (state lives in the uncached tail, outside `turns`).
        let mut parts = ctx();
        parts.history.push(Message::user("turn A"), 5);
        parts.task_state = goal_only("first goal");
        let before = render(&parts, 100_000, &engine(), 4).prefix_fingerprint();

        parts.task_state = goal_only("totally different goal");
        parts.signals.push("[ROLLBACK] whatever".to_string());
        let after = render(&parts, 100_000, &engine(), 4).prefix_fingerprint();

        assert_eq!(before, after, "volatile state must not perturb the cacheable prefix");
    }

    #[test]
    fn prefix_fingerprint_detects_system_drift() {
        let mut parts = ctx();
        parts.system.push(Message::system("rules v1"), 5);
        parts.history.push(Message::user("turn A"), 5);
        let before = render(&parts, 100_000, &engine(), 4).prefix_fingerprint();

        parts.system.messages.clear();
        parts.system.push(Message::system("rules v2"), 5);
        let after = render(&parts, 100_000, &engine(), 4).prefix_fingerprint();

        assert_ne!(before.system_stable_hash, after.system_stable_hash);
        assert!(!after.extends(&before), "a system-block edit invalidates the whole prefix");
    }

    #[test]
    fn prefix_fingerprint_detects_in_place_collapse_churn() {
        let mut parts = ctx();
        parts.history.push(Message::user("start"), 5);
        parts.history.push(tool_result("c1", &"DATA ".repeat(200)), 250);
        parts.history.push(Message::user("recent"), 5);

        let resident = render(&parts, 100_000, &engine(), 4).prefix_fingerprint();

        // Collapsing the old tool result rewrites that turn in place → the prefix hash
        // at that position changes (the cache cost of folding, made visible).
        let collapsed =
            render_projected(&parts, 100_000, &engine(), 4, &collapsed_c1_handles(), 0)
                .prefix_fingerprint();

        // Turn 0 ("start") is byte-stable; the collapsed tool result at turn 1 drifts.
        assert_eq!(collapsed.common_turn_prefix(&resident), 1, "drift begins at the collapsed turn");
        assert!(!collapsed.extends(&resident));
    }

    // ── budget handling ────────────────────────────────────────────────────

    #[test]
    fn protected_recent_messages_kept_whole_over_budget() {
        let mut parts = ctx();
        parts.history.push(Message::user("first message"), 5);
        parts.history.push(Message::user("a".repeat(1000)), 250);
        // preserve_recent_msgs=4 protects both — kept whole despite the 10-token budget.
        let rc = render(&parts, 10, &engine(), 4);
        assert!(any_turn_text(&rc, |t| t.contains("first message")));
    }

    #[test]
    fn oversized_text_boundary_is_dropped_whole_not_truncated() {
        // P0-B1: an unprotected, over-budget Text boundary message is dropped whole —
        // never mid-truncated — so no budget-dependent fragment lands in the cached prefix.
        let mut parts = ctx();
        parts.history.push(Message::user("a".repeat(1000)), 250); // oldest, oversized
        parts.history.push(Message::user("recent"), 2); // newest, fits
        let rc = render(&parts, 5, &engine(), 0); // nothing protected

        assert_eq!(rc.turns.len(), 1, "only the fitting newest turn survives");
        assert_eq!(rc.turns[0].content.as_text(), Some("recent"));
        assert!(
            !any_turn_text(&rc, |t| t.starts_with("aaaa")),
            "no truncated body in the prefix"
        );
    }
}