deepstrike-core 0.2.32

Cross-language agent runtime kernel — pure computation, zero I/O
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
use super::partitions::ContextPartitions;
use super::snapshot::stable_hash;
use super::task_state::TaskState;
use super::token_engine::ContextTokenEngine;
use crate::mm::handle::{HandleTable, Residency};
use crate::types::message::{Content, ContentPart, Message, Role};
use serde::{Deserialize, Serialize};

/// Structured render output aligned with LLM API slots.
///
/// Slot 1 — system_stable:    Identity (system partition). Anthropic system[0] cache_control.
/// Slot 2 — system_knowledge: Knowledge partition. Anthropic system[1] cache_control.
/// Slot 3 — turns[0..N]:      History turns (stable, cacheable prefix).
/// Slot 4 — state_turn:       State (task_state + signals), rebuilt every call.
///
/// The State turn is kept OUT of `turns` so the history prefix stays byte-stable
/// across turns and can be prompt-cached. Providers place `state_turn` themselves:
/// Anthropic appends it AFTER the message-history cache breakpoint (so the volatile
/// state is the cheap uncached tail); OpenAI-family prepend it (preserving today's
/// ordering). When this struct is produced by an older binding that has not been
/// rebuilt, `state_turn` is absent and `turns[0]` still carries the State turn —
/// providers handle both shapes.
///
/// `system_text` is `system_stable` followed by `system_knowledge`, with empty segments
/// skipped and the remaining ones joined by a blank line (`"\n\n"`). It exists for
/// providers such as OpenAI that expose a single system slot.
///
/// Both optional fields are omitted from the serialized form when `None` and default to
/// `None` when missing on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RenderedContext {
    /// Identity + Knowledge combined — for providers with a single system slot (OpenAI).
    pub system_text: String,
    /// Identity only (system partition). Anthropic system[0] with cache_control.
    pub system_stable: String,
    /// Knowledge (memory retrievals, skill definitions, artifacts). Anthropic system[1] with cache_control.
    pub system_knowledge: String,
    /// History turns only — the stable, cacheable message prefix.
    pub turns: Vec<Message>,
    /// Volatile State turn (task_state + signals), rebuilt every call. Rendered
    /// after the cacheable history. `None` when there is no task state or signals.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub state_turn: Option<Message>,
    /// P1-E: number of leading `turns` that form the **frozen prefix** — byte-stable until the
    /// next compaction. Providers that place explicit cache breakpoints (Anthropic) pin one *deep*
    /// breakpoint at this boundary (a long-lived cache that survives many turns and is immune to
    /// the 20-block lookback miss on heavy tool turns) and roll the other at the tail. `None` when
    /// there is no distinct frozen region yet (pre-first-compaction, or the whole render is hot) —
    /// providers then fall back to the rolling-pair placement. Providers clamp out-of-range values.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub frozen_prefix_len: Option<usize>,
}

/// Fingerprint of the **cacheable prefix** of a single render: the two system blocks
/// plus the history `turns`, i.e. the segments a provider caches as a stable prefix.
/// The volatile `state_turn` tail and the `token_count` metadata (which never goes on
/// the wire) are deliberately left out.
///
/// This is the metrics-first instrument (P0-A) behind the optimization work: two renders
/// can share a KV / prompt-cache prefix iff their system hashes agree *and* the
/// `turn_hashes` of one is a prefix of the other's. It is pure and derived — never stored
/// in snapshots, session logs, or event logs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PrefixFingerprint {
    /// Hash of the identity system block.
    pub system_stable_hash: u64,
    /// Hash of the knowledge system block.
    pub system_knowledge_hash: u64,
    /// One stable hash per history turn, oldest first. The longest common prefix with a
    /// previous render's vector is the number of turns that stay cache-reusable.
    pub turn_hashes: Vec<u64>,
}

impl PrefixFingerprint {
    /// Returns `true` when this fingerprint is a byte-stable *extension* of `prev`: both
    /// system hashes are equal and `prev.turn_hashes` is a leading slice of
    /// `self.turn_hashes`. That is the KV / prompt-cache reuse condition — nothing in the
    /// prefix drifted, the render only grew at the tail.
    pub fn extends(&self, prev: &PrefixFingerprint) -> bool {
        let same_system = self.system_stable_hash == prev.system_stable_hash
            && self.system_knowledge_hash == prev.system_knowledge_hash;
        // `starts_with` is false whenever `prev` has more turns than `self`.
        same_system && self.turn_hashes.starts_with(&prev.turn_hashes)
    }

    /// Counts the leading turns whose hashes equal those of `prev` — the length of the
    /// reusable turn prefix. A result below `prev.turn_hashes.len()` means a turn was
    /// rewritten mid-prefix (e.g. an in-place collapse), which invalidates the cache.
    pub fn common_turn_prefix(&self, prev: &PrefixFingerprint) -> usize {
        // If no pair differs, every overlapping position matched.
        let overlap = self.turn_hashes.len().min(prev.turn_hashes.len());
        self.turn_hashes
            .iter()
            .zip(&prev.turn_hashes)
            .position(|(mine, theirs)| mine != theirs)
            .unwrap_or(overlap)
    }
}

/// Hash the parts of a turn that matter on the wire: its role, content and tool calls.
/// `token_count` is kernel-only metadata and is **not** part of the hashed material.
///
/// The three fields are serialised together as a tuple through serde_json and the
/// resulting bytes are fed to `stable_hash`. If serialisation fails, the hash is taken
/// over an empty byte buffer.
fn hash_turn(msg: &Message) -> u64 {
    let wire_view = (&msg.role, &msg.content, &msg.tool_calls);
    let bytes = serde_json::to_vec(&wire_view).unwrap_or_default();
    stable_hash(&bytes)
}

impl RenderedContext {
    /// Derive the [`PrefixFingerprint`] of this render: one hash for each of the two
    /// system blocks and one hash per entry of `turns`, in order. `state_turn` and
    /// `system_text` are not hashed.
    pub fn prefix_fingerprint(&self) -> PrefixFingerprint {
        let system_stable_hash = stable_hash(self.system_stable.as_bytes());
        let system_knowledge_hash = stable_hash(self.system_knowledge.as_bytes());

        let mut turn_hashes = Vec::with_capacity(self.turns.len());
        turn_hashes.extend(self.turns.iter().map(hash_turn));

        PrefixFingerprint {
            system_stable_hash,
            system_knowledge_hash,
            turn_hashes,
        }
    }
}

/// Concatenate the text of every message in the system partition, separated by a blank
/// line. Messages whose content has no text form (`as_text()` is `None`) are skipped.
fn build_system_stable(partitions: &ContextPartitions) -> String {
    let mut blocks = Vec::new();
    for message in partitions.system.messages.iter() {
        if let Some(text) = message.content.as_text() {
            blocks.push(text);
        }
    }
    blocks.join("\n\n")
}

/// Concatenate the text of every message in the knowledge partition, separated by a
/// blank line. Messages whose content has no text form (`as_text()` is `None`) are skipped.
fn build_system_knowledge(partitions: &ContextPartitions) -> String {
    let mut blocks = Vec::new();
    for message in partitions.knowledge.messages.iter() {
        if let Some(text) = message.content.as_text() {
            blocks.push(text);
        }
    }
    blocks.join("\n\n")
}

/// P1-F: build the one-line recency footer that restates the current focus.
///
/// The line always starts with the goal, then appends the active plan step (only when
/// `current_step` points at an existing step that is not yet done) and the most recently
/// recorded directive (when there is one). The caller renders it as the last content
/// before the "Proceed." anchor so that "what to do right now" sits at the end of the
/// prompt, while the full TASK STATE block still leads the turn.
///
/// Returns `None` when the goal is empty.
fn salience_footer(ts: &TaskState) -> Option<String> {
    if ts.goal.is_empty() {
        return None;
    }

    let mut footer = format!("→ focus: {}", ts.goal);

    // Resolve the active step, discarding out-of-range indices and finished steps.
    let active_step = ts
        .current_step
        .and_then(|idx| ts.plan.get(idx).map(|step| (idx, step)))
        .filter(|(_, step)| !step.done);
    if let Some((idx, step)) = active_step {
        // Steps are shown 1-based.
        footer.push_str(&format!(" · step {}: {}", idx + 1, step.label));
    }

    if let Some(directive) = ts.directives.last() {
        footer.push_str(&format!(" · must: {directive}"));
    }

    Some(footer)
}

/// Assemble the State turn — the volatile tail of the prompt.
///
/// The turn is a user message made of, in order: the compact task-state block, the
/// signals (one per line), and the recency focus footer (P1-F), each separated by a blank
/// line and followed by the "Proceed." anchor. Empty task-state and signal sections are
/// left out; the footer, when present, is always the last section before "Proceed.".
///
/// Returns `None` when the compact task state is empty and there are no signals.
fn build_state_turn(partitions: &ContextPartitions) -> Option<Message> {
    let task_block = partitions.task_state.format_compact();
    if task_block.is_empty() && partitions.signals.is_empty() {
        return None;
    }

    let signal_block = partitions.signals.join("\n");
    let footer = salience_footer(&partitions.task_state);

    // Only the first two sections are filtered for emptiness; the footer is appended as-is.
    let sections: Vec<String> = vec![task_block, signal_block]
        .into_iter()
        .filter(|section| !section.is_empty())
        .chain(footer)
        .collect();

    let mut text = sections.join("\n\n");
    text.push_str("\n\nProceed.");
    Some(Message::user(text))
}

/// Guarantee that a non-empty turn list does not open with an assistant or tool message.
///
/// After AutoCompact the preserved tail may consist solely of assistant/tool turns; in
/// that case a synthetic `"[context resumed]"` user message is inserted at the front.
/// An empty list, or one that already starts with any other role, is left untouched.
fn normalize_turn_prefix(turns: &mut Vec<Message>) {
    let leading_role = turns.first().map(|first| &first.role);
    let needs_anchor = matches!(leading_role, Some(Role::Assistant | Role::Tool));
    if needs_anchor {
        turns.insert(0, Message::user("[context resumed]"));
    }
}

/// Layer-4 read-time projection: build the short stand-in for the body of a `Collapsed`
/// tool result.
///
/// The result is the leading portion of `output` (at most 160 bytes, shortened further if
/// needed so the cut never lands inside a multi-byte UTF-8 sequence), followed on a new
/// line by a marker stating how many characters were left out. The marker is always
/// appended, even when nothing was dropped.
///
/// The reported figure is a count of Unicode scalar values (`char`s) in the hidden tail,
/// so it matches the "chars" wording of the marker for non-ASCII output as well; for
/// ASCII output it equals the number of hidden bytes.
///
/// This function only formats a new string. `output` is borrowed and never modified, so
/// the caller's original text stays intact and the projection can be undone simply by
/// rendering the original again.
fn collapse_preview(output: &str) -> String {
    const PREVIEW_BYTES: usize = 160;

    // Back off to the nearest char boundary at or below the byte limit; index 0 is always
    // a boundary, so the loop terminates.
    let mut end = PREVIEW_BYTES.min(output.len());
    while end > 0 && !output.is_char_boundary(end) {
        end -= 1;
    }

    let (preview, hidden) = output.split_at(end);
    // Count characters, not bytes: the marker text promises "chars".
    let dropped = hidden.chars().count();
    format!(
        "{preview}\n[collapsed: {dropped} chars projected out of view; full result retained in history]"
    )
}

/// Produce the read-time projection of `msg`, if it needs one.
///
/// Every tool-result part whose `call_id` the handle table reports as
/// `Residency::Collapsed` is replaced by a copy carrying `collapse_preview(output)`; all
/// other parts are cloned unchanged. The returned message is a clone of `msg` with the
/// new parts and with `token_count` cleared, because the cached count described the
/// full-size body.
///
/// Returns `None` when `msg` is not a `Content::Parts` message or when none of its parts
/// is collapsed — the caller then renders `msg` as-is.
fn project_message(msg: &Message, handles: &HandleTable) -> Option<Message> {
    let parts = match &msg.content {
        Content::Parts(parts) => parts,
        _ => return None,
    };

    let mut any_collapsed = false;
    let mut rendered: Vec<ContentPart> = Vec::with_capacity(parts.len());
    for part in parts.iter() {
        if let ContentPart::ToolResult { call_id, output, is_error } = part {
            let collapsed = matches!(
                handles.residency_for_source(call_id),
                Some(Residency::Collapsed)
            );
            if collapsed {
                any_collapsed = true;
                rendered.push(ContentPart::ToolResult {
                    call_id: call_id.clone(),
                    output: collapse_preview(output),
                    is_error: *is_error,
                });
                continue;
            }
        }
        rendered.push(part.clone());
    }

    if !any_collapsed {
        return None;
    }

    let mut projected = msg.clone();
    projected.content = Content::Parts(rendered);
    // The stored count belongs to the full body; force a recount of the smaller one.
    projected.token_count = None;
    Some(projected)
}

/// Render the context into a `RenderedContext` suitable for a provider API call.
///
/// Convenience form of [`render_projected`]: it passes a freshly created, empty handle
/// table (so no Layer-4 projection takes place) and a `frozen_history_len` of `0` (so
/// `frozen_prefix_len` in the result is always `None`).
pub fn render(
    partitions: &ContextPartitions,
    budget: u32,
    engine: &ContextTokenEngine,
    preserve_recent_msgs: usize,
) -> RenderedContext {
    let no_projection = HandleTable::new();
    let no_frozen_boundary = 0;
    render_projected(
        partitions,
        budget,
        engine,
        preserve_recent_msgs,
        &no_projection,
        no_frozen_boundary,
    )
}

/// Render with Layer-4 read-time projection driven by `handles`: tool results whose handle is
/// `Collapsed` render as previews (originals untouched), freeing budget for more recent turns.
///
/// Token budget:
///   The combined system text (system_stable + system_knowledge) is counted first and
///   subtracted from `budget`, clamped so the remainder never goes below zero.
///   The remaining budget is allocated to history turns newest-first.
///   Messages that cost zero tokens are skipped entirely.
///   The newest `preserve_recent_msgs` kept messages are always included, even when they
///   exceed the remaining budget.
///   The first unprotected message that does not fit ends the fill: a Text message is
///   dropped whole (never truncated); a Parts message is still included whole if any
///   budget remains.
///
/// Parameters:
///   `partitions`           — source of the system, knowledge, history, task-state and signals.
///   `budget`               — total token budget for system text plus history.
///   `engine`               — token counter for the system text and for uncounted messages.
///   `preserve_recent_msgs` — number of newest history messages that bypass the budget check.
///   `handles`              — residency table consulted for tool-result projection.
///   `frozen_history_len`   — history length as of the last compaction (0 before any); used
///                            to derive `frozen_prefix_len`.
///
/// Returns the system strings, the kept history `turns` in chronological order (with a
/// leading user anchor inserted if they would otherwise start with an assistant/tool turn),
/// the separately built `state_turn`, and the optional frozen-prefix boundary.
pub fn render_projected(
    partitions: &ContextPartitions,
    budget: u32,
    engine: &ContextTokenEngine,
    preserve_recent_msgs: usize,
    handles: &HandleTable,
    frozen_history_len: usize,
) -> RenderedContext {
    let system_stable = build_system_stable(partitions);
    let system_knowledge = build_system_knowledge(partitions);
    // Single-slot system text: non-empty segments joined by a blank line.
    let system_text = [system_stable.as_str(), system_knowledge.as_str()]
        .iter()
        .filter(|s| !s.is_empty())
        .cloned()
        .collect::<Vec<_>>()
        .join("\n\n");

    // The system text is charged first; it can consume at most the whole budget.
    let system_tokens = engine.count(&system_text).min(budget);
    let mut remaining = budget.saturating_sub(system_tokens);

    // Fill history newest-first within remaining budget. Layer-4 projection is applied per
    // message: a collapsed tool result renders as a preview and is costed at its reduced size.
    let mut kept_rev: Vec<Message> = Vec::new();
    for msg in partitions.history.messages.iter().rev() {
        // `projected` is `Some` only when read-time projection shrank the message.
        let projected = project_message(msg, handles);
        let effective = projected.as_ref().unwrap_or(msg);
        // A projected message is always recounted; otherwise prefer the cached count.
        let tokens = match &projected {
            Some(p) => engine.count_message(p),
            None => msg.token_count.unwrap_or_else(|| engine.count_message(msg)),
        };
        // Zero-cost messages are not rendered and do not use up a protected slot.
        if tokens == 0 { continue; }

        // The newest `preserve_recent_msgs` kept messages are included regardless of budget.
        let is_protected = kept_rev.len() < preserve_recent_msgs;
        if is_protected {
            kept_rev.push(effective.clone());
            remaining = remaining.saturating_sub(tokens);
            continue;
        }

        if tokens <= remaining {
            kept_rev.push(effective.clone());
            remaining = remaining.saturating_sub(tokens);
        } else if remaining > 0 {
            match &effective.content {
                // P0-B1: drop a Text boundary message **whole** rather than mid-truncate. A
                // truncated body's bytes depend on `remaining`, which varies per turn — that churns
                // turns[0] and invalidates the entire cached prefix. Compaction normally keeps
                // history under budget, so this overflow path is a rare safety net; keeping every
                // kept turn a complete message preserves prompt-cache reuse.
                Content::Text(_) => {}
                // A Parts boundary message is included whole (byte-stable) even though it
                // exceeds the remaining budget.
                Content::Parts(_) => kept_rev.push(effective.clone()),
            }
            // Either way the boundary has been reached: nothing older is considered.
            break;
        } else {
            // Budget exhausted.
            break;
        }
    }

    // Restore chronological order, then make sure the list opens with a user turn.
    kept_rev.reverse();
    let mut turns = kept_rev;
    normalize_turn_prefix(&mut turns);

    // The State turn (task_state + signals) is volatile — keep it OUT of the
    // cacheable history. Providers render it after the history (Anthropic) or
    // prepended (OpenAI). See RenderedContext docs.
    let state_turn = build_state_turn(partitions);

    // P1-E: locate the frozen-prefix boundary in rendered turns. `frozen_history_len` is the
    // history length as of the last compaction (0 before any) — messages beyond it are the hot
    // tail that grows each turn. We count the hot tail from the END, which is robust to the leading
    // anchor and to budget-dropping of OLD turns (the recent tail is never dropped). Emit `Some`
    // only for a distinct, non-empty frozen region; otherwise providers use the rolling-pair
    // fallback (deep == tail would waste a breakpoint).
    let hot = partitions
        .history
        .messages
        .len()
        .saturating_sub(frozen_history_len);
    // `hot < turns.len()` also guarantees the subtraction below cannot underflow.
    let frozen_prefix_len = if frozen_history_len > 0 && hot > 0 && hot < turns.len() {
        Some(turns.len() - hot)
    } else {
        None
    };

    RenderedContext { system_text, system_stable, system_knowledge, turns, state_turn, frozen_prefix_len }
}

// Unit tests for rendering: slot placement, state-turn construction, Layer-4 projection,
// prefix fingerprints and budget overflow handling.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context::config::ContextConfig;
    use crate::context::partitions::ContextPartitions;
    use crate::context::task_state::{PlanStep, TaskState};
    use crate::context::token_engine::ContextTokenEngine;
    use crate::types::message::{Message, Role};

    // Token engine used by every test.
    fn engine() -> ContextTokenEngine { ContextTokenEngine::char_approx() }
    // Fresh partitions built from the default config.
    fn ctx() -> ContextPartitions { ContextPartitions::new(&ContextConfig::default()) }

    // System partition text lands in both `system_stable` and the combined `system_text`.
    #[test]
    fn system_stable_contains_system_partition() {
        let mut c = ctx();
        c.system.push(Message::system("You are helpful."), 10);
        let rc = render(&c, 10_000, &engine(), 4);
        assert!(rc.system_stable.contains("You are helpful."));
        assert!(rc.system_text.contains("You are helpful."));
    }

    // Knowledge partition text lands in both `system_knowledge` and the combined `system_text`.
    #[test]
    fn system_knowledge_contains_knowledge_partition() {
        let mut c = ctx();
        c.knowledge.push(Message::system("skill: debug"), 10);
        let rc = render(&c, 10_000, &engine(), 4);
        assert!(rc.system_knowledge.contains("skill: debug"));
        assert!(rc.system_text.contains("skill: debug"));
    }

    // Task state is rendered in the user-role state turn only — not in the system text
    // and not in the history turns.
    #[test]
    fn task_state_appears_in_state_turn() {
        let mut c = ctx();
        c.task_state = TaskState { goal: "find the bug".to_string(), ..Default::default() };
        let rc = render(&c, 10_000, &engine(), 4);
        assert!(!rc.system_text.contains("[TASK STATE]"), "task_state must not be in system_text");
        let state = rc.state_turn.as_ref().expect("should have a state turn");
        assert_eq!(state.role, Role::User);
        assert!(state.content.as_text().unwrap().contains("[TASK STATE] goal: find the bug"));
        // State is NOT in the cacheable history turns.
        assert!(!rc.turns.iter().any(|m| m.content.as_text().map(|t| t.contains("[TASK STATE]")).unwrap_or(false)));
    }

    // Signals are included in the state turn text.
    #[test]
    fn signals_appear_in_state_turn() {
        let mut c = ctx();
        c.task_state = TaskState { goal: "g".to_string(), ..Default::default() };
        c.signals.push("[ROLLBACK] tool failed".to_string());
        let rc = render(&c, 10_000, &engine(), 4);
        let state = rc.state_turn.as_ref().unwrap();
        assert!(state.content.as_text().unwrap().contains("[ROLLBACK] tool failed"));
    }

    // With default partitions there is neither a state turn nor any history turn.
    #[test]
    fn empty_task_state_no_state_turn() {
        let c = ctx();
        let rc = render(&c, 10_000, &engine(), 4);
        // No state turn when task_state is empty and no signals
        assert!(rc.state_turn.is_none());
        assert!(rc.turns.is_empty());
    }

    // History turns are rendered in order and do not contain the state turn.
    #[test]
    fn history_excludes_state_turn() {
        let mut c = ctx();
        c.task_state = TaskState { goal: "g".to_string(), ..Default::default() };
        c.history.push(Message::user("step 1"), 5);
        c.history.push(Message::assistant("done"), 5);
        let rc = render(&c, 10_000, &engine(), 4);
        // turns is history only; state lives in state_turn.
        assert!(rc.state_turn.as_ref().unwrap().content.as_text().unwrap().contains("[TASK STATE]"));
        assert_eq!(rc.turns[0].role, Role::User);
        assert_eq!(rc.turns[0].content.as_text(), Some("step 1"));
        assert_eq!(rc.turns[1].role, Role::Assistant);
    }

    // A history that starts with an assistant turn gets a user anchor prepended.
    #[test]
    fn all_assistant_tool_history_gets_anchor_user_turn() {
        let mut c = ctx();
        c.history.push(Message::assistant("reply"), 5);
        let rc = render(&c, 10_000, &engine(), 4);
        assert_eq!(rc.turns[0].role, Role::User);
    }

    // Messages pushed with a zero token count are left out of the rendered turns.
    #[test]
    fn zero_token_messages_skipped() {
        let mut c = ctx();
        c.history.push(Message::user("zero"), 0);
        c.history.push(Message::user("real"), 5);
        let rc = render(&c, 10_000, &engine(), 4);
        // Only "real" in history turns (state turn absent — no task_state)
        assert!(rc.turns.iter().any(|m| m.content.as_text() == Some("real")));
        assert!(!rc.turns.iter().any(|m| m.content.as_text() == Some("zero")));
    }

    // A tool result whose handle is Collapsed renders as a shortened preview with a
    // marker, while the stored history keeps the full output.
    #[test]
    fn collapsed_tool_result_renders_as_preview_without_mutating_history() {
        use crate::mm::handle::{Handle, HandleKind, HandleTable, Residency};

        let mut c = ctx();
        let long = "DATA ".repeat(200); // 1000 bytes
        c.history.push(
            Message::tool(vec![ContentPart::ToolResult {
                call_id: "c1".into(),
                output: long.clone(),
                is_error: false,
            }]),
            250,
        );

        let mut handles = HandleTable::new();
        let mut h = Handle::resident_for(1, HandleKind::ToolResult, 250, "c1");
        h.residency = Residency::Collapsed;
        handles.insert(h);

        let rc = render_projected(&c, 10_000, &engine(), 4, &handles, 0);
        // Pull the output of the first rendered tool-result part.
        let rendered: String = rc
            .turns
            .iter()
            .flat_map(|m| match &m.content {
                Content::Parts(parts) => parts.clone(),
                _ => Vec::new(),
            })
            .find_map(|p| match p {
                ContentPart::ToolResult { output, .. } => Some(output),
                _ => None,
            })
            .expect("tool result rendered");
        // Rendered copy is a preview; original full output is retained in history.
        assert!(rendered.contains("[collapsed:"));
        assert!(rendered.len() < long.len());
        let stored = match &c.history.messages[0].content {
            Content::Parts(parts) => match &parts[0] {
                ContentPart::ToolResult { output, .. } => output.clone(),
                _ => unreachable!(),
            },
            _ => unreachable!(),
        };
        assert_eq!(stored, long, "projection must not mutate stored history");
    }

    // A tool result whose handle is left resident renders with its full, unmarked output.
    #[test]
    fn resident_tool_result_renders_in_full() {
        use crate::mm::handle::{Handle, HandleKind, HandleTable};

        let mut c = ctx();
        let body = "RESIDENT BODY ".repeat(20);
        c.history.push(
            Message::tool(vec![ContentPart::ToolResult {
                call_id: "c2".into(),
                output: body.clone(),
                is_error: false,
            }]),
            60,
        );
        let mut handles = HandleTable::new();
        handles.insert(Handle::resident_for(1, HandleKind::ToolResult, 60, "c2"));

        let rc = render_projected(&c, 10_000, &engine(), 4, &handles, 0);
        // Pull the output of the first rendered tool-result part.
        let rendered: String = rc
            .turns
            .iter()
            .flat_map(|m| match &m.content {
                Content::Parts(parts) => parts.clone(),
                _ => Vec::new(),
            })
            .find_map(|p| match p {
                ContentPart::ToolResult { output, .. } => Some(output),
                _ => None,
            })
            .expect("tool result rendered");
        assert_eq!(rendered, body);
        assert!(!rendered.contains("[collapsed:"));
    }

    // ── P1-F: state-turn recency footer ───────────────────────────────────

    // The state turn opens with the TASK STATE block and its last section before
    // "Proceed." is the focus footer carrying goal, active step and latest directive.
    #[test]
    fn state_turn_ends_with_salience_footer_before_proceed() {
        let mut c = ctx();
        c.task_state = TaskState {
            goal: "ship the cache work".to_string(),
            plan: vec![PlanStep { label: "do E".to_string(), done: false }],
            current_step: Some(0),
            ..Default::default()
        };
        c.task_state.record_directive("don't break ABI");
        let rc = render(&c, 100_000, &engine(), 4);
        let text = rc.state_turn.unwrap().content.as_text().unwrap().to_string();

        // The full TASK STATE block still leads (primacy) ...
        assert!(text.starts_with("[TASK STATE] goal: ship the cache work"));
        // ... and the very last block before "Proceed." is the focus footer (recency).
        let before_proceed = text.rsplit_once("\n\nProceed.").expect("ends with Proceed").0;
        let last_block = before_proceed.rsplit("\n\n").next().unwrap();
        assert!(last_block.starts_with("→ focus: ship the cache work"), "got: {last_block}");
        assert!(last_block.contains("step 1: do E"));
        assert!(last_block.contains("must: don't break ABI"));
    }

    // Signals alone produce a state turn, but without a goal there is no focus footer.
    #[test]
    fn no_salience_footer_without_a_goal() {
        let mut c = ctx();
        c.signals.push("[ROLLBACK] tool failed".to_string());
        let rc = render(&c, 100_000, &engine(), 4);
        let text = rc.state_turn.unwrap().content.as_text().unwrap().to_string();
        assert!(!text.contains("→ focus:"), "no goal ⇒ no footer");
        // signals remain the last content before the anchor.
        assert!(text.contains("[ROLLBACK] tool failed"));
    }

    // ── P0-A: prefix fingerprint (cache-drift instrument) ──────────────────

    // Appending a history turn extends the fingerprint without changing earlier hashes.
    #[test]
    fn prefix_fingerprint_is_stable_when_appending_history() {
        let mut c = ctx();
        c.system.push(Message::system("rules"), 5);
        c.knowledge.push(Message::system("skill: debug"), 5);
        c.history.push(Message::user("turn A"), 5);
        c.history.push(Message::assistant("turn B"), 5);
        let fp1 = render(&c, 100_000, &engine(), 4).prefix_fingerprint();

        // Append a new turn — the existing prefix must stay byte-identical.
        c.history.push(Message::user("turn C"), 5);
        let fp2 = render(&c, 100_000, &engine(), 4).prefix_fingerprint();

        assert!(fp2.extends(&fp1), "appending must only grow the tail, never drift the prefix");
        assert_eq!(fp2.common_turn_prefix(&fp1), 2, "both prior turns stay cache-reusable");
        assert_eq!(fp2.turn_hashes.len(), 3);
    }

    #[test]
    fn prefix_fingerprint_ignores_state_turn() {
        // Same history, different task_state/signals → the cacheable prefix is
        // identical (state lives in the uncached tail, out of `turns`).
        let mut c = ctx();
        c.history.push(Message::user("turn A"), 5);
        c.task_state = TaskState { goal: "first goal".to_string(), ..Default::default() };
        let fp1 = render(&c, 100_000, &engine(), 4).prefix_fingerprint();

        c.task_state = TaskState { goal: "totally different goal".to_string(), ..Default::default() };
        c.signals.push("[ROLLBACK] whatever".to_string());
        let fp2 = render(&c, 100_000, &engine(), 4).prefix_fingerprint();

        assert_eq!(fp1, fp2, "volatile state must not perturb the cacheable prefix");
    }

    // Replacing the system message changes the system hash and breaks `extends`.
    #[test]
    fn prefix_fingerprint_detects_system_drift() {
        let mut c = ctx();
        c.system.push(Message::system("rules v1"), 5);
        c.history.push(Message::user("turn A"), 5);
        let fp1 = render(&c, 100_000, &engine(), 4).prefix_fingerprint();

        c.system.messages.clear();
        c.system.push(Message::system("rules v2"), 5);
        let fp2 = render(&c, 100_000, &engine(), 4).prefix_fingerprint();

        assert_ne!(fp1.system_stable_hash, fp2.system_stable_hash);
        assert!(!fp2.extends(&fp1), "a system-block edit invalidates the whole prefix");
    }

    // Collapsing a mid-history tool result changes that turn's hash, so the common
    // prefix stops just before it.
    #[test]
    fn prefix_fingerprint_detects_in_place_collapse_churn() {
        use crate::mm::handle::{Handle, HandleKind, HandleTable, Residency};

        let mut c = ctx();
        c.history.push(Message::user("start"), 5);
        let long = "DATA ".repeat(200);
        c.history.push(
            Message::tool(vec![ContentPart::ToolResult {
                call_id: "c1".into(),
                output: long,
                is_error: false,
            }]),
            250,
        );
        c.history.push(Message::user("recent"), 5);

        let resident = render(&c, 100_000, &engine(), 4).prefix_fingerprint();

        // Collapsing the old tool result rewrites that turn in place → the prefix
        // hash at that position changes (the cache-cost of folding, made visible).
        let mut handles = HandleTable::new();
        let mut h = Handle::resident_for(1, HandleKind::ToolResult, 250, "c1");
        h.residency = Residency::Collapsed;
        handles.insert(h);
        let collapsed = render_projected(&c, 100_000, &engine(), 4, &handles, 0).prefix_fingerprint();

        // turn 0 ("start") is byte-stable; the collapsed tool result at turn 1 drifts.
        assert_eq!(collapsed.common_turn_prefix(&resident), 1, "drift begins at the collapsed turn");
        assert!(!collapsed.extends(&resident));
    }

    // Protected (most recent) messages survive even when they exceed the budget.
    #[test]
    fn protected_recent_messages_kept_whole_over_budget() {
        let mut c = ctx();
        c.history.push(Message::user("first message"), 5);
        c.history.push(Message::user("a".repeat(1000)), 250);
        // preserve_recent_msgs=4 protects both — kept whole regardless of the 10-token budget.
        let rc = render(&c, 10, &engine(), 4);
        assert!(rc.turns.iter().any(|m| {
            m.content.as_text().map(|t| t.contains("first message")).unwrap_or(false)
        }));
    }

    #[test]
    fn oversized_text_boundary_is_dropped_whole_not_truncated() {
        // P0-B1: an unprotected, over-budget Text boundary message is dropped whole — never
        // mid-truncated — so no budget-dependent fragment lands in the cached prefix.
        let mut c = ctx();
        c.history.push(Message::user("a".repeat(1000)), 250); // oldest, oversized
        c.history.push(Message::user("recent"), 2); // newest, fits
        let rc = render(&c, 5, &engine(), 0); // nothing protected
        assert_eq!(rc.turns.len(), 1, "only the fitting newest turn survives");
        assert_eq!(rc.turns[0].content.as_text(), Some("recent"));
        assert!(
            !rc.turns.iter().any(|m| m
                .content
                .as_text()
                .map(|t| t.starts_with("aaaa"))
                .unwrap_or(false)),
            "no truncated body in the prefix"
        );
    }
}