Skip to main content

zeph_context/
typed_page.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Typed page classification and minimum-fidelity invariants for context compaction.
5//!
6//! Every context segment entering the assembler is tagged with a [`PageType`] and
7//! wrapped in a [`TypedPage`]. The [`PageInvariant`] trait declares the fidelity
8//! contract enforced at every compaction boundary.
9//!
10//! Classification is deterministic and side-effect free — no I/O, no LLM calls.
11//!
12//! # Architecture
13//!
14//! This module lives in `zeph-context` to keep classification logic co-located with
15//! the assembler. No dependency on `zeph-memory` is introduced here.
16//!
17//! # Feature flag
18//!
19//! All typed-page functionality is gated behind the
20//! `[memory.compaction.typed_pages] enabled = true` config key. When disabled the
21//! assembler falls back to the legacy untyped path without behaviour change.
22
23use std::sync::Arc;
24
25use serde::{Deserialize, Serialize};
26
27// ── PageType ──────────────────────────────────────────────────────────────────
28
29/// Classification of a context segment for compaction purposes.
30///
31/// The variant determines which [`PageInvariant`] is enforced and what shape the
32/// compacted summary must have.
33///
34/// # Invariant
35///
36/// Every [`TypedPage`] carries exactly one `PageType`. Unclassifiable segments
37/// default to [`PageType::ConversationTurn`] (see [`classify`]).
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
39#[serde(rename_all = "snake_case")]
40pub enum PageType {
41    /// A tool request/response pair sourced from memory or the current turn.
42    ToolOutput,
43    /// A user or assistant message that does not carry a tool role.
44    ConversationTurn,
45    /// Cross-session context, past summaries, or graph-fact recall injections.
46    MemoryExcerpt,
47    /// Session digest, persona, skill instructions, or compression guidelines.
48    SystemContext,
49}
50
51impl std::fmt::Display for PageType {
52    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53        match self {
54            Self::ToolOutput => f.write_str("tool_output"),
55            Self::ConversationTurn => f.write_str("conversation_turn"),
56            Self::MemoryExcerpt => f.write_str("memory_excerpt"),
57            Self::SystemContext => f.write_str("system_context"),
58        }
59    }
60}
61
62// ── PageOrigin ────────────────────────────────────────────────────────────────
63
64/// Provenance of a [`TypedPage`], serialised into audit records.
65#[derive(Debug, Clone, Serialize, Deserialize)]
66#[serde(tag = "kind", rename_all = "snake_case")]
67pub enum PageOrigin {
68    /// Tool request/response pair.
69    ToolPair {
70        /// Name of the tool that produced this output.
71        tool_name: String,
72    },
73    /// User or assistant conversation turn.
74    Turn {
75        /// Opaque message identifier (numeric message id as string).
76        message_id: String,
77    },
78    /// Injected from memory (cross-session, summary, graph-facts, etc.).
79    Excerpt {
80        /// Human-readable label identifying the memory source.
81        source_label: String,
82    },
83    /// Session-level system context (persona, skills, digest).
84    System {
85        /// Logical key for this system context block (e.g. `"persona"`, `"skills"`).
86        key: String,
87    },
88}
89
90// ── SchemaHint ────────────────────────────────────────────────────────────────
91
92/// Body format hint for [`PageType::ToolOutput`] pages.
93///
94/// Used by the invariant to select the correct structured-summary prompt.
95#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
96#[serde(rename_all = "snake_case")]
97pub enum SchemaHint {
98    /// Body is valid JSON (object or array).
99    Json,
100    /// Body is UTF-8 text (log lines, prose, etc.).
101    Text,
102    /// Body is a unified diff.
103    Diff,
104    /// Body is a tab- or comma-separated table.
105    Table,
106    /// Body is non-UTF-8 binary data.
107    Binary,
108}
109
110// ── PageId ────────────────────────────────────────────────────────────────────
111
112/// Stable content-addressed identifier for a [`TypedPage`].
113///
114/// Computed as BLAKE3 over `page_type_tag || origin_tag || body_bytes`, encoded
115/// as lowercase hex (first 16 bytes = 32 hex chars). The same input always
116/// produces the same `PageId`, enabling deduplication across turns.
117///
118/// # Semantics
119///
120/// `PageId` is a **content hash**: identical source bytes (same page type, same
121/// origin key, same body) always produce the same id. This means that the same
122/// tool output appearing in two different turns produces the same `PageId`.
123/// Callers that need per-turn provenance must use `turn_id` from the audit record
124/// — `PageId` is for deduplication, not for uniqueness across turns.
125#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
126pub struct PageId(pub String);
127
128impl PageId {
129    /// Compute a [`PageId`] from the page type, origin key, and body bytes.
130    #[must_use]
131    pub fn compute(page_type: PageType, origin_key: &str, body: &[u8]) -> Self {
132        let mut hasher = blake3::Hasher::new();
133        hasher.update(page_type.to_string().as_bytes());
134        hasher.update(b"|");
135        hasher.update(origin_key.as_bytes());
136        hasher.update(b"|");
137        hasher.update(body);
138        let hash = hasher.finalize();
139        // Use first 16 bytes (128 bits) — sufficient for deduplication purposes.
140        let mut hex = String::with_capacity(32);
141        for b in &hash.as_bytes()[..16] {
142            use std::fmt::Write as _;
143            let _ = write!(hex, "{b:02x}");
144        }
145        Self(format!("blake3:{hex}"))
146    }
147}
148
149// ── TypedPage ─────────────────────────────────────────────────────────────────
150
151/// A classified context segment ready for invariant-aware compaction.
152///
153/// `TypedPage` is the unit of work passed to compaction boundaries. The
154/// [`PageId`] is content-stable: the same source bytes always produce the same
155/// id, enabling the compactor to skip already-compacted pages.
156#[derive(Debug, Clone)]
157pub struct TypedPage {
158    /// Stable content-addressed identifier.
159    pub page_id: PageId,
160    /// Classification determining which invariant applies.
161    pub page_type: PageType,
162    /// Provenance of this page (for audit records).
163    pub origin: PageOrigin,
164    /// Token count of the original body.
165    pub tokens: u32,
166    /// Body text shared across potential clones.
167    pub body: Arc<str>,
168    /// Body format hint (populated for `ToolOutput` only; `None` otherwise).
169    pub schema_hint: Option<SchemaHint>,
170}
171
172impl TypedPage {
173    /// Construct a new [`TypedPage`], computing its [`PageId`] from content.
174    #[must_use]
175    pub fn new(
176        page_type: PageType,
177        origin: PageOrigin,
178        tokens: u32,
179        body: Arc<str>,
180        schema_hint: Option<SchemaHint>,
181    ) -> Self {
182        let origin_key = origin_key_for(&origin);
183        let page_id = PageId::compute(page_type, &origin_key, body.as_bytes());
184        Self {
185            page_id,
186            page_type,
187            origin,
188            tokens,
189            body,
190            schema_hint,
191        }
192    }
193}
194
195fn origin_key_for(origin: &PageOrigin) -> String {
196    match origin {
197        PageOrigin::ToolPair { tool_name } => format!("tool:{tool_name}"),
198        PageOrigin::Turn { message_id } => format!("turn:{message_id}"),
199        PageOrigin::Excerpt { source_label } => format!("excerpt:{source_label}"),
200        PageOrigin::System { key } => format!("system:{key}"),
201    }
202}
203
204// ── FidelityContract ──────────────────────────────────────────────────────────
205
/// Describes what a compacted page is required to retain.
///
/// Produced by [`PageInvariant::minimum_fidelity`]; [`PageInvariant::verify`]
/// checks the summarized output against it.
#[derive(Debug, Clone)]
pub struct FidelityContract {
    /// Human-readable name of the contract version (e.g. `"structured_summary_v1"`).
    pub fidelity_level: &'static str,
    /// Integer schema version written to audit records.
    pub invariant_version: u8,
    /// Names of the fields the compacted body text must contain.
    pub required_fields: &'static [&'static str],
}
219
220// ── FidelityViolation ─────────────────────────────────────────────────────────
221
222/// Describes why an invariant check failed after compaction.
223///
224/// A violation is a hard error: the compacted page is dropped and an audit
225/// record with `violations` is emitted.
226#[derive(Debug, Clone, Serialize)]
227pub struct FidelityViolation {
228    /// The field or property that was expected but missing.
229    pub missing_field: String,
230    /// Human-readable explanation of the violation.
231    pub detail: String,
232}
233
234// ── CompactedPage ─────────────────────────────────────────────────────────────
235
/// Result of one compaction attempt; the input to [`PageInvariant::verify`].
#[derive(Debug, Clone)]
pub struct CompactedPage {
    /// Summarized body text returned by the compaction provider.
    pub body: Arc<str>,
    /// Number of tokens in the compacted body.
    pub tokens: u32,
}
244
245// ── PageInvariant trait ───────────────────────────────────────────────────────
246
247/// Minimum-fidelity contract for a single [`PageType`].
248///
249/// Implementors declare what a compacted page must contain ([`minimum_fidelity`])
250/// and verify that the actual output honours the contract ([`verify`]).
251///
252/// The trait is object-safe so implementations can be stored in a
253/// `HashMap<PageType, Box<dyn PageInvariant>>` registry.
254///
255/// # Contract
256///
257/// - [`verify`] MUST NOT perform I/O or call an LLM.
258/// - A failed [`verify`] means the compacted page is dropped — it is NEVER
259///   injected in degraded form.
260///
261/// [`minimum_fidelity`]: PageInvariant::minimum_fidelity
262/// [`verify`]: PageInvariant::verify
263pub trait PageInvariant: Send + Sync {
264    /// The page type this invariant governs.
265    fn page_type(&self) -> PageType;
266
267    /// Return the fidelity contract required for a given page.
268    fn minimum_fidelity(&self, page: &TypedPage) -> FidelityContract;
269
270    /// Verify that `compacted` satisfies the fidelity contract derived from `original`.
271    ///
272    /// # Errors
273    ///
274    /// Returns a non-empty [`Vec<FidelityViolation>`] when one or more required
275    /// fields are absent from the compacted body.
276    fn verify(
277        &self,
278        original: &TypedPage,
279        compacted: &CompactedPage,
280    ) -> Result<(), Vec<FidelityViolation>>;
281}
282
283// ── Per-type invariant implementations ───────────────────────────────────────
284
285/// Invariant for [`PageType::ToolOutput`] pages.
286///
287/// The compacted body must contain the tool name, an exit/status indicator,
288/// and at least one structural key from the original output.
289pub struct ToolOutputInvariant;
290
291impl PageInvariant for ToolOutputInvariant {
292    fn page_type(&self) -> PageType {
293        PageType::ToolOutput
294    }
295
296    fn minimum_fidelity(&self, _page: &TypedPage) -> FidelityContract {
297        FidelityContract {
298            fidelity_level: "structured_summary_v1",
299            invariant_version: 1,
300            required_fields: &["tool_name", "exit_status"],
301        }
302    }
303
304    fn verify(
305        &self,
306        original: &TypedPage,
307        compacted: &CompactedPage,
308    ) -> Result<(), Vec<FidelityViolation>> {
309        let body = compacted.body.as_ref();
310        // For binary pages the body marker is injected by the compactor, skip field checks.
311        if original.schema_hint == Some(SchemaHint::Binary) {
312            return Ok(());
313        }
314
315        let mut violations = Vec::new();
316
317        // The compacted body must reference the tool name.
318        let tool_name = match &original.origin {
319            PageOrigin::ToolPair { tool_name } => tool_name.as_str(),
320            _ => "",
321        };
322        if !tool_name.is_empty() && !body.contains(tool_name) {
323            violations.push(FidelityViolation {
324                missing_field: "tool_name".into(),
325                detail: format!("compacted body does not reference tool '{tool_name}'"),
326            });
327        }
328
329        // The compacted body must contain at least one exit / status indicator.
330        let has_status = body.contains("exit_status")
331            || body.contains("exit_code")
332            || body.contains("status:")
333            || body.contains("Status:")
334            || body.contains("exit:")
335            || body.contains("rc:");
336        if !has_status {
337            violations.push(FidelityViolation {
338                missing_field: "exit_status".into(),
339                detail: "compacted body does not contain an exit status indicator".into(),
340            });
341        }
342
343        // For JSON-schema tool outputs, verify that at least one top-level JSON
344        // field name from the original body is present in the compacted body
345        // (FR-003: structural keys must be preserved, not just exit-status markers).
346        if original.schema_hint == Some(SchemaHint::Json) {
347            let original_body = original.body.as_ref();
348            let preserved = check_json_structural_key(original_body, body);
349            if !preserved {
350                violations.push(FidelityViolation {
351                    missing_field: "structural_key".into(),
352                    detail: "compacted JSON tool output does not reference any top-level field \
353                             name from the original output"
354                        .into(),
355                });
356            }
357        }
358
359        if violations.is_empty() {
360            Ok(())
361        } else {
362            Err(violations)
363        }
364    }
365}
366
367/// Check that at least one top-level JSON key from `original` appears in `compacted`.
368///
369/// Parses `original` as a JSON object and returns `true` when any top-level key
370/// string is a substring of `compacted`. Returns `true` (no violation) when
371/// `original` is not a valid JSON object — the caller already checked schema hint.
372fn check_json_structural_key(original: &str, compacted: &str) -> bool {
373    let Ok(value) = serde_json::from_str::<serde_json::Value>(original) else {
374        return true;
375    };
376    let Some(obj) = value.as_object() else {
377        return true;
378    };
379    if obj.is_empty() {
380        return true;
381    }
382    obj.keys().any(|k| compacted.contains(k.as_str()))
383}
384
385/// Invariant for [`PageType::ConversationTurn`] pages.
386///
387/// The compacted body must preserve a role indicator and some meaningful content.
388pub struct ConversationTurnInvariant;
389
390impl PageInvariant for ConversationTurnInvariant {
391    fn page_type(&self) -> PageType {
392        PageType::ConversationTurn
393    }
394
395    fn minimum_fidelity(&self, _page: &TypedPage) -> FidelityContract {
396        FidelityContract {
397            fidelity_level: "semantic_summary_v1",
398            invariant_version: 1,
399            required_fields: &["role"],
400        }
401    }
402
403    fn verify(
404        &self,
405        _original: &TypedPage,
406        compacted: &CompactedPage,
407    ) -> Result<(), Vec<FidelityViolation>> {
408        let body = compacted.body.as_ref();
409        let has_role =
410            body.contains("user") || body.contains("assistant") || body.contains("system");
411        if !has_role {
412            return Err(vec![FidelityViolation {
413                missing_field: "role".into(),
414                detail: "compacted turn does not identify a speaker role".into(),
415            }]);
416        }
417        Ok(())
418    }
419}
420
421/// Invariant for [`PageType::MemoryExcerpt`] pages.
422///
423/// The compacted body must retain the source label and a message id reference.
424pub struct MemoryExcerptInvariant;
425
426impl PageInvariant for MemoryExcerptInvariant {
427    fn page_type(&self) -> PageType {
428        PageType::MemoryExcerpt
429    }
430
431    fn minimum_fidelity(&self, _page: &TypedPage) -> FidelityContract {
432        FidelityContract {
433            fidelity_level: "excerpt_summary_v1",
434            invariant_version: 1,
435            required_fields: &["source_label"],
436        }
437    }
438
439    fn verify(
440        &self,
441        original: &TypedPage,
442        compacted: &CompactedPage,
443    ) -> Result<(), Vec<FidelityViolation>> {
444        let source_label = match &original.origin {
445            PageOrigin::Excerpt { source_label } => source_label.as_str(),
446            _ => return Ok(()),
447        };
448        if !compacted.body.contains(source_label) {
449            return Err(vec![FidelityViolation {
450                missing_field: "source_label".into(),
451                detail: format!("compacted excerpt does not contain source label '{source_label}'"),
452            }]);
453        }
454        Ok(())
455    }
456}
457
458/// Invariant for [`PageType::SystemContext`] pages.
459///
460/// System context MUST NOT be paraphrased. Compaction replaces it with a
461/// pointer record; any body other than the pointer prefix is a violation.
462pub struct SystemContextInvariant;
463
464/// Pointer prefix that the compactor writes for `SystemContext` pages.
465pub const SYSTEM_POINTER_PREFIX: &str = "[system-ptr:";
466
467impl PageInvariant for SystemContextInvariant {
468    fn page_type(&self) -> PageType {
469        PageType::SystemContext
470    }
471
472    fn minimum_fidelity(&self, _page: &TypedPage) -> FidelityContract {
473        FidelityContract {
474            fidelity_level: "pointer_replace_v1",
475            invariant_version: 1,
476            required_fields: &["pointer"],
477        }
478    }
479
480    fn verify(
481        &self,
482        _original: &TypedPage,
483        compacted: &CompactedPage,
484    ) -> Result<(), Vec<FidelityViolation>> {
485        if !compacted.body.starts_with(SYSTEM_POINTER_PREFIX) {
486            return Err(vec![FidelityViolation {
487                missing_field: "pointer".into(),
488                detail: format!(
489                    "SystemContext page was not pointer-replaced \
490                     (body does not start with '{SYSTEM_POINTER_PREFIX}')"
491                ),
492            }]);
493        }
494        Ok(())
495    }
496}
497
498// ── InvariantRegistry ─────────────────────────────────────────────────────────
499
500/// Registry mapping each [`PageType`] to its [`PageInvariant`] implementation.
501///
502/// Built once and shared via `Arc` so tests can swap in a mock registry.
503///
504/// # Examples
505///
506/// ```
507/// use zeph_context::typed_page::{InvariantRegistry, PageType};
508///
509/// let reg = InvariantRegistry::default();
510/// let inv = reg.get(PageType::ToolOutput).unwrap();
511/// assert_eq!(inv.page_type(), PageType::ToolOutput);
512/// ```
513pub struct InvariantRegistry {
514    tool_output: Box<dyn PageInvariant>,
515    conversation_turn: Box<dyn PageInvariant>,
516    memory_excerpt: Box<dyn PageInvariant>,
517    system_context: Box<dyn PageInvariant>,
518}
519
520impl Default for InvariantRegistry {
521    fn default() -> Self {
522        Self {
523            tool_output: Box::new(ToolOutputInvariant),
524            conversation_turn: Box::new(ConversationTurnInvariant),
525            memory_excerpt: Box::new(MemoryExcerptInvariant),
526            system_context: Box::new(SystemContextInvariant),
527        }
528    }
529}
530
531impl InvariantRegistry {
532    /// Look up the invariant for a given [`PageType`].
533    ///
534    /// Always returns `Some` for the four built-in variants.
535    #[must_use]
536    pub fn get(&self, page_type: PageType) -> Option<&dyn PageInvariant> {
537        match page_type {
538            PageType::ToolOutput => Some(self.tool_output.as_ref()),
539            PageType::ConversationTurn => Some(self.conversation_turn.as_ref()),
540            PageType::MemoryExcerpt => Some(self.memory_excerpt.as_ref()),
541            PageType::SystemContext => Some(self.system_context.as_ref()),
542        }
543    }
544
545    /// Verify that `compacted` satisfies the invariant for `original` at a compaction boundary.
546    ///
547    /// This is the primary entry point for the compactor — it wraps `verify()` in a
548    /// `tracing::info_span!` per NFR-009 so every compaction boundary is observable.
549    ///
550    /// Returns `Ok(())` when the invariant is satisfied, or the violation list on failure.
551    ///
552    /// # Errors
553    ///
554    /// Propagates [`FidelityViolation`]s from the registered invariant implementation.
555    pub fn enforce(
556        &self,
557        original: &TypedPage,
558        compacted: &CompactedPage,
559    ) -> Result<(), Vec<FidelityViolation>> {
560        let _span = tracing::info_span!(
561            "context.compaction.typed_page",
562            page_type = %original.page_type,
563            page_id = %original.page_id.0,
564            original_tokens = original.tokens,
565            compacted_tokens = compacted.tokens,
566        )
567        .entered();
568
569        if let Some(inv) = self.get(original.page_type) {
570            inv.verify(original, compacted)
571        } else {
572            tracing::warn!(
573                page_type = %original.page_type,
574                "no invariant registered for page type — skipping verification"
575            );
576            Ok(())
577        }
578    }
579}
580
581// ── Classification helpers ────────────────────────────────────────────────────
582
583/// Classify a context segment by examining well-known prefix markers.
584///
585/// Classification is deterministic and performs no I/O. When the input does not
586/// match any known prefix the function defaults to [`PageType::ConversationTurn`]
587/// and logs at `WARN` level per FR-008.
588///
589/// The function emits a `context.compaction.typed_page.classify` span per NFR-009
590/// so every classification is observable in traces.
591///
592/// | Source marker | Assigned [`PageType`] |
593/// |---|---|
594/// | Starts with `[tool_output]` or `[tool:` | [`PageType::ToolOutput`] |
595/// | Starts with `[cross-session context]`, `[semantic recall]`, `[known facts]`, `[conversation summaries]` | [`PageType::MemoryExcerpt`] |
596/// | Starts with `[Persona context]`, `[Past experience]`, `[Memory summary]`, `[system` | [`PageType::SystemContext`] |
597/// | Everything else | [`PageType::ConversationTurn`] |
598///
599/// # Examples
600///
601/// ```
602/// use zeph_context::typed_page::{classify, PageType};
603///
604/// assert_eq!(classify("[tool_output] exit_code: 0"), PageType::ToolOutput);
605/// assert_eq!(classify("[cross-session context]\nsome recall"), PageType::MemoryExcerpt);
606/// assert_eq!(classify("[Persona context]\nfact"), PageType::SystemContext);
607/// assert_eq!(classify("Hello, world!"), PageType::ConversationTurn);
608/// ```
609#[must_use]
610pub fn classify(body: &str) -> PageType {
611    classify_with_role(body, false)
612}
613
614/// Classify a context segment, with an explicit `is_system_role` hint.
615///
616/// When `is_system_role` is `true` the segment is classified as
617/// [`PageType::SystemContext`] without prefix matching, preventing arbitrary
618/// system messages injected by the assembler from silently falling back to
619/// `ConversationTurn` (Key Invariant: "`SystemContext` pages are never paraphrased").
620///
621/// Use this variant when the caller has access to the message `Role`.
622///
623/// # Examples
624///
625/// ```
626/// use zeph_context::typed_page::{classify_with_role, PageType};
627///
628/// // A plain system message without a known prefix is still SystemContext.
629/// assert_eq!(classify_with_role("You are a helpful assistant.", true), PageType::SystemContext);
630/// // Role hint does not override ToolOutput prefix detection.
631/// assert_eq!(classify_with_role("[tool_output] exit_code: 0", false), PageType::ToolOutput);
632/// ```
633#[must_use]
634pub fn classify_with_role(body: &str, is_system_role: bool) -> PageType {
635    tracing::info_span!(
636        "context.compaction.typed_page.classify",
637        body_len = body.len()
638    )
639    .in_scope(|| classify_with_role_inner(body, is_system_role))
640}
641
642fn classify_with_role_inner(body: &str, is_system_role: bool) -> PageType {
643    // Use the same prefix constants as the assembler for consistency.
644    const TOOL_PREFIXES: &[&str] = &["[tool_output]", "[tool:", "[Tool output]"];
645    const MEMORY_PREFIXES: &[&str] = &[
646        "[cross-session context]",
647        "[semantic recall]",
648        "[known facts]",
649        "[conversation summaries]",
650        "[past corrections]",
651        "## Relevant documents",
652    ];
653    const SYSTEM_PREFIXES: &[&str] = &[
654        "[Persona context]",
655        "[Past experience]",
656        "[Memory summary]",
657        "[system",
658        "[skill",
659        "[persona",
660        "[digest",
661        "[compression",
662    ];
663
664    let trimmed = body.trim_start();
665
666    for prefix in TOOL_PREFIXES {
667        if trimmed.starts_with(prefix) {
668            return PageType::ToolOutput;
669        }
670    }
671    for prefix in MEMORY_PREFIXES {
672        if trimmed.starts_with(prefix) {
673            return PageType::MemoryExcerpt;
674        }
675    }
676    for prefix in SYSTEM_PREFIXES {
677        if trimmed.starts_with(prefix) {
678            return PageType::SystemContext;
679        }
680    }
681
682    // When the caller signals Role::System, classify as SystemContext even if
683    // the body does not start with a known prefix.  This prevents system
684    // context injected by the assembler (e.g. plain instructions, directives)
685    // from being eligible for paraphrase.
686    if is_system_role {
687        return PageType::SystemContext;
688    }
689
690    tracing::warn!(
691        body_prefix = &body[..body.len().min(80)],
692        "typed-page classification fallback to ConversationTurn"
693    );
694    PageType::ConversationTurn
695}
696
697/// Detect [`SchemaHint`] for a [`PageType::ToolOutput`] body.
698///
699/// Returns [`SchemaHint::Binary`] when the body is not valid UTF-8 (detected via
700/// presence of replacement characters) or when the caller passes `is_binary =
701/// true`. JSON detection is heuristic (starts with `{` or `[`).
702#[must_use]
703pub fn detect_schema_hint(body: &str, is_binary: bool) -> SchemaHint {
704    if is_binary || body.contains('\u{FFFD}') {
705        return SchemaHint::Binary;
706    }
707    let trimmed = body.trim_start();
708    if trimmed.starts_with('{') || trimmed.starts_with('[') {
709        return SchemaHint::Json;
710    }
711    if trimmed.starts_with("--- ")
712        || trimmed.starts_with("+++ ")
713        || trimmed.starts_with("diff --git")
714        || trimmed.starts_with("diff -")
715    {
716        return SchemaHint::Diff;
717    }
718    // Simple table heuristic: first line contains multiple tab or pipe separators.
719    let first_line = trimmed.lines().next().unwrap_or("");
720    if first_line.matches('\t').count() >= 2 || first_line.matches('|').count() >= 2 {
721        return SchemaHint::Table;
722    }
723    SchemaHint::Text
724}
725
726// ── Audit record ──────────────────────────────────────────────────────────────
727
728/// One JSONL audit record emitted per compacted page (FR-007).
729///
730/// Written to `[memory.compaction.typed_pages] audit_path` by the audit sink
731/// before the compacted context is handed to the LLM.
732#[derive(Debug, Serialize)]
733pub struct CompactedPageRecord {
734    /// ISO-8601 timestamp when the compaction occurred.
735    pub ts: String,
736    /// Opaque turn identifier (agent turn counter as string).
737    pub turn_id: String,
738    /// Stable content-addressed page identifier.
739    pub page_id: String,
740    /// Page classification.
741    pub page_type: PageType,
742    /// Serialised page origin.
743    pub origin: PageOrigin,
744    /// Token count of the original page.
745    pub original_tokens: u32,
746    /// Token count of the compacted page.
747    pub compacted_tokens: u32,
748    /// Fidelity level label from the invariant contract.
749    pub fidelity_level: String,
750    /// Schema version integer.
751    pub invariant_version: u8,
752    /// Provider name used for summarization.
753    pub provider_name: String,
754    /// Fidelity violations encountered (empty on success).
755    pub violations: Vec<FidelityViolation>,
756    /// `true` when classification fell back to `ConversationTurn`.
757    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
758    pub classification_fallback: bool,
759}
760
761// ── Audit sink ────────────────────────────────────────────────────────────────
762
763/// Async bounded-mpsc audit sink for compaction records.
764///
765/// The sink serialises [`CompactedPageRecord`] values to a JSONL file via a
766/// background writer task, mirroring the `zeph-tools` audit pattern. Dropped
767/// records (when the channel is full) are counted and logged.
768///
769/// # Invariant
770///
771/// The sink MUST be flushed (all pending records written) before the compacted
772/// context is passed to the LLM. Callers use [`CompactionAuditSink::flush`] to
773/// ensure this.
774///
775/// # Examples
776///
777/// ```no_run
778/// use zeph_context::typed_page::CompactionAuditSink;
779/// use std::path::Path;
780///
781/// # async fn example() {
782/// let sink = CompactionAuditSink::open(Path::new(".local/audit/compaction.jsonl"), 256)
783///     .await
784///     .unwrap();
785/// # }
786/// ```
787#[derive(Debug, Clone)]
788pub struct CompactionAuditSink {
789    tx: tokio::sync::mpsc::Sender<CompactedPageRecord>,
790    drop_counter: Arc<std::sync::atomic::AtomicU64>,
791}
792
793impl CompactionAuditSink {
794    /// Open a new audit sink writing to `path`.
795    ///
796    /// `capacity` is the bounded channel depth; records dropped when full are counted
797    /// in the internal drop counter and logged at WARN.
798    ///
799    /// # Errors
800    ///
801    /// Returns an error when `path` cannot be opened for appending.
802    pub async fn open(path: &std::path::Path, capacity: usize) -> Result<Self, std::io::Error> {
803        use tokio::io::AsyncWriteExt as _;
804
805        if let Some(parent) = path.parent() {
806            tokio::fs::create_dir_all(parent).await?;
807        }
808        let file = tokio::fs::OpenOptions::new()
809            .create(true)
810            .append(true)
811            .open(path)
812            .await?;
813
814        let (tx, mut rx) = tokio::sync::mpsc::channel::<CompactedPageRecord>(capacity);
815        let drop_counter = Arc::new(std::sync::atomic::AtomicU64::new(0));
816        let drop_counter_bg = drop_counter.clone();
817
818        tokio::spawn(async move {
819            let mut writer = tokio::io::BufWriter::new(file);
820            while let Some(record) = rx.recv().await {
821                match serde_json::to_string(&record) {
822                    Ok(mut line) => {
823                        line.push('\n');
824                        if let Err(e) = writer.write_all(line.as_bytes()).await {
825                            tracing::error!("compaction audit write failed: {e:#}");
826                        }
827                    }
828                    Err(e) => {
829                        tracing::error!("compaction audit serialization failed: {e:#}");
830                    }
831                }
832            }
833            // Flush remaining bytes when channel closes.
834            let _ = writer.flush().await;
835
836            let dropped = drop_counter_bg.load(std::sync::atomic::Ordering::Relaxed);
837            if dropped > 0 {
838                tracing::warn!(dropped, "compaction audit sink closed with dropped records");
839            }
840        });
841
842        Ok(Self { tx, drop_counter })
843    }
844
845    /// Send a record to the audit sink.
846    ///
847    /// If the channel is full the record is dropped and the drop counter is incremented.
848    pub fn send(&self, record: CompactedPageRecord) {
849        match self.tx.try_send(record) {
850            Ok(()) => {}
851            Err(tokio::sync::mpsc::error::TrySendError::Full(_)) => {
852                let prev = self
853                    .drop_counter
854                    .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
855                tracing::warn!(
856                    dropped_total = prev + 1,
857                    "compaction audit sink full — record dropped (best-effort audit)"
858                );
859            }
860            Err(tokio::sync::mpsc::error::TrySendError::Closed(_)) => {
861                tracing::error!("compaction audit sink closed unexpectedly");
862            }
863        }
864    }
865
866    /// Flush all pending records by sending a sentinel and waiting for the writer task
867    /// to drain. This uses a one-shot pair through the same channel ordering.
868    ///
869    /// The implementation closes a clone's sender so the channel drains; callers that
870    /// need precise "before-LLM" semantics should call this before constructing the
871    /// LLM request.
872    ///
873    /// # Note
874    ///
875    /// In the current bounded-mpsc design, `flush` is a best-effort operation: records
876    /// already accepted into the channel will be written, but there is no synchronous
877    /// confirmation from the writer task. The audit invariant is therefore **best-effort**,
878    /// not strict. A future revision may use a rendezvous channel for strict flush.
879    pub async fn flush(&self) {
880        // Yield to the tokio runtime to let the writer task drain the channel.
881        tokio::task::yield_now().await;
882    }
883
884    /// Number of records dropped due to a full channel.
885    #[must_use]
886    pub fn dropped_count(&self) -> u64 {
887        self.drop_counter.load(std::sync::atomic::Ordering::Relaxed)
888    }
889}
890
891// ── Tests ─────────────────────────────────────────────────────────────────────
892
#[cfg(test)]
mod tests {
    //! Unit tests for page identity, classification, schema-hint detection, the
    //! per-type invariants, the invariant registry, and the JSONL audit sink.

    use super::*;

    /// Poll `path` until its contents end with a newline, i.e. the writer task has
    /// flushed at least one complete JSONL line. Panics after a generous deadline so
    /// a broken writer fails the test instead of hanging it.
    async fn read_when_line_complete(path: &std::path::Path) -> String {
        let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
        loop {
            let contents = std::fs::read_to_string(path).unwrap();
            if contents.ends_with('\n') {
                return contents;
            }
            assert!(
                std::time::Instant::now() < deadline,
                "audit record was not written within the timeout"
            );
            tokio::time::sleep(std::time::Duration::from_millis(5)).await;
        }
    }

    // ── PageId ────────────────────────────────────────────────────────────────

    #[test]
    fn page_id_same_input_same_output() {
        let a = PageId::compute(PageType::ToolOutput, "tool:shell", b"exit_code: 0");
        let b = PageId::compute(PageType::ToolOutput, "tool:shell", b"exit_code: 0");
        assert_eq!(a, b);
    }

    #[test]
    fn page_id_different_type_different_id() {
        let a = PageId::compute(PageType::ToolOutput, "tool:shell", b"body");
        let b = PageId::compute(PageType::ConversationTurn, "tool:shell", b"body");
        assert_ne!(a, b);
    }

    #[test]
    fn page_id_starts_with_blake3_prefix() {
        let id = PageId::compute(PageType::SystemContext, "system:persona", b"some content");
        assert!(id.0.starts_with("blake3:"));
    }

    // ── classify ──────────────────────────────────────────────────────────────

    #[test]
    fn classify_tool_output_prefix() {
        assert_eq!(
            classify("[tool_output] shell exit_code: 0"),
            PageType::ToolOutput
        );
        assert_eq!(classify("[tool:shell] result"), PageType::ToolOutput);
    }

    #[test]
    fn classify_memory_prefixes() {
        assert_eq!(
            classify("[cross-session context]\nsome recall"),
            PageType::MemoryExcerpt
        );
        assert_eq!(
            classify("[semantic recall]\n- [user] hello"),
            PageType::MemoryExcerpt
        );
        assert_eq!(classify("[known facts]\n- fact"), PageType::MemoryExcerpt);
        assert_eq!(
            classify("[conversation summaries]\n- 1-10: summary"),
            PageType::MemoryExcerpt
        );
    }

    #[test]
    fn classify_system_prefixes() {
        assert_eq!(classify("[Persona context]\nfact"), PageType::SystemContext);
        assert_eq!(classify("[system prompt]"), PageType::SystemContext);
    }

    #[test]
    fn classify_fallback_is_conversation_turn() {
        assert_eq!(classify("Hello, world!"), PageType::ConversationTurn);
        assert_eq!(classify(""), PageType::ConversationTurn);
    }

    // ── detect_schema_hint ────────────────────────────────────────────────────

    #[test]
    fn detect_schema_hint_json() {
        assert_eq!(
            detect_schema_hint(r#"{"key": "val"}"#, false),
            SchemaHint::Json
        );
        assert_eq!(detect_schema_hint("[1,2,3]", false), SchemaHint::Json);
    }

    #[test]
    fn detect_schema_hint_diff() {
        assert_eq!(detect_schema_hint("--- a\n+++ b", false), SchemaHint::Diff);
    }

    #[test]
    fn detect_schema_hint_binary() {
        assert_eq!(detect_schema_hint("anything", true), SchemaHint::Binary);
    }

    #[test]
    fn detect_schema_hint_text_fallback() {
        assert_eq!(detect_schema_hint("plain text", false), SchemaHint::Text);
    }

    // ── ToolOutputInvariant ───────────────────────────────────────────────────

    #[test]
    fn tool_output_invariant_passes_when_fields_present() {
        let inv = ToolOutputInvariant;
        let page = TypedPage::new(
            PageType::ToolOutput,
            PageOrigin::ToolPair {
                tool_name: "shell".into(),
            },
            100,
            Arc::from("[tool_output] shell exit_code: 0\nsome output"),
            Some(SchemaHint::Text),
        );
        let compacted = CompactedPage {
            body: Arc::from("shell exit_status: 0\nkey: value"),
            tokens: 10,
        };
        assert!(inv.verify(&page, &compacted).is_ok());
    }

    #[test]
    fn tool_output_invariant_fails_missing_tool_name() {
        let inv = ToolOutputInvariant;
        let page = TypedPage::new(
            PageType::ToolOutput,
            PageOrigin::ToolPair {
                tool_name: "my_tool".into(),
            },
            100,
            Arc::from("[tool_output] my_tool exit_code: 0"),
            Some(SchemaHint::Text),
        );
        let compacted = CompactedPage {
            body: Arc::from("exit_status: 0"),
            tokens: 5,
        };
        let result = inv.verify(&page, &compacted);
        assert!(result.is_err());
        let violations = result.unwrap_err();
        assert!(violations.iter().any(|v| v.missing_field == "tool_name"));
    }

    #[test]
    fn tool_output_invariant_passes_for_binary() {
        let inv = ToolOutputInvariant;
        let page = TypedPage::new(
            PageType::ToolOutput,
            PageOrigin::ToolPair {
                tool_name: "binary_tool".into(),
            },
            100,
            Arc::from("<binary:1024 bytes>"),
            Some(SchemaHint::Binary),
        );
        let compacted = CompactedPage {
            body: Arc::from("<binary:1024 bytes> (archived)"),
            tokens: 5,
        };
        assert!(inv.verify(&page, &compacted).is_ok());
    }

    // ── SystemContextInvariant ────────────────────────────────────────────────

    #[test]
    fn system_context_invariant_passes_with_pointer() {
        let inv = SystemContextInvariant;
        let page = TypedPage::new(
            PageType::SystemContext,
            PageOrigin::System {
                key: "persona".into(),
            },
            200,
            Arc::from("[Persona context]\nsome persona info"),
            None,
        );
        let compacted = CompactedPage {
            body: Arc::from("[system-ptr:blake3:abcdef123456]"),
            tokens: 3,
        };
        assert!(inv.verify(&page, &compacted).is_ok());
    }

    #[test]
    fn system_context_invariant_fails_without_pointer() {
        let inv = SystemContextInvariant;
        let page = TypedPage::new(
            PageType::SystemContext,
            PageOrigin::System {
                key: "persona".into(),
            },
            200,
            Arc::from("[Persona context]\nsome persona info"),
            None,
        );
        let compacted = CompactedPage {
            body: Arc::from("This is a paraphrase of persona info"),
            tokens: 10,
        };
        let result = inv.verify(&page, &compacted);
        assert!(result.is_err());
        let violations = result.unwrap_err();
        assert!(violations.iter().any(|v| v.missing_field == "pointer"));
    }

    // ── InvariantRegistry ─────────────────────────────────────────────────────

    #[test]
    fn registry_covers_all_page_types() {
        let reg = InvariantRegistry::default();
        for pt in [
            PageType::ToolOutput,
            PageType::ConversationTurn,
            PageType::MemoryExcerpt,
            PageType::SystemContext,
        ] {
            assert!(reg.get(pt).is_some(), "missing invariant for {pt:?}");
        }
    }

    #[test]
    fn registry_returns_correct_page_type() {
        let reg = InvariantRegistry::default();
        assert_eq!(
            reg.get(PageType::ToolOutput).unwrap().page_type(),
            PageType::ToolOutput
        );
        assert_eq!(
            reg.get(PageType::SystemContext).unwrap().page_type(),
            PageType::SystemContext
        );
    }

    // ── InvariantRegistry::enforce ────────────────────────────────────────────

    #[test]
    fn enforce_ok_for_valid_system_pointer() {
        let reg = InvariantRegistry::default();
        let page = TypedPage::new(
            PageType::SystemContext,
            PageOrigin::System {
                key: "persona".into(),
            },
            50,
            Arc::from("[Persona context]\nrules"),
            None,
        );
        let compacted = CompactedPage {
            body: Arc::from("[system-ptr:blake3:aabbccdd11223344]"),
            tokens: 3,
        };
        assert!(reg.enforce(&page, &compacted).is_ok());
    }

    #[test]
    fn enforce_err_for_paraphrased_system_context() {
        let reg = InvariantRegistry::default();
        let page = TypedPage::new(
            PageType::SystemContext,
            PageOrigin::System {
                key: "persona".into(),
            },
            50,
            Arc::from("[Persona context]\nrules"),
            None,
        );
        let compacted = CompactedPage {
            body: Arc::from("The persona says to be helpful."),
            tokens: 7,
        };
        let result = reg.enforce(&page, &compacted);
        assert!(result.is_err());
        assert!(
            result
                .unwrap_err()
                .iter()
                .any(|v| v.missing_field == "pointer")
        );
    }

    #[test]
    fn enforce_ok_for_conversation_turn_with_role() {
        let reg = InvariantRegistry::default();
        let page = TypedPage::new(
            PageType::ConversationTurn,
            PageOrigin::Turn {
                message_id: "42".into(),
            },
            30,
            Arc::from("Hello from user"),
            None,
        );
        let compacted = CompactedPage {
            body: Arc::from("user asked about Rust"),
            tokens: 5,
        };
        assert!(reg.enforce(&page, &compacted).is_ok());
    }

    // ── MemoryExcerptInvariant ────────────────────────────────────────────────

    #[test]
    fn memory_excerpt_invariant_passes_when_label_present() {
        let inv = MemoryExcerptInvariant;
        let label = "semantic_recall";
        let page = TypedPage::new(
            PageType::MemoryExcerpt,
            PageOrigin::Excerpt {
                source_label: label.into(),
            },
            80,
            Arc::from("[semantic recall]\n- [user] hello"),
            None,
        );
        let compacted = CompactedPage {
            body: Arc::from(format!("Summary from {label}: user greeted")),
            tokens: 6,
        };
        assert!(inv.verify(&page, &compacted).is_ok());
    }

    #[test]
    fn memory_excerpt_invariant_fails_when_label_missing() {
        let inv = MemoryExcerptInvariant;
        let page = TypedPage::new(
            PageType::MemoryExcerpt,
            PageOrigin::Excerpt {
                source_label: "graph_facts".into(),
            },
            80,
            Arc::from("[known facts]\n- Alice works at Acme"),
            None,
        );
        let compacted = CompactedPage {
            body: Arc::from("Alice is employed somewhere"),
            tokens: 5,
        };
        let result = inv.verify(&page, &compacted);
        assert!(result.is_err());
        assert!(
            result
                .unwrap_err()
                .iter()
                .any(|v| v.missing_field == "source_label")
        );
    }

    #[test]
    fn memory_excerpt_invariant_passes_for_non_excerpt_origin() {
        let inv = MemoryExcerptInvariant;
        let page = TypedPage::new(
            PageType::MemoryExcerpt,
            PageOrigin::System {
                key: "digests".into(),
            },
            40,
            Arc::from("[system]"),
            None,
        );
        let compacted = CompactedPage {
            body: Arc::from("anything"),
            tokens: 1,
        };
        assert!(inv.verify(&page, &compacted).is_ok());
    }

    // ── ConversationTurnInvariant ─────────────────────────────────────────────

    #[test]
    fn conversation_turn_invariant_passes_with_role_word() {
        let inv = ConversationTurnInvariant;
        let page = TypedPage::new(
            PageType::ConversationTurn,
            PageOrigin::Turn {
                message_id: "1".into(),
            },
            20,
            Arc::from("Hello world"),
            None,
        );
        for body in &["user: hi", "assistant replied", "system note"] {
            let compacted = CompactedPage {
                body: Arc::from(*body),
                tokens: 2,
            };
            assert!(inv.verify(&page, &compacted).is_ok(), "body={body}");
        }
    }

    #[test]
    fn conversation_turn_invariant_fails_without_role_word() {
        let inv = ConversationTurnInvariant;
        let page = TypedPage::new(
            PageType::ConversationTurn,
            PageOrigin::Turn {
                message_id: "2".into(),
            },
            20,
            Arc::from("some turn content"),
            None,
        );
        let compacted = CompactedPage {
            body: Arc::from("content was summarized"),
            tokens: 3,
        };
        let result = inv.verify(&page, &compacted);
        assert!(result.is_err());
        assert!(
            result
                .unwrap_err()
                .iter()
                .any(|v| v.missing_field == "role")
        );
    }

    // ── CompactionAuditSink ───────────────────────────────────────────────────

    #[tokio::test]
    async fn audit_sink_jsonl_roundtrip() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit.jsonl");

        let sink = CompactionAuditSink::open(&path, 64).await.unwrap();
        let record = CompactedPageRecord {
            ts: "2026-04-19T00:00:00Z".into(),
            turn_id: "1".into(),
            page_id: "blake3:aabbccdd".into(),
            page_type: PageType::ToolOutput,
            origin: PageOrigin::ToolPair {
                tool_name: "shell".into(),
            },
            original_tokens: 100,
            compacted_tokens: 20,
            fidelity_level: "structured_summary_v1".into(),
            invariant_version: 1,
            provider_name: "test".into(),
            violations: vec![],
            classification_fallback: false,
        };
        sink.send(record);

        // Drop the sink to close the channel and let the writer task flush.
        drop(sink);
        // Poll for the completed line rather than sleeping a fixed interval: the
        // writer runs on a background task and its timing is not guaranteed.
        let contents = read_when_line_complete(&path).await;

        assert!(!contents.is_empty(), "audit file should not be empty");
        let parsed: serde_json::Value = serde_json::from_str(contents.trim()).unwrap();
        assert_eq!(parsed["page_type"], "tool_output");
        assert_eq!(parsed["turn_id"], "1");
        assert_eq!(parsed["provider_name"], "test");
    }

    #[tokio::test]
    async fn audit_sink_drop_counter_increments_when_full() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit_full.jsonl");

        // Capacity 1: first send fills the channel, subsequent sends are dropped.
        let sink = CompactionAuditSink::open(&path, 1).await.unwrap();

        let make_record = || CompactedPageRecord {
            ts: "2026-04-19T00:00:00Z".into(),
            turn_id: "x".into(),
            page_id: "blake3:00".into(),
            page_type: PageType::ConversationTurn,
            origin: PageOrigin::Turn {
                message_id: "0".into(),
            },
            original_tokens: 10,
            compacted_tokens: 5,
            fidelity_level: "semantic_summary_v1".into(),
            invariant_version: 1,
            provider_name: "test".into(),
            violations: vec![],
            classification_fallback: false,
        };

        // Send enough records to guarantee overflow; there is no await point in the
        // loop, so the writer task cannot drain the channel between sends.
        for _ in 0..10 {
            sink.send(make_record());
        }

        assert!(
            sink.dropped_count() > 0,
            "expected at least one dropped record"
        );
    }

    #[tokio::test]
    async fn audit_sink_flush_does_not_panic() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit_flush.jsonl");
        let sink = CompactionAuditSink::open(&path, 16).await.unwrap();
        // flush on an empty sink must not panic or deadlock.
        sink.flush().await;
    }

    // ── classify_with_role ────────────────────────────────────────────────────

    #[test]
    fn classify_with_role_system_flag_overrides_fallback() {
        assert_eq!(
            classify_with_role("You are a helpful assistant.", true),
            PageType::SystemContext
        );
    }

    // NOTE(review): the test name implies the prefix should also win when the system
    // flag is `true`, but only the `false` case is exercised here. Consider adding the
    // `true` case once the intended precedence of `classify_with_role` is confirmed.
    #[test]
    fn classify_with_role_prefix_wins_over_system_flag() {
        assert_eq!(
            classify_with_role("[tool_output] exit_code: 0", false),
            PageType::ToolOutput
        );
    }

    #[test]
    fn classify_with_role_false_still_falls_back_to_conversation_turn() {
        assert_eq!(
            classify_with_role("random prose without markers", false),
            PageType::ConversationTurn
        );
    }

    // ── check_json_structural_key (via ToolOutputInvariant) ───────────────────

    #[test]
    fn tool_output_json_structural_check_passes_when_key_preserved() {
        let inv = ToolOutputInvariant;
        let original_body = r#"{"exit_code": 0, "stdout": "ok"}"#;
        let page = TypedPage::new(
            PageType::ToolOutput,
            PageOrigin::ToolPair {
                tool_name: "shell".into(),
            },
            50,
            Arc::from(original_body),
            Some(SchemaHint::Json),
        );
        // Compacted body references "exit_code" and "shell".
        let compacted = CompactedPage {
            body: Arc::from("shell exit_code: 0, stdout was ok"),
            tokens: 8,
        };
        assert!(inv.verify(&page, &compacted).is_ok());
    }

    #[test]
    fn tool_output_json_structural_check_fails_when_no_key_preserved() {
        let inv = ToolOutputInvariant;
        let original_body = r#"{"some_field": "value", "other_field": 42}"#;
        let page = TypedPage::new(
            PageType::ToolOutput,
            PageOrigin::ToolPair {
                tool_name: "my_tool".into(),
            },
            50,
            Arc::from(original_body),
            Some(SchemaHint::Json),
        );
        // Compacted body references tool name and status but none of the JSON keys.
        let compacted = CompactedPage {
            body: Arc::from("my_tool exit_status: 0 completed successfully"),
            tokens: 7,
        };
        let result = inv.verify(&page, &compacted);
        assert!(result.is_err());
        let violations = result.unwrap_err();
        assert!(
            violations
                .iter()
                .any(|v| v.missing_field == "structural_key")
        );
    }
}