1use std::sync::Arc;
24use std::time::Duration;
25
26use serde::{Deserialize, Serialize};
27
28#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
40#[serde(rename_all = "snake_case")]
41pub enum PageType {
42 ToolOutput,
44 ConversationTurn,
46 MemoryExcerpt,
48 SystemContext,
50}
51
52impl std::fmt::Display for PageType {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 match self {
55 Self::ToolOutput => f.write_str("tool_output"),
56 Self::ConversationTurn => f.write_str("conversation_turn"),
57 Self::MemoryExcerpt => f.write_str("memory_excerpt"),
58 Self::SystemContext => f.write_str("system_context"),
59 }
60 }
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize)]
67#[serde(tag = "kind", rename_all = "snake_case")]
68#[non_exhaustive]
69pub enum PageOrigin {
70 ToolPair {
72 tool_name: String,
74 },
75 Turn {
77 message_id: String,
79 },
80 Excerpt {
82 source_label: String,
84 },
85 System {
87 key: String,
89 },
90}
91
92#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
98#[serde(rename_all = "snake_case")]
99#[non_exhaustive]
100pub enum SchemaHint {
101 Json,
103 Text,
105 Diff,
107 Table,
109 Binary,
111}
112
113#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
129pub struct PageId(pub String);
130
131impl PageId {
132 #[must_use]
134 pub fn compute(page_type: PageType, origin_key: &str, body: &[u8]) -> Self {
135 let mut hasher = blake3::Hasher::new();
136 hasher.update(page_type.to_string().as_bytes());
137 hasher.update(b"|");
138 hasher.update(origin_key.as_bytes());
139 hasher.update(b"|");
140 hasher.update(body);
141 let hash = hasher.finalize();
142 let mut hex = String::with_capacity(32);
144 for b in &hash.as_bytes()[..16] {
145 use std::fmt::Write as _;
146 let _ = write!(hex, "{b:02x}");
147 }
148 Self(format!("blake3:{hex}"))
149 }
150}
151
152#[derive(Debug, Clone)]
160pub struct TypedPage {
161 pub page_id: PageId,
163 pub page_type: PageType,
165 pub origin: PageOrigin,
167 pub tokens: u32,
169 pub body: Arc<str>,
171 pub schema_hint: Option<SchemaHint>,
173}
174
175impl TypedPage {
176 #[must_use]
178 pub fn new(
179 page_type: PageType,
180 origin: PageOrigin,
181 tokens: u32,
182 body: Arc<str>,
183 schema_hint: Option<SchemaHint>,
184 ) -> Self {
185 let origin_key = origin_key_for(&origin);
186 let page_id = PageId::compute(page_type, &origin_key, body.as_bytes());
187 Self {
188 page_id,
189 page_type,
190 origin,
191 tokens,
192 body,
193 schema_hint,
194 }
195 }
196}
197
198fn origin_key_for(origin: &PageOrigin) -> String {
199 match origin {
200 PageOrigin::ToolPair { tool_name } => format!("tool:{tool_name}"),
201 PageOrigin::Turn { message_id } => format!("turn:{message_id}"),
202 PageOrigin::Excerpt { source_label } => format!("excerpt:{source_label}"),
203 PageOrigin::System { key } => format!("system:{key}"),
204 }
205}
206
/// Minimum fidelity an invariant demands of a compacted page.
#[derive(Debug, Clone)]
pub struct FidelityContract {
    /// Name of the fidelity level, e.g. `"structured_summary_v1"`.
    pub fidelity_level: &'static str,
    /// Version number of the invariant that issued this contract.
    pub invariant_version: u8,
    /// Names of the fields the compacted page is required to keep.
    pub required_fields: &'static [&'static str],
}
222
223#[derive(Debug, Clone, Serialize)]
230pub struct FidelityViolation {
231 pub missing_field: String,
233 pub detail: String,
235}
236
/// Result of compacting a page: the replacement body and its token count.
#[derive(Debug, Clone)]
pub struct CompactedPage {
    /// Compacted content, shared cheaply between clones.
    pub body: Arc<str>,
    /// Token count of the compacted body.
    pub tokens: u32,
}
247
248pub trait PageInvariant: Send + Sync {
267 fn page_type(&self) -> PageType;
269
270 fn minimum_fidelity(&self, page: &TypedPage) -> FidelityContract;
272
273 fn verify(
280 &self,
281 original: &TypedPage,
282 compacted: &CompactedPage,
283 ) -> Result<(), Vec<FidelityViolation>>;
284}
285
286pub struct ToolOutputInvariant;
293
294impl PageInvariant for ToolOutputInvariant {
295 fn page_type(&self) -> PageType {
296 PageType::ToolOutput
297 }
298
299 fn minimum_fidelity(&self, _page: &TypedPage) -> FidelityContract {
300 FidelityContract {
301 fidelity_level: "structured_summary_v1",
302 invariant_version: 1,
303 required_fields: &["tool_name", "exit_status"],
304 }
305 }
306
307 fn verify(
308 &self,
309 original: &TypedPage,
310 compacted: &CompactedPage,
311 ) -> Result<(), Vec<FidelityViolation>> {
312 let body = compacted.body.as_ref();
313 if original.schema_hint == Some(SchemaHint::Binary) {
315 return Ok(());
316 }
317
318 let mut violations = Vec::new();
319
320 let tool_name = match &original.origin {
322 PageOrigin::ToolPair { tool_name } => tool_name.as_str(),
323 _ => "",
324 };
325 if !tool_name.is_empty() && !body.contains(tool_name) {
326 violations.push(FidelityViolation {
327 missing_field: "tool_name".into(),
328 detail: format!("compacted body does not reference tool '{tool_name}'"),
329 });
330 }
331
332 let has_status = body.contains("exit_status")
334 || body.contains("exit_code")
335 || body.contains("status:")
336 || body.contains("Status:")
337 || body.contains("exit:")
338 || body.contains("rc:");
339 if !has_status {
340 violations.push(FidelityViolation {
341 missing_field: "exit_status".into(),
342 detail: "compacted body does not contain an exit status indicator".into(),
343 });
344 }
345
346 if original.schema_hint == Some(SchemaHint::Json) {
350 let original_body = original.body.as_ref();
351 let preserved = check_json_structural_key(original_body, body);
352 if !preserved {
353 violations.push(FidelityViolation {
354 missing_field: "structural_key".into(),
355 detail: "compacted JSON tool output does not reference any top-level field \
356 name from the original output"
357 .into(),
358 });
359 }
360 }
361
362 if violations.is_empty() {
363 Ok(())
364 } else {
365 Err(violations)
366 }
367 }
368}
369
370fn check_json_structural_key(original: &str, compacted: &str) -> bool {
376 let Ok(value) = serde_json::from_str::<serde_json::Value>(original) else {
377 return true;
378 };
379 let Some(obj) = value.as_object() else {
380 return true;
381 };
382 if obj.is_empty() {
383 return true;
384 }
385 obj.keys().any(|k| compacted.contains(k.as_str()))
386}
387
388pub struct ConversationTurnInvariant;
392
393impl PageInvariant for ConversationTurnInvariant {
394 fn page_type(&self) -> PageType {
395 PageType::ConversationTurn
396 }
397
398 fn minimum_fidelity(&self, _page: &TypedPage) -> FidelityContract {
399 FidelityContract {
400 fidelity_level: "semantic_summary_v1",
401 invariant_version: 1,
402 required_fields: &["role"],
403 }
404 }
405
406 fn verify(
407 &self,
408 _original: &TypedPage,
409 compacted: &CompactedPage,
410 ) -> Result<(), Vec<FidelityViolation>> {
411 let body = compacted.body.as_ref();
412 let has_role =
413 body.contains("user") || body.contains("assistant") || body.contains("system");
414 if !has_role {
415 return Err(vec![FidelityViolation {
416 missing_field: "role".into(),
417 detail: "compacted turn does not identify a speaker role".into(),
418 }]);
419 }
420 Ok(())
421 }
422}
423
424pub struct MemoryExcerptInvariant;
428
429impl PageInvariant for MemoryExcerptInvariant {
430 fn page_type(&self) -> PageType {
431 PageType::MemoryExcerpt
432 }
433
434 fn minimum_fidelity(&self, _page: &TypedPage) -> FidelityContract {
435 FidelityContract {
436 fidelity_level: "excerpt_summary_v1",
437 invariant_version: 1,
438 required_fields: &["source_label"],
439 }
440 }
441
442 fn verify(
443 &self,
444 original: &TypedPage,
445 compacted: &CompactedPage,
446 ) -> Result<(), Vec<FidelityViolation>> {
447 let source_label = match &original.origin {
448 PageOrigin::Excerpt { source_label } => source_label.as_str(),
449 _ => return Ok(()),
450 };
451 if !compacted.body.contains(source_label) {
452 return Err(vec![FidelityViolation {
453 missing_field: "source_label".into(),
454 detail: format!("compacted excerpt does not contain source label '{source_label}'"),
455 }]);
456 }
457 Ok(())
458 }
459}
460
461pub struct SystemContextInvariant;
466
467pub const SYSTEM_POINTER_PREFIX: &str = "[system-ptr:";
469
470impl PageInvariant for SystemContextInvariant {
471 fn page_type(&self) -> PageType {
472 PageType::SystemContext
473 }
474
475 fn minimum_fidelity(&self, _page: &TypedPage) -> FidelityContract {
476 FidelityContract {
477 fidelity_level: "pointer_replace_v1",
478 invariant_version: 1,
479 required_fields: &["pointer"],
480 }
481 }
482
483 fn verify(
484 &self,
485 _original: &TypedPage,
486 compacted: &CompactedPage,
487 ) -> Result<(), Vec<FidelityViolation>> {
488 if !compacted.body.starts_with(SYSTEM_POINTER_PREFIX) {
489 return Err(vec![FidelityViolation {
490 missing_field: "pointer".into(),
491 detail: format!(
492 "SystemContext page was not pointer-replaced \
493 (body does not start with '{SYSTEM_POINTER_PREFIX}')"
494 ),
495 }]);
496 }
497 Ok(())
498 }
499}
500
501pub struct InvariantRegistry {
517 tool_output: Box<dyn PageInvariant>,
518 conversation_turn: Box<dyn PageInvariant>,
519 memory_excerpt: Box<dyn PageInvariant>,
520 system_context: Box<dyn PageInvariant>,
521}
522
523impl Default for InvariantRegistry {
524 fn default() -> Self {
525 Self {
526 tool_output: Box::new(ToolOutputInvariant),
527 conversation_turn: Box::new(ConversationTurnInvariant),
528 memory_excerpt: Box::new(MemoryExcerptInvariant),
529 system_context: Box::new(SystemContextInvariant),
530 }
531 }
532}
533
534impl InvariantRegistry {
535 #[must_use]
539 pub fn get(&self, page_type: PageType) -> Option<&dyn PageInvariant> {
540 match page_type {
541 PageType::ToolOutput => Some(self.tool_output.as_ref()),
542 PageType::ConversationTurn => Some(self.conversation_turn.as_ref()),
543 PageType::MemoryExcerpt => Some(self.memory_excerpt.as_ref()),
544 PageType::SystemContext => Some(self.system_context.as_ref()),
545 }
546 }
547
548 pub fn enforce(
559 &self,
560 original: &TypedPage,
561 compacted: &CompactedPage,
562 ) -> Result<(), Vec<FidelityViolation>> {
563 let _span = tracing::info_span!(
564 "context.compaction.typed_page",
565 page_type = %original.page_type,
566 page_id = %original.page_id.0,
567 original_tokens = original.tokens,
568 compacted_tokens = compacted.tokens,
569 )
570 .entered();
571
572 if let Some(inv) = self.get(original.page_type) {
573 inv.verify(original, compacted)
574 } else {
575 tracing::warn!(
576 page_type = %original.page_type,
577 "no invariant registered for page type — skipping verification"
578 );
579 Ok(())
580 }
581 }
582}
583
584#[must_use]
613pub fn classify(body: &str) -> PageType {
614 classify_with_role(body, false)
615}
616
617#[must_use]
637pub fn classify_with_role(body: &str, is_system_role: bool) -> PageType {
638 tracing::info_span!(
639 "context.compaction.typed_page.classify",
640 body_len = body.len()
641 )
642 .in_scope(|| classify_with_role_inner(body, is_system_role))
643}
644
645fn classify_with_role_inner(body: &str, is_system_role: bool) -> PageType {
646 const TOOL_PREFIXES: &[&str] = &["[tool_output]", "[tool:", "[Tool output]"];
648 const MEMORY_PREFIXES: &[&str] = &[
649 "[cross-session context]",
650 "[semantic recall]",
651 "[known facts]",
652 "[conversation summaries]",
653 "[past corrections]",
654 "## Relevant documents",
655 ];
656 const SYSTEM_PREFIXES: &[&str] = &[
657 "[Persona context]",
658 "[Past experience]",
659 "[Memory summary]",
660 "[system",
661 "[skill",
662 "[persona",
663 "[digest",
664 "[compression",
665 ];
666
667 let trimmed = body.trim_start();
668
669 for prefix in TOOL_PREFIXES {
670 if trimmed.starts_with(prefix) {
671 return PageType::ToolOutput;
672 }
673 }
674 for prefix in MEMORY_PREFIXES {
675 if trimmed.starts_with(prefix) {
676 return PageType::MemoryExcerpt;
677 }
678 }
679 for prefix in SYSTEM_PREFIXES {
680 if trimmed.starts_with(prefix) {
681 return PageType::SystemContext;
682 }
683 }
684
685 if is_system_role {
690 return PageType::SystemContext;
691 }
692
693 let mut prefix_end = body.len().min(80);
694 while !body.is_char_boundary(prefix_end) {
695 prefix_end -= 1;
696 }
697 tracing::warn!(
698 body_prefix = &body[..prefix_end],
699 "typed-page classification fallback to ConversationTurn"
700 );
701 PageType::ConversationTurn
702}
703
704#[must_use]
710pub fn detect_schema_hint(body: &str, is_binary: bool) -> SchemaHint {
711 if is_binary || body.contains('\u{FFFD}') {
712 return SchemaHint::Binary;
713 }
714 let trimmed = body.trim_start();
715 if trimmed.starts_with('{') || trimmed.starts_with('[') {
716 return SchemaHint::Json;
717 }
718 if trimmed.starts_with("--- ")
719 || trimmed.starts_with("+++ ")
720 || trimmed.starts_with("diff --git")
721 || trimmed.starts_with("diff -")
722 {
723 return SchemaHint::Diff;
724 }
725 let first_line = trimmed.lines().next().unwrap_or("");
727 if first_line.matches('\t').count() >= 2 || first_line.matches('|').count() >= 2 {
728 return SchemaHint::Table;
729 }
730 SchemaHint::Text
731}
732
733#[derive(Debug, Serialize)]
740pub struct CompactedPageRecord {
741 pub ts: String,
743 pub turn_id: String,
745 pub page_id: String,
747 pub page_type: PageType,
749 pub origin: PageOrigin,
751 pub original_tokens: u32,
753 pub compacted_tokens: u32,
755 pub fidelity_level: String,
757 pub invariant_version: u8,
759 pub provider_name: String,
761 pub violations: Vec<FidelityViolation>,
763 #[serde(default, skip_serializing_if = "std::ops::Not::not")]
765 pub classification_fallback: bool,
766}
767
768#[derive(Debug, Clone, Serialize)]
772pub struct BatchViolation {
773 pub assertion: String,
775 pub detail: String,
777}
778
/// Facts about a batch of pages that a batch summary is checked against.
#[derive(Debug, Clone, Default)]
pub struct BatchAssertions {
    /// Names of the tools in the batch.
    pub tool_names_in_batch: Vec<String>,
    /// Whether the batch contains a memory excerpt.
    pub has_memory_excerpt: bool,
    /// Labels of the memory excerpts in the batch.
    pub excerpt_labels: Vec<String>,
}
811
812impl BatchAssertions {
813 #[must_use]
817 pub fn check(&self, summary: &str) -> Vec<BatchViolation> {
818 let mut violations = Vec::new();
819
820 if !self.tool_names_in_batch.is_empty() {
822 let any_tool_mentioned = self
823 .tool_names_in_batch
824 .iter()
825 .any(|name| !name.is_empty() && summary.contains(name.as_str()));
826 if !any_tool_mentioned {
827 violations.push(BatchViolation {
828 assertion: "tool_coverage".into(),
829 detail: format!(
830 "summary mentions none of the {} tool(s) in batch: {:?}",
831 self.tool_names_in_batch.len(),
832 self.tool_names_in_batch
833 ),
834 });
835 }
836 }
837
838 if self.has_memory_excerpt && !self.excerpt_labels.is_empty() {
840 let any_label_mentioned = self
841 .excerpt_labels
842 .iter()
843 .any(|label| !label.is_empty() && summary.contains(label.as_str()));
844 if !any_label_mentioned {
845 violations.push(BatchViolation {
846 assertion: "excerpt_label_coverage".into(),
847 detail: format!(
848 "summary mentions none of the memory excerpt labels: {:?}",
849 self.excerpt_labels
850 ),
851 });
852 }
853 }
854
855 violations
856 }
857}
858
859pub struct TypedPagesState {
866 pub registry: InvariantRegistry,
868 pub audit_sink: Option<CompactionAuditSink>,
870 pub is_active: bool,
873}
874
875enum AuditCommand {
879 Record(CompactedPageRecord),
881 Flush(tokio::sync::oneshot::Sender<()>),
883}
884
885#[derive(Debug, Clone)]
913pub struct CompactionAuditSink {
914 tx: tokio::sync::mpsc::Sender<AuditCommand>,
915 drop_counter: Arc<std::sync::atomic::AtomicU64>,
916}
917
918impl CompactionAuditSink {
919 pub async fn open(path: &std::path::Path, capacity: usize) -> Result<Self, std::io::Error> {
928 use tokio::io::AsyncWriteExt as _;
929
930 if let Some(parent) = path.parent() {
931 tokio::fs::create_dir_all(parent).await?;
932 }
933 let file = tokio::fs::OpenOptions::new()
934 .create(true)
935 .append(true)
936 .open(path)
937 .await?;
938
939 let (tx, mut rx) = tokio::sync::mpsc::channel::<AuditCommand>(capacity.max(1));
940 let drop_counter = Arc::new(std::sync::atomic::AtomicU64::new(0));
941 let drop_counter_bg = drop_counter.clone();
942
943 tokio::spawn(async move {
944 let mut writer = tokio::io::BufWriter::new(file);
945 while let Some(cmd) = rx.recv().await {
946 match cmd {
947 AuditCommand::Record(record) => match serde_json::to_string(&record) {
948 Ok(mut line) => {
949 line.push('\n');
950 if let Err(e) = writer.write_all(line.as_bytes()).await {
951 tracing::error!("compaction audit write failed: {e:#}");
952 }
953 }
954 Err(e) => {
955 tracing::error!("compaction audit serialization failed: {e:#}");
956 }
957 },
958 AuditCommand::Flush(responder) => {
959 let _ = writer.flush().await;
960 let _ = responder.send(());
961 }
962 }
963 }
964 let _ = writer.flush().await;
966
967 let dropped = drop_counter_bg.load(std::sync::atomic::Ordering::Relaxed);
968 if dropped > 0 {
969 tracing::warn!(dropped, "compaction audit sink closed with dropped records");
970 }
971 });
972
973 Ok(Self { tx, drop_counter })
974 }
975
976 pub fn send(&self, record: CompactedPageRecord) {
980 match self.tx.try_send(AuditCommand::Record(record)) {
981 Ok(()) => {}
982 Err(tokio::sync::mpsc::error::TrySendError::Full(_)) => {
983 let prev = self
984 .drop_counter
985 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
986 tracing::warn!(
987 dropped_total = prev + 1,
988 "compaction audit sink full — record dropped (best-effort audit)"
989 );
990 }
991 Err(tokio::sync::mpsc::error::TrySendError::Closed(_)) => {
992 tracing::error!("compaction audit sink closed unexpectedly");
993 }
994 }
995 }
996
997 pub async fn flush(&self) {
1003 let (tx, rx) = tokio::sync::oneshot::channel::<()>();
1004 if self.tx.send(AuditCommand::Flush(tx)).await.is_ok() {
1005 let _ = tokio::time::timeout(Duration::from_millis(100), rx).await;
1006 }
1007 }
1008
1009 #[must_use]
1011 pub fn dropped_count(&self) -> u64 {
1012 self.drop_counter.load(std::sync::atomic::Ordering::Relaxed)
1013 }
1014}
1015
#[cfg(test)]
mod tests {
    use super::*;

    // ── Fixture helpers ─────────────────────────────────────────────────────

    /// Builds a compacted page from a string body.
    fn compacted(body: &str, tokens: u32) -> CompactedPage {
        CompactedPage {
            body: Arc::from(body),
            tokens,
        }
    }

    /// Builds a tool-output page for `tool_name` with the given body and hint.
    fn tool_page(tool_name: &str, body: &str, hint: SchemaHint) -> TypedPage {
        TypedPage::new(
            PageType::ToolOutput,
            PageOrigin::ToolPair {
                tool_name: tool_name.into(),
            },
            100,
            Arc::from(body),
            Some(hint),
        )
    }

    /// Builds a system-context page keyed `persona`.
    fn persona_page(body: &str) -> TypedPage {
        TypedPage::new(
            PageType::SystemContext,
            PageOrigin::System {
                key: "persona".into(),
            },
            200,
            Arc::from(body),
            None,
        )
    }

    /// Builds a conversation-turn page.
    fn turn_page(message_id: &str, body: &str) -> TypedPage {
        TypedPage::new(
            PageType::ConversationTurn,
            PageOrigin::Turn {
                message_id: message_id.into(),
            },
            20,
            Arc::from(body),
            None,
        )
    }

    /// Builds a memory-excerpt page with the given source label.
    fn excerpt_page(source_label: &str, body: &str) -> TypedPage {
        TypedPage::new(
            PageType::MemoryExcerpt,
            PageOrigin::Excerpt {
                source_label: source_label.into(),
            },
            80,
            Arc::from(body),
            None,
        )
    }

    /// Builds a representative audit record.
    fn sample_record() -> CompactedPageRecord {
        CompactedPageRecord {
            ts: "2026-04-19T00:00:00Z".into(),
            turn_id: "1".into(),
            page_id: "blake3:aabbccdd".into(),
            page_type: PageType::ToolOutput,
            origin: PageOrigin::ToolPair {
                tool_name: "shell".into(),
            },
            original_tokens: 100,
            compacted_tokens: 20,
            fidelity_level: "structured_summary_v1".into(),
            invariant_version: 1,
            provider_name: "test".into(),
            violations: vec![],
            classification_fallback: false,
        }
    }

    // ── PageId ──────────────────────────────────────────────────────────────

    #[test]
    fn page_id_same_input_same_output() {
        let a = PageId::compute(PageType::ToolOutput, "tool:shell", b"exit_code: 0");
        let b = PageId::compute(PageType::ToolOutput, "tool:shell", b"exit_code: 0");
        assert_eq!(a, b);
    }

    #[test]
    fn page_id_different_type_different_id() {
        let a = PageId::compute(PageType::ToolOutput, "tool:shell", b"body");
        let b = PageId::compute(PageType::ConversationTurn, "tool:shell", b"body");
        assert_ne!(a, b);
    }

    #[test]
    fn page_id_starts_with_blake3_prefix() {
        let id = PageId::compute(PageType::SystemContext, "system:persona", b"some content");
        assert!(id.0.starts_with("blake3:"));
    }

    // ── classify ────────────────────────────────────────────────────────────

    #[test]
    fn classify_tool_output_prefix() {
        assert_eq!(
            classify("[tool_output] shell exit_code: 0"),
            PageType::ToolOutput
        );
        assert_eq!(classify("[tool:shell] result"), PageType::ToolOutput);
    }

    #[test]
    fn classify_memory_prefixes() {
        assert_eq!(
            classify("[cross-session context]\nsome recall"),
            PageType::MemoryExcerpt
        );
        assert_eq!(
            classify("[semantic recall]\n- [user] hello"),
            PageType::MemoryExcerpt
        );
        assert_eq!(classify("[known facts]\n- fact"), PageType::MemoryExcerpt);
        assert_eq!(
            classify("[conversation summaries]\n- 1-10: summary"),
            PageType::MemoryExcerpt
        );
    }

    #[test]
    fn classify_system_prefixes() {
        assert_eq!(classify("[Persona context]\nfact"), PageType::SystemContext);
        assert_eq!(classify("[system prompt]"), PageType::SystemContext);
    }

    #[test]
    fn classify_fallback_is_conversation_turn() {
        assert_eq!(classify("Hello, world!"), PageType::ConversationTurn);
        assert_eq!(classify(""), PageType::ConversationTurn);
    }

    // ── detect_schema_hint ──────────────────────────────────────────────────

    #[test]
    fn detect_schema_hint_json() {
        assert_eq!(
            detect_schema_hint(r#"{"key": "val"}"#, false),
            SchemaHint::Json
        );
        assert_eq!(detect_schema_hint("[1,2,3]", false), SchemaHint::Json);
    }

    #[test]
    fn detect_schema_hint_diff() {
        assert_eq!(detect_schema_hint("--- a\n+++ b", false), SchemaHint::Diff);
    }

    #[test]
    fn detect_schema_hint_binary() {
        assert_eq!(detect_schema_hint("anything", true), SchemaHint::Binary);
    }

    #[test]
    fn detect_schema_hint_text_fallback() {
        assert_eq!(detect_schema_hint("plain text", false), SchemaHint::Text);
    }

    // ── ToolOutputInvariant ─────────────────────────────────────────────────

    #[test]
    fn tool_output_invariant_passes_when_fields_present() {
        let inv = ToolOutputInvariant;
        let page = tool_page(
            "shell",
            "[tool_output] shell exit_code: 0\nsome output",
            SchemaHint::Text,
        );
        let summary = compacted("shell exit_status: 0\nkey: value", 10);
        assert!(inv.verify(&page, &summary).is_ok());
    }

    #[test]
    fn tool_output_invariant_fails_missing_tool_name() {
        let inv = ToolOutputInvariant;
        let page = tool_page(
            "my_tool",
            "[tool_output] my_tool exit_code: 0",
            SchemaHint::Text,
        );
        let summary = compacted("exit_status: 0", 5);
        let violations = inv
            .verify(&page, &summary)
            .expect_err("summary without the tool name must be rejected");
        assert!(violations.iter().any(|v| v.missing_field == "tool_name"));
    }

    #[test]
    fn tool_output_invariant_passes_for_binary() {
        let inv = ToolOutputInvariant;
        let page = tool_page("binary_tool", "<binary:1024 bytes>", SchemaHint::Binary);
        let summary = compacted("<binary:1024 bytes> (archived)", 5);
        assert!(inv.verify(&page, &summary).is_ok());
    }

    // ── SystemContextInvariant ──────────────────────────────────────────────

    #[test]
    fn system_context_invariant_passes_with_pointer() {
        let inv = SystemContextInvariant;
        let page = persona_page("[Persona context]\nsome persona info");
        let summary = compacted("[system-ptr:blake3:abcdef123456]", 3);
        assert!(inv.verify(&page, &summary).is_ok());
    }

    #[test]
    fn system_context_invariant_fails_without_pointer() {
        let inv = SystemContextInvariant;
        let page = persona_page("[Persona context]\nsome persona info");
        let summary = compacted("This is a paraphrase of persona info", 10);
        let violations = inv
            .verify(&page, &summary)
            .expect_err("paraphrased system context must be rejected");
        assert!(violations.iter().any(|v| v.missing_field == "pointer"));
    }

    // ── InvariantRegistry ───────────────────────────────────────────────────

    #[test]
    fn registry_covers_all_page_types() {
        let reg = InvariantRegistry::default();
        for pt in [
            PageType::ToolOutput,
            PageType::ConversationTurn,
            PageType::MemoryExcerpt,
            PageType::SystemContext,
        ] {
            assert!(reg.get(pt).is_some(), "missing invariant for {pt:?}");
        }
    }

    #[test]
    fn registry_returns_correct_page_type() {
        let reg = InvariantRegistry::default();
        assert_eq!(
            reg.get(PageType::ToolOutput).unwrap().page_type(),
            PageType::ToolOutput
        );
        assert_eq!(
            reg.get(PageType::SystemContext).unwrap().page_type(),
            PageType::SystemContext
        );
    }

    #[test]
    fn enforce_ok_for_valid_system_pointer() {
        let reg = InvariantRegistry::default();
        let page = persona_page("[Persona context]\nrules");
        let summary = compacted("[system-ptr:blake3:aabbccdd11223344]", 3);
        assert!(reg.enforce(&page, &summary).is_ok());
    }

    #[test]
    fn enforce_err_for_paraphrased_system_context() {
        let reg = InvariantRegistry::default();
        let page = persona_page("[Persona context]\nrules");
        let summary = compacted("The persona says to be helpful.", 7);
        let violations = reg
            .enforce(&page, &summary)
            .expect_err("paraphrased system context must be rejected");
        assert!(violations.iter().any(|v| v.missing_field == "pointer"));
    }

    #[test]
    fn enforce_ok_for_conversation_turn_with_role() {
        let reg = InvariantRegistry::default();
        let page = turn_page("42", "Hello from user");
        let summary = compacted("user asked about Rust", 5);
        assert!(reg.enforce(&page, &summary).is_ok());
    }

    // ── MemoryExcerptInvariant ──────────────────────────────────────────────

    #[test]
    fn memory_excerpt_invariant_passes_when_label_present() {
        let inv = MemoryExcerptInvariant;
        let label = "semantic_recall";
        let page = excerpt_page(label, "[semantic recall]\n- [user] hello");
        let summary = compacted(&format!("Summary from {label}: user greeted"), 6);
        assert!(inv.verify(&page, &summary).is_ok());
    }

    #[test]
    fn memory_excerpt_invariant_fails_when_label_missing() {
        let inv = MemoryExcerptInvariant;
        let page = excerpt_page("graph_facts", "[known facts]\n- Alice works at Acme");
        let summary = compacted("Alice is employed somewhere", 5);
        let violations = inv
            .verify(&page, &summary)
            .expect_err("summary without the source label must be rejected");
        assert!(violations.iter().any(|v| v.missing_field == "source_label"));
    }

    #[test]
    fn memory_excerpt_invariant_passes_for_non_excerpt_origin() {
        let inv = MemoryExcerptInvariant;
        // A memory-excerpt page whose origin is not `Excerpt` has no label to check.
        let page = TypedPage::new(
            PageType::MemoryExcerpt,
            PageOrigin::System {
                key: "digests".into(),
            },
            40,
            Arc::from("[system]"),
            None,
        );
        let summary = compacted("anything", 1);
        assert!(inv.verify(&page, &summary).is_ok());
    }

    // ── ConversationTurnInvariant ───────────────────────────────────────────

    #[test]
    fn conversation_turn_invariant_passes_with_role_word() {
        let inv = ConversationTurnInvariant;
        let page = turn_page("1", "Hello world");
        for body in ["user: hi", "assistant replied", "system note"] {
            let summary = compacted(body, 2);
            assert!(inv.verify(&page, &summary).is_ok(), "body={body}");
        }
    }

    #[test]
    fn conversation_turn_invariant_fails_without_role_word() {
        let inv = ConversationTurnInvariant;
        let page = turn_page("2", "some turn content");
        let summary = compacted("content was summarized", 3);
        let violations = inv
            .verify(&page, &summary)
            .expect_err("summary without a role word must be rejected");
        assert!(violations.iter().any(|v| v.missing_field == "role"));
    }

    // ── CompactionAuditSink ─────────────────────────────────────────────────

    #[tokio::test]
    async fn audit_sink_jsonl_roundtrip() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit.jsonl");

        let sink = CompactionAuditSink::open(&path, 64).await.unwrap();
        sink.send(sample_record());

        // Wait for the writer task to acknowledge a flush rather than
        // sleeping for a fixed time and hoping the task has run.
        sink.flush().await;
        drop(sink);

        let contents = std::fs::read_to_string(&path).unwrap();
        assert!(!contents.is_empty(), "audit file should not be empty");
        let parsed: serde_json::Value = serde_json::from_str(contents.trim()).unwrap();
        assert_eq!(parsed["page_type"], "tool_output");
        assert_eq!(parsed["turn_id"], "1");
        assert_eq!(parsed["provider_name"], "test");
    }

    #[tokio::test]
    async fn audit_sink_drop_counter_increments_when_full() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit_full.jsonl");

        let sink = CompactionAuditSink::open(&path, 1).await.unwrap();

        // No await between sends, so the writer task cannot drain the
        // single-slot channel and later sends must be dropped.
        for _ in 0..10 {
            sink.send(sample_record());
        }

        assert!(
            sink.dropped_count() > 0,
            "expected at least one dropped record"
        );
    }

    #[tokio::test]
    async fn audit_sink_flush_does_not_panic() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit_flush.jsonl");
        let sink = CompactionAuditSink::open(&path, 16).await.unwrap();
        sink.flush().await;
    }

    #[tokio::test]
    async fn audit_sink_capacity_zero_does_not_panic() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("cap0.jsonl");
        let sink = CompactionAuditSink::open(&path, 0).await.unwrap();
        sink.flush().await;
    }

    // ── classify_with_role ──────────────────────────────────────────────────

    #[test]
    fn classify_with_role_system_flag_overrides_fallback() {
        assert_eq!(
            classify_with_role("You are a helpful assistant.", true),
            PageType::SystemContext
        );
    }

    #[test]
    fn classify_with_role_prefix_wins_over_system_flag() {
        // The flag must be set here, otherwise there is nothing for the
        // prefix to win over.
        assert_eq!(
            classify_with_role("[tool_output] exit_code: 0", true),
            PageType::ToolOutput
        );
    }

    #[test]
    fn classify_with_role_false_still_falls_back_to_conversation_turn() {
        assert_eq!(
            classify_with_role("random prose without markers", false),
            PageType::ConversationTurn
        );
    }

    #[test]
    fn classify_with_role_non_ascii_body_does_not_panic() {
        // Multi-byte bodies longer than 80 bytes exercise the char-boundary
        // handling of the fallback log prefix.
        let cjk = "你好世界".repeat(20);
        let emoji = "🦀".repeat(30);
        let mixed = "abc🦀中文".repeat(15);

        let _ = classify_with_role(&cjk, false);
        let _ = classify_with_role(&emoji, false);
        let _ = classify_with_role(&mixed, false);
    }

    // ── JSON structural-key check ───────────────────────────────────────────

    #[test]
    fn tool_output_json_structural_check_passes_when_key_preserved() {
        let inv = ToolOutputInvariant;
        let page = tool_page(
            "shell",
            r#"{"exit_code": 0, "stdout": "ok"}"#,
            SchemaHint::Json,
        );
        let summary = compacted("shell exit_code: 0, stdout was ok", 8);
        assert!(inv.verify(&page, &summary).is_ok());
    }

    #[test]
    fn tool_output_json_structural_check_fails_when_no_key_preserved() {
        let inv = ToolOutputInvariant;
        let page = tool_page(
            "my_tool",
            r#"{"some_field": "value", "other_field": 42}"#,
            SchemaHint::Json,
        );
        let summary = compacted("my_tool exit_status: 0 completed successfully", 7);
        let violations = inv
            .verify(&page, &summary)
            .expect_err("summary without any original JSON key must be rejected");
        assert!(violations
            .iter()
            .any(|v| v.missing_field == "structural_key"));
    }
}