1use std::sync::Arc;
24
25use serde::{Deserialize, Serialize};
26
/// Category assigned to a page of context.
///
/// Serialized by serde with `snake_case` variant names (for example
/// `tool_output`), which are the same strings the `Display` impl writes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PageType {
    /// Page classified as the output of a tool.
    ToolOutput,
    /// Page classified as a conversation turn; also the classifier's fallback.
    ConversationTurn,
    /// Page classified as an excerpt of recalled memory.
    MemoryExcerpt,
    /// Page classified as system-level context.
    SystemContext,
}
50
51impl std::fmt::Display for PageType {
52 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53 match self {
54 Self::ToolOutput => f.write_str("tool_output"),
55 Self::ConversationTurn => f.write_str("conversation_turn"),
56 Self::MemoryExcerpt => f.write_str("memory_excerpt"),
57 Self::SystemContext => f.write_str("system_context"),
58 }
59 }
60}
61
/// Where a page came from.
///
/// Serialized as an internally tagged enum: the `snake_case` variant name is
/// stored under the `kind` key next to the variant's own field.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum PageOrigin {
    /// Page associated with a tool.
    ToolPair {
        /// Name of the tool.
        tool_name: String,
    },
    /// Page associated with a conversation message.
    Turn {
        /// Identifier of the message.
        message_id: String,
    },
    /// Page associated with a memory excerpt.
    Excerpt {
        /// Label of the source the excerpt was taken from.
        source_label: String,
    },
    /// Page associated with system context.
    System {
        /// Key identifying the system entry.
        key: String,
    },
}
89
/// Coarse hint about the shape of a page body.
///
/// Serialized by serde with `snake_case` variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SchemaHint {
    /// Body looks like JSON.
    Json,
    /// Plain text.
    Text,
    /// Body looks like a diff.
    Diff,
    /// Body looks like tabular data.
    Table,
    /// Body is binary content.
    Binary,
}
109
/// String identifier of a page.
///
/// Values produced by `PageId::compute` have the form `blake3:` followed by
/// 32 lowercase hexadecimal characters. The inner string is public, so other
/// formats can be constructed directly.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct PageId(pub String);
127
128impl PageId {
129 #[must_use]
131 pub fn compute(page_type: PageType, origin_key: &str, body: &[u8]) -> Self {
132 let mut hasher = blake3::Hasher::new();
133 hasher.update(page_type.to_string().as_bytes());
134 hasher.update(b"|");
135 hasher.update(origin_key.as_bytes());
136 hasher.update(b"|");
137 hasher.update(body);
138 let hash = hasher.finalize();
139 let mut hex = String::with_capacity(32);
141 for b in &hash.as_bytes()[..16] {
142 use std::fmt::Write as _;
143 let _ = write!(hex, "{b:02x}");
144 }
145 Self(format!("blake3:{hex}"))
146 }
147}
148
/// A page of context together with its classification metadata.
#[derive(Debug, Clone)]
pub struct TypedPage {
    /// Identifier of the page; `TypedPage::new` derives it from the type,
    /// origin and body.
    pub page_id: PageId,
    /// Category of the page.
    pub page_type: PageType,
    /// Where the page came from.
    pub origin: PageOrigin,
    /// Token count of the body, as supplied by the caller.
    pub tokens: u32,
    /// Page text, shared via `Arc` so clones do not copy it.
    pub body: Arc<str>,
    /// Optional hint about the shape of `body`.
    pub schema_hint: Option<SchemaHint>,
}
171
172impl TypedPage {
173 #[must_use]
175 pub fn new(
176 page_type: PageType,
177 origin: PageOrigin,
178 tokens: u32,
179 body: Arc<str>,
180 schema_hint: Option<SchemaHint>,
181 ) -> Self {
182 let origin_key = origin_key_for(&origin);
183 let page_id = PageId::compute(page_type, &origin_key, body.as_bytes());
184 Self {
185 page_id,
186 page_type,
187 origin,
188 tokens,
189 body,
190 schema_hint,
191 }
192 }
193}
194
195fn origin_key_for(origin: &PageOrigin) -> String {
196 match origin {
197 PageOrigin::ToolPair { tool_name } => format!("tool:{tool_name}"),
198 PageOrigin::Turn { message_id } => format!("turn:{message_id}"),
199 PageOrigin::Excerpt { source_label } => format!("excerpt:{source_label}"),
200 PageOrigin::System { key } => format!("system:{key}"),
201 }
202}
203
/// Description of what a compacted page of a given type has to preserve.
#[derive(Debug, Clone)]
pub struct FidelityContract {
    /// Name of the fidelity level, e.g. `"structured_summary_v1"`.
    pub fidelity_level: &'static str,
    /// Version number of the invariant that produced this contract.
    pub invariant_version: u8,
    /// Names of the fields the compacted page is required to keep.
    pub required_fields: &'static [&'static str],
}
219
/// A single failed fidelity check.
#[derive(Debug, Clone, Serialize)]
pub struct FidelityViolation {
    /// Name of the required field that was not found (e.g. `"tool_name"`).
    pub missing_field: String,
    /// Human-readable explanation of the failure.
    pub detail: String,
}
233
/// Result of compacting a page.
#[derive(Debug, Clone)]
pub struct CompactedPage {
    /// Compacted text.
    pub body: Arc<str>,
    /// Token count of the compacted text, as supplied by the producer.
    pub tokens: u32,
}
244
/// Fidelity rule attached to one [`PageType`].
///
/// Implementations must be `Send + Sync`.
pub trait PageInvariant: Send + Sync {
    /// The page type this invariant applies to.
    fn page_type(&self) -> PageType;

    /// The contract a compacted version of `page` has to satisfy.
    fn minimum_fidelity(&self, page: &TypedPage) -> FidelityContract;

    /// Checks `compacted` against `original`.
    ///
    /// # Errors
    ///
    /// Returns the list of violations when the compacted page does not satisfy
    /// the invariant.
    fn verify(
        &self,
        original: &TypedPage,
        compacted: &CompactedPage,
    ) -> Result<(), Vec<FidelityViolation>>;
}
282
283pub struct ToolOutputInvariant;
290
291impl PageInvariant for ToolOutputInvariant {
292 fn page_type(&self) -> PageType {
293 PageType::ToolOutput
294 }
295
296 fn minimum_fidelity(&self, _page: &TypedPage) -> FidelityContract {
297 FidelityContract {
298 fidelity_level: "structured_summary_v1",
299 invariant_version: 1,
300 required_fields: &["tool_name", "exit_status"],
301 }
302 }
303
304 fn verify(
305 &self,
306 original: &TypedPage,
307 compacted: &CompactedPage,
308 ) -> Result<(), Vec<FidelityViolation>> {
309 let body = compacted.body.as_ref();
310 if original.schema_hint == Some(SchemaHint::Binary) {
312 return Ok(());
313 }
314
315 let mut violations = Vec::new();
316
317 let tool_name = match &original.origin {
319 PageOrigin::ToolPair { tool_name } => tool_name.as_str(),
320 _ => "",
321 };
322 if !tool_name.is_empty() && !body.contains(tool_name) {
323 violations.push(FidelityViolation {
324 missing_field: "tool_name".into(),
325 detail: format!("compacted body does not reference tool '{tool_name}'"),
326 });
327 }
328
329 let has_status = body.contains("exit_status")
331 || body.contains("exit_code")
332 || body.contains("status:")
333 || body.contains("Status:")
334 || body.contains("exit:")
335 || body.contains("rc:");
336 if !has_status {
337 violations.push(FidelityViolation {
338 missing_field: "exit_status".into(),
339 detail: "compacted body does not contain an exit status indicator".into(),
340 });
341 }
342
343 if original.schema_hint == Some(SchemaHint::Json) {
347 let original_body = original.body.as_ref();
348 let preserved = check_json_structural_key(original_body, body);
349 if !preserved {
350 violations.push(FidelityViolation {
351 missing_field: "structural_key".into(),
352 detail: "compacted JSON tool output does not reference any top-level field \
353 name from the original output"
354 .into(),
355 });
356 }
357 }
358
359 if violations.is_empty() {
360 Ok(())
361 } else {
362 Err(violations)
363 }
364 }
365}
366
367fn check_json_structural_key(original: &str, compacted: &str) -> bool {
373 let Ok(value) = serde_json::from_str::<serde_json::Value>(original) else {
374 return true;
375 };
376 let Some(obj) = value.as_object() else {
377 return true;
378 };
379 if obj.is_empty() {
380 return true;
381 }
382 obj.keys().any(|k| compacted.contains(k.as_str()))
383}
384
385pub struct ConversationTurnInvariant;
389
390impl PageInvariant for ConversationTurnInvariant {
391 fn page_type(&self) -> PageType {
392 PageType::ConversationTurn
393 }
394
395 fn minimum_fidelity(&self, _page: &TypedPage) -> FidelityContract {
396 FidelityContract {
397 fidelity_level: "semantic_summary_v1",
398 invariant_version: 1,
399 required_fields: &["role"],
400 }
401 }
402
403 fn verify(
404 &self,
405 _original: &TypedPage,
406 compacted: &CompactedPage,
407 ) -> Result<(), Vec<FidelityViolation>> {
408 let body = compacted.body.as_ref();
409 let has_role =
410 body.contains("user") || body.contains("assistant") || body.contains("system");
411 if !has_role {
412 return Err(vec![FidelityViolation {
413 missing_field: "role".into(),
414 detail: "compacted turn does not identify a speaker role".into(),
415 }]);
416 }
417 Ok(())
418 }
419}
420
421pub struct MemoryExcerptInvariant;
425
426impl PageInvariant for MemoryExcerptInvariant {
427 fn page_type(&self) -> PageType {
428 PageType::MemoryExcerpt
429 }
430
431 fn minimum_fidelity(&self, _page: &TypedPage) -> FidelityContract {
432 FidelityContract {
433 fidelity_level: "excerpt_summary_v1",
434 invariant_version: 1,
435 required_fields: &["source_label"],
436 }
437 }
438
439 fn verify(
440 &self,
441 original: &TypedPage,
442 compacted: &CompactedPage,
443 ) -> Result<(), Vec<FidelityViolation>> {
444 let source_label = match &original.origin {
445 PageOrigin::Excerpt { source_label } => source_label.as_str(),
446 _ => return Ok(()),
447 };
448 if !compacted.body.contains(source_label) {
449 return Err(vec![FidelityViolation {
450 missing_field: "source_label".into(),
451 detail: format!("compacted excerpt does not contain source label '{source_label}'"),
452 }]);
453 }
454 Ok(())
455 }
456}
457
458pub struct SystemContextInvariant;
463
464pub const SYSTEM_POINTER_PREFIX: &str = "[system-ptr:";
466
467impl PageInvariant for SystemContextInvariant {
468 fn page_type(&self) -> PageType {
469 PageType::SystemContext
470 }
471
472 fn minimum_fidelity(&self, _page: &TypedPage) -> FidelityContract {
473 FidelityContract {
474 fidelity_level: "pointer_replace_v1",
475 invariant_version: 1,
476 required_fields: &["pointer"],
477 }
478 }
479
480 fn verify(
481 &self,
482 _original: &TypedPage,
483 compacted: &CompactedPage,
484 ) -> Result<(), Vec<FidelityViolation>> {
485 if !compacted.body.starts_with(SYSTEM_POINTER_PREFIX) {
486 return Err(vec![FidelityViolation {
487 missing_field: "pointer".into(),
488 detail: format!(
489 "SystemContext page was not pointer-replaced \
490 (body does not start with '{SYSTEM_POINTER_PREFIX}')"
491 ),
492 }]);
493 }
494 Ok(())
495 }
496}
497
/// Holds one boxed [`PageInvariant`] per [`PageType`] variant.
pub struct InvariantRegistry {
    /// Invariant used for `PageType::ToolOutput`.
    tool_output: Box<dyn PageInvariant>,
    /// Invariant used for `PageType::ConversationTurn`.
    conversation_turn: Box<dyn PageInvariant>,
    /// Invariant used for `PageType::MemoryExcerpt`.
    memory_excerpt: Box<dyn PageInvariant>,
    /// Invariant used for `PageType::SystemContext`.
    system_context: Box<dyn PageInvariant>,
}
519
520impl Default for InvariantRegistry {
521 fn default() -> Self {
522 Self {
523 tool_output: Box::new(ToolOutputInvariant),
524 conversation_turn: Box::new(ConversationTurnInvariant),
525 memory_excerpt: Box::new(MemoryExcerptInvariant),
526 system_context: Box::new(SystemContextInvariant),
527 }
528 }
529}
530
531impl InvariantRegistry {
532 #[must_use]
536 pub fn get(&self, page_type: PageType) -> Option<&dyn PageInvariant> {
537 match page_type {
538 PageType::ToolOutput => Some(self.tool_output.as_ref()),
539 PageType::ConversationTurn => Some(self.conversation_turn.as_ref()),
540 PageType::MemoryExcerpt => Some(self.memory_excerpt.as_ref()),
541 PageType::SystemContext => Some(self.system_context.as_ref()),
542 }
543 }
544
545 pub fn enforce(
556 &self,
557 original: &TypedPage,
558 compacted: &CompactedPage,
559 ) -> Result<(), Vec<FidelityViolation>> {
560 let _span = tracing::info_span!(
561 "context.compaction.typed_page",
562 page_type = %original.page_type,
563 page_id = %original.page_id.0,
564 original_tokens = original.tokens,
565 compacted_tokens = compacted.tokens,
566 )
567 .entered();
568
569 if let Some(inv) = self.get(original.page_type) {
570 inv.verify(original, compacted)
571 } else {
572 tracing::warn!(
573 page_type = %original.page_type,
574 "no invariant registered for page type — skipping verification"
575 );
576 Ok(())
577 }
578 }
579}
580
/// Classifies `body` into a [`PageType`] without role information.
///
/// Equivalent to `classify_with_role(body, false)`.
#[must_use]
pub fn classify(body: &str) -> PageType {
    classify_with_role(body, false)
}
613
614#[must_use]
634pub fn classify_with_role(body: &str, is_system_role: bool) -> PageType {
635 tracing::info_span!(
636 "context.compaction.typed_page.classify",
637 body_len = body.len()
638 )
639 .in_scope(|| classify_with_role_inner(body, is_system_role))
640}
641
642fn classify_with_role_inner(body: &str, is_system_role: bool) -> PageType {
643 const TOOL_PREFIXES: &[&str] = &["[tool_output]", "[tool:", "[Tool output]"];
645 const MEMORY_PREFIXES: &[&str] = &[
646 "[cross-session context]",
647 "[semantic recall]",
648 "[known facts]",
649 "[conversation summaries]",
650 "[past corrections]",
651 "## Relevant documents",
652 ];
653 const SYSTEM_PREFIXES: &[&str] = &[
654 "[Persona context]",
655 "[Past experience]",
656 "[Memory summary]",
657 "[system",
658 "[skill",
659 "[persona",
660 "[digest",
661 "[compression",
662 ];
663
664 let trimmed = body.trim_start();
665
666 for prefix in TOOL_PREFIXES {
667 if trimmed.starts_with(prefix) {
668 return PageType::ToolOutput;
669 }
670 }
671 for prefix in MEMORY_PREFIXES {
672 if trimmed.starts_with(prefix) {
673 return PageType::MemoryExcerpt;
674 }
675 }
676 for prefix in SYSTEM_PREFIXES {
677 if trimmed.starts_with(prefix) {
678 return PageType::SystemContext;
679 }
680 }
681
682 if is_system_role {
687 return PageType::SystemContext;
688 }
689
690 tracing::warn!(
691 body_prefix = &body[..body.len().min(80)],
692 "typed-page classification fallback to ConversationTurn"
693 );
694 PageType::ConversationTurn
695}
696
697#[must_use]
703pub fn detect_schema_hint(body: &str, is_binary: bool) -> SchemaHint {
704 if is_binary || body.contains('\u{FFFD}') {
705 return SchemaHint::Binary;
706 }
707 let trimmed = body.trim_start();
708 if trimmed.starts_with('{') || trimmed.starts_with('[') {
709 return SchemaHint::Json;
710 }
711 if trimmed.starts_with("--- ")
712 || trimmed.starts_with("+++ ")
713 || trimmed.starts_with("diff --git")
714 || trimmed.starts_with("diff -")
715 {
716 return SchemaHint::Diff;
717 }
718 let first_line = trimmed.lines().next().unwrap_or("");
720 if first_line.matches('\t').count() >= 2 || first_line.matches('|').count() >= 2 {
721 return SchemaHint::Table;
722 }
723 SchemaHint::Text
724}
725
/// One audit entry describing the compaction of a single page.
///
/// The audit sink serializes each record as one JSON line.
#[derive(Debug, Serialize)]
pub struct CompactedPageRecord {
    /// Timestamp, stored as an already-formatted string.
    pub ts: String,
    /// Identifier of the turn the compaction belongs to.
    pub turn_id: String,
    /// Identifier of the compacted page.
    pub page_id: String,
    /// Category of the page.
    pub page_type: PageType,
    /// Where the page came from.
    pub origin: PageOrigin,
    /// Token count before compaction.
    pub original_tokens: u32,
    /// Token count after compaction.
    pub compacted_tokens: u32,
    /// Name of the fidelity level that was applied.
    pub fidelity_level: String,
    /// Version of the invariant that was applied.
    pub invariant_version: u8,
    /// Name of the provider recorded for this compaction.
    pub provider_name: String,
    /// Violations recorded for this compaction; empty when there were none.
    pub violations: Vec<FidelityViolation>,
    /// Classification-fallback flag; omitted from the serialized output when `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub classification_fallback: bool,
}
760
/// Cloneable handle to a background task that appends audit records to a file.
///
/// Clones share the same channel and the same drop counter.
#[derive(Debug, Clone)]
pub struct CompactionAuditSink {
    /// Sending half of the bounded channel feeding the writer task.
    tx: tokio::sync::mpsc::Sender<CompactedPageRecord>,
    /// Number of records discarded because the channel was full.
    drop_counter: Arc<std::sync::atomic::AtomicU64>,
}
792
793impl CompactionAuditSink {
794 pub async fn open(path: &std::path::Path, capacity: usize) -> Result<Self, std::io::Error> {
803 use tokio::io::AsyncWriteExt as _;
804
805 if let Some(parent) = path.parent() {
806 tokio::fs::create_dir_all(parent).await?;
807 }
808 let file = tokio::fs::OpenOptions::new()
809 .create(true)
810 .append(true)
811 .open(path)
812 .await?;
813
814 let (tx, mut rx) = tokio::sync::mpsc::channel::<CompactedPageRecord>(capacity);
815 let drop_counter = Arc::new(std::sync::atomic::AtomicU64::new(0));
816 let drop_counter_bg = drop_counter.clone();
817
818 tokio::spawn(async move {
819 let mut writer = tokio::io::BufWriter::new(file);
820 while let Some(record) = rx.recv().await {
821 match serde_json::to_string(&record) {
822 Ok(mut line) => {
823 line.push('\n');
824 if let Err(e) = writer.write_all(line.as_bytes()).await {
825 tracing::error!("compaction audit write failed: {e:#}");
826 }
827 }
828 Err(e) => {
829 tracing::error!("compaction audit serialization failed: {e:#}");
830 }
831 }
832 }
833 let _ = writer.flush().await;
835
836 let dropped = drop_counter_bg.load(std::sync::atomic::Ordering::Relaxed);
837 if dropped > 0 {
838 tracing::warn!(dropped, "compaction audit sink closed with dropped records");
839 }
840 });
841
842 Ok(Self { tx, drop_counter })
843 }
844
845 pub fn send(&self, record: CompactedPageRecord) {
849 match self.tx.try_send(record) {
850 Ok(()) => {}
851 Err(tokio::sync::mpsc::error::TrySendError::Full(_)) => {
852 let prev = self
853 .drop_counter
854 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
855 tracing::warn!(
856 dropped_total = prev + 1,
857 "compaction audit sink full — record dropped (best-effort audit)"
858 );
859 }
860 Err(tokio::sync::mpsc::error::TrySendError::Closed(_)) => {
861 tracing::error!("compaction audit sink closed unexpectedly");
862 }
863 }
864 }
865
866 pub async fn flush(&self) {
880 tokio::task::yield_now().await;
882 }
883
884 #[must_use]
886 pub fn dropped_count(&self) -> u64 {
887 self.drop_counter.load(std::sync::atomic::Ordering::Relaxed)
888 }
889}
890
/// Unit tests for page ids, classification, schema hints, the invariants, the
/// registry and the audit sink.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- fixtures -------------------------------------------------------

    /// Builds a `ToolOutput` page originating from `tool_name`.
    fn tool_page(tool_name: &str, tokens: u32, body: &str, hint: SchemaHint) -> TypedPage {
        TypedPage::new(
            PageType::ToolOutput,
            PageOrigin::ToolPair {
                tool_name: tool_name.into(),
            },
            tokens,
            Arc::from(body),
            Some(hint),
        )
    }

    /// Builds a `SystemContext` page with the `persona` key and no schema hint.
    fn persona_page(tokens: u32, body: &str) -> TypedPage {
        TypedPage::new(
            PageType::SystemContext,
            PageOrigin::System {
                key: "persona".into(),
            },
            tokens,
            Arc::from(body),
            None,
        )
    }

    /// Builds a `ConversationTurn` page for `message_id` with no schema hint.
    fn turn_page(message_id: &str, tokens: u32, body: &str) -> TypedPage {
        TypedPage::new(
            PageType::ConversationTurn,
            PageOrigin::Turn {
                message_id: message_id.into(),
            },
            tokens,
            Arc::from(body),
            None,
        )
    }

    /// Builds a `MemoryExcerpt` page with the given origin and no schema hint.
    fn memory_page(origin: PageOrigin, tokens: u32, body: &str) -> TypedPage {
        TypedPage::new(PageType::MemoryExcerpt, origin, tokens, Arc::from(body), None)
    }

    /// Builds a compacted page.
    fn compacted(body: &str, tokens: u32) -> CompactedPage {
        CompactedPage {
            body: Arc::from(body),
            tokens,
        }
    }

    /// Asserts that `result` is an error containing a violation for `field`.
    fn assert_violation(result: Result<(), Vec<FidelityViolation>>, field: &str) {
        let violations = result.expect_err("expected verification to fail");
        assert!(
            violations.iter().any(|v| v.missing_field == field),
            "no violation for field {field}: {violations:?}"
        );
    }

    // ---- PageId ---------------------------------------------------------

    #[test]
    fn page_id_same_input_same_output() {
        let a = PageId::compute(PageType::ToolOutput, "tool:shell", b"exit_code: 0");
        let b = PageId::compute(PageType::ToolOutput, "tool:shell", b"exit_code: 0");
        assert_eq!(a, b);
    }

    #[test]
    fn page_id_different_type_different_id() {
        let a = PageId::compute(PageType::ToolOutput, "tool:shell", b"body");
        let b = PageId::compute(PageType::ConversationTurn, "tool:shell", b"body");
        assert_ne!(a, b);
    }

    #[test]
    fn page_id_starts_with_blake3_prefix() {
        let id = PageId::compute(PageType::SystemContext, "system:persona", b"some content");
        assert!(id.0.starts_with("blake3:"));
    }

    // ---- classify -------------------------------------------------------

    #[test]
    fn classify_tool_output_prefix() {
        assert_eq!(
            classify("[tool_output] shell exit_code: 0"),
            PageType::ToolOutput
        );
        assert_eq!(classify("[tool:shell] result"), PageType::ToolOutput);
    }

    #[test]
    fn classify_memory_prefixes() {
        for body in [
            "[cross-session context]\nsome recall",
            "[semantic recall]\n- [user] hello",
            "[known facts]\n- fact",
            "[conversation summaries]\n- 1-10: summary",
        ] {
            assert_eq!(classify(body), PageType::MemoryExcerpt, "body={body}");
        }
    }

    #[test]
    fn classify_system_prefixes() {
        assert_eq!(classify("[Persona context]\nfact"), PageType::SystemContext);
        assert_eq!(classify("[system prompt]"), PageType::SystemContext);
    }

    #[test]
    fn classify_fallback_is_conversation_turn() {
        assert_eq!(classify("Hello, world!"), PageType::ConversationTurn);
        assert_eq!(classify(""), PageType::ConversationTurn);
    }

    // ---- detect_schema_hint --------------------------------------------

    #[test]
    fn detect_schema_hint_json() {
        assert_eq!(
            detect_schema_hint(r#"{"key": "val"}"#, false),
            SchemaHint::Json
        );
        assert_eq!(detect_schema_hint("[1,2,3]", false), SchemaHint::Json);
    }

    #[test]
    fn detect_schema_hint_diff() {
        assert_eq!(detect_schema_hint("--- a\n+++ b", false), SchemaHint::Diff);
    }

    #[test]
    fn detect_schema_hint_binary() {
        assert_eq!(detect_schema_hint("anything", true), SchemaHint::Binary);
    }

    #[test]
    fn detect_schema_hint_text_fallback() {
        assert_eq!(detect_schema_hint("plain text", false), SchemaHint::Text);
    }

    // ---- ToolOutputInvariant -------------------------------------------

    #[test]
    fn tool_output_invariant_passes_when_fields_present() {
        let page = tool_page(
            "shell",
            100,
            "[tool_output] shell exit_code: 0\nsome output",
            SchemaHint::Text,
        );
        let summary = compacted("shell exit_status: 0\nkey: value", 10);
        assert!(ToolOutputInvariant.verify(&page, &summary).is_ok());
    }

    #[test]
    fn tool_output_invariant_fails_missing_tool_name() {
        let page = tool_page(
            "my_tool",
            100,
            "[tool_output] my_tool exit_code: 0",
            SchemaHint::Text,
        );
        let summary = compacted("exit_status: 0", 5);
        assert_violation(ToolOutputInvariant.verify(&page, &summary), "tool_name");
    }

    #[test]
    fn tool_output_invariant_passes_for_binary() {
        let page = tool_page("binary_tool", 100, "<binary:1024 bytes>", SchemaHint::Binary);
        let summary = compacted("<binary:1024 bytes> (archived)", 5);
        assert!(ToolOutputInvariant.verify(&page, &summary).is_ok());
    }

    // ---- SystemContextInvariant ----------------------------------------

    #[test]
    fn system_context_invariant_passes_with_pointer() {
        let page = persona_page(200, "[Persona context]\nsome persona info");
        let summary = compacted("[system-ptr:blake3:abcdef123456]", 3);
        assert!(SystemContextInvariant.verify(&page, &summary).is_ok());
    }

    #[test]
    fn system_context_invariant_fails_without_pointer() {
        let page = persona_page(200, "[Persona context]\nsome persona info");
        let summary = compacted("This is a paraphrase of persona info", 10);
        assert_violation(SystemContextInvariant.verify(&page, &summary), "pointer");
    }

    // ---- InvariantRegistry ---------------------------------------------

    #[test]
    fn registry_covers_all_page_types() {
        let reg = InvariantRegistry::default();
        for pt in [
            PageType::ToolOutput,
            PageType::ConversationTurn,
            PageType::MemoryExcerpt,
            PageType::SystemContext,
        ] {
            assert!(reg.get(pt).is_some(), "missing invariant for {pt:?}");
        }
    }

    #[test]
    fn registry_returns_correct_page_type() {
        let reg = InvariantRegistry::default();
        assert_eq!(
            reg.get(PageType::ToolOutput).unwrap().page_type(),
            PageType::ToolOutput
        );
        assert_eq!(
            reg.get(PageType::SystemContext).unwrap().page_type(),
            PageType::SystemContext
        );
    }

    #[test]
    fn enforce_ok_for_valid_system_pointer() {
        let reg = InvariantRegistry::default();
        let page = persona_page(50, "[Persona context]\nrules");
        let summary = compacted("[system-ptr:blake3:aabbccdd11223344]", 3);
        assert!(reg.enforce(&page, &summary).is_ok());
    }

    #[test]
    fn enforce_err_for_paraphrased_system_context() {
        let reg = InvariantRegistry::default();
        let page = persona_page(50, "[Persona context]\nrules");
        let summary = compacted("The persona says to be helpful.", 7);
        assert_violation(reg.enforce(&page, &summary), "pointer");
    }

    #[test]
    fn enforce_ok_for_conversation_turn_with_role() {
        let reg = InvariantRegistry::default();
        let page = turn_page("42", 30, "Hello from user");
        let summary = compacted("user asked about Rust", 5);
        assert!(reg.enforce(&page, &summary).is_ok());
    }

    // ---- MemoryExcerptInvariant ----------------------------------------

    #[test]
    fn memory_excerpt_invariant_passes_when_label_present() {
        let label = "semantic_recall";
        let page = memory_page(
            PageOrigin::Excerpt {
                source_label: label.into(),
            },
            80,
            "[semantic recall]\n- [user] hello",
        );
        let summary = compacted(&format!("Summary from {label}: user greeted"), 6);
        assert!(MemoryExcerptInvariant.verify(&page, &summary).is_ok());
    }

    #[test]
    fn memory_excerpt_invariant_fails_when_label_missing() {
        let page = memory_page(
            PageOrigin::Excerpt {
                source_label: "graph_facts".into(),
            },
            80,
            "[known facts]\n- Alice works at Acme",
        );
        let summary = compacted("Alice is employed somewhere", 5);
        assert_violation(
            MemoryExcerptInvariant.verify(&page, &summary),
            "source_label",
        );
    }

    #[test]
    fn memory_excerpt_invariant_passes_for_non_excerpt_origin() {
        let page = memory_page(
            PageOrigin::System {
                key: "digests".into(),
            },
            40,
            "[system]",
        );
        let summary = compacted("anything", 1);
        assert!(MemoryExcerptInvariant.verify(&page, &summary).is_ok());
    }

    // ---- ConversationTurnInvariant -------------------------------------

    #[test]
    fn conversation_turn_invariant_passes_with_role_word() {
        let page = turn_page("1", 20, "Hello world");
        for body in ["user: hi", "assistant replied", "system note"] {
            let summary = compacted(body, 2);
            assert!(
                ConversationTurnInvariant.verify(&page, &summary).is_ok(),
                "body={body}"
            );
        }
    }

    #[test]
    fn conversation_turn_invariant_fails_without_role_word() {
        let page = turn_page("2", 20, "some turn content");
        let summary = compacted("content was summarized", 3);
        assert_violation(ConversationTurnInvariant.verify(&page, &summary), "role");
    }

    // ---- CompactionAuditSink -------------------------------------------

    #[tokio::test]
    async fn audit_sink_jsonl_roundtrip() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit.jsonl");

        let sink = CompactionAuditSink::open(&path, 64).await.unwrap();
        let record = CompactedPageRecord {
            ts: "2026-04-19T00:00:00Z".into(),
            turn_id: "1".into(),
            page_id: "blake3:aabbccdd".into(),
            page_type: PageType::ToolOutput,
            origin: PageOrigin::ToolPair {
                tool_name: "shell".into(),
            },
            original_tokens: 100,
            compacted_tokens: 20,
            fidelity_level: "structured_summary_v1".into(),
            invariant_version: 1,
            provider_name: "test".into(),
            violations: vec![],
            classification_fallback: false,
        };
        sink.send(record);

        // Dropping the last sender lets the writer task drain and flush.
        drop(sink);

        // The writer runs asynchronously, so poll for the completed line with
        // an upper bound (about 2 s) rather than relying on one fixed sleep.
        let mut contents = String::new();
        for _ in 0..200 {
            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
            contents = std::fs::read_to_string(&path).unwrap();
            if contents.ends_with('\n') {
                break;
            }
        }

        assert!(!contents.is_empty(), "audit file should not be empty");
        let parsed: serde_json::Value = serde_json::from_str(contents.trim()).unwrap();
        assert_eq!(parsed["page_type"], "tool_output");
        assert_eq!(parsed["turn_id"], "1");
        assert_eq!(parsed["provider_name"], "test");
    }

    #[tokio::test]
    async fn audit_sink_drop_counter_increments_when_full() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit_full.jsonl");

        let sink = CompactionAuditSink::open(&path, 1).await.unwrap();

        let make_record = || CompactedPageRecord {
            ts: "2026-04-19T00:00:00Z".into(),
            turn_id: "x".into(),
            page_id: "blake3:00".into(),
            page_type: PageType::ConversationTurn,
            origin: PageOrigin::Turn {
                message_id: "0".into(),
            },
            original_tokens: 10,
            compacted_tokens: 5,
            fidelity_level: "semantic_summary_v1".into(),
            invariant_version: 1,
            provider_name: "test".into(),
            violations: vec![],
            classification_fallback: false,
        };

        // No await between sends, so the writer task cannot drain the
        // single-slot queue in between.
        for _ in 0..10 {
            sink.send(make_record());
        }

        assert!(
            sink.dropped_count() > 0,
            "expected at least one dropped record"
        );
    }

    #[tokio::test]
    async fn audit_sink_flush_does_not_panic() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit_flush.jsonl");
        let sink = CompactionAuditSink::open(&path, 16).await.unwrap();
        sink.flush().await;
    }

    // ---- classify_with_role --------------------------------------------

    #[test]
    fn classify_with_role_system_flag_overrides_fallback() {
        assert_eq!(
            classify_with_role("You are a helpful assistant.", true),
            PageType::SystemContext
        );
    }

    #[test]
    fn classify_with_role_prefix_wins_over_system_flag() {
        // The flag must be `true` here, otherwise the test never exercises the
        // precedence of the prefix over the system role.
        assert_eq!(
            classify_with_role("[tool_output] exit_code: 0", true),
            PageType::ToolOutput
        );
    }

    #[test]
    fn classify_with_role_false_still_falls_back_to_conversation_turn() {
        assert_eq!(
            classify_with_role("random prose without markers", false),
            PageType::ConversationTurn
        );
    }

    // ---- JSON structural check -----------------------------------------

    #[test]
    fn tool_output_json_structural_check_passes_when_key_preserved() {
        let page = tool_page(
            "shell",
            50,
            r#"{"exit_code": 0, "stdout": "ok"}"#,
            SchemaHint::Json,
        );
        let summary = compacted("shell exit_code: 0, stdout was ok", 8);
        assert!(ToolOutputInvariant.verify(&page, &summary).is_ok());
    }

    #[test]
    fn tool_output_json_structural_check_fails_when_no_key_preserved() {
        let page = tool_page(
            "my_tool",
            50,
            r#"{"some_field": "value", "other_field": 42}"#,
            SchemaHint::Json,
        );
        let summary = compacted("my_tool exit_status: 0 completed successfully", 7);
        assert_violation(
            ToolOutputInvariant.verify(&page, &summary),
            "structural_key",
        );
    }
}