use serde::{Deserialize, Serialize};
/// Identifies the encoding format applied to a piece of content.
///
/// Carried by `Compressed::format` and `StreamDelta::format`, and returned by
/// `Encoder::select_format`. No serde attributes are applied, so serde's
/// default enum representation is used.
///
/// NOTE(review): what each format actually does is decided by the encoder
/// implementations, which are not visible in this file — confirm there before
/// relying on a variant's name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Format {
Symbolic,
JitProgressive,
FragmentProse,
StructuredDelim,
Prose,
}
/// Identifies the model that a compressed payload, token measurement, or
/// log record refers to.
///
/// This enum is `Clone` but not `Copy` because three variants own a `String`.
/// It derives `Eq` and `Hash`, so it can be used as a map key.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Model {
ClaudeOpus47,
ClaudeSonnet47,
ClaudeHaiku47,
Gpt5,
Gpt4o,
Gpt4,
Gemini25Ultra,
Gemini25Pro,
/// Carries a caller-supplied string.
/// NOTE(review): presumably a model identifier (the tests in this file pass
/// `meta-llama/Llama-3-70b`) — confirm the expected format.
Llama3Custom(String),
/// Carries a caller-supplied string.
/// NOTE(review): presumably a model identifier — confirm the expected format.
Qwen3Custom(String),
Grok4,
/// Carries a caller-supplied string.
/// NOTE(review): presumably the name of a model registered at runtime —
/// confirm against the registration code, which is not visible here.
Registered(String),
}
/// Reason code attached to a fallback result.
///
/// Passed to `Encoder::fallback` and stored in `Compressed::fallback`.
/// Note that, unlike `Format`, this enum does not derive `Hash`.
///
/// NOTE(review): the conditions under which each reason is produced are
/// decided by encoder implementations outside this file — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum FallbackReason {
Uncompressible,
TokenizerMissing,
QualityDegraded,
OversizedInput,
EncoderFault,
StructuredContent,
}
/// Result value returned by `Encoder::compress` and `Encoder::fallback`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Compressed {
/// The resulting text.
pub content: String,
/// Format tag associated with `content`.
pub format: Format,
/// Token count before encoding.
/// NOTE(review): presumably measured with the tokenizer for `model` — confirm.
pub baseline_tokens: u32,
/// Token count after encoding.
pub compressed_tokens: u32,
/// Model this result is associated with.
pub model: Model,
/// Hash of the content, stored as a string.
/// NOTE(review): the hash algorithm, the text encoding, and whether it covers
/// the input or the output are not specified in this file — confirm.
pub content_hash: String,
/// `Some(reason)` when a fallback reason is recorded, otherwise `None`.
pub fallback: Option<FallbackReason>,
}
/// Interface for components that turn input text into a `Compressed` value.
///
/// Implementors must be `Send + Sync`. Every method takes `model` by value;
/// because `Model` is not `Copy`, callers that need the model afterwards must
/// clone it.
pub trait Encoder: Send + Sync {
/// Encodes `input` for `model` and returns the result. The signature is
/// infallible: there is no `Result` in the return type.
fn compress(&self, input: &str, model: Model) -> Compressed;
/// Returns the `Format` chosen for `input` and `model`.
fn select_format(&self, input: &str, model: Model) -> Format;
/// Builds a `Compressed` value for `input` given a fallback `reason`.
/// NOTE(review): presumably the result carries `reason` in its `fallback`
/// field and leaves the content uncompressed — confirm with implementors.
fn fallback(&self, input: &str, model: Model, reason: FallbackReason) -> Compressed;
}
/// Interface for model-specific token measurement.
///
/// Implementors must be `Send + Sync`. Unlike `Encoder`, these methods borrow
/// the `Model` instead of taking it by value.
pub trait Measurer: Send + Sync {
/// Measures `text` for `model`, returning a `u32` on success.
/// NOTE(review): presumably the number of tokens in `text` — confirm.
///
/// # Errors
///
/// Returns a `TokenizerError` when measurement fails.
fn tokenize(&self, text: &str, model: &Model) -> Result<u32, TokenizerError>;
/// Reports whether `model` is supported by this measurer.
fn supported(&self, model: &Model) -> bool;
}
/// Errors returned by `Measurer::tokenize`.
///
/// `Display` text comes from the `#[error(...)]` attributes on each variant.
#[derive(Debug, thiserror::Error)]
pub enum TokenizerError {
/// No tokenizer is registered for the given model. The model is rendered
/// with its `Debug` representation in the message.
#[error("tokenizer for model {0:?} is not registered")]
NotRegistered(Model),
/// I/O failure, carrying a description as a plain string.
#[error("tokenizer I/O failure: {0}")]
Io(String),
/// Failure reported by the tokenizer library, carrying a description as a
/// plain string.
#[error("tokenizer library error: {0}")]
Library(String),
}
/// One entry accepted by `MeasurementLog::record` and returned by
/// `MeasurementLog::get`.
///
/// The last five fields are marked `#[serde(default)]`, so input that omits
/// them deserializes with those fields set to `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Record {
/// Identifier of the request this record belongs to.
pub request_id: String,
/// Timestamp stored as a string.
/// NOTE(review): the fixture in this file's tests uses
/// `2026-04-24T00:00:00.000Z`; the type itself enforces no format — confirm.
pub ts: String,
/// Identifier of the tenant.
pub tenant_id: String,
/// Model associated with the request.
pub model: Model,
/// Route label stored as a string.
/// NOTE(review): the set of valid values is not defined in this file.
pub route: String,
/// Token figures for the input stream.
pub input: StreamDelta,
/// Token figures for the output stream.
pub output: StreamDelta,
/// Token figures for the thinking stream, when present.
pub thinking: Option<StreamDelta>,
/// Cost figures for the request.
pub cost: CostEntry,
/// Hashes and signatures for the record.
pub integrity: Integrity,
/// Optional dialect label; `None` when absent from the input.
#[serde(default)]
pub dialect: Option<String>,
/// Optional list of rule names; `None` when absent from the input.
#[serde(default)]
pub rules_applied: Option<Vec<String>>,
/// Optional list of `(String, u32)` pairs; `None` when absent from the input.
/// NOTE(review): presumably (rule name, number of times it fired) — confirm.
#[serde(default)]
pub rule_fire_counts: Option<Vec<(String, u32)>>,
/// Optional quality measurement; `None` when absent from the input.
#[serde(default)]
pub quality: Option<QualityScore>,
/// Optional list of `(String, u64)` pairs; `None` when absent from the input.
/// NOTE(review): presumably (rule name, bytes saved by that rule) — confirm.
#[serde(default)]
pub bytes_saved_by_rule: Option<Vec<(String, u64)>>,
}
/// Quality measurement optionally attached to a `Record` via its `quality`
/// field.
///
/// Derives `PartialEq` but not `Eq`, since it contains `f32` fields.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct QualityScore {
/// Cosine score.
/// NOTE(review): presumably a cosine similarity between embeddings of the
/// original and processed text — confirm what is compared and the range.
pub cosine: f32,
/// Verdict stored as free-form text.
/// NOTE(review): the set of valid verdict values is not defined in this file.
pub judge_verdict: String,
/// Identifier of the judge model, stored as a string.
pub judge_model: String,
/// Identifier of the embedder, stored as a string.
pub embedder_id: String,
/// Sampling rate.
/// NOTE(review): presumably a fraction in `0.0..=1.0` — confirm.
pub sample_rate: f32,
}
/// Token figures for one stream of a `Record` (its `input`, `output`, or
/// `thinking` field).
///
/// NOTE(review): nothing in this file enforces a relationship between the
/// fields. The fixture in this file's tests uses baseline 100, compressed 80,
/// delta -20 and pct 0.8, which suggests `delta = compressed - baseline` and
/// `pct = compressed / baseline` — confirm with the code that builds it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamDelta {
/// Token count before encoding.
pub baseline_tokens: u32,
/// Token count after encoding.
pub compressed_tokens: u32,
/// Signed token difference.
pub delta_tokens: i32,
/// Compression figure stored as `f32`.
pub compressed_pct: f32,
/// Format associated with the stream, or `None`.
pub format: Option<Format>,
}
/// Cost figures stored in a `Record`'s `cost` field.
///
/// NOTE(review): nothing in this file enforces a relationship between the
/// three amounts. The fixture in this file's tests uses 1.0, 0.8 and 0.2,
/// which suggests `savings = baseline - actual` — confirm with the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostEntry {
/// Currency label stored as a string (the test fixture uses `USD`).
pub currency: String,
/// Baseline cost, in cents, as a floating-point value.
pub baseline_cost_cents: f64,
/// Actual cost, in cents, as a floating-point value.
pub actual_cost_cents: f64,
/// Savings, in cents, as a floating-point value.
pub savings_cents: f64,
/// Identifier of the pricing snapshot, stored as a string.
pub pricing_snapshot_id: String,
}
/// Hashes and signatures stored in a `Record`'s `integrity` field.
///
/// All values are plain strings.
/// NOTE(review): the hash algorithm, the text encoding of hashes and
/// signatures, and the exact bytes that are signed are not specified in this
/// file — confirm against the signing and canonicalization code.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Integrity {
/// Hash of the prompt.
pub prompt_hash: String,
/// Hash of the response.
pub response_hash: String,
/// Ed25519 signature.
pub ed25519_signature: String,
/// ML-DSA signature.
pub mldsa_signature: String,
/// Identifier of the signing key.
/// NOTE(review): presumably the value returned by `Signer::key_id` — confirm.
pub signing_key_id: String,
}
/// Interface for a store of `Record` values addressed by `RecordId`.
///
/// Implementors must be `Send + Sync`; both methods take `&self`.
pub trait MeasurementLog: Send + Sync {
/// Stores `r`, taking ownership of it, and returns the identifier assigned
/// to it.
///
/// # Errors
///
/// Returns a `LogError` when the record cannot be stored.
fn record(&self, r: Record) -> Result<RecordId, LogError>;
/// Looks up the record stored under `id`.
/// NOTE(review): presumably `Ok(None)` means no record exists for `id` —
/// confirm with implementors.
///
/// # Errors
///
/// Returns a `LogError` when the lookup itself fails.
fn get(&self, id: &RecordId) -> Result<Option<Record>, LogError>;
}
/// Newtype around `String` identifying a record in a `MeasurementLog`.
///
/// The inner string is public. The type derives `Eq` and `Hash`, so it can be
/// used as a map key.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct RecordId(pub String);
/// Errors returned by `MeasurementLog` methods.
///
/// Each variant carries a description as a plain string; `Display` text comes
/// from the `#[error(...)]` attributes.
#[derive(Debug, thiserror::Error)]
pub enum LogError {
/// I/O failure.
#[error("log I/O failure: {0}")]
Io(String),
/// Signing failure.
#[error("signing failure: {0}")]
Signing(String),
/// Canonicalization failure.
#[error("canonicalization failure: {0}")]
Canonicalization(String),
}
/// Interface for producing a pair of signatures over canonical bytes.
///
/// Implementors must be `Send + Sync`.
pub trait Signer: Send + Sync {
/// Signs `canonical_bytes` and returns two signature strings.
/// NOTE(review): presumably ordered as (Ed25519, ML-DSA), matching the
/// `SignerError` variants — the tuple order and string encoding are not
/// stated in this file; confirm with implementors.
///
/// # Errors
///
/// Returns a `SignerError` when signing fails.
fn dual_sign(&self, canonical_bytes: &[u8]) -> Result<(String, String), SignerError>;
/// Returns the identifier of the signing key as a borrowed string.
fn key_id(&self) -> &str;
}
/// Errors returned by `Signer::dual_sign`.
///
/// `Display` text comes from the `#[error(...)]` attributes on each variant.
#[derive(Debug, thiserror::Error)]
pub enum SignerError {
/// Ed25519 failure, carrying a description as a plain string.
#[error("ed25519 error: {0}")]
Ed25519(String),
/// ML-DSA failure, carrying a description as a plain string.
#[error("ML-DSA error: {0}")]
MlDsa(String),
/// The key is not loaded.
#[error("key not loaded")]
KeyMissing,
}
#[cfg(test)]
mod tests {
    // JSON serialization tests for the schema types declared in this file.
    use super::*;

    /// A unit `Format` variant must survive a JSON round trip unchanged.
    #[test]
    fn format_round_trip_json() {
        let f = Format::JitProgressive;
        let s = serde_json::to_string(&f).unwrap();
        let f2: Format = serde_json::from_str(&s).unwrap();
        assert_eq!(f, f2);
    }

    /// A string-carrying `Model` variant must serialize with both its tag and
    /// its payload, and must deserialize back to an equal value.
    #[test]
    fn model_custom_llama_supported() {
        let m = Model::Llama3Custom("meta-llama/Llama-3-70b".to_string());
        let s = serde_json::to_string(&m).unwrap();
        assert!(s.contains("Llama3Custom"));
        // The tag alone does not prove the payload was kept, so check it too.
        assert!(
            s.contains("meta-llama/Llama-3-70b"),
            "custom model identifier must be present in the serialized form"
        );
        // Serializing is only half of the contract: stored records are read
        // back, so the value must also deserialize to something equal.
        let back: Model = serde_json::from_str(&s).unwrap();
        assert_eq!(back, m);
    }

    /// A record written without `bytes_saved_by_rule` must still parse, and
    /// the field must round-trip once it is populated.
    #[test]
    fn record_bytes_saved_by_rule_field_is_backward_compatible() {
        // Fixture deliberately omits every `#[serde(default)]` field.
        let v003_json = r#"{
"request_id": "01HY0000000000000000000000",
"ts": "2026-04-24T00:00:00.000Z",
"tenant_id": "t",
"model": "ClaudeOpus47",
"route": "a2a",
"input": {"baseline_tokens": 100, "compressed_tokens": 80, "delta_tokens": -20, "compressed_pct": 0.8, "format": null},
"output": {"baseline_tokens": 50, "compressed_tokens": 50, "delta_tokens": 0, "compressed_pct": 1.0, "format": null},
"thinking": null,
"cost": {"currency": "USD", "baseline_cost_cents": 1.0, "actual_cost_cents": 0.8, "savings_cents": 0.2, "pricing_snapshot_id": "p1"},
"integrity": {"prompt_hash": "a", "response_hash": "b", "ed25519_signature": "c", "mldsa_signature": "d", "signing_key_id": "k"}
}"#;
        let r: Record =
            serde_json::from_str(v003_json).expect("v0.0.3 record must parse under v0.0.4 schema");
        assert!(
            r.bytes_saved_by_rule.is_none(),
            "missing field must deserialize as None, not surface an error"
        );

        // Populate the field and make sure it is emitted and read back intact.
        let with = Record {
            bytes_saved_by_rule: Some(vec![
                ("json_minified".to_string(), 42),
                ("term_substitutions".to_string(), 17),
            ]),
            ..r
        };
        let serialized = serde_json::to_string(&with).unwrap();
        assert!(serialized.contains("\"bytes_saved_by_rule\""));
        let back: Record = serde_json::from_str(&serialized).unwrap();
        assert_eq!(
            back.bytes_saved_by_rule.as_deref(),
            Some(
                &[
                    ("json_minified".to_string(), 42u64),
                    ("term_substitutions".to_string(), 17u64)
                ][..]
            )
        );
    }
}