// pithy_core/interfaces.rs
//! Pithy interface contracts — FROZEN at Phase 0.
//!
//! This module defines the trait and type contracts that every other
//! crate in the workspace depends on. Semver commitment: any breaking
//! change to these types or traits requires a major-version bump and a
//! coordinated release across all consumers.
//!
//! Copyright (c) 2026 Mikko Parkkola. All rights reserved.
//! Licensed under PolyForm Noncommercial 1.0 + Pithy Attribution Rider.
10
11use serde::{Deserialize, Serialize};
12
13/// One of the four compression formats defined by the Pithy specification.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
15pub enum Format {
16 /// Symbolic notation (arrow chains, unicode glyphs, parenthetical state).
17 Symbolic,
18 /// Just-in-time progressive vocabulary binding: bind on first re-use.
19 JitProgressive,
20 /// Fragment prose: articles dropped, grammar compressed.
21 FragmentProse,
22 /// Structured delimiters: colon-key / pipe-separated fields.
23 StructuredDelim,
24 /// No compression applied. Fallback and baseline.
25 Prose,
26}
27
28/// The upstream LLM whose tokenizer rules the measurement.
29#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
30pub enum Model {
31 // Anthropic
32 ClaudeOpus47,
33 ClaudeSonnet47,
34 ClaudeHaiku47,
35 // OpenAI
36 Gpt5,
37 Gpt4o,
38 Gpt4,
39 // Google
40 Gemini25Ultra,
41 Gemini25Pro,
42 // Open weights
43 Llama3Custom(String),
44 Qwen3Custom(String),
45 // xAI
46 Grok4,
47 // Catch-all for registered tokenizers not yet in the enum
48 Registered(String),
49}
50
51/// Reason the encoder fell back to a less aggressive format.
52#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
53pub enum FallbackReason {
54 /// Selector judged compression would be net-negative.
55 Uncompressible,
56 /// Tokenizer not registered for the target model.
57 TokenizerMissing,
58 /// Downstream quality gate failed (semantic fidelity below threshold).
59 QualityDegraded,
60 /// Content exceeds max input size for the selected format.
61 OversizedInput,
62 /// Encoder panic or unrecoverable error.
63 EncoderFault,
64 /// Input contains markdown/structured markers (headings, list items,
65 /// fenced code blocks, or multiple paragraphs). Symbolic compression
66 /// collapses whitespace unconditionally via `MULTI_WS`, which
67 /// destroys paragraph breaks and newline-bearing structure.
68 /// This fallback preserves structure at the cost of some savings.
69 StructuredContent,
70}
71
72/// Output of a single compression operation.
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct Compressed {
75 /// Compressed text ready to send to an upstream LLM.
76 pub content: String,
77 /// Which format was selected.
78 pub format: Format,
79 /// Real tokenizer count of the original prose input.
80 pub baseline_tokens: u32,
81 /// Real tokenizer count of the compressed output.
82 pub compressed_tokens: u32,
83 /// Model whose tokenizer was used for the measurement.
84 pub model: Model,
85 /// Blake3 hash of the original prose input (32 bytes hex-encoded).
86 pub content_hash: String,
87 /// Fallback reason if format is Prose and baseline != compressed semantically.
88 pub fallback: Option<FallbackReason>,
89}
90
91/// A pass-through encoder interface. Implementors MUST preserve semantic fidelity
92/// of the compressed output at or above the Phase-0 kill-switch threshold (≥0.90
93/// cosine similarity under the target LLM's response).
94pub trait Encoder: Send + Sync {
95 /// Compress `input` for the target `model`. Must run in <5ms at p95.
96 /// Returns `Compressed` even on fallback; format field reports what was used.
97 fn compress(&self, input: &str, model: Model) -> Compressed;
98
99 /// Pick the best format for `input` without actually encoding. Used by
100 /// A/B samplers and planners. Must match [`compress`]'s selection.
101 fn select_format(&self, input: &str, model: Model) -> Format;
102
103 /// Explicit fallback request — used when downstream quality monitor flags
104 /// a previously-compressed payload.
105 fn fallback(&self, input: &str, model: Model, reason: FallbackReason) -> Compressed;
106}
107
108/// Count real tokens for the target model. No estimation, no synthetic fallback.
109pub trait Measurer: Send + Sync {
110 /// Returns the actual token count for `text` under `model`'s tokenizer.
111 /// Implementations MUST return `Err` for unregistered models rather than
112 /// silently approximating — this is a DoR gate (no hardcoded ratios).
113 fn tokenize(&self, text: &str, model: &Model) -> Result<u32, TokenizerError>;
114
115 /// Returns true iff the tokenizer for `model` is registered and callable.
116 fn supported(&self, model: &Model) -> bool;
117}
118
119/// Error from the measurer path. Kept narrow so downstream code can fall back.
120#[derive(Debug, thiserror::Error)]
121pub enum TokenizerError {
122 #[error("tokenizer for model {0:?} is not registered")]
123 NotRegistered(Model),
124 #[error("tokenizer I/O failure: {0}")]
125 Io(String),
126 #[error("tokenizer library error: {0}")]
127 Library(String),
128}
129
130/// A single signed measurement record. Wire format frozen at v0.1.
131///
132/// # Changelog
133/// - v0.0.2: Added `dialect` and `rules_applied` as backward-compatible
134/// `#[serde(default)]` optional fields. Both default to `None` for v0.1
135/// readers. Phase-2 Agent D's encoder populates them.
136/// - v0.0.4: Added `bytes_saved_by_rule` as backward-compatible
137/// `#[serde(default)]` optional field. `None` for v0.0.3-and-earlier
138/// readers. Wired by Step 9 of `docs/PLAN_2026-04-24.md` — schema
139/// lands now so future encoder instrumentation (per-rule byte-delta
140/// callbacks) can populate without a second migration. Until then
141/// the field is always `None` at produce time. Consumers must handle
142/// `None` gracefully (treat as "no attribution available").
143#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct Record {
145 /// ULID request identifier.
146 pub request_id: String,
147 /// ISO-8601 timestamp with millisecond precision.
148 pub ts: String,
149 /// Tenant id. Required for multi-tenant reporting.
150 pub tenant_id: String,
151 /// Model the request was routed to.
152 pub model: Model,
153 /// Route classification (agent-to-agent, agent-to-human, etc).
154 pub route: String,
155 /// Input-stream measurement (baseline vs compressed).
156 pub input: StreamDelta,
157 /// Output-stream measurement.
158 pub output: StreamDelta,
159 /// Thinking-stream measurement (when provider exposes it).
160 pub thinking: Option<StreamDelta>,
161 /// Cost translation at capture time.
162 pub cost: CostEntry,
163 /// Cryptographic integrity fields.
164 pub integrity: Integrity,
165 /// Pithy dialect family used for compression (e.g. `"ultracos-symbolic-v1"`).
166 /// Added in v0.0.2. Populated by Phase-2 encoder; `None` for v0.1 records.
167 #[serde(default)]
168 pub dialect: Option<String>,
169 /// Ordered list of compression rules applied during encoding.
170 /// Added in v0.0.2. Populated by Phase-2 encoder; `None` for v0.1 records.
171 #[serde(default)]
172 pub rules_applied: Option<Vec<String>>,
173 /// Per-rule firing counts (alphabetical by rule name).
174 /// Added in v0.0.3. Populated by `EncoderTrace::as_pairs` so the
175 /// post-hoc analyzer can attribute savings and quality regressions
176 /// to specific rules without re-running the encoder. `None` when
177 /// the encoder did not run (Prose fallback) or the producer is on
178 /// the v0.0.2 wire schema.
179 #[serde(default)]
180 pub rule_fire_counts: Option<Vec<(String, u32)>>,
181 /// Optional shadow-arm quality measurement. Set when the proxy
182 /// ran the same prompt through both the compressed and the prose
183 /// arm at sample rate, so the closed-loop tuner can correlate
184 /// compression with response quality. Added in v0.0.3.
185 #[serde(default)]
186 pub quality: Option<QualityScore>,
187 /// Per-rule byte-savings attribution, in UTF-8 bytes of input payload.
188 /// Added in v0.0.4 as the substrate for PLAN Step 9. Aggregate
189 /// `delta_tokens` + `rule_fire_counts` tells us WHICH rules fired
190 /// and HOW OFTEN, but not WHICH BYTES each rule removed from the
191 /// payload. Step 8+ rule-vs-rule tuning needs the latter.
192 ///
193 /// Populated by the encoder once per-rule byte-delta callbacks are
194 /// wired (separate follow-up commit). Until then, this field is
195 /// `None` at produce time and consumers MUST treat `None` as "no
196 /// attribution available", not as "zero savings".
197 ///
198 /// Units are **bytes of input before vs after the rule ran**, not
199 /// tokens. The bandit converts to token-equivalents against the
200 /// target-model tokenizer at aggregation time, so this field stays
201 /// tokenizer-agnostic and doesn't drift when tokenizer choices
202 /// change (see B8 DeepSeek-V4 addition).
203 #[serde(default)]
204 pub bytes_saved_by_rule: Option<Vec<(String, u64)>>,
205}
206
207/// Optional quality measurement attached to a `Record` when the
208/// proxy ran a shadow comparison between the compressed and the
209/// prose arm at sample rate.
210///
211/// Recorded ONCE at sample time -- never re-derived from response
212/// text -- so the audit ledger remains the single source of truth.
213#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
214pub struct QualityScore {
215 /// Cosine similarity in `[0.0, 1.0]` of compressed-arm vs
216 /// prose-arm response embeddings. Pin the embedder via
217 /// `embedder_id` so values stay comparable across releases.
218 pub cosine: f32,
219 /// Categorical judge verdict: `"yes"`, `"partial"`, `"no"`, or
220 /// `"indeterminate"`. From the cheap comparator model.
221 pub judge_verdict: String,
222 /// Identifier of the model used for `judge_verdict`; pinned for
223 /// reproducibility (e.g. `"claude-haiku-4.5"`).
224 pub judge_model: String,
225 /// Identifier of the embedder used for `cosine`; pinned (e.g.
226 /// `"sentence-transformers/all-MiniLM-L6-v2@v2.7.0"`).
227 pub embedder_id: String,
228 /// Sample rate in `[0.0, 1.0]` this record was drawn at; lets
229 /// the aggregator weight per-record contributions when computing
230 /// fleet-level quality.
231 pub sample_rate: f32,
232}
233
234/// Per-stream delta between uncompressed and compressed.
235#[derive(Debug, Clone, Serialize, Deserialize)]
236pub struct StreamDelta {
237 pub baseline_tokens: u32,
238 pub compressed_tokens: u32,
239 pub delta_tokens: i32,
240 pub compressed_pct: f32,
241 pub format: Option<Format>,
242}
243
244/// Cost translation for a single record.
245#[derive(Debug, Clone, Serialize, Deserialize)]
246pub struct CostEntry {
247 pub currency: String,
248 pub baseline_cost_cents: f64,
249 pub actual_cost_cents: f64,
250 pub savings_cents: f64,
251 /// Opaque snapshot id pointing at the versioned pricing table in use.
252 pub pricing_snapshot_id: String,
253}
254
255/// Cryptographic fields of a signed record.
256/// Phase 1 must populate BOTH ed25519 and ML-DSA signatures (Day-1 PQC).
257#[derive(Debug, Clone, Serialize, Deserialize)]
258pub struct Integrity {
259 /// Blake3 hash of the prose input (hex).
260 pub prompt_hash: String,
261 /// Blake3 hash of the model response (hex).
262 pub response_hash: String,
263 /// Classical ed25519 signature over the canonicalized record (hex).
264 pub ed25519_signature: String,
265 /// Post-quantum ML-DSA (FIPS 204) signature (hex). Required.
266 pub mldsa_signature: String,
267 /// Identifier of the signing key pair.
268 pub signing_key_id: String,
269}
270
271/// Log of signed measurement records. Append-only; tampering detectable via merkle.
272pub trait MeasurementLog: Send + Sync {
273 /// Append a record to the log. Returns the record id on success.
274 fn record(&self, r: Record) -> Result<RecordId, LogError>;
275
276 /// Retrieve a record by id for spot-check audit.
277 fn get(&self, id: &RecordId) -> Result<Option<Record>, LogError>;
278}
279
280/// Opaque id for a stored record.
281#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
282pub struct RecordId(pub String);
283
284/// Error type for the measurement log.
285#[derive(Debug, thiserror::Error)]
286pub enum LogError {
287 #[error("log I/O failure: {0}")]
288 Io(String),
289 #[error("signing failure: {0}")]
290 Signing(String),
291 #[error("canonicalization failure: {0}")]
292 Canonicalization(String),
293}
294
295/// Dual-signing facade. Implementations sign with BOTH ed25519 and ML-DSA.
296pub trait Signer: Send + Sync {
297 /// Return (ed25519_sig_hex, mldsa_sig_hex) over `canonical_bytes`.
298 fn dual_sign(&self, canonical_bytes: &[u8]) -> Result<(String, String), SignerError>;
299
300 /// Return the signing-key-pair identifier recorded on every output.
301 fn key_id(&self) -> &str;
302}
303
304/// Error type from the signer.
305#[derive(Debug, thiserror::Error)]
306pub enum SignerError {
307 #[error("ed25519 error: {0}")]
308 Ed25519(String),
309 #[error("ML-DSA error: {0}")]
310 MlDsa(String),
311 #[error("key not loaded")]
312 KeyMissing,
313}
314
#[cfg(test)]
mod tests {
    use super::*;

    /// A unit variant of `Format` survives a JSON round trip unchanged.
    #[test]
    fn format_round_trip_json() {
        let f = Format::JitProgressive;
        let s = serde_json::to_string(&f).unwrap();
        let f2: Format = serde_json::from_str(&s).unwrap();
        assert_eq!(f, f2);
    }

    /// A payload-carrying `Model` variant serializes under its variant name
    /// and deserializes back to an equal value.
    #[test]
    fn model_custom_llama_supported() {
        let m = Model::Llama3Custom("meta-llama/Llama-3-70b".to_string());
        let s = serde_json::to_string(&m).unwrap();
        assert!(s.contains("Llama3Custom"));
        // A substring check alone would not catch a lost or altered payload.
        let back: Model = serde_json::from_str(&s).unwrap();
        assert_eq!(back, m);
    }

    /// Older payloads that omit the optional fields must still deserialize,
    /// and a populated `bytes_saved_by_rule` must round-trip.
    #[test]
    fn record_bytes_saved_by_rule_field_is_backward_compatible() {
        // B9/Step-9 substrate: a v0.0.3 wire-format record (no
        // bytes_saved_by_rule field at all) must still deserialize
        // cleanly against the v0.0.4 schema. This is the whole reason
        // the field is marked #[serde(default)].
        //
        // The fixture in fact omits every `#[serde(default)]` field, so it
        // exercises the default path of all of them.
        let v003_json = r#"{
            "request_id": "01HY0000000000000000000000",
            "ts": "2026-04-24T00:00:00.000Z",
            "tenant_id": "t",
            "model": "ClaudeOpus47",
            "route": "a2a",
            "input": {"baseline_tokens": 100, "compressed_tokens": 80, "delta_tokens": -20, "compressed_pct": 0.8, "format": null},
            "output": {"baseline_tokens": 50, "compressed_tokens": 50, "delta_tokens": 0, "compressed_pct": 1.0, "format": null},
            "thinking": null,
            "cost": {"currency": "USD", "baseline_cost_cents": 1.0, "actual_cost_cents": 0.8, "savings_cents": 0.2, "pricing_snapshot_id": "p1"},
            "integrity": {"prompt_hash": "a", "response_hash": "b", "ed25519_signature": "c", "mldsa_signature": "d", "signing_key_id": "k"}
        }"#;
        let r: Record =
            serde_json::from_str(v003_json).expect("v0.0.3 record must parse under v0.0.4 schema");
        assert!(
            r.bytes_saved_by_rule.is_none(),
            "missing field must deserialize as None, not surface an error"
        );
        // The other defaulted fields are absent from the fixture as well.
        assert!(r.dialect.is_none());
        assert!(r.rules_applied.is_none());
        assert!(r.rule_fire_counts.is_none());
        assert!(r.quality.is_none());

        // Round-trip with the new field populated.
        let with = Record {
            bytes_saved_by_rule: Some(vec![
                ("json_minified".to_string(), 42),
                ("term_substitutions".to_string(), 17),
            ]),
            ..r
        };
        let serialized = serde_json::to_string(&with).unwrap();
        assert!(serialized.contains("\"bytes_saved_by_rule\""));
        let back: Record = serde_json::from_str(&serialized).unwrap();
        assert_eq!(
            back.bytes_saved_by_rule.as_deref(),
            Some(
                &[
                    ("json_minified".to_string(), 42u64),
                    ("term_substitutions".to_string(), 17u64)
                ][..]
            )
        );
    }
}
380}