// pithy_core/interfaces.rs

//! Pithy interface contracts — FROZEN at Phase 0.
//!
//! This module defines the trait and type contracts that every other
//! crate in the workspace depends on. Semver commitment: any breaking
//! change to these types or traits requires a major-version bump and
//! coordinated release across all consumers.
//!
//! Copyright (c) 2026 Mikko Parkkola. All rights reserved.
//! Licensed under PolyForm Noncommercial 1.0 + Pithy Attribution Rider.

use serde::{Deserialize, Serialize};

13/// One of the four compression formats defined by the Pithy specification.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
15pub enum Format {
16    /// Symbolic notation (arrow chains, unicode glyphs, parenthetical state).
17    Symbolic,
18    /// Just-in-time progressive vocabulary binding: bind on first re-use.
19    JitProgressive,
20    /// Fragment prose: articles dropped, grammar compressed.
21    FragmentProse,
22    /// Structured delimiters: colon-key / pipe-separated fields.
23    StructuredDelim,
24    /// No compression applied. Fallback and baseline.
25    Prose,
26}
27
28/// The upstream LLM whose tokenizer rules the measurement.
29#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
30pub enum Model {
31    // Anthropic
32    ClaudeOpus47,
33    ClaudeSonnet47,
34    ClaudeHaiku47,
35    // OpenAI
36    Gpt5,
37    Gpt4o,
38    Gpt4,
39    // Google
40    Gemini25Ultra,
41    Gemini25Pro,
42    // Open weights
43    Llama3Custom(String),
44    Qwen3Custom(String),
45    // xAI
46    Grok4,
47    // Catch-all for registered tokenizers not yet in the enum
48    Registered(String),
49}
50
51/// Reason the encoder fell back to a less aggressive format.
52#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
53pub enum FallbackReason {
54    /// Selector judged compression would be net-negative.
55    Uncompressible,
56    /// Tokenizer not registered for the target model.
57    TokenizerMissing,
58    /// Downstream quality gate failed (semantic fidelity below threshold).
59    QualityDegraded,
60    /// Content exceeds max input size for the selected format.
61    OversizedInput,
62    /// Encoder panic or unrecoverable error.
63    EncoderFault,
64    /// Input contains markdown/structured markers (headings, list items,
65    /// fenced code blocks, or multiple paragraphs). Symbolic compression
66    /// collapses whitespace unconditionally via `MULTI_WS`, which
67    /// destroys paragraph breaks and newline-bearing structure.
68    /// This fallback preserves structure at the cost of some savings.
69    StructuredContent,
70}
71
72/// Output of a single compression operation.
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct Compressed {
75    /// Compressed text ready to send to an upstream LLM.
76    pub content: String,
77    /// Which format was selected.
78    pub format: Format,
79    /// Real tokenizer count of the original prose input.
80    pub baseline_tokens: u32,
81    /// Real tokenizer count of the compressed output.
82    pub compressed_tokens: u32,
83    /// Model whose tokenizer was used for the measurement.
84    pub model: Model,
85    /// Blake3 hash of the original prose input (32 bytes hex-encoded).
86    pub content_hash: String,
87    /// Fallback reason if format is Prose and baseline != compressed semantically.
88    pub fallback: Option<FallbackReason>,
89}
90
91/// A pass-through encoder interface. Implementors MUST preserve semantic fidelity
92/// of the compressed output at or above the Phase-0 kill-switch threshold (≥0.90
93/// cosine similarity under the target LLM's response).
94pub trait Encoder: Send + Sync {
95    /// Compress `input` for the target `model`. Must run in <5ms at p95.
96    /// Returns `Compressed` even on fallback; format field reports what was used.
97    fn compress(&self, input: &str, model: Model) -> Compressed;
98
99    /// Pick the best format for `input` without actually encoding. Used by
100    /// A/B samplers and planners. Must match [`compress`]'s selection.
101    fn select_format(&self, input: &str, model: Model) -> Format;
102
103    /// Explicit fallback request — used when downstream quality monitor flags
104    /// a previously-compressed payload.
105    fn fallback(&self, input: &str, model: Model, reason: FallbackReason) -> Compressed;
106}
107
108/// Count real tokens for the target model. No estimation, no synthetic fallback.
109pub trait Measurer: Send + Sync {
110    /// Returns the actual token count for `text` under `model`'s tokenizer.
111    /// Implementations MUST return `Err` for unregistered models rather than
112    /// silently approximating — this is a DoR gate (no hardcoded ratios).
113    fn tokenize(&self, text: &str, model: &Model) -> Result<u32, TokenizerError>;
114
115    /// Returns true iff the tokenizer for `model` is registered and callable.
116    fn supported(&self, model: &Model) -> bool;
117}
118
119/// Error from the measurer path. Kept narrow so downstream code can fall back.
120#[derive(Debug, thiserror::Error)]
121pub enum TokenizerError {
122    #[error("tokenizer for model {0:?} is not registered")]
123    NotRegistered(Model),
124    #[error("tokenizer I/O failure: {0}")]
125    Io(String),
126    #[error("tokenizer library error: {0}")]
127    Library(String),
128}
129
130/// A single signed measurement record. Wire format frozen at v0.1.
131///
132/// # Changelog
133/// - v0.0.2: Added `dialect` and `rules_applied` as backward-compatible
134///   `#[serde(default)]` optional fields. Both default to `None` for v0.1
135///   readers. Phase-2 Agent D's encoder populates them.
136/// - v0.0.4: Added `bytes_saved_by_rule` as backward-compatible
137///   `#[serde(default)]` optional field. `None` for v0.0.3-and-earlier
138///   readers. Wired by Step 9 of `docs/PLAN_2026-04-24.md` — schema
139///   lands now so future encoder instrumentation (per-rule byte-delta
140///   callbacks) can populate without a second migration. Until then
141///   the field is always `None` at produce time. Consumers must handle
142///   `None` gracefully (treat as "no attribution available").
143#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct Record {
145    /// ULID request identifier.
146    pub request_id: String,
147    /// ISO-8601 timestamp with millisecond precision.
148    pub ts: String,
149    /// Tenant id. Required for multi-tenant reporting.
150    pub tenant_id: String,
151    /// Model the request was routed to.
152    pub model: Model,
153    /// Route classification (agent-to-agent, agent-to-human, etc).
154    pub route: String,
155    /// Input-stream measurement (baseline vs compressed).
156    pub input: StreamDelta,
157    /// Output-stream measurement.
158    pub output: StreamDelta,
159    /// Thinking-stream measurement (when provider exposes it).
160    pub thinking: Option<StreamDelta>,
161    /// Cost translation at capture time.
162    pub cost: CostEntry,
163    /// Cryptographic integrity fields.
164    pub integrity: Integrity,
165    /// Pithy dialect family used for compression (e.g. `"ultracos-symbolic-v1"`).
166    /// Added in v0.0.2. Populated by Phase-2 encoder; `None` for v0.1 records.
167    #[serde(default)]
168    pub dialect: Option<String>,
169    /// Ordered list of compression rules applied during encoding.
170    /// Added in v0.0.2. Populated by Phase-2 encoder; `None` for v0.1 records.
171    #[serde(default)]
172    pub rules_applied: Option<Vec<String>>,
173    /// Per-rule firing counts (alphabetical by rule name).
174    /// Added in v0.0.3. Populated by `EncoderTrace::as_pairs` so the
175    /// post-hoc analyzer can attribute savings and quality regressions
176    /// to specific rules without re-running the encoder. `None` when
177    /// the encoder did not run (Prose fallback) or the producer is on
178    /// the v0.0.2 wire schema.
179    #[serde(default)]
180    pub rule_fire_counts: Option<Vec<(String, u32)>>,
181    /// Optional shadow-arm quality measurement. Set when the proxy
182    /// ran the same prompt through both the compressed and the prose
183    /// arm at sample rate, so the closed-loop tuner can correlate
184    /// compression with response quality. Added in v0.0.3.
185    #[serde(default)]
186    pub quality: Option<QualityScore>,
187    /// Per-rule byte-savings attribution, in UTF-8 bytes of input payload.
188    /// Added in v0.0.4 as the substrate for PLAN Step 9. Aggregate
189    /// `delta_tokens` + `rule_fire_counts` tells us WHICH rules fired
190    /// and HOW OFTEN, but not WHICH BYTES each rule removed from the
191    /// payload. Step 8+ rule-vs-rule tuning needs the latter.
192    ///
193    /// Populated by the encoder once per-rule byte-delta callbacks are
194    /// wired (separate follow-up commit). Until then, this field is
195    /// `None` at produce time and consumers MUST treat `None` as "no
196    /// attribution available", not as "zero savings".
197    ///
198    /// Units are **bytes of input before vs after the rule ran**, not
199    /// tokens. The bandit converts to token-equivalents against the
200    /// target-model tokenizer at aggregation time, so this field stays
201    /// tokenizer-agnostic and doesn't drift when tokenizer choices
202    /// change (see B8 DeepSeek-V4 addition).
203    #[serde(default)]
204    pub bytes_saved_by_rule: Option<Vec<(String, u64)>>,
205}
206
207/// Optional quality measurement attached to a `Record` when the
208/// proxy ran a shadow comparison between the compressed and the
209/// prose arm at sample rate.
210///
211/// Recorded ONCE at sample time -- never re-derived from response
212/// text -- so the audit ledger remains the single source of truth.
213#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
214pub struct QualityScore {
215    /// Cosine similarity in `[0.0, 1.0]` of compressed-arm vs
216    /// prose-arm response embeddings. Pin the embedder via
217    /// `embedder_id` so values stay comparable across releases.
218    pub cosine: f32,
219    /// Categorical judge verdict: `"yes"`, `"partial"`, `"no"`, or
220    /// `"indeterminate"`. From the cheap comparator model.
221    pub judge_verdict: String,
222    /// Identifier of the model used for `judge_verdict`; pinned for
223    /// reproducibility (e.g. `"claude-haiku-4.5"`).
224    pub judge_model: String,
225    /// Identifier of the embedder used for `cosine`; pinned (e.g.
226    /// `"sentence-transformers/all-MiniLM-L6-v2@v2.7.0"`).
227    pub embedder_id: String,
228    /// Sample rate in `[0.0, 1.0]` this record was drawn at; lets
229    /// the aggregator weight per-record contributions when computing
230    /// fleet-level quality.
231    pub sample_rate: f32,
232}
233
234/// Per-stream delta between uncompressed and compressed.
235#[derive(Debug, Clone, Serialize, Deserialize)]
236pub struct StreamDelta {
237    pub baseline_tokens: u32,
238    pub compressed_tokens: u32,
239    pub delta_tokens: i32,
240    pub compressed_pct: f32,
241    pub format: Option<Format>,
242}
243
244/// Cost translation for a single record.
245#[derive(Debug, Clone, Serialize, Deserialize)]
246pub struct CostEntry {
247    pub currency: String,
248    pub baseline_cost_cents: f64,
249    pub actual_cost_cents: f64,
250    pub savings_cents: f64,
251    /// Opaque snapshot id pointing at the versioned pricing table in use.
252    pub pricing_snapshot_id: String,
253}
254
255/// Cryptographic fields of a signed record.
256/// Phase 1 must populate BOTH ed25519 and ML-DSA signatures (Day-1 PQC).
257#[derive(Debug, Clone, Serialize, Deserialize)]
258pub struct Integrity {
259    /// Blake3 hash of the prose input (hex).
260    pub prompt_hash: String,
261    /// Blake3 hash of the model response (hex).
262    pub response_hash: String,
263    /// Classical ed25519 signature over the canonicalized record (hex).
264    pub ed25519_signature: String,
265    /// Post-quantum ML-DSA (FIPS 204) signature (hex). Required.
266    pub mldsa_signature: String,
267    /// Identifier of the signing key pair.
268    pub signing_key_id: String,
269}
270
271/// Log of signed measurement records. Append-only; tampering detectable via merkle.
272pub trait MeasurementLog: Send + Sync {
273    /// Append a record to the log. Returns the record id on success.
274    fn record(&self, r: Record) -> Result<RecordId, LogError>;
275
276    /// Retrieve a record by id for spot-check audit.
277    fn get(&self, id: &RecordId) -> Result<Option<Record>, LogError>;
278}
279
280/// Opaque id for a stored record.
281#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
282pub struct RecordId(pub String);
283
284/// Error type for the measurement log.
285#[derive(Debug, thiserror::Error)]
286pub enum LogError {
287    #[error("log I/O failure: {0}")]
288    Io(String),
289    #[error("signing failure: {0}")]
290    Signing(String),
291    #[error("canonicalization failure: {0}")]
292    Canonicalization(String),
293}
294
295/// Dual-signing facade. Implementations sign with BOTH ed25519 and ML-DSA.
296pub trait Signer: Send + Sync {
297    /// Return (ed25519_sig_hex, mldsa_sig_hex) over `canonical_bytes`.
298    fn dual_sign(&self, canonical_bytes: &[u8]) -> Result<(String, String), SignerError>;
299
300    /// Return the signing-key-pair identifier recorded on every output.
301    fn key_id(&self) -> &str;
302}
303
304/// Error type from the signer.
305#[derive(Debug, thiserror::Error)]
306pub enum SignerError {
307    #[error("ed25519 error: {0}")]
308    Ed25519(String),
309    #[error("ML-DSA error: {0}")]
310    MlDsa(String),
311    #[error("key not loaded")]
312    KeyMissing,
313}
314
#[cfg(test)]
mod tests {
    use super::*;

    /// A `Format` value must survive a JSON round trip unchanged.
    #[test]
    fn format_round_trip_json() {
        let f = Format::JitProgressive;
        let s = serde_json::to_string(&f).unwrap();
        let f2: Format = serde_json::from_str(&s).unwrap();
        assert_eq!(f, f2);
    }

    /// A payload-carrying `Model` variant serializes under its variant name
    /// and deserializes back to an equal value.
    #[test]
    fn model_custom_llama_supported() {
        let m = Model::Llama3Custom("meta-llama/Llama-3-70b".to_string());
        let s = serde_json::to_string(&m).unwrap();
        assert!(s.contains("Llama3Custom"));
        // A substring check alone would miss a dropped or mangled payload,
        // so also require a lossless round trip.
        let back: Model = serde_json::from_str(&s).expect("serialized Model must parse back");
        assert_eq!(back, m);
    }

    /// Records written before the optional fields existed must still parse,
    /// with each missing field read as `None`.
    #[test]
    fn record_bytes_saved_by_rule_field_is_backward_compatible() {
        // B9/Step-9 substrate: a v0.0.3 wire-format record (no
        // bytes_saved_by_rule field at all) must still deserialize
        // cleanly against the v0.0.4 schema. This is the whole reason
        // the field is marked #[serde(default)].
        //
        // The fixture omits every `#[serde(default)]` field, so it also
        // exercises the fields added in earlier schema revisions.
        let v003_json = r#"{
            "request_id": "01HY0000000000000000000000",
            "ts": "2026-04-24T00:00:00.000Z",
            "tenant_id": "t",
            "model": "ClaudeOpus47",
            "route": "a2a",
            "input":  {"baseline_tokens": 100, "compressed_tokens": 80, "delta_tokens": -20, "compressed_pct": 0.8, "format": null},
            "output": {"baseline_tokens": 50,  "compressed_tokens": 50, "delta_tokens":  0, "compressed_pct": 1.0, "format": null},
            "thinking": null,
            "cost": {"currency": "USD", "baseline_cost_cents": 1.0, "actual_cost_cents": 0.8, "savings_cents": 0.2, "pricing_snapshot_id": "p1"},
            "integrity": {"prompt_hash": "a", "response_hash": "b", "ed25519_signature": "c", "mldsa_signature": "d", "signing_key_id": "k"}
        }"#;
        let r: Record =
            serde_json::from_str(v003_json).expect("v0.0.3 record must parse under v0.0.4 schema");
        assert!(
            r.bytes_saved_by_rule.is_none(),
            "missing field must deserialize as None, not surface an error"
        );
        // The other defaulted fields are absent from the fixture as well.
        assert!(r.dialect.is_none(), "missing dialect must read as None");
        assert!(
            r.rules_applied.is_none(),
            "missing rules_applied must read as None"
        );
        assert!(
            r.rule_fire_counts.is_none(),
            "missing rule_fire_counts must read as None"
        );
        assert!(r.quality.is_none(), "missing quality must read as None");

        // Round-trip with the new field populated.
        let with = Record {
            bytes_saved_by_rule: Some(vec![
                ("json_minified".to_string(), 42),
                ("term_substitutions".to_string(), 17),
            ]),
            ..r
        };
        let serialized = serde_json::to_string(&with).unwrap();
        assert!(serialized.contains("\"bytes_saved_by_rule\""));
        let back: Record = serde_json::from_str(&serialized).unwrap();
        assert_eq!(
            back.bytes_saved_by_rule.as_deref(),
            Some(
                &[
                    ("json_minified".to_string(), 42u64),
                    ("term_substitutions".to_string(), 17u64)
                ][..]
            )
        );
    }
}