harn_vm/llm/
cache_conformance.rs

1//! Prompt-cache conformance probe + classifier for Harn providers.
2//!
3//! The classifier is the stable contract Burin dogfood (#3532) and Harn Cloud
4//! receipts (#1106) consume; a live repeat-run HTTP probe is a convenience
5//! around it. Given a provider/model and one-or-more repeat runs of a
6//! stable-prefix request, this module:
7//!
8//! - resolves prompt-cache SUPPORT + cache-control requirements from the single
9//!   provider capability path ([`crate::llm::capabilities::lookup`]), projecting
10//!   a self-describing [`CacheControlProfile`] (breakpoint style, minimum useful
11//!   prefix, TTL notes, and the provider usage-field mapping);
12//! - normalizes each run's usage keeping fresh-input / cache-read / cache-write /
13//!   output / unknown-missing SEPARATE ([`NormalizedCacheUsage`]);
14//! - classifies each run into one stable bucket
15//!   ([`CacheConformanceClassification`]); and
16//! - aggregates a report verdict a repeat run can act on.
17//!
18//! The taxonomy here is the Harn-owned home for what Burin's
19//! `lib/runtime/model-selection.harn` bootstrapped: support classification plus
20//! the observation buckets. Product/runtime layers read this one verdict rather
21//! than re-deriving provider behavior.
22//!
23//! A missing provider usage field is recorded as an OBSERVATION
24//! ([`NormalizedCacheUsage::missing_fields`]); it never re-classifies a route to
25//! "unsupported". Only the capability matrix decides support.
26
27use serde::{Deserialize, Serialize};
28use serde_json::Value;
29
30use crate::llm::capabilities::{self, Capabilities, WireDialect};
31
/// Wire-format version of [`CacheConformanceReport`]. Bump on a breaking shape
/// change so Burin/Cloud consumers can gate on the contract they parse.
///
/// [`report_from_runs`] stamps this value into the `schema_version` field of
/// every report it assembles.
pub const CACHE_CONFORMANCE_SCHEMA_VERSION: u32 = 1;
35
/// Cache-control requirements for a `(provider, model)` route, derived from the
/// single provider capability path. This is the self-describing capability the
/// issue asks Harn to expose: cache-control strategy, minimum useful prefix,
/// TTL notes, and the usage-field mapping — one source, no per-call-site
/// provider branching.
///
/// Build one with [`CacheControlProfile::from_capabilities`]; the field order
/// below is also the serialized key order.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CacheControlProfile {
    /// Whether the route reports prompt-cache accounting at all
    /// ([`Capabilities::prompt_caching`]).
    pub prompt_caching: bool,
    /// Request-side cache breakpoint strategy: `none`, `top_level`, or
    /// `last_block` ([`Capabilities::cache_breakpoint_style`]). Copied from
    /// the capability row as-is, even when `prompt_caching` is `false`.
    pub cache_breakpoint_style: String,
    /// Minimum prompt-prefix tokens below which a provider will not create or
    /// serve a cache entry, so a zero cache-read on a short prefix is expected
    /// rather than a miss. `None` when the route reports no cache accounting,
    /// or when the wire dialect's table entry is `0` (native Ollama).
    pub min_useful_prefix_tokens: Option<u32>,
    /// Human-readable cache time-to-live / eviction notes for the route. `None`
    /// when the route reports no cache accounting.
    pub ttl_notes: Option<String>,
    /// Provider response usage field that carries cache-read (served-from-cache)
    /// prompt tokens, in dotted path form. Empty when the route reports none.
    pub cache_read_usage_field: String,
    /// Provider response usage field that carries cache-write (cache-creation)
    /// prompt tokens, in dotted path form. Empty when the route neither reports
    /// nor bills a separate cache-write field (OpenAI-style automatic caching).
    pub cache_write_usage_field: String,
}
64
65impl CacheControlProfile {
66    /// Derive the cache-control profile from resolved [`Capabilities`]. Minimum
67    /// prefix, TTL notes, and the usage-field mapping are wire-dialect facts, so
68    /// they live here keyed off the one capability path rather than duplicated
69    /// per model row or per call site.
70    pub fn from_capabilities(caps: &Capabilities) -> Self {
71        if !caps.prompt_caching {
72            return Self {
73                prompt_caching: false,
74                cache_breakpoint_style: caps.cache_breakpoint_style.clone(),
75                min_useful_prefix_tokens: None,
76                ttl_notes: None,
77                cache_read_usage_field: String::new(),
78                cache_write_usage_field: String::new(),
79            };
80        }
81        let (min_prefix, ttl, read_field, write_field) = match caps.message_wire_format {
82            WireDialect::Anthropic => (
83                1024,
84                "5m default breakpoint TTL; 1h with the extended-cache-ttl beta",
85                "usage.cache_read_input_tokens",
86                "usage.cache_creation_input_tokens",
87            ),
88            WireDialect::Gemini => (
89                1024,
90                "Implicit caching with provider-managed eviction; explicit cachedContent honors a caller TTL",
91                "usageMetadata.cachedContentTokenCount",
92                "",
93            ),
94            // OpenAI-compatible routes (including OpenRouter's OpenAI passthrough)
95            // cache automatically with no separate cache-write field billed.
96            WireDialect::OpenAiCompat => (
97                1024,
98                "Automatic prefix caching; entries idle-evict after ~5-10 minutes",
99                "usage.prompt_tokens_details.cached_tokens",
100                "",
101            ),
102            // Native Ollama reports no cache accounting; a prompt_caching=true
103            // rule on this dialect is unexpected, so surface the normalized
104            // fields and let the miss classify on capability support.
105            WireDialect::Ollama => (0, "No provider-reported cache accounting", "", ""),
106        };
107        Self {
108            prompt_caching: true,
109            cache_breakpoint_style: caps.cache_breakpoint_style.clone(),
110            min_useful_prefix_tokens: if min_prefix > 0 {
111                Some(min_prefix)
112            } else {
113                None
114            },
115            ttl_notes: if ttl.is_empty() {
116                None
117            } else {
118                Some(ttl.to_string())
119            },
120            cache_read_usage_field: read_field.to_string(),
121            cache_write_usage_field: write_field.to_string(),
122        }
123    }
124}
125
/// Capability-derived prompt-cache support verdict. `Unknown` is distinct from
/// `Unsupported`: an unresolved provider/model (empty or `auto`) is not proof of
/// no support, matching the missing-field rule.
///
/// Serialized in snake_case (for example `cache_support_unknown`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PromptCacheSupportStatus {
    /// The capability lookup reported `prompt_caching = true` for the route.
    CacheSupported,
    /// The capability lookup reported `prompt_caching = false` for the route.
    CacheUnsupported,
    /// The provider/model pair was not concrete enough to look up (see
    /// [`prompt_cache_support`]).
    CacheSupportUnknown,
}

impl PromptCacheSupportStatus {
    /// Stable snake_case label for this status. The literals mirror the names
    /// the `rename_all = "snake_case"` serde attribute gives the variants.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::CacheSupported => "cache_supported",
            Self::CacheUnsupported => "cache_unsupported",
            Self::CacheSupportUnknown => "cache_support_unknown",
        }
    }
}
146
/// Prompt-cache support resolved from the provider capability path, plus the
/// cache-control profile consumers need to explain a zero cache-read.
///
/// Produced by [`prompt_cache_support`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PromptCacheSupport {
    /// Three-way support verdict; this is what run classification and the
    /// aggregate verdict branch on.
    pub status: PromptCacheSupportStatus,
    /// `Some(true)` / `Some(false)` from the capability matrix; `None` when the
    /// provider/model didn't resolve to a concrete route.
    pub supported: Option<bool>,
    /// `provider-prompt-cache` when supported, `none` when explicitly
    /// unsupported, absent when unknown.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_tier: Option<String>,
    /// Provider name with surrounding whitespace trimmed, as used for the
    /// capability lookup.
    pub resolved_provider: String,
    /// Model name with surrounding whitespace trimmed, as used for the
    /// capability lookup.
    pub resolved_model: String,
    /// Where the verdict came from: `provider-capabilities` after a capability
    /// lookup, or `unresolved` when no lookup was attempted.
    pub source: String,
    /// Cache-control requirements for the route; an empty, `none`-style profile
    /// when the route is unresolved.
    pub profile: CacheControlProfile,
}
164
165/// Resolve prompt-cache support for a `(provider, model)` pair from the single
166/// provider capability path. An empty or `auto` provider (or empty model)
167/// resolves to `Unknown` rather than fabricating an unsupported verdict.
168pub fn prompt_cache_support(provider: &str, model: &str) -> PromptCacheSupport {
169    let provider_key = provider.trim();
170    let model_key = model.trim();
171    let unresolved = provider_key.is_empty()
172        || provider_key.eq_ignore_ascii_case("auto")
173        || model_key.is_empty();
174    if unresolved {
175        return PromptCacheSupport {
176            status: PromptCacheSupportStatus::CacheSupportUnknown,
177            supported: None,
178            cache_tier: None,
179            resolved_provider: provider_key.to_string(),
180            resolved_model: model_key.to_string(),
181            source: "unresolved".to_string(),
182            profile: CacheControlProfile {
183                prompt_caching: false,
184                cache_breakpoint_style: "none".to_string(),
185                min_useful_prefix_tokens: None,
186                ttl_notes: None,
187                cache_read_usage_field: String::new(),
188                cache_write_usage_field: String::new(),
189            },
190        };
191    }
192    let caps = capabilities::lookup(provider_key, model_key);
193    let profile = CacheControlProfile::from_capabilities(&caps);
194    let (status, cache_tier) = if caps.prompt_caching {
195        (
196            PromptCacheSupportStatus::CacheSupported,
197            Some("provider-prompt-cache".to_string()),
198        )
199    } else {
200        (
201            PromptCacheSupportStatus::CacheUnsupported,
202            Some("none".to_string()),
203        )
204    };
205    PromptCacheSupport {
206        status,
207        supported: Some(caps.prompt_caching),
208        cache_tier,
209        resolved_provider: provider_key.to_string(),
210        resolved_model: model_key.to_string(),
211        source: "provider-capabilities".to_string(),
212        profile,
213    }
214}
215
/// Normalized cache usage for one run. Fresh-input, cache-read, cache-write, and
/// output token counts stay SEPARATE; fields the provider omitted are recorded
/// in `missing_fields` as an observation, never folded into a zero that would
/// read as "no support".
///
/// Build one from a JSON usage object with
/// [`NormalizedCacheUsage::from_usage_value`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct NormalizedCacheUsage {
    /// Total prompt tokens as the provider reported them (cache-read tokens are
    /// included here on providers that count them toward the prompt total).
    pub input_tokens: i64,
    /// Prompt tokens billed as fresh (non-cached) input: `input - read - write`,
    /// clamped at 0.
    pub fresh_input_tokens: i64,
    /// Prompt tokens served from the provider cache.
    pub cache_read_tokens: i64,
    /// Prompt tokens written to the provider cache on this request.
    pub cache_write_tokens: i64,
    /// Completion/output tokens as the provider reported them; `0` when the
    /// field was absent (in which case it is listed in `missing_fields`).
    pub output_tokens: i64,
    /// Whether the provider reported any cache accounting field for this run.
    /// `false` means "unknown", not "0% hit". An explicit boolean
    /// `cache_supported` key in the usage object takes precedence over the
    /// presence of read/write fields.
    pub cache_supported: bool,
    /// Usage fields the provider response did not carry (e.g. `cache_read_tokens`
    /// on a native-Ollama done frame). Diagnostic only.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub missing_fields: Vec<String>,
}
241
242fn usage_i64(usage: &serde_json::Map<String, Value>, keys: &[&str]) -> Option<i64> {
243    for key in keys {
244        if let Some(found) = usage.get(*key).and_then(Value::as_i64) {
245            return Some(found);
246        }
247    }
248    None
249}
250
251impl NormalizedCacheUsage {
252    /// Normalize a usage object that may be Harn's own usage dict shape or a raw
253    /// provider usage object. Accepts the provider aliases Harn already reads in
254    /// [`crate::llm::jsonl`] and [`crate::llm::api::result`]
255    /// (`cache_creation_input_tokens`, `cache_read_input_tokens`,
256    /// `prompt_tokens_details.cached_tokens`), so a fixture can be a saved
257    /// provider response or a normalized transcript usage entry.
258    pub fn from_usage_value(usage: &Value) -> Self {
259        let Some(object) = usage.as_object() else {
260            return Self {
261                input_tokens: 0,
262                fresh_input_tokens: 0,
263                cache_read_tokens: 0,
264                cache_write_tokens: 0,
265                output_tokens: 0,
266                cache_supported: false,
267                missing_fields: vec!["usage".to_string()],
268            };
269        };
270        let mut missing_fields = Vec::new();
271
272        let input_tokens =
273            usage_i64(object, &["input_tokens", "prompt_tokens"]).unwrap_or_else(|| {
274                missing_fields.push("input_tokens".to_string());
275                0
276            });
277        let output_tokens = usage_i64(object, &["output_tokens", "completion_tokens"])
278            .unwrap_or_else(|| {
279                missing_fields.push("output_tokens".to_string());
280                0
281            });
282
283        // A provider "reports cache accounting" when it carries an explicit
284        // read/write field OR an explicit cache_supported flag. Native local
285        // runtimes carry neither, so a 0 there is unknown, not a real miss.
286        let explicit_supported = object.get("cache_supported").and_then(Value::as_bool);
287        let cache_read = usage_i64(
288            object,
289            &[
290                "cache_read_tokens",
291                "cache_read_input_tokens",
292                "cached_tokens",
293            ],
294        )
295        .or_else(|| nested_cached_tokens(object));
296        let cache_write = usage_i64(
297            object,
298            &["cache_write_tokens", "cache_creation_input_tokens"],
299        );
300        if cache_read.is_none() {
301            missing_fields.push("cache_read_tokens".to_string());
302        }
303        if cache_write.is_none() {
304            missing_fields.push("cache_write_tokens".to_string());
305        }
306        let cache_read_tokens = cache_read.unwrap_or(0);
307        let cache_write_tokens = cache_write.unwrap_or(0);
308        let cache_supported = match explicit_supported {
309            Some(flag) => flag,
310            None => cache_read.is_some() || cache_write.is_some(),
311        };
312        let fresh_input_tokens = (input_tokens - cache_read_tokens - cache_write_tokens).max(0);
313        Self {
314            input_tokens,
315            fresh_input_tokens,
316            cache_read_tokens,
317            cache_write_tokens,
318            output_tokens,
319            cache_supported,
320            missing_fields,
321        }
322    }
323}
324
325fn nested_cached_tokens(object: &serde_json::Map<String, Value>) -> Option<i64> {
326    object
327        .get("prompt_tokens_details")
328        .and_then(Value::as_object)
329        .and_then(|details| details.get("cached_tokens"))
330        .and_then(Value::as_i64)
331}
332
/// The stable observation bucket for one repeat run. `ProviderFieldInconsistent`
/// flags a response whose own usage fields contradict each other so a consumer
/// never trusts a cache verdict built on bad numbers.
///
/// Assigned by [`classify_cache_run`]; serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CacheConformanceClassification {
    /// Cache-read tokens > 0: the cache served part of the prefix.
    CacheEffective,
    /// Capability says the route caches, but this run read 0 from cache.
    CacheSupportedMiss,
    /// Capability says the route does NOT cache; a 0 read is expected.
    UnsupportedZero,
    /// Capability could not resolve support; a 0 read is inconclusive.
    SupportUnknownZero,
    /// No prompt tokens on the request, so cache behavior is undefined.
    NoPromptTokens,
    /// The run's own usage fields contradict each other (e.g. cache tokens
    /// exceed the prompt total, or a read on a route that flagged no support).
    ProviderFieldInconsistent,
}

impl CacheConformanceClassification {
    /// Stable snake_case label for this bucket. The literals mirror the names
    /// the `rename_all = "snake_case"` serde attribute gives the variants.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::CacheEffective => "cache_effective",
            Self::CacheSupportedMiss => "cache_supported_miss",
            Self::UnsupportedZero => "unsupported_zero",
            Self::SupportUnknownZero => "support_unknown_zero",
            Self::NoPromptTokens => "no_prompt_tokens",
            Self::ProviderFieldInconsistent => "provider_field_inconsistent",
        }
    }
}
366
367/// Detect a self-contradictory usage report. Returns a human reason when the
368/// numbers can't be trusted, else `None`.
369fn field_inconsistency(usage: &NormalizedCacheUsage) -> Option<String> {
370    if usage.input_tokens < 0
371        || usage.output_tokens < 0
372        || usage.cache_read_tokens < 0
373        || usage.cache_write_tokens < 0
374    {
375        return Some("negative token count".to_string());
376    }
377    // A read with no prompt at all can't have come from this prompt's cache.
378    if usage.input_tokens <= 0 && (usage.cache_read_tokens > 0 || usage.cache_write_tokens > 0) {
379        return Some("cache tokens reported with zero prompt tokens".to_string());
380    }
381    if usage.input_tokens > 0
382        && usage.cache_read_tokens + usage.cache_write_tokens > usage.input_tokens
383    {
384        return Some("cache-read + cache-write exceed prompt tokens".to_string());
385    }
386    // Provider both flagged "no cache accounting" AND reported cache tokens.
387    if !usage.cache_supported && (usage.cache_read_tokens > 0 || usage.cache_write_tokens > 0) {
388        return Some("cache tokens reported while cache_supported=false".to_string());
389    }
390    None
391}
392
393/// Classify one run from its normalized usage and the capability support
394/// verdict. Support status — never the presence/absence of a usage field —
395/// decides the zero-read bucket, so a missing field can't masquerade as
396/// "unsupported".
397pub fn classify_cache_run(
398    usage: &NormalizedCacheUsage,
399    support: &PromptCacheSupport,
400) -> CacheConformanceClassification {
401    if field_inconsistency(usage).is_some() {
402        return CacheConformanceClassification::ProviderFieldInconsistent;
403    }
404    if usage.input_tokens <= 0 {
405        return CacheConformanceClassification::NoPromptTokens;
406    }
407    if usage.cache_read_tokens > 0 {
408        return CacheConformanceClassification::CacheEffective;
409    }
410    match support.status {
411        PromptCacheSupportStatus::CacheSupported => {
412            CacheConformanceClassification::CacheSupportedMiss
413        }
414        PromptCacheSupportStatus::CacheUnsupported => {
415            CacheConformanceClassification::UnsupportedZero
416        }
417        PromptCacheSupportStatus::CacheSupportUnknown => {
418            CacheConformanceClassification::SupportUnknownZero
419        }
420    }
421}
422
/// The stable identity of the request whose prefix must stay fixed across repeat
/// runs for a cache-read to mean anything. Captured (not the raw bytes, which
/// may carry secrets) so a consumer can confirm the runs were actually
/// comparable.
///
/// Every field is optional and is omitted from the serialized form when
/// `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct CacheRequestIdentity {
    /// NOTE(review): presumably a label for the task/scenario the request
    /// belongs to — confirm with the producer of these entries.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub task: Option<String>,
    /// NOTE(review): presumably a SHA-256 digest of the stable prompt prefix —
    /// confirm encoding (hex?) with the producer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prefix_sha256: Option<String>,
    /// NOTE(review): presumably an estimated token count for the stable
    /// prefix — confirm which tokenizer/estimator is used.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prefix_tokens_estimate: Option<u32>,
    /// NOTE(review): presumably a SHA-256 digest of the tool schema sent with
    /// the request — confirm with the producer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_schema_sha256: Option<String>,
    /// NOTE(review): presumably a SHA-256 digest of the request settings —
    /// confirm which settings are covered.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub settings_sha256: Option<String>,
}
440
/// One repeat run: request identity, normalized usage, classification, timing.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CacheConformanceRun {
    /// Position of the run in the repeat sequence. The aggregate verdict treats
    /// only runs with an index greater than 0 as repeat runs.
    pub run_index: usize,
    /// Identity of the request, when the fixture entry carried a parseable
    /// `request` object.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub request: Option<CacheRequestIdentity>,
    /// Usage counts normalized from the run's usage object.
    pub usage: NormalizedCacheUsage,
    /// Observation bucket assigned by [`classify_cache_run`].
    pub classification: CacheConformanceClassification,
    /// Human-readable reason when the usage fields contradict each other;
    /// absent for a consistent run.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub inconsistency_reason: Option<String>,
    /// Elapsed time for the run in milliseconds, when the entry recorded one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub elapsed_ms: Option<u64>,
    /// Raw provider usage object as captured, for downstream audit. Preserved
    /// verbatim so a consumer can re-derive without re-running the provider.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub raw_usage: Option<Value>,
}
458
/// Report-level cache verdict aggregated across repeat runs — the one signal
/// Burin dogfood and Cloud receipts key on.
///
/// Serialized in snake_case; [`CacheVerdict::as_str`] returns the same labels.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CacheVerdict {
    /// A run after the first read from cache: repeat caching works.
    CacheEffective,
    /// Route caches per capability, but no repeat run read from cache.
    CacheSupportedMiss,
    /// Route does not cache per capability; zero reads are expected.
    UnsupportedZero,
    /// Support unknown and no reads observed.
    SupportUnknownZero,
    /// At least one run's usage fields were self-contradictory.
    ProviderFieldInconsistent,
    /// No run carried prompt tokens.
    NoPromptTokens,
    /// Fewer than two runs, so repeat-cache behavior can't be judged. Only
    /// produced for routes the capability path marks as cache-supported.
    InsufficientRuns,
}

impl CacheVerdict {
    /// Stable snake_case label for this verdict. The literals mirror the names
    /// the `rename_all = "snake_case"` serde attribute gives the variants.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::CacheEffective => "cache_effective",
            Self::CacheSupportedMiss => "cache_supported_miss",
            Self::UnsupportedZero => "unsupported_zero",
            Self::SupportUnknownZero => "support_unknown_zero",
            Self::ProviderFieldInconsistent => "provider_field_inconsistent",
            Self::NoPromptTokens => "no_prompt_tokens",
            Self::InsufficientRuns => "insufficient_runs",
        }
    }

    /// Whether this verdict should fail product dogfood. A non-cache provider
    /// classifying as `unsupported_zero` is NOT a failure; only a supported
    /// route that never caches, or a provider reporting contradictory fields,
    /// is a real conformance failure.
    ///
    /// Returns `true` exactly for `CacheSupportedMiss` and
    /// `ProviderFieldInconsistent`.
    pub fn is_dogfood_failure(self) -> bool {
        matches!(
            self,
            Self::CacheSupportedMiss | Self::ProviderFieldInconsistent
        )
    }
}
504
/// Per-bucket run counts for report rollups. Mirrors Burin's
/// `prompt_cache_observation_bucket_counts`, now Harn-owned.
///
/// There is one counter per [`CacheConformanceClassification`] variant; each
/// run increments exactly one of them, so the counters sum to the run count.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct CacheConformanceBucketCounts {
    /// Runs classified `CacheEffective`.
    pub cache_effective: usize,
    /// Runs classified `CacheSupportedMiss`.
    pub cache_supported_miss: usize,
    /// Runs classified `UnsupportedZero`.
    pub unsupported_zero: usize,
    /// Runs classified `SupportUnknownZero`.
    pub support_unknown_zero: usize,
    /// Runs classified `NoPromptTokens`.
    pub no_prompt_tokens: usize,
    /// Runs classified `ProviderFieldInconsistent`.
    pub provider_field_inconsistent: usize,
}
516
517impl CacheConformanceBucketCounts {
518    fn tally(runs: &[CacheConformanceRun]) -> Self {
519        let mut counts = Self::default();
520        for run in runs {
521            match run.classification {
522                CacheConformanceClassification::CacheEffective => counts.cache_effective += 1,
523                CacheConformanceClassification::CacheSupportedMiss => {
524                    counts.cache_supported_miss += 1;
525                }
526                CacheConformanceClassification::UnsupportedZero => counts.unsupported_zero += 1,
527                CacheConformanceClassification::SupportUnknownZero => {
528                    counts.support_unknown_zero += 1;
529                }
530                CacheConformanceClassification::NoPromptTokens => counts.no_prompt_tokens += 1,
531                CacheConformanceClassification::ProviderFieldInconsistent => {
532                    counts.provider_field_inconsistent += 1;
533                }
534            }
535        }
536        counts
537    }
538}
539
/// The full conformance report: capability support + per-run observations + one
/// aggregate verdict, consumable by Burin #3532 and Harn Cloud #1106 without
/// reclassifying provider behavior.
///
/// Assembled by [`report_from_runs`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CacheConformanceReport {
    /// Wire-format version; set from [`CACHE_CONFORMANCE_SCHEMA_VERSION`].
    pub schema_version: u32,
    /// Provider name as passed to [`report_from_runs`].
    pub provider: String,
    /// Model name as passed to [`report_from_runs`].
    pub model: String,
    /// Capability-derived support verdict and cache-control profile.
    pub support: PromptCacheSupport,
    /// The classified runs, in the order they were supplied.
    pub runs: Vec<CacheConformanceRun>,
    /// Number of runs per classification bucket.
    pub bucket_counts: CacheConformanceBucketCounts,
    /// Aggregate verdict across all runs.
    pub verdict: CacheVerdict,
    /// Whether `verdict` should fail product dogfood (mirror of
    /// [`CacheVerdict::is_dogfood_failure`], serialized for consumers that read
    /// JSON without the enum semantics).
    pub dogfood_failure: bool,
}
557
558fn aggregate_verdict(runs: &[CacheConformanceRun], support: &PromptCacheSupport) -> CacheVerdict {
559    if runs
560        .iter()
561        .any(|run| run.classification == CacheConformanceClassification::ProviderFieldInconsistent)
562    {
563        return CacheVerdict::ProviderFieldInconsistent;
564    }
565    // A repeat run (index > 0) reading from cache is the positive signal; a
566    // first-run read alone can't prove repeat caching.
567    let repeat_cache_read = runs.iter().any(|run| {
568        run.run_index > 0 && run.classification == CacheConformanceClassification::CacheEffective
569    });
570    if repeat_cache_read {
571        return CacheVerdict::CacheEffective;
572    }
573    // A single run that read from cache (e.g. a warm fixture) still confirms the
574    // cache served this prefix.
575    let any_cache_read = runs
576        .iter()
577        .any(|run| run.classification == CacheConformanceClassification::CacheEffective);
578    let all_no_prompt = !runs.is_empty()
579        && runs
580            .iter()
581            .all(|run| run.classification == CacheConformanceClassification::NoPromptTokens);
582    if all_no_prompt {
583        return CacheVerdict::NoPromptTokens;
584    }
585    match support.status {
586        PromptCacheSupportStatus::CacheUnsupported => CacheVerdict::UnsupportedZero,
587        PromptCacheSupportStatus::CacheSupportUnknown => CacheVerdict::SupportUnknownZero,
588        PromptCacheSupportStatus::CacheSupported => {
589            if any_cache_read {
590                // Only a first-run read observed; need a repeat to confirm.
591                if runs.len() < 2 {
592                    CacheVerdict::InsufficientRuns
593                } else {
594                    CacheVerdict::CacheSupportedMiss
595                }
596            } else if runs.len() < 2 {
597                CacheVerdict::InsufficientRuns
598            } else {
599                CacheVerdict::CacheSupportedMiss
600            }
601        }
602    }
603}
604
605/// Assemble a report from already-classified runs.
606pub fn report_from_runs(
607    provider: String,
608    model: String,
609    support: PromptCacheSupport,
610    runs: Vec<CacheConformanceRun>,
611) -> CacheConformanceReport {
612    let bucket_counts = CacheConformanceBucketCounts::tally(&runs);
613    let verdict = aggregate_verdict(&runs, &support);
614    CacheConformanceReport {
615        schema_version: CACHE_CONFORMANCE_SCHEMA_VERSION,
616        provider,
617        model,
618        support,
619        runs,
620        bucket_counts,
621        verdict,
622        dogfood_failure: verdict.is_dogfood_failure(),
623    }
624}
625
626/// Parse one fixture run entry. Accepts either a bare usage object or an entry
627/// wrapping `usage` plus optional `request`, `elapsed_ms`, and a `raw_usage`
628/// passthrough.
629fn run_from_fixture_entry(
630    index: usize,
631    entry: &Value,
632    support: &PromptCacheSupport,
633) -> CacheConformanceRun {
634    let (usage_value, request, elapsed_ms) = match entry.as_object() {
635        Some(object) if object.contains_key("usage") => {
636            let usage_value = object.get("usage").cloned().unwrap_or(Value::Null);
637            let request = object.get("request").and_then(|value| {
638                serde_json::from_value::<CacheRequestIdentity>(value.clone()).ok()
639            });
640            let elapsed_ms = object.get("elapsed_ms").and_then(Value::as_u64);
641            (usage_value, request, elapsed_ms)
642        }
643        // A bare usage object is the whole entry.
644        _ => (entry.clone(), None, None),
645    };
646    let usage = NormalizedCacheUsage::from_usage_value(&usage_value);
647    let classification = classify_cache_run(&usage, support);
648    let inconsistency_reason = field_inconsistency(&usage);
649    CacheConformanceRun {
650        run_index: index,
651        request,
652        usage,
653        classification,
654        inconsistency_reason,
655        elapsed_ms,
656        raw_usage: Some(usage_value),
657    }
658}
659
660/// Classify a saved repeat-run fixture into a conformance report. `raw` is a
661/// JSON document shaped as either a top-level array of run entries or an object
662/// with a `runs` array (and optional `provider`/`model` overrides). This is the
663/// committed-conformance path: no keys, no live provider, deterministic verdict.
664pub fn classify_cache_conformance_fixture(
665    provider: impl Into<String>,
666    model: impl Into<String>,
667    raw: &str,
668) -> Result<CacheConformanceReport, String> {
669    let document: Value = serde_json::from_str(raw)
670        .map_err(|error| format!("failed to parse cache conformance fixture: {error}"))?;
671    let mut provider = provider.into();
672    let mut model = model.into();
673    let runs_value = match &document {
674        Value::Array(items) => items.clone(),
675        Value::Object(object) => {
676            if let Some(fixture_provider) = object.get("provider").and_then(Value::as_str) {
677                if provider.trim().is_empty() {
678                    provider = fixture_provider.to_string();
679                }
680            }
681            if let Some(fixture_model) = object.get("model").and_then(Value::as_str) {
682                if model.trim().is_empty() {
683                    model = fixture_model.to_string();
684                }
685            }
686            match object.get("runs") {
687                Some(Value::Array(items)) => items.clone(),
688                _ => {
689                    return Err(
690                        "cache conformance fixture object must carry a `runs` array".to_string()
691                    )
692                }
693            }
694        }
695        _ => {
696            return Err(
697                "cache conformance fixture must be a runs array or an object with `runs`"
698                    .to_string(),
699            )
700        }
701    };
702    let support = prompt_cache_support(&provider, &model);
703    let runs = runs_value
704        .iter()
705        .enumerate()
706        .map(|(index, entry)| run_from_fixture_entry(index, entry, &support))
707        .collect::<Vec<_>>();
708    Ok(report_from_runs(provider, model, support, runs))
709}
710
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Hand-built support record for a cache-capable route
    /// (`anthropic` / `claude-sonnet-4-6`, status `CacheSupported`).
    fn supported() -> PromptCacheSupport {
        let profile = CacheControlProfile {
            prompt_caching: true,
            cache_breakpoint_style: String::from("last_block"),
            min_useful_prefix_tokens: Some(1024),
            ttl_notes: Some(String::from("5m")),
            cache_read_usage_field: String::from("usage.cache_read_input_tokens"),
            cache_write_usage_field: String::from("usage.cache_creation_input_tokens"),
        };
        PromptCacheSupport {
            status: PromptCacheSupportStatus::CacheSupported,
            supported: Some(true),
            cache_tier: Some(String::from("provider-prompt-cache")),
            resolved_provider: String::from("anthropic"),
            resolved_model: String::from("claude-sonnet-4-6"),
            source: String::from("provider-capabilities"),
            profile,
        }
    }

    /// Hand-built support record for a route without prompt caching
    /// (`ollama` / `qwen3`, status `CacheUnsupported`, empty usage-field names).
    fn unsupported() -> PromptCacheSupport {
        let profile = CacheControlProfile {
            prompt_caching: false,
            cache_breakpoint_style: String::from("none"),
            min_useful_prefix_tokens: None,
            ttl_notes: None,
            cache_read_usage_field: String::new(),
            cache_write_usage_field: String::new(),
        };
        PromptCacheSupport {
            status: PromptCacheSupportStatus::CacheUnsupported,
            supported: Some(false),
            cache_tier: Some(String::from("none")),
            resolved_provider: String::from("ollama"),
            resolved_model: String::from("qwen3"),
            source: String::from("provider-capabilities"),
            profile,
        }
    }

    /// Support record resolved by `prompt_cache_support` for provider `"auto"`
    /// with an empty model; the tests below use it as the unknown-support case.
    fn unknown() -> PromptCacheSupport {
        prompt_cache_support("auto", "")
    }

    /// Builds a normalized usage record from raw counters. Fresh input is the
    /// prompt total minus cache reads and writes, clamped at zero; the record
    /// reports cache fields as present (`cache_supported = true`, nothing missing).
    fn usage(input: i64, read: i64, write: i64, output: i64) -> NormalizedCacheUsage {
        let fresh = (input - read - write).max(0);
        NormalizedCacheUsage {
            input_tokens: input,
            fresh_input_tokens: fresh,
            cache_read_tokens: read,
            cache_write_tokens: write,
            output_tokens: output,
            cache_supported: true,
            missing_fields: Vec::new(),
        }
    }

    /// Serializes `fixture` and classifies it with empty provider/model
    /// arguments, so the provider and model come from the fixture document.
    fn classify_fixture(fixture: &serde_json::Value) -> CacheConformanceReport {
        let serialized = fixture.to_string();
        classify_cache_conformance_fixture("", "", &serialized).expect("classify")
    }

    #[test]
    fn cache_read_is_effective_regardless_of_support() {
        let observed = usage(2000, 1800, 0, 50);
        let bucket = classify_cache_run(&observed, &supported());
        assert_eq!(bucket, CacheConformanceClassification::CacheEffective);
    }

    #[test]
    fn supported_zero_read_is_a_miss_not_unsupported() {
        let observed = usage(2000, 0, 2000, 50);
        let bucket = classify_cache_run(&observed, &supported());
        assert_eq!(bucket, CacheConformanceClassification::CacheSupportedMiss);
    }

    #[test]
    fn unsupported_zero_read_classifies_unsupported() {
        let observed = usage(2000, 0, 0, 50);
        let bucket = classify_cache_run(&observed, &unsupported());
        assert_eq!(bucket, CacheConformanceClassification::UnsupportedZero);
    }

    #[test]
    fn missing_field_with_unknown_support_stays_unknown_not_unsupported() {
        // A native-local style payload carries no cache fields whatsoever. The
        // resulting cache_supported=false is only an observation; with the
        // capability path unknown it must not be read as "no support".
        let payload = json!({ "input_tokens": 2000, "output_tokens": 40 });
        let observed = NormalizedCacheUsage::from_usage_value(&payload);
        assert!(!observed.cache_supported);
        let read_field_missing = observed
            .missing_fields
            .iter()
            .any(|field| field.as_str() == "cache_read_tokens");
        assert!(read_field_missing);
        let bucket = classify_cache_run(&observed, &unknown());
        assert_eq!(bucket, CacheConformanceClassification::SupportUnknownZero);
    }

    #[test]
    fn no_prompt_tokens_bucket() {
        let observed = usage(0, 0, 0, 10);
        let bucket = classify_cache_run(&observed, &supported());
        assert_eq!(bucket, CacheConformanceClassification::NoPromptTokens);
    }

    #[test]
    fn cache_exceeding_prompt_is_inconsistent() {
        // read + write (1400) is larger than the reported prompt total (1000).
        let observed = usage(1000, 900, 500, 10);
        let bucket = classify_cache_run(&observed, &supported());
        assert_eq!(
            bucket,
            CacheConformanceClassification::ProviderFieldInconsistent
        );
    }

    #[test]
    fn read_with_support_false_is_inconsistent() {
        // Non-zero cache reads while the usage record itself says caching is off.
        let observed = NormalizedCacheUsage {
            cache_supported: false,
            ..usage(2000, 500, 0, 10)
        };
        let bucket = classify_cache_run(&observed, &supported());
        assert_eq!(
            bucket,
            CacheConformanceClassification::ProviderFieldInconsistent
        );
    }

    #[test]
    fn normalize_reads_anthropic_aliases() {
        let payload = json!({
            "input_tokens": 4000,
            "output_tokens": 120,
            "cache_read_input_tokens": 3500,
            "cache_creation_input_tokens": 500,
        });
        let observed = NormalizedCacheUsage::from_usage_value(&payload);
        assert_eq!(observed.cache_read_tokens, 3500);
        assert_eq!(observed.cache_write_tokens, 500);
        assert_eq!(observed.fresh_input_tokens, 0);
        assert!(observed.cache_supported);
        assert!(observed.missing_fields.is_empty());
    }

    #[test]
    fn normalize_reads_openai_nested_cached_tokens() {
        let payload = json!({
            "prompt_tokens": 3000,
            "completion_tokens": 90,
            "prompt_tokens_details": { "cached_tokens": 2048 },
        });
        let observed = NormalizedCacheUsage::from_usage_value(&payload);
        assert_eq!(observed.input_tokens, 3000);
        assert_eq!(observed.cache_read_tokens, 2048);
        assert_eq!(observed.fresh_input_tokens, 952);
    }

    #[test]
    fn repeat_run_cache_read_yields_cache_effective_verdict() {
        // First run writes the cache, second run reads it back.
        let fixture = json!({
            "provider": "anthropic",
            "model": "claude-sonnet-4-6",
            "runs": [
                { "usage": { "input_tokens": 4000, "output_tokens": 80, "cache_read_tokens": 0, "cache_creation_input_tokens": 3800 } },
                { "usage": { "input_tokens": 4000, "output_tokens": 80, "cache_read_tokens": 3800, "cache_creation_input_tokens": 0 } }
            ]
        });
        let report = classify_fixture(&fixture);
        assert_eq!(report.verdict, CacheVerdict::CacheEffective);
        assert!(!report.dogfood_failure);
        let counts = &report.bucket_counts;
        assert_eq!(counts.cache_effective, 1);
        assert_eq!(counts.cache_supported_miss, 1);
    }

    #[test]
    fn non_cache_provider_does_not_fail_dogfood() {
        let fixture = json!({
            "provider": "ollama",
            "model": "qwen3",
            "runs": [
                { "usage": { "input_tokens": 4000, "output_tokens": 80 } },
                { "usage": { "input_tokens": 4000, "output_tokens": 80 } }
            ]
        });
        let report = classify_fixture(&fixture);
        assert_eq!(report.verdict, CacheVerdict::UnsupportedZero);
        assert!(!report.dogfood_failure);
    }

    #[test]
    fn supported_route_that_never_caches_fails_dogfood() {
        // Both runs only write to the cache; neither one ever reads from it.
        let fixture = json!({
            "provider": "anthropic",
            "model": "claude-sonnet-4-6",
            "runs": [
                { "usage": { "input_tokens": 4000, "output_tokens": 80, "cache_creation_input_tokens": 3800 } },
                { "usage": { "input_tokens": 4000, "output_tokens": 80, "cache_creation_input_tokens": 3800 } }
            ]
        });
        let report = classify_fixture(&fixture);
        assert_eq!(report.verdict, CacheVerdict::CacheSupportedMiss);
        assert!(report.dogfood_failure);
    }
}
harn_vm/llm/cache_conformance.rs

harn_vm/llm/
cache_conformance.rs