use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::llm::capabilities::{self, Capabilities, WireDialect};
/// Schema version written into the `schema_version` field of every
/// `CacheConformanceReport` built by `report_from_runs`.
pub const CACHE_CONFORMANCE_SCHEMA_VERSION: u32 = 1;
/// Describes how a provider route exposes prompt caching: whether it is
/// enabled, how breakpoints are placed, and which usage fields report cache
/// reads and writes. Built by `CacheControlProfile::from_capabilities`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CacheControlProfile {
/// `true` when the capability record advertises prompt caching.
pub prompt_caching: bool,
/// Breakpoint style copied verbatim from the capability record
/// (`"none"` for an unresolved route).
pub cache_breakpoint_style: String,
/// Prefix-token threshold from the per-dialect table; `None` when caching
/// is off or the table entry is 0.
pub min_useful_prefix_tokens: Option<u32>,
/// Free-text note about cache lifetime/eviction; `None` when caching is off.
pub ttl_notes: Option<String>,
/// Dotted path of the provider usage field that reports cache reads;
/// empty string when there is none.
pub cache_read_usage_field: String,
/// Dotted path of the provider usage field that reports cache writes;
/// empty string when there is none.
pub cache_write_usage_field: String,
}
impl CacheControlProfile {
    /// Derives the cache-control profile advertised for a capability record.
    ///
    /// When `caps.prompt_caching` is off, only the breakpoint style is carried
    /// over and every other field is left empty. Otherwise the prefix
    /// threshold, TTL note and usage-field paths come from a fixed table keyed
    /// on `caps.message_wire_format`; a zero threshold or empty note is
    /// reported as `None`.
    pub fn from_capabilities(caps: &Capabilities) -> Self {
        let cache_breakpoint_style = caps.cache_breakpoint_style.clone();

        if !caps.prompt_caching {
            return Self {
                prompt_caching: false,
                cache_breakpoint_style,
                min_useful_prefix_tokens: None,
                ttl_notes: None,
                cache_read_usage_field: String::new(),
                cache_write_usage_field: String::new(),
            };
        }

        // Tuple layout: (prefix threshold, TTL note, read-usage path, write-usage path).
        let (threshold, ttl_text, read_path, write_path): (u32, &str, &str, &str) =
            match caps.message_wire_format {
                WireDialect::Anthropic => (
                    1024,
                    "5m default breakpoint TTL; 1h with the extended-cache-ttl beta",
                    "usage.cache_read_input_tokens",
                    "usage.cache_creation_input_tokens",
                ),
                WireDialect::Gemini => (
                    1024,
                    "Implicit caching with provider-managed eviction; explicit cachedContent honors a caller TTL",
                    "usageMetadata.cachedContentTokenCount",
                    "",
                ),
                WireDialect::OpenAiCompat => (
                    1024,
                    "Automatic prefix caching; entries idle-evict after ~5-10 minutes",
                    "usage.prompt_tokens_details.cached_tokens",
                    "",
                ),
                WireDialect::Ollama => (0, "No provider-reported cache accounting", "", ""),
            };

        Self {
            prompt_caching: true,
            cache_breakpoint_style,
            min_useful_prefix_tokens: (threshold > 0).then_some(threshold),
            ttl_notes: (!ttl_text.is_empty()).then(|| ttl_text.to_string()),
            cache_read_usage_field: read_path.to_string(),
            cache_write_usage_field: write_path.to_string(),
        }
    }
}
/// Tri-state answer to "does this provider/model route support prompt
/// caching?". Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PromptCacheSupportStatus {
/// The capability record reports prompt caching.
CacheSupported,
/// The capability record reports no prompt caching.
CacheUnsupported,
/// Provider or model could not be resolved, so support is undetermined.
CacheSupportUnknown,
}
impl PromptCacheSupportStatus {
pub fn as_str(self) -> &'static str {
match self {
Self::CacheSupported => "cache_supported",
Self::CacheUnsupported => "cache_unsupported",
Self::CacheSupportUnknown => "cache_support_unknown",
}
}
}
/// Resolved prompt-cache support for one provider/model pair, as produced by
/// `prompt_cache_support`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PromptCacheSupport {
/// Tri-state support status.
pub status: PromptCacheSupportStatus,
/// `Some(flag)` when the route was resolved, `None` when it was not.
pub supported: Option<bool>,
/// `"provider-prompt-cache"` or `"none"` for resolved routes; `None`
/// (and omitted from serialized output) for unresolved ones.
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_tier: Option<String>,
/// Trimmed provider name the lookup was performed with.
pub resolved_provider: String,
/// Trimmed model name the lookup was performed with.
pub resolved_model: String,
/// Origin of the answer: `"provider-capabilities"` or `"unresolved"`.
pub source: String,
/// Cache-control details for the route.
pub profile: CacheControlProfile,
}
/// Resolves prompt-cache support for a provider/model pair.
///
/// Both inputs are trimmed first. A blank provider, a provider of `auto`
/// (ASCII case-insensitive) or a blank model cannot be resolved and yields a
/// `CacheSupportUnknown` record with a disabled profile. Otherwise the
/// capability table is consulted and its `prompt_caching` flag decides between
/// `CacheSupported` and `CacheUnsupported`.
pub fn prompt_cache_support(provider: &str, model: &str) -> PromptCacheSupport {
    let provider = provider.trim();
    let model = model.trim();

    let resolvable =
        !provider.is_empty() && !provider.eq_ignore_ascii_case("auto") && !model.is_empty();
    if !resolvable {
        let disabled_profile = CacheControlProfile {
            prompt_caching: false,
            cache_breakpoint_style: "none".to_string(),
            min_useful_prefix_tokens: None,
            ttl_notes: None,
            cache_read_usage_field: String::new(),
            cache_write_usage_field: String::new(),
        };
        return PromptCacheSupport {
            status: PromptCacheSupportStatus::CacheSupportUnknown,
            supported: None,
            cache_tier: None,
            resolved_provider: provider.to_string(),
            resolved_model: model.to_string(),
            source: "unresolved".to_string(),
            profile: disabled_profile,
        };
    }

    let caps = capabilities::lookup(provider, model);
    let profile = CacheControlProfile::from_capabilities(&caps);
    let caching_enabled = caps.prompt_caching;
    let (status, tier_label) = if caching_enabled {
        (
            PromptCacheSupportStatus::CacheSupported,
            "provider-prompt-cache",
        )
    } else {
        (PromptCacheSupportStatus::CacheUnsupported, "none")
    };

    PromptCacheSupport {
        status,
        supported: Some(caching_enabled),
        cache_tier: Some(tier_label.to_string()),
        resolved_provider: provider.to_string(),
        resolved_model: model.to_string(),
        source: "provider-capabilities".to_string(),
        profile,
    }
}
/// Provider usage counters normalized to one set of field names by
/// `NormalizedCacheUsage::from_usage_value`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct NormalizedCacheUsage {
/// Prompt token count (`input_tokens` or `prompt_tokens`); 0 when absent.
pub input_tokens: i64,
/// `input_tokens` minus cache-read and cache-write tokens, floored at 0.
pub fresh_input_tokens: i64,
/// Tokens reported as read from cache; 0 when absent.
pub cache_read_tokens: i64,
/// Tokens reported as written to cache; 0 when absent.
pub cache_write_tokens: i64,
/// Completion token count (`output_tokens` or `completion_tokens`); 0 when absent.
pub output_tokens: i64,
/// Explicit `cache_supported` flag from the payload, or else whether any
/// cache counter was present.
pub cache_supported: bool,
/// Normalized names of counters that were not found in the payload;
/// omitted from serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub missing_fields: Vec<String>,
}
/// Returns the value of the first key in `keys` that is present in `usage`
/// and holds an integer representable as `i64`.
///
/// Keys whose value is missing or not such an integer are skipped; `None`
/// means no key qualified.
fn usage_i64(usage: &serde_json::Map<String, Value>, keys: &[&str]) -> Option<i64> {
    keys.iter()
        .find_map(|name| usage.get(*name).and_then(Value::as_i64))
}
impl NormalizedCacheUsage {
    /// Normalizes a raw provider `usage` JSON value.
    ///
    /// Accepted aliases:
    /// - input: `input_tokens`, `prompt_tokens`
    /// - output: `output_tokens`, `completion_tokens`
    /// - cache read: `cache_read_tokens`, `cache_read_input_tokens`,
    ///   `cached_tokens`, or nested `prompt_tokens_details.cached_tokens`
    /// - cache write: `cache_write_tokens`, `cache_creation_input_tokens`
    ///
    /// Absent counters default to 0 and their normalized name is appended to
    /// `missing_fields`. A non-object value yields an all-zero record with
    /// `missing_fields == ["usage"]`. `cache_supported` takes the payload's
    /// boolean `cache_supported` when present, otherwise it is `true` iff a
    /// cache-read or cache-write counter was found.
    ///
    /// NOTE(review): `fresh_input_tokens` is computed as if `input_tokens`
    /// already includes cache-read and cache-write tokens — confirm callers
    /// pass usage in that convention. camelCase keys such as
    /// `cachedContentTokenCount` are not among the aliases read here.
    pub fn from_usage_value(usage: &Value) -> Self {
        let Some(object) = usage.as_object() else {
            return Self {
                input_tokens: 0,
                fresh_input_tokens: 0,
                cache_read_tokens: 0,
                cache_write_tokens: 0,
                output_tokens: 0,
                cache_supported: false,
                missing_fields: vec!["usage".to_string()],
            };
        };

        let mut missing_fields = Vec::new();

        let input_tokens =
            usage_i64(object, &["input_tokens", "prompt_tokens"]).unwrap_or_else(|| {
                missing_fields.push("input_tokens".to_string());
                0
            });
        let output_tokens = usage_i64(object, &["output_tokens", "completion_tokens"])
            .unwrap_or_else(|| {
                missing_fields.push("output_tokens".to_string());
                0
            });

        let explicit_supported = object.get("cache_supported").and_then(Value::as_bool);

        let cache_read = usage_i64(
            object,
            &[
                "cache_read_tokens",
                "cache_read_input_tokens",
                "cached_tokens",
            ],
        )
        .or_else(|| nested_cached_tokens(object));
        let cache_write = usage_i64(
            object,
            &["cache_write_tokens", "cache_creation_input_tokens"],
        );

        if cache_read.is_none() {
            missing_fields.push("cache_read_tokens".to_string());
        }
        if cache_write.is_none() {
            missing_fields.push("cache_write_tokens".to_string());
        }

        let cache_read_tokens = cache_read.unwrap_or(0);
        let cache_write_tokens = cache_write.unwrap_or(0);
        let cache_supported =
            explicit_supported.unwrap_or(cache_read.is_some() || cache_write.is_some());

        // The counters come straight from provider JSON and may be arbitrary
        // i64 values; saturate so hostile or garbage numbers cannot overflow
        // (panic in debug builds, wrap in release builds).
        let fresh_input_tokens = input_tokens
            .saturating_sub(cache_read_tokens)
            .saturating_sub(cache_write_tokens)
            .max(0);

        Self {
            input_tokens,
            fresh_input_tokens,
            cache_read_tokens,
            cache_write_tokens,
            output_tokens,
            cache_supported,
            missing_fields,
        }
    }
}
/// Reads `prompt_tokens_details.cached_tokens` from a usage object.
///
/// Returns `None` when `prompt_tokens_details` is missing or not an object,
/// or when `cached_tokens` is missing or not an `i64`-representable integer.
fn nested_cached_tokens(object: &serde_json::Map<String, Value>) -> Option<i64> {
    let details = object.get("prompt_tokens_details")?.as_object()?;
    details.get("cached_tokens")?.as_i64()
}
/// Per-run bucket assigned by `classify_cache_run`. Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CacheConformanceClassification {
/// The run reported a positive cache-read count.
CacheEffective,
/// No cache read on a route whose support status is `CacheSupported`.
CacheSupportedMiss,
/// No cache read on a route whose support status is `CacheUnsupported`.
UnsupportedZero,
/// No cache read on a route whose support status is `CacheSupportUnknown`.
SupportUnknownZero,
/// The run reported no prompt tokens (`input_tokens <= 0`).
NoPromptTokens,
/// The usage counters contradict each other (see `field_inconsistency`).
ProviderFieldInconsistent,
}
impl CacheConformanceClassification {
pub fn as_str(self) -> &'static str {
match self {
Self::CacheEffective => "cache_effective",
Self::CacheSupportedMiss => "cache_supported_miss",
Self::UnsupportedZero => "unsupported_zero",
Self::SupportUnknownZero => "support_unknown_zero",
Self::NoPromptTokens => "no_prompt_tokens",
Self::ProviderFieldInconsistent => "provider_field_inconsistent",
}
}
}
/// Checks normalized usage counters for internal contradictions.
///
/// Returns a human-readable reason for the first rule that fails, checked in
/// this order, or `None` when the counters are consistent:
/// 1. any counter is negative;
/// 2. cache tokens are reported although there are no prompt tokens;
/// 3. cache-read plus cache-write exceeds the prompt token count;
/// 4. cache tokens are reported while `cache_supported` is `false`.
fn field_inconsistency(usage: &NormalizedCacheUsage) -> Option<String> {
    if usage.input_tokens < 0
        || usage.output_tokens < 0
        || usage.cache_read_tokens < 0
        || usage.cache_write_tokens < 0
    {
        return Some("negative token count".to_string());
    }

    let cache_tokens_reported = usage.cache_read_tokens > 0 || usage.cache_write_tokens > 0;

    if usage.input_tokens <= 0 && cache_tokens_reported {
        return Some("cache tokens reported with zero prompt tokens".to_string());
    }

    if usage.input_tokens > 0 {
        // Both addends are non-negative here, so the only possible failure is
        // overflow past i64::MAX. An overflowing sum necessarily exceeds
        // `input_tokens`, so treat it as "exceeds" instead of letting it
        // panic (debug) or wrap negative and slip through (release).
        let cache_exceeds_prompt = match usage
            .cache_read_tokens
            .checked_add(usage.cache_write_tokens)
        {
            Some(cache_total) => cache_total > usage.input_tokens,
            None => true,
        };
        if cache_exceeds_prompt {
            return Some("cache-read + cache-write exceed prompt tokens".to_string());
        }
    }

    if !usage.cache_supported && cache_tokens_reported {
        return Some("cache tokens reported while cache_supported=false".to_string());
    }

    None
}
pub fn classify_cache_run(
usage: &NormalizedCacheUsage,
support: &PromptCacheSupport,
) -> CacheConformanceClassification {
if field_inconsistency(usage).is_some() {
return CacheConformanceClassification::ProviderFieldInconsistent;
}
if usage.input_tokens <= 0 {
return CacheConformanceClassification::NoPromptTokens;
}
if usage.cache_read_tokens > 0 {
return CacheConformanceClassification::CacheEffective;
}
match support.status {
PromptCacheSupportStatus::CacheSupported => {
CacheConformanceClassification::CacheSupportedMiss
}
PromptCacheSupportStatus::CacheUnsupported => {
CacheConformanceClassification::UnsupportedZero
}
PromptCacheSupportStatus::CacheSupportUnknown => {
CacheConformanceClassification::SupportUnknownZero
}
}
}
/// Optional identifying metadata attached to a fixture run's `request` entry.
/// Every field is optional and omitted from serialized output when `None`.
// NOTE(review): field meanings below are inferred from their names; this file
// only deserializes and stores them — confirm against the fixture producer.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct CacheRequestIdentity {
/// Task label for the request.
#[serde(skip_serializing_if = "Option::is_none")]
pub task: Option<String>,
/// Presumably a SHA-256 digest of the prompt prefix.
#[serde(skip_serializing_if = "Option::is_none")]
pub prefix_sha256: Option<String>,
/// Presumably an estimated token count for the prompt prefix.
#[serde(skip_serializing_if = "Option::is_none")]
pub prefix_tokens_estimate: Option<u32>,
/// Presumably a SHA-256 digest of the tool schema.
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_schema_sha256: Option<String>,
/// Presumably a SHA-256 digest of the request settings.
#[serde(skip_serializing_if = "Option::is_none")]
pub settings_sha256: Option<String>,
}
/// One classified run inside a conformance report.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CacheConformanceRun {
/// Zero-based position of the run; `aggregate_verdict` treats indices
/// greater than 0 as repeat runs.
pub run_index: usize,
/// Request identity from the fixture entry, when present and parseable.
#[serde(skip_serializing_if = "Option::is_none")]
pub request: Option<CacheRequestIdentity>,
/// Normalized usage counters for the run.
pub usage: NormalizedCacheUsage,
/// Bucket assigned by `classify_cache_run`.
pub classification: CacheConformanceClassification,
/// Reason reported by `field_inconsistency`, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub inconsistency_reason: Option<String>,
/// `elapsed_ms` value from the fixture entry, when present.
#[serde(skip_serializing_if = "Option::is_none")]
pub elapsed_ms: Option<u64>,
/// The usage JSON the counters were normalized from.
#[serde(skip_serializing_if = "Option::is_none")]
pub raw_usage: Option<Value>,
}
/// Report-level verdict computed by `aggregate_verdict` over all runs.
/// Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CacheVerdict {
/// At least one repeat run (`run_index > 0`) read from cache.
CacheEffective,
/// A cache-supporting route had two or more runs and no repeat-run cache read.
CacheSupportedMiss,
/// The route's support status is `CacheUnsupported`.
UnsupportedZero,
/// The route's support status is `CacheSupportUnknown`.
SupportUnknownZero,
/// At least one run had inconsistent usage counters.
ProviderFieldInconsistent,
/// Every run (of a non-empty set) reported no prompt tokens.
NoPromptTokens,
/// A cache-supporting route had fewer than two runs.
InsufficientRuns,
}
impl CacheVerdict {
pub fn as_str(self) -> &'static str {
match self {
Self::CacheEffective => "cache_effective",
Self::CacheSupportedMiss => "cache_supported_miss",
Self::UnsupportedZero => "unsupported_zero",
Self::SupportUnknownZero => "support_unknown_zero",
Self::ProviderFieldInconsistent => "provider_field_inconsistent",
Self::NoPromptTokens => "no_prompt_tokens",
Self::InsufficientRuns => "insufficient_runs",
}
}
pub fn is_dogfood_failure(self) -> bool {
matches!(
self,
Self::CacheSupportedMiss | Self::ProviderFieldInconsistent
)
}
}
/// Number of runs that landed in each `CacheConformanceClassification`
/// bucket; one counter per variant.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct CacheConformanceBucketCounts {
/// Runs classified `CacheEffective`.
pub cache_effective: usize,
/// Runs classified `CacheSupportedMiss`.
pub cache_supported_miss: usize,
/// Runs classified `UnsupportedZero`.
pub unsupported_zero: usize,
/// Runs classified `SupportUnknownZero`.
pub support_unknown_zero: usize,
/// Runs classified `NoPromptTokens`.
pub no_prompt_tokens: usize,
/// Runs classified `ProviderFieldInconsistent`.
pub provider_field_inconsistent: usize,
}
impl CacheConformanceBucketCounts {
fn tally(runs: &[CacheConformanceRun]) -> Self {
let mut counts = Self::default();
for run in runs {
match run.classification {
CacheConformanceClassification::CacheEffective => counts.cache_effective += 1,
CacheConformanceClassification::CacheSupportedMiss => {
counts.cache_supported_miss += 1;
}
CacheConformanceClassification::UnsupportedZero => counts.unsupported_zero += 1,
CacheConformanceClassification::SupportUnknownZero => {
counts.support_unknown_zero += 1;
}
CacheConformanceClassification::NoPromptTokens => counts.no_prompt_tokens += 1,
CacheConformanceClassification::ProviderFieldInconsistent => {
counts.provider_field_inconsistent += 1;
}
}
}
counts
}
}
/// Complete conformance report for one provider/model pair, assembled by
/// `report_from_runs`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CacheConformanceReport {
/// Always `CACHE_CONFORMANCE_SCHEMA_VERSION` for reports built here.
pub schema_version: u32,
/// Provider name the report was built for.
pub provider: String,
/// Model name the report was built for.
pub model: String,
/// Resolved prompt-cache support for the route.
pub support: PromptCacheSupport,
/// Classified runs, in the order they were supplied.
pub runs: Vec<CacheConformanceRun>,
/// Per-bucket run counts.
pub bucket_counts: CacheConformanceBucketCounts,
/// Aggregate verdict across all runs.
pub verdict: CacheVerdict,
/// `verdict.is_dogfood_failure()` at the time the report was built.
pub dogfood_failure: bool,
}
fn aggregate_verdict(runs: &[CacheConformanceRun], support: &PromptCacheSupport) -> CacheVerdict {
if runs
.iter()
.any(|run| run.classification == CacheConformanceClassification::ProviderFieldInconsistent)
{
return CacheVerdict::ProviderFieldInconsistent;
}
let repeat_cache_read = runs.iter().any(|run| {
run.run_index > 0 && run.classification == CacheConformanceClassification::CacheEffective
});
if repeat_cache_read {
return CacheVerdict::CacheEffective;
}
let any_cache_read = runs
.iter()
.any(|run| run.classification == CacheConformanceClassification::CacheEffective);
let all_no_prompt = !runs.is_empty()
&& runs
.iter()
.all(|run| run.classification == CacheConformanceClassification::NoPromptTokens);
if all_no_prompt {
return CacheVerdict::NoPromptTokens;
}
match support.status {
PromptCacheSupportStatus::CacheUnsupported => CacheVerdict::UnsupportedZero,
PromptCacheSupportStatus::CacheSupportUnknown => CacheVerdict::SupportUnknownZero,
PromptCacheSupportStatus::CacheSupported => {
if any_cache_read {
if runs.len() < 2 {
CacheVerdict::InsufficientRuns
} else {
CacheVerdict::CacheSupportedMiss
}
} else if runs.len() < 2 {
CacheVerdict::InsufficientRuns
} else {
CacheVerdict::CacheSupportedMiss
}
}
}
}
/// Assembles a `CacheConformanceReport` from already-classified runs.
///
/// Computes the aggregate verdict, the derived dogfood-failure flag and the
/// per-bucket counts, and stamps the report with
/// `CACHE_CONFORMANCE_SCHEMA_VERSION`.
pub fn report_from_runs(
    provider: String,
    model: String,
    support: PromptCacheSupport,
    runs: Vec<CacheConformanceRun>,
) -> CacheConformanceReport {
    let verdict = aggregate_verdict(&runs, &support);
    let dogfood_failure = verdict.is_dogfood_failure();
    let bucket_counts = CacheConformanceBucketCounts::tally(&runs);

    CacheConformanceReport {
        schema_version: CACHE_CONFORMANCE_SCHEMA_VERSION,
        provider,
        model,
        support,
        runs,
        bucket_counts,
        verdict,
        dogfood_failure,
    }
}
/// Builds one classified run from a fixture entry.
///
/// An object with a `usage` key is a wrapped entry: its `usage` value is the
/// payload, and optional `request` / `elapsed_ms` members are picked up (a
/// `request` that fails to deserialize is dropped). Any other value is taken
/// to be the usage payload itself.
fn run_from_fixture_entry(
    index: usize,
    entry: &Value,
    support: &PromptCacheSupport,
) -> CacheConformanceRun {
    let wrapped = entry
        .as_object()
        .and_then(|fields| fields.get("usage").map(|payload| (fields, payload)));

    let (raw_usage, request, elapsed_ms) = if let Some((fields, payload)) = wrapped {
        let identity = fields.get("request").and_then(|value| {
            serde_json::from_value::<CacheRequestIdentity>(value.clone()).ok()
        });
        let elapsed = fields.get("elapsed_ms").and_then(Value::as_u64);
        (payload.clone(), identity, elapsed)
    } else {
        (entry.clone(), None, None)
    };

    let usage = NormalizedCacheUsage::from_usage_value(&raw_usage);
    let inconsistency_reason = field_inconsistency(&usage);
    let classification = classify_cache_run(&usage, support);

    CacheConformanceRun {
        run_index: index,
        request,
        usage,
        classification,
        inconsistency_reason,
        elapsed_ms,
        raw_usage: Some(raw_usage),
    }
}
/// Parses a JSON conformance fixture and classifies every run in it.
///
/// `raw` must be either a bare array of run entries or an object carrying a
/// `runs` array. In the object form, string `provider` / `model` members are
/// used only when the corresponding argument is blank after trimming.
///
/// # Errors
///
/// Returns a descriptive message when `raw` is not valid JSON, when the
/// top-level value is neither an array nor an object, or when the object has
/// no `runs` array.
pub fn classify_cache_conformance_fixture(
    provider: impl Into<String>,
    model: impl Into<String>,
    raw: &str,
) -> Result<CacheConformanceReport, String> {
    let document: Value = serde_json::from_str(raw)
        .map_err(|error| format!("failed to parse cache conformance fixture: {error}"))?;
    let mut provider: String = provider.into();
    let mut model: String = model.into();

    // The parsed document is owned and not needed afterwards, so the run
    // entries are moved out of it instead of deep-cloned.
    let run_entries = match document {
        Value::Array(items) => items,
        Value::Object(mut object) => {
            if provider.trim().is_empty() {
                if let Some(fixture_provider) = object.get("provider").and_then(Value::as_str) {
                    provider = fixture_provider.to_string();
                }
            }
            if model.trim().is_empty() {
                if let Some(fixture_model) = object.get("model").and_then(Value::as_str) {
                    model = fixture_model.to_string();
                }
            }
            match object.remove("runs") {
                Some(Value::Array(items)) => items,
                _ => {
                    return Err(
                        "cache conformance fixture object must carry a `runs` array".to_string()
                    )
                }
            }
        }
        _ => {
            return Err(
                "cache conformance fixture must be a runs array or an object with `runs`"
                    .to_string(),
            )
        }
    };

    let support = prompt_cache_support(&provider, &model);
    let runs = run_entries
        .iter()
        .enumerate()
        .map(|(index, entry)| run_from_fixture_entry(index, entry, &support))
        .collect::<Vec<_>>();

    Ok(report_from_runs(provider, model, support, runs))
}
// Unit tests for per-run classification, usage normalization and the
// fixture-level verdict.
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// Hand-built support record for a route with prompt caching enabled.
fn supported() -> PromptCacheSupport {
PromptCacheSupport {
status: PromptCacheSupportStatus::CacheSupported,
supported: Some(true),
cache_tier: Some("provider-prompt-cache".to_string()),
resolved_provider: "anthropic".to_string(),
resolved_model: "claude-sonnet-4-6".to_string(),
source: "provider-capabilities".to_string(),
profile: CacheControlProfile {
prompt_caching: true,
cache_breakpoint_style: "last_block".to_string(),
min_useful_prefix_tokens: Some(1024),
ttl_notes: Some("5m".to_string()),
cache_read_usage_field: "usage.cache_read_input_tokens".to_string(),
cache_write_usage_field: "usage.cache_creation_input_tokens".to_string(),
},
}
}
// Hand-built support record for a route with prompt caching disabled.
fn unsupported() -> PromptCacheSupport {
PromptCacheSupport {
status: PromptCacheSupportStatus::CacheUnsupported,
supported: Some(false),
cache_tier: Some("none".to_string()),
resolved_provider: "ollama".to_string(),
resolved_model: "qwen3".to_string(),
source: "provider-capabilities".to_string(),
profile: CacheControlProfile {
prompt_caching: false,
cache_breakpoint_style: "none".to_string(),
min_useful_prefix_tokens: None,
ttl_notes: None,
cache_read_usage_field: String::new(),
cache_write_usage_field: String::new(),
},
}
}
// Unresolved route: provider `auto` with an empty model.
fn unknown() -> PromptCacheSupport {
prompt_cache_support("auto", "")
}
// Builds normalized usage directly, with `cache_supported` set and no
// missing fields.
fn usage(input: i64, read: i64, write: i64, output: i64) -> NormalizedCacheUsage {
NormalizedCacheUsage {
input_tokens: input,
fresh_input_tokens: (input - read - write).max(0),
cache_read_tokens: read,
cache_write_tokens: write,
output_tokens: output,
cache_supported: true,
missing_fields: Vec::new(),
}
}
// A positive cache-read count classifies as effective.
#[test]
fn cache_read_is_effective_regardless_of_support() {
let run = usage(2000, 1800, 0, 50);
assert_eq!(
classify_cache_run(&run, &supported()),
CacheConformanceClassification::CacheEffective
);
}
// Zero reads (write only) on a supported route is a miss.
#[test]
fn supported_zero_read_is_a_miss_not_unsupported() {
let run = usage(2000, 0, 2000, 50);
assert_eq!(
classify_cache_run(&run, &supported()),
CacheConformanceClassification::CacheSupportedMiss
);
}
// Zero reads on an unsupported route lands in the unsupported bucket.
#[test]
fn unsupported_zero_read_classifies_unsupported() {
let run = usage(2000, 0, 0, 50);
assert_eq!(
classify_cache_run(&run, &unsupported()),
CacheConformanceClassification::UnsupportedZero
);
}
// Absent cache counters are recorded as missing, and with unknown support
// the run stays in the unknown bucket.
#[test]
fn missing_field_with_unknown_support_stays_unknown_not_unsupported() {
let raw = json!({ "input_tokens": 2000, "output_tokens": 40 });
let normalized = NormalizedCacheUsage::from_usage_value(&raw);
assert!(!normalized.cache_supported);
assert!(normalized
.missing_fields
.contains(&"cache_read_tokens".to_string()));
assert_eq!(
classify_cache_run(&normalized, &unknown()),
CacheConformanceClassification::SupportUnknownZero
);
}
// Zero prompt tokens takes precedence over the support status.
#[test]
fn no_prompt_tokens_bucket() {
let run = usage(0, 0, 0, 10);
assert_eq!(
classify_cache_run(&run, &supported()),
CacheConformanceClassification::NoPromptTokens
);
}
// read + write (1400) greater than input (1000) is flagged inconsistent.
#[test]
fn cache_exceeding_prompt_is_inconsistent() {
let run = usage(1000, 900, 500, 10);
assert_eq!(
classify_cache_run(&run, &supported()),
CacheConformanceClassification::ProviderFieldInconsistent
);
}
// Cache reads alongside `cache_supported = false` are flagged inconsistent.
#[test]
fn read_with_support_false_is_inconsistent() {
let mut run = usage(2000, 500, 0, 10);
run.cache_supported = false;
assert_eq!(
classify_cache_run(&run, &supported()),
CacheConformanceClassification::ProviderFieldInconsistent
);
}
// `cache_read_input_tokens` / `cache_creation_input_tokens` are accepted as
// aliases for the cache-read / cache-write counters.
#[test]
fn normalize_reads_anthropic_aliases() {
let raw = json!({
"input_tokens": 4000,
"output_tokens": 120,
"cache_read_input_tokens": 3500,
"cache_creation_input_tokens": 500,
});
let normalized = NormalizedCacheUsage::from_usage_value(&raw);
assert_eq!(normalized.cache_read_tokens, 3500);
assert_eq!(normalized.cache_write_tokens, 500);
assert_eq!(normalized.fresh_input_tokens, 0);
assert!(normalized.cache_supported);
assert!(normalized.missing_fields.is_empty());
}
// `prompt_tokens_details.cached_tokens` is read as the cache-read counter.
#[test]
fn normalize_reads_openai_nested_cached_tokens() {
let raw = json!({
"prompt_tokens": 3000,
"completion_tokens": 90,
"prompt_tokens_details": { "cached_tokens": 2048 },
});
let normalized = NormalizedCacheUsage::from_usage_value(&raw);
assert_eq!(normalized.input_tokens, 3000);
assert_eq!(normalized.cache_read_tokens, 2048);
assert_eq!(normalized.fresh_input_tokens, 952);
}
// A write-only first run followed by a cache-reading repeat run is expected
// to produce a `CacheEffective` verdict with one run in each bucket.
#[test]
fn repeat_run_cache_read_yields_cache_effective_verdict() {
let raw = json!({
"provider": "anthropic",
"model": "claude-sonnet-4-6",
"runs": [
{ "usage": { "input_tokens": 4000, "output_tokens": 80, "cache_read_tokens": 0, "cache_creation_input_tokens": 3800 } },
{ "usage": { "input_tokens": 4000, "output_tokens": 80, "cache_read_tokens": 3800, "cache_creation_input_tokens": 0 } }
]
});
let report =
classify_cache_conformance_fixture("", "", &raw.to_string()).expect("classify");
assert_eq!(report.verdict, CacheVerdict::CacheEffective);
assert!(!report.dogfood_failure);
assert_eq!(report.bucket_counts.cache_effective, 1);
assert_eq!(report.bucket_counts.cache_supported_miss, 1);
}
// Expects the capability lookup to report ollama/qwen3 as non-caching, so
// the verdict is `UnsupportedZero` and not a dogfood failure.
#[test]
fn non_cache_provider_does_not_fail_dogfood() {
let raw = json!({
"provider": "ollama",
"model": "qwen3",
"runs": [
{ "usage": { "input_tokens": 4000, "output_tokens": 80 } },
{ "usage": { "input_tokens": 4000, "output_tokens": 80 } }
]
});
let report =
classify_cache_conformance_fixture("", "", &raw.to_string()).expect("classify");
assert_eq!(report.verdict, CacheVerdict::UnsupportedZero);
assert!(!report.dogfood_failure);
}
// Two runs on a route expected to support caching, neither reading from
// cache: the verdict is a miss and counts as a dogfood failure.
#[test]
fn supported_route_that_never_caches_fails_dogfood() {
let raw = json!({
"provider": "anthropic",
"model": "claude-sonnet-4-6",
"runs": [
{ "usage": { "input_tokens": 4000, "output_tokens": 80, "cache_creation_input_tokens": 3800 } },
{ "usage": { "input_tokens": 4000, "output_tokens": 80, "cache_creation_input_tokens": 3800 } }
]
});
let report =
classify_cache_conformance_fixture("", "", &raw.to_string()).expect("classify");
assert_eq!(report.verdict, CacheVerdict::CacheSupportedMiss);
assert!(report.dogfood_failure);
}
}