use chrono::{DateTime, Utc};
use nemo_flow::codec::response::Usage;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::types::AgentIdentity;
/// Request-side facts consulted when a cache miss has to be explained.
///
/// Every `Option` field is left out of serialized output when it is `None`
/// and falls back to `None` when the key is absent on input.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CacheRequestFacts {
    /// Provider label for the request.
    pub provider: String,
    /// Length of the stable prefix.
    /// NOTE(review): the unit is not evident from this file — confirm with the producer.
    pub stable_prefix_length: usize,
    /// Token count of the stable prefix; compared against `required_min_tokens`
    /// by `below_minimum_threshold_diagnosis`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stable_prefix_tokens: Option<u32>,
    /// Minimum token count the prefix must reach; see `stable_prefix_tokens`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub required_min_tokens: Option<u32>,
    /// Span id of the first mismatch. `prefix_mismatch_diagnosis` needs this and
    /// the three fields that follow to all be present.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub first_mismatch_span_id: Option<String>,
    /// Sequence index of the first mismatch.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub first_mismatch_sequence_index: Option<u32>,
    /// Hash prefix that was expected at the mismatch.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub expected_hash_prefix: Option<String>,
    /// Hash prefix that was actually observed at the mismatch.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub actual_hash_prefix: Option<String>,
    /// Retention window, in seconds, used by `retention_expired_diagnosis`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub retention_window_secs: Option<f64>,
    /// Observed gap, in seconds, that is compared against `retention_window_secs`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub observed_gap_secs: Option<f64>,
    /// Fact names copied into `CacheMissEvidence::Unknown` when no other
    /// diagnosis applies; empty when the key is absent on input.
    #[serde(default)]
    pub missing_facts: Vec<String>,
}
/// Explanation of a single cache miss: prose for humans plus structured evidence.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CacheMissDiagnosis {
/// Sentence describing what was observed.
pub summary: String,
/// Sentence suggesting what to do about it.
pub recommendation: String,
/// Structured values that back the summary.
pub evidence: CacheMissEvidence,
}
/// Structured evidence attached to a `CacheMissDiagnosis`.
///
/// Serialized as an internally tagged enum: the variant name is written in
/// snake_case under the `kind` key alongside the variant's own fields.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum CacheMissEvidence {
/// Built by `prefix_mismatch_diagnosis`; both hash prefixes are passed
/// through `canonicalize_hash_prefix` there.
PrefixMismatch {
first_mismatch_span_id: String,
sequence_index: u32,
expected_hash_prefix: String,
actual_hash_prefix: String,
},
/// Built by `below_minimum_threshold_diagnosis` when the observed token
/// count is strictly below the required minimum.
BelowMinimumThreshold {
observed_prefix_tokens: u32,
required_min_tokens: u32,
estimation_source: String,
},
/// Built by `retention_expired_diagnosis`, which only runs for the
/// Anthropic provider.
RetentionExpired {
observed_gap_secs: f64,
retention_window_secs: f64,
provider_semantics: String,
},
/// Built by `unknown_diagnosis` when no other diagnosis applies.
Unknown {
missing_facts: Vec<String>,
},
}
/// Coarse classification of why a request did not read from the cache.
///
/// Serialized as an internally tagged enum: the variant name is written in
/// snake_case under the `reason` key. Within this file only `PrefixMismatch`,
/// `BelowMinimumThreshold`, `RetentionExpired`, `ColdStart` and `Unknown` are
/// ever produced; the remaining variants are not constructed here.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(tag = "reason", rename_all = "snake_case")]
pub enum CacheMissReason {
PrefixMismatch,
BelowMinimumThreshold,
RetentionExpired,
RoutingMismatch,
Evicted,
UnsupportedFeature,
/// Assigned by `CacheTelemetryEvent::from_usage` for Anthropic responses
/// that read nothing from cache but report cache-creation tokens.
ColdStart,
/// Fallback used by `classify_cache_miss` when no diagnosis matches.
Unknown,
/// Free-form reason carrying its own description.
Other {
description: String,
},
}
/// Hit-rate summary record.
///
/// NOTE(review): nothing in this file constructs or reads this type, so the
/// field notes below are inferred from names only — confirm against the caller.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CacheHitRate {
/// Presumably a fraction comparable to `CacheTelemetryEvent::hit_rate` — TODO confirm.
pub hit_rate: f64,
/// Presumably the number of samples behind `hit_rate` — TODO confirm.
pub sample_count: u32,
/// Presumably the length of the aggregation window in seconds — TODO confirm.
pub window_duration_secs: f64,
}
/// Providers whose usage reports `CacheTelemetryEvent::from_usage` knows how to interpret.
///
/// No serde rename attribute is applied, so the derived serialization uses the
/// variant names exactly as written, whereas `as_str` returns lowercase labels.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CacheTelemetryProvider {
Anthropic,
OpenAI,
}
impl CacheTelemetryProvider {
    /// Returns the lowercase label for this provider.
    ///
    /// `CacheTelemetryEvent::from_usage` stores this label in the event's
    /// `provider` field.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            Self::OpenAI => "openai",
            Self::Anthropic => "anthropic",
        }
    }
}
/// Per-request cache telemetry record.
///
/// The field notes describe how `CacheTelemetryEvent::from_usage` fills each
/// value; all fields are public, so other constructors may differ.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CacheTelemetryEvent {
    /// Request identifier supplied by the caller.
    pub request_id: Uuid,
    /// Agent identity supplied by the caller.
    pub agent_identity: AgentIdentity,
    /// Tokens read from cache; `0` when the usage report has no value.
    pub cache_read_tokens: u64,
    /// Tokens written to cache; always `0` for the OpenAI provider.
    pub cache_creation_tokens: u64,
    /// Denominator used for `hit_rate`.
    pub total_prompt_tokens: u64,
    /// `cache_read_tokens / total_prompt_tokens`, or `0.0` when the total is zero.
    pub hit_rate: f64,
    /// Miss classification; `None` when any tokens were read from cache.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub miss_reason: Option<CacheMissReason>,
    /// Detailed diagnosis for the miss, when one was produced.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub miss_diagnosis: Option<CacheMissDiagnosis>,
    /// Lowercase provider label taken from `CacheTelemetryProvider::as_str`.
    pub provider: String,
    /// Timestamp supplied by the caller.
    pub timestamp: DateTime<Utc>,
}
impl CacheTelemetryEvent {
    /// Returns the fraction of prompt tokens that were read from cache.
    ///
    /// Yields `0.0` when `total_prompt_tokens` is zero so the division never
    /// produces NaN or infinity.
    #[must_use]
    pub fn compute_hit_rate(cache_read_tokens: u64, total_prompt_tokens: u64) -> f64 {
        if total_prompt_tokens == 0 {
            0.0
        } else {
            cache_read_tokens as f64 / total_prompt_tokens as f64
        }
    }

    /// Builds a telemetry event from a provider usage report.
    ///
    /// Returns `None` when `usage.prompt_tokens` is absent. Missing cache
    /// counters are treated as `0`.
    ///
    /// Token accounting depends on `provider`:
    /// - `Anthropic`: the total is prompt + cache-read + cache-write tokens
    ///   (presumably because cached tokens are reported separately from
    ///   prompt tokens — confirm against the codec).
    /// - `OpenAI`: the total is `prompt_tokens` as reported, and
    ///   cache-creation tokens are recorded as `0`.
    ///
    /// Miss classification:
    /// - any cache-read tokens: no miss reason and no diagnosis;
    /// - Anthropic with cache-write tokens but no reads: `ColdStart`, no diagnosis;
    /// - otherwise: whatever `classify_cache_miss` derives from `request_facts`.
    #[must_use]
    pub fn from_usage(
        request_id: Uuid,
        agent_identity: AgentIdentity,
        provider: CacheTelemetryProvider,
        usage: &Usage,
        timestamp: DateTime<Utc>,
        request_facts: Option<&CacheRequestFacts>,
    ) -> Option<Self> {
        let prompt_tokens = usage.prompt_tokens?;
        let cache_read_tokens = usage.cache_read_tokens.unwrap_or(0);
        let (cache_creation_tokens, total_prompt_tokens) = match provider {
            CacheTelemetryProvider::Anthropic => {
                let cache_creation_tokens = usage.cache_write_tokens.unwrap_or(0);
                // The counters come from an external response; saturate instead of
                // panicking (overflow-checked builds) or wrapping (release builds)
                // if they are implausibly large.
                let total_prompt_tokens = prompt_tokens
                    .saturating_add(cache_read_tokens)
                    .saturating_add(cache_creation_tokens);
                (cache_creation_tokens, total_prompt_tokens)
            }
            CacheTelemetryProvider::OpenAI => (0, prompt_tokens),
        };
        let (miss_reason, miss_diagnosis) = if cache_read_tokens > 0 {
            (None, None)
        } else if matches!(provider, CacheTelemetryProvider::Anthropic) && cache_creation_tokens > 0
        {
            (Some(CacheMissReason::ColdStart), None)
        } else {
            classify_cache_miss(provider, request_facts)
        };
        Some(Self {
            request_id,
            agent_identity,
            cache_read_tokens,
            cache_creation_tokens,
            total_prompt_tokens,
            hit_rate: Self::compute_hit_rate(cache_read_tokens, total_prompt_tokens),
            miss_reason,
            miss_diagnosis,
            provider: provider.as_str().to_string(),
            timestamp,
        })
    }
}
/// Picks the first applicable miss diagnosis and pairs it with its reason.
///
/// Candidates are tried in a fixed order — prefix mismatch, below minimum
/// threshold, retention expired — and each later candidate is evaluated only
/// if the earlier ones produced nothing. When none applies the result is
/// `Unknown` with the diagnosis from `unknown_diagnosis`. Both tuple elements
/// are always `Some`.
fn classify_cache_miss(
    provider: CacheTelemetryProvider,
    request_facts: Option<&CacheRequestFacts>,
) -> (Option<CacheMissReason>, Option<CacheMissDiagnosis>) {
    let (reason, diagnosis) = prefix_mismatch_diagnosis(request_facts)
        .map(|found| (CacheMissReason::PrefixMismatch, found))
        .or_else(|| {
            below_minimum_threshold_diagnosis(request_facts)
                .map(|found| (CacheMissReason::BelowMinimumThreshold, found))
        })
        .or_else(|| {
            retention_expired_diagnosis(provider, request_facts)
                .map(|found| (CacheMissReason::RetentionExpired, found))
        })
        .unwrap_or_else(|| (CacheMissReason::Unknown, unknown_diagnosis(request_facts)));
    (Some(reason), Some(diagnosis))
}
/// Builds a prefix-mismatch diagnosis when the request facts fully describe one.
///
/// Returns `None` unless `request_facts` is present and the span id, sequence
/// index, expected hash prefix and actual hash prefix are all set. Both hash
/// prefixes are normalized with `canonicalize_hash_prefix` before being stored.
fn prefix_mismatch_diagnosis(
    request_facts: Option<&CacheRequestFacts>,
) -> Option<CacheMissDiagnosis> {
    let facts = request_facts?;
    match (
        facts.first_mismatch_span_id.as_deref(),
        facts.first_mismatch_sequence_index,
        facts.expected_hash_prefix.as_deref(),
        facts.actual_hash_prefix.as_deref(),
    ) {
        (Some(span), Some(sequence_index), Some(expected), Some(actual)) => {
            Some(CacheMissDiagnosis {
                summary: format!("Stable prefix diverged at span {span} before cache reuse."),
                recommendation: String::from(
                    "Move or extract the mismatching block after the stable prefix.",
                ),
                evidence: CacheMissEvidence::PrefixMismatch {
                    first_mismatch_span_id: span.to_owned(),
                    sequence_index,
                    expected_hash_prefix: canonicalize_hash_prefix(expected),
                    actual_hash_prefix: canonicalize_hash_prefix(actual),
                },
            })
        }
        // Any missing piece means the mismatch cannot be described.
        _ => None,
    }
}
/// Builds a below-minimum diagnosis when the stable prefix is too short to cache.
///
/// Returns `None` unless `request_facts` is present, both token counts are
/// set, and the observed count is strictly less than the required minimum.
fn below_minimum_threshold_diagnosis(
    request_facts: Option<&CacheRequestFacts>,
) -> Option<CacheMissDiagnosis> {
    let (observed, required) = request_facts
        .and_then(|facts| facts.stable_prefix_tokens.zip(facts.required_min_tokens))
        // A prefix that meets or exceeds the minimum is not this kind of miss.
        .filter(|(observed, required)| observed < required)?;
    Some(CacheMissDiagnosis {
        summary: format!(
            "Stable prefix has {observed} tokens, below the {required}-token cache minimum."
        ),
        recommendation: String::from(
            "Increase the cacheable prefix above the provider minimum or stop expecting a hit.",
        ),
        evidence: CacheMissEvidence::BelowMinimumThreshold {
            observed_prefix_tokens: observed,
            required_min_tokens: required,
            estimation_source: String::from("prompt_ir_token_metadata"),
        },
    })
}
/// Builds a retention-expired diagnosis for Anthropic requests whose observed
/// gap exceeded the retention window.
///
/// Returns `None` when:
/// - `provider` is not `Anthropic`;
/// - `request_facts`, the observed gap, or the retention window is missing;
/// - the gap is not strictly greater than the window, which includes the case
///   where either value is NaN.
fn retention_expired_diagnosis(
    provider: CacheTelemetryProvider,
    request_facts: Option<&CacheRequestFacts>,
) -> Option<CacheMissDiagnosis> {
    if !matches!(provider, CacheTelemetryProvider::Anthropic) {
        return None;
    }
    let facts = request_facts?;
    let observed_gap_secs = facts.observed_gap_secs?;
    let retention_window_secs = facts.retention_window_secs?;
    // Test the positive condition: every comparison involving NaN is false, so
    // an unusable measurement is never reported as an expired window.
    let window_exceeded = observed_gap_secs > retention_window_secs;
    if !window_exceeded {
        return None;
    }
    Some(CacheMissDiagnosis {
        summary: format!(
            "Stable prefix reuse arrived {:.1}s after the {:.1}s retention window.",
            observed_gap_secs, retention_window_secs
        ),
        recommendation:
            "Reuse the stable prefix inside the active retention window or accept a cold rebuild."
                .to_string(),
        evidence: CacheMissEvidence::RetentionExpired {
            observed_gap_secs,
            retention_window_secs,
            provider_semantics:
                "anthropic prompt caching reuses prefixes inside the active retention window"
                    .to_string(),
        },
    })
}
/// Builds the fallback diagnosis used when a miss cannot be classified.
///
/// The evidence carries a copy of `missing_facts` from the request facts, or
/// the single marker `request_facts_unavailable` when no facts were supplied.
fn unknown_diagnosis(request_facts: Option<&CacheRequestFacts>) -> CacheMissDiagnosis {
    let missing_facts = match request_facts {
        Some(facts) => facts.missing_facts.clone(),
        None => vec![String::from("request_facts_unavailable")],
    };
    CacheMissDiagnosis {
        summary: String::from(
            "Cache miss could not be classified from the available request facts.",
        ),
        recommendation: String::from(
            "Capture request facts earlier or keep the miss classified as unknown.",
        ),
        evidence: CacheMissEvidence::Unknown { missing_facts },
    }
}
/// Normalizes a hash string to `sha256:` followed by at most twelve characters.
///
/// A single leading `sha256:` on the input is dropped before truncation, so
/// tagged and untagged inputs yield the same result. Truncation counts
/// characters, not bytes, and the remaining text is not otherwise validated.
fn canonicalize_hash_prefix(value: &str) -> String {
    const PREFIX: &str = "sha256:";
    const HEX_LEN: usize = 12;

    let digest = value.strip_prefix(PREFIX).unwrap_or(value);
    // Byte offset of the character just past the kept ones, or the whole
    // digest when it is already short enough; always a char boundary.
    let cut = digest
        .char_indices()
        .nth(HEX_LEN)
        .map_or(digest.len(), |(offset, _)| offset);

    let mut canonical = String::with_capacity(PREFIX.len() + cut);
    canonical.push_str(PREFIX);
    canonical.push_str(&digest[..cut]);
    canonical
}
// Unit tests are compiled only for test builds and are loaded from the external file named below.
#[cfg(test)]
#[path = "../../tests/unit/acg/telemetry_tests.rs"]
mod tests;