pub mod memory;
pub mod redis;
#[cfg(feature = "qdrant")]
pub mod semantic;
use bytes::Bytes;
use sha2::{Digest, Sha256};
use std::collections::BTreeMap;
use std::time::Instant;
/// Largest `cache_ttl_secs` value accepted by [`parse_cache_ttl`], in seconds
/// (86 400 s = 24 hours).
pub const MAX_TTL_SECS: u64 = 86_400;
/// A cached response together with the metadata stored alongside it.
///
/// This is the value type read and written through [`CacheBackend`].
#[derive(Clone, Debug)]
pub struct CacheEntry {
/// Response payload, held as `Bytes`.
pub response_body: Bytes,
/// Model name associated with the entry.
pub model: String,
/// `Instant` recorded for the entry.
/// NOTE(review): presumably the moment the entry was cached — confirm against the backends.
pub created_at: Instant,
/// Optional TTL in seconds carried with the entry.
/// NOTE(review): presumably a per-request override such as `cache_ttl_secs` — confirm against callers.
pub ttl_secs: Option<u64>,
}
/// Selects the key prefix used by [`cache_key_for_request`], so that keys
/// produced for one namespace never equal keys produced for the other.
#[derive(Debug, Clone, Copy)]
pub enum CacheNamespace {
    /// Keys are prefixed with `anth`.
    Anthropic,
    /// Keys are prefixed with `oai`.
    OpenAI,
}

impl CacheNamespace {
    /// Returns the short string placed before the `:` in a cache key.
    fn prefix(self) -> &'static str {
        // Kept exhaustive on purpose: a new variant must pick its own prefix.
        match self {
            CacheNamespace::OpenAI => "oai",
            CacheNamespace::Anthropic => "anth",
        }
    }
}
/// Asynchronous key/value store for [`CacheEntry`] values.
///
/// Implementors must be `Send + Sync`, and the futures returned by both
/// methods must be `Send`.
pub trait CacheBackend: Send + Sync {
/// Looks up `key` and resolves to the stored entry, or `None`.
/// NOTE(review): `None` presumably covers both "never stored" and "expired" — confirm per backend.
fn get(&self, key: &str) -> impl std::future::Future<Output = Option<CacheEntry>> + Send;
/// Stores `entry` under `key` with a TTL of `ttl_secs` seconds.
///
/// The future resolves to `()`, so this signature has no way to report a
/// failed write to the caller.
fn put(
&self,
key: &str,
entry: CacheEntry,
ttl_secs: u64,
) -> impl std::future::Future<Output = ()> + Send;
}
/// Builds the cache key for a request body.
///
/// Only an allow-listed set of top-level fields from `body` takes part in the
/// key; any other field is ignored, and a field whose value is JSON `null` is
/// treated the same as an absent field. The selected fields, plus the two
/// `scope` values, are collected into a `BTreeMap` (so the order of fields in
/// `body` does not matter), serialized to JSON and hashed with SHA-256.
///
/// If `body` is not a JSON object, only the scope values are hashed.
///
/// # Returns
///
/// `"<namespace prefix>:<hex-encoded SHA-256 digest>"`.
///
/// NOTE(review): request parameters outside the allow-list (for example
/// `top_k` or `stop_sequences`) do not influence the key — confirm that this
/// is intended for every upstream API served.
pub fn cache_key_for_request(
    body: &serde_json::Value,
    ns: CacheNamespace,
    scope: &CacheScope<'_>,
) -> String {
    // Top-level request fields that participate in the key.
    const CACHE_FIELDS: &[&str] = &[
        "cache_ttl_secs",
        "max_tokens",
        "messages",
        "model",
        "system",
        "stop",
        "temperature",
        "tool_choice",
        "tools",
        "top_p",
    ];

    let mut canonical: BTreeMap<&str, serde_json::Value> = BTreeMap::new();

    if let Some(fields) = body.as_object() {
        canonical.extend(CACHE_FIELDS.iter().filter_map(|&name| {
            match fields.get(name) {
                // `null` is deliberately indistinguishable from "not present".
                Some(value) if !value.is_null() => Some((name, value.clone())),
                _ => None,
            }
        }));
    }

    // Scope values are mixed in so identical bodies under a different backend
    // or auth identity produce different keys.
    for (scope_key, scope_value) in [
        ("_scope_backend", scope.backend_name),
        ("_scope_auth", scope.auth_identity),
    ] {
        canonical.insert(
            scope_key,
            serde_json::Value::String(scope_value.to_string()),
        );
    }

    // A serialization failure falls back to hashing the empty string.
    let serialized = serde_json::to_string(&canonical).unwrap_or_default();

    let mut hasher = Sha256::new();
    hasher.update(serialized.as_bytes());
    let digest_hex = hex::encode(hasher.finalize());

    format!("{}:{}", ns.prefix(), digest_hex)
}
/// Values mixed into every cache key by [`cache_key_for_request`] in addition
/// to the request body, so that identical bodies under a different scope get
/// different keys.
pub struct CacheScope<'a> {
/// Backend name; hashed under the `_scope_backend` entry.
pub backend_name: &'a str,
/// Auth identity; hashed under the `_scope_auth` entry.
pub auth_identity: &'a str,
}
/// Extracts and validates the optional `cache_ttl_secs` field of a request body.
///
/// # Returns
///
/// * `Ok(None)` when the field is absent or JSON `null`.
/// * `Ok(Some(secs))` for a number in `0..=MAX_TTL_SECS`. A fractional value
///   within that range is truncated toward zero (e.g. `1.9` becomes `1`).
///
/// # Errors
///
/// Returns a human-readable message when the value is negative, greater than
/// [`MAX_TTL_SECS`], or not a number at all.
pub fn parse_cache_ttl(body: &serde_json::Value) -> Result<Option<u64>, String> {
    let val = match body.get("cache_ttl_secs") {
        None => return Ok(None),
        Some(v) if v.is_null() => return Ok(None),
        Some(v) => v,
    };

    if let Some(n) = val.as_u64() {
        if n > MAX_TTL_SECS {
            return Err(format!("cache_ttl_secs must be <= {MAX_TTL_SECS}, got {n}"));
        }
        return Ok(Some(n));
    }

    // `as_u64` already failed, so an integer reaching this point is negative.
    if let Some(n) = val.as_i64() {
        return Err(format!("cache_ttl_secs must be non-negative, got {n}"));
    }

    if let Some(n) = val.as_f64() {
        if n < 0.0 {
            return Err(format!("cache_ttl_secs must be non-negative, got {n}"));
        }
        // Check the bound on the float itself, *before* truncating: otherwise a
        // value such as 86400.5 would be cut down to 86400 and slip past the
        // limit. The message reports the value the caller actually sent.
        if n > MAX_TTL_SECS as f64 {
            return Err(format!("cache_ttl_secs must be <= {MAX_TTL_SECS}, got {n}"));
        }
        // `n` is within [0, MAX_TTL_SECS], so the cast only drops the fraction.
        return Ok(Some(n as u64));
    }

    Err(format!("cache_ttl_secs must be a number, got {}", val))
}
/// Cache settings: default TTL, capacity and an optional Redis URL.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// TTL in seconds (defaults to 300).
    pub ttl_secs: u64,
    /// Maximum number of entries (defaults to 10 000).
    pub max_entries: u64,
    /// Redis connection URL, if one is configured.
    pub redis_url: Option<String>,
}

impl Default for CacheConfig {
    /// Returns a 300-second TTL, 10 000 max entries and no Redis URL.
    fn default() -> Self {
        CacheConfig {
            ttl_secs: 300,
            max_entries: 10_000,
            redis_url: None,
        }
    }
}

impl CacheConfig {
    /// Builds a configuration from the process environment.
    ///
    /// * `CACHE_TTL_SECS` sets `ttl_secs`.
    /// * `CACHE_MAX_ENTRIES` sets `max_entries`.
    /// * `REDIS_URL` sets `redis_url` (`None` when the variable is unset).
    ///
    /// A numeric variable that is unset, or whose value does not parse as a
    /// `u64`, silently falls back to the [`Default`] value.
    pub fn from_env() -> Self {
        /// Reads `name` as a `u64`, using `fallback` when unset or unparsable.
        fn env_u64_or(name: &str, fallback: u64) -> u64 {
            match std::env::var(name) {
                Ok(raw) => raw.parse().unwrap_or(fallback),
                Err(_) => fallback,
            }
        }

        let defaults = Self::default();
        CacheConfig {
            ttl_secs: env_u64_or("CACHE_TTL_SECS", defaults.ttl_secs),
            max_entries: env_u64_or("CACHE_MAX_ENTRIES", defaults.max_entries),
            redis_url: std::env::var("REDIS_URL").ok(),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Computes the key for `body` under the fixed scope shared by all tests
    /// (backend `"openai"`, auth identity `"k1"`).
    fn key_for(body: &serde_json::Value, ns: CacheNamespace) -> String {
        let scope = CacheScope {
            backend_name: "openai",
            auth_identity: "k1",
        };
        cache_key_for_request(body, ns, &scope)
    }

    #[test]
    fn cache_key_deterministic_same_fields() {
        let body = json!({
            "model": "claude-sonnet-4-6",
            "messages": [{"role": "user", "content": "hello"}],
            "temperature": 0.7,
            "max_tokens": 100
        });
        let first = key_for(&body, CacheNamespace::Anthropic);
        let second = key_for(&body, CacheNamespace::Anthropic);
        assert_eq!(first, second);
        assert!(first.starts_with("anth:"));
    }

    #[test]
    fn cache_key_different_for_different_temperature() {
        let cooler = json!({
            "model": "claude-sonnet-4-6",
            "messages": [{"role": "user", "content": "hello"}],
            "temperature": 0.7
        });
        let warmer = json!({
            "model": "claude-sonnet-4-6",
            "messages": [{"role": "user", "content": "hello"}],
            "temperature": 0.9
        });
        assert_ne!(
            key_for(&cooler, CacheNamespace::Anthropic),
            key_for(&warmer, CacheNamespace::Anthropic)
        );
    }

    #[test]
    fn cache_key_ignores_field_order() {
        let model_first = json!({
            "model": "gpt-4o",
            "temperature": 0.5,
            "messages": [{"role": "user", "content": "hi"}]
        });
        let messages_first = json!({
            "messages": [{"role": "user", "content": "hi"}],
            "model": "gpt-4o",
            "temperature": 0.5
        });
        assert_eq!(
            key_for(&model_first, CacheNamespace::OpenAI),
            key_for(&messages_first, CacheNamespace::OpenAI)
        );
    }

    #[test]
    fn cache_key_ignores_non_cache_fields() {
        let with_stream = json!({
            "model": "gpt-4o",
            "messages": [{"role": "user", "content": "hi"}],
            "stream": true
        });
        let without_stream = json!({
            "model": "gpt-4o",
            "messages": [{"role": "user", "content": "hi"}]
        });
        assert_eq!(
            key_for(&with_stream, CacheNamespace::OpenAI),
            key_for(&without_stream, CacheNamespace::OpenAI)
        );
    }

    #[test]
    fn cache_key_namespace_differs() {
        let body = json!({
            "model": "test",
            "messages": []
        });
        let anth = key_for(&body, CacheNamespace::Anthropic);
        let oai = key_for(&body, CacheNamespace::OpenAI);
        assert_ne!(anth, oai);
        assert!(anth.starts_with("anth:"));
        assert!(oai.starts_with("oai:"));
    }

    #[test]
    fn cache_key_null_field_same_as_absent() {
        let explicit_null = json!({
            "model": "gpt-4o",
            "messages": [],
            "temperature": null
        });
        let absent = json!({
            "model": "gpt-4o",
            "messages": []
        });
        assert_eq!(
            key_for(&explicit_null, CacheNamespace::OpenAI),
            key_for(&absent, CacheNamespace::OpenAI)
        );
    }

    #[test]
    fn parse_cache_ttl_absent() {
        let body = json!({"model": "test"});
        assert_eq!(parse_cache_ttl(&body), Ok(None));
    }

    #[test]
    fn parse_cache_ttl_null() {
        let body = json!({"cache_ttl_secs": null});
        assert_eq!(parse_cache_ttl(&body), Ok(None));
    }

    #[test]
    fn parse_cache_ttl_zero() {
        let body = json!({"cache_ttl_secs": 0});
        assert_eq!(parse_cache_ttl(&body), Ok(Some(0)));
    }

    #[test]
    fn parse_cache_ttl_valid() {
        let body = json!({"cache_ttl_secs": 600});
        assert_eq!(parse_cache_ttl(&body), Ok(Some(600)));
    }

    #[test]
    fn parse_cache_ttl_max() {
        let body = json!({"cache_ttl_secs": 86400});
        assert_eq!(parse_cache_ttl(&body), Ok(Some(86400)));
    }

    #[test]
    fn parse_cache_ttl_over_max() {
        let body = json!({"cache_ttl_secs": 86401});
        assert!(parse_cache_ttl(&body).is_err());
    }

    #[test]
    fn parse_cache_ttl_negative() {
        let body = json!({"cache_ttl_secs": -1});
        assert!(parse_cache_ttl(&body).is_err());
    }

    #[test]
    fn parse_cache_ttl_string() {
        let body = json!({"cache_ttl_secs": "not a number"});
        assert!(parse_cache_ttl(&body).is_err());
    }

    #[test]
    fn cache_key_differs_for_different_cache_ttl_secs() {
        let short_ttl = json!({
            "model": "gpt-4o",
            "messages": [{"role": "user", "content": "hi"}],
            "cache_ttl_secs": 60
        });
        let long_ttl = json!({
            "model": "gpt-4o",
            "messages": [{"role": "user", "content": "hi"}],
            "cache_ttl_secs": 3600
        });
        assert_ne!(
            key_for(&short_ttl, CacheNamespace::OpenAI),
            key_for(&long_ttl, CacheNamespace::OpenAI),
            "different cache_ttl_secs must produce different cache keys"
        );
    }
}