use crate::pricing::ModelPricing;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Canonical URL of LiteLLM's community-maintained pricing table; the raw
/// GitHub file is fetched directly (no API key required).
pub const LITELLM_PRICING_URL: &str =
"https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
/// One entry of the LiteLLM pricing JSON, keyed in the document by model id.
///
/// Only the fields needed for Claude pricing are deserialized; everything
/// else in the upstream document is ignored. All fields are optional because
/// upstream entries are not guaranteed to carry every key.
#[derive(Debug, Deserialize)]
struct LiteLLMModelEntry {
// Cost in USD per single input token (e.g. 3e-06 == $3 / 1M tokens).
#[serde(default)]
input_cost_per_token: Option<f64>,
// Cost in USD per single output token.
#[serde(default)]
output_cost_per_token: Option<f64>,
// USD per token written to the prompt cache (cache creation).
#[serde(default)]
cache_creation_input_token_cost: Option<f64>,
// USD per token read back from the prompt cache.
#[serde(default)]
cache_read_input_token_cost: Option<f64>,
// Provider tag used by LiteLLM, e.g. "anthropic" or "bedrock".
#[serde(default)]
litellm_provider: Option<String>,
// Model mode, e.g. "chat"; non-chat entries are filtered out downstream.
#[serde(default)]
mode: Option<String>,
}
/// On-disk cache wrapper for a fetched pricing table.
///
/// Serialized alongside the models so callers can decide whether the cache
/// is stale (`last_updated`) and where it came from (`source`).
#[derive(Debug, Serialize, Deserialize)]
pub struct CachedPricing {
// UTC timestamp of when the pricing data was fetched.
pub last_updated: chrono::DateTime<chrono::Utc>,
// Per-model pricing, keyed by model id (e.g. "claude-opus-4-5").
pub models: HashMap<String, ModelPricing>,
// Human-readable origin of the data, e.g. a URL.
pub source: String,
}
/// Downloads the LiteLLM pricing document and parses it into per-model
/// Claude pricing.
///
/// # Errors
/// Returns an error when the request fails, the server responds with a
/// non-success HTTP status, the body cannot be read, or the JSON cannot be
/// parsed.
pub async fn fetch_litellm_pricing() -> Result<HashMap<String, ModelPricing>> {
    tracing::info!("Fetching pricing from LiteLLM: {}", LITELLM_PRICING_URL);
    let response = reqwest::get(LITELLM_PRICING_URL)
        .await
        .context("Failed to fetch LiteLLM pricing")?
        // Without this, a 404/5xx HTML body would be fed to the JSON parser
        // and surface as a confusing parse error; fail fast on bad status.
        .error_for_status()
        .context("LiteLLM pricing request returned an error status")?;
    let json_text = response
        .text()
        .await
        .context("Failed to read LiteLLM response")?;
    parse_litellm_json(&json_text)
}
/// Parses the LiteLLM pricing JSON into a map of Claude chat-model prices.
///
/// An entry is kept only when it:
/// - belongs to the `anthropic` or `bedrock` provider (entries with no
///   provider tag pass through to the prefix check),
/// - is `chat` mode (or lists no mode),
/// - has a model id starting with `claude-`, and
/// - carries both input and output per-token costs.
///
/// Cache multipliers are derived from the explicit per-token cache costs
/// relative to the input cost. When cache costs are missing — or the input
/// cost is non-positive, which would make the division produce `inf`/`NaN` —
/// the defaults (0.1 read, 1.25 write) are used instead.
///
/// # Errors
/// Returns an error when `json` is not a valid LiteLLM pricing document.
fn parse_litellm_json(json: &str) -> Result<HashMap<String, ModelPricing>> {
    let entries: HashMap<String, LiteLLMModelEntry> =
        serde_json::from_str(json).context("Failed to parse LiteLLM JSON")?;
    let mut pricing_map = HashMap::new();
    for (model_id, entry) in entries {
        if let Some(provider) = &entry.litellm_provider {
            if !matches!(provider.as_str(), "anthropic" | "bedrock") {
                continue;
            }
        }
        if let Some(mode) = &entry.mode {
            if mode != "chat" {
                continue;
            }
        }
        if !model_id.starts_with("claude-") {
            continue;
        }
        // Both base costs are required; skip incomplete entries.
        let Some(input_cost) = entry.input_cost_per_token else {
            continue;
        };
        let Some(output_cost) = entry.output_cost_per_token else {
            continue;
        };
        // Upstream costs are USD per token; we store USD per million tokens.
        let input_price_per_million = input_cost * 1_000_000.0;
        let output_price_per_million = output_cost * 1_000_000.0;
        // Guard `input_cost > 0.0`: a zero input cost would otherwise yield
        // infinite/NaN multipliers and silently corrupt downstream cost math.
        let (cache_read_mult, cache_write_mult) = match (
            entry.cache_read_input_token_cost,
            entry.cache_creation_input_token_cost,
        ) {
            (Some(cache_read), Some(cache_write)) if input_cost > 0.0 => {
                (cache_read / input_cost, cache_write / input_cost)
            }
            _ => (0.1, 1.25),
        };
        pricing_map.insert(
            model_id,
            ModelPricing {
                input_price_per_million,
                output_price_per_million,
                cache_read_multiplier: cache_read_mult,
                cache_write_multiplier: cache_write_mult,
            },
        );
    }
    tracing::info!(
        "Parsed {} Claude model prices from LiteLLM",
        pricing_map.len()
    );
    Ok(pricing_map)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a minimal three-entry LiteLLM document through the parser and
    /// verifies that only the Claude chat models survive filtering and that
    /// per-token costs are scaled to per-million prices with the expected
    /// cache multipliers.
    #[test]
    fn test_parse_litellm_json() {
        let json = r#"{
"claude-opus-4-5": {
"input_cost_per_token": 5e-06,
"output_cost_per_token": 2.5e-05,
"cache_creation_input_token_cost": 6.25e-06,
"cache_read_input_token_cost": 5e-07,
"litellm_provider": "anthropic",
"mode": "chat"
},
"claude-sonnet-4-5-20250929": {
"input_cost_per_token": 3e-06,
"output_cost_per_token": 1.5e-05,
"cache_creation_input_token_cost": 3.75e-06,
"cache_read_input_token_cost": 3e-07,
"litellm_provider": "anthropic",
"mode": "chat"
},
"gpt-4": {
"input_cost_per_token": 1e-05,
"output_cost_per_token": 3e-05,
"litellm_provider": "openai",
"mode": "chat"
}
}"#;
        let parsed = parse_litellm_json(json).unwrap();

        // The OpenAI entry must have been filtered out.
        assert_eq!(parsed.len(), 2);

        let sonnet = parsed.get("claude-sonnet-4-5-20250929").unwrap();
        assert_eq!(sonnet.input_price_per_million, 3.0);
        assert_eq!(sonnet.output_price_per_million, 15.0);

        let opus = parsed.get("claude-opus-4-5").unwrap();
        assert_eq!(opus.input_price_per_million, 5.0);
        assert_eq!(opus.output_price_per_million, 25.0);
        // Multipliers derived from the explicit cache costs: 5e-07 / 5e-06
        // and 6.25e-06 / 5e-06.
        assert!((opus.cache_read_multiplier - 0.1).abs() < 0.01);
        assert!((opus.cache_write_multiplier - 1.25).abs() < 0.01);
    }
}