#[derive(Debug, Clone)]
pub(crate) struct ModelCost {
pub(crate) name: &'static str,
pub(crate) input: f64,
pub(crate) cached_input: Option<f64>,
pub(crate) output: f64,
}
pub(crate) const CHAT_COMPLETIONS: &[ModelCost] = &[
ModelCost {
name: "claude-opus-4.5",
input: 5.0,
cached_input: None,
output: 25.0,
},
ModelCost {
name: "claude-opus-4.1",
input: 15.0,
cached_input: None,
output: 75.0,
},
ModelCost {
name: "claude-opus-4",
input: 15.0,
cached_input: None,
output: 75.0,
},
ModelCost {
name: "claude-sonnet-4.5",
input: 3.0,
cached_input: None,
output: 15.0,
},
ModelCost {
name: "claude-sonnet-4",
input: 3.0,
cached_input: None,
output: 15.0,
},
ModelCost {
name: "claude-haiku-4",
input: 0.80,
cached_input: None,
output: 4.0,
},
ModelCost {
name: "claude-3-opus",
input: 15.0,
cached_input: None,
output: 75.0,
},
ModelCost {
name: "claude-3-7-sonnet",
input: 3.0,
cached_input: None,
output: 15.0,
},
ModelCost {
name: "claude-3-5-haiku",
input: 0.80,
cached_input: None,
output: 4.0,
},
ModelCost {
name: "gpt-4.1",
input: 2.00,
cached_input: Some(0.50),
output: 8.00,
},
ModelCost {
name: "gpt-4.1-mini",
input: 0.40,
cached_input: Some(0.10),
output: 1.60,
},
ModelCost {
name: "gpt-4.1-nano",
input: 0.10,
cached_input: Some(0.025),
output: 0.40,
},
ModelCost {
name: "gpt-4.5-preview",
input: 75.00,
cached_input: Some(37.50),
output: 150.00,
},
ModelCost {
name: "gpt-4o",
input: 2.50,
cached_input: Some(1.25),
output: 10.00,
},
ModelCost {
name: "gpt-4o-audio-preview",
input: 2.50,
cached_input: None,
output: 10.00,
},
ModelCost {
name: "gpt-4o-realtime-preview",
input: 5.00,
cached_input: Some(2.50),
output: 20.00,
},
ModelCost {
name: "gpt-4o-mini",
input: 0.15,
cached_input: Some(0.075),
output: 0.60,
},
ModelCost {
name: "gpt-4o-mini-audio-preview",
input: 0.15,
cached_input: None,
output: 0.60,
},
ModelCost {
name: "gpt-4o-mini-realtime-preview",
input: 0.60,
cached_input: Some(0.30),
output: 2.40,
},
ModelCost {
name: "o1",
input: 15.00,
cached_input: Some(7.50),
output: 60.00,
},
ModelCost {
name: "o1-pro",
input: 150.00,
cached_input: None,
output: 600.00,
},
ModelCost {
name: "o3",
input: 2.00,
cached_input: Some(0.50),
output: 8.00,
},
ModelCost {
name: "o4-mini",
input: 1.10,
cached_input: Some(0.275),
output: 4.40,
},
ModelCost {
name: "o3-mini",
input: 1.10,
cached_input: Some(0.55),
output: 4.40,
},
ModelCost {
name: "o1-mini",
input: 1.10,
cached_input: Some(0.55),
output: 4.40,
},
ModelCost {
name: "gpt-4o-mini-search-preview",
input: 0.15,
cached_input: None,
output: 0.60,
},
ModelCost {
name: "gpt-4o-search-preview",
input: 2.50,
cached_input: None,
output: 10.00,
},
ModelCost {
name: "computer-use-preview",
input: 3.00,
cached_input: None,
output: 12.00,
},
ModelCost {
name: "gemini-3.1-pro",
input: 2.00,
cached_input: None,
output: 12.00,
},
ModelCost {
name: "gemini-3-flash",
input: 0.50,
cached_input: None,
output: 3.00,
},
ModelCost {
name: "gemini-3.1-flash-lite",
input: 0.25,
cached_input: None,
output: 1.50,
},
ModelCost {
name: "gpt-5.4-pro",
input: 30.00,
cached_input: None,
output: 180.00,
},
ModelCost {
name: "gpt-5.4",
input: 2.50,
cached_input: Some(0.25),
output: 15.00,
},
ModelCost {
name: "gpt-5.4-mini",
input: 0.75,
cached_input: Some(0.075),
output: 4.50,
},
ModelCost {
name: "gpt-5.4-nano",
input: 0.20,
cached_input: Some(0.02),
output: 1.25,
},
ModelCost {
name: "gpt-5.2-chat-latest",
input: 1.75,
cached_input: Some(0.175),
output: 14.00,
},
ModelCost {
name: "gpt-5.2-pro",
input: 21.00,
cached_input: None,
output: 168.00,
},
ModelCost {
name: "gpt-5.2",
input: 1.75,
cached_input: Some(0.175),
output: 14.00,
},
ModelCost {
name: "gpt-5.1-chat-latest",
input: 1.25,
cached_input: Some(0.125),
output: 10.00,
},
ModelCost {
name: "gpt-5.1-codex-max",
input: 1.25,
cached_input: Some(0.125),
output: 10.00,
},
ModelCost {
name: "gpt-5.1-codex",
input: 1.25,
cached_input: Some(0.125),
output: 10.00,
},
ModelCost {
name: "gpt-5.1",
input: 1.25,
cached_input: Some(0.125),
output: 10.00,
},
ModelCost {
name: "gpt-5-chat-latest",
input: 1.25,
cached_input: Some(0.125),
output: 10.00,
},
ModelCost {
name: "gpt-5-codex",
input: 1.25,
cached_input: Some(0.125),
output: 10.00,
},
ModelCost {
name: "gpt-5-pro",
input: 15.00,
cached_input: None,
output: 120.00,
},
ModelCost {
name: "gpt-5-mini",
input: 0.25,
cached_input: Some(0.025),
output: 2.00,
},
ModelCost {
name: "gpt-5-nano",
input: 0.05,
cached_input: Some(0.005),
output: 0.40,
},
ModelCost {
name: "gpt-5",
input: 1.25,
cached_input: Some(0.125),
output: 10.00,
},
];
pub(crate) const FLEX_PRICES: &[ModelCost] = &[
ModelCost {
name: "gpt-5.4",
input: 1.25,
cached_input: Some(0.13),
output: 7.50,
},
ModelCost {
name: "gpt-5.4-pro",
input: 15.00,
cached_input: None,
output: 90.00,
},
ModelCost {
name: "gpt-5.4-mini",
input: 0.375,
cached_input: Some(0.0375),
output: 2.25,
},
ModelCost {
name: "gpt-5.4-nano",
input: 0.10,
cached_input: Some(0.01),
output: 0.625,
},
ModelCost {
name: "gpt-5.2",
input: 0.875,
cached_input: Some(0.0875),
output: 7.00,
},
ModelCost {
name: "gpt-5.1",
input: 0.625,
cached_input: Some(0.0625),
output: 5.00,
},
ModelCost {
name: "gpt-5",
input: 0.625,
cached_input: Some(0.0625),
output: 5.00,
},
ModelCost {
name: "gpt-5-mini",
input: 0.125,
cached_input: Some(0.0125),
output: 1.00,
},
ModelCost {
name: "gpt-5-nano",
input: 0.025,
cached_input: Some(0.0025),
output: 0.20,
},
ModelCost {
name: "o3",
input: 1.00,
cached_input: Some(0.25),
output: 4.00,
},
ModelCost {
name: "o4-mini",
input: 0.55,
cached_input: Some(0.138),
output: 2.20,
},
];
pub(crate) const PRIORITY_PRICES: &[ModelCost] = &[ModelCost {
name: "gpt-5.4",
input: 5.00,
cached_input: Some(0.50),
output: 30.00,
}];
fn lookup<'a>(table: &'a [ModelCost], model: &str) -> Option<&'a ModelCost> {
table
.iter()
.filter(|mc| model.starts_with(mc.name))
.max_by_key(|mc| mc.name.len())
}
pub(crate) fn cost(
model: &str,
service_tier: Option<&str>,
usage: crate::chat_completions::ChatUsage,
) -> Option<f64> {
let tier_table = match service_tier {
Some("flex") => Some(FLEX_PRICES),
Some("priority") => Some(PRIORITY_PRICES),
_ => None,
};
let model_cost = tier_table
.and_then(|t| lookup(t, model))
.or_else(|| lookup(CHAT_COMPLETIONS, model))?;
let (cached_prompt_tokens, uncached_prompt_tokens) =
if let Some(details) = usage.prompt_token_details {
(
details.cached_tokens,
usage.prompt_tokens - details.cached_tokens,
)
} else {
(0, usage.prompt_tokens)
};
let usage_cost = model_cost.input * uncached_prompt_tokens as f64 / 1_000_000.0
+ model_cost.cached_input.unwrap_or(model_cost.input) * cached_prompt_tokens as f64
/ 1_000_000.0
+ model_cost.output * usage.completion_tokens as f64 / 1_000_000.0;
Some(usage_cost)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_cost() {
let usage = crate::chat_completions::ChatUsage {
prompt_tokens: 2000000,
completion_tokens: 1000000,
prompt_token_details: Some(crate::chat_completions::PromptTokenDetails {
cached_tokens: 1000000,
}),
completion_token_details: None,
total_tokens: 2000000,
};
let cost = cost("gpt-4o", None, usage);
assert_eq!(cost, Some(2.50 + 1.25 + 10.00));
}
#[test]
fn test_flex_cost() {
let usage = crate::chat_completions::ChatUsage {
prompt_tokens: 1000000,
completion_tokens: 1000000,
prompt_token_details: None,
completion_token_details: None,
total_tokens: 2000000,
};
let c = cost("gpt-5.4", Some("flex"), usage);
assert_eq!(c, Some(1.25 + 7.50));
}
#[test]
fn test_priority_cost() {
let usage = crate::chat_completions::ChatUsage {
prompt_tokens: 1000000,
completion_tokens: 1000000,
prompt_token_details: None,
completion_token_details: None,
total_tokens: 2000000,
};
let c = cost("gpt-5.4", Some("priority"), usage);
assert_eq!(c, Some(5.00 + 30.00));
}
#[test]
fn test_tier_falls_back_to_standard() {
let usage = crate::chat_completions::ChatUsage {
prompt_tokens: 1000000,
completion_tokens: 1000000,
prompt_token_details: None,
completion_token_details: None,
total_tokens: 2000000,
};
let standard = cost("gpt-4o", None, usage);
let flex = cost("gpt-4o", Some("flex"), usage);
assert_eq!(standard, flex);
}
}