use crate::normalize::normalize_model_id;
use crate::usage::Usage;
/// Input-token count above which [`Pricing::cost_for`] switches from the
/// standard rates to the long-prompt rates.
///
/// The comparison is strict: a usage with exactly this many input tokens is
/// still priced at the standard rates.
pub const LONG_PROMPT_THRESHOLD: u64 = 200_000;
/// Per-model price sheet.
///
/// Every rate is a price per one million tokens (see [`Pricing::cost_for`],
/// which divides the weighted sum by 1,000,000). The currency is not stated in
/// this file — presumably USD; confirm against the data source.
///
/// Two tiers are stored: the standard rates, and the `*_long_*` rates that
/// `cost_for` selects when `usage.input_tokens` exceeds
/// [`LONG_PROMPT_THRESHOLD`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Pricing {
    /// Standard-tier rate for input tokens.
    pub input_per_mtok: f64,
    /// Standard-tier rate for output tokens.
    pub output_per_mtok: f64,
    /// Standard-tier rate for cached input tokens.
    pub cached_input_per_mtok: f64,
    /// Long-prompt-tier rate for input tokens.
    pub input_long_per_mtok: f64,
    /// Long-prompt-tier rate for output tokens.
    pub output_long_per_mtok: f64,
    /// Long-prompt-tier rate for cached input tokens.
    pub cached_input_long_per_mtok: f64,
}
impl Pricing {
    /// Builds a price sheet with no long-prompt surcharge.
    ///
    /// Each long-prompt rate is set to its standard counterpart, so
    /// [`Pricing::cost_for`] produces the same result on either side of
    /// [`LONG_PROMPT_THRESHOLD`].
    ///
    /// This is a `const fn` so it can be used to initialise `const` and
    /// `static` items (for example entries of a pricing table).
    ///
    /// * `input` – rate per million input tokens.
    /// * `output` – rate per million output tokens.
    /// * `cached_input` – rate per million cached input tokens.
    #[must_use]
    pub const fn flat(input: f64, output: f64, cached_input: f64) -> Self {
        Self {
            input_per_mtok: input,
            output_per_mtok: output,
            cached_input_per_mtok: cached_input,
            input_long_per_mtok: input,
            output_long_per_mtok: output,
            cached_input_long_per_mtok: cached_input,
        }
    }

    /// Computes the cost of a single usage record under this price sheet.
    ///
    /// The long-prompt rates are used when `usage.input_tokens` is strictly
    /// greater than [`LONG_PROMPT_THRESHOLD`]; otherwise the standard rates
    /// apply. The chosen tier is applied to all three token counts (input,
    /// output and cached input), and the weighted sum is divided by one
    /// million because rates are expressed per million tokens.
    ///
    /// NOTE(review): only `input_tokens` is compared against the threshold;
    /// `cached_input_tokens` does not count toward it. If `Usage::input_tokens`
    /// excludes cached tokens, a heavily cached long prompt stays on the
    /// standard tier — confirm this is intended.
    #[must_use]
    pub fn cost_for(&self, usage: &Usage) -> f64 {
        let (input_rate, output_rate, cached_rate) =
            if usage.input_tokens > LONG_PROMPT_THRESHOLD {
                (
                    self.input_long_per_mtok,
                    self.output_long_per_mtok,
                    self.cached_input_long_per_mtok,
                )
            } else {
                (
                    self.input_per_mtok,
                    self.output_per_mtok,
                    self.cached_input_per_mtok,
                )
            };

        // Integer -> f64 casts are exact for counts up to 2^53, far beyond any
        // realistic token count; std offers no lossless `From` for this.
        let weighted_total = usage.input_tokens as f64 * input_rate
            + usage.output_tokens as f64 * output_rate
            + usage.cached_input_tokens as f64 * cached_rate;

        weighted_total / 1_000_000.0
    }
}
/// Built-in price list, as `(model id, pricing)` pairs.
///
/// [`default_pricing`] compares a normalized model id against the first
/// element of each pair, so keys here must already be in normalized form.
/// All rates are per one million tokens.
pub const DEFAULT_PRICING_TABLE: &[(&str, Pricing)] = &[
    // The only entry whose long-prompt tier differs from the standard tier.
    (
        "gemini-2.5-pro",
        Pricing {
            input_per_mtok: 1.25,
            output_per_mtok: 10.0,
            cached_input_per_mtok: 0.3125,
            input_long_per_mtok: 2.5,
            output_long_per_mtok: 15.0,
            cached_input_long_per_mtok: 0.625,
        },
    ),
    // Every entry below is flat: long-prompt rates equal the standard rates.
    (
        "gemini-2.5-flash",
        Pricing {
            input_per_mtok: 0.30,
            output_per_mtok: 2.50,
            cached_input_per_mtok: 0.075,
            input_long_per_mtok: 0.30,
            output_long_per_mtok: 2.50,
            cached_input_long_per_mtok: 0.075,
        },
    ),
    (
        "gemini-2.5-flash-lite",
        Pricing {
            input_per_mtok: 0.10,
            output_per_mtok: 0.40,
            cached_input_per_mtok: 0.025,
            input_long_per_mtok: 0.10,
            output_long_per_mtok: 0.40,
            cached_input_long_per_mtok: 0.025,
        },
    ),
    (
        "gemini-2.0-flash",
        Pricing {
            input_per_mtok: 0.10,
            output_per_mtok: 0.40,
            cached_input_per_mtok: 0.025,
            input_long_per_mtok: 0.10,
            output_long_per_mtok: 0.40,
            cached_input_long_per_mtok: 0.025,
        },
    ),
    (
        "gemini-2.0-flash-lite",
        Pricing {
            input_per_mtok: 0.075,
            output_per_mtok: 0.30,
            cached_input_per_mtok: 0.01875,
            input_long_per_mtok: 0.075,
            output_long_per_mtok: 0.30,
            cached_input_long_per_mtok: 0.01875,
        },
    ),
];
/// Looks up the built-in pricing for `model_id`.
///
/// The id is first passed through `normalize_model_id`; the result is then
/// compared for exact equality against each key of [`DEFAULT_PRICING_TABLE`],
/// in table order. Returns a copy of the first matching entry's [`Pricing`],
/// or `None` when no key matches.
pub fn default_pricing(model_id: &str) -> Option<Pricing> {
    let wanted = normalize_model_id(model_id);
    for (name, pricing) in DEFAULT_PRICING_TABLE {
        if *name == wanted {
            return Some(*pricing);
        }
    }
    None
}