Skip to main content

crabllm_core/
model_info.rs

1use crate::{PricingConfig, Usage};
2use serde::{Deserialize, Serialize};
3
4/// Per-model metadata: context window and token pricing.
5///
6/// Every field is `Option` so partial overrides work — a config entry
7/// that sets only `context_length` can leave pricing unset.
8#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
10pub struct ModelInfo {
11    /// Maximum context window in tokens.
12    #[serde(default, skip_serializing_if = "Option::is_none")]
13    pub context_length: Option<u32>,
14    /// Token pricing (per-axis costs per million tokens).
15    #[serde(default, skip_serializing_if = "Option::is_none")]
16    pub pricing: Option<PricingConfig>,
17    /// Whether the model accepts image/video input.
18    #[serde(default, skip_serializing_if = "Option::is_none")]
19    pub vision: Option<bool>,
20}
21
22impl ModelInfo {
23    /// Compute cost in USD for the given usage. Returns 0.0 when pricing is
24    /// unset.
25    ///
26    /// Each axis is priced independently. Secondary rates fall back to the
27    /// next-coarser bucket when unset:
28    /// - `cache_read`, `cache_write`, `audio_input` → `input_cost_per_million`
29    /// - `reasoning`, `audio_output` → `output_cost_per_million`
30    ///
31    /// `None` never means "free" — that would silently zero out billing on any
32    /// axis where pricing data is incomplete.
33    pub fn cost(&self, u: &Usage) -> f64 {
34        let Some(ref p) = self.pricing else {
35            return 0.0;
36        };
37        let input_rate = p.input_cost_per_million;
38        let output_rate = p.output_cost_per_million;
39        let cache_read_rate = p.cache_read_cost_per_million.unwrap_or(input_rate);
40        let cache_write_rate = p.cache_write_cost_per_million.unwrap_or(input_rate);
41        let reasoning_rate = p.reasoning_cost_per_million.unwrap_or(output_rate);
42
43        let tokens = u.input_tokens as f64 * input_rate
44            + u.cache_read_tokens as f64 * cache_read_rate
45            + u.cache_write_tokens as f64 * cache_write_rate
46            + u.output_tokens as f64 * output_rate
47            + u.reasoning_tokens as f64 * reasoning_rate;
48        let mut total = tokens / 1_000_000.0;
49
50        for (tool, calls) in &u.server_tool_calls {
51            if let Some(rate) = p.server_tool_cost_per_call.get(tool) {
52                total += *calls as f64 * rate;
53            }
54        }
55        total
56    }
57}