Skip to main content

deepseek/agent/
pricing.rs

1//! Per-model USD pricing for cost reporting.
2//!
3//! Values in $ per 1M tokens. Returns `None` for unknown models so that the
4//! caller can set `total_cost_usd = None`. DeepSeek bills prompt tokens at two
5//! rates: a discounted **cache-hit** rate for prefix tokens served from their
6//! context cache, and the full **cache-miss** rate for everything else. The
7//! API reports the split in `usage.prompt_cache_hit_tokens` /
8//! `prompt_cache_miss_tokens`; when present we use the split rates, otherwise
9//! we fall back to charging the entire prompt at the miss rate.
10
11use crate::types::UsageInfo;
12
/// USD rates for one model, expressed in $ per 1M tokens.
///
/// This is plain numeric data, so it derives the common value-type traits:
/// callers can log it (`Debug`), pass it around by value (`Clone`/`Copy`),
/// and compare rate cards (`PartialEq`).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ModelPricing {
    /// Rate for prompt tokens that missed the context cache. Also the
    /// fallback rate when cache stats are not reported.
    pub input_per_mtok: f64,
    /// Rate for prompt tokens served from the context cache. Typically ~25%
    /// of `input_per_mtok`.
    pub cached_input_per_mtok: f64,
    /// Rate for completion (output) tokens.
    pub output_per_mtok: f64,
}
22
23pub fn model_pricing(model: &str) -> Option<ModelPricing> {
24    let m = model.to_lowercase();
25    // (cache_miss_input, cache_hit_input, output) per 1M tokens.
26    let (miss, hit, out) = match m.as_str() {
27        "deepseek-v4-pro" | "deepseek-v4" => (0.55, 0.14, 2.19),
28        "deepseek-v4-flash" => (0.14, 0.04, 0.55),
29        "deepseek-reasoner" | "deepseek-r1" => (0.55, 0.14, 2.19),
30        "deepseek-chat" | "deepseek-v3" => (0.27, 0.07, 1.10),
31        _ => return None,
32    };
33    Some(ModelPricing {
34        input_per_mtok: miss,
35        cached_input_per_mtok: hit,
36        output_per_mtok: out,
37    })
38}
39
40/// Convert a turn's `UsageInfo` into a USD cost given the model. Uses the
41/// cache-hit/miss split when the API reported it; otherwise charges the full
42/// prompt at the miss rate. Returns `None` if pricing is unknown.
43pub fn turn_cost_usd(model: &str, usage: &UsageInfo) -> Option<f64> {
44    let p = model_pricing(model)?;
45    let (hit, miss) = match (
46        usage.prompt_cache_hit_tokens,
47        usage.prompt_cache_miss_tokens,
48    ) {
49        (Some(h), Some(m)) => (h, m),
50        (Some(h), None) => (h, usage.prompt_tokens.saturating_sub(h)),
51        (None, Some(m)) => (usage.prompt_tokens.saturating_sub(m), m),
52        (None, None) => (0, usage.prompt_tokens),
53    };
54    let cost = (hit as f64 / 1_000_000.0) * p.cached_input_per_mtok
55        + (miss as f64 / 1_000_000.0) * p.input_per_mtok
56        + (usage.completion_tokens as f64 / 1_000_000.0) * p.output_per_mtok;
57    Some(cost)
58}
59
/// Translate an OpenAI-style `finish_reason` into a Claude-style
/// `stop_reason`.
///
/// The four recognised reasons are renamed; any other value is passed
/// through unchanged. The result is always `Some`.
pub fn map_stop_reason(finish_reason: &str) -> Option<String> {
    let stop_reason = match finish_reason {
        "stop" => "end_turn",
        "tool_calls" => "tool_use",
        "length" => "max_tokens",
        "content_filter" => "refusal",
        // Unrecognised reasons are forwarded verbatim.
        unmapped => unmapped,
    };
    Some(stop_reason.to_owned())
}
71
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing computed USD amounts.
    const TOLERANCE: f64 = 1e-9;

    /// True when `actual` is within `TOLERANCE` of `expected`.
    fn approx_eq(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < TOLERANCE
    }

    #[test]
    fn cache_hit_costs_less_than_miss() {
        // The same 1M-token prompt, once fully cached and once fully missed.
        let one_million_prompt = |hit, miss| UsageInfo {
            prompt_tokens: 1_000_000,
            completion_tokens: 0,
            total_tokens: 1_000_000,
            prompt_cache_hit_tokens: hit,
            prompt_cache_miss_tokens: miss,
        };
        let all_cached = one_million_prompt(Some(1_000_000), Some(0));
        let all_missed = one_million_prompt(Some(0), Some(1_000_000));

        let cached_cost = turn_cost_usd("deepseek-chat", &all_cached).unwrap();
        let missed_cost = turn_cost_usd("deepseek-chat", &all_missed).unwrap();

        assert!(cached_cost < missed_cost);
        assert!(approx_eq(cached_cost, 0.07));
        assert!(approx_eq(missed_cost, 0.27));
    }

    #[test]
    fn missing_cache_fields_charge_full_rate() {
        // No cache stats reported: the whole prompt is billed at the miss rate.
        let no_cache_stats = UsageInfo {
            prompt_tokens: 1_000_000,
            completion_tokens: 0,
            total_tokens: 1_000_000,
            prompt_cache_hit_tokens: None,
            prompt_cache_miss_tokens: None,
        };
        let total = turn_cost_usd("deepseek-chat", &no_cache_stats).unwrap();
        assert!(approx_eq(total, 0.27));
    }

    #[test]
    fn split_cache_fields_apply_blended_rate() {
        let mostly_cached = UsageInfo {
            prompt_tokens: 1_000_000,
            completion_tokens: 0,
            total_tokens: 1_000_000,
            prompt_cache_hit_tokens: Some(800_000),
            prompt_cache_miss_tokens: Some(200_000),
        };
        let total = turn_cost_usd("deepseek-chat", &mostly_cached).unwrap();
        // 0.8 * 0.07 + 0.2 * 0.27 = 0.056 + 0.054 = 0.110
        assert!(approx_eq(total, 0.110));
    }

    #[test]
    fn unknown_model_returns_none() {
        let empty_usage = UsageInfo::default();
        assert!(turn_cost_usd("gpt-9", &empty_usage).is_none());
    }
}