Skip to main content

agent_code_lib/services/
pricing.rs

//! Per-model pricing for cost estimation.
//!
//! Prices are in USD per million tokens. The rates are updated periodically
//! as providers adjust their pricing.
5
/// USD rates for a single model, each expressed per one million tokens.
///
/// The type is a small bundle of plain numbers, so it derives the common
/// value-type traits: it can be debug-printed, copied freely and compared.
#[derive(Debug, Clone, Copy, PartialEq)]
struct ModelPricing {
    /// Rate applied to input tokens.
    input_per_m: f64,
    /// Rate applied to output tokens.
    output_per_m: f64,
    /// Rate applied to cache-read tokens.
    cache_read_per_m: f64,
    /// Rate applied to cache-write tokens.
    cache_write_per_m: f64,
}
13
14/// Calculate USD cost from token usage.
15pub fn calculate_cost(
16    model: &str,
17    input_tokens: u64,
18    output_tokens: u64,
19    cache_read_tokens: u64,
20    cache_write_tokens: u64,
21) -> f64 {
22    let pricing = pricing_for_model(model);
23    let input_cost = input_tokens as f64 * pricing.input_per_m / 1_000_000.0;
24    let output_cost = output_tokens as f64 * pricing.output_per_m / 1_000_000.0;
25    let cache_read_cost = cache_read_tokens as f64 * pricing.cache_read_per_m / 1_000_000.0;
26    let cache_write_cost = cache_write_tokens as f64 * pricing.cache_write_per_m / 1_000_000.0;
27    input_cost + output_cost + cache_read_cost + cache_write_cost
28}
29
30fn pricing_for_model(model: &str) -> ModelPricing {
31    let lower = model.to_lowercase();
32
33    // Anthropic models.
34    if lower.contains("opus") {
35        return ModelPricing {
36            input_per_m: 15.0,
37            output_per_m: 75.0,
38            cache_read_per_m: 1.5,
39            cache_write_per_m: 18.75,
40        };
41    }
42    if lower.contains("sonnet") {
43        return ModelPricing {
44            input_per_m: 3.0,
45            output_per_m: 15.0,
46            cache_read_per_m: 0.3,
47            cache_write_per_m: 3.75,
48        };
49    }
50    if lower.contains("haiku") {
51        return ModelPricing {
52            input_per_m: 0.25,
53            output_per_m: 1.25,
54            cache_read_per_m: 0.03,
55            cache_write_per_m: 0.3,
56        };
57    }
58
59    // OpenAI models.
60    if lower.contains("gpt-5.4") && !lower.contains("mini") && !lower.contains("nano") {
61        return ModelPricing {
62            input_per_m: 2.50,
63            output_per_m: 10.0,
64            cache_read_per_m: 1.25,
65            cache_write_per_m: 2.50,
66        };
67    }
68    if lower.contains("gpt-5.4-mini") {
69        return ModelPricing {
70            input_per_m: 0.40,
71            output_per_m: 1.60,
72            cache_read_per_m: 0.20,
73            cache_write_per_m: 0.40,
74        };
75    }
76    if lower.contains("gpt-5.4-nano") {
77        return ModelPricing {
78            input_per_m: 0.10,
79            output_per_m: 0.40,
80            cache_read_per_m: 0.05,
81            cache_write_per_m: 0.10,
82        };
83    }
84    if lower.contains("gpt-4.1") && !lower.contains("mini") && !lower.contains("nano") {
85        return ModelPricing {
86            input_per_m: 2.0,
87            output_per_m: 8.0,
88            cache_read_per_m: 0.50,
89            cache_write_per_m: 2.0,
90        };
91    }
92    if lower.contains("gpt-4.1-mini") {
93        return ModelPricing {
94            input_per_m: 0.40,
95            output_per_m: 1.60,
96            cache_read_per_m: 0.10,
97            cache_write_per_m: 0.40,
98        };
99    }
100    if lower.contains("gpt-4.1-nano") {
101        return ModelPricing {
102            input_per_m: 0.10,
103            output_per_m: 0.40,
104            cache_read_per_m: 0.025,
105            cache_write_per_m: 0.10,
106        };
107    }
108    if lower.starts_with("o3") || lower.starts_with("o1") {
109        return ModelPricing {
110            input_per_m: 10.0,
111            output_per_m: 40.0,
112            cache_read_per_m: 2.50,
113            cache_write_per_m: 10.0,
114        };
115    }
116    if lower.contains("gpt-4o") {
117        return ModelPricing {
118            input_per_m: 2.50,
119            output_per_m: 10.0,
120            cache_read_per_m: 1.25,
121            cache_write_per_m: 2.50,
122        };
123    }
124
125    // xAI/Grok.
126    if lower.contains("grok") {
127        return ModelPricing {
128            input_per_m: 3.0,
129            output_per_m: 15.0,
130            cache_read_per_m: 0.0,
131            cache_write_per_m: 0.0,
132        };
133    }
134
135    // Google Gemini.
136    if lower.contains("gemini") && lower.contains("pro") {
137        return ModelPricing {
138            input_per_m: 1.25,
139            output_per_m: 5.0,
140            cache_read_per_m: 0.0,
141            cache_write_per_m: 0.0,
142        };
143    }
144    if lower.contains("gemini") && lower.contains("flash") {
145        return ModelPricing {
146            input_per_m: 0.15,
147            output_per_m: 0.60,
148            cache_read_per_m: 0.0,
149            cache_write_per_m: 0.0,
150        };
151    }
152
153    // DeepSeek.
154    if lower.contains("deepseek") {
155        return ModelPricing {
156            input_per_m: 0.27,
157            output_per_m: 1.10,
158            cache_read_per_m: 0.07,
159            cache_write_per_m: 0.27,
160        };
161    }
162
163    // Mistral.
164    if lower.contains("mistral") && lower.contains("large") {
165        return ModelPricing {
166            input_per_m: 2.0,
167            output_per_m: 6.0,
168            cache_read_per_m: 0.0,
169            cache_write_per_m: 0.0,
170        };
171    }
172
173    // Local/unknown — free.
174    ModelPricing {
175        input_per_m: 0.0,
176        output_per_m: 0.0,
177        cache_read_per_m: 0.0,
178        cache_write_per_m: 0.0,
179    }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance, in USD, used for approximate cost comparisons.
    const TOLERANCE: f64 = 0.01;

    /// Assert that `actual` lies within `TOLERANCE` of `expected`.
    #[track_caller]
    fn assert_cost_near(actual: f64, expected: f64) {
        assert!((actual - expected).abs() < TOLERANCE);
    }

    #[test]
    fn test_sonnet_pricing() {
        // 1M input * $3/M + 100K output * $15/M = $3 + $1.5 = $4.5
        assert_cost_near(
            calculate_cost("claude-sonnet-4-20250514", 1_000_000, 100_000, 0, 0),
            4.5,
        );
    }

    #[test]
    fn test_unknown_model_free() {
        // Unrecognised names fall through to the zero-rate table.
        assert_eq!(calculate_cost("local-llama", 1_000_000, 1_000_000, 0, 0), 0.0);
    }

    #[test]
    fn test_gpt4_1_mini() {
        assert_cost_near(calculate_cost("gpt-4.1-mini", 1_000_000, 0, 0, 0), 0.40);
    }

    #[test]
    fn test_opus_pricing() {
        assert_cost_near(calculate_cost("claude-opus-4", 1_000_000, 0, 0, 0), 15.0);
    }

    #[test]
    fn test_haiku_pricing() {
        assert_cost_near(calculate_cost("claude-haiku-4", 0, 1_000_000, 0, 0), 1.25);
    }

    #[test]
    fn test_cache_pricing() {
        // Sonnet: cache read = $0.3/M, cache write = $3.75/M
        assert_cost_near(
            calculate_cost("claude-sonnet-4", 0, 0, 1_000_000, 1_000_000),
            0.3 + 3.75,
        );
    }

    #[test]
    fn test_deepseek_pricing() {
        assert_cost_near(calculate_cost("deepseek-v3", 1_000_000, 0, 0, 0), 0.27);
    }

    #[test]
    fn test_grok_pricing() {
        assert_cost_near(calculate_cost("grok-3", 1_000_000, 0, 0, 0), 3.0);
    }

    #[test]
    fn test_zero_tokens() {
        // No tokens means no cost, whatever the model's rates are.
        assert_eq!(calculate_cost("claude-sonnet-4", 0, 0, 0, 0), 0.0);
    }
}