// hematite/agent/pricing.rs

/// Price rates for one model tier.
///
/// Both rates are expressed per 1,000,000 tokens: the cost functions in this
/// file divide a token count by 1,000,000 before multiplying by a rate.
/// The currency is not stated anywhere in this file (presumably USD; confirm
/// against the provider price lists the numbers were taken from).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ModelPricing {
    /// Rate applied to input (prompt) tokens, per 1,000,000 tokens.
    pub input: f64,
    /// Rate applied to output (completion) tokens, per 1,000,000 tokens.
    pub output: f64,
}
6
7pub fn get_pricing(model: &str) -> ModelPricing {
10 let m = model.to_lowercase();
11
12 if m.contains("gemma-4") || m.contains("gemini-1.5") {
14 return ModelPricing {
15 input: 0.15,
16 output: 0.60,
17 };
18 }
19
20 if m.contains("opus") || m.contains("deepseek-v3") || m.contains("r1") {
22 return ModelPricing {
23 input: 15.0,
24 output: 75.0,
25 };
26 }
27
28 if m.contains("sonnet") || m.contains("gpt-4") || m.contains("72b") || m.contains("70b") {
30 return ModelPricing {
31 input: 3.0,
32 output: 15.0,
33 };
34 }
35
36 if m.contains("haiku")
38 || m.contains("mini")
39 || m.contains("7b")
40 || m.contains("8b")
41 || m.contains("9b")
42 || m.contains("12b")
43 || m.contains("14b")
44 {
45 return ModelPricing {
46 input: 0.25,
47 output: 1.25,
48 };
49 }
50
51 ModelPricing {
53 input: 0.25,
54 output: 1.25,
55 }
56}
57
58pub fn calculate_cost(usage: &crate::agent::inference::TokenUsage, model: &str) -> f64 {
61 let p = get_pricing(model);
62
63 let cache_hits = usage.prompt_cache_hit_tokens + usage.cache_read_input_tokens;
64 let fresh_input = usage.prompt_tokens.saturating_sub(cache_hits);
65
66 let input_cost = (fresh_input as f64 / 1_000_000.0) * p.input;
67 let cache_cost = (cache_hits as f64 / 1_000_000.0) * p.input * 0.10; let output_cost = (usage.completion_tokens as f64 / 1_000_000.0) * p.output;
69
70 input_cost + cache_cost + output_cost
71}
72
73pub fn calculate_estimated_cost(tokens: usize, model: &str) -> f64 {
75 let p = get_pricing(model);
76 (tokens as f64 / 1_000_000.0) * p.input
77}