hematite/agent/
pricing.rs

/// Model pricing tier, expressed in USD per 1 million tokens.
///
/// The type is plain data (two `f64` rates), so it derives the common
/// value-type traits to make it printable, copyable and comparable.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ModelPricing {
    /// Rate charged for input (prompt) tokens, in USD per 1 million tokens.
    pub input: f64,
    /// Rate charged for output (completion) tokens, in USD per 1 million tokens.
    pub output: f64,
}
6
7/// Returns the pricing tier for a model based on its ID.
8/// Benchmark rates based on standard cloud provider tiers.
9pub fn get_pricing(model: &str) -> ModelPricing {
10    let m = model.to_lowercase();
11
12    // Gemma-4 / Gemini-1.5 Tier ($0.15 / $0.60)
13    if m.contains("gemma-4") || m.contains("gemini-1.5") {
14        return ModelPricing {
15            input: 0.15,
16            output: 0.60,
17        };
18    }
19
20    // Opus / DeepSeek-V3 Tier ($15.00 / $75.00)
21    if m.contains("opus") || m.contains("deepseek-v3") || m.contains("r1") {
22        return ModelPricing {
23            input: 15.0,
24            output: 75.0,
25        };
26    }
27
28    // Sonnet / GPT-4o / Qwen-72B Tier ($3.00 / $15.00)
29    if m.contains("sonnet") || m.contains("gpt-4") || m.contains("72b") || m.contains("70b") {
30        return ModelPricing {
31            input: 3.0,
32            output: 15.0,
33        };
34    }
35
36    // Haiku / GPT-4o-mini / Qwen-7B Tier ($0.25 / $1.25)
37    if m.contains("haiku")
38        || m.contains("mini")
39        || m.contains("7b")
40        || m.contains("8b")
41        || m.contains("9b")
42        || m.contains("12b")
43        || m.contains("14b")
44    {
45        return ModelPricing {
46            input: 0.25,
47            output: 1.25,
48        };
49    }
50
51    // Default safe fallback (standard Haiku-like rate)
52    ModelPricing {
53        input: 0.25,
54        output: 1.25,
55    }
56}
57
58/// Calculates the cost in USD for a given token usage block.
59/// Applies a 90% discount for cached input tokens.
60pub fn calculate_cost(usage: &crate::agent::inference::TokenUsage, model: &str) -> f64 {
61    let p = get_pricing(model);
62
63    let cache_hits = usage.prompt_cache_hit_tokens + usage.cache_read_input_tokens;
64    let fresh_input = usage.prompt_tokens.saturating_sub(cache_hits);
65
66    let input_cost = (fresh_input as f64 / 1_000_000.0) * p.input;
67    let cache_cost = (cache_hits as f64 / 1_000_000.0) * p.input * 0.10; // 90% discount
68    let output_cost = (usage.completion_tokens as f64 / 1_000_000.0) * p.output;
69
70    input_cost + cache_cost + output_cost
71}
72
73/// Calculates a rough estimate for non-streamed or partial data.
74pub fn calculate_estimated_cost(tokens: usize, model: &str) -> f64 {
75    let p = get_pricing(model);
76    (tokens as f64 / 1_000_000.0) * p.input
77}
hematite/agent/pricing.rs

hematite/agent/
pricing.rs