adk-model 1.0.0

LLM model integrations for Rust Agent Development Kit (ADK-Rust) (Gemini, OpenAI, Claude, DeepSeek, etc.)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
//! Token pricing for OpenAI models (March 2026).
//!
//! Provides per-model cost calculation based on token counts.
//!
//! OpenAI models have automatic prompt caching with varying discount tiers:
//! - GPT-5 family: 90% off cached input reads
//! - GPT-4.1 family: 75% off cached input reads
//! - GPT-4o / o-series: 50% off cached input reads
//!
//! # Example
//!
//! ```rust
//! use adk_model::openai::pricing::{OpenAIPricing, estimate_cost};
//!
//! let cost = estimate_cost(&OpenAIPricing::GPT_41, 50_000, 1_000, 10_000);
//! println!("Total: ${:.6}", cost.total());
//! ```

/// Per-million-token prices for a single OpenAI model.
///
/// All values are in USD per 1 million tokens.
#[derive(Debug, Clone, Copy)]
pub struct OpenAIPricing {
    /// Input token price ($/MTok).
    pub input: f64,
    /// Cached input token price ($/MTok).
    pub cached_input: f64,
    /// Output token price ($/MTok).
    pub output: f64,
}

impl OpenAIPricing {
    // ── GPT-5.5 family (90% cache discount) ──

    /// GPT-5.5 — next-generation flagship model.
    pub const GPT_55: Self = Self { input: 3.00, cached_input: 0.30, output: 18.00 };

    /// GPT-5.5 Pro — highest capability tier.
    pub const GPT_55_PRO: Self = Self { input: 6.00, cached_input: 0.60, output: 36.00 };

    /// GPT-5.5 Instant — fast, cost-effective GPT-5.5-class model.
    pub const GPT_55_INSTANT: Self = Self { input: 0.50, cached_input: 0.05, output: 3.00 };

    // ── GPT-5.4 family (90% cache discount) ──

    /// GPT-5.4 — most capable model for professional work.
    pub const GPT_54: Self = Self { input: 2.00, cached_input: 0.20, output: 14.00 };

    /// GPT-5.4 Mini — strongest mini model for coding, computer use, subagents.
    pub const GPT_54_MINI: Self = Self { input: 0.80, cached_input: 0.08, output: 6.00 };

    /// GPT-5.4 Nano — cheapest GPT-5.4-class model for high-volume tasks.
    pub const GPT_54_NANO: Self = Self { input: 0.20, cached_input: 0.02, output: 1.50 };

    /// GPT-5.4 Pro — premium GPT-5.4-class model.
    pub const GPT_54_PRO: Self = Self { input: 4.00, cached_input: 0.40, output: 28.00 };

    // ── GPT-5.3 family (90% cache discount) ──

    /// GPT-5.3 Codex — code-optimized model.
    pub const GPT_53_CODEX: Self = Self { input: 1.50, cached_input: 0.15, output: 12.00 };

    /// GPT-5.3 Chat Latest — latest chat-optimized model.
    pub const GPT_53_CHAT_LATEST: Self = Self { input: 1.50, cached_input: 0.15, output: 12.00 };

    // ── GPT-5.2 family (90% cache discount) ──

    /// GPT-5.2 — general-purpose model.
    pub const GPT_52: Self = Self { input: 1.25, cached_input: 0.125, output: 10.00 };

    /// GPT-5.2 Codex — code-optimized GPT-5.2 variant.
    pub const GPT_52_CODEX: Self = Self { input: 1.25, cached_input: 0.125, output: 10.00 };

    // ── GPT-5.1 family (90% cache discount) ──

    /// GPT-5.1 — general-purpose model.
    pub const GPT_51: Self = Self { input: 1.00, cached_input: 0.10, output: 8.00 };

    /// GPT-5.1 Codex — code-optimized GPT-5.1 variant.
    pub const GPT_51_CODEX: Self = Self { input: 1.00, cached_input: 0.10, output: 8.00 };

    /// GPT-5.1 Codex Max — high-throughput code model.
    pub const GPT_51_CODEX_MAX: Self = Self { input: 2.00, cached_input: 0.20, output: 16.00 };

    /// GPT-5.1 Codex Mini — budget code model.
    pub const GPT_51_CODEX_MINI: Self = Self { input: 0.30, cached_input: 0.03, output: 2.40 };

    // ── GPT-5 family (90% cache discount) ──

    /// GPT-5 — flagship agentic model.
    pub const GPT_5: Self = Self { input: 2.50, cached_input: 0.25, output: 15.00 };

    /// GPT-5 Mini — budget GPT-5-class model.
    pub const GPT_5_MINI: Self = Self { input: 0.60, cached_input: 0.06, output: 4.00 };

    /// GPT-5 Nano — cheapest GPT-5-class model.
    pub const GPT_5_NANO: Self = Self { input: 0.15, cached_input: 0.015, output: 1.00 };

    /// GPT-5 Pro — premium GPT-5-class model.
    pub const GPT_5_PRO: Self = Self { input: 5.00, cached_input: 0.50, output: 30.00 };

    // ── GPT-4.1 family (75% cache discount) ──

    /// GPT-4.1 — production workhorse, 1M context window.
    pub const GPT_41: Self = Self { input: 2.00, cached_input: 0.50, output: 8.00 };

    /// GPT-4.1 Mini — mid-tier production tasks, 1M context.
    pub const GPT_41_MINI: Self = Self { input: 0.40, cached_input: 0.10, output: 1.60 };

    /// GPT-4.1 Nano — classification, routing, extraction, 1M context.
    pub const GPT_41_NANO: Self = Self { input: 0.10, cached_input: 0.025, output: 0.40 };

    // ── o-series reasoning models (50% cache discount) ──

    /// o3 — advanced reasoning model.
    pub const O3: Self = Self { input: 2.00, cached_input: 0.50, output: 8.00 };

    /// o4-mini — best-value reasoning model.
    pub const O4_MINI: Self = Self { input: 1.10, cached_input: 0.275, output: 4.40 };

    /// o3-mini — legacy reasoning model.
    pub const O3_MINI: Self = Self { input: 1.10, cached_input: 0.55, output: 4.40 };

    /// o1 — legacy deep reasoning model.
    pub const O1: Self = Self { input: 15.00, cached_input: 7.50, output: 60.00 };

    // ── GPT-4o family (50% cache discount, legacy) ──

    /// GPT-4o — legacy production model.
    pub const GPT_4O: Self = Self { input: 2.50, cached_input: 1.25, output: 10.00 };

    /// GPT-4o Mini — legacy simple tasks.
    pub const GPT_4O_MINI: Self = Self { input: 0.15, cached_input: 0.075, output: 0.60 };

    // ── Realtime models ──

    /// GPT-Realtime-1.5 — text pricing (audio is separate).
    ///
    /// Audio: input $32/MTok, cached $0.40/MTok, output $64/MTok.
    /// Image: input $5/MTok, cached $0.50/MTok.
    pub const GPT_REALTIME_15_TEXT: Self = Self { input: 4.00, cached_input: 0.40, output: 16.00 };

    /// GPT-Realtime-1.5 — audio pricing.
    pub const GPT_REALTIME_15_AUDIO: Self =
        Self { input: 32.00, cached_input: 0.40, output: 64.00 };

    // ── Image generation ──

    /// GPT-Image-1.5 — text pricing.
    pub const GPT_IMAGE_15_TEXT: Self = Self { input: 5.00, cached_input: 1.25, output: 10.00 };

    /// GPT-Image-1.5 — image pricing.
    pub const GPT_IMAGE_15_IMAGE: Self = Self { input: 8.00, cached_input: 2.00, output: 32.00 };

    // ── GPT Image 2 ──

    /// GPT-Image-2 — text pricing.
    pub const GPT_IMAGE_2_TEXT: Self = Self { input: 5.00, cached_input: 1.25, output: 10.00 };

    /// GPT-Image-2 — image pricing.
    pub const GPT_IMAGE_2_IMAGE: Self = Self { input: 8.00, cached_input: 2.00, output: 32.00 };

    // ── Deep research models ──

    /// o3 Deep Research — extended reasoning for research tasks.
    pub const O3_DEEP_RESEARCH: Self = Self { input: 2.00, cached_input: 0.50, output: 8.00 };

    /// o4-mini Deep Research — cost-effective deep research model.
    pub const O4_MINI_DEEP_RESEARCH: Self = Self { input: 1.10, cached_input: 0.275, output: 4.40 };
}

/// Itemised cost breakdown from a single API call.
#[derive(Debug, Clone, Copy, Default)]
pub struct CostBreakdown {
    /// Cost of uncached input tokens.
    pub input_cost: f64,
    /// Cost of cached input tokens.
    pub cache_cost: f64,
    /// Cost of output tokens.
    pub output_cost: f64,
}

impl CostBreakdown {
    /// Total cost in USD.
    pub fn total(&self) -> f64 {
        self.input_cost + self.cache_cost + self.output_cost
    }
}

impl std::fmt::Display for CostBreakdown {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "${:.6} (in=${:.6} cache=${:.6} out=${:.6})",
            self.total(),
            self.input_cost,
            self.cache_cost,
            self.output_cost
        )
    }
}

/// Estimate the cost of a single API call.
///
/// # Arguments
///
/// * `pricing` - The model's pricing tier
/// * `input_tokens` - Number of uncached input tokens
/// * `output_tokens` - Number of output tokens (includes reasoning tokens for o-series)
/// * `cached_tokens` - Number of tokens served from cache
///
/// # Example
///
/// ```rust
/// use adk_model::openai::pricing::{OpenAIPricing, estimate_cost};
///
/// let cost = estimate_cost(&OpenAIPricing::GPT_41, 50_000, 1_000, 10_000);
/// println!("Total: ${:.6}", cost.total());
/// ```
pub fn estimate_cost(
    pricing: &OpenAIPricing,
    input_tokens: u64,
    output_tokens: u64,
    cached_tokens: u64,
) -> CostBreakdown {
    let mtok = 1_000_000.0;
    CostBreakdown {
        input_cost: input_tokens as f64 / mtok * pricing.input,
        cache_cost: cached_tokens as f64 / mtok * pricing.cached_input,
        output_cost: output_tokens as f64 / mtok * pricing.output,
    }
}

/// Estimate batch API cost (50% off all token costs).
pub fn estimate_batch_cost(
    pricing: &OpenAIPricing,
    input_tokens: u64,
    output_tokens: u64,
    cached_tokens: u64,
) -> CostBreakdown {
    let mtok = 1_000_000.0;
    CostBreakdown {
        input_cost: input_tokens as f64 / mtok * pricing.input * 0.5,
        cache_cost: cached_tokens as f64 / mtok * pricing.cached_input * 0.5,
        output_cost: output_tokens as f64 / mtok * pricing.output * 0.5,
    }
}

/// Look up pricing for a model by its identifier string.
///
/// Returns `None` for unknown models, allowing callers to use a zero-cost fallback.
///
/// # Arguments
///
/// * `model_name` - The model identifier (e.g., "gpt-5.5", "o3-deep-research")
///
/// # Example
///
/// ```rust
/// use adk_model::openai::pricing::lookup_pricing;
///
/// let pricing = lookup_pricing("gpt-5.5");
/// assert!(pricing.is_some());
///
/// let unknown = lookup_pricing("unknown-model");
/// assert!(unknown.is_none());
/// ```
pub fn lookup_pricing(model_name: &str) -> Option<&'static OpenAIPricing> {
    match model_name {
        // GPT-5.5 family
        "gpt-5.5" => Some(&OpenAIPricing::GPT_55),
        "gpt-5.5-pro" => Some(&OpenAIPricing::GPT_55_PRO),
        "gpt-5.5-instant" => Some(&OpenAIPricing::GPT_55_INSTANT),

        // GPT-5.4 family
        "gpt-5.4" => Some(&OpenAIPricing::GPT_54),
        "gpt-5.4-mini" => Some(&OpenAIPricing::GPT_54_MINI),
        "gpt-5.4-nano" => Some(&OpenAIPricing::GPT_54_NANO),
        "gpt-5.4-pro" => Some(&OpenAIPricing::GPT_54_PRO),

        // GPT-5.3 family
        "gpt-5.3-codex" => Some(&OpenAIPricing::GPT_53_CODEX),
        "gpt-5.3-chat-latest" => Some(&OpenAIPricing::GPT_53_CHAT_LATEST),

        // GPT-5.2 family
        "gpt-5.2" => Some(&OpenAIPricing::GPT_52),
        "gpt-5.2-codex" => Some(&OpenAIPricing::GPT_52_CODEX),

        // GPT-5.1 family
        "gpt-5.1" => Some(&OpenAIPricing::GPT_51),
        "gpt-5.1-codex" => Some(&OpenAIPricing::GPT_51_CODEX),
        "gpt-5.1-codex-max" => Some(&OpenAIPricing::GPT_51_CODEX_MAX),
        "gpt-5.1-codex-mini" => Some(&OpenAIPricing::GPT_51_CODEX_MINI),

        // GPT-5 family
        "gpt-5" => Some(&OpenAIPricing::GPT_5),
        "gpt-5-mini" => Some(&OpenAIPricing::GPT_5_MINI),
        "gpt-5-nano" => Some(&OpenAIPricing::GPT_5_NANO),
        "gpt-5-pro" => Some(&OpenAIPricing::GPT_5_PRO),

        // GPT-4.1 family
        "gpt-4.1" => Some(&OpenAIPricing::GPT_41),
        "gpt-4.1-mini" => Some(&OpenAIPricing::GPT_41_MINI),
        "gpt-4.1-nano" => Some(&OpenAIPricing::GPT_41_NANO),

        // o-series reasoning models
        "o3" => Some(&OpenAIPricing::O3),
        "o4-mini" => Some(&OpenAIPricing::O4_MINI),
        "o3-mini" => Some(&OpenAIPricing::O3_MINI),
        "o1" => Some(&OpenAIPricing::O1),

        // Deep research models
        "o3-deep-research" => Some(&OpenAIPricing::O3_DEEP_RESEARCH),
        "o4-mini-deep-research" => Some(&OpenAIPricing::O4_MINI_DEEP_RESEARCH),

        // GPT-4o family (legacy)
        "gpt-4o" => Some(&OpenAIPricing::GPT_4O),
        "gpt-4o-mini" => Some(&OpenAIPricing::GPT_4O_MINI),

        // Realtime models
        "gpt-realtime-1.5" => Some(&OpenAIPricing::GPT_REALTIME_15_TEXT),
        "gpt-realtime-1.5-audio" => Some(&OpenAIPricing::GPT_REALTIME_15_AUDIO),

        // Image generation models
        "gpt-image-1.5" => Some(&OpenAIPricing::GPT_IMAGE_15_TEXT),
        "gpt-image-1.5-image" => Some(&OpenAIPricing::GPT_IMAGE_15_IMAGE),
        "gpt-image-2" => Some(&OpenAIPricing::GPT_IMAGE_2_TEXT),
        "gpt-image-2-image" => Some(&OpenAIPricing::GPT_IMAGE_2_IMAGE),

        // Unknown model — return None for zero-cost fallback
        _ => None,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn gpt_41_basic_cost() {
        let cost = estimate_cost(&OpenAIPricing::GPT_41, 1_000_000, 1_000_000, 0);
        assert!((cost.input_cost - 2.00).abs() < 1e-9);
        assert!((cost.output_cost - 8.00).abs() < 1e-9);
        assert!((cost.total() - 10.00).abs() < 1e-9);
    }

    #[test]
    fn gpt_41_with_cache() {
        let cost = estimate_cost(&OpenAIPricing::GPT_41, 500_000, 100_000, 500_000);
        // 500K input @ $2.00/MTok = $1.00
        assert!((cost.input_cost - 1.00).abs() < 1e-9);
        // 500K cached @ $0.50/MTok = $0.25
        assert!((cost.cache_cost - 0.25).abs() < 1e-9);
        // 100K output @ $8.00/MTok = $0.80
        assert!((cost.output_cost - 0.80).abs() < 1e-9);
        assert!((cost.total() - 2.05).abs() < 1e-9);
    }

    #[test]
    fn gpt_5_cache_discount_90_percent() {
        // GPT-5: input $2.50, cached $0.25 (90% off)
        let cost = estimate_cost(&OpenAIPricing::GPT_5, 0, 0, 1_000_000);
        assert!((cost.cache_cost - 0.25).abs() < 1e-9);
    }

    #[test]
    fn o4_mini_reasoning_cost() {
        // o4-mini: 1M input + 5M output (reasoning tokens count as output)
        let cost = estimate_cost(&OpenAIPricing::O4_MINI, 1_000_000, 5_000_000, 0);
        assert!((cost.input_cost - 1.10).abs() < 1e-9);
        assert!((cost.output_cost - 22.00).abs() < 1e-9);
    }

    #[test]
    fn batch_50_percent_discount() {
        let standard = estimate_cost(&OpenAIPricing::GPT_41, 1_000_000, 1_000_000, 0);
        let batch = estimate_batch_cost(&OpenAIPricing::GPT_41, 1_000_000, 1_000_000, 0);
        assert!((batch.total() - standard.total() * 0.5).abs() < 1e-9);
    }

    #[test]
    fn gpt_41_nano_cheapest() {
        let cost = estimate_cost(&OpenAIPricing::GPT_41_NANO, 1_000_000, 1_000_000, 0);
        assert!((cost.input_cost - 0.10).abs() < 1e-9);
        assert!((cost.output_cost - 0.40).abs() < 1e-9);
    }

    #[test]
    fn zero_tokens_zero_cost() {
        let cost = estimate_cost(&OpenAIPricing::GPT_5, 0, 0, 0);
        assert!((cost.total() - 0.0).abs() < 1e-9);
    }

    #[test]
    fn display_format() {
        let cost = CostBreakdown { input_cost: 0.003, cache_cost: 0.001, output_cost: 0.0075 };
        let s = cost.to_string();
        assert!(s.starts_with('$'));
        assert!(s.contains("in="));
        assert!(s.contains("cache="));
        assert!(s.contains("out="));
    }

    #[test]
    fn lookup_known_models() {
        assert!(lookup_pricing("gpt-5.5").is_some());
        assert!(lookup_pricing("gpt-5.5-pro").is_some());
        assert!(lookup_pricing("gpt-5.5-instant").is_some());
        assert!(lookup_pricing("gpt-5.4").is_some());
        assert!(lookup_pricing("gpt-5.4-mini").is_some());
        assert!(lookup_pricing("gpt-5.4-nano").is_some());
        assert!(lookup_pricing("gpt-5.4-pro").is_some());
        assert!(lookup_pricing("gpt-5.3-codex").is_some());
        assert!(lookup_pricing("gpt-5.3-chat-latest").is_some());
        assert!(lookup_pricing("gpt-5.2").is_some());
        assert!(lookup_pricing("gpt-5.2-codex").is_some());
        assert!(lookup_pricing("gpt-5.1").is_some());
        assert!(lookup_pricing("gpt-5.1-codex").is_some());
        assert!(lookup_pricing("gpt-5.1-codex-max").is_some());
        assert!(lookup_pricing("gpt-5.1-codex-mini").is_some());
        assert!(lookup_pricing("gpt-5").is_some());
        assert!(lookup_pricing("gpt-5-mini").is_some());
        assert!(lookup_pricing("gpt-5-nano").is_some());
        assert!(lookup_pricing("gpt-5-pro").is_some());
        assert!(lookup_pricing("gpt-image-2").is_some());
        assert!(lookup_pricing("gpt-image-2-image").is_some());
        assert!(lookup_pricing("o3-deep-research").is_some());
        assert!(lookup_pricing("o4-mini-deep-research").is_some());
    }

    #[test]
    fn lookup_unknown_model_returns_none() {
        assert!(lookup_pricing("unknown-model").is_none());
        assert!(lookup_pricing("gpt-99").is_none());
        assert!(lookup_pricing("").is_none());
    }

    #[test]
    fn lookup_pricing_values_correct() {
        let p = lookup_pricing("gpt-5.5").unwrap();
        assert!((p.input - 3.00).abs() < 1e-9);
        assert!((p.cached_input - 0.30).abs() < 1e-9);
        assert!((p.output - 18.00).abs() < 1e-9);

        let p = lookup_pricing("o3-deep-research").unwrap();
        assert!((p.input - 2.00).abs() < 1e-9);
        assert!((p.cached_input - 0.50).abs() < 1e-9);
        assert!((p.output - 8.00).abs() < 1e-9);
    }
}