Skip to main content

context_bar_core/
pricing.rs

1//! Cost kernel — the API-equivalent pricing math, ported 1:1 from
2//! `usage_signal.py` (the `FALLBACK_PRICING` table, model matcher, `_tiered`,
3//! `turn_cost`, `turn_cache_savings`). This is the first slice of folding the
4//! Python aggregator into Rust (ROADMAP E1). It is PURE — no I/O, no clock — so
5//! it is pinned by a golden fixture generated from the Python (`tests/`),
6//! guaranteeing byte-for-byte cost parity (see `docs/ai/COST_MODEL.md`).
7//!
//! Rates are USD per token. The bundled table mirrors the LiteLLM dataset
//! ccusage uses and is exactly what `CONTEXTBAR_PRICING_OFFLINE=1` selects in
//! the Python. The live LiteLLM fetch + 24h cache (the I/O half of
//! `load_pricing`) lives in the native-only `live` module at the bottom of
//! this file; everything outside that module stays pure.
12
/// Anthropic's long-context tier threshold, in tokens. Within one token
/// category, only the tokens strictly above this count bill at the `*_200k`
/// rate (when the model carries one); the first `TIER_THRESHOLD` tokens stay
/// at the base rate. See [`tiered`].
pub const TIER_THRESHOLD: u64 = 200_000;
16
/// One model's per-token rates. `None` = the category isn't billed / has no
/// tier (e.g. OpenAI models have no cache-write `cw`; flat models have no
/// `*_200k`). Mirrors the Python short-rate dict keys exactly.
///
/// Every field is optional on the wire: a missing key deserializes to `None`
/// and a `None` field is omitted when serializing.
#[derive(Clone, Copy, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Rate {
    // Short keys match the Python short-rate dict + the on-disk pricing cache,
    // so the Rust engine reads/writes the same `pricing.cache.json`.
    /// USD per input token (JSON key `in`).
    #[serde(rename = "in", default, skip_serializing_if = "Option::is_none")]
    pub input: Option<f64>,
    /// USD per output token (JSON key `out`).
    #[serde(rename = "out", default, skip_serializing_if = "Option::is_none")]
    pub output: Option<f64>,
    /// USD per cache-creation (cache-write) token (JSON key `cw`).
    #[serde(rename = "cw", default, skip_serializing_if = "Option::is_none")]
    pub cache_write: Option<f64>,
    /// USD per cache-read token (JSON key `cr`).
    #[serde(rename = "cr", default, skip_serializing_if = "Option::is_none")]
    pub cache_read: Option<f64>,
    /// Input rate for tokens above [`TIER_THRESHOLD`] (JSON key `in_200k`).
    #[serde(rename = "in_200k", default, skip_serializing_if = "Option::is_none")]
    pub input_200k: Option<f64>,
    /// Output rate for tokens above [`TIER_THRESHOLD`] (JSON key `out_200k`).
    #[serde(rename = "out_200k", default, skip_serializing_if = "Option::is_none")]
    pub output_200k: Option<f64>,
    /// Cache-write rate for tokens above [`TIER_THRESHOLD`] (JSON key `cw_200k`).
    #[serde(rename = "cw_200k", default, skip_serializing_if = "Option::is_none")]
    pub cache_write_200k: Option<f64>,
    /// Cache-read rate for tokens above [`TIER_THRESHOLD`] (JSON key `cr_200k`).
    #[serde(rename = "cr_200k", default, skip_serializing_if = "Option::is_none")]
    pub cache_read_200k: Option<f64>,
}
41
// Constructors for the rate shapes used in FALLBACK_PRICING: flat Anthropic,
// tiered Anthropic, OpenAI, and OpenAI without a cache-read rate.
43const fn anthropic(input: f64, output: f64, cw: f64, cr: f64) -> Rate {
44    Rate {
45        input: Some(input),
46        output: Some(output),
47        cache_write: Some(cw),
48        cache_read: Some(cr),
49        input_200k: None,
50        output_200k: None,
51        cache_write_200k: None,
52        cache_read_200k: None,
53    }
54}
55
56#[allow(clippy::too_many_arguments)]
57const fn anthropic_tiered(
58    input: f64,
59    output: f64,
60    cw: f64,
61    cr: f64,
62    i2: f64,
63    o2: f64,
64    cw2: f64,
65    cr2: f64,
66) -> Rate {
67    Rate {
68        input: Some(input),
69        output: Some(output),
70        cache_write: Some(cw),
71        cache_read: Some(cr),
72        input_200k: Some(i2),
73        output_200k: Some(o2),
74        cache_write_200k: Some(cw2),
75        cache_read_200k: Some(cr2),
76    }
77}
78
79/// OpenAI/Codex shape: input + output + cache-read, no cache-write charge.
80const fn openai(input: f64, output: f64, cr: f64) -> Rate {
81    Rate {
82        input: Some(input),
83        output: Some(output),
84        cache_write: None,
85        cache_read: Some(cr),
86        input_200k: None,
87        output_200k: None,
88        cache_write_200k: None,
89        cache_read_200k: None,
90    }
91}
92
93/// OpenAI shape with no published cache-read rate (e.g. `gpt-5-pro`).
94const fn openai_no_cr(input: f64, output: f64) -> Rate {
95    Rate {
96        input: Some(input),
97        output: Some(output),
98        cache_write: None,
99        cache_read: None,
100        input_200k: None,
101        output_200k: None,
102        cache_write_200k: None,
103        cache_read_200k: None,
104    }
105}
106
/// Bundled offline rate table (USD/token), captured from LiteLLM — verbatim
/// from `FALLBACK_PRICING` in `usage_signal.py`. Entry order follows the
/// Python table, but lookups do not depend on it: [`fallback_table`] collects
/// this slice into a `HashMap`, and [`match_pricing`] picks the longest key
/// that prefixes the id, which is unique regardless of iteration order.
pub static FALLBACK_PRICING: &[(&str, Rate)] = &[
    ("claude-opus-4-8", anthropic(5e-6, 25e-6, 6.25e-6, 0.5e-6)),
    ("claude-opus-4-7", anthropic(5e-6, 25e-6, 6.25e-6, 0.5e-6)),
    ("claude-opus-4-6", anthropic(5e-6, 25e-6, 6.25e-6, 0.5e-6)),
    ("claude-opus-4-5", anthropic(5e-6, 25e-6, 6.25e-6, 0.5e-6)),
    ("claude-opus-4-1", anthropic(15e-6, 75e-6, 18.75e-6, 1.5e-6)),
    ("claude-opus-4", anthropic(15e-6, 75e-6, 18.75e-6, 1.5e-6)),
    ("claude-sonnet-4-6", anthropic(3e-6, 15e-6, 3.75e-6, 0.3e-6)),
    // The two entries below are the only ones carrying a >200K tier.
    (
        "claude-sonnet-4-5",
        anthropic_tiered(3e-6, 15e-6, 3.75e-6, 0.3e-6, 6e-6, 22.5e-6, 7.5e-6, 0.6e-6),
    ),
    (
        "claude-sonnet-4",
        anthropic_tiered(3e-6, 15e-6, 3.75e-6, 0.3e-6, 6e-6, 22.5e-6, 7.5e-6, 0.6e-6),
    ),
    ("claude-3-7-sonnet", anthropic(3e-6, 15e-6, 3.75e-6, 0.3e-6)),
    ("claude-3-5-sonnet", anthropic(3e-6, 15e-6, 3.75e-6, 0.3e-6)),
    ("claude-haiku-4-5", anthropic(1e-6, 5e-6, 1.25e-6, 0.1e-6)),
    ("claude-3-5-haiku", anthropic(0.8e-6, 4e-6, 1e-6, 0.08e-6)),
    ("mythos", anthropic(5e-6, 25e-6, 6.25e-6, 0.5e-6)),
    // OpenAI/Codex entries: no cache-write rate, no tier.
    ("gpt-5", openai(1.25e-6, 10e-6, 0.125e-6)),
    ("gpt-5-codex", openai(1.25e-6, 10e-6, 0.125e-6)),
    ("gpt-5-pro", openai_no_cr(15e-6, 120e-6)),
    ("gpt-5-mini", openai(0.25e-6, 2e-6, 0.025e-6)),
    ("gpt-5-nano", openai(0.05e-6, 0.4e-6, 0.005e-6)),
    ("gpt-5.1", openai(1.25e-6, 10e-6, 0.125e-6)),
    ("gpt-5.1-codex", openai(1.25e-6, 10e-6, 0.125e-6)),
    ("gpt-5.1-codex-max", openai(1.25e-6, 10e-6, 0.125e-6)),
    ("gpt-5.1-codex-mini", openai(0.25e-6, 2e-6, 0.025e-6)),
    ("gpt-5.2", openai(1.75e-6, 14e-6, 0.175e-6)),
    ("gpt-5.2-codex", openai(1.75e-6, 14e-6, 0.175e-6)),
    ("gpt-5.3-codex", openai(1.75e-6, 14e-6, 0.175e-6)),
    ("gpt-5.4", openai(2.5e-6, 15e-6, 0.25e-6)),
    ("gpt-5.4-codex", openai(2.5e-6, 15e-6, 0.25e-6)),
    ("gpt-5.4-mini", openai(0.75e-6, 4.5e-6, 0.075e-6)),
    ("gpt-5.4-nano", openai(0.2e-6, 1.25e-6, 0.02e-6)),
    ("gpt-5.4-pro", openai(30e-6, 180e-6, 3e-6)),
    ("gpt-5.5", openai(5e-6, 30e-6, 0.5e-6)),
    ("gpt-5.5-pro", openai(30e-6, 180e-6, 3e-6)),
    ("codex-mini-latest", openai(1.5e-6, 6e-6, 0.375e-6)),
    ("o4-mini", openai(1.1e-6, 4.4e-6, 0.275e-6)),
    ("o3", openai(2e-6, 8e-6, 0.5e-6)),
    ("o3-mini", openai(1.1e-6, 4.4e-6, 0.55e-6)),
];
155
/// Coarse family fallback, checked last — ordered, most-specific first.
/// Each entry: any of these substrings present in the (stripped) id maps to
/// the table key. Mirrors `FAMILY_FALLBACK`; the Python regexes here are plain
/// literal alternations, so substring matching is faithful.
///
/// Order is significant: [`match_pricing`] walks the entries top to bottom and
/// returns the first one that has a matching needle AND whose key exists in
/// the table, so a narrow needle (e.g. `opus-4-5`) must sit above the broader
/// one that would also match it (`opus-4`).
static FAMILY_FALLBACK: &[(&[&str], &str)] = &[
    (&["opus-4-5", "opus-4-6", "opus-4-7", "opus-4-8"], "claude-opus-4-8"),
    (&["opus-4"], "claude-opus-4"),
    (&["mythos"], "mythos"),
    (&["sonnet-4"], "claude-sonnet-4-6"),
    (&["3-7-sonnet"], "claude-3-7-sonnet"),
    (&["3-5-sonnet"], "claude-3-5-sonnet"),
    (&["haiku-4"], "claude-haiku-4-5"),
    // Bare `haiku` is the catch-all for any other haiku id.
    (&["3-5-haiku", "haiku"], "claude-3-5-haiku"),
    (&["gpt-5.5-pro"], "gpt-5.5-pro"),
    (&["gpt-5.5"], "gpt-5.5"),
    (&["gpt-5.4-codex"], "gpt-5.4-codex"),
    (&["gpt-5.4"], "gpt-5.4"),
    (&["gpt-5.3-codex", "gpt-5.2-codex", "gpt-5.2", "gpt-5.3"], "gpt-5.2"),
    (&["gpt-5.1-codex"], "gpt-5.1-codex"),
    (&["gpt-5.1"], "gpt-5.1"),
    // Bare `codex` is the catch-all for any other codex id.
    (&["gpt-5-codex", "codex"], "gpt-5-codex"),
    (&["gpt-5"], "gpt-5"),
    (&["o4-mini"], "o4-mini"),
    (&["o3-mini"], "o3-mini"),
    (&["o3"], "o3"),
];
182
/// A resolved rate table (the bundled fallback merged with any live/cached
/// LiteLLM rates). Keyed by normalized model id: bundled keys are lowercase
/// literals and live LiteLLM keys are lowercased before insertion, which is
/// what [`match_pricing`] looks up after running [`normalize_model`].
pub type Table = std::collections::HashMap<String, Rate>;
186
187/// The bundled offline table as a [`Table`] — the deterministic baseline that
188/// `CONTEXTBAR_PRICING_OFFLINE=1` selects in the Python, and what the golden
189/// tests pin against.
190pub fn fallback_table() -> Table {
191    FALLBACK_PRICING.iter().map(|(k, r)| (k.to_string(), *r)).collect()
192}
193
/// Canonicalize a transcript model id for pricing lookup.
///
/// Steps, in order: trim and ASCII-lowercase; strip known provider prefixes;
/// remove the 1M-context tag (`[1m]`, an inner `-1m-`, or a trailing `-1m`),
/// since pricing is identical to the base model. An empty (or all-whitespace)
/// id comes back as the empty string.
///
/// Each prefix is tried exactly once, in the listed order, so a prefix that
/// only becomes leading after a later one is stripped (for example
/// `bedrock/us.anthropic.…`) is left in place.
pub fn normalize_model(model: &str) -> String {
    const PROVIDER_PREFIXES: [&str; 11] = [
        "anthropic/",
        "anthropic.",
        "us.anthropic.",
        "eu.anthropic.",
        "apac.anthropic.",
        "openai/",
        "openrouter/",
        "claude-code/",
        "github_copilot/",
        "bedrock/",
        "vertex_ai/",
    ];

    let lowered = model.trim().to_ascii_lowercase();
    if lowered.is_empty() {
        return lowered;
    }

    // Single ordered pass over the prefixes, borrowing instead of reallocating.
    let unprefixed = PROVIDER_PREFIXES
        .iter()
        .fold(lowered.as_str(), |id, prefix| id.strip_prefix(*prefix).unwrap_or(id));

    let untagged = unprefixed.replace("[1m]", "").replace("-1m-", "-");
    match untagged.strip_suffix("-1m") {
        Some(base) => base.to_string(),
        None => untagged,
    }
}
224
/// True when `s` is non-empty and consists solely of ASCII digits `0`-`9`.
fn all_ascii_digits(s: &str) -> bool {
    let bytes = s.as_bytes();
    match bytes.first() {
        None => false,
        Some(_) => bytes.iter().all(u8::is_ascii_digit),
    }
}
228
/// True when `s` has the shape `YYYY-MM-DD`: exactly ten bytes, with `-` at
/// offsets 4 and 7 and ASCII digits everywhere else. Only the shape is
/// checked, not whether the date is a real calendar date.
fn is_ymd(s: &str) -> bool {
    let bytes = s.as_bytes();
    bytes.len() == 10
        && bytes.iter().enumerate().all(|(offset, byte)| match offset {
            4 | 7 => *byte == b'-',
            _ => byte.is_ascii_digit(),
        })
}
239
240/// If `s` ends with `-v<digits>:<digits>`, return the byte index where it
241/// starts (mirrors `_VER_SUFFIX`).
242fn ver_suffix_start(s: &str) -> Option<usize> {
243    let pos = s.rfind("-v")?;
244    let rest = &s[pos + 2..];
245    let (a, b) = rest.split_once(':')?;
246    if all_ascii_digits(a) && all_ascii_digits(b) {
247        Some(pos)
248    } else {
249        None
250    }
251}
252
253/// `_DATE_SUFFIX`: strip a trailing `-<date>` (8 digits or `YYYY-MM-DD`) plus an
254/// optional `-v<n>:<n>` tail. No-op when no date is present.
255fn strip_date_suffix(s: &str) -> &str {
256    // Optional `-vN:M` tail comes AFTER the date.
257    let head_end = ver_suffix_start(s).unwrap_or(s.len());
258    let head = &s[..head_end];
259    // Form B: -YYYY-MM-DD (11 chars).
260    if head.len() >= 11 {
261        let cand = &head[head.len() - 11..];
262        if cand.as_bytes()[0] == b'-' && is_ymd(&cand[1..]) {
263            return &s[..head.len() - 11];
264        }
265    }
266    // Form A: -DDDDDDDD (9 chars).
267    if head.len() >= 9 {
268        let cand = &head[head.len() - 9..];
269        if cand.as_bytes()[0] == b'-' && all_ascii_digits(&cand[1..]) {
270            return &s[..head.len() - 9];
271        }
272    }
273    s
274}
275
276/// `_VER_SUFFIX`: strip a trailing `-v<n>:<n>`.
277fn strip_ver_suffix(s: &str) -> &str {
278    match ver_suffix_start(s) {
279        Some(i) => &s[..i],
280        None => s,
281    }
282}
283
284/// Resolve a transcript model id onto a rate using `table`. `None` when
285/// unpriceable (cost 0 — an honest undercount, never a crash). Mirrors
286/// `match_pricing(model, table)`.
287pub fn match_pricing(model: &str, table: &Table) -> Option<Rate> {
288    let norm = normalize_model(model);
289    if norm.is_empty() {
290        return None;
291    }
292    if let Some(r) = table.get(&norm) {
293        return Some(*r);
294    }
295    // Strip trailing release date / bedrock version, then retry exact.
296    let stripped = strip_ver_suffix(strip_date_suffix(&norm)).to_string();
297    if let Some(r) = table.get(&stripped) {
298        return Some(*r);
299    }
300    // Longest table key that is a prefix of the stripped id.
301    let mut best: Option<Rate> = None;
302    let mut best_len = 0usize;
303    for (key, rate) in table {
304        if stripped.starts_with(key.as_str()) && key.len() > best_len {
305            best = Some(*rate);
306            best_len = key.len();
307        }
308    }
309    if best.is_some() {
310        return best;
311    }
312    // Coarse family fallback.
313    for (needles, key) in FAMILY_FALLBACK {
314        if needles.iter().any(|n| stripped.contains(n)) {
315            if let Some(r) = table.get(*key) {
316                return Some(*r);
317            }
318        }
319    }
320    None
321}
322
323/// Anthropic >200K tiering for one token category (ccusage-compatible).
324pub fn tiered(tokens: u64, base: Option<f64>, above: Option<f64>) -> f64 {
325    let base = match base {
326        Some(b) => b,
327        None => return 0.0,
328    };
329    if tokens == 0 {
330        return 0.0;
331    }
332    if let Some(above) = above {
333        if tokens > TIER_THRESHOLD {
334            return TIER_THRESHOLD as f64 * base + (tokens - TIER_THRESHOLD) as f64 * above;
335        }
336    }
337    tokens as f64 * base
338}
339
340/// Estimated USD for one turn given its rate + token buckets. Arg order mirrors
341/// the Python `turn_cost(rate, inp, cache_create, cache_read, outp)`.
342pub fn turn_cost(rate: Option<&Rate>, inp: u64, cache_create: u64, cache_read: u64, outp: u64) -> f64 {
343    let rate = match rate {
344        Some(r) => r,
345        None => return 0.0,
346    };
347    tiered(inp, rate.input, rate.input_200k)
348        + tiered(outp, rate.output, rate.output_200k)
349        + tiered(cache_create, rate.cache_write, rate.cache_write_200k)
350        + tiered(cache_read, rate.cache_read, rate.cache_read_200k)
351}
352
353/// NET USD that prompt caching saved this turn (can be slightly negative on a
354/// write-heavy turn). Mirrors `turn_cache_savings`.
355pub fn turn_cache_savings(rate: Option<&Rate>, cache_create: u64, cache_read: u64) -> f64 {
356    let rate = match rate {
357        Some(r) => r,
358        None => return 0.0,
359    };
360    let in_rate = match rate.input {
361        Some(r) => r,
362        None => return 0.0,
363    };
364    let in_200k = rate.input_200k;
365    let no_cache =
366        tiered(cache_create, Some(in_rate), in_200k) + tiered(cache_read, Some(in_rate), in_200k);
367    let actual = tiered(cache_create, rate.cache_write, rate.cache_write_200k)
368        + tiered(cache_read, rate.cache_read, rate.cache_read_200k);
369    no_cache - actual
370}
371
372/// Live + cached pricing resolution (native only — needs HTTP + filesystem).
373/// Mirrors the Python `load_pricing`: fresh 24h cache → live LiteLLM fetch
374/// (then cache) → stale cache → bundled fallback. `CONTEXTBAR_PRICING_OFFLINE`
375/// forces the offline (fallback) path.
376#[cfg(not(target_arch = "wasm32"))]
377pub use live::load_pricing;
378
379#[cfg(not(target_arch = "wasm32"))]
380mod live {
381    use super::{fallback_table, Rate, Table};
382    use std::sync::OnceLock;
383
384    const LITELLM_URL: &str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
385    const PRICING_TTL: u64 = 24 * 3600;
386    static MEMO: OnceLock<(Table, String)> = OnceLock::new();
387
388    fn cache_path() -> Option<std::path::PathBuf> {
389        let home = std::env::var("HOME").ok()?;
390        Some(std::path::PathBuf::from(home).join(".context-bar").join("pricing.cache.json"))
391    }
392
393    fn relevant(key: &str) -> bool {
394        let k = key.to_ascii_lowercase();
395        [
396            "claude", "sonnet", "opus", "haiku", "mythos", "gpt-5", "gpt-4", "codex", "o1", "o3",
397            "o4", "gemini", "glm", "zai", "deepseek", "qwen", "kimi", "moonshot", "minimax",
398            "mistral", "grok", "llama",
399        ]
400        .iter()
401        .any(|s| k.contains(s))
402    }
403
404    /// Project a LiteLLM entry onto a short [`Rate`], dropping nulls/negatives.
405    fn normalize_entry(entry: &serde_json::Value) -> Option<Rate> {
406        let obj = entry.as_object()?;
407        let get = |k: &str| -> Option<f64> {
408            obj.get(k)
409                .and_then(|v| v.as_f64())
410                .filter(|v| *v >= 0.0)
411        };
412        let rate = Rate {
413            input: get("input_cost_per_token"),
414            output: get("output_cost_per_token"),
415            cache_write: get("cache_creation_input_token_cost"),
416            cache_read: get("cache_read_input_token_cost"),
417            input_200k: get("input_cost_per_token_above_200k_tokens"),
418            output_200k: get("output_cost_per_token_above_200k_tokens"),
419            cache_write_200k: get("cache_creation_input_token_cost_above_200k_tokens"),
420            cache_read_200k: get("cache_read_input_token_cost_above_200k_tokens"),
421        };
422        // Need at least an input or output rate to be useful.
423        if rate.input.is_some() || rate.output.is_some() {
424            Some(rate)
425        } else {
426            None
427        }
428    }
429
430    fn parse_live(raw: &serde_json::Value) -> Option<std::collections::HashMap<String, Rate>> {
431        let obj = raw.as_object()?;
432        let mut table = std::collections::HashMap::new();
433        for (key, entry) in obj {
434            if !relevant(key) {
435                continue;
436            }
437            if let Some(rate) = normalize_entry(entry) {
438                table.insert(key.to_ascii_lowercase(), rate);
439            }
440        }
441        if table.is_empty() { None } else { Some(table) }
442    }
443
444    fn fetch_live() -> Option<std::collections::HashMap<String, Rate>> {
445        let resp = ureq::get(LITELLM_URL)
446            .set("User-Agent", "context-bar/usage")
447            .set("Accept", "application/json")
448            .timeout(std::time::Duration::from_secs(15))
449            .call()
450            .ok()?;
451        let raw: serde_json::Value = resp.into_json().ok()?;
452        parse_live(&raw)
453    }
454
455    fn now_secs() -> u64 {
456        std::time::SystemTime::now()
457            .duration_since(std::time::UNIX_EPOCH)
458            .map(|d| d.as_secs())
459            .unwrap_or(0)
460    }
461
462    fn read_cache_table(path: &std::path::Path) -> Option<std::collections::HashMap<String, Rate>> {
463        let bytes = std::fs::read(path).ok()?;
464        let v: serde_json::Value = serde_json::from_slice(&bytes).ok()?;
465        let tbl = v.get("table")?.as_object()?;
466        let mut out = std::collections::HashMap::new();
467        for (k, rv) in tbl {
468            if let Ok(rate) = serde_json::from_value::<Rate>(rv.clone()) {
469                out.insert(k.clone(), rate);
470            }
471        }
472        Some(out)
473    }
474
475    fn cache_age(path: &std::path::Path) -> Option<u64> {
476        let m = std::fs::metadata(path).ok()?.modified().ok()?;
477        Some(now_secs().saturating_sub(m.duration_since(std::time::UNIX_EPOCH).ok()?.as_secs()))
478    }
479
480    fn write_cache(path: &std::path::Path, live: &std::collections::HashMap<String, Rate>) {
481        if let Some(parent) = path.parent() {
482            let _ = std::fs::create_dir_all(parent);
483        }
484        let doc = serde_json::json!({ "timestamp": now_secs(), "table": live });
485        if let Ok(bytes) = serde_json::to_vec(&doc) {
486            let _ = std::fs::write(path, bytes);
487        }
488    }
489
490    pub fn load_pricing() -> (Table, String) {
491        if let Some(v) = MEMO.get() {
492            return v.clone();
493        }
494        let resolved = resolve();
495        let _ = MEMO.set(resolved.clone());
496        resolved
497    }
498
499    fn resolve() -> (Table, String) {
500        let mut base = fallback_table();
501        let path = cache_path();
502
503        // 1. Fresh on-disk cache.
504        if let Some(p) = &path {
505            if cache_age(p).is_some_and(|age| age < PRICING_TTL) {
506                if let Some(tbl) = read_cache_table(p) {
507                    base.extend(tbl);
508                    return (base, "cache".to_string());
509                }
510            }
511        }
512
513        // 2. Live fetch (unless offline forced).
514        let offline = std::env::var("CONTEXTBAR_PRICING_OFFLINE")
515            .map(|v| matches!(v.to_ascii_lowercase().as_str(), "1" | "true" | "yes"))
516            .unwrap_or(false);
517        if !offline {
518            if let Some(live) = fetch_live() {
519                base.extend(live.clone());
520                if let Some(p) = &path {
521                    write_cache(p, &live);
522                }
523                return (base, "live".to_string());
524            }
525        }
526
527        // 3. Stale cache.
528        if let Some(p) = &path {
529            if let Some(tbl) = read_cache_table(p) {
530                base.extend(tbl);
531                return (base, "cache".to_string());
532            }
533        }
534
535        // 4. Bundled fallback only.
536        (base, "fallback".to_string())
537    }
538}
539
#[cfg(test)]
mod tests {
    use super::*;

    /// The bundled offline table; every test resolves rates against it.
    fn t() -> Table {
        fallback_table()
    }

    #[test]
    fn normalize_strips_prefixes_and_1m_tag() {
        let cases = [
            ("anthropic/claude-opus-4-8", "claude-opus-4-8"),
            ("claude-opus-4-8[1m]", "claude-opus-4-8"),
            ("us.anthropic.claude-sonnet-4-5", "claude-sonnet-4-5"),
            ("claude-sonnet-4-5-1m", "claude-sonnet-4-5"),
            ("  GPT-5.1-Codex ", "gpt-5.1-codex"),
        ];
        for (raw, want) in cases {
            assert_eq!(normalize_model(raw), want);
        }
    }

    #[test]
    fn date_and_version_suffix_stripping() {
        // Both date spellings reduce to the same base id.
        for dated in ["claude-opus-4-8-20260514", "claude-opus-4-8-2026-05-14"] {
            assert_eq!(strip_date_suffix(dated), "claude-opus-4-8");
        }
        // Date followed by a bedrock version tail.
        let undated = strip_date_suffix("claude-sonnet-4-5-20260101-v1:0");
        assert_eq!(strip_ver_suffix(undated), "claude-sonnet-4-5");
        // Version tail alone.
        assert_eq!(strip_ver_suffix("claude-sonnet-4-5-v1:0"), "claude-sonnet-4-5");
        // No date/ver -> unchanged.
        assert_eq!(strip_date_suffix("gpt-5.1-codex"), "gpt-5.1-codex");
    }

    #[test]
    fn match_exact_dated_and_family() {
        let table = t();
        let flagship = match_pricing("claude-opus-4-8", &table);
        assert!(flagship.is_some());
        // Dated variant, 1M tag, and an unknown opus-4-7-ish id (family
        // fallback) all resolve to the flagship rate.
        let aliases = [
            "claude-opus-4-8-20260514",
            "claude-opus-4-8[1m]",
            "some-opus-4-7-preview",
        ];
        for alias in aliases {
            assert_eq!(match_pricing(alias, &table), flagship);
        }
        // unknown -> None.
        assert_eq!(match_pricing("totally-unknown-model", &table), None);
    }

    #[test]
    fn turn_cost_matches_hand_computed() {
        // opus-4-8: in 5e-6, out 25e-6, cw 6.25e-6, cr 0.5e-6.
        let opus = match_pricing("claude-opus-4-8", &t());
        let (inp, cache_create, cache_read, outp) = (1000, 2000, 3000, 4000);
        let got = turn_cost(opus.as_ref(), inp, cache_create, cache_read, outp);
        let expect = 1000.0 * 5e-6 + 4000.0 * 25e-6 + 2000.0 * 6.25e-6 + 3000.0 * 0.5e-6;
        assert!((got - expect).abs() < 1e-12, "{got} vs {expect}");
    }

    #[test]
    fn tiering_only_applies_above_threshold_when_rate_present() {
        let table = t();
        let over = TIER_THRESHOLD + 100;

        // sonnet-4-5 has a >200K input tier (6e-6 above).
        let sonnet = match_pricing("claude-sonnet-4-5", &table).unwrap();
        let tiered_cost = tiered(over, sonnet.input, sonnet.input_200k);
        let expect = TIER_THRESHOLD as f64 * 3e-6 + 100.0 * 6e-6;
        assert!((tiered_cost - expect).abs() < 1e-12);

        // opus-4-8 has no tier: linear even above threshold.
        let opus = match_pricing("claude-opus-4-8", &table).unwrap();
        let flat_cost = tiered(over, opus.input, opus.input_200k);
        assert!((flat_cost - over as f64 * 5e-6).abs() < 1e-12);
    }
}