Skip to main content

context_bar_core/
pricing.rs

1//! Cost kernel — the API-equivalent pricing math, ported 1:1 from
2//! `usage_signal.py` (the `FALLBACK_PRICING` table, model matcher, `_tiered`,
3//! `turn_cost`, `turn_cache_savings`). This is the first slice of folding the
4//! Python aggregator into Rust (ROADMAP E1). It is PURE — no I/O, no clock — so
5//! it is pinned by a golden fixture generated from the Python (`tests/`),
6//! guaranteeing byte-for-byte cost parity (see `docs/ai/COST_MODEL.md`).
7//!
8//! Rates are USD per token. The bundled table mirrors the LiteLLM dataset
9//! ccusage uses; the live LiteLLM fetch + 24h cache (the I/O half of
10//! `load_pricing`) is a later slice — until then this is the offline table,
11//! which is exactly what `CONTEXTBAR_PRICING_OFFLINE=1` selects in the Python.
12
/// Anthropic's long-context tier threshold: tokens strictly above this in a
/// category bill at the `*_200k` rate (when the model carries one).
///
/// [`tiered`] applies it per token category: the first `TIER_THRESHOLD` tokens
/// bill at the base rate and only the excess bills at the `*_200k` rate.
pub const TIER_THRESHOLD: u64 = 200_000;
16
/// One model's per-token rates. `None` = the category isn't billed / has no
/// tier (e.g. OpenAI models have no cache-write `cw`; flat models have no
/// `*_200k`). Mirrors the Python short-rate dict keys exactly.
///
/// Every value is USD per single token. The four `*_200k` fields are the
/// long-context rates that [`tiered`] applies to the part of a category above
/// [`TIER_THRESHOLD`]; when one is `None` that category bills flat.
///
/// With serde, a missing key deserializes to `None` (`default`) and a `None`
/// field is omitted on serialization (`skip_serializing_if`).
#[derive(Clone, Copy, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Rate {
    // Short keys match the Python short-rate dict + the on-disk pricing cache,
    // so the Rust engine reads/writes the same `pricing.cache.json`.
    /// Input tokens (LiteLLM `input_cost_per_token`).
    #[serde(rename = "in", default, skip_serializing_if = "Option::is_none")]
    pub input: Option<f64>,
    /// Output tokens (LiteLLM `output_cost_per_token`).
    #[serde(rename = "out", default, skip_serializing_if = "Option::is_none")]
    pub output: Option<f64>,
    /// Cache-creation tokens; `turn_cost` bills 5-minute writes at this rate.
    #[serde(rename = "cw", default, skip_serializing_if = "Option::is_none")]
    pub cache_write: Option<f64>,
    /// Cache-read tokens.
    #[serde(rename = "cr", default, skip_serializing_if = "Option::is_none")]
    pub cache_read: Option<f64>,
    /// Input rate for the portion above [`TIER_THRESHOLD`].
    #[serde(rename = "in_200k", default, skip_serializing_if = "Option::is_none")]
    pub input_200k: Option<f64>,
    /// Output rate for the portion above [`TIER_THRESHOLD`].
    #[serde(rename = "out_200k", default, skip_serializing_if = "Option::is_none")]
    pub output_200k: Option<f64>,
    /// Cache-write rate for the portion above [`TIER_THRESHOLD`].
    #[serde(rename = "cw_200k", default, skip_serializing_if = "Option::is_none")]
    pub cache_write_200k: Option<f64>,
    /// Cache-read rate for the portion above [`TIER_THRESHOLD`].
    #[serde(rename = "cr_200k", default, skip_serializing_if = "Option::is_none")]
    pub cache_read_200k: Option<f64>,
}
41
// Constructors mirroring the four rate shapes used in FALLBACK_PRICING.
43const fn anthropic(input: f64, output: f64, cw: f64, cr: f64) -> Rate {
44    Rate {
45        input: Some(input),
46        output: Some(output),
47        cache_write: Some(cw),
48        cache_read: Some(cr),
49        input_200k: None,
50        output_200k: None,
51        cache_write_200k: None,
52        cache_read_200k: None,
53    }
54}
55
56#[allow(clippy::too_many_arguments)]
57const fn anthropic_tiered(
58    input: f64,
59    output: f64,
60    cw: f64,
61    cr: f64,
62    i2: f64,
63    o2: f64,
64    cw2: f64,
65    cr2: f64,
66) -> Rate {
67    Rate {
68        input: Some(input),
69        output: Some(output),
70        cache_write: Some(cw),
71        cache_read: Some(cr),
72        input_200k: Some(i2),
73        output_200k: Some(o2),
74        cache_write_200k: Some(cw2),
75        cache_read_200k: Some(cr2),
76    }
77}
78
79/// OpenAI/Codex shape: input + output + cache-read, no cache-write charge.
80const fn openai(input: f64, output: f64, cr: f64) -> Rate {
81    Rate {
82        input: Some(input),
83        output: Some(output),
84        cache_write: None,
85        cache_read: Some(cr),
86        input_200k: None,
87        output_200k: None,
88        cache_write_200k: None,
89        cache_read_200k: None,
90    }
91}
92
93/// OpenAI shape with no published cache-read rate (e.g. `gpt-5-pro`).
94const fn openai_no_cr(input: f64, output: f64) -> Rate {
95    Rate {
96        input: Some(input),
97        output: Some(output),
98        cache_write: None,
99        cache_read: None,
100        input_200k: None,
101        output_200k: None,
102        cache_write_200k: None,
103        cache_read_200k: None,
104    }
105}
106
/// Bundled offline rate table (USD/token), captured from LiteLLM — verbatim
/// from `FALLBACK_PRICING` in `usage_signal.py`.
///
/// Row order is kept the same as the Python table for easy diffing, but it is
/// not what makes lookups deterministic: [`fallback_table`] collects the slice
/// into a `HashMap`, and the longest-prefix step in [`match_pricing`] has a
/// unique winner regardless of iteration order (two distinct keys of equal
/// length cannot both be prefixes of the same id).
///
/// Literals are written as `<USD per million tokens>e-6`, i.e. USD per token.
pub static FALLBACK_PRICING: &[(&str, Rate)] = &[
    // Fable 5 flagship ($10/$50). 5m cache write 1.25x, read 0.1x; the 1h write
    // (2x input) is derived in `turn_cost`, not stored.
    ("claude-fable-5", anthropic(10e-6, 50e-6, 12.5e-6, 1.0e-6)),
    ("claude-opus-4-8", anthropic(5e-6, 25e-6, 6.25e-6, 0.5e-6)),
    ("claude-opus-4-7", anthropic(5e-6, 25e-6, 6.25e-6, 0.5e-6)),
    ("claude-opus-4-6", anthropic(5e-6, 25e-6, 6.25e-6, 0.5e-6)),
    ("claude-opus-4-5", anthropic(5e-6, 25e-6, 6.25e-6, 0.5e-6)),
    ("claude-opus-4-1", anthropic(15e-6, 75e-6, 18.75e-6, 1.5e-6)),
    ("claude-opus-4", anthropic(15e-6, 75e-6, 18.75e-6, 1.5e-6)),
    ("claude-sonnet-4-6", anthropic(3e-6, 15e-6, 3.75e-6, 0.3e-6)),
    // The two Sonnet rows below are the only entries carrying a >200K tier.
    (
        "claude-sonnet-4-5",
        anthropic_tiered(3e-6, 15e-6, 3.75e-6, 0.3e-6, 6e-6, 22.5e-6, 7.5e-6, 0.6e-6),
    ),
    (
        "claude-sonnet-4",
        anthropic_tiered(3e-6, 15e-6, 3.75e-6, 0.3e-6, 6e-6, 22.5e-6, 7.5e-6, 0.6e-6),
    ),
    ("claude-3-7-sonnet", anthropic(3e-6, 15e-6, 3.75e-6, 0.3e-6)),
    ("claude-3-5-sonnet", anthropic(3e-6, 15e-6, 3.75e-6, 0.3e-6)),
    ("claude-haiku-4-5", anthropic(1e-6, 5e-6, 1.25e-6, 0.1e-6)),
    ("claude-3-5-haiku", anthropic(0.8e-6, 4e-6, 1e-6, 0.08e-6)),
    ("mythos", anthropic(5e-6, 25e-6, 6.25e-6, 0.5e-6)),
    // OpenAI / Codex rows: no cache-write charge (see `openai`).
    ("gpt-5", openai(1.25e-6, 10e-6, 0.125e-6)),
    ("gpt-5-codex", openai(1.25e-6, 10e-6, 0.125e-6)),
    ("gpt-5-pro", openai_no_cr(15e-6, 120e-6)),
    ("gpt-5-mini", openai(0.25e-6, 2e-6, 0.025e-6)),
    ("gpt-5-nano", openai(0.05e-6, 0.4e-6, 0.005e-6)),
    ("gpt-5.1", openai(1.25e-6, 10e-6, 0.125e-6)),
    ("gpt-5.1-codex", openai(1.25e-6, 10e-6, 0.125e-6)),
    ("gpt-5.1-codex-max", openai(1.25e-6, 10e-6, 0.125e-6)),
    ("gpt-5.1-codex-mini", openai(0.25e-6, 2e-6, 0.025e-6)),
    ("gpt-5.2", openai(1.75e-6, 14e-6, 0.175e-6)),
    ("gpt-5.2-codex", openai(1.75e-6, 14e-6, 0.175e-6)),
    ("gpt-5.3-codex", openai(1.75e-6, 14e-6, 0.175e-6)),
    ("gpt-5.4", openai(2.5e-6, 15e-6, 0.25e-6)),
    ("gpt-5.4-codex", openai(2.5e-6, 15e-6, 0.25e-6)),
    ("gpt-5.4-mini", openai(0.75e-6, 4.5e-6, 0.075e-6)),
    ("gpt-5.4-nano", openai(0.2e-6, 1.25e-6, 0.02e-6)),
    ("gpt-5.4-pro", openai(30e-6, 180e-6, 3e-6)),
    ("gpt-5.5", openai(5e-6, 30e-6, 0.5e-6)),
    ("gpt-5.5-pro", openai(30e-6, 180e-6, 3e-6)),
    ("codex-mini-latest", openai(1.5e-6, 6e-6, 0.375e-6)),
    ("o4-mini", openai(1.1e-6, 4.4e-6, 0.275e-6)),
    ("o3", openai(2e-6, 8e-6, 0.5e-6)),
    ("o3-mini", openai(1.1e-6, 4.4e-6, 0.55e-6)),
];
158
/// Coarse family fallback, checked last — ordered, most-specific first.
/// Each entry: any of these substrings present in the (stripped) id maps to
/// the table key. Mirrors `FAMILY_FALLBACK`; the Python regexes here are plain
/// literal alternations, so substring matching is faithful.
///
/// [`match_pricing`] walks the entries top to bottom and returns the rate of
/// the first entry that both matches and whose key exists in the table, so
/// entry order is significant here (e.g. `o3-mini` must precede `o3`). The
/// order of needles inside one entry is not significant.
static FAMILY_FALLBACK: &[(&[&str], &str)] = &[
    (&["fable-5", "fable"], "claude-fable-5"),
    // Newer Opus point releases share one price tier; plain `opus-4` (below)
    // catches the rest at the older rate.
    (&["opus-4-5", "opus-4-6", "opus-4-7", "opus-4-8"], "claude-opus-4-8"),
    (&["opus-4"], "claude-opus-4"),
    (&["mythos"], "mythos"),
    (&["sonnet-4"], "claude-sonnet-4-6"),
    (&["3-7-sonnet"], "claude-3-7-sonnet"),
    (&["3-5-sonnet"], "claude-3-5-sonnet"),
    (&["haiku-4"], "claude-haiku-4-5"),
    (&["3-5-haiku", "haiku"], "claude-3-5-haiku"),
    (&["gpt-5.5-pro"], "gpt-5.5-pro"),
    (&["gpt-5.5"], "gpt-5.5"),
    (&["gpt-5.4-codex"], "gpt-5.4-codex"),
    (&["gpt-5.4"], "gpt-5.4"),
    (&["gpt-5.3-codex", "gpt-5.2-codex", "gpt-5.2", "gpt-5.3"], "gpt-5.2"),
    (&["gpt-5.1-codex"], "gpt-5.1-codex"),
    (&["gpt-5.1"], "gpt-5.1"),
    (&["gpt-5-codex", "codex"], "gpt-5-codex"),
    (&["gpt-5"], "gpt-5"),
    (&["o4-mini"], "o4-mini"),
    (&["o3-mini"], "o3-mini"),
    (&["o3"], "o3"),
];
186
/// A resolved rate table (the bundled fallback merged with any live/cached
/// LiteLLM rates). Keyed by normalized model id.
///
/// Bundled keys come from `FALLBACK_PRICING`; keys merged in from the live
/// LiteLLM fetch are the LiteLLM keys lowercased.
pub type Table = std::collections::HashMap<String, Rate>;
190
191/// The bundled offline table as a [`Table`] — the deterministic baseline that
192/// `CONTEXTBAR_PRICING_OFFLINE=1` selects in the Python, and what the golden
193/// tests pin against.
194pub fn fallback_table() -> Table {
195    FALLBACK_PRICING.iter().map(|(k, r)| (k.to_string(), *r)).collect()
196}
197
/// Normalize a transcript model id: lowercase, strip provider prefixes, drop
/// the 1M-context tag (pricing is identical to the base model).
///
/// Steps, in order:
/// 1. Trim surrounding whitespace and ASCII-lowercase.
/// 2. Strip leading provider/router prefixes until none is left. Prefixes
///    stack in practice (`openrouter/openai/gpt-5-mini`,
///    `bedrock/us.anthropic.claude-…`), so one ordered pass is not enough: it
///    would leave the inner prefix, and the id would then miss its exact table
///    row and fall through to a coarser family rate.
/// 3. Remove every `[1m]` tag, collapse every `-1m-` to `-`, and drop one
///    trailing `-1m`.
///
/// Returns the empty string for empty / whitespace-only input.
///
/// NOTE(review): stacked-prefix stripping goes beyond a single ordered pass;
/// if the Python reference strips in one pass, mirror this change there so the
/// golden parity fixture stays aligned.
pub fn normalize_model(model: &str) -> String {
    const PROVIDER_PREFIXES: [&str; 11] = [
        "anthropic/",
        "anthropic.",
        "us.anthropic.",
        "eu.anthropic.",
        "apac.anthropic.",
        "openai/",
        "openrouter/",
        "claude-code/",
        "github_copilot/",
        "bedrock/",
        "vertex_ai/",
    ];

    let lowered = model.trim().to_ascii_lowercase();

    // Every successful strip shortens `id`, so this terminates.
    let mut id = lowered.as_str();
    while let Some(rest) = PROVIDER_PREFIXES.iter().find_map(|prefix| id.strip_prefix(*prefix)) {
        id = rest;
    }

    let untagged = id.replace("[1m]", "").replace("-1m-", "-");
    match untagged.strip_suffix("-1m") {
        Some(base) => base.to_string(),
        None => untagged,
    }
}
228
/// True when `s` is non-empty and every byte is an ASCII digit.
fn all_ascii_digits(s: &str) -> bool {
    !s.is_empty() && s.bytes().all(|c| c.is_ascii_digit())
}

/// `YYYY-MM-DD`.
///
/// Checked byte-wise (exactly 10 bytes, `-` at offsets 4 and 7, ASCII digits
/// elsewhere), so it never slices the string and cannot panic on non-ASCII
/// input.
fn is_ymd(s: &str) -> bool {
    let bytes = s.as_bytes();
    bytes.len() == 10
        && bytes.iter().enumerate().all(|(i, b)| match i {
            4 | 7 => *b == b'-',
            _ => b.is_ascii_digit(),
        })
}

/// If `s` ends with `-v<digits>:<digits>`, return the byte index where it
/// starts (mirrors `_VER_SUFFIX`).
///
/// Only the last `-v` can start such a tail, because the tail itself (digits
/// and one colon) cannot contain another `-v`.
fn ver_suffix_start(s: &str) -> Option<usize> {
    let pos = s.rfind("-v")?;
    // `pos + 2` is right after two ASCII bytes, hence a valid char boundary.
    let rest = &s[pos + 2..];
    let (major, minor) = rest.split_once(':')?;
    (all_ascii_digits(major) && all_ascii_digits(minor)).then_some(pos)
}

/// The text following the leading `-` when the last `width` bytes of `head`
/// begin with `-`; `None` when `head` is shorter than `width`, when that
/// offset falls inside a multi-byte character, or when there is no `-` there.
fn dashed_tail(head: &str, width: usize) -> Option<&str> {
    let cut = head.len().checked_sub(width)?;
    // `str::get` returns `None` instead of panicking on a non-boundary offset.
    head.get(cut..)?.strip_prefix('-')
}

/// `_DATE_SUFFIX`: strip a trailing `-<date>` (8 digits or `YYYY-MM-DD`) plus an
/// optional `-v<n>:<n>` tail. No-op when no date is present.
///
/// Model ids come from transcripts and may contain non-ASCII text, so every
/// candidate offset is probed with checked slicing; an id whose tail is not an
/// ASCII date is returned unchanged rather than panicking.
fn strip_date_suffix(s: &str) -> &str {
    // Optional `-vN:M` tail comes AFTER the date.
    let head_end = ver_suffix_start(s).unwrap_or(s.len());
    let head = &s[..head_end];
    // Form B: -YYYY-MM-DD (11 bytes). Tried first, as it is the longer form.
    if dashed_tail(head, 11).is_some_and(is_ymd) {
        // The cut lands on the ASCII `-`, so it is a char boundary.
        return &s[..head.len() - 11];
    }
    // Form A: -DDDDDDDD (9 bytes).
    if dashed_tail(head, 9).is_some_and(all_ascii_digits) {
        return &s[..head.len() - 9];
    }
    s
}
279
280/// `_VER_SUFFIX`: strip a trailing `-v<n>:<n>`.
281fn strip_ver_suffix(s: &str) -> &str {
282    match ver_suffix_start(s) {
283        Some(i) => &s[..i],
284        None => s,
285    }
286}
287
288/// Resolve a transcript model id onto a rate using `table`. `None` when
289/// unpriceable (cost 0 — an honest undercount, never a crash). Mirrors
290/// `match_pricing(model, table)`.
291pub fn match_pricing(model: &str, table: &Table) -> Option<Rate> {
292    let norm = normalize_model(model);
293    if norm.is_empty() {
294        return None;
295    }
296    if let Some(r) = table.get(&norm) {
297        return Some(*r);
298    }
299    // Strip trailing release date / bedrock version, then retry exact.
300    let stripped = strip_ver_suffix(strip_date_suffix(&norm)).to_string();
301    if let Some(r) = table.get(&stripped) {
302        return Some(*r);
303    }
304    // Longest table key that is a prefix of the stripped id.
305    let mut best: Option<Rate> = None;
306    let mut best_len = 0usize;
307    for (key, rate) in table {
308        if stripped.starts_with(key.as_str()) && key.len() > best_len {
309            best = Some(*rate);
310            best_len = key.len();
311        }
312    }
313    if best.is_some() {
314        return best;
315    }
316    // Coarse family fallback.
317    for (needles, key) in FAMILY_FALLBACK {
318        if needles.iter().any(|n| stripped.contains(n)) {
319            if let Some(r) = table.get(*key) {
320                return Some(*r);
321            }
322        }
323    }
324    None
325}
326
327/// Anthropic >200K tiering for one token category (ccusage-compatible).
328pub fn tiered(tokens: u64, base: Option<f64>, above: Option<f64>) -> f64 {
329    let base = match base {
330        Some(b) => b,
331        None => return 0.0,
332    };
333    if tokens == 0 {
334        return 0.0;
335    }
336    if let Some(above) = above {
337        if tokens > TIER_THRESHOLD {
338            return TIER_THRESHOLD as f64 * base + (tokens - TIER_THRESHOLD) as f64 * above;
339        }
340    }
341    tokens as f64 * base
342}
343
344/// Estimated USD for one turn given its rate + token buckets. Cache creation is
345/// split: 5-minute writes bill at the table `cache_write` rate (1.25x input);
346/// 1-hour writes bill at 2x input (Anthropic spec — derived, not stored). Arg
347/// order mirrors `turn_cost(rate, inp, cc_5m, cc_1h, cache_read, outp)`.
348pub fn turn_cost(
349    rate: Option<&Rate>,
350    inp: u64,
351    cache_create_5m: u64,
352    cache_create_1h: u64,
353    cache_read: u64,
354    outp: u64,
355) -> f64 {
356    let rate = match rate {
357        Some(r) => r,
358        None => return 0.0,
359    };
360    let oneh = rate.input.map(|i| i * 2.0);
361    let oneh_200k = rate.input_200k.map(|i| i * 2.0);
362    tiered(inp, rate.input, rate.input_200k)
363        + tiered(outp, rate.output, rate.output_200k)
364        + tiered(cache_create_5m, rate.cache_write, rate.cache_write_200k)
365        + tiered(cache_create_1h, oneh, oneh_200k)
366        + tiered(cache_read, rate.cache_read, rate.cache_read_200k)
367}
368
369/// NET USD that prompt caching saved this turn (can be slightly negative on a
370/// write-heavy turn). Mirrors `turn_cache_savings`.
371pub fn turn_cache_savings(
372    rate: Option<&Rate>,
373    cache_create_5m: u64,
374    cache_create_1h: u64,
375    cache_read: u64,
376) -> f64 {
377    let rate = match rate {
378        Some(r) => r,
379        None => return 0.0,
380    };
381    let in_rate = match rate.input {
382        Some(r) => r,
383        None => return 0.0,
384    };
385    let in_200k = rate.input_200k;
386    let oneh = Some(in_rate * 2.0);
387    let oneh_200k = in_200k.map(|i| i * 2.0);
388    let no_cache = tiered(cache_create_5m, Some(in_rate), in_200k)
389        + tiered(cache_create_1h, Some(in_rate), in_200k)
390        + tiered(cache_read, Some(in_rate), in_200k);
391    let actual = tiered(cache_create_5m, rate.cache_write, rate.cache_write_200k)
392        + tiered(cache_create_1h, oneh, oneh_200k)
393        + tiered(cache_read, rate.cache_read, rate.cache_read_200k);
394    no_cache - actual
395}
396
/// Live + cached pricing resolution (native only — needs HTTP + filesystem).
/// Mirrors the Python `load_pricing`: fresh 24h cache → live LiteLLM fetch
/// (then cache) → stale cache → bundled fallback.
///
/// `CONTEXTBAR_PRICING_OFFLINE` (`1` / `true` / `yes`, case-insensitive) skips
/// only the live fetch: a readable on-disk cache, fresh or stale, is still
/// merged over the bundled table. The pure bundled table is returned only when
/// no cache can be read.
#[cfg(not(target_arch = "wasm32"))]
pub use live::load_pricing;
403
404#[cfg(not(target_arch = "wasm32"))]
405mod live {
406    use super::{fallback_table, Rate, Table};
407    use std::sync::OnceLock;
408
409    const LITELLM_URL: &str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
410    const PRICING_TTL: u64 = 24 * 3600;
411    static MEMO: OnceLock<(Table, String)> = OnceLock::new();
412
413    fn cache_path() -> Option<std::path::PathBuf> {
414        let home = std::env::var("HOME").ok()?;
415        Some(std::path::PathBuf::from(home).join(".context-bar").join("pricing.cache.json"))
416    }
417
418    fn relevant(key: &str) -> bool {
419        let k = key.to_ascii_lowercase();
420        [
421            "claude", "sonnet", "opus", "haiku", "mythos", "gpt-5", "gpt-4", "codex", "o1", "o3",
422            "o4", "gemini", "glm", "zai", "deepseek", "qwen", "kimi", "moonshot", "minimax",
423            "mistral", "grok", "llama",
424        ]
425        .iter()
426        .any(|s| k.contains(s))
427    }
428
429    /// Project a LiteLLM entry onto a short [`Rate`], dropping nulls/negatives.
430    fn normalize_entry(entry: &serde_json::Value) -> Option<Rate> {
431        let obj = entry.as_object()?;
432        let get = |k: &str| -> Option<f64> {
433            obj.get(k)
434                .and_then(|v| v.as_f64())
435                .filter(|v| *v >= 0.0)
436        };
437        let rate = Rate {
438            input: get("input_cost_per_token"),
439            output: get("output_cost_per_token"),
440            cache_write: get("cache_creation_input_token_cost"),
441            cache_read: get("cache_read_input_token_cost"),
442            input_200k: get("input_cost_per_token_above_200k_tokens"),
443            output_200k: get("output_cost_per_token_above_200k_tokens"),
444            cache_write_200k: get("cache_creation_input_token_cost_above_200k_tokens"),
445            cache_read_200k: get("cache_read_input_token_cost_above_200k_tokens"),
446        };
447        // Need at least an input or output rate to be useful.
448        if rate.input.is_some() || rate.output.is_some() {
449            Some(rate)
450        } else {
451            None
452        }
453    }
454
455    fn parse_live(raw: &serde_json::Value) -> Option<std::collections::HashMap<String, Rate>> {
456        let obj = raw.as_object()?;
457        let mut table = std::collections::HashMap::new();
458        for (key, entry) in obj {
459            if !relevant(key) {
460                continue;
461            }
462            if let Some(rate) = normalize_entry(entry) {
463                table.insert(key.to_ascii_lowercase(), rate);
464            }
465        }
466        if table.is_empty() { None } else { Some(table) }
467    }
468
469    fn fetch_live() -> Option<std::collections::HashMap<String, Rate>> {
470        let resp = ureq::get(LITELLM_URL)
471            .set("User-Agent", "context-bar/usage")
472            .set("Accept", "application/json")
473            .timeout(std::time::Duration::from_secs(15))
474            .call()
475            .ok()?;
476        let raw: serde_json::Value = resp.into_json().ok()?;
477        parse_live(&raw)
478    }
479
480    fn now_secs() -> u64 {
481        std::time::SystemTime::now()
482            .duration_since(std::time::UNIX_EPOCH)
483            .map(|d| d.as_secs())
484            .unwrap_or(0)
485    }
486
487    fn read_cache_table(path: &std::path::Path) -> Option<std::collections::HashMap<String, Rate>> {
488        let bytes = std::fs::read(path).ok()?;
489        let v: serde_json::Value = serde_json::from_slice(&bytes).ok()?;
490        let tbl = v.get("table")?.as_object()?;
491        let mut out = std::collections::HashMap::new();
492        for (k, rv) in tbl {
493            if let Ok(rate) = serde_json::from_value::<Rate>(rv.clone()) {
494                out.insert(k.clone(), rate);
495            }
496        }
497        Some(out)
498    }
499
500    fn cache_age(path: &std::path::Path) -> Option<u64> {
501        let m = std::fs::metadata(path).ok()?.modified().ok()?;
502        Some(now_secs().saturating_sub(m.duration_since(std::time::UNIX_EPOCH).ok()?.as_secs()))
503    }
504
505    fn write_cache(path: &std::path::Path, live: &std::collections::HashMap<String, Rate>) {
506        if let Some(parent) = path.parent() {
507            let _ = std::fs::create_dir_all(parent);
508        }
509        let doc = serde_json::json!({ "timestamp": now_secs(), "table": live });
510        if let Ok(bytes) = serde_json::to_vec(&doc) {
511            let _ = std::fs::write(path, bytes);
512        }
513    }
514
515    pub fn load_pricing() -> (Table, String) {
516        if let Some(v) = MEMO.get() {
517            return v.clone();
518        }
519        let resolved = resolve();
520        let _ = MEMO.set(resolved.clone());
521        resolved
522    }
523
524    fn resolve() -> (Table, String) {
525        let mut base = fallback_table();
526        let path = cache_path();
527
528        // 1. Fresh on-disk cache.
529        if let Some(p) = &path {
530            if cache_age(p).is_some_and(|age| age < PRICING_TTL) {
531                if let Some(tbl) = read_cache_table(p) {
532                    base.extend(tbl);
533                    return (base, "cache".to_string());
534                }
535            }
536        }
537
538        // 2. Live fetch (unless offline forced).
539        let offline = std::env::var("CONTEXTBAR_PRICING_OFFLINE")
540            .map(|v| matches!(v.to_ascii_lowercase().as_str(), "1" | "true" | "yes"))
541            .unwrap_or(false);
542        if !offline {
543            if let Some(live) = fetch_live() {
544                base.extend(live.clone());
545                if let Some(p) = &path {
546                    write_cache(p, &live);
547                }
548                return (base, "live".to_string());
549            }
550        }
551
552        // 3. Stale cache.
553        if let Some(p) = &path {
554            if let Some(tbl) = read_cache_table(p) {
555                base.extend(tbl);
556                return (base, "cache".to_string());
557            }
558        }
559
560        // 4. Bundled fallback only.
561        (base, "fallback".to_string())
562    }
563}
564
#[cfg(test)]
mod tests {
    use super::*;

    /// The bundled offline table every test resolves against.
    fn t() -> Table {
        fallback_table()
    }

    /// Float comparison at the tolerance used throughout these tests.
    fn assert_close(actual: f64, expected: f64) {
        assert!((actual - expected).abs() < 1e-12, "{actual} vs {expected}");
    }

    #[test]
    fn normalize_strips_prefixes_and_1m_tag() {
        let cases = [
            ("anthropic/claude-opus-4-8", "claude-opus-4-8"),
            ("claude-opus-4-8[1m]", "claude-opus-4-8"),
            ("us.anthropic.claude-sonnet-4-5", "claude-sonnet-4-5"),
            ("claude-sonnet-4-5-1m", "claude-sonnet-4-5"),
            ("  GPT-5.1-Codex ", "gpt-5.1-codex"),
        ];
        for (raw, expected) in cases {
            assert_eq!(normalize_model(raw), expected, "input {raw:?}");
        }
    }

    #[test]
    fn date_and_version_suffix_stripping() {
        // Both date spellings strip to the same base id.
        for dated in ["claude-opus-4-8-20260514", "claude-opus-4-8-2026-05-14"] {
            assert_eq!(strip_date_suffix(dated), "claude-opus-4-8");
        }
        let bedrock = strip_date_suffix("claude-sonnet-4-5-20260101-v1:0");
        assert_eq!(strip_ver_suffix(bedrock), "claude-sonnet-4-5");
        assert_eq!(strip_ver_suffix("claude-sonnet-4-5-v1:0"), "claude-sonnet-4-5");
        // No date/ver -> unchanged.
        assert_eq!(strip_date_suffix("gpt-5.1-codex"), "gpt-5.1-codex");
    }

    #[test]
    fn match_exact_dated_and_family() {
        let table = t();
        let opus = match_pricing("claude-opus-4-8", &table);
        assert!(opus.is_some());
        // Dated variant, 1M tag, and the family fallback for an unknown
        // opus-4-7-ish id all land on the same flagship-tier rate.
        for alias in ["claude-opus-4-8-20260514", "claude-opus-4-8[1m]", "some-opus-4-7-preview"] {
            assert_eq!(match_pricing(alias, &table), opus, "alias {alias:?}");
        }
        // unknown -> None.
        assert_eq!(match_pricing("totally-unknown-model", &table), None);
    }

    #[test]
    fn turn_cost_matches_hand_computed() {
        // opus-4-8: in 5e-6, out 25e-6, cw 6.25e-6, cr 0.5e-6.
        let rate = match_pricing("claude-opus-4-8", &t());
        let fixed = 1000.0 * 5e-6 + 4000.0 * 25e-6;
        let read = 3000.0 * 0.5e-6;

        // 5m-only: the 2000 written tokens bill at the table cache-write rate.
        let five_min = turn_cost(rate.as_ref(), 1000, 2000, 0, 3000, 4000);
        assert_close(five_min, fixed + 2000.0 * 6.25e-6 + read);

        // 1-hour writes bill at 2x input (10e-6), not the 5m rate (6.25e-6).
        let one_hour = turn_cost(rate.as_ref(), 1000, 0, 2000, 3000, 4000);
        assert_close(one_hour, fixed + 2000.0 * 10e-6 + read);
    }

    #[test]
    fn tiering_only_applies_above_threshold_when_rate_present() {
        let table = t();
        let n = TIER_THRESHOLD + 100;

        // sonnet-4-5 has a >200K input tier (6e-6 above).
        let sonnet = match_pricing("claude-sonnet-4-5", &table).unwrap();
        assert_close(
            tiered(n, sonnet.input, sonnet.input_200k),
            TIER_THRESHOLD as f64 * 3e-6 + 100.0 * 6e-6,
        );

        // opus-4-8 has no tier: linear even above threshold.
        let opus = match_pricing("claude-opus-4-8", &table).unwrap();
        assert_close(tiered(n, opus.input, opus.input_200k), n as f64 * 5e-6);
    }
}
634}