opencrabs 0.3.6

//! Centralized model pricing table
//!
//! Loaded from `~/.opencrabs/usage_pricing.toml` at runtime.
//! Falls back to compiled-in defaults if the file is missing.
//! Users can edit the file live — changes take effect on next `/usage` open.

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::OnceLock;

/// A single model pricing entry.
/// `prefix` is matched as a substring of the model name (case-insensitive).
/// First match wins, so put more specific prefixes before general ones.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PricingEntry {
    pub prefix: String,
    pub input_per_m: f64,
    pub output_per_m: f64,
    /// Cache write cost per million tokens (defaults to 1.25x input_per_m if absent)
    #[serde(default)]
    pub cache_write_per_m: Option<f64>,
    /// Cache read cost per million tokens (defaults to 0.1x input_per_m if absent)
    #[serde(default)]
    pub cache_read_per_m: Option<f64>,
}

/// Per-provider block in the TOML file.
/// TOML format: `[providers.anthropic]\nentries = [...]`
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ProviderBlock {
    #[serde(default)]
    pub entries: Vec<PricingEntry>,
}

/// The full pricing table, keyed by provider name (for display only).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PricingConfig {
    #[serde(default)]
    pub providers: HashMap<String, ProviderBlock>,
}

impl PricingConfig {
    /// Calculate cost for a model + token counts (no cache breakdown).
    /// Treats all input tokens at the regular input rate.
    pub fn calculate_cost(&self, model: &str, input_tokens: u32, output_tokens: u32) -> f64 {
        self.calculate_cost_with_cache(model, input_tokens, output_tokens, 0, 0)
    }

    /// Calculate cost with full cache breakdown.
    /// `input_tokens` = non-cached input only.
    /// Cache write defaults to 1.25x input rate, cache read to 0.1x.
    pub fn calculate_cost_with_cache(
        &self,
        model: &str,
        input_tokens: u32,
        output_tokens: u32,
        cache_creation_tokens: u32,
        cache_read_tokens: u32,
    ) -> f64 {
        let m = model.to_lowercase();
        for block in self.providers.values() {
            for entry in &block.entries {
                if m.contains(&entry.prefix.to_lowercase()) {
                    let input = (input_tokens as f64 / 1_000_000.0) * entry.input_per_m;
                    let output = (output_tokens as f64 / 1_000_000.0) * entry.output_per_m;
                    let cache_write_rate =
                        entry.cache_write_per_m.unwrap_or(entry.input_per_m * 1.25);
                    let cache_read_rate = entry.cache_read_per_m.unwrap_or(entry.input_per_m * 0.1);
                    let cache_write =
                        (cache_creation_tokens as f64 / 1_000_000.0) * cache_write_rate;
                    let cache_read = (cache_read_tokens as f64 / 1_000_000.0) * cache_read_rate;
                    return input + output + cache_write + cache_read;
                }
            }
        }
        0.0
    }

    /// Estimate cost from a combined token count using an 80/20 input/output split.
    /// Returns None if model is unknown.
    pub fn estimate_cost(&self, model: &str, token_count: i64) -> Option<f64> {
        let m = model.to_lowercase();
        for block in self.providers.values() {
            for entry in &block.entries {
                if m.contains(&entry.prefix.to_lowercase()) {
                    let input = (token_count as f64 * 0.80 / 1_000_000.0) * entry.input_per_m;
                    let output = (token_count as f64 * 0.20 / 1_000_000.0) * entry.output_per_m;
                    return Some(input + output);
                }
            }
        }
        None
    }

    /// Load from ~/.opencrabs/usage_pricing.toml.
    /// Supports both the current schema (`[providers.X] entries = [...]`) and the
    /// legacy on-disk schema (`[[usage.pricing.X]]` array-of-tables).
    /// Returns compiled-in defaults if file is missing, unreadable, or both schemas fail.
    pub fn load() -> Self {
        let path = crate::config::opencrabs_home().join("usage_pricing.toml");
        let content = match std::fs::read_to_string(&path) {
            Ok(c) => c,
            Err(_) => return Self::defaults(),
        };

        // Try current schema first.
        if let Ok(cfg) = toml::from_str::<PricingConfig>(&content)
            && !cfg.providers.is_empty()
        {
            return cfg;
        }

        // Try legacy schema: [[usage.pricing.<provider>]] entries
        if let Ok(cfg) = Self::load_legacy(&content)
            && !cfg.providers.is_empty()
        {
            tracing::warn!(
                "usage_pricing.toml uses old format — please update it to the new schema. \
                 See ~/.opencrabs/usage_pricing.toml.example"
            );
            let new_content = Self::serialize_to_toml(&cfg);
            let _ = std::fs::write(&path, new_content);
            return cfg;
        }

        tracing::warn!(
            "usage_pricing.toml failed to parse with both schemas — using built-in defaults"
        );
        Self::defaults()
    }

    /// Parse the legacy `[[usage.pricing.<provider>]]` format.
    fn load_legacy(content: &str) -> Result<Self, toml::de::Error> {
        #[derive(serde::Deserialize)]
        struct LegacyRoot {
            usage: Option<LegacyUsage>,
        }
        #[derive(serde::Deserialize)]
        struct LegacyUsage {
            pricing: Option<toml::Value>,
        }

        let root: LegacyRoot = toml::from_str(content)?;
        let pricing_val = root
            .usage
            .and_then(|u| u.pricing)
            .unwrap_or(toml::Value::Table(toml::map::Map::new()));

        let mut providers: HashMap<String, ProviderBlock> = HashMap::new();
        if let toml::Value::Table(table) = pricing_val {
            for (provider_name, entries_val) in table {
                if let toml::Value::Array(arr) = entries_val {
                    let entries: Vec<PricingEntry> =
                        arr.into_iter().filter_map(|v| v.try_into().ok()).collect();
                    if !entries.is_empty() {
                        providers.insert(provider_name, ProviderBlock { entries });
                    }
                }
            }
        }

        Ok(PricingConfig { providers })
    }

    /// Serialize a PricingConfig back to the canonical TOML schema.
    fn serialize_to_toml(cfg: &PricingConfig) -> String {
        let mut out = String::from(
            "# OpenCrabs Usage Pricing — auto-migrated to current schema.\n\
             # Edit freely. Changes take effect immediately on next /usage open.\n\
             # prefix is matched case-insensitively as a substring of the model name.\n\
             # Costs are per 1 million tokens (USD).\n\n",
        );
        let mut providers: Vec<(&String, &ProviderBlock)> = cfg.providers.iter().collect();
        providers.sort_by_key(|(k, _)| k.as_str());
        for (name, block) in providers {
            out.push_str(&format!("[providers.{}]\nentries = [\n", name));
            for e in &block.entries {
                out.push_str(&format!(
                    "  {{ prefix = {:?}, input_per_m = {}, output_per_m = {} }},\n",
                    e.prefix, e.input_per_m, e.output_per_m
                ));
            }
            out.push_str("]\n\n");
        }
        out
    }

    /// Write the default pricing file to ~/.opencrabs/usage_pricing.toml if it doesn't exist.
    pub fn write_defaults_if_missing() {
        let path = crate::config::opencrabs_home().join("usage_pricing.toml");
        if !path.exists() {
            let _ = std::fs::write(&path, DEFAULT_PRICING_TOML);
        }
    }

    /// Compiled-in defaults — used as fallback if file missing.
    pub fn defaults() -> Self {
        toml::from_str(DEFAULT_PRICING_TOML).unwrap_or_default()
    }
}

/// Global pricing instance — reloaded fresh on each `/usage` open via `pricing()`.
/// Not a true singleton; callers should use `PricingConfig::load()` directly for freshness.
static PRICING: OnceLock<PricingConfig> = OnceLock::new();

/// Returns the global pricing config, initialized once per process.
/// For live-reload behavior, call `PricingConfig::load()` directly instead.
pub fn pricing() -> &'static PricingConfig {
    PRICING.get_or_init(PricingConfig::load)
}

// ─────────────────────────────────────────────────────────────────────────────
// Default pricing table (compiled in as fallback)
// Rates verified via OpenRouter API 2026-02-25
// ─────────────────────────────────────────────────────────────────────────────
pub const DEFAULT_PRICING_TOML: &str = r#"
# OpenCrabs Usage Pricing Table
# Edit this file to customize pricing or add new models.
# Changes take effect immediately — no restart needed.
#
# Rules:
#   - `prefix` is matched as a case-insensitive substring of the model name
#   - First match within each provider wins — put specific prefixes before general ones
#   - Costs are per 1 million tokens (USD)

[providers.anthropic]
entries = [
  # Opus 4.6 / 4.5 — $5/$25, cache write $6.25, cache read $0.50
  { prefix = "opus-4-6",           input_per_m = 5.0,   output_per_m = 25.0, cache_write_per_m = 6.25,  cache_read_per_m = 0.50 },
  { prefix = "opus-4-5",           input_per_m = 5.0,   output_per_m = 25.0, cache_write_per_m = 6.25,  cache_read_per_m = 0.50 },
  { prefix = "claude-opus-4-6",    input_per_m = 5.0,   output_per_m = 25.0, cache_write_per_m = 6.25,  cache_read_per_m = 0.50 },
  { prefix = "claude-opus-4-5",    input_per_m = 5.0,   output_per_m = 25.0, cache_write_per_m = 6.25,  cache_read_per_m = 0.50 },
  # Opus 4.1 / 4 — $15/$75, cache write $18.75, cache read $1.50
  { prefix = "opus-4-1",           input_per_m = 15.0,  output_per_m = 75.0, cache_write_per_m = 18.75, cache_read_per_m = 1.50 },
  { prefix = "opus-4",             input_per_m = 15.0,  output_per_m = 75.0, cache_write_per_m = 18.75, cache_read_per_m = 1.50 },
  { prefix = "claude-opus-4-1",    input_per_m = 15.0,  output_per_m = 75.0, cache_write_per_m = 18.75, cache_read_per_m = 1.50 },
  { prefix = "claude-opus-4",      input_per_m = 15.0,  output_per_m = 75.0, cache_write_per_m = 18.75, cache_read_per_m = 1.50 },
  # Opus 3 (legacy) — $15/$75
  { prefix = "claude-3-opus",      input_per_m = 15.0,  output_per_m = 75.0, cache_write_per_m = 18.75, cache_read_per_m = 1.50 },
  # Sonnet 4.6 / 4.5 / 4 — $3/$15, cache write $3.75, cache read $0.30
  { prefix = "sonnet-4-6",         input_per_m = 3.0,   output_per_m = 15.0, cache_write_per_m = 3.75,  cache_read_per_m = 0.30 },
  { prefix = "sonnet-4-5",         input_per_m = 3.0,   output_per_m = 15.0, cache_write_per_m = 3.75,  cache_read_per_m = 0.30 },
  { prefix = "sonnet-4",           input_per_m = 3.0,   output_per_m = 15.0, cache_write_per_m = 3.75,  cache_read_per_m = 0.30 },
  { prefix = "claude-sonnet-4-6",  input_per_m = 3.0,   output_per_m = 15.0, cache_write_per_m = 3.75,  cache_read_per_m = 0.30 },
  { prefix = "claude-sonnet-4-5",  input_per_m = 3.0,   output_per_m = 15.0, cache_write_per_m = 3.75,  cache_read_per_m = 0.30 },
  { prefix = "claude-sonnet-4",    input_per_m = 3.0,   output_per_m = 15.0, cache_write_per_m = 3.75,  cache_read_per_m = 0.30 },
  # Claude 3.7 Sonnet — $3/$15
  { prefix = "claude-3-7-sonnet",  input_per_m = 3.0,   output_per_m = 15.0, cache_write_per_m = 3.75, cache_read_per_m = 0.30 },
  # Claude 3.5 Sonnet — $3/$15
  { prefix = "claude-3-5-sonnet",  input_per_m = 3.0,   output_per_m = 15.0, cache_write_per_m = 3.75, cache_read_per_m = 0.30 },
  # Claude 3 Sonnet (legacy) — $3/$15
  { prefix = "claude-3-sonnet",    input_per_m = 3.0,   output_per_m = 15.0  },
  # Haiku 4.x — $1/$5, cache write $1.25, cache read $0.10
  { prefix = "haiku-4",            input_per_m = 1.0,   output_per_m = 5.0, cache_write_per_m = 1.25, cache_read_per_m = 0.10 },
  { prefix = "claude-haiku-4",     input_per_m = 1.0,   output_per_m = 5.0, cache_write_per_m = 1.25, cache_read_per_m = 0.10 },
  # Claude 3.5 Haiku — $0.80/$4
  { prefix = "claude-3-5-haiku",   input_per_m = 0.80,  output_per_m = 4.0, cache_write_per_m = 1.0, cache_read_per_m = 0.08 },
  # Claude 3 Haiku (legacy) — $0.25/$1.25
  { prefix = "claude-3-haiku",     input_per_m = 0.25,  output_per_m = 1.25  },
]

[providers.openai]
entries = [
  { prefix = "gpt-5-nano",          input_per_m = 0.10, output_per_m = 0.40  },
  { prefix = "gpt-5-mini",         input_per_m = 0.30, output_per_m = 1.20  },
  { prefix = "gpt-5",              input_per_m = 1.25, output_per_m = 10.0  },
  { prefix = "gpt-4.1",            input_per_m = 2.0,  output_per_m = 8.0   },
  { prefix = "gpt-4o",             input_per_m = 2.5,  output_per_m = 10.0  },
  { prefix = "gpt-4-turbo",        input_per_m = 10.0, output_per_m = 30.0  },
  { prefix = "gpt-4",              input_per_m = 30.0, output_per_m = 60.0  },
  { prefix = "o4-mini",            input_per_m = 1.10, output_per_m = 4.40  },
  { prefix = "o3-mini",            input_per_m = 1.10, output_per_m = 4.40  },
  { prefix = "o3",                 input_per_m = 2.0,  output_per_m = 8.0   },
  { prefix = "o1-mini",            input_per_m = 1.10, output_per_m = 4.40  },
  { prefix = "o1",                 input_per_m = 15.0, output_per_m = 60.0  },
]

[providers.minimax]
entries = [
  # MiniMax-M2.7 highspeed — $0.60/$2.40
  { prefix = "minimax-m2.7-high",  input_per_m = 0.60, output_per_m = 2.40  },
  # MiniMax-M2.7 standard — $0.30/$1.20
  { prefix = "minimax-m2.7",       input_per_m = 0.30, output_per_m = 1.20  },
  # MiniMax-M2.5 highspeed — $0.60/$2.40
  { prefix = "minimax-m2.5-high",  input_per_m = 0.60, output_per_m = 2.40  },
  # MiniMax-M2.5 standard — $0.30/$1.20
  { prefix = "minimax-m2.5",       input_per_m = 0.30, output_per_m = 1.20  },
  # MiniMax-M2.1 — $0.30/$1.20
  { prefix = "minimax-m2.1",       input_per_m = 0.30, output_per_m = 1.20  },
  # MiniMax-Text-01 — $0.20/$1.10
  { prefix = "minimax-text-01",    input_per_m = 0.20, output_per_m = 1.10  },
  # MiniMax generic fallback
  { prefix = "minimax",            input_per_m = 0.30, output_per_m = 1.20  },
]

[providers.google]
entries = [
  { prefix = "gemini-2.5-pro",     input_per_m = 1.25, output_per_m = 10.0  },
  { prefix = "gemini-2.5-flash",   input_per_m = 0.15, output_per_m = 0.60  },
  { prefix = "gemini-2.0-flash",   input_per_m = 0.10, output_per_m = 0.40  },
  { prefix = "gemini-1.5-pro",     input_per_m = 1.25, output_per_m = 5.0   },
  { prefix = "gemini-1.5-flash",   input_per_m = 0.075,output_per_m = 0.30  },
]

[providers.moonshot]
entries = [
  # Kimi K2.5 (multimodal) — $0.60/$3.00
  { prefix = "kimi-k2.5",          input_per_m = 0.60, output_per_m = 3.0   },
  # Kimi K2 Turbo — $1.15/$8.00
  { prefix = "kimi-k2-turbo",      input_per_m = 1.15, output_per_m = 8.0   },
  # Kimi K2 — $0.60/$2.50
  { prefix = "kimi-k2",            input_per_m = 0.60, output_per_m = 2.50  },
  # Kimi generic fallback
  { prefix = "kimi",               input_per_m = 0.60, output_per_m = 2.50  },
]

[providers.deepseek]
entries = [
  { prefix = "deepseek-r1",        input_per_m = 0.55, output_per_m = 2.19  },
  { prefix = "deepseek-v3",        input_per_m = 0.27, output_per_m = 1.10  },
  { prefix = "deepseek",           input_per_m = 0.27, output_per_m = 1.10  },
]

[providers.meta]
entries = [
  { prefix = "llama-3.3-70b",      input_per_m = 0.59, output_per_m = 0.79  },
  { prefix = "llama-3.1-405b",     input_per_m = 2.70, output_per_m = 2.70  },
  { prefix = "llama-3.1-70b",      input_per_m = 0.52, output_per_m = 0.75  },
  { prefix = "llama-3.1-8b",       input_per_m = 0.07, output_per_m = 0.07  },
]

[providers.opencode]
entries = [
  # MiMo V2 Pro — $1.00/$3.00
  { prefix = "mimo-v2-pro",         input_per_m = 1.0,  output_per_m = 3.0   },
  # MiMo V2 Omni — $0.40/$2.00
  { prefix = "mimo-v2-omni",        input_per_m = 0.40, output_per_m = 2.0   },
  # Nemotron 3 Super — $0.10/$0.50
  { prefix = "nemotron-3-super",    input_per_m = 0.10, output_per_m = 0.50  },
  # Big Pickle (free)
  { prefix = "big-pickle",          input_per_m = 0.0,  output_per_m = 0.0   },
  # OpenCode Zen (free)
  { prefix = "opencode-zen",        input_per_m = 0.0,  output_per_m = 0.0   },
  # OpenCode Go (free)
  { prefix = "opencode-go",         input_per_m = 0.0,  output_per_m = 0.0   },
]
"#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_defaults_parse() {
        let cfg = PricingConfig::defaults();
        assert!(!cfg.providers.is_empty());
    }

    #[test]
    fn test_calculate_cost_sonnet4() {
        let cfg = PricingConfig::defaults();
        let cost = cfg.calculate_cost("claude-sonnet-4-6", 1_000_000, 1_000_000);
        assert_eq!(cost, 18.0); // $3 + $15
    }

    #[test]
    fn test_calculate_cost_opus4() {
        let cfg = PricingConfig::defaults();
        let cost = cfg.calculate_cost("claude-opus-4-6", 1_000_000, 1_000_000);
        assert_eq!(cost, 30.0); // $5 + $25
    }

    #[test]
    fn test_calculate_cost_minimax() {
        let cfg = PricingConfig::defaults();
        let cost = cfg.calculate_cost("MiniMax-M2.7", 1_000_000, 1_000_000);
        assert_eq!(cost, 1.50); // $0.30 + $1.20
    }

    #[test]
    fn test_calculate_cost_kimi_k25() {
        let cfg = PricingConfig::defaults();
        // NVIDIA model name: moonshotai/kimi-k2.5 — prefix "kimi-k2.5" matches
        let cost = cfg.calculate_cost("moonshotai/kimi-k2.5", 1_000_000, 1_000_000);
        assert_eq!(cost, 3.60); // $0.60 + $3.00
    }

    #[test]
    fn test_calculate_cost_gpt4o() {
        let cfg = PricingConfig::defaults();
        // "gpt-4o" matches "gpt-4" prefix — $30/$60
        let cost = cfg.calculate_cost("gpt-4o", 1_000_000, 1_000_000);
        assert!(cost > 0.0);
    }

    #[test]
    fn test_calculate_cost_o3_mini() {
        let cfg = PricingConfig::defaults();
        let cost = cfg.calculate_cost("o3-mini", 1_000_000, 1_000_000);
        assert_eq!(cost, 5.50); // $1.10 + $4.40
    }

    #[test]
    fn test_calculate_cost_o1() {
        let cfg = PricingConfig::defaults();
        let cost = cfg.calculate_cost("o1", 1_000_000, 1_000_000);
        assert_eq!(cost, 75.0); // $15 + $60
    }

    #[test]
    fn test_calculate_cost_llama_github_id() {
        let cfg = PricingConfig::defaults();
        // GitHub marketplace ID "Llama-3.3-70B-Instruct" matches "llama-3.3-70b" prefix
        let cost = cfg.calculate_cost("Llama-3.3-70B-Instruct", 1_000_000, 1_000_000);
        assert_eq!(cost, 1.38); // $0.59 + $0.79
    }

    #[test]
    fn test_calculate_cost_deepseek_r1() {
        let cfg = PricingConfig::defaults();
        let cost = cfg.calculate_cost("DeepSeek-R1", 1_000_000, 1_000_000);
        assert_eq!(cost, 2.74); // $0.55 + $2.19
    }

    #[test]
    fn test_calculate_cost_gpt5() {
        let cfg = PricingConfig::defaults();
        let cost = cfg.calculate_cost("gpt-5", 1_000_000, 1_000_000);
        assert_eq!(cost, 11.25); // $1.25 + $10.0
    }

    #[test]
    fn test_calculate_cost_opencode_mimo_pro() {
        let cfg = PricingConfig::defaults();
        let cost = cfg.calculate_cost("opencode/mimo-v2-pro-free", 1_000_000, 1_000_000);
        assert_eq!(cost, 4.0); // $1.0 + $3.0
    }

    #[test]
    fn test_calculate_cost_opencode_mimo_omni() {
        let cfg = PricingConfig::defaults();
        let cost = cfg.calculate_cost("opencode/mimo-v2-omni-free", 1_000_000, 1_000_000);
        assert_eq!(cost, 2.40); // $0.40 + $2.0
    }

    #[test]
    fn test_calculate_cost_opencode_nemotron() {
        let cfg = PricingConfig::defaults();
        let cost = cfg.calculate_cost("opencode/nemotron-3-super-free", 1_000_000, 1_000_000);
        assert_eq!(cost, 0.60); // $0.10 + $0.50
    }

    #[test]
    fn test_calculate_cost_opencode_free_models() {
        let cfg = PricingConfig::defaults();
        assert_eq!(
            cfg.calculate_cost("opencode/big-pickle", 1_000_000, 1_000_000),
            0.0
        );
        assert_eq!(
            cfg.calculate_cost("opencode/opencode-zen", 1_000_000, 1_000_000),
            0.0
        );
        assert_eq!(
            cfg.calculate_cost("opencode/opencode-go", 1_000_000, 1_000_000),
            0.0
        );
    }

    #[test]
    fn test_unknown_model_zero() {
        let cfg = PricingConfig::defaults();
        let cost = cfg.calculate_cost("some-unknown-model-xyz", 1_000_000, 1_000_000);
        assert_eq!(cost, 0.0);
    }

    #[test]
    fn test_estimate_cost() {
        let cfg = PricingConfig::defaults();
        // 1M tokens, 80% input = 800k @ $3, 20% output = 200k @ $15
        let est = cfg.estimate_cost("claude-sonnet-4-6", 1_000_000);
        assert!(est.is_some());
        let est = est.unwrap();
        assert!((est - 5.40).abs() < 0.001); // 0.8*3 + 0.2*15 = 2.4 + 3.0 = 5.4
    }
}