# Provider named as the default (presumably used when nothing more specific
# selects one; confirm against the loader).
default_provider = "anthropic"

# Anthropic. Header-style auth: `auth_header` names the header and `auth_env`
# the environment variable that presumably supplies the key.
[providers.anthropic]
base_url = "https://api.anthropic.com/v1"
chat_endpoint = "/messages"
auth_style = "header"
auth_header = "x-api-key"
auth_env = "ANTHROPIC_API_KEY"
extra_headers = { "anthropic-version" = "2023-06-01" }
features = ["prompt_caching", "thinking"]
# Cost keys are per 1k tokens (currency presumably USD; confirm).
cost_per_1k_in = 0.003
cost_per_1k_out = 0.015
latency_p50_ms = 2500
# Health probe: POST a one-message body to the token-counting route.
healthcheck.method = "POST"
healthcheck.path = "/messages/count_tokens"
healthcheck.body = '{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"x"}]}'
# OpenAI. Bearer auth with the key variable named by `auth_env`.
[providers.openai]
base_url = "https://api.openai.com/v1"
chat_endpoint = "/chat/completions"
completion_endpoint = "/completions"
auth_style = "bearer"
auth_env = "OPENAI_API_KEY"
cost_per_1k_in = 0.0025
cost_per_1k_out = 0.01
latency_p50_ms = 1800
healthcheck.method = "GET"
healthcheck.path = "/models"
# OpenRouter. Same endpoint paths as the OpenAI entry, bearer auth.
[providers.openrouter]
base_url = "https://openrouter.ai/api/v1"
chat_endpoint = "/chat/completions"
completion_endpoint = "/completions"
auth_style = "bearer"
auth_env = "OPENROUTER_API_KEY"
cost_per_1k_in = 0.003
cost_per_1k_out = 0.015
latency_p50_ms = 2200
# The probe targets the key-info route instead of a model listing.
healthcheck.method = "GET"
healthcheck.path = "/auth/key"
# Hugging Face router. Two environment variable names are accepted for the
# token (lookup order presumably as listed; confirm).
[providers.huggingface]
base_url = "https://router.huggingface.co/v1"
chat_endpoint = "/chat/completions"
completion_endpoint = "/completions"
auth_style = "bearer"
auth_env = ["HF_TOKEN", "HUGGINGFACE_API_KEY"]
cost_per_1k_in = 0.0002
cost_per_1k_out = 0.0006
latency_p50_ms = 2400
# Unlike the other providers this probe gives an absolute `url`, on a
# different host than `base_url`, rather than a relative `path`.
healthcheck.method = "GET"
healthcheck.url = "https://huggingface.co/api/whoami-v2"
# Ollama on the local machine: no auth, zero cost. `base_url_env` names the
# variable that presumably overrides `base_url` when set.
[providers.ollama]
base_url = "http://localhost:11434"
base_url_env = "OLLAMA_HOST"
chat_endpoint = "/api/chat"
completion_endpoint = "/api/generate"
auth_style = "none"
cost_per_1k_in = 0.0
cost_per_1k_out = 0.0
latency_p50_ms = 1200
healthcheck.method = "GET"
healthcheck.path = "/api/tags"
# Google Gemini API. Header auth via `x-goog-api-key`; no completion endpoint
# is configured. `chat_endpoint` and the probe path are the same models route.
[providers.gemini]
base_url = "https://generativelanguage.googleapis.com"
base_url_env = "GEMINI_BASE_URL"
chat_endpoint = "/v1beta/models"
auth_style = "header"
auth_header = "x-goog-api-key"
auth_env = ["GEMINI_API_KEY", "GOOGLE_API_KEY"]
cost_per_1k_in = 0.00125
cost_per_1k_out = 0.005
latency_p50_ms = 1800
healthcheck.method = "GET"
healthcheck.path = "/v1beta/models"
# Together AI. OpenAI-style paths, bearer auth.
[providers.together]
base_url = "https://api.together.xyz/v1"
base_url_env = "TOGETHER_AI_BASE_URL"
chat_endpoint = "/chat/completions"
completion_endpoint = "/completions"
auth_style = "bearer"
auth_env = "TOGETHER_AI_API_KEY"
cost_per_1k_in = 0.0002
cost_per_1k_out = 0.0006
latency_p50_ms = 1600
healthcheck.method = "GET"
healthcheck.path = "/models"
# Groq. OpenAI-style paths under an `/openai/v1` root, bearer auth.
[providers.groq]
base_url = "https://api.groq.com/openai/v1"
base_url_env = "GROQ_BASE_URL"
chat_endpoint = "/chat/completions"
completion_endpoint = "/completions"
auth_style = "bearer"
auth_env = "GROQ_API_KEY"
cost_per_1k_in = 0.0001
cost_per_1k_out = 0.0003
latency_p50_ms = 450
healthcheck.method = "GET"
healthcheck.path = "/models"
# Cerebras. OpenAI-style paths, bearer auth; advertises `native_tools`.
[providers.cerebras]
base_url = "https://api.cerebras.ai/v1"
base_url_env = "CEREBRAS_BASE_URL"
chat_endpoint = "/chat/completions"
completion_endpoint = "/completions"
auth_style = "bearer"
auth_env = "CEREBRAS_API_KEY"
features = ["native_tools"]
cost_per_1k_in = 0.00025
cost_per_1k_out = 0.00069
latency_p50_ms = 150
healthcheck.method = "GET"
healthcheck.path = "/models"
# DeepSeek. OpenAI-style paths, bearer auth.
[providers.deepseek]
base_url = "https://api.deepseek.com/v1"
base_url_env = "DEEPSEEK_BASE_URL"
chat_endpoint = "/chat/completions"
completion_endpoint = "/completions"
auth_style = "bearer"
auth_env = "DEEPSEEK_API_KEY"
cost_per_1k_in = 0.00014
cost_per_1k_out = 0.00028
latency_p50_ms = 1800
healthcheck.method = "GET"
healthcheck.path = "/models"
# Fireworks AI. OpenAI-style paths under `/inference/v1`, bearer auth.
[providers.fireworks]
base_url = "https://api.fireworks.ai/inference/v1"
base_url_env = "FIREWORKS_BASE_URL"
chat_endpoint = "/chat/completions"
completion_endpoint = "/completions"
auth_style = "bearer"
auth_env = "FIREWORKS_API_KEY"
cost_per_1k_in = 0.0002
cost_per_1k_out = 0.0006
latency_p50_ms = 1400
healthcheck.method = "GET"
healthcheck.path = "/models"
# Alibaba DashScope (international host), compatible-mode root, bearer auth.
[providers.dashscope]
base_url = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
base_url_env = "DASHSCOPE_BASE_URL"
chat_endpoint = "/chat/completions"
completion_endpoint = "/completions"
auth_style = "bearer"
auth_env = "DASHSCOPE_API_KEY"
cost_per_1k_in = 0.0003
cost_per_1k_out = 0.0012
latency_p50_ms = 1600
healthcheck.method = "GET"
healthcheck.path = "/models"
# MiniMax. OpenAI-style paths, bearer auth.
[providers.minimax]
base_url = "https://api.minimax.io/v1"
base_url_env = "MINIMAX_BASE_URL"
chat_endpoint = "/chat/completions"
completion_endpoint = "/completions"
auth_style = "bearer"
auth_env = "MINIMAX_API_KEY"
cost_per_1k_in = 0.0003
cost_per_1k_out = 0.0012
latency_p50_ms = 1700
healthcheck.method = "GET"
healthcheck.path = "/models"
# Z.ai (Zhipu GLM). Bearer auth; either ZAI_API_KEY or ZHIPU_API_KEY may hold
# the key (lookup order presumably as listed; confirm).
[providers.zai]
# Z.ai publishes its OpenAI-compatible API under `/api/paas/v4`, not `/v1`;
# the endpoint paths below are joined onto this root.
base_url = "https://api.z.ai/api/paas/v4"
base_url_env = "ZAI_BASE_URL"
auth_style = "bearer"
auth_env = ["ZAI_API_KEY", "ZHIPU_API_KEY"]
chat_endpoint = "/chat/completions"
completion_endpoint = "/completions"
cost_per_1k_in = 0.0004
cost_per_1k_out = 0.0017
latency_p50_ms = 1900

# NOTE(review): confirm that GET /models is served under the root above.
[providers.zai.healthcheck]
method = "GET"
path = "/models"
# AWS Bedrock. SigV4 auth, so no `auth_env` key is listed. `base_url` is empty
# here; BEDROCK_BASE_URL presumably has to supply it (confirm). No cost keys
# and no healthcheck are configured for this provider.
[providers.bedrock]
base_url = ""
base_url_env = "BEDROCK_BASE_URL"
chat_endpoint = "/model/{model}/converse"
auth_style = "aws_sigv4"
features = ["native_tools"]
latency_p50_ms = 2600
# Azure OpenAI. `{resource}`, `{deployment}` and `{api_version}` are
# placeholders, presumably substituted by the client (confirm).
[providers.azure_openai]
base_url = "https://{resource}.openai.azure.com"
base_url_env = "AZURE_OPENAI_ENDPOINT"
chat_endpoint = "/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
auth_style = "azure_openai"
auth_env = [
    "AZURE_OPENAI_API_KEY",
    "AZURE_OPENAI_AD_TOKEN",
    "AZURE_OPENAI_BEARER_TOKEN",
]
features = ["native_tools"]
cost_per_1k_in = 0.0025
cost_per_1k_out = 0.01
latency_p50_ms = 1900
# Google Vertex AI. `{project}`, `{location}` and `{model}` are placeholders
# in the endpoint template.
# NOTE(review): GOOGLE_APPLICATION_CREDENTIALS conventionally holds a file
# path, not a token; confirm the client does not send it as a bearer value.
[providers.vertex]
base_url = "https://aiplatform.googleapis.com/v1"
base_url_env = "VERTEX_AI_BASE_URL"
chat_endpoint = "/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent"
auth_style = "bearer"
auth_env = [
    "VERTEX_AI_ACCESS_TOKEN",
    "GOOGLE_OAUTH_ACCESS_TOKEN",
    "GOOGLE_APPLICATION_CREDENTIALS",
]
features = ["native_tools"]
cost_per_1k_in = 0.00125
cost_per_1k_out = 0.005
latency_p50_ms = 2100
# Generic local OpenAI-style server: no auth, zero cost.
# NOTE(review): the default port 8000 is also the `vllm` provider's default.
[providers.local]
base_url = "http://localhost:8000"
base_url_env = "LOCAL_LLM_BASE_URL"
chat_endpoint = "/v1/chat/completions"
completion_endpoint = "/v1/completions"
auth_style = "none"
cost_per_1k_in = 0.0
cost_per_1k_out = 0.0
latency_p50_ms = 900
healthcheck.method = "GET"
healthcheck.path = "/v1/models"
# llama.cpp server on loopback port 8001: no auth, zero cost.
[providers.llamacpp]
base_url = "http://127.0.0.1:8001"
base_url_env = "LLAMACPP_BASE_URL"
chat_endpoint = "/v1/chat/completions"
completion_endpoint = "/v1/completions"
auth_style = "none"
cost_per_1k_in = 0.0
cost_per_1k_out = 0.0
latency_p50_ms = 900
healthcheck.method = "GET"
healthcheck.path = "/v1/models"
# MLX server on loopback port 8002: no auth, zero cost.
[providers.mlx]
base_url = "http://127.0.0.1:8002"
base_url_env = "MLX_BASE_URL"
chat_endpoint = "/v1/chat/completions"
completion_endpoint = "/v1/completions"
auth_style = "none"
cost_per_1k_in = 0.0
cost_per_1k_out = 0.0
latency_p50_ms = 900
healthcheck.method = "GET"
healthcheck.path = "/v1/models"
# vLLM server: no auth, zero cost.
# NOTE(review): the default port 8000 is also the `local` provider's default.
[providers.vllm]
base_url = "http://localhost:8000"
base_url_env = "VLLM_BASE_URL"
chat_endpoint = "/v1/chat/completions"
completion_endpoint = "/v1/completions"
auth_style = "none"
cost_per_1k_in = 0.0
cost_per_1k_out = 0.0
latency_p50_ms = 800
healthcheck.method = "GET"
healthcheck.path = "/v1/models"
# Text Generation Inference server: no auth, zero cost. Its probe hits
# `/health` instead of a model listing.
[providers.tgi]
base_url = "http://localhost:8080"
base_url_env = "TGI_BASE_URL"
chat_endpoint = "/v1/chat/completions"
completion_endpoint = "/v1/completions"
auth_style = "none"
cost_per_1k_in = 0.0
cost_per_1k_out = 0.0
latency_p50_ms = 950
healthcheck.method = "GET"
healthcheck.path = "/health"
# Model-id patterns mapped to a provider. The authored order is preserved in
# case the matcher is order-sensitive. Patterns look like globs with `*` as
# the wildcard (assumption; confirm against the matcher).
# NOTE(review): the catalog lists `gpt-oss-120b` under provider `cerebras`,
# and `gpt-*` below would also match that id; confirm lookup precedence.

# First-party Anthropic and OpenAI ids.
[[inference_rules]]
pattern = 'claude-*'
provider = "anthropic"

[[inference_rules]]
pattern = 'gpt-*'
provider = "openai"

[[inference_rules]]
pattern = 'o1*'
provider = "openai"

[[inference_rules]]
pattern = 'o3*'
provider = "openai"

[[inference_rules]]
pattern = 'o4*'
provider = "openai"

# Vendor-prefixed ids (`vendor.model`) go to Bedrock.
[[inference_rules]]
pattern = 'anthropic.claude-*'
provider = "bedrock"

[[inference_rules]]
pattern = 'meta.llama*'
provider = "bedrock"

[[inference_rules]]
pattern = 'amazon.*'
provider = "bedrock"

[[inference_rules]]
pattern = 'mistral.*'
provider = "bedrock"

[[inference_rules]]
pattern = 'cohere.*'
provider = "bedrock"

# Remaining direct providers.
[[inference_rules]]
pattern = 'gemini-*'
provider = "gemini"

[[inference_rules]]
pattern = 'cerebras/*'
provider = "cerebras"

[[inference_rules]]
pattern = 'MiniMax-*'
provider = "minimax"

[[inference_rules]]
pattern = 'glm-*'
provider = "zai"

[[inference_rules]]
pattern = 'zhipu/*'
provider = "zai"

# DeepSeek: one wildcard for the v4 family, exact ids for the legacy names.
[[inference_rules]]
pattern = 'deepseek-v4*'
provider = "deepseek"

[[inference_rules]]
pattern = 'deepseek-chat'
provider = "deepseek"

[[inference_rules]]
pattern = 'deepseek-reasoner'
provider = "deepseek"
# Default tier name (presumably applied when a request names none; confirm).
[tier_defaults]
default = "mid"

# Short names for the current Anthropic models.
[aliases.sonnet]
provider = "anthropic"
id = "claude-sonnet-4-6"

[aliases.opus]
provider = "anthropic"
id = "claude-opus-4-7"

[aliases.haiku]
provider = "anthropic"
id = "claude-haiku-4-5-20251001"

# Tier aliases. Each tier has a bare spelling and a `tier/` spelling that
# point at the same target.
[aliases.frontier]
provider = "anthropic"
id = "claude-sonnet-4-6"

[aliases."tier/frontier"]
provider = "anthropic"
id = "claude-sonnet-4-6"

[aliases.mid]
provider = "openai"
id = "gpt-4o-mini"

[aliases."tier/mid"]
provider = "openai"
id = "gpt-4o-mini"

[aliases.small]
provider = "openrouter"
id = "Qwen/Qwen3.5-9B"

[aliases."tier/small"]
provider = "openrouter"
id = "Qwen/Qwen3.5-9B"
# Gemma 4 on the `local` provider. `local-gemma4` and `local-gemma4-26b` are
# two names for the same id.
[aliases.local-gemma4]
provider = "local"
id = "gemma-4-26b-a4b-it"

[aliases.local-gemma4-26b]
provider = "local"
id = "gemma-4-26b-a4b-it"

[aliases.local-gemma4-31b]
provider = "local"
id = "gemma-4-31b-it"

[aliases.local-gemma4-e4b]
provider = "local"
id = "gemma-4-e4b-it"

[aliases.local-gemma4-e2b]
provider = "local"
id = "gemma-4-e2b-it"

# Gemma 4 on Ollama; both names set the text tool format.
[aliases.ollama-gemma4]
provider = "ollama"
id = "gemma4:26b"
tool_format = "text"

[aliases.ollama-gemma4-26b]
provider = "ollama"
id = "gemma4:26b"
tool_format = "text"
# Qwen3.6 coding build on Ollama. All four names share one id; only the
# `-native` spelling selects the native tool format, the rest use text.
[aliases."qwen3.6-coding"]
provider = "ollama"
id = "qwen3.6:35b-a3b-coding-nvfp4"
tool_format = "text"

[aliases."qwen3.6-35b-coding"]
provider = "ollama"
id = "qwen3.6:35b-a3b-coding-nvfp4"
tool_format = "text"

[aliases."qwen3.6-coding-nvfp4"]
provider = "ollama"
id = "qwen3.6:35b-a3b-coding-nvfp4"
tool_format = "text"

[aliases."qwen3.6-coding-native"]
provider = "ollama"
id = "qwen3.6:35b-a3b-coding-nvfp4"
tool_format = "native"
# Qwen3.6 on llama.cpp, all with the text tool format. The first alias uses
# the unquantized id; the other three share the UD-Q4-K-XL id.
[aliases."llamacpp-qwen3.6"]
provider = "llamacpp"
id = "qwen3.6-35b-a3b"
tool_format = "text"

[aliases."llamacpp-qwen3.6-q4"]
provider = "llamacpp"
id = "qwen3.6-35b-a3b-ud-q4-k-xl"
tool_format = "text"

[aliases."local-qwen3.6"]
provider = "llamacpp"
id = "qwen3.6-35b-a3b-ud-q4-k-xl"
tool_format = "text"

[aliases."local-qwen3.6-gguf"]
provider = "llamacpp"
id = "qwen3.6-35b-a3b-ud-q4-k-xl"
tool_format = "text"
# Qwen3.6 27B (4-bit MLX build) on the `mlx` provider. All four names resolve
# to the same id, so each sets `tool_format` explicitly and none depends on
# whatever default the loader applies when the key is absent.
[aliases.mlx-qwen36-27b]
id = "unsloth/Qwen3.6-27B-UD-MLX-4bit"
provider = "mlx"
tool_format = "native"

[aliases."mlx-qwen3.6-27b"]
id = "unsloth/Qwen3.6-27B-UD-MLX-4bit"
provider = "mlx"
tool_format = "native"

[aliases."mlx-qwen3.6-27b-q4"]
id = "unsloth/Qwen3.6-27B-UD-MLX-4bit"
provider = "mlx"
tool_format = "native"

[aliases."local-qwen3.6-27b"]
id = "unsloth/Qwen3.6-27B-UD-MLX-4bit"
provider = "mlx"
tool_format = "native"
# MiniMax aliases. The bare `minimax` name points at the M2.7 id.
[aliases.minimax]
provider = "minimax"
id = "MiniMax-M2.7"

[aliases.minimax-m2]
provider = "minimax"
id = "MiniMax-M2"

[aliases."minimax-m2.5"]
provider = "minimax"
id = "MiniMax-M2.5"

[aliases."minimax-m2.7"]
provider = "minimax"
id = "MiniMax-M2.7"
# GLM aliases on Z.ai. The bare `glm` name points at the 5.1 id.
[aliases.glm]
provider = "zai"
id = "glm-5.1"

[aliases.glm-5]
provider = "zai"
id = "glm-5"

[aliases."glm-5.1"]
provider = "zai"
id = "glm-5.1"
# DeepSeek aliases. The bare `deepseek` name points at the V4 Flash id.
[aliases.deepseek]
provider = "deepseek"
id = "deepseek-v4-flash"

[aliases.deepseek-flash]
provider = "deepseek"
id = "deepseek-v4-flash"

[aliases.deepseek-pro]
provider = "deepseek"
id = "deepseek-v4-pro"

[aliases.deepseek-v4-flash]
provider = "deepseek"
id = "deepseek-v4-flash"

[aliases.deepseek-v4-pro]
provider = "deepseek"
id = "deepseek-v4-pro"
# Devstral Small 2 on Ollama. Only the `-native` spelling selects the native
# tool format; the other two use text.
[aliases.devstral-small-2]
provider = "ollama"
id = "devstral-small-2:24b"
tool_format = "text"

[aliases.ollama-devstral-small-2]
provider = "ollama"
id = "devstral-small-2:24b"
tool_format = "text"

[aliases.ollama-devstral-small-2-native]
provider = "ollama"
id = "devstral-small-2:24b"
tool_format = "native"
# Per-alias tool-calling status. Every support flag here is still "unknown";
# `fallback_mode` names the mode to use meanwhile and `failure_reason`, where
# present, records what is still outstanding.
[alias_tool_calling."qwen3.6-coding"]
fallback_mode = "text"
native = "unknown"
text = "unknown"
streaming_native = "unknown"

[alias_tool_calling."qwen3.6-coding-native"]
fallback_mode = "native"
native = "unknown"
text = "unknown"
streaming_native = "unknown"

[alias_tool_calling.ollama-gemma4]
fallback_mode = "disabled"
failure_reason = "requires_tool_probe"
native = "unknown"
text = "unknown"
streaming_native = "unknown"

[alias_tool_calling."llamacpp-qwen3.6-q4"]
fallback_mode = "text"
failure_reason = "requires_tool_probe_and_cache_probe"
native = "unknown"
text = "unknown"
streaming_native = "unknown"

[alias_tool_calling."mlx-qwen3.6-27b"]
fallback_mode = "native"
failure_reason = "requires_served_identity_and_tool_probe"
native = "unknown"
text = "unknown"
streaming_native = "unknown"
# One default model id per provider (what "qc" stands for is not visible in
# this file). Keys are listed alphabetically.
# NOTE(review): `local` points at "gpt-4o", an id the catalog lists under
# provider `openai` and marks deprecated; confirm this is intentional.
[qc_defaults]
anthropic = "claude-haiku-4-5-20251001"
deepseek = "deepseek-v4-flash"
local = "gpt-4o"
minimax = "MiniMax-M2.5-highspeed"
ollama = "llama3.2"
openai = "gpt-4o-mini"
openrouter = "google/gemini-2.5-flash"
zai = "glm-5"
# Claude 3.5 Haiku. Pricing keys are per million tokens.
# "thinking" is not listed: Anthropic's extended thinking is not offered on
# the Claude 3.5 generation.
[models."claude-3-5-haiku-20241022"]
name = "Claude Haiku 3.5"
provider = "anthropic"
context_window = 200000
capabilities = ["tools", "streaming", "prompt_caching"]
pricing = { input_per_mtok = 0.80, output_per_mtok = 4.00, cache_read_per_mtok = 0.08, cache_write_per_mtok = 1.00 }
tier = "small"
open_weight = false
strengths = ["speed", "cheap", "summarization", "tool_use"]
# Claude Haiku 4.5 (direct Anthropic id). Pricing keys are per million tokens.
[models."claude-haiku-4-5-20251001"]
name = "Claude Haiku 4.5"
provider = "anthropic"
tier = "mid"
open_weight = false
context_window = 200_000
capabilities = ["tools", "streaming", "prompt_caching", "thinking"]
strengths = ["speed", "cheap", "coding", "tool_use", "summarization"]
pricing = { input_per_mtok = 1.00, output_per_mtok = 5.00, cache_read_per_mtok = 0.10, cache_write_per_mtok = 1.25 }
# Claude 3.5 Sonnet, both dated snapshots. The entries differ only in id and
# display name.
# "thinking" is not listed: Anthropic's extended thinking is not offered on
# the Claude 3.5 generation.
[models."claude-3-5-sonnet-20240620"]
name = "Claude Sonnet 3.5 (2024-06-20)"
provider = "anthropic"
context_window = 200000
capabilities = ["tools", "streaming", "prompt_caching"]
pricing = { input_per_mtok = 3.00, output_per_mtok = 15.00, cache_read_per_mtok = 0.30, cache_write_per_mtok = 3.75 }
tier = "frontier"
open_weight = false
strengths = ["coding", "reasoning", "tool_use", "long_context"]

[models."claude-3-5-sonnet-20241022"]
name = "Claude Sonnet 3.5 (2024-10-22)"
provider = "anthropic"
context_window = 200000
capabilities = ["tools", "streaming", "prompt_caching"]
pricing = { input_per_mtok = 3.00, output_per_mtok = 15.00, cache_read_per_mtok = 0.30, cache_write_per_mtok = 3.75 }
tier = "frontier"
open_weight = false
strengths = ["coding", "reasoning", "tool_use", "long_context"]
# Deprecated Sonnet 4-generation entries; each note names the replacement id.
[models."claude-sonnet-4-20250514"]
name = "Claude Sonnet 4"
provider = "anthropic"
tier = "frontier"
open_weight = false
deprecated = true
deprecation_note = "Sunset 2026-06-15 per Anthropic deprecations page. Replaced by claude-sonnet-4-6."
context_window = 200_000
capabilities = ["tools", "streaming", "prompt_caching", "thinking"]
strengths = ["coding", "reasoning", "tool_use", "long_context", "agentic"]
pricing = { input_per_mtok = 3.00, output_per_mtok = 15.00, cache_read_per_mtok = 0.30, cache_write_per_mtok = 3.75 }
benchmarks = { swe_bench_verified = 49.0 }

[models."claude-sonnet-4-5"]
name = "Claude Sonnet 4.5"
provider = "anthropic"
tier = "frontier"
open_weight = false
deprecated = true
deprecation_note = "Sunset 2026-05-15 per Anthropic deprecations page. Replaced by claude-sonnet-4-6."
context_window = 200_000
capabilities = ["tools", "streaming", "prompt_caching", "thinking"]
strengths = ["coding", "reasoning", "tool_use", "long_context", "agentic"]
pricing = { input_per_mtok = 3.00, output_per_mtok = 15.00, cache_read_per_mtok = 0.30, cache_write_per_mtok = 3.75 }
benchmarks = { swe_bench_verified = 77.2 }
# Current Sonnet entries. They are identical apart from id, name and
# benchmark score.
[models."claude-sonnet-4-6"]
name = "Claude Sonnet 4.6"
provider = "anthropic"
tier = "frontier"
open_weight = false
context_window = 200_000
capabilities = ["tools", "streaming", "prompt_caching", "thinking"]
strengths = ["coding", "reasoning", "tool_use", "long_context", "agentic"]
pricing = { input_per_mtok = 3.00, output_per_mtok = 15.00, cache_read_per_mtok = 0.30, cache_write_per_mtok = 3.75 }
benchmarks = { swe_bench_verified = 79.6 }

[models."claude-sonnet-4-7"]
name = "Claude Sonnet 4.7"
provider = "anthropic"
tier = "frontier"
open_weight = false
context_window = 200_000
capabilities = ["tools", "streaming", "prompt_caching", "thinking"]
strengths = ["coding", "reasoning", "tool_use", "long_context", "agentic"]
pricing = { input_per_mtok = 3.00, output_per_mtok = 15.00, cache_read_per_mtok = 0.30, cache_write_per_mtok = 3.75 }
benchmarks = { swe_bench_verified = 81.0 }
# Claude models reached through OpenRouter. Unlike the direct Anthropic
# entries, these do not list the "thinking" capability.
[models."anthropic/claude-haiku-4-5"]
name = "Claude Haiku 4.5 (via OpenRouter)"
provider = "openrouter"
tier = "mid"
open_weight = false
context_window = 200_000
capabilities = ["tools", "streaming", "prompt_caching"]
strengths = ["speed", "cheap", "coding", "tool_use", "summarization"]
pricing = { input_per_mtok = 1.00, output_per_mtok = 5.00, cache_read_per_mtok = 0.10, cache_write_per_mtok = 1.25 }

[models."anthropic/claude-sonnet-4-6"]
name = "Claude Sonnet 4.6 (via OpenRouter)"
provider = "openrouter"
tier = "frontier"
open_weight = false
context_window = 200_000
capabilities = ["tools", "streaming", "prompt_caching"]
strengths = ["coding", "reasoning", "tool_use", "long_context", "agentic"]
pricing = { input_per_mtok = 3.00, output_per_mtok = 15.00, cache_read_per_mtok = 0.30, cache_write_per_mtok = 3.75 }
# Claude 3 Opus. Pricing keys are per million tokens.
# "thinking" is not listed: Anthropic's extended thinking is not offered on
# the Claude 3 generation.
[models."claude-3-opus-20240229"]
name = "Claude Opus 3"
provider = "anthropic"
context_window = 200000
capabilities = ["tools", "streaming", "prompt_caching"]
pricing = { input_per_mtok = 15.00, output_per_mtok = 75.00, cache_read_per_mtok = 1.50, cache_write_per_mtok = 18.75 }
tier = "frontier"
open_weight = false
strengths = ["reasoning", "long_context"]
# Deprecated Opus 4-generation entries; each note names the replacement id.
[models."claude-opus-4-20250514"]
name = "Claude Opus 4"
provider = "anthropic"
tier = "frontier"
open_weight = false
deprecated = true
deprecation_note = "Sunset 2026-06-15 per Anthropic deprecations page. Replaced by claude-opus-4-7."
context_window = 200_000
capabilities = ["tools", "streaming", "prompt_caching", "thinking"]
strengths = ["reasoning", "coding", "long_context", "agentic"]
pricing = { input_per_mtok = 15.00, output_per_mtok = 75.00, cache_read_per_mtok = 1.50, cache_write_per_mtok = 18.75 }
benchmarks = { swe_bench_verified = 77.6 }

[models."claude-opus-4-1-20250805"]
name = "Claude Opus 4.1"
provider = "anthropic"
tier = "frontier"
open_weight = false
deprecated = true
deprecation_note = "Superseded by claude-opus-4-7. No formal sunset yet; switch when convenient."
context_window = 200_000
capabilities = ["tools", "streaming", "prompt_caching", "thinking"]
strengths = ["reasoning", "coding", "long_context", "agentic"]
pricing = { input_per_mtok = 15.00, output_per_mtok = 75.00, cache_read_per_mtok = 1.50, cache_write_per_mtok = 18.75 }
benchmarks = { swe_bench_verified = 78.9 }
# Current Opus entries. They are identical apart from id, name and benchmark
# scores.
[models."claude-opus-4-6"]
name = "Claude Opus 4.6"
provider = "anthropic"
tier = "frontier"
open_weight = false
context_window = 200_000
capabilities = ["tools", "streaming", "prompt_caching", "thinking"]
strengths = ["reasoning", "coding", "long_context", "agentic"]
pricing = { input_per_mtok = 15.00, output_per_mtok = 75.00, cache_read_per_mtok = 1.50, cache_write_per_mtok = 18.75 }
benchmarks = { swe_bench_verified = 80.8, swe_bench_pro = 53.4 }

[models."claude-opus-4-7"]
name = "Claude Opus 4.7"
provider = "anthropic"
tier = "frontier"
open_weight = false
context_window = 200_000
capabilities = ["tools", "streaming", "prompt_caching", "thinking"]
strengths = ["reasoning", "coding", "long_context", "agentic"]
pricing = { input_per_mtok = 15.00, output_per_mtok = 75.00, cache_read_per_mtok = 1.50, cache_write_per_mtok = 18.75 }
benchmarks = { swe_bench_verified = 87.6, swe_bench_pro = 64.3 }
# OpenAI GPT-4 generation. `gpt-4o` and `gpt-4-turbo` are flagged deprecated;
# `gpt-4o-mini` is not.
[models."gpt-4o"]
name = "GPT-4o"
provider = "openai"
tier = "frontier"
open_weight = false
deprecated = true
deprecation_note = "API sunset 2026-02-17 per OpenAI deprecations page. Switch to gpt-5-mini for cheap routing or gpt-5 for frontier."
context_window = 128_000
capabilities = ["tools", "streaming"]
strengths = ["coding", "vision", "tool_use"]
pricing = { input_per_mtok = 2.50, output_per_mtok = 10.00, cache_read_per_mtok = 1.25 }

[models."gpt-4o-mini"]
name = "GPT-4o Mini"
provider = "openai"
tier = "mid"
open_weight = false
context_window = 128_000
capabilities = ["tools", "streaming"]
strengths = ["speed", "cheap", "summarization", "tool_use"]
pricing = { input_per_mtok = 0.15, output_per_mtok = 0.60 }

[models."gpt-4-turbo"]
name = "GPT-4 Turbo"
provider = "openai"
tier = "frontier"
open_weight = false
deprecated = true
deprecation_note = "Superseded by gpt-5 family. Listed for cost-attribution backfill only."
context_window = 128_000
capabilities = ["tools", "streaming"]
strengths = ["coding", "tool_use"]
pricing = { input_per_mtok = 10.00, output_per_mtok = 30.00 }
# OpenAI o1 reasoning models.
[models.o1]
name = "OpenAI o1"
provider = "openai"
tier = "reasoning"
open_weight = false
context_window = 200_000
capabilities = ["tools", "streaming"]
strengths = ["reasoning"]
pricing = { input_per_mtok = 15.00, output_per_mtok = 60.00, cache_read_per_mtok = 7.50 }

[models."o1-mini"]
name = "OpenAI o1-mini"
provider = "openai"
tier = "reasoning"
open_weight = false
context_window = 128_000
capabilities = ["tools", "streaming"]
strengths = ["reasoning", "cheap"]
pricing = { input_per_mtok = 3.00, output_per_mtok = 12.00, cache_read_per_mtok = 1.50 }
# OpenAI o3.
# NOTE(review): the pricing values are identical to the `o1` entry's; confirm
# against OpenAI's current price list.
[models.o3]
name = "OpenAI o3"
provider = "openai"
tier = "reasoning"
open_weight = false
context_window = 200_000
capabilities = ["tools", "streaming"]
strengths = ["reasoning", "coding"]
pricing = { input_per_mtok = 15.00, output_per_mtok = 60.00, cache_read_per_mtok = 7.50 }
benchmarks = { swe_bench_verified = 69.1 }
# OpenAI o3-mini.
[models."o3-mini"]
name = "OpenAI o3-mini"
provider = "openai"
tier = "reasoning"
open_weight = false
context_window = 200_000
capabilities = ["tools", "streaming"]
strengths = ["reasoning", "coding", "cheap"]
pricing = { input_per_mtok = 1.10, output_per_mtok = 4.40, cache_read_per_mtok = 0.55 }
benchmarks = { swe_bench_verified = 49.3 }
# Gemini 2.5 Flash, direct and via OpenRouter. The two entries differ only in
# id, display name and provider.
[models."gemini-2.5-flash"]
name = "Gemini 2.5 Flash"
provider = "gemini"
tier = "mid"
open_weight = false
context_window = 1_048_576
capabilities = ["tools", "streaming"]
strengths = ["speed", "long_context", "vision", "cheap", "tool_use"]
pricing = { input_per_mtok = 0.10, output_per_mtok = 0.40, cache_read_per_mtok = 0.025 }

[models."google/gemini-2.5-flash"]
name = "Gemini 2.5 Flash (via OpenRouter)"
provider = "openrouter"
tier = "mid"
open_weight = false
context_window = 1_048_576
capabilities = ["tools", "streaming"]
strengths = ["speed", "long_context", "vision", "cheap", "tool_use"]
pricing = { input_per_mtok = 0.10, output_per_mtok = 0.40, cache_read_per_mtok = 0.025 }
# Gemini 2.5 Pro. Pricing keys are per million tokens.
[models."gemini-2.5-pro"]
name = "Gemini 2.5 Pro"
provider = "gemini"
# Google documents a 1,048,576-token input limit for this model, the same as
# the 2.5 Flash entry; a larger value lets oversized prompts through.
context_window = 1_048_576
capabilities = ["tools", "streaming"]
pricing = { input_per_mtok = 1.25, output_per_mtok = 5.00, cache_read_per_mtok = 0.3125 }
tier = "frontier"
open_weight = false
strengths = ["long_context", "vision", "reasoning", "coding"]
benchmarks = { swe_bench_verified = 63.8 }
# Mistral open-weight models, both routed through OpenRouter.
[models."mistralai/mistral-large-2512"]
name = "Mistral Large 3 2512"
provider = "openrouter"
tier = "frontier"
open_weight = true
context_window = 262_144
capabilities = ["tools", "streaming"]
strengths = ["coding", "tool_use", "long_context"]
pricing = { input_per_mtok = 0.50, output_per_mtok = 1.50, cache_read_per_mtok = 0.05 }

[models."mistralai/mistral-small-2603"]
name = "Mistral Small 4"
provider = "openrouter"
tier = "mid"
open_weight = true
context_window = 262_144
capabilities = ["tools", "streaming"]
strengths = ["cheap", "coding", "speed"]
pricing = { input_per_mtok = 0.15, output_per_mtok = 0.60, cache_read_per_mtok = 0.015 }
# Qwen3 Coder variants. These are the only entries in view that carry an
# `availability` key ("serverless" vs "dedicated").
[models."qwen/qwen3-coder"]
name = "Qwen3 Coder 480B A35B"
provider = "openrouter"
availability = "serverless"
tier = "frontier"
open_weight = true
context_window = 262_144
capabilities = ["tools", "streaming"]
strengths = ["coding", "long_context", "agentic", "tool_use"]
pricing = { input_per_mtok = 0.22, output_per_mtok = 1.80 }
benchmarks = { swe_bench_verified = 67.0 }

[models."Qwen/Qwen3-Coder-Next-FP8"]
name = "Qwen3 Coder Next FP8 (Together, dedicated)"
provider = "together"
availability = "dedicated"
tier = "frontier"
open_weight = true
context_window = 262_144
capabilities = ["tools", "streaming"]
strengths = ["coding", "long_context", "agentic"]
pricing = { input_per_mtok = 0.18, output_per_mtok = 0.18 }
# Open-weight models routed through OpenRouter: DeepSeek V3.2 and Kimi K2.6.
[models."deepseek/deepseek-v3.2"]
name = "DeepSeek V3.2"
provider = "openrouter"
tier = "mid"
open_weight = true
context_window = 131_072
capabilities = ["tools", "streaming"]
strengths = ["coding", "tool_use", "cheap"]
pricing = { input_per_mtok = 0.28, output_per_mtok = 0.42 }

[models."moonshotai/kimi-k2.6"]
name = "Kimi K2.6"
provider = "openrouter"
tier = "frontier"
open_weight = true
context_window = 262_144
capabilities = ["tools", "streaming"]
strengths = ["coding", "agentic", "long_context", "tool_use", "reasoning"]
pricing = { input_per_mtok = 0.73, output_per_mtok = 3.49 }
benchmarks = { swe_bench_pro = 58.6, humanitys_last_exam_with_tools = 54.0 }
# GPT-OSS 120B via OpenRouter, followed by the two Cerebras-hosted models.
# The Cerebras ids carry no vendor prefix.
[models."openai/gpt-oss-120b"]
name = "GPT-OSS 120B"
provider = "openrouter"
tier = "mid"
open_weight = true
context_window = 131_072
capabilities = ["tools", "streaming"]
strengths = ["cheap", "tool_use"]
pricing = { input_per_mtok = 0.15, output_per_mtok = 0.60 }

[models."gpt-oss-120b"]
name = "GPT-OSS 120B (Cerebras)"
provider = "cerebras"
tier = "mid"
open_weight = true
context_window = 131_072
capabilities = ["tools", "streaming"]
strengths = ["speed", "cheap", "tool_use"]
pricing = { input_per_mtok = 0.25, output_per_mtok = 0.69 }

[models."llama-3.3-70b"]
name = "Llama 3.3 70B (Cerebras)"
provider = "cerebras"
tier = "mid"
open_weight = true
context_window = 131_072
capabilities = ["tools", "streaming"]
strengths = ["speed", "tool_use"]
pricing = { input_per_mtok = 0.85, output_per_mtok = 1.20 }
# MiniMax models served by the `minimax` provider. The `-highspeed` variants
# list no "thinking" capability and sit in the "mid" tier.
[models."MiniMax-M2"]
name = "MiniMax M2"
provider = "minimax"
tier = "mid"
open_weight = true
context_window = 204_800
capabilities = ["tools", "streaming", "thinking"]
strengths = ["coding", "agentic", "cheap", "tool_use"]
pricing = { input_per_mtok = 0.255, output_per_mtok = 1.00, cache_read_per_mtok = 0.051 }
benchmarks = { aa_intelligence_index = 45.0 }

[models."MiniMax-M2.5"]
name = "MiniMax M2.5"
provider = "minimax"
tier = "frontier"
open_weight = true
context_window = 204_800
capabilities = ["tools", "streaming", "thinking"]
strengths = ["coding", "agentic", "tool_use", "long_context"]
pricing = { input_per_mtok = 0.28, output_per_mtok = 1.10, cache_read_per_mtok = 0.056 }

[models."MiniMax-M2.5-highspeed"]
name = "MiniMax M2.5 (highspeed)"
provider = "minimax"
tier = "mid"
open_weight = true
context_window = 204_800
capabilities = ["tools", "streaming"]
strengths = ["speed", "coding", "agentic"]
pricing = { input_per_mtok = 0.28, output_per_mtok = 1.10, cache_read_per_mtok = 0.056 }

[models."MiniMax-M2.7"]
name = "MiniMax M2.7"
provider = "minimax"
tier = "frontier"
open_weight = true
context_window = 204_800
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
strengths = ["coding", "agentic", "tool_use", "reasoning", "long_context"]
pricing = { input_per_mtok = 0.30, output_per_mtok = 1.20, cache_read_per_mtok = 0.06 }
benchmarks = { aa_intelligence_index = 50.0 }

[models."MiniMax-M2.7-highspeed"]
name = "MiniMax M2.7 (highspeed)"
provider = "minimax"
tier = "mid"
open_weight = true
context_window = 204_800
capabilities = ["tools", "streaming", "prompt_caching"]
strengths = ["speed", "coding", "agentic"]
pricing = { input_per_mtok = 0.30, output_per_mtok = 1.20, cache_read_per_mtok = 0.06 }

[models."MiniMax-Text-01"]
name = "MiniMax Text 01"
provider = "minimax"
tier = "mid"
open_weight = true
context_window = 1_000_000
capabilities = ["tools", "streaming"]
strengths = ["long_context"]
pricing = { input_per_mtok = 0.20, output_per_mtok = 1.10 }
# MiniMax models reached through OpenRouter. No cache pricing is listed.
[models."minimax/minimax-m2.7"]
name = "MiniMax M2.7 (via OpenRouter)"
provider = "openrouter"
tier = "frontier"
open_weight = true
context_window = 204_800
capabilities = ["tools", "streaming"]
strengths = ["coding", "agentic", "tool_use", "reasoning", "long_context"]
pricing = { input_per_mtok = 0.40, output_per_mtok = 1.50 }

[models."minimax/minimax-m2"]
name = "MiniMax M2 (via OpenRouter)"
provider = "openrouter"
tier = "mid"
open_weight = true
context_window = 204_800
capabilities = ["tools", "streaming"]
strengths = ["coding", "agentic", "cheap"]
pricing = { input_per_mtok = 0.33, output_per_mtok = 1.20 }
# GLM models served by the `zai` provider.
[models."glm-5"]
name = "GLM 5"
provider = "zai"
tier = "frontier"
open_weight = true
context_window = 202_752
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
strengths = ["coding", "agentic", "tool_use"]
pricing = { input_per_mtok = 0.98, output_per_mtok = 3.08, cache_read_per_mtok = 0.20 }

[models."glm-5.1"]
name = "GLM 5.1"
provider = "zai"
tier = "frontier"
open_weight = true
context_window = 202_752
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
strengths = ["coding", "agentic", "tool_use", "reasoning", "long_context"]
pricing = { input_per_mtok = 1.40, output_per_mtok = 4.40, cache_read_per_mtok = 0.26 }
benchmarks = { swe_bench_pro_lead = 1.0 }
# GLM models reached through OpenRouter. No cache pricing is listed.
[models."z-ai/glm-5"]
name = "GLM 5 (via OpenRouter)"
provider = "openrouter"
tier = "frontier"
open_weight = true
context_window = 202_752
capabilities = ["tools", "streaming"]
strengths = ["coding", "agentic"]
pricing = { input_per_mtok = 1.20, output_per_mtok = 4.00 }

[models."z-ai/glm-5.1"]
name = "GLM 5.1 (via OpenRouter)"
provider = "openrouter"
tier = "frontier"
open_weight = true
context_window = 202_752
capabilities = ["tools", "streaming"]
strengths = ["coding", "agentic", "tool_use", "reasoning", "long_context"]
pricing = { input_per_mtok = 0.98, output_per_mtok = 3.08 }

[models."z-ai/glm-5v-turbo"]
name = "GLM 5V Turbo (via OpenRouter)"
provider = "openrouter"
tier = "mid"
open_weight = true
context_window = 202_752
capabilities = ["tools", "streaming", "vision"]
strengths = ["vision", "speed"]
pricing = { input_per_mtok = 1.20, output_per_mtok = 4.00 }
# DeepSeek V4 models served by the `deepseek` provider.
[models."deepseek-v4-flash"]
name = "DeepSeek V4 Flash"
provider = "deepseek"
tier = "mid"
open_weight = true
context_window = 1_000_000
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
strengths = ["speed", "cheap", "tool_use", "reasoning", "long_context"]
pricing = { input_per_mtok = 0.14, output_per_mtok = 0.28, cache_read_per_mtok = 0.0028 }
benchmarks = { aa_intelligence_index = 58.0 }

[models."deepseek-v4-pro"]
name = "DeepSeek V4 Pro"
provider = "deepseek"
tier = "frontier"
open_weight = true
context_window = 1_000_000
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
strengths = ["reasoning", "coding", "tool_use", "long_context"]
pricing = { input_per_mtok = 0.435, output_per_mtok = 0.87, cache_read_per_mtok = 0.003625 }
benchmarks = { aa_intelligence_index = 68.0 }
# Legacy DeepSeek ids, both deprecated. Per their notes they map onto
# deepseek-v4-flash, and their pricing matches that entry.
[models."deepseek-chat"]
name = "DeepSeek Chat (legacy → V4 Flash, non-thinking)"
provider = "deepseek"
tier = "mid"
open_weight = true
deprecated = true
deprecation_note = "Maps to deepseek-v4-flash non-thinking mode; retirement 2026-07-24 15:59 UTC per provider docs."
context_window = 1_000_000
capabilities = ["tools", "streaming", "prompt_caching"]
strengths = ["coding", "tool_use"]
pricing = { input_per_mtok = 0.14, output_per_mtok = 0.28, cache_read_per_mtok = 0.0028 }

[models."deepseek-reasoner"]
name = "DeepSeek Reasoner (legacy → V4 Flash, thinking)"
provider = "deepseek"
tier = "reasoning"
open_weight = true
deprecated = true
deprecation_note = "Maps to deepseek-v4-flash thinking mode; retirement 2026-07-24 15:59 UTC per provider docs."
context_window = 1_000_000
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
strengths = ["reasoning", "coding"]
pricing = { input_per_mtok = 0.14, output_per_mtok = 0.28, cache_read_per_mtok = 0.0028 }
# DeepSeek V4 Flash routed through the openrouter provider.
# NOTE(review): these rates differ from the direct deepseek-v4-flash
# entry (0.14 in / 0.28 out) — confirm against the provider's listing.
[models."deepseek/deepseek-v4-flash"]
name = "DeepSeek V4 Flash (via OpenRouter)"
provider = "openrouter"
tier = "mid"
open_weight = true
context_window = 1_000_000
capabilities = ["tools", "streaming"]
strengths = [
    "speed",
    "cheap",
    "tool_use",
    "reasoning",
    "long_context",
]
pricing.input_per_mtok = 0.10
pricing.output_per_mtok = 0.20
# DeepSeek V4 Pro routed through the openrouter provider; no cache-read
# rate is recorded for this route.
[models."deepseek/deepseek-v4-pro"]
name = "DeepSeek V4 Pro (via OpenRouter)"
provider = "openrouter"
tier = "frontier"
open_weight = true
context_window = 1_000_000
capabilities = ["tools", "streaming"]
strengths = [
    "reasoning",
    "coding",
    "tool_use",
    "long_context",
]
pricing.input_per_mtok = 0.435
pricing.output_per_mtok = 0.87
# Qwen3.5 9B, addressed by its mixed-case Hub-style repo ID.
# Routed through the huggingface provider: the openrouter entries in this
# file use lowercase slugs, a "(via OpenRouter)" name suffix and a pricing
# table, none of which apply here.
# No pricing table is recorded for this model.
[models."Qwen/Qwen3.5-9B"]
name = "Qwen3.5 9B"
provider = "huggingface"
context_window = 131072
capabilities = ["tools", "streaming"]
tier = "small"
open_weight = true
strengths = ["cheap", "speed"]
# Llama 3.2 served by the ollama provider.
# NOTE(review): no runtime_context_window is set here, unlike the other
# ollama entries — confirm context_window is the intended limit.
[models."llama3.2"]
name = "Llama 3.2"
provider = "ollama"
tier = "small"
open_weight = true
context_window = 32_000
stream_timeout = 300.0  # presumably seconds — confirm
capabilities = ["tools", "streaming"]
strengths = ["cheap", "speed"]
# Gemma 4 26B MoE on the ollama provider, with vision and thinking listed.
# runtime_context_window is smaller than context_window — presumably the
# window actually configured at run time; confirm against the loader.
[models."gemma4:26b"]
name = "Gemma 4 26B MoE"
provider = "ollama"
tier = "mid"
open_weight = true
context_window = 262_144
runtime_context_window = 32_768
stream_timeout = 300.0
capabilities = [
    "tools",
    "vision",
    "streaming",
    "thinking",
]
strengths = ["vision", "tool_use"]
# Qwen3.6 35B A3B coding build (NVFP4) on the ollama provider.
# Carries the longest stream_timeout among the ollama entries nearby.
[models."qwen3.6:35b-a3b-coding-nvfp4"]
name = "Qwen3.6 35B A3B Coding (NVFP4)"
provider = "ollama"
tier = "mid"
open_weight = true
context_window = 262_144
runtime_context_window = 32_768
stream_timeout = 900.0
capabilities = ["tools", "streaming", "thinking"]
strengths = ["coding", "speed"]
# Devstral Small 2 24B on the ollama provider; no "thinking" capability
# is listed for it.
[models."devstral-small-2:24b"]
name = "Devstral Small 2 24B"
provider = "ollama"
tier = "mid"
open_weight = true
context_window = 262_144
runtime_context_window = 32_768
stream_timeout = 600.0
capabilities = ["tools", "streaming"]
strengths = ["coding", "agentic"]
# Qwen3.6 35B, Unsloth Q4_K_XL quantization, on the llamacpp provider.
# runtime_context_window is set to a quarter of context_window.
[models."qwen3.6-35b-a3b-ud-q4-k-xl"]
name = "Qwen3.6 35B (Unsloth Q4_K_XL, llama.cpp)"
provider = "llamacpp"
tier = "mid"
open_weight = true
context_window = 262_144
runtime_context_window = 65_536
stream_timeout = 900.0
capabilities = ["tools", "streaming", "thinking"]
strengths = ["coding"]
# Qwen3.6 35B, Unsloth Q5_K_XL quantization, on the llamacpp provider.
# Settings match the Q4_K_XL entry apart from the ID and display name.
[models."qwen3.6-35b-a3b-ud-q5-k-xl"]
name = "Qwen3.6 35B (Unsloth Q5_K_XL, llama.cpp)"
provider = "llamacpp"
tier = "mid"
open_weight = true
context_window = 262_144
runtime_context_window = 65_536
stream_timeout = 900.0
capabilities = ["tools", "streaming", "thinking"]
strengths = ["coding"]
# Qwen3.6 35B on the llamacpp provider under an ID that names no
# particular quantization.
[models."qwen3.6-35b-a3b"]
name = "Qwen3.6 35B (llama.cpp)"
provider = "llamacpp"
tier = "mid"
open_weight = true
context_window = 262_144
runtime_context_window = 65_536
stream_timeout = 900.0
capabilities = ["tools", "streaming", "thinking"]
strengths = ["coding"]
# Qwen3.6 27B, MLX 4-bit build, on the mlx provider.
# No runtime_context_window is set for this entry.
[models."unsloth/Qwen3.6-27B-UD-MLX-4bit"]
name = "Qwen3.6 27B (MLX 4-bit)"
provider = "mlx"
tier = "mid"
open_weight = true
context_window = 262_144
stream_timeout = 900.0
capabilities = [
    "tools",
    "vision",
    "streaming",
    "thinking",
]
strengths = ["coding", "vision"]
# Gemma 4 E2B on the "local" provider; "tools" is not among its
# listed capabilities.
[models."gemma-4-e2b-it"]
name = "Gemma 4 E2B (local)"
provider = "local"
tier = "small"
open_weight = true
context_window = 131_072
stream_timeout = 300.0
capabilities = ["streaming", "thinking"]
strengths = ["cheap", "speed"]
# Gemma 4 E4B on the "local" provider; "tools" is not among its
# listed capabilities.
[models."gemma-4-e4b-it"]
name = "Gemma 4 E4B (local)"
provider = "local"
tier = "small"
open_weight = true
context_window = 131_072
stream_timeout = 300.0
capabilities = ["streaming", "thinking"]
strengths = ["cheap"]
# Gemma 4 26B MoE on the "local" provider, with a longer stream_timeout
# than the small-tier local Gemma entries.
[models."gemma-4-26b-a4b-it"]
name = "Gemma 4 26B MoE (local)"
provider = "local"
tier = "mid"
open_weight = true
context_window = 131_072
stream_timeout = 600.0
capabilities = ["streaming", "thinking"]
strengths = ["coding"]
# Gemma 4 31B on the "local" provider, placed in the "frontier" tier.
[models."gemma-4-31b-it"]
name = "Gemma 4 31B (local)"
provider = "local"
tier = "frontier"
open_weight = true
context_window = 131_072
stream_timeout = 600.0
capabilities = ["streaming", "thinking"]
strengths = ["coding", "long_context"]