# Model registry: one entry per model.
# Schema per entry:
#   provider          — registry key into the `providers` mapping below
#   model             — human-readable display name
#   api_identifier    — identifier sent to the provider's API
#   max_output_tokens — output token limit without beta headers
#   input_context     — context window without beta headers
#   generation        — numeric family version (used for ordering/filtering)
#   tier              — one of: flagship | balanced | fast
#   legacy            — optional; true marks superseded models
#   beta_headers      — optional; each entry names an HTTP header and the
#                       limit overrides that apply when that header is sent
models:
# --- Anthropic Claude ---
- provider: "claude"
  model: "Claude Opus 4.6"
  api_identifier: "claude-opus-4-6"
  max_output_tokens: 128000
  input_context: 200000
  generation: 4.6
  tier: "flagship"
  beta_headers:
  - key: "anthropic-beta"
    value: "context-1m-2025-08-07"
    input_context: 1000000
- provider: "claude"
  model: "Claude Sonnet 4.6"
  api_identifier: "claude-sonnet-4-6"
  max_output_tokens: 64000
  input_context: 200000
  generation: 4.6
  tier: "balanced"
  beta_headers:
  - key: "anthropic-beta"
    value: "context-1m-2025-08-07"
    input_context: 1000000
- provider: "claude"
  model: "Claude Sonnet 4.5"
  api_identifier: "claude-sonnet-4-5-20250929"
  max_output_tokens: 64000
  input_context: 200000
  generation: 4.5
  tier: "balanced"
  beta_headers:
  - key: "anthropic-beta"
    value: "context-1m-2025-08-07"
    input_context: 1000000
- provider: "claude"
  model: "Claude Haiku 4.5"
  api_identifier: "claude-haiku-4-5-20251001"
  max_output_tokens: 64000
  input_context: 200000
  generation: 4.5
  tier: "fast"
- provider: "claude"
  model: "Claude Opus 4.5"
  api_identifier: "claude-opus-4-5-20251101"
  max_output_tokens: 64000
  input_context: 200000
  generation: 4.5
  tier: "flagship"
  legacy: true
- provider: "claude"
  model: "Claude Opus 4.1"
  api_identifier: "claude-opus-4-1-20250805"
  max_output_tokens: 32000
  input_context: 200000
  generation: 4.1
  tier: "flagship"
  legacy: true
- provider: "claude"
  model: "Claude Opus 4"
  api_identifier: "claude-opus-4-20250514"
  max_output_tokens: 32000
  input_context: 200000
  generation: 4
  tier: "flagship"
  legacy: true
- provider: "claude"
  model: "Claude Sonnet 4"
  api_identifier: "claude-sonnet-4-20250514"
  max_output_tokens: 64000
  input_context: 200000
  generation: 4
  tier: "balanced"
  legacy: true
  beta_headers:
  - key: "anthropic-beta"
    value: "context-1m-2025-08-07"
    input_context: 1000000
- provider: "claude"
  model: "Claude Sonnet 3.7"
  api_identifier: "claude-3-7-sonnet-20250219"
  max_output_tokens: 64000
  input_context: 200000
  generation: 3.7
  tier: "balanced"
  legacy: true
  beta_headers:
  - key: "anthropic-beta"
    value: "output-128k-2025-02-19"
    max_output_tokens: 128000
  # NOTE(review): Anthropic documents the 1M-context beta for Sonnet 4 and
  # later — confirm it actually applies to Sonnet 3.7 before relying on it.
  - key: "anthropic-beta"
    value: "context-1m-2025-08-07"
    input_context: 1000000
- provider: "claude"
  model: "Claude Haiku 3.5"
  api_identifier: "claude-3-5-haiku-20241022"
  max_output_tokens: 8192
  input_context: 200000
  generation: 3.5
  tier: "fast"
  legacy: true
- provider: "claude"
  model: "Claude Haiku 3"
  api_identifier: "claude-3-haiku-20240307"
  max_output_tokens: 4096
  input_context: 200000
  generation: 3
  tier: "fast"
  legacy: true
- provider: "claude"
  model: "Claude Opus 3"
  api_identifier: "claude-3-opus-20240229"
  max_output_tokens: 4096
  input_context: 200000
  generation: 3
  tier: "flagship"
  legacy: true
- provider: "claude"
  model: "Claude Sonnet 3.5"
  api_identifier: "claude-3-5-sonnet-20241022"
  max_output_tokens: 8192
  input_context: 200000
  generation: 3.5
  tier: "balanced"
  legacy: true
# --- OpenAI ---
- provider: "openai"
  model: "GPT-5.2"
  api_identifier: "gpt-5.2"
  max_output_tokens: 128000
  input_context: 400000
  generation: 5.2
  tier: "flagship"
- provider: "openai"
  model: "GPT-5.2 Pro"
  api_identifier: "gpt-5.2-pro"
  max_output_tokens: 128000
  input_context: 400000
  generation: 5.2
  tier: "flagship"
- provider: "openai"
  model: "GPT-5"
  api_identifier: "gpt-5"
  max_output_tokens: 128000
  input_context: 400000
  generation: 5
  tier: "flagship"
- provider: "openai"
  model: "GPT-5 Pro"
  api_identifier: "gpt-5-pro"
  max_output_tokens: 128000
  input_context: 400000
  generation: 5
  tier: "flagship"
- provider: "openai"
  model: "GPT-5 Mini"
  api_identifier: "gpt-5-mini"
  max_output_tokens: 128000
  input_context: 400000
  generation: 5
  tier: "balanced"
- provider: "openai"
  model: "GPT-5 Nano"
  api_identifier: "gpt-5-nano"
  max_output_tokens: 128000
  input_context: 400000
  generation: 5
  tier: "fast"
- provider: "openai"
  model: "o3"
  api_identifier: "o3"
  max_output_tokens: 100000
  input_context: 200000
  generation: 5
  tier: "flagship"
- provider: "openai"
  model: "o3-pro"
  api_identifier: "o3-pro"
  max_output_tokens: 100000
  input_context: 200000
  generation: 5
  tier: "flagship"
# NOTE(review): o4-mini (the newer model, below) is marked legacy while the
# older o3-mini here is not — confirm whether o3-mini should also be legacy.
- provider: "openai"
  model: "o3-mini"
  api_identifier: "o3-mini"
  max_output_tokens: 100000
  input_context: 200000
  generation: 5
  tier: "fast"
- provider: "openai"
  model: "o4-mini"
  api_identifier: "o4-mini"
  max_output_tokens: 100000
  input_context: 200000
  generation: 5
  tier: "fast"
  legacy: true
- provider: "openai"
  model: "o1"
  api_identifier: "o1"
  max_output_tokens: 100000
  input_context: 200000
  generation: 5
  tier: "flagship"
  legacy: true
- provider: "openai"
  model: "o1-pro"
  api_identifier: "o1-pro"
  max_output_tokens: 100000
  input_context: 200000
  generation: 5
  tier: "flagship"
  legacy: true
- provider: "openai"
  model: "o1-mini"
  api_identifier: "o1-mini"
  max_output_tokens: 65536
  input_context: 128000
  generation: 5
  tier: "fast"
  legacy: true
- provider: "openai"
  model: "o1-preview"
  api_identifier: "o1-preview"
  max_output_tokens: 32768
  input_context: 128000
  generation: 5
  tier: "flagship"
  legacy: true
- provider: "openai"
  model: "GPT-4.1"
  api_identifier: "gpt-4.1"
  max_output_tokens: 32768
  input_context: 1047576
  generation: 4.1
  tier: "flagship"
  legacy: true
- provider: "openai"
  model: "GPT-4.1 Mini"
  api_identifier: "gpt-4.1-mini"
  max_output_tokens: 32768
  input_context: 1047576
  generation: 4.1
  tier: "balanced"
  legacy: true
- provider: "openai"
  model: "GPT-4.1 Nano"
  api_identifier: "gpt-4.1-nano"
  max_output_tokens: 32768
  input_context: 1047576
  generation: 4.1
  tier: "fast"
  legacy: true
- provider: "openai"
  model: "GPT-4o"
  api_identifier: "gpt-4o"
  max_output_tokens: 16384
  input_context: 128000
  generation: 4
  tier: "flagship"
  legacy: true
- provider: "openai"
  model: "GPT-4o Mini"
  api_identifier: "gpt-4o-mini"
  max_output_tokens: 16384
  input_context: 128000
  generation: 4
  tier: "fast"
  legacy: true
- provider: "openai"
  model: "GPT-4 Turbo"
  api_identifier: "gpt-4-turbo"
  max_output_tokens: 4096
  input_context: 128000
  generation: 4
  tier: "balanced"
  legacy: true
# --- Google Gemini ---
- provider: "gemini"
  model: "Gemini 3 Pro"
  api_identifier: "gemini-3-pro-preview"
  max_output_tokens: 65536
  input_context: 1048576
  generation: 3.0
  tier: "flagship"
- provider: "gemini"
  model: "Gemini 3 Flash"
  api_identifier: "gemini-3-flash-preview"
  max_output_tokens: 65536
  input_context: 1048576
  generation: 3.0
  tier: "balanced"
- provider: "gemini"
  model: "Gemini 2.5 Pro"
  api_identifier: "gemini-2.5-pro"
  max_output_tokens: 65536
  input_context: 1048576
  generation: 2.5
  tier: "flagship"
- provider: "gemini"
  model: "Gemini 2.5 Flash"
  api_identifier: "gemini-2.5-flash"
  max_output_tokens: 65536
  input_context: 1048576
  generation: 2.5
  tier: "balanced"
- provider: "gemini"
  model: "Gemini 2.5 Flash-Lite"
  api_identifier: "gemini-2.5-flash-lite"
  max_output_tokens: 65536
  input_context: 1048576
  generation: 2.5
  tier: "fast"
- provider: "gemini"
  model: "Gemini 2.0 Flash"
  api_identifier: "gemini-2.0-flash"
  max_output_tokens: 8192
  input_context: 1048576
  generation: 2.0
  tier: "fast"
  legacy: true
- provider: "gemini"
  model: "Gemini 2.0 Flash-Lite"
  api_identifier: "gemini-2.0-flash-lite"
  max_output_tokens: 8192
  input_context: 1048576
  generation: 2.0
  tier: "fast"
  legacy: true
- provider: "gemini"
  model: "Gemini 1.5 Pro"
  api_identifier: "gemini-1.5-pro"
  max_output_tokens: 8192
  # 2M-token window — the only entry in this registry above 1048576.
  input_context: 2097152
  generation: 1.5
  tier: "flagship"
  legacy: true
- provider: "gemini"
  model: "Gemini 1.5 Flash"
  api_identifier: "gemini-1.5-flash"
  max_output_tokens: 8192
  input_context: 1048576
  generation: 1.5
  tier: "balanced"
  legacy: true
# Per-provider settings, keyed by the `provider` value used in `models`.
# Schema per provider:
#   name          — display name
#   api_base      — base URL for API requests
#   default_model — api_identifier used when no model is specified
#                   (must match an entry in `models`)
#   tiers         — description/use-case text for each tier label
#   defaults      — fallback limits when a model entry omits them
providers:
  claude:
    name: "Anthropic Claude"
    api_base: "https://api.anthropic.com/v1"
    default_model: "claude-sonnet-4-6"
    tiers:
      flagship:
        description: "Most capable models for complex reasoning and analysis"
        use_cases: ["complex analysis", "research", "advanced reasoning"]
      balanced:
        description: "Good balance of capability and speed"
        use_cases: ["general tasks", "coding", "writing"]
      fast:
        description: "Optimized for speed and cost efficiency"
        use_cases: ["simple tasks", "high-volume processing", "quick responses"]
    defaults:
      max_output_tokens: 4096
      input_context: 200000
  openai:
    name: "OpenAI"
    api_base: "https://api.openai.com/v1"
    default_model: "gpt-5-mini"
    tiers:
      flagship:
        description: "Most capable models for complex reasoning and analysis"
        use_cases: ["complex analysis", "research", "advanced reasoning"]
      balanced:
        description: "Good balance of capability and speed"
        use_cases: ["general tasks", "coding", "writing"]
      fast:
        description: "Optimized for speed and cost efficiency"
        use_cases: ["simple tasks", "high-volume processing", "quick responses"]
    defaults:
      max_output_tokens: 16384
      input_context: 128000
  gemini:
    name: "Google Gemini"
    api_base: "https://generativelanguage.googleapis.com/v1beta"
    # NOTE(review): default is 2.5 Flash even though Gemini 3 entries exist —
    # presumably because the 3.x identifiers are previews; confirm intended.
    default_model: "gemini-2.5-flash"
    tiers:
      flagship:
        description: "Most capable models for complex reasoning and analysis"
        use_cases: ["complex analysis", "research", "advanced reasoning"]
      balanced:
        description: "Good balance of capability and speed"
        use_cases: ["general tasks", "coding", "writing"]
      fast:
        description: "Optimized for speed and cost efficiency"
        use_cases: ["simple tasks", "high-volume processing", "quick responses"]
    defaults:
      max_output_tokens: 8192
      input_context: 1048576