# OpenAI catalog: chat models first, then embedding models (type: embedding).
# The o1/o3 entries use patch.body to set max_tokens, temperature and top_p
# to null; o3-mini-high is an alias of o3-mini (via real_name) that also sets
# reasoning_effort: high.
- provider: openai
  models:
    - name: gpt-4o
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 2.5
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4o-search-preview
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 2.5
      output_price: 10
      supports_vision: true
    - name: chatgpt-4o-latest
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 5
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4o-mini
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 0.15
      output_price: 0.6
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4o-mini-search-preview
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 0.15
      output_price: 0.6
      supports_vision: true
    - name: gpt-4-turbo
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 10
      output_price: 30
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4.5-preview
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 75
      output_price: 150
      supports_vision: true
      supports_function_calling: true
    - name: o3-mini
      max_input_tokens: 200000
      input_price: 1.1
      output_price: 4.4
      supports_vision: true
      supports_function_calling: true
      system_prompt_prefix: Formatting re-enabled
      patch:
        body:
          max_tokens: null
          temperature: null
          top_p: null
    - name: o3-mini-high
      real_name: o3-mini
      max_input_tokens: 200000
      input_price: 1.1
      output_price: 4.4
      supports_vision: true
      supports_function_calling: true
      system_prompt_prefix: Formatting re-enabled
      patch:
        body:
          reasoning_effort: high
          max_tokens: null
          temperature: null
          top_p: null
    - name: o1-pro
      max_input_tokens: 200000
      input_price: 150
      output_price: 600
      supports_vision: true
      supports_function_calling: true
      system_prompt_prefix: Formatting re-enabled
      patch:
        body:
          max_tokens: null
          temperature: null
          top_p: null
    - name: o1
      max_input_tokens: 200000
      input_price: 15
      output_price: 60
      supports_vision: true
      supports_function_calling: true
      system_prompt_prefix: Formatting re-enabled
      patch:
        body:
          max_tokens: null
          temperature: null
          top_p: null
    - name: o1-preview
      max_input_tokens: 128000
      max_output_tokens: 32768
      input_price: 15
      output_price: 60
      no_system_message: true
      patch:
        body:
          max_tokens: null
          temperature: null
          top_p: null
    - name: o1-mini
      max_input_tokens: 128000
      max_output_tokens: 65536
      input_price: 3
      output_price: 12
      no_system_message: true
      patch:
        body:
          max_tokens: null
          temperature: null
          top_p: null
    - name: gpt-3.5-turbo
      max_input_tokens: 16385
      max_output_tokens: 4096
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    - name: text-embedding-3-large
      type: embedding
      input_price: 0.13
      max_tokens_per_chunk: 8191
      default_chunk_size: 2000
      max_batch_size: 100
    - name: text-embedding-3-small
      type: embedding
      input_price: 0.02
      max_tokens_per_chunk: 8191
      default_chunk_size: 2000
      max_batch_size: 100
# Gemini catalog: chat models plus one embedding model. Every entry in this
# stanza is listed with a price of 0.
- provider: gemini
  models:
    - name: gemini-2.0-flash
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemini-2.0-flash-lite
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemini-2.0-flash-thinking-exp
      max_input_tokens: 32767
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
    - name: gemini-2.0-pro-exp
      max_input_tokens: 2097152
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemini-2.5-pro-exp-03-25
      max_input_tokens: 1048576
      max_output_tokens: 65536
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemma-3-27b-it
      max_input_tokens: 131072
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
    - name: gemini-1.5-pro-latest
      max_input_tokens: 2097152
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.5-flash-latest
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.5-flash-8b-latest
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: text-embedding-004
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 2048
      default_chunk_size: 1500
      max_batch_size: 100
# Claude catalog: every entry sets require_max_tokens. The ':thinking' entry
# is an alias of claude-3-7-sonnet-20250219 (via real_name) whose patch.body
# sets temperature/top_p to null and enables thinking with a token budget.
- provider: claude
  models:
    - name: claude-3-7-sonnet-20250219
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-7-sonnet-20250219:thinking
      real_name: claude-3-7-sonnet-20250219
      max_input_tokens: 200000
      max_output_tokens: 24000
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      patch:
        body:
          temperature: null
          top_p: null
          thinking:
            type: enabled
            budget_tokens: 16000
    - name: claude-3-5-sonnet-20241022
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-5-sonnet-20240620
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-5-haiku-20241022
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.8
      output_price: 4
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-opus-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-sonnet-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-haiku-20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
# Mistral catalog: text models (function calling), pixtral vision models, and
# one embedding model. No entry in this stanza declares max_output_tokens.
- provider: mistral
  models:
    - name: mistral-large-latest
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: mistral-small-latest
      max_input_tokens: 32000
      input_price: 0.1
      output_price: 0.3
      supports_function_calling: true
    - name: codestral-latest
      max_input_tokens: 256000
      input_price: 0.3
      output_price: 0.9
      supports_function_calling: true
    - name: ministral-8b-latest
      max_input_tokens: 128000
      input_price: 0.1
      output_price: 0.1
      supports_function_calling: true
    - name: open-mistral-nemo
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.15
      supports_function_calling: true
    - name: pixtral-large-latest
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_vision: true
    - name: pixtral-12b-latest
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.15
      supports_vision: true
    # NOTE(review): 8092 (not 8192) is used for both token limits below;
    # confirm against the provider's documentation.
    - name: mistral-embed
      type: embedding
      max_input_tokens: 8092
      input_price: 0.1
      max_tokens_per_chunk: 8092
      default_chunk_size: 2000
# AI21 catalog: two jamba chat models, both flagged for function calling.
- provider: ai21
  models:
    - name: jamba-large
      max_input_tokens: 256000
      input_price: 2
      output_price: 8
      supports_function_calling: true
    - name: jamba-mini
      max_input_tokens: 256000
      input_price: 0.2
      output_price: 0.4
      supports_function_calling: true
# Cohere catalog: chat models, then embedding models (type: embedding), then
# rerankers (type: reranker). Reranker entries carry no price fields.
- provider: cohere
  models:
    - name: command-a-03-2025
      max_input_tokens: 256000
      max_output_tokens: 8192
      input_price: 2.5
      output_price: 10
      supports_function_calling: true
    - name: command-r-plus-08-2024
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 2.5
      output_price: 10
      supports_function_calling: true
    - name: command-r-08-2024
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 0.15
      output_price: 0.6
      supports_function_calling: true
    - name: command-r7b-12-2024
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 0.0375
      output_price: 0.15
    - name: embed-english-v3.0
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: embed-english-light-v3.0
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 700
      max_batch_size: 96
    - name: embed-multilingual-v3.0
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: embed-multilingual-light-v3.0
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 700
      max_batch_size: 96
    - name: rerank-v3.5
      type: reranker
      max_input_tokens: 4096
    - name: rerank-english-v3.0
      type: reranker
      max_input_tokens: 4096
    - name: rerank-multilingual-v3.0
      type: reranker
      max_input_tokens: 4096
# xAI catalog: grok text models followed by grok vision models.
- provider: xai
  models:
    - name: grok-2-latest
      max_input_tokens: 131072
      input_price: 2
      output_price: 10
      supports_function_calling: true
    - name: grok-2-1212
      max_input_tokens: 131072
      input_price: 2
      output_price: 10
      supports_function_calling: true
    - name: grok-beta
      max_input_tokens: 131072
      input_price: 5
      output_price: 15
      supports_function_calling: true
    - name: grok-2-vision-latest
      max_input_tokens: 32768
      input_price: 2
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    - name: grok-2-vision-1212
      max_input_tokens: 32768
      input_price: 2
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    - name: grok-vision-beta
      max_input_tokens: 8192
      input_price: 5
      output_price: 15
      supports_vision: true
# Perplexity catalog: no entry in this stanza declares vision or
# function-calling support.
- provider: perplexity
  models:
    - name: sonar-pro
      max_input_tokens: 200000
      input_price: 3
      output_price: 15
    - name: sonar
      max_input_tokens: 128000
      input_price: 1
      output_price: 1
    - name: sonar-reasoning-pro
      max_input_tokens: 128000
      input_price: 2
      output_price: 8
    - name: sonar-reasoning
      max_input_tokens: 128000
      input_price: 1
      output_price: 5
    - name: sonar-deep-research
      max_input_tokens: 128000
      input_price: 2
      output_price: 8
    - name: r1-1776
      max_input_tokens: 128000
      input_price: 2
      output_price: 8
# Groq catalog: every entry shares max_input_tokens 131072 and is listed with
# input/output prices of 0.
- provider: groq
  models:
    - name: llama-3.3-70b-versatile
      max_input_tokens: 131072
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: llama-3.1-8b-instant
      max_input_tokens: 131072
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: llama-3.2-90b-vision-preview
      max_input_tokens: 131072
      input_price: 0
      output_price: 0
      supports_vision: true
    - name: llama-3.2-11b-vision-preview
      max_input_tokens: 131072
      input_price: 0
      output_price: 0
      supports_vision: true
    - name: deepseek-r1-distill-llama-70b
      max_input_tokens: 131072
      input_price: 0
      output_price: 0
    - name: deepseek-r1-distill-qwen-32b
      max_input_tokens: 131072
      input_price: 0
      output_price: 0
    - name: qwen-qwq-32b
      max_input_tokens: 131072
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: qwen-2.5-32b
      max_input_tokens: 131072
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: qwen-2.5-coder-32b
      max_input_tokens: 131072
      input_price: 0
      output_price: 0
# Vertex AI catalog: Gemini, Claude ('@'-versioned names), Mistral and
# embedding entries. The two '-exp-' Gemini entries carry no price fields.
# The ':thinking' entry is an alias of claude-3-7-sonnet@20250219 (via
# real_name) whose patch.body enables thinking with a token budget.
- provider: vertexai
  models:
    - name: gemini-2.0-flash-001
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0.15
      output_price: 0.6
      supports_vision: true
      supports_function_calling: true
    - name: gemini-2.0-flash-lite-001
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0.075
      output_price: 0.3
      supports_vision: true
      supports_function_calling: true
    - name: gemini-2.0-flash-thinking-exp-01-21
      max_input_tokens: 32760
      max_output_tokens: 8192
      supports_vision: true
    - name: gemini-2.0-pro-exp-02-05
      max_input_tokens: 2097152
      max_output_tokens: 8192
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.5-pro-002
      max_input_tokens: 2097152
      max_output_tokens: 8192
      input_price: 1.25
      output_price: 3.75
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.5-flash-002
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0.019
      output_price: 0.075
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-7-sonnet@20250219
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-7-sonnet@20250219:thinking
      real_name: claude-3-7-sonnet@20250219
      max_input_tokens: 200000
      max_output_tokens: 24000
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      patch:
        body:
          temperature: null
          top_p: null
          thinking:
            type: enabled
            budget_tokens: 16000
    - name: claude-3-5-sonnet-v2@20241022
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-5-sonnet@20240620
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-5-haiku@20241022
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.8
      output_price: 4
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-opus@20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-sonnet@20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-haiku@20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
    - name: mistral-large-2411
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: codestral-2501
      max_input_tokens: 256000
      input_price: 0.3
      output_price: 0.9
      supports_function_calling: true
    - name: mistral-nemo@2407
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.15
      supports_function_calling: true
    - name: text-embedding-005
      type: embedding
      max_input_tokens: 20000
      input_price: 0.025
      max_tokens_per_chunk: 2048
      default_chunk_size: 1500
      max_batch_size: 5
    - name: text-multilingual-embedding-002
      type: embedding
      max_input_tokens: 20000
      input_price: 0.2
      max_tokens_per_chunk: 2048
      default_chunk_size: 1500
      max_batch_size: 5
# Bedrock catalog: Anthropic, Meta, Amazon Nova, Mistral, Cohere and AI21
# entries. The ':thinking' entry is an alias (via real_name) whose patch.body
# sets inferenceConfig.temperature/topP to null and enables thinking under
# additionalModelRequestFields.
# NOTE(review): nesting of the patch keys is reconstructed from the key
# names; confirm against the request shape the client actually sends.
- provider: bedrock
  models:
    - name: us.anthropic.claude-3-7-sonnet-20250219-v1:0
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: us.anthropic.claude-3-7-sonnet-20250219-v1:0:thinking
      real_name: us.anthropic.claude-3-7-sonnet-20250219-v1:0
      max_input_tokens: 200000
      max_output_tokens: 24000
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      patch:
        body:
          inferenceConfig:
            temperature: null
            topP: null
          additionalModelRequestFields:
            thinking:
              type: enabled
              budget_tokens: 16000
    - name: anthropic.claude-3-5-sonnet-20241022-v2:0
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-5-sonnet-20240620-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-5-haiku-20241022-v1:0
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.8
      output_price: 4
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-opus-20240229-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-sonnet-20240229-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-haiku-20240307-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
    - name: us.meta.llama3-3-70b-instruct-v1:0
      max_input_tokens: 131072
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.72
      output_price: 0.72
      supports_function_calling: true
    - name: meta.llama3-1-405b-instruct-v1:0
      max_input_tokens: 131072
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 2.4
      output_price: 2.4
      supports_function_calling: true
    - name: meta.llama3-1-70b-instruct-v1:0
      max_input_tokens: 131072
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.72
      output_price: 0.72
      supports_function_calling: true
    - name: meta.llama3-1-8b-instruct-v1:0
      max_input_tokens: 131072
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.22
      output_price: 0.22
      supports_function_calling: true
    - name: us.meta.llama3-2-90b-instruct-v1:0
      max_input_tokens: 131072
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.72
      output_price: 0.72
      supports_function_calling: true
      supports_vision: true
    - name: us.meta.llama3-2-11b-instruct-v1:0
      max_input_tokens: 131072
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.16
      output_price: 0.16
      supports_function_calling: true
      supports_vision: true
    - name: us.amazon.nova-pro-v1:0
      max_input_tokens: 300000
      max_output_tokens: 5120
      input_price: 0.8
      output_price: 3.2
      supports_vision: true
    - name: us.amazon.nova-lite-v1:0
      max_input_tokens: 300000
      max_output_tokens: 5120
      input_price: 0.06
      output_price: 0.24
      supports_vision: true
    - name: us.amazon.nova-micro-v1:0
      max_input_tokens: 128000
      max_output_tokens: 5120
      input_price: 0.035
      output_price: 0.14
    - name: mistral.mistral-large-2407-v1:0
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: cohere.command-r-plus-v1:0
      max_input_tokens: 128000
      input_price: 3
      output_price: 15
      supports_function_calling: true
    - name: cohere.command-r-v1:0
      max_input_tokens: 128000
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    - name: cohere.embed-english-v3
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: cohere.embed-multilingual-v3
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: ai21.jamba-1-5-large-v1:0
      max_input_tokens: 256000
      input_price: 2
      output_price: 8
      supports_function_calling: true
    - name: ai21.jamba-1-5-mini-v1:0
      max_input_tokens: 256000
      input_price: 0.2
      output_price: 0.4
      supports_function_calling: true
# Cloudflare catalog: names are single-quoted because a plain YAML scalar may
# not start with '@'. Chat entries set require_max_tokens with
# max_output_tokens 2048; all prices are 0.
- provider: cloudflare
  models:
    - name: '@cf/meta/llama-3.3-70b-instruct-fp8-fast'
      max_input_tokens: 131072
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: '@cf/meta/llama-3.1-70b-instruct'
      max_input_tokens: 131072
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: '@cf/meta/llama-3.1-8b-instruct'
      max_input_tokens: 131072
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: '@cf/deepseek-ai/deepseek-r1-distill-qwen-32b'
      max_input_tokens: 131072
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: '@cf/baai/bge-large-en-v1.5'
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
# Ernie catalog: chat models, two embedding models and one reranker.
# Model names must be unique within a provider; each name appears exactly
# once in this list.
- provider: ernie
  models:
    - name: ernie-4.0-8k-latest
      max_input_tokens: 8192
      input_price: 0.56
      output_price: 2.24
      supports_function_calling: true
    - name: ernie-4.0-turbo-8k-latest
      max_input_tokens: 8192
      input_price: 0.42
      output_price: 1.26
      supports_function_calling: true
    - name: ernie-4.0-turbo-128k
      max_input_tokens: 128000
      input_price: 0.42
      output_price: 1.26
      supports_function_calling: true
    - name: ernie-3.5-128k
      max_input_tokens: 128000
      input_price: 0.112
      output_price: 0.28
      supports_function_calling: true
    - name: ernie-speed-pro-128k
      max_input_tokens: 128000
      input_price: 0.042
      output_price: 0.084
    - name: deepseek-v3
      max_input_tokens: 131072
      input_price: 0.112
      output_price: 0.224
    - name: deepseek-r1
      max_input_tokens: 131072
      input_price: 0.28
      output_price: 1.12
    - name: deepseek-r1-distill-llama-70b
      max_input_tokens: 131072
      input_price: 0.28
      output_price: 1.12
    - name: deepseek-r1-distill-qwen-32b
      max_input_tokens: 131072
      input_price: 0.21
      output_price: 0.84
    - name: bge-large-zh
      type: embedding
      input_price: 0.07
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 16
    - name: bge-large-en
      type: embedding
      input_price: 0.07
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 16
    - name: bce-reranker-base
      type: reranker
      max_input_tokens: 1024
      input_price: 0.07
# Qianwen catalog: qwen chat/vision models, hosted deepseek models and two
# embedding models. qwen-omni-turbo-latest, qwq-plus-latest, qwq-32b and
# deepseek-r1-distill-llama-70b carry no price fields.
- provider: qianwen
  models:
    - name: qwen-max-latest
      # NOTE(review): 32678 may be a transposition of 32768; confirm.
      max_input_tokens: 32678
      max_output_tokens: 8192
      input_price: 1.6
      output_price: 6.4
      supports_function_calling: true
    - name: qwen-plus-latest
      max_input_tokens: 131072
      max_output_tokens: 8192
      input_price: 0.112
      output_price: 0.28
      supports_function_calling: true
    - name: qwen-turbo-latest
      max_input_tokens: 1000000
      max_output_tokens: 8192
      input_price: 0.042
      output_price: 0.084
      supports_function_calling: true
    - name: qwen-long
      max_input_tokens: 1000000
      input_price: 0.07
      output_price: 0.28
    - name: qwen-omni-turbo-latest
      max_input_tokens: 32768
      max_output_tokens: 2048
      supports_vision: true
    - name: qwq-plus-latest
      max_input_tokens: 131072
      max_output_tokens: 8192
    - name: qwq-32b
      max_input_tokens: 131072
      max_output_tokens: 8192
    - name: qwen-vl-max-latest
      max_input_tokens: 30720
      max_output_tokens: 2048
      input_price: 0.42
      output_price: 1.26
      supports_vision: true
    - name: qwen-vl-plus-latest
      max_input_tokens: 30000
      max_output_tokens: 2048
      input_price: 0.21
      output_price: 0.63
      supports_vision: true
    - name: qwen2.5-72b-instruct
      max_input_tokens: 129024
      max_output_tokens: 8192
      input_price: 0.56
      output_price: 1.68
      supports_function_calling: true
    - name: qwen2.5-vl-72b-instruct
      max_input_tokens: 129024
      max_output_tokens: 8192
      input_price: 2.24
      output_price: 6.72
      supports_vision: true
    - name: qwen2.5-coder-32b-instruct
      max_input_tokens: 129024
      max_output_tokens: 8192
      input_price: 0.49
      output_price: 0.98
      supports_function_calling: true
    - name: deepseek-v3
      max_input_tokens: 65792
      input_price: 0.14
      output_price: 0.56
    - name: deepseek-r1
      max_input_tokens: 65792
      input_price: 0.28
      output_price: 1.12
    - name: deepseek-r1-distill-llama-70b
      max_input_tokens: 32768
    - name: deepseek-r1-distill-qwen-32b
      max_input_tokens: 32768
      input_price: 0.28
      output_price: 0.84
    - name: text-embedding-v3
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 8192
      default_chunk_size: 2000
      max_batch_size: 6
    - name: text-embedding-v2
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 2048
      default_chunk_size: 2000
      max_batch_size: 25
# Hunyuan catalog: text models, two vision models and one embedding model.
- provider: hunyuan
  models:
    - name: hunyuan-turbos-latest
      max_input_tokens: 24000
      max_output_tokens: 8192
      input_price: 0.112
      output_price: 0.28
      supports_function_calling: true
    - name: hunyuan-t1-latest
      max_input_tokens: 28000
      max_output_tokens: 64000
      input_price: 0.14
      output_price: 0.56
    - name: hunyuan-turbo-latest
      max_input_tokens: 28000
      max_output_tokens: 4096
      input_price: 0.336
      output_price: 1.344
      supports_function_calling: true
    - name: hunyuan-large
      max_input_tokens: 28000
      max_output_tokens: 4096
      input_price: 0.56
      output_price: 1.68
      supports_function_calling: true
    - name: hunyuan-large-longcontext
      max_input_tokens: 128000
      max_output_tokens: 6144
      input_price: 0.84
      output_price: 2.52
      supports_function_calling: true
    - name: hunyuan-standard
      max_input_tokens: 30000
      max_output_tokens: 2048
      input_price: 0.112
      output_price: 0.28
      supports_function_calling: true
    - name: hunyuan-standard-256K
      max_input_tokens: 250000
      max_output_tokens: 6144
      input_price: 0.07
      output_price: 0.28
      supports_function_calling: true
    - name: hunyuan-lite
      max_input_tokens: 250000
      max_output_tokens: 6144
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: hunyuan-turbo-vision
      max_input_tokens: 6144
      max_output_tokens: 2048
      input_price: 11.2
      output_price: 11.2
      supports_vision: true
    - name: hunyuan-vision
      max_input_tokens: 6144
      max_output_tokens: 2048
      input_price: 2.52
      output_price: 2.52
      supports_vision: true
    - name: hunyuan-embedding
      type: embedding
      input_price: 0.01
      max_tokens_per_chunk: 1024
      default_chunk_size: 1000
      max_batch_size: 100
# Moonshot catalog: kimi-latest declares only capability flags (no token
# limit or prices); the moonshot-v1 entries come in 8k/32k/128k sizes, each
# with a '-vision-preview' variant.
- provider: moonshot
  models:
    - name: kimi-latest
      supports_vision: true
      supports_function_calling: true
    - name: moonshot-v1-8k
      max_input_tokens: 8192
      input_price: 1.68
      output_price: 1.68
      supports_function_calling: true
    - name: moonshot-v1-32k
      max_input_tokens: 32768
      input_price: 3.36
      output_price: 3.36
      supports_function_calling: true
    - name: moonshot-v1-128k
      max_input_tokens: 131072
      input_price: 8.4
      output_price: 8.4
      supports_function_calling: true
    - name: moonshot-v1-8k-vision-preview
      max_input_tokens: 8192
      input_price: 1.68
      output_price: 1.68
      supports_vision: true
    - name: moonshot-v1-32k-vision-preview
      max_input_tokens: 32768
      input_price: 3.36
      output_price: 3.36
      supports_vision: true
    - name: moonshot-v1-128k-vision-preview
      max_input_tokens: 131072
      input_price: 8.4
      output_price: 8.4
      supports_vision: true
# DeepSeek catalog: two models; only deepseek-chat declares function calling.
- provider: deepseek
  models:
    - name: deepseek-chat
      max_input_tokens: 64000
      max_output_tokens: 8192
      input_price: 0.27
      output_price: 1.1
      supports_function_calling: true
    - name: deepseek-reasoner
      max_input_tokens: 64000
      max_output_tokens: 8192
      input_price: 0.55
      output_price: 2.19
# ZhipuAI catalog: glm-4 text models, glm-4v vision models, glm-zero-preview,
# one embedding model and one reranker.
- provider: zhipuai
  models:
    - name: glm-4-plus
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 7
      output_price: 7
      supports_function_calling: true
    - name: glm-4-alltools
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 14
      output_price: 14
      supports_function_calling: true
    - name: glm-4-long
      max_input_tokens: 1000000
      max_output_tokens: 4096
      input_price: 0.14
      output_price: 0.14
      supports_function_calling: true
    - name: glm-4-flash
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: glm-4v-plus
      max_input_tokens: 8192
      input_price: 0.56
      output_price: 0.56
      supports_vision: true
    - name: glm-4v-flash
      max_input_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
    - name: glm-zero-preview
      max_input_tokens: 16384
      input_price: 1.4
      output_price: 1.4
    - name: embedding-3
      type: embedding
      max_input_tokens: 8192
      input_price: 0.07
      max_tokens_per_chunk: 8192
      default_chunk_size: 2000
    - name: rerank
      type: reranker
      max_input_tokens: 4096
      input_price: 0.112
# Lingyiwanwu catalog: one text model and one vision model.
- provider: lingyiwanwu
  models:
    - name: yi-lightning
      max_input_tokens: 16384
      input_price: 0.14
      output_price: 0.14
    - name: yi-vision-v2
      max_input_tokens: 16384
      input_price: 0.84
      output_price: 0.84
      supports_vision: true
# MiniMax catalog: two vision-flagged models plus a hosted deepseek-r1.
- provider: minimax
  models:
    - name: minimax-text-01
      max_input_tokens: 1000192
      input_price: 0.14
      output_price: 1.12
      supports_vision: true
    - name: abab6.5s-chat
      max_input_tokens: 245760
      input_price: 0.14
      output_price: 0.14
      supports_vision: true
    - name: deepseek-r1
      max_input_tokens: 131072
      input_price: 0.56
      output_price: 2.24
- provider: openrouter
models:
- name: openai/gpt-4o
max_input_tokens: 128000
input_price: 2.5
output_price: 10
supports_vision: true
supports_function_calling: true
- name: openai/gpt-4o-search-preview
max_input_tokens: 128000
max_output_tokens: 16384
input_price: 2.5
output_price: 10
supports_vision: true
- name: openai/chatgpt-4o-latest
max_input_tokens: 128000
input_price: 5
output_price: 15
supports_vision: true
supports_function_calling: true
- name: openai/gpt-4o-mini
max_input_tokens: 128000
input_price: 0.15
output_price: 0.6
supports_vision: true
supports_function_calling: true
- name: openai/gpt-4o-mini-search-preview
max_input_tokens: 128000
max_output_tokens: 16384
input_price: 0.15
output_price: 0.6
supports_vision: true
- name: openai/gpt-4-turbo
max_input_tokens: 128000
input_price: 10
output_price: 30
supports_vision: true
supports_function_calling: true
- name: openai/gpt-4.5-preview
max_input_tokens: 128000
max_output_tokens: 16384
input_price: 75
output_price: 150
supports_vision: true
supports_function_calling: true
- name: openai/o3-mini
max_input_tokens: 200000
input_price: 1.1
output_price: 4.4
supports_vision: true
supports_function_calling: true
system_prompt_prefix: Formatting re-enabled
patch:
body:
temperature: null
top_p: null
- name: openai/o3-mini-high
max_input_tokens: 200000
input_price: 1.1
output_price: 4.4
supports_vision: true
supports_function_calling: true
system_prompt_prefix: Formatting re-enabled
patch:
body:
temperature: null
top_p: null
- name: openai/o1-pro
max_input_tokens: 200000
input_price: 150
output_price: 600
supports_vision: true
supports_function_calling: true
system_prompt_prefix: Formatting re-enabled
patch:
body:
max_tokens: null
temperature: null
top_p: null
- name: openai/o1
max_input_tokens: 128000
input_price: 15
output_price: 60
supports_vision: true
supports_function_calling: true
system_prompt_prefix: Formatting re-enabled
patch:
body:
temperature: null
top_p: null
- name: openai/o1-preview
max_input_tokens: 128000
input_price: 15
output_price: 60
no_system_message: true
patch:
body:
temperature: null
top_p: null
- name: openai/o1-mini
max_input_tokens: 128000
input_price: 3
output_price: 12
no_system_message: true
patch:
body:
temperature: null
top_p: null
- name: openai/gpt-3.5-turbo
max_input_tokens: 16385
input_price: 0.5
output_price: 1.5
supports_function_calling: true
- name: google/gemini-pro-1.5
max_input_tokens: 2000000
input_price: 1.25
output_price: 5
supports_vision: true
supports_function_calling: true
- name: google/gemini-flash-1.5
max_input_tokens: 1000000
input_price: 0.075
output_price: 0.3
supports_vision: true
supports_function_calling: true
- name: google/gemini-flash-1.5-8b
max_input_tokens: 1000000
input_price: 0.0375
output_price: 0.15
supports_vision: true
supports_function_calling: true
- name: google/gemini-2.0-flash-001
max_input_tokens: 1000000
input_price: 0.1
output_price: 0.4
supports_vision: true
supports_function_calling: true
- name: google/gemini-2.0-flash-lite-001
max_input_tokens: 1048576
input_price: 0.075
output_price: 0.3
supports_vision: true
supports_function_calling: true
- name: google/gemma-3-27b-it
max_input_tokens: 131072
input_price: 0.1
output_price: 0.2
- name: anthropic/claude-3.7-sonnet
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-3.7-sonnet:thinking
max_input_tokens: 200000
max_output_tokens: 24000
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
patch:
body:
include_reasoning: true
- name: anthropic/claude-3.5-sonnet
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-3-5-haiku
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 0.8
output_price: 4
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-3-opus
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 15
output_price: 75
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-3-sonnet
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-3-haiku
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 0.25
output_price: 1.25
supports_vision: true
supports_function_calling: true
# Meta Llama entries. None declares max_output_tokens.
- name: meta-llama/llama-3.3-70b-instruct
max_input_tokens: 131072
input_price: 0.12
output_price: 0.3
# NOTE(review): 405b lists a 32768-token input limit while the other Llama
# entries in this group list 131072 - confirm this matches the provider.
- name: meta-llama/llama-3.1-405b-instruct
max_input_tokens: 32768
input_price: 0.8
output_price: 0.8
supports_function_calling: true
- name: meta-llama/llama-3.1-70b-instruct
max_input_tokens: 131072
input_price: 0.12
output_price: 0.3
supports_function_calling: true
- name: meta-llama/llama-3.1-8b-instruct
max_input_tokens: 131072
input_price: 0.02
output_price: 0.05
# Llama 3.2 vision entries: flagged supports_vision, no function-calling flag.
- name: meta-llama/llama-3.2-90b-vision-instruct
max_input_tokens: 131072
input_price: 0.9
output_price: 0.9
supports_vision: true
- name: meta-llama/llama-3.2-11b-vision-instruct
max_input_tokens: 131072
input_price: 0.055
output_price: 0.055
supports_vision: true
# Mistral AI entries. Text entries mostly carry supports_function_calling;
# the two pixtral entries carry supports_vision instead.
- name: mistralai/mistral-large-2411
max_input_tokens: 128000
input_price: 2
output_price: 6
supports_function_calling: true
# No capability flags are declared for this entry.
- name: mistralai/mistral-small-3.1-24b-instruct
max_input_tokens: 131072
input_price: 0.1
output_price: 0.3
- name: mistralai/codestral-2501
max_input_tokens: 256000
input_price: 0.3
output_price: 0.9
supports_function_calling: true
- name: mistralai/ministral-8b
max_input_tokens: 128000
input_price: 0.1
output_price: 0.1
supports_function_calling: true
- name: mistralai/mistral-nemo
max_input_tokens: 128000
input_price: 0.035
output_price: 0.08
supports_function_calling: true
- name: mistralai/pixtral-large-2411
max_input_tokens: 128000
input_price: 2
output_price: 6
supports_vision: true
- name: mistralai/pixtral-12b
max_input_tokens: 128000
input_price: 0.1
output_price: 0.1
supports_vision: true
# AI21 Jamba 1.6 entries: both declare a 256000-token input limit and
# supports_function_calling.
- name: ai21/jamba-1.6-large
max_input_tokens: 256000
input_price: 2
output_price: 8
supports_function_calling: true
- name: ai21/jamba-1.6-mini
max_input_tokens: 256000
input_price: 0.2
output_price: 0.4
supports_function_calling: true
# Cohere Command entries.
- name: cohere/command-a
max_input_tokens: 256000
input_price: 2.5
output_price: 10
supports_function_calling: true
- name: cohere/command-r-plus-08-2024
max_input_tokens: 128000
input_price: 2.5
output_price: 10
supports_function_calling: true
- name: cohere/command-r-08-2024
max_input_tokens: 128000
input_price: 0.15
output_price: 0.6
supports_function_calling: true
# Only Cohere entry here with an explicit output cap; it declares no
# function-calling flag.
- name: cohere/command-r7b-12-2024
max_input_tokens: 128000
max_output_tokens: 4096
input_price: 0.0375
output_price: 0.15
# DeepSeek entries. The chat model declares function calling; the R1 family
# instead carries a patch that sets include_reasoning: true.
- name: deepseek/deepseek-chat-v3-0324
max_input_tokens: 64000
input_price: 0.27
output_price: 1.1
supports_function_calling: true
# NOTE(review): include_reasoning presumably asks the provider to return the
# model's reasoning text - confirm against the provider API.
- name: deepseek/deepseek-r1
max_input_tokens: 163840
input_price: 0.55
output_price: 2.19
patch:
body:
include_reasoning: true
- name: deepseek/deepseek-r1-distill-llama-70b
max_input_tokens: 131072
input_price: 0.23
output_price: 0.69
patch:
body:
include_reasoning: true
- name: deepseek/deepseek-r1-distill-qwen-32b
max_input_tokens: 131072
input_price: 0.12
output_price: 0.18
patch:
body:
include_reasoning: true
# Qwen entries. The three hosted tiers (max/plus/turbo) declare an 8192-token
# output cap and function calling.
- name: qwen/qwen-max
max_input_tokens: 32768
max_output_tokens: 8192
input_price: 1.6
output_price: 6.4
supports_function_calling: true
- name: qwen/qwen-plus
max_input_tokens: 131072
max_output_tokens: 8192
input_price: 0.4
output_price: 1.2
supports_function_calling: true
- name: qwen/qwen-turbo
max_input_tokens: 1000000
max_output_tokens: 8192
input_price: 0.05
output_price: 0.2
supports_function_calling: true
# Vision entry with a notably small input limit (7500 tokens).
- name: qwen/qwen-vl-plus
max_input_tokens: 7500
input_price: 0.21
output_price: 0.63
supports_vision: true
# No capability flags and no reasoning patch are declared for this entry.
- name: qwen/qwq-32b
max_input_tokens: 128000
input_price: 0.29
output_price: 0.39
- name: qwen/qwen-2.5-72b-instruct
max_input_tokens: 131072
input_price: 0.35
output_price: 0.4
supports_function_calling: true
# NOTE(review): this id is spelled "qwen2.5-" while its neighbours use
# "qwen-2.5-" - verify the exact slug the provider expects.
- name: qwen/qwen2.5-vl-72b-instruct
max_input_tokens: 32000
input_price: 0.7
output_price: 0.7
supports_vision: true
- name: qwen/qwen-2.5-coder-32b-instruct
max_input_tokens: 32768
input_price: 0.18
output_price: 0.18
# xAI Grok entries. The "-1212" entries are priced 2/10, the "beta" entries
# 5/15.
- name: x-ai/grok-2-1212
max_input_tokens: 131072
input_price: 2
output_price: 10
supports_function_calling: true
- name: x-ai/grok-beta
max_input_tokens: 32768
input_price: 5
output_price: 15
supports_function_calling: true
# Vision entry that also declares function calling.
- name: x-ai/grok-2-vision-1212
max_input_tokens: 32768
input_price: 2
output_price: 10
supports_vision: true
supports_function_calling: true
# Vision entry with an 8192-token input limit and no function-calling flag.
- name: x-ai/grok-vision-beta
max_input_tokens: 8192
input_price: 5
output_price: 15
supports_vision: true
# Amazon Nova entries: all three cap output at 5120 tokens. Pro and lite are
# flagged supports_vision; micro declares no capability flags.
- name: amazon/nova-pro-v1
max_input_tokens: 300000
max_output_tokens: 5120
input_price: 0.8
output_price: 3.2
supports_vision: true
- name: amazon/nova-lite-v1
max_input_tokens: 300000
max_output_tokens: 5120
input_price: 0.06
output_price: 0.24
supports_vision: true
- name: amazon/nova-micro-v1
max_input_tokens: 128000
max_output_tokens: 5120
input_price: 0.035
output_price: 0.14
# Perplexity entries. sonar-pro and sonar are plain entries; the remaining
# four carry a patch that sets include_reasoning: true.
- name: perplexity/sonar-pro
max_input_tokens: 200000
input_price: 3
output_price: 15
- name: perplexity/sonar
max_input_tokens: 127072
input_price: 1
output_price: 1
# NOTE(review): the patch is presumably merged into the request body so the
# provider returns reasoning output - confirm.
- name: perplexity/sonar-reasoning-pro
max_input_tokens: 128000
input_price: 2
output_price: 8
patch:
body:
include_reasoning: true
- name: perplexity/sonar-reasoning
max_input_tokens: 127000
input_price: 1
output_price: 5
patch:
body:
include_reasoning: true
- name: perplexity/sonar-deep-research
max_input_tokens: 200000
input_price: 2
output_price: 8
patch:
body:
include_reasoning: true
- name: perplexity/r1-1776
max_input_tokens: 127000
input_price: 2
output_price: 8
patch:
body:
include_reasoning: true
# MiniMax-01 entry: declares a 1000192-token input limit and prices only; no
# capability flags are set.
- name: minimax/minimax-01
max_input_tokens: 1000192
input_price: 0.2
output_price: 1.1
# Provider "github": chat and embedding models. No entry under this provider
# declares input_price or output_price.
- provider: github
models:
- name: gpt-4o
max_input_tokens: 128000
supports_function_calling: true
- name: gpt-4o-mini
max_input_tokens: 128000
supports_function_calling: true
# o-series entries: each sets a system_prompt_prefix and a patch that sets
# max_tokens, temperature and top_p to null.
# NOTE(review): the nulls presumably strip those parameters from the request
# body - confirm against the consumer of this file.
- name: o3-mini
max_input_tokens: 200000
supports_function_calling: true
supports_vision: true
system_prompt_prefix: Formatting re-enabled
patch:
body:
max_tokens: null
temperature: null
top_p: null
# Alias entry: real_name points at o3-mini and the patch additionally sets
# reasoning_effort: high.
- name: o3-mini-high
real_name: o3-mini
max_input_tokens: 200000
supports_function_calling: true
supports_vision: true
system_prompt_prefix: Formatting re-enabled
patch:
body:
reasoning_effort: high
max_tokens: null
temperature: null
top_p: null
- name: o1
max_input_tokens: 200000
supports_function_calling: true
supports_vision: true
system_prompt_prefix: Formatting re-enabled
patch:
body:
max_tokens: null
temperature: null
top_p: null
# o1-preview and o1-mini set no_stream and no_system_message, and declare no
# capability flags or system_prompt_prefix.
- name: o1-preview
max_input_tokens: 128000
no_stream: true
no_system_message: true
patch:
body:
max_tokens: null
temperature: null
top_p: null
- name: o1-mini
max_input_tokens: 128000
no_stream: true
no_system_message: true
patch:
body:
max_tokens: null
temperature: null
top_p: null
# Embedding entries (type: embedding) use chunking/batch keys instead of
# max_input_tokens.
- name: text-embedding-3-large
type: embedding
max_tokens_per_chunk: 8191
default_chunk_size: 2000
max_batch_size: 100
- name: text-embedding-3-small
type: embedding
max_tokens_per_chunk: 8191
default_chunk_size: 2000
max_batch_size: 100
# Llama entries: only an input limit is declared; the two vision variants add
# supports_vision.
- name: llama-3.3-70b-instruct
max_input_tokens: 131072
- name: meta-llama-3.1-405b-instruct
max_input_tokens: 131072
- name: meta-llama-3.1-70b-instruct
max_input_tokens: 131072
- name: meta-llama-3.1-8b-instruct
max_input_tokens: 131072
- name: llama-3.2-90b-vision-instruct
max_input_tokens: 131072
supports_vision: true
- name: llama-3.2-11b-vision-instruct
max_input_tokens: 131072
supports_vision: true
# Mistral entries: all declare function calling.
- name: mistral-large-2411
max_input_tokens: 128000
supports_function_calling: true
- name: mistral-small-2503
max_input_tokens: 128000
supports_function_calling: true
- name: codestral-2501
max_input_tokens: 256000
supports_function_calling: true
- name: mistral-nemo
max_input_tokens: 128000
supports_function_calling: true
# Cohere chat entries, followed by Cohere embedding entries (512-token
# chunks, batch size 96).
- name: cohere-command-r-plus-08-2024
max_input_tokens: 128000
supports_function_calling: true
- name: cohere-command-r-08-2024
max_input_tokens: 128000
supports_function_calling: true
- name: cohere-embed-v3-english
type: embedding
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
- name: cohere-embed-v3-multilingual
type: embedding
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
- name: ai21-jamba-1.5-large
max_input_tokens: 256000
supports_function_calling: true
- name: ai21-jamba-1.5-mini
max_input_tokens: 256000
supports_function_calling: true
# Unlike the R1 entries of other vendors in this file, no include_reasoning
# patch is declared here.
- name: deepseek-r1
max_input_tokens: 163840
# Phi entries: only an input limit is declared; the vision variant adds
# supports_vision.
- name: phi-4
max_input_tokens: 16384
- name: phi-4-mini-instruct
max_input_tokens: 128000
- name: phi-3.5-moe-instruct
max_input_tokens: 128000
- name: phi-3.5-mini-instruct
max_input_tokens: 128000
- name: phi-3.5-vision-instruct
max_input_tokens: 128000
supports_vision: true
# Provider "deepinfra": chat models followed by embedding models.
# NOTE(review): price unit is not stated in this chunk - confirm against the
# schema (presumably USD per 1M tokens).
- provider: deepinfra
models:
- name: meta-llama/Llama-3.3-70B-Instruct
max_input_tokens: 131072
input_price: 0.23
output_price: 0.40
- name: meta-llama/Meta-Llama-3.1-405B-Instruct
max_input_tokens: 32768
input_price: 0.8
output_price: 0.8
supports_function_calling: true
- name: meta-llama/Meta-Llama-3.1-70B-Instruct
max_input_tokens: 131072
input_price: 0.23
output_price: 0.4
supports_function_calling: true
- name: meta-llama/Meta-Llama-3.1-8B-Instruct
max_input_tokens: 131072
input_price: 0.03
output_price: 0.05
supports_function_calling: true
# Llama 3.2 Vision entries are flagged supports_vision so image input is
# enabled for them, matching how the same models are declared under the other
# providers in this file.
- name: meta-llama/Llama-3.2-90B-Vision-Instruct
max_input_tokens: 131072
input_price: 0.35
output_price: 0.4
supports_vision: true
- name: meta-llama/Llama-3.2-11B-Vision-Instruct
max_input_tokens: 131072
input_price: 0.055
output_price: 0.055
supports_vision: true
- name: Qwen/Qwen2.5-72B-Instruct
max_input_tokens: 32768
input_price: 0.23
output_price: 0.40
supports_function_calling: true
- name: Qwen/QwQ-32B
max_input_tokens: 131072
input_price: 0.12
output_price: 0.18
- name: Qwen/Qwen2.5-Coder-32B-Instruct
max_input_tokens: 32768
input_price: 0.07
output_price: 0.16
- name: deepseek-ai/DeepSeek-V3-0324
max_input_tokens: 163840
input_price: 0.40
output_price: 0.89
- name: deepseek-ai/DeepSeek-R1
max_input_tokens: 65536
input_price: 0.75
output_price: 2.4
- name: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
max_input_tokens: 131072
input_price: 0.23
output_price: 0.69
- name: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
max_input_tokens: 131072
input_price: 0.12
output_price: 0.18
- name: google/gemma-3-27b-it
max_input_tokens: 131072
input_price: 0.1
output_price: 0.2
- name: mistralai/Mistral-Small-24B-Instruct-2501
max_input_tokens: 32768
input_price: 0.07
output_price: 0.14
- name: mistralai/Mistral-Nemo-Instruct-2407
max_input_tokens: 131072
input_price: 0.035
output_price: 0.08
# Embedding entries (type: embedding): all priced at 0.01 with a batch size of
# 100; only BAAI/bge-m3 uses 8192-token chunks, the rest use 512.
- name: BAAI/bge-large-en-v1.5
type: embedding
input_price: 0.01
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-m3
type: embedding
input_price: 0.01
max_tokens_per_chunk: 8192
default_chunk_size: 2000
max_batch_size: 100
- name: intfloat/e5-large-v2
type: embedding
input_price: 0.01
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: intfloat/multilingual-e5-large
type: embedding
input_price: 0.01
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: thenlper/gte-large
type: embedding
input_price: 0.01
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
# Provider "jina": embedding and reranker models, all listed with
# input_price: 0.
- provider: jina
models:
- name: jina-embeddings-v3
type: embedding
input_price: 0
max_tokens_per_chunk: 8192
default_chunk_size: 2000
max_batch_size: 100
# jina-colbert-v2 appears twice under this provider: here as an embedding
# model and further down as a reranker. The two entries differ by "type".
- name: jina-colbert-v2
type: embedding
input_price: 0
max_tokens_per_chunk: 8192
default_chunk_size: 1500
max_batch_size: 100
- name: jina-clip-v2
type: embedding
input_price: 0
max_tokens_per_chunk: 8192
default_chunk_size: 1500
max_batch_size: 100
# Reranker entries (type: reranker) declare max_input_tokens rather than the
# chunk/batch keys used by embedding entries.
- name: jina-colbert-v2
type: reranker
max_input_tokens: 8192
input_price: 0
- name: jina-reranker-v2-base-multilingual
type: reranker
max_input_tokens: 8192
input_price: 0
# Provider "voyageai": embedding and reranker models.
- provider: voyageai
models:
# Embedding entries declare both max_input_tokens and max_tokens_per_chunk
# (32000 for all three), with a batch size of 128.
# NOTE(review): max_input_tokens here presumably is the per-request total
# across a batch rather than a per-text limit - confirm with the provider docs.
- name: voyage-3-large
type: embedding
max_input_tokens: 120000
input_price: 0.18
max_tokens_per_chunk: 32000
default_chunk_size: 2000
max_batch_size: 128
- name: voyage-3
type: embedding
max_input_tokens: 320000
input_price: 0.06
max_tokens_per_chunk: 32000
default_chunk_size: 2000
max_batch_size: 128
- name: voyage-3-lite
type: embedding
max_input_tokens: 1000000
input_price: 0.02
max_tokens_per_chunk: 32000
default_chunk_size: 1000
max_batch_size: 128
# Reranker entries (type: reranker): only an input limit and a price.
- name: rerank-2
type: reranker
max_input_tokens: 16000
input_price: 0.05
- name: rerank-2-lite
type: reranker
max_input_tokens: 8000
input_price: 0.02