- platform: openai
  # OpenAI chat models: per-model input/output token limits, prices and
  # vision capability.
  # NOTE(review): the price unit is not stated in this file (presumably USD
  # per 1M tokens) — confirm against the code that loads this list.
  models:
    - name: gpt-3.5-turbo
      max_input_tokens: 16385
      max_output_tokens: 4096
      input_price: 0.5
      output_price: 1.5
    - name: gpt-3.5-turbo-1106
      max_input_tokens: 16385
      max_output_tokens: 4096
      input_price: 1
      output_price: 2
    - name: gpt-4-turbo
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 10
      output_price: 30
      supports_vision: true
    - name: gpt-4-turbo-preview
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 10
      output_price: 30
    - name: gpt-4-1106-preview
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 10
      output_price: 30
    # Only entry on this platform that sets pass_max_tokens.
    - name: gpt-4-vision-preview
      max_input_tokens: 128000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 10
      output_price: 30
      supports_vision: true
    - name: gpt-4
      max_input_tokens: 8192
      max_output_tokens: 4096
      input_price: 30
      output_price: 60
    - name: gpt-4-32k
      max_input_tokens: 32768
      max_output_tokens: 4096
      input_price: 60
      output_price: 120
- platform: gemini
  # Gemini models: token limits, prices and vision capability.
  models:
    - name: gemini-1.0-pro-latest
      max_input_tokens: 30720
      max_output_tokens: 2048
      input_price: 0.5
      output_price: 1.5
    - name: gemini-1.0-pro-vision-latest
      max_input_tokens: 12288
      max_output_tokens: 4096
      input_price: 0.5
      output_price: 1.5
      supports_vision: true
    - name: gemini-1.5-pro-latest
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 7
      output_price: 21
      supports_vision: true
- platform: claude
  # Claude 3 family. Every entry sets pass_max_tokens and supports_vision.
  # NOTE(review): pass_max_tokens presumably makes the client send
  # max_output_tokens with each request — confirm against the loader.
  models:
    - name: claude-3-opus-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
    - name: claude-3-sonnet-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
    - name: claude-3-haiku-20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
- platform: mistral
  # Mistral models. Only an input limit and prices are listed; no entry
  # declares max_output_tokens.
  models:
    - name: open-mistral-7b
      max_input_tokens: 32000
      input_price: 0.25
      output_price: 0.25
    - name: open-mixtral-8x7b
      max_input_tokens: 32000
      input_price: 0.7
      output_price: 0.7
    - name: open-mixtral-8x22b
      max_input_tokens: 64000
      input_price: 2
      output_price: 6
    - name: mistral-small-latest
      max_input_tokens: 32000
      input_price: 2
      output_price: 6
    - name: mistral-large-latest
      max_input_tokens: 32000
      input_price: 8
      output_price: 24
- platform: cohere
  # Cohere Command R models: token limits and prices.
  models:
    - name: command-r
      max_input_tokens: 128000
      max_output_tokens: 4000
      input_price: 0.5
      output_price: 1.5
    - name: command-r-plus
      max_input_tokens: 128000
      max_output_tokens: 4000
      input_price: 3
      output_price: 15
- platform: perplexity
  # Perplexity-hosted models. Each entry lists the same value for its input
  # and output token limits, and the same input and output price.
  models:
    - name: llama-3-sonar-small-32k-chat
      max_input_tokens: 32768
      max_output_tokens: 32768
      input_price: 0.2
      output_price: 0.2
    - name: llama-3-sonar-large-32k-chat
      max_input_tokens: 32768
      max_output_tokens: 32768
      input_price: 0.6
      output_price: 0.6
    - name: llama-3-8b-instruct
      max_input_tokens: 8192
      max_output_tokens: 8192
      input_price: 0.2
      output_price: 0.2
    - name: llama-3-70b-instruct
      max_input_tokens: 8192
      max_output_tokens: 8192
      input_price: 1
      output_price: 1
    - name: mixtral-8x7b-instruct
      max_input_tokens: 16384
      max_output_tokens: 16384
      input_price: 0.6
      output_price: 0.6
- platform: groq
  # Groq-hosted models. Each entry lists the same value for its input and
  # output token limits.
  models:
    - name: llama3-8b-8192
      max_input_tokens: 8192
      max_output_tokens: 8192
      input_price: 0.05
      output_price: 0.10
    - name: llama3-70b-8192
      max_input_tokens: 8192
      max_output_tokens: 8192
      input_price: 0.59
      output_price: 0.79
    - name: mixtral-8x7b-32768
      max_input_tokens: 32768
      max_output_tokens: 32768
      input_price: 0.27
      output_price: 0.27
    - name: gemma-7b-it
      max_input_tokens: 8192
      max_output_tokens: 8192
      input_price: 0.10
      output_price: 0.10
- platform: vertexai
  # Gemini models as listed for Vertex AI: token limits, prices and vision
  # capability.
  models:
    - name: gemini-1.0-pro
      max_input_tokens: 24568
      max_output_tokens: 8192
      input_price: 0.125
      output_price: 0.375
    - name: gemini-1.0-pro-vision
      max_input_tokens: 14336
      max_output_tokens: 2048
      input_price: 0.125
      output_price: 0.375
      supports_vision: true
    - name: gemini-1.5-pro-preview-0409
      max_input_tokens: 1000000
      max_output_tokens: 8192
      input_price: 2.5
      output_price: 7.5
      supports_vision: true
- platform: vertexai-claude
  # Claude 3 family under the vertexai-claude platform. Model names join
  # the model and its date stamp with `@`.
  models:
    - name: claude-3-opus@20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
    - name: claude-3-sonnet@20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
    - name: claude-3-haiku@20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
- platform: bedrock
  # Bedrock-hosted Anthropic, Meta and Mistral models. Every entry sets
  # pass_max_tokens and declares both token limits.
  # NOTE(review): the price unit is not stated in this file (presumably USD
  # per 1M tokens) — confirm against the code that loads this list.
  models:
    - name: anthropic.claude-3-opus-20240229-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
    - name: anthropic.claude-3-sonnet-20240229-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
    - name: anthropic.claude-3-haiku-20240307-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
    - name: meta.llama3-8b-instruct-v1:0
      max_input_tokens: 8192
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 0.4
      output_price: 0.6
    - name: meta.llama3-70b-instruct-v1:0
      max_input_tokens: 8192
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 2.65
      output_price: 3.5
    - name: mistral.mistral-7b-instruct-v0:2
      max_input_tokens: 32000
      max_output_tokens: 8192
      pass_max_tokens: true
      input_price: 0.15
      output_price: 0.2
    - name: mistral.mixtral-8x7b-instruct-v0:1
      max_input_tokens: 32000
      max_output_tokens: 8192
      pass_max_tokens: true
      input_price: 0.45
      output_price: 0.7
    - name: mistral.mistral-large-2402-v1:0
      max_input_tokens: 32000
      max_output_tokens: 8192
      pass_max_tokens: true
      input_price: 8
      # 8 in / 24 out, the same ratio as mistral-large-latest under the
      # mistral platform; output tokens are never cheaper than input here.
      output_price: 24
- platform: cloudflare
  # Cloudflare-hosted models. No prices are listed; every entry uses
  # 4096-token limits and sets pass_max_tokens.
  # Names are quoted because a plain YAML scalar cannot start with `@`.
  models:
    - name: '@cf/meta/llama-3-8b-instruct'
      max_input_tokens: 4096
      max_output_tokens: 4096
      pass_max_tokens: true
    - name: '@cf/mistral/mistral-7b-instruct-v0.2-lora'
      max_input_tokens: 4096
      max_output_tokens: 4096
      pass_max_tokens: true
    - name: '@cf/google/gemma-7b-it-lora'
      max_input_tokens: 4096
      max_output_tokens: 4096
      pass_max_tokens: true
    - name: '@cf/qwen/qwen1.5-14b-chat-awq'
      max_input_tokens: 4096
      max_output_tokens: 4096
      pass_max_tokens: true
    - name: '@hf/thebloke/deepseek-coder-6.7b-instruct-awq'
      max_input_tokens: 4096
      max_output_tokens: 4096
      pass_max_tokens: true
    - name: '@hf/nexusflow/starling-lm-7b-beta'
      max_input_tokens: 4096
      max_output_tokens: 4096
      pass_max_tokens: true
- platform: replicate
  # Replicate-hosted Llama 3 and Mistral models. Every entry sets
  # pass_max_tokens and declares both token limits.
  models:
    - name: meta/meta-llama-3-70b-instruct
      max_input_tokens: 8192
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 0.65
      output_price: 2.75
    - name: meta/meta-llama-3-8b-instruct
      max_input_tokens: 8192
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 0.05
      output_price: 0.25
    - name: mistralai/mistral-7b-instruct-v0.2
      max_input_tokens: 32000
      max_output_tokens: 8192
      pass_max_tokens: true
      input_price: 0.05
      output_price: 0.25
    - name: mistralai/mixtral-8x7b-instruct-v0.1
      max_input_tokens: 32000
      max_output_tokens: 8192
      pass_max_tokens: true
      input_price: 0.3
      output_price: 1
- platform: ernie
  # ERNIE models. Every entry sets pass_max_tokens and declares both token
  # limits.
  models:
    - name: ernie-4.0-8k-preview
      max_input_tokens: 5120
      max_output_tokens: 2048
      pass_max_tokens: true
      input_price: 16.8
      output_price: 16.8
    - name: ernie-3.5-8k-preview
      max_input_tokens: 5120
      max_output_tokens: 2048
      pass_max_tokens: true
      input_price: 1.68
      output_price: 1.68
    - name: ernie-speed-128k
      max_input_tokens: 124000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 0.56
      output_price: 1.12
    - name: ernie-lite-8k
      max_input_tokens: 7168
      max_output_tokens: 2048
      pass_max_tokens: true
      input_price: 0.42
      output_price: 0.84
    - name: ernie-tiny-8k
      max_input_tokens: 7168
      max_output_tokens: 2048
      pass_max_tokens: true
      input_price: 0.14
      output_price: 0.14
- platform: qianwen
  # Qwen models: token limits, prices and vision capability.
  models:
    - name: qwen-turbo
      max_input_tokens: 6000
      max_output_tokens: 1500
      input_price: 1.12
      output_price: 1.12
    - name: qwen-plus
      max_input_tokens: 30000
      max_output_tokens: 2000
      input_price: 2.8
      output_price: 2.8
    - name: qwen-max
      max_input_tokens: 6000
      max_output_tokens: 2000
      input_price: 16.8
      output_price: 16.8
    # NOTE(review): no prices are listed for this entry — confirm whether
    # that is intentional.
    - name: qwen-max-longcontext
      max_input_tokens: 28000
      max_output_tokens: 2000
    # The two vision entries below list prices but no token limits.
    - name: qwen-vl-plus
      input_price: 1.12
      output_price: 1.12
      supports_vision: true
    - name: qwen-vl-max
      input_price: 2.8
      output_price: 2.8
      supports_vision: true
- platform: moonshot
  # Moonshot v1 models: an input limit and prices only.
  models:
    - name: moonshot-v1-8k
      max_input_tokens: 8000
      input_price: 1.68
      output_price: 1.68
    - name: moonshot-v1-32k
      max_input_tokens: 32000
      input_price: 3.36
      output_price: 3.36
    - name: moonshot-v1-128k
      max_input_tokens: 128000
      input_price: 8.4
      output_price: 8.4
- platform: deepseek
  # DeepSeek chat and coder models: an input limit and prices only.
  models:
    - name: deepseek-chat
      max_input_tokens: 32768
      input_price: 0.14
      output_price: 0.28
    - name: deepseek-coder
      max_input_tokens: 16384
      input_price: 0.14
      output_price: 0.28
- platform: zhipuai
  # GLM models: an input limit, prices, and vision capability for glm-4v.
  models:
    - name: glm-4
      max_input_tokens: 128000
      input_price: 14
      output_price: 14
    - name: glm-4v
      max_input_tokens: 2048
      input_price: 14
      output_price: 14
      supports_vision: true
    - name: glm-3-turbo
      max_input_tokens: 128000
      input_price: 0.7
      output_price: 0.7
- platform: anyscale
  # Anyscale-hosted open models. Each entry lists an input limit and the
  # same price for input and output.
  models:
    - name: meta-llama/Meta-Llama-3-8B-Instruct
      max_input_tokens: 8192
      input_price: 0.15
      output_price: 0.15
    - name: meta-llama/Meta-Llama-3-70B-Instruct
      max_input_tokens: 8192
      input_price: 1.0
      output_price: 1.0
    - name: codellama/CodeLlama-70b-Instruct-hf
      max_input_tokens: 4096
      input_price: 1.0
      output_price: 1.0
    - name: mistralai/Mistral-7B-Instruct-v0.1
      max_input_tokens: 16384
      input_price: 0.15
      output_price: 0.15
    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
      max_input_tokens: 32768
      input_price: 0.50
      output_price: 0.50
    - name: mistralai/Mixtral-8x22B-Instruct-v0.1
      max_input_tokens: 65536
      input_price: 0.90
      output_price: 0.90
    - name: google/gemma-7b-it
      max_input_tokens: 8192
      input_price: 0.15
      output_price: 0.15
- platform: deepinfra
  # DeepInfra-hosted open models: an input limit and prices only.
  models:
    - name: meta-llama/Meta-Llama-3-8B-Instruct
      max_input_tokens: 8192
      input_price: 0.08
      output_price: 0.08
    - name: meta-llama/Meta-Llama-3-70B-Instruct
      max_input_tokens: 8192
      input_price: 0.59
      output_price: 0.79
    - name: mistralai/Mistral-7B-Instruct-v0.2
      max_input_tokens: 32768
      input_price: 0.07
      output_price: 0.07
    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
      max_input_tokens: 32768
      input_price: 0.24
      output_price: 0.24
    - name: mistralai/Mixtral-8x22B-Instruct-v0.1
      max_input_tokens: 65536
      input_price: 0.65
      output_price: 0.65
    - name: google/gemma-1.1-7b-it
      max_input_tokens: 8192
      input_price: 0.07
      output_price: 0.07
    - name: databricks/dbrx-instruct
      max_input_tokens: 32768
      input_price: 0.6
      output_price: 0.6
    - name: 01-ai/Yi-34B-Chat
      max_input_tokens: 4096
      input_price: 0.6
      output_price: 0.6
- platform: fireworks
  # Fireworks-hosted models. Names are full `accounts/fireworks/models/...`
  # paths; each entry lists an input limit and the same price for input
  # and output.
  models:
    - name: accounts/fireworks/models/llama-v3-8b-instruct
      max_input_tokens: 8192
      input_price: 0.2
      output_price: 0.2
    - name: accounts/fireworks/models/llama-v3-70b-instruct
      max_input_tokens: 8192
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/mistral-7b-instruct-v0p2
      max_input_tokens: 32768
      input_price: 0.2
      output_price: 0.2
    - name: accounts/fireworks/models/mixtral-8x7b-instruct
      max_input_tokens: 32768
      input_price: 0.5
      output_price: 0.5
    - name: accounts/fireworks/models/mixtral-8x22b-instruct
      max_input_tokens: 65536
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/qwen-72b-chat
      max_input_tokens: 4096
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/gemma-7b-it
      max_input_tokens: 8192
      input_price: 0.2
      output_price: 0.2
    - name: accounts/fireworks/models/dbrx-instruct
      max_input_tokens: 32768
      input_price: 1.6
      output_price: 1.6
- platform: openrouter
  # Models reached through OpenRouter. Every name uses the `vendor/model`
  # form, optionally followed by a `:variant` suffix (`:nitro`, `:extended`,
  # `:free`).
  # NOTE(review): the price unit is not stated in this file (presumably USD
  # per 1M tokens) — confirm against the code that loads this list.
  models:
    - name: meta-llama/llama-3-8b-instruct
      max_input_tokens: 8192
      input_price: 0.1
      output_price: 0.1
    - name: meta-llama/llama-3-8b-instruct:nitro
      max_input_tokens: 8192
      input_price: 0.2
      output_price: 0.2
    - name: meta-llama/llama-3-8b-instruct:extended
      max_input_tokens: 16384
      input_price: 0.275
      output_price: 0.283
    - name: meta-llama/llama-3-70b-instruct
      max_input_tokens: 8192
      input_price: 0.81
      output_price: 0.81
    - name: meta-llama/llama-3-70b-instruct:nitro
      max_input_tokens: 8192
      input_price: 0.9
      output_price: 0.9
    - name: mistralai/mistral-7b-instruct:free
      max_input_tokens: 32768
      input_price: 0.0
      output_price: 0.0
    - name: codellama/codellama-70b-instruct
      max_input_tokens: 2048
      input_price: 0.81
      output_price: 0.81
    - name: google/gemma-7b-it:free
      max_input_tokens: 8192
      input_price: 0.0
      output_price: 0.0
    - name: 01-ai/yi-34b-chat
      max_input_tokens: 4096
      input_price: 0.72
      output_price: 0.72
    - name: openai/gpt-3.5-turbo
      max_input_tokens: 16385
      input_price: 0.5
      output_price: 1.5
    - name: openai/gpt-4-turbo
      max_input_tokens: 128000
      input_price: 10
      output_price: 30
      supports_vision: true
    - name: openai/gpt-4-turbo-preview
      max_input_tokens: 128000
      input_price: 10
      output_price: 30
    # Carries the `openai/` vendor prefix like every other entry on this
    # platform.
    - name: openai/gpt-4-vision-preview
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 10
      output_price: 30
      supports_vision: true
    - name: openai/gpt-4
      max_input_tokens: 8192
      input_price: 30
      output_price: 60
    - name: openai/gpt-4-32k
      max_input_tokens: 32768
      input_price: 60
      output_price: 120
    # NOTE(review): the google/* input limits below differ from the values
    # listed for the same model families under the gemini and vertexai
    # platforms — confirm against OpenRouter's own model listing.
    - name: google/gemini-pro
      max_input_tokens: 91728
      input_price: 0.125
      output_price: 0.375
    - name: google/gemini-pro-vision
      max_input_tokens: 45875
      input_price: 0.125
      output_price: 0.375
      supports_vision: true
    - name: google/gemini-pro-1.5
      max_input_tokens: 2800000
      input_price: 2.5
      output_price: 7.5
      supports_vision: true
    - name: anthropic/claude-3-opus
      max_input_tokens: 200000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
    - name: anthropic/claude-3-sonnet
      max_input_tokens: 200000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
    - name: anthropic/claude-3-haiku
      max_input_tokens: 200000
      max_output_tokens: 4096
      pass_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
    - name: mistralai/mixtral-8x7b-instruct
      max_input_tokens: 32768
      input_price: 0.24
      output_price: 0.24
    - name: mistralai/mixtral-8x22b-instruct
      max_input_tokens: 65536
      input_price: 0.65
      output_price: 0.65
    - name: mistralai/mistral-small
      max_input_tokens: 32000
      input_price: 2
      output_price: 6
    - name: mistralai/mistral-large
      max_input_tokens: 32000
      input_price: 8
      output_price: 24
    - name: databricks/dbrx-instruct
      max_input_tokens: 32768
      input_price: 0.6
      output_price: 0.6
    - name: cohere/command-r
      max_input_tokens: 128000
      input_price: 0.5
      output_price: 1.5
    - name: cohere/command-r-plus
      max_input_tokens: 128000
      input_price: 3
      output_price: 15
- platform: octoai
  # OctoAI-hosted Llama 3 and Mistral models. Each entry lists an input
  # limit and the same price for input and output.
  models:
    - name: meta-llama-3-8b-instruct
      max_input_tokens: 8192
      input_price: 0.13
      output_price: 0.13
    - name: meta-llama-3-70b-instruct
      max_input_tokens: 8192
      input_price: 0.86
      output_price: 0.86
    - name: mistral-7b-instruct
      max_input_tokens: 32768
      input_price: 0.13
      output_price: 0.13
    - name: mixtral-8x7b-instruct
      max_input_tokens: 32768
      input_price: 0.34
      output_price: 0.34
    - name: mixtral-8x22b-instruct
      max_input_tokens: 65536
      input_price: 0.86
      output_price: 0.86
- platform: together
  # Together-hosted open models. Each entry lists an input limit and the
  # same price for input and output.
  models:
    - name: meta-llama/Llama-3-8b-chat-hf
      max_input_tokens: 8000
      input_price: 0.2
      output_price: 0.2
    - name: meta-llama/Llama-3-70b-chat-hf
      max_input_tokens: 8000
      input_price: 0.9
      output_price: 0.9
    - name: mistralai/Mistral-7B-Instruct-v0.2
      max_input_tokens: 32768
      input_price: 0.2
      output_price: 0.2
    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9
    - name: mistralai/Mixtral-8x22B-Instruct-v0.1
      max_input_tokens: 65536
      input_price: 1.2
      output_price: 1.2
    - name: google/gemma-7b-it
      max_input_tokens: 8192
      input_price: 0.2
      output_price: 0.2
    - name: Qwen/Qwen1.5-72B-Chat
      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9
    - name: databricks/dbrx-instruct
      max_input_tokens: 32768
      input_price: 1.2
      output_price: 1.2
    - name: zero-one-ai/Yi-34B-Chat
      max_input_tokens: 4096
      input_price: 0.8
      output_price: 0.8
    - name: deepseek-ai/deepseek-llm-67b-chat
      max_input_tokens: 4096
      input_price: 0.9
      output_price: 0.9
    - name: deepseek-ai/deepseek-coder-33b-instruct
      max_input_tokens: 16384
      input_price: 0.8
      output_price: 0.8
    - name: allenai/OLMo-7B-Instruct
      max_input_tokens: 2048
      input_price: 0.2
      output_price: 0.2