[
{
"id": "qwen/qwen3-embedding-0.6b:q8_0",
"name": "Qwen3-Embedding-0.6B",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"embed"
],
"context_length": 8192,
"param_count": "0.6B",
"quantization": "Q8_0",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 639,
"ram_mb": 639
},
"source": {
"type": "local",
"hf_repo": "Qwen/Qwen3-Embedding-0.6B-GGUF",
"hf_filename": "Qwen3-Embedding-0.6B-Q8_0.gguf",
"tokenizer_repo": "Qwen/Qwen3-Embedding-0.6B"
},
"tags": [
"builtin",
"embedding"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "qwen/qwen3-reranker-0.6b:q8_0",
"name": "Qwen3-Reranker-0.6B",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"rerank"
],
"context_length": 32768,
"param_count": "0.6B",
"quantization": "Q8_0",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 640,
"ram_mb": 640
},
"source": {
"type": "local",
"hf_repo": "Qwen/Qwen3-Reranker-0.6B-GGUF",
"hf_filename": "Qwen3-Reranker-0.6B-Q8_0.gguf",
"tokenizer_repo": "Qwen/Qwen3-Reranker-0.6B"
},
"tags": [
"builtin",
"reranker"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "qwen/qwen3-0.6b:q8_0",
"name": "Qwen3-0.6B",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"generate",
"classify",
"reasoning"
],
"context_length": 32768,
"param_count": "0.6B",
"quantization": "Q8_0",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": 100.0
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 650,
"ram_mb": 650
},
"source": {
"type": "local",
"hf_repo": "Qwen/Qwen3-0.6B-GGUF",
"hf_filename": "Qwen3-0.6B-Q8_0.gguf",
"tokenizer_repo": "Qwen/Qwen3-0.6B"
},
"tags": [
"builtin",
"fast"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "qwen/qwen3-1.7b:q8_0",
"name": "Qwen3-1.7B",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"generate",
"code",
"reasoning"
],
"context_length": 32768,
"param_count": "1.7B",
"quantization": "Q8_0",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": 70.0
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 1800,
"ram_mb": 1800
},
"source": {
"type": "local",
"hf_repo": "Qwen/Qwen3-1.7B-GGUF",
"hf_filename": "Qwen3-1.7B-Q8_0.gguf",
"tokenizer_repo": "Qwen/Qwen3-1.7B"
},
"tags": [
"builtin"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "qwen/qwen3-4b:q4_k_m",
"name": "Qwen3-4B",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"generate",
"code",
"reasoning",
"tool_use",
"multi_tool_call"
],
"context_length": 32768,
"param_count": "4B",
"quantization": "Q4_K_M",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": 45.0
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 2500,
"ram_mb": 2500
},
"source": {
"type": "local",
"hf_repo": "Qwen/Qwen3-4B-GGUF",
"hf_filename": "Qwen3-4B-Q4_K_M.gguf",
"tokenizer_repo": "Qwen/Qwen3-4B"
},
"tags": [
"builtin",
"code"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "qwen/qwen3-8b:q4_k_m",
"name": "Qwen3-8B",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"generate",
"code",
"reasoning",
"summarize",
"tool_use",
"multi_tool_call"
],
"context_length": 131072,
"param_count": "8B",
"quantization": "Q4_K_M",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": 25.0
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 4900,
"ram_mb": 4900
},
"source": {
"type": "local",
"hf_repo": "Qwen/Qwen3-8B-GGUF",
"hf_filename": "Qwen3-8B-Q4_K_M.gguf",
"tokenizer_repo": "Qwen/Qwen3-8B"
},
"tags": [
"builtin",
"reasoning"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "qwen/qwen3-30b-a3b:q4_k_m",
"name": "Qwen3-30B-A3B",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"generate",
"code",
"reasoning",
"summarize",
"tool_use",
"multi_tool_call"
],
"context_length": 131072,
"param_count": "30B (3B active)",
"quantization": "Q4_K_M",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": 35.0
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 17000,
"ram_mb": 17000
},
"source": {
"type": "local",
"hf_repo": "Qwen/Qwen3-30B-A3B-GGUF",
"hf_filename": "Qwen3-30B-A3B-Q4_K_M.gguf",
"tokenizer_repo": "Qwen/Qwen3-30B-A3B"
},
"tags": [
"builtin",
"moe",
"reasoning"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/qwen3-embedding-0.6b:mxfp8",
"name": "Qwen3-Embedding-0.6B-MLX",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"embed"
],
"context_length": 8192,
"param_count": "0.6B",
"quantization": "mxfp8",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 620,
"ram_mb": 620
},
"source": {
"type": "mlx",
"hf_repo": "mlx-community/Qwen3-Embedding-0.6B-mxfp8",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx",
"embedding"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/qwen3-reranker-0.6b:bf16",
"name": "Qwen3-Reranker-0.6B-MLX",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"rerank"
],
"context_length": 32768,
"param_count": "0.6B",
"quantization": "bf16",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 1200,
"ram_mb": 1200
},
"source": {
"type": "mlx",
"hf_repo": "mlx-community/Qwen3-Reranker-0.6B-bf16",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx",
"reranker"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/qwen3-0.6b:6bit",
"name": "Qwen3-0.6B-MLX",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"generate",
"classify",
"reasoning"
],
"context_length": 32768,
"param_count": "0.6B",
"quantization": "6bit",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": 140.0
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 500,
"ram_mb": 500
},
"source": {
"type": "mlx",
"hf_repo": "mlx-community/Qwen3-0.6B-6bit",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx",
"fast"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/qwen3-1.7b:3bit",
"name": "Qwen3-1.7B-MLX",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"generate",
"code",
"reasoning"
],
"context_length": 32768,
"param_count": "1.7B",
"quantization": "3bit",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": 110.0
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 800,
"ram_mb": 800
},
"source": {
"type": "mlx",
"hf_repo": "mlx-community/Qwen3-1.7B-3bit",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/qwen3-4b:4bit",
"name": "Qwen3-4B-MLX",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"generate",
"code",
"reasoning",
"tool_use",
"multi_tool_call"
],
"context_length": 32768,
"param_count": "4B",
"quantization": "4bit",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": 90.0
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 2400,
"ram_mb": 2400
},
"source": {
"type": "mlx",
"hf_repo": "mlx-community/Qwen3-4B-4bit",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx",
"code"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/qwen3-8b:4bit",
"name": "Qwen3-8B-MLX",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"generate",
"code",
"reasoning",
"summarize",
"tool_use",
"multi_tool_call"
],
"context_length": 131072,
"param_count": "8B",
"quantization": "4bit",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": 55.0
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 4800,
"ram_mb": 4800
},
"source": {
"type": "mlx",
"hf_repo": "mlx-community/Qwen3-8B-4bit",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx",
"reasoning"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/qwen3-30b-a3b:4bit",
"name": "Qwen3-30B-A3B-MLX",
"provider": "qwen",
"family": "qwen3",
"version": "1.0",
"capabilities": [
"generate",
"code",
"reasoning",
"summarize",
"tool_use",
"multi_tool_call"
],
"context_length": 131072,
"param_count": "30B (3B active)",
"quantization": "4bit",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": 80.0
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 16500,
"ram_mb": 16500
},
"source": {
"type": "mlx",
"hf_repo": "mlx-community/Qwen3-30B-A3B-4bit",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx",
"moe",
"reasoning"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "vllm-mlx/qwen3.6-35b-a3b:4bit",
"name": "Qwen3.6-35B-A3B-MLX-vLLM",
"provider": "qwen",
"family": "qwen3.6",
"version": "3.6",
"capabilities": [
"generate",
"code",
"reasoning",
"summarize",
"tool_use",
"multi_tool_call",
"vision"
],
"context_length": 262144,
"param_count": "35B (3B active)",
"quantization": "4bit",
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": 80.0
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 20400,
"ram_mb": 24000
},
"source": {
"type": "vllm_mlx",
"endpoint": "http://localhost:8000",
"model_name": "mlx-community/Qwen3.6-35B-A3B-4bit"
},
"tags": [
"builtin",
"vllm-mlx",
"mlx-vlm",
"local",
"apple-silicon",
"latest",
"qwen3.6",
"qwen3_5_moe",
"moe",
"reasoning"
],
"supported_params": [
"temperature",
"top_p",
"max_tokens",
"stop_sequences"
],
"public_benchmarks": []
},
{
"id": "mlx-vlm/qwen3-vl-2b:bf16",
"name": "Qwen3-VL-2B-mlx-vlm",
"provider": "qwen",
"family": "qwen3-vl",
"version": "bf16",
"capabilities": [
"generate",
"vision",
"grounding"
],
"context_length": 262144,
"param_count": "2B",
"quantization": null,
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": 50.0
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 4000,
"ram_mb": 6000
},
"source": {
"type": "mlx",
"hf_repo": "Qwen/Qwen3-VL-2B-Instruct",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx-vlm-cli",
"local",
"vision",
"requires-mlx-vlm",
"qwen3-vl"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "vllm-mlx/gemma-4-e2b-it",
"name": "gemma-4-E2B-it",
"provider": "google",
"family": "gemma-4",
"version": "1.0",
"capabilities": [
"generate",
"tool_use",
"vision",
"video_understanding",
"audio_understanding",
"reasoning"
],
"context_length": 131072,
"param_count": "2B",
"quantization": null,
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 2000,
"ram_mb": 2000
},
"source": {
"type": "vllm_mlx",
"endpoint": "http://localhost:8000",
"model_name": "google/gemma-4-E2B-it"
},
"tags": [
"builtin",
"vllm-mlx",
"local",
"multimodal"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "vllm-mlx/gemma-4-e4b-it",
"name": "gemma-4-E4B-it",
"provider": "google",
"family": "gemma-4",
"version": "1.0",
"capabilities": [
"generate",
"tool_use",
"vision",
"video_understanding",
"audio_understanding",
"reasoning"
],
"context_length": 131072,
"param_count": "4B",
"quantization": null,
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 4000,
"ram_mb": 4000
},
"source": {
"type": "vllm_mlx",
"endpoint": "http://localhost:8000",
"model_name": "google/gemma-4-E4B-it"
},
"tags": [
"builtin",
"vllm-mlx",
"local",
"multimodal"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "vllm-mlx/gemma-4-26b-a4b-it",
"name": "gemma-4-26B-A4B-it",
"provider": "google",
"family": "gemma-4",
"version": "1.0",
"capabilities": [
"generate",
"tool_use",
"vision",
"video_understanding",
"reasoning"
],
"context_length": 131072,
"param_count": "26B (4B active)",
"quantization": null,
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 16000,
"ram_mb": 16000
},
"source": {
"type": "vllm_mlx",
"endpoint": "http://localhost:8000",
"model_name": "google/gemma-4-26B-A4B-it"
},
"tags": [
"builtin",
"vllm-mlx",
"local",
"moe",
"multimodal"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/parakeet-tdt-0.6b-v3:default",
"name": "Parakeet-TDT-0.6B-v3-MLX",
"provider": "mlx-community",
"family": "parakeet",
"version": "v3",
"capabilities": [
"speech_to_text"
],
"context_length": 0,
"param_count": "0.6B",
"quantization": null,
"performance": {
"latency_p50_ms": 350,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 2510,
"ram_mb": 3000
},
"source": {
"type": "mlx",
"hf_repo": "mlx-community/parakeet-tdt-0.6b-v3",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx",
"speech",
"stt",
"realtime"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/kokoro-82m:6bit",
"name": "Kokoro-82M-6bit",
"provider": "mlx-community",
"family": "kokoro",
"version": "6bit",
"capabilities": [
"text_to_speech"
],
"context_length": 0,
"param_count": "82M",
"quantization": "6bit",
"performance": {
"latency_p50_ms": 180,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 348,
"ram_mb": 768
},
"source": {
"type": "mlx",
"hf_repo": "mlx-community/Kokoro-82M-6bit",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx",
"speech",
"tts",
"realtime",
"fast"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/kokoro-82m:bf16",
"name": "Kokoro-82M-bf16",
"provider": "mlx-community",
"family": "kokoro",
"version": "bf16",
"capabilities": [
"text_to_speech"
],
"context_length": 0,
"param_count": "82M",
"quantization": "bf16",
"performance": {
"latency_p50_ms": 220,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 512,
"ram_mb": 1024
},
"source": {
"type": "mlx",
"hf_repo": "mlx-community/Kokoro-82M-bf16",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx",
"speech",
"tts",
"realtime",
"stable"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/qwen3-tts-1.7b:5bit",
"name": "Qwen3-TTS-12Hz-1.7B-Base-5bit",
"provider": "qwen",
"family": "qwen3-tts",
"version": "5bit",
"capabilities": [
"text_to_speech"
],
"context_length": 0,
"param_count": "1.7B",
"quantization": "5bit",
"performance": {
"latency_p50_ms": 900,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 2200,
"ram_mb": 3000
},
"source": {
"type": "mlx",
"hf_repo": "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-5bit",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx",
"speech",
"tts",
"quality"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "elevenlabs/scribe-v1:latest",
"name": "scribe_v1",
"provider": "elevenlabs",
"family": "scribe",
"version": "latest",
"capabilities": [
"speech_to_text"
],
"context_length": 0,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 700,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "proprietary",
"provider": "elevenlabs",
"endpoint": "https://api.elevenlabs.io",
"auth": {
"type": "api_key_env",
"env_var": "ELEVENLABS_API_KEY"
},
"protocol": {
"chat_path": "/chat",
"content_type": "application/json",
"streaming": false,
"extra_headers": {}
}
},
"tags": [
"builtin",
"speech",
"stt",
"remote"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "elevenlabs/eleven-flash-v2.5:latest",
"name": "eleven_flash_v2_5",
"provider": "elevenlabs",
"family": "eleven-flash",
"version": "latest",
"capabilities": [
"text_to_speech"
],
"context_length": 0,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 250,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "proprietary",
"provider": "elevenlabs",
"endpoint": "https://api.elevenlabs.io",
"auth": {
"type": "api_key_env",
"env_var": "ELEVENLABS_API_KEY"
},
"protocol": {
"chat_path": "/chat",
"content_type": "application/json",
"streaming": false,
"extra_headers": {}
}
},
"tags": [
"builtin",
"speech",
"tts",
"remote",
"fast"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "anthropic/claude-opus-4-7:latest",
"name": "claude-opus-4-7",
"provider": "anthropic",
"family": "claude-4",
"version": "latest",
"capabilities": [
"generate",
"code",
"reasoning",
"tool_use",
"multi_tool_call",
"summarize",
"vision"
],
"context_length": 1000000,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": null,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "remote_api",
"endpoint": "https://api.anthropic.com/v1/messages",
"api_key_env": "ANTHROPIC_API_KEY",
"api_key_envs": [],
"api_version": "2023-06-01",
"protocol": "anthropic"
},
"tags": [
"builtin",
"frontier"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "anthropic/claude-opus-4-6:latest",
"name": "claude-opus-4-6",
"provider": "anthropic",
"family": "claude-4",
"version": "latest",
"capabilities": [
"generate",
"code",
"reasoning",
"tool_use",
"multi_tool_call",
"summarize",
"vision"
],
"context_length": 1000000,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 3000,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": 5.0,
"output_per_mtok": 25.0,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "remote_api",
"endpoint": "https://api.anthropic.com/v1/messages",
"api_key_env": "ANTHROPIC_API_KEY",
"api_key_envs": [],
"api_version": "2023-06-01",
"protocol": "anthropic"
},
"tags": [
"builtin",
"frontier"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "anthropic/claude-sonnet-4-6:latest",
"name": "claude-sonnet-4-6",
"provider": "anthropic",
"family": "claude-4",
"version": "latest",
"capabilities": [
"generate",
"code",
"reasoning",
"tool_use",
"multi_tool_call",
"summarize",
"vision"
],
"context_length": 200000,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 1500,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": 3.0,
"output_per_mtok": 15.0,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "remote_api",
"endpoint": "https://api.anthropic.com/v1/messages",
"api_key_env": "ANTHROPIC_API_KEY",
"api_key_envs": [],
"api_version": "2023-06-01",
"protocol": "anthropic"
},
"tags": [
"builtin",
"balanced"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "anthropic/claude-haiku-4-5:latest",
"name": "claude-haiku-4-5",
"provider": "anthropic",
"family": "claude-4",
"version": "latest",
"capabilities": [
"generate",
"code",
"tool_use",
"multi_tool_call",
"summarize",
"vision"
],
"context_length": 200000,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 600,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": 1.0,
"output_per_mtok": 5.0,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "remote_api",
"endpoint": "https://api.anthropic.com/v1/messages",
"api_key_env": "ANTHROPIC_API_KEY",
"api_key_envs": [],
"api_version": "2023-06-01",
"protocol": "anthropic"
},
"tags": [
"builtin",
"fast",
"cheap"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "openai/gpt-5.4:latest",
"name": "gpt-5.4",
"provider": "openai",
"family": "gpt-5",
"version": "latest",
"capabilities": [
"generate",
"code",
"reasoning",
"tool_use",
"multi_tool_call",
"summarize",
"vision"
],
"context_length": 1000000,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 2000,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": 2.5,
"output_per_mtok": 10.0,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "remote_api",
"endpoint": "https://api.openai.com",
"api_key_env": "OPENAI_API_KEY",
"api_key_envs": [],
"api_version": null,
"protocol": "open_ai_responses"
},
"tags": [
"builtin",
"frontier"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "openai/gpt-5.4-mini:latest",
"name": "gpt-5.4-mini",
"provider": "openai",
"family": "gpt-5",
"version": "latest",
"capabilities": [
"generate",
"code",
"reasoning",
"tool_use",
"multi_tool_call",
"summarize",
"vision"
],
"context_length": 1000000,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 800,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": 0.4,
"output_per_mtok": 1.6,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "remote_api",
"endpoint": "https://api.openai.com",
"api_key_env": "OPENAI_API_KEY",
"api_key_envs": [],
"api_version": null,
"protocol": "open_ai_responses"
},
"tags": [
"builtin",
"fast",
"cheap"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "openai/o3:latest",
"name": "o3",
"provider": "openai",
"family": "o-series",
"version": "latest",
"capabilities": [
"generate",
"code",
"reasoning",
"tool_use",
"multi_tool_call",
"vision"
],
"context_length": 200000,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 5000,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": 2.0,
"output_per_mtok": 8.0,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "remote_api",
"endpoint": "https://api.openai.com",
"api_key_env": "OPENAI_API_KEY",
"api_key_envs": [],
"api_version": null,
"protocol": "open_ai_responses"
},
"tags": [
"builtin",
"reasoning"
],
"supported_params": [
"max_tokens",
"stop_sequences"
],
"public_benchmarks": []
},
{
"id": "openai/o4-mini:latest",
"name": "o4-mini",
"provider": "openai",
"family": "o-series",
"version": "latest",
"capabilities": [
"generate",
"code",
"reasoning",
"tool_use",
"vision"
],
"context_length": 200000,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 2500,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": 1.1,
"output_per_mtok": 4.4,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "remote_api",
"endpoint": "https://api.openai.com",
"api_key_env": "OPENAI_API_KEY",
"api_key_envs": [],
"api_version": null,
"protocol": "open_ai_responses"
},
"tags": [
"builtin",
"reasoning",
"cheap"
],
"supported_params": [
"max_tokens",
"stop_sequences"
],
"public_benchmarks": []
},
{
"id": "openai/gpt-5.3-codex:latest",
"name": "gpt-5.3-codex",
"provider": "openai",
"family": "gpt-5",
"version": "latest",
"capabilities": [
"generate",
"code",
"reasoning",
"tool_use",
"multi_tool_call"
],
"context_length": 192000,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 2000,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": 3.0,
"output_per_mtok": 15.0,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "remote_api",
"endpoint": "https://api.openai.com",
"api_key_env": "OPENAI_API_KEY",
"api_key_envs": [],
"api_version": null,
"protocol": "open_ai_responses"
},
"tags": [
"builtin",
"code"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "openai/gpt-4.1-mini:latest",
"name": "gpt-4.1-mini",
"provider": "openai",
"family": "gpt-4",
"version": "latest",
"capabilities": [
"generate",
"code",
"tool_use",
"multi_tool_call",
"vision"
],
"context_length": 1000000,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 600,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": 0.4,
"output_per_mtok": 1.6,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "remote_api",
"endpoint": "https://api.openai.com",
"api_key_env": "OPENAI_API_KEY",
"api_key_envs": [],
"api_version": null,
"protocol": "open_ai_responses"
},
"tags": [
"builtin",
"fast",
"cheap"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "google/gemini-2.5-pro:latest",
"name": "gemini-2.5-pro",
"provider": "google",
"family": "gemini-2",
"version": "latest",
"capabilities": [
"generate",
"code",
"reasoning",
"summarize",
"tool_use",
"multi_tool_call",
"vision"
],
"context_length": 1000000,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 2500,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": 1.25,
"output_per_mtok": 10.0,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "remote_api",
"endpoint": "https://generativelanguage.googleapis.com",
"api_key_env": "GOOGLE_API_KEY",
"api_key_envs": [],
"api_version": null,
"protocol": "google"
},
"tags": [
"builtin",
"balanced"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "google/gemini-2.5-flash:latest",
"name": "gemini-2.5-flash",
"provider": "google",
"family": "gemini-2",
"version": "latest",
"capabilities": [
"generate",
"code",
"tool_use",
"multi_tool_call",
"summarize",
"vision"
],
"context_length": 1000000,
"param_count": "",
"quantization": null,
"performance": {
"latency_p50_ms": 500,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": 0.3,
"output_per_mtok": 2.5,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "remote_api",
"endpoint": "https://generativelanguage.googleapis.com",
"api_key_env": "GOOGLE_API_KEY",
"api_key_envs": [],
"api_version": null,
"protocol": "google"
},
"tags": [
"builtin",
"fast",
"cheap"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/flux-1-lite-8b:q4",
"name": "Flux-1.lite-8B-MLX-Q4",
"provider": "mlx-community",
"family": "flux",
"version": "q4",
"capabilities": [
"image_generation"
],
"context_length": 0,
"param_count": "8B",
"quantization": "Q4",
"performance": {
"latency_p50_ms": 12000,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 7680,
"ram_mb": 12288
},
"source": {
"type": "mlx",
"hf_repo": "mlx-community/Flux-1.lite-8B-MLX-Q4",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx",
"image"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "mlx/ltx-2.3:q4",
"name": "ltx-2.3-mlx-q4",
"provider": "dgrauet",
"family": "ltx-2.3",
"version": "4bit",
"capabilities": [
"video_generation"
],
"context_length": 0,
"param_count": "22B",
"quantization": "4bit",
"performance": {
"latency_p50_ms": 45000,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": 40800,
"ram_mb": 40800
},
"source": {
"type": "mlx",
"hf_repo": "dgrauet/ltx-2.3-mlx-q4",
"hf_weight_file": null
},
"tags": [
"builtin",
"mlx",
"video"
],
"supported_params": [],
"public_benchmarks": []
},
{
"id": "apple/foundation:default",
"name": "apple-foundation",
"provider": "apple",
"family": "foundation",
"version": "1.0",
"capabilities": [
"generate",
"summarize",
"classify"
],
"context_length": 8192,
"param_count": "system",
"quantization": null,
"performance": {
"latency_p50_ms": 50,
"latency_p99_ms": null,
"tokens_per_second": null
},
"cost": {
"input_per_mtok": null,
"output_per_mtok": null,
"size_mb": null,
"ram_mb": null
},
"source": {
"type": "apple_foundation_models",
"use_case": null
},
"tags": [
"builtin",
"local",
"low_latency",
"private"
],
"supported_params": [
"temperature",
"max_tokens"
],
"public_benchmarks": []
}
]