tier = "mid"
open_weight = true
strengths = ["cheap", "tool_use"]
# GPT-OSS 120B as served by Cerebras.
[models."gpt-oss-120b"]
name = "GPT-OSS 120B (Cerebras)"
provider = "cerebras"
api_dialect = "openai_chat"
served_variant = "cerebras-wafer-scale"
# Both identifiers carry the same value for this entry.
logical_model = "openai-gpt-oss-120b"
equivalence_group = "openai-gpt-oss-120b"
context_window = 131_072
capabilities = ["tools", "streaming"]
tier = "frontier"
open_weight = true
strengths = ["speed", "cheap", "tool_use"]
# NOTE(review): presumably a price per million tokens; currency is not stated here.
pricing = { input_per_mtok = 0.35, output_per_mtok = 0.75 }

# These figures are the published Free Trial row (see `notes`), not the
# Developer (Pay as You Go) limits. `tier` in this table names the rate-limit
# plan and is separate from the model-level `tier` above.
# NOTE(review): rpm/tpm/tph/tpd presumably mean requests per minute and tokens
# per minute/hour/day; confirm against the schema.
[models."gpt-oss-120b".rate_limits]
tier = "free"
rpm = 5
tpm = 30_000
tph = 1_000_000
tpd = 1_000_000
source_url = "https://inference-docs.cerebras.ai/support/rate-limits"
last_verified = "2026-06-05"
notes = "Published Free Trial row; Developer (Pay as You Go) lists 1K RPM and 1M TPM with no hourly/daily cap."

# NOTE(review): the `_b` suffix presumably means billions of parameters; confirm.
[models."gpt-oss-120b".architecture]
moe = true
parameter_count_b = 117.0
active_parameter_count_b = 5.1
license = "Apache-2.0"
source_url = "https://developers.openai.com/api/docs/models/gpt-oss-120b"
last_verified = "2026-06-05"
# Z.ai GLM 4.7 as served by Cerebras.
# NOTE(review): this entry declares no api_dialect, logical_model,
# equivalence_group, or served_variant; confirm the consumer's defaults.
[models."zai-glm-4.7"]
name = "Z.ai GLM 4.7 (Cerebras)"
provider = "cerebras"
tier = "frontier"
open_weight = true
context_window = 131_072
# NOTE(review): presumably a price per million tokens; currency is not stated here.
pricing = { input_per_mtok = 2.25, output_per_mtok = 2.75 }
capabilities = [
    "tools",
    "streaming",
    "thinking",
]
strengths = [
    "speed",
    "coding",
    "agentic",
    "tool_use",
    "reasoning",
]
[models."llama-3.3-70b"]
name = "Llama 3.3 70B (Cerebras, dedicated legacy)"
provider = "cerebras"
context_window = 131072
capabilities = ["tools", "streaming"]
pricing = { input_per_mtok = 0.85, output_per_mtok = 1.20 }
availability = "dedicated"
deprecated = true
deprecation_note = "Cerebras no longer returns this model from public discovery; use a provisioned dedicated endpoint alias if your organization still serves these weights."