# harn-vm 0.8.167

# Z.AI GLM-5 family. GLM-5.2 is the current 1M-context open-weight flagship;
# GLM-5.1 / GLM-5 are kept for pinned callers. Direct Z.AI tariff via the
# OpenAI-compatible /v1 endpoint. OpenRouter mirrors live below.
[models."glm-5"]
name = "GLM 5"
provider = "zai"
context_window = 202_752  # 198 * 1024
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
# Rates per docs.z.ai, verified 2026-07-02: $1.00 in / $3.20 out per MTok,
# $0.20 per MTok on cache hits. These replace the earlier 0.98/3.08
# figures, which matched no published tier.
pricing = { input_per_mtok = 1.00, output_per_mtok = 3.20, cache_read_per_mtok = 0.20 }
tier = "frontier"
open_weight = true
strengths = ["coding", "agentic", "tool_use"]

# GLM-4.7-Flash — Z.AI's free API tier (31B dense). Zero-cost route for
# healthcheck/QC probes and cheap background tasks. Pricing is OMITTED
# (the unbilled-route convention, like local models) rather than listed
# as $0/$0: a literal zero rate would win every price-capped selection
# (complementary reviewer, cheapest-route) and make cost-fallback paths
# unreachable, which misrepresents a rate-limited free tier.
[models."glm-4.7-flash"]
name = "GLM 4.7 Flash"
provider = "zai"
context_window = 131_072  # 128 * 1024
capabilities = ["tools", "streaming", "thinking"]
# Deliberately no `pricing` key: this is an unbilled route, not a $0 rate.
tier = "small"
open_weight = true
strengths = ["speed", "cheap", "coding", "tool_use"]

# GLM-5.1 on the direct Z.AI route; retained for pinned callers.
[models."glm-5.1"]
name = "GLM 5.1"
provider = "zai"
context_window = 202_752  # 198 * 1024
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
# NOTE(review): pricing and benchmarks are identical to the glm-5.2 entry,
# and no verification date is recorded here (unlike glm-5) — confirm both
# against docs.z.ai.
pricing = { input_per_mtok = 1.40, output_per_mtok = 4.40, cache_read_per_mtok = 0.26 }
tier = "frontier"
open_weight = true
strengths = ["coding", "agentic", "tool_use", "reasoning", "long_context"]
benchmarks = { swe_bench_pro_lead = 1.0 }

# GLM-5.2 on the direct Z.AI route; the current 1M-context flagship.
[models."glm-5.2"]
name = "GLM 5.2"
provider = "zai"
context_window = 1_048_576  # 1024 * 1024
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
pricing = { input_per_mtok = 1.40, output_per_mtok = 4.40, cache_read_per_mtok = 0.26 }
tier = "frontier"
open_weight = true
strengths = ["coding", "agentic", "tool_use", "reasoning", "long_context"]
benchmarks = { swe_bench_pro_lead = 1.0 }

# OpenRouter mirrors of the GLM-5 family, so callers without a Z.AI key can
# still resolve a route. OpenRouter doesn't list GLM-4.6/4.7 — its canonical
# slugs are the GLM-5 generation.
[models."z-ai/glm-5"]
name = "GLM 5 (via OpenRouter)"
provider = "openrouter"
context_window = 202_752  # 198 * 1024
capabilities = ["tools", "streaming"]
# Input/output rates only; no cache-read rate is listed for this route.
pricing = { input_per_mtok = 1.20, output_per_mtok = 4.00 }
tier = "frontier"
open_weight = true
strengths = ["coding", "agentic"]

# OpenRouter route for GLM-5.1.
[models."z-ai/glm-5.1"]
name = "GLM 5.1 (via OpenRouter)"
provider = "openrouter"
context_window = 202_752  # 198 * 1024
capabilities = ["tools", "streaming"]
# NOTE(review): 0.98/3.08 are the same figures the direct glm-5 entry's
# comment says matched no published tier, and they undercut the OpenRouter
# glm-5 rate (1.20/4.00) — confirm against OpenRouter's current listing.
pricing = { input_per_mtok = 0.98, output_per_mtok = 3.08 }
tier = "frontier"
open_weight = true
strengths = ["coding", "agentic", "tool_use", "reasoning", "long_context"]

# OpenRouter route for GLM-5.2.
[models."z-ai/glm-5.2"]
name = "GLM 5.2 (via OpenRouter)"
provider = "openrouter"
context_window = 1_048_576  # 1024 * 1024
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
# NOTE(review): "prompt_caching" is declared above but no
# cache_read_per_mtok rate is given, unlike the direct Z.AI entries —
# confirm whether a cache-read rate should be listed.
pricing = { input_per_mtok = 1.20, output_per_mtok = 4.10 }
tier = "frontier"
open_weight = true
strengths = ["coding", "agentic", "tool_use", "reasoning", "long_context"]

# OpenRouter route for GLM-5V Turbo (declares the "vision" capability).
[models."z-ai/glm-5v-turbo"]
name = "GLM 5V Turbo (via OpenRouter)"
provider = "openrouter"
context_window = 202_752  # 198 * 1024
capabilities = ["tools", "streaming", "vision"]
pricing = { input_per_mtok = 1.20, output_per_mtok = 4.00 }
# Inline metadata keeps this row independent of fragment-order defaults.
tier = "frontier"
open_weight = true
strengths = ["vision", "coding", "agentic"]