harn-vm 0.9.2 - Docs.rs

# ── Aliases ──────────────────────────────────────────────────────────────────
# Short symbolic names → (model id, provider, optional tool_format). The
# tier-resolution path (`resolve_tier_model("frontier", None)`) reads
# `frontier`, `mid`, `small`; provider-scoped tiers like `tier/mid` let
# callers force a specific resolution per provider.

# Short flagship aliases — these track whatever the current
# generation is. Bump these when a successor lands.
[aliases.sonnet]
id = "claude-sonnet-5"
provider = "anthropic"

# Version-named twin of `sonnet`: same id and provider today.
[aliases.sonnet5]
id = "claude-sonnet-5"
provider = "anthropic"

# Previous Sonnet generation, kept addressable for pinned workflows.
[aliases.sonnet46]
id = "claude-sonnet-4-6"
provider = "anthropic"

# Opus alias. It deliberately does not point at claude-fable-5; the Fable
# comment below explains why.
[aliases.opus]
id = "claude-opus-4-8"
provider = "anthropic"

# Fable is the Mythos-class tier above Opus, not an Opus successor — the
# `opus` alias intentionally stays on claude-opus-4-8.
[aliases.fable]
id = "claude-fable-5"
provider = "anthropic"

# Haiku is addressed by a dated snapshot id, unlike the undated ids above.
[aliases.haiku]
id = "claude-haiku-4-5-20251001"
provider = "anthropic"

# Frontier tier. `frontier` and `tier/frontier` carry the same target, which
# is also the id `sonnet` resolves to today.
[aliases.frontier]
id = "claude-sonnet-5"
provider = "anthropic"

[aliases."tier/frontier"]
id = "claude-sonnet-5"
provider = "anthropic"

# Default balanced hosted route for routine coding-agent work. Qwen 3.6 Flash
# matched Plus on the 2026-07-02 Harn/OpenRouter structured JSON + native-tool
# + text-tool smoke matrix while landing materially lower latency and token
# price. OpenAI stays addressable through the provider-scoped tier alias below.
[aliases.mid]
id = "qwen/qwen3.6-flash"
provider = "openrouter"

# Same id and provider as `mid`.
[aliases."tier/mid"]
id = "qwen/qwen3.6-flash"
provider = "openrouter"

# Provider-scoped mid tier for callers that want OpenAI specifically.
[aliases."openai/mid"]
id = "gpt-5.4-mini"
provider = "openai"

# The small tier default stays on an open-weight host by policy (see
# test_resolve_tier_model_default_aliases). Was Qwen/Qwen3.5-9B on
# OpenRouter — that route went stale (Burin suppresses it); Gemma 4
# 26B-A4B on OpenRouter is the current cheap open-weight default
# ($0.06/$0.33, 4B active params, native tools).
# NOTE(review): the price pair presumably means USD per 1M input/output
# tokens — confirm and spell out the unit.
[aliases.small]
id = "google/gemma-4-26b-a4b-it"
provider = "openrouter"

# Same id and provider as `small`.
[aliases."tier/small"]
id = "google/gemma-4-26b-a4b-it"
provider = "openrouter"

# Local Gemma 4 variants (vLLM / OpenAI-compat backend at `providers.local`).
# `local-gemma4` is the unsuffixed default and shares its id with
# `local-gemma4-26b`.
[aliases.local-gemma4]
id = "gemma-4-26b-a4b-it"
provider = "local"

[aliases.local-gemma4-26b]
id = "gemma-4-26b-a4b-it"
provider = "local"

[aliases.local-gemma4-31b]
id = "gemma-4-31b-it"
provider = "local"

[aliases.local-gemma4-e4b]
id = "gemma-4-e4b-it"
provider = "local"

[aliases.local-gemma4-e2b]
id = "gemma-4-e2b-it"
provider = "local"

# Ollama-served Gemma 4 tags. Every `ollama-gemma4*` alias pins the `text`
# tool format. `ollama-gemma4` shares its tag with `ollama-gemma4-26b`.
[aliases.ollama-gemma4]
id = "gemma4:26b"
provider = "ollama"
tool_format = "text"

[aliases.ollama-gemma4-26b]
id = "gemma4:26b"
provider = "ollama"
tool_format = "text"

# The unsuffixed 12B alias points at the `-mlx` tag; the NVFP4 build has its
# own alias below.
[aliases.ollama-gemma4-12b]
id = "gemma4:12b-mlx"
provider = "ollama"
tool_format = "text"

[aliases.ollama-gemma4-12b-nvfp4]
id = "gemma4:12b-nvfp4"
provider = "ollama"
tool_format = "text"

# 12B on the `local` provider, alongside the other `local-gemma4-*` aliases.
# No `tool_format` pin, like those aliases.
[aliases.local-gemma4-12b]
id = "gemma-4-12b-it"
provider = "local"

# Gemma 4 26B/31B via hosted APIs (the 12B is on-device only). The Gemini API
# serves Gemma under its bare id; OpenRouter/Together use org-prefixed ids.
# Keep the Gemini ids free of the `models/` resource prefix so they stay bare.
[aliases.gemini-gemma4-31b]
id = "gemma-4-31b-it"
provider = "gemini"

[aliases.gemini-gemma4-26b]
id = "gemma-4-26b-a4b-it"
provider = "gemini"

# Org-prefixed ids on OpenRouter.
[aliases.openrouter-gemma4-31b]
id = "google/gemma-4-31b-it"
provider = "openrouter"

[aliases.openrouter-gemma4-26b]
id = "google/gemma-4-26b-a4b-it"
provider = "openrouter"

# Together has an alias for the 31B only; its id uses an upper-case `B`.
[aliases.together-gemma4-31b]
id = "google/gemma-4-31B-it"
provider = "together"

# Kimi via OpenRouter. Keep the short alias on the current code-focused route;
# the full model id remains available for callers that want explicit slugs.
[aliases."kimi-k2.7-code"]
id = "moonshotai/kimi-k2.7-code"
provider = "openrouter"

# Provider-prefixed name for the same OpenRouter route.
[aliases."openrouter-kimi-k2.7-code"]
id = "moonshotai/kimi-k2.7-code"
provider = "openrouter"

# Kimi via Moonshot's first-party API. Native tool calls are reliable on
# the direct route, so every alias in this group pins `native`.
[aliases."kimi-direct"]
id = "moonshot/kimi-k2.6"
provider = "moonshot"
tool_format = "native"

# Provider-prefixed name for the same route as `kimi-direct`.
[aliases."moonshot-kimi"]
id = "moonshot/kimi-k2.6"
provider = "moonshot"
tool_format = "native"

# Code-focused K2.7 on the direct route. Pinned `native` like the K2.6
# aliases above and like the DeepInfra/Baseten Kimi K2.7 Code aliases.
[aliases."moonshot-kimi-k2.7-code"]
id = "moonshot/kimi-k2.7-code"
provider = "moonshot"
tool_format = "native"

[aliases."moonshot-kimi-k2.7-code-highspeed"]
id = "moonshot/kimi-k2.7-code-highspeed"
provider = "moonshot"
tool_format = "native"

# DeepInfra — open-weight OpenAI-compatible host.
[aliases."deepinfra-deepseek"]
id = "deepinfra/deepseek-ai/DeepSeek-V4-Pro"
provider = "deepinfra"
tool_format = "native"

# NOTE(review): GLM-5.2 is pinned to `json` here but to `text` on the Baseten,
# Z.AI, and OpenRouter aliases — confirm this pin reflects a probe result.
[aliases."deepinfra-glm-5.2"]
id = "deepinfra/zai-org/GLM-5.2"
provider = "deepinfra"
tool_format = "json"

[aliases."deepinfra-kimi-k2.7-code"]
id = "deepinfra/moonshotai/Kimi-K2.7-Code"
provider = "deepinfra"
tool_format = "native"

# probed 2026-06-24 (provider-tool-mode-sweep): native bills empty / json flaky,
# heredoc text 5/5 byte-clean. Route is native_unreliable; pin text.
[aliases."deepinfra-qwen3.6"]
id = "deepinfra/Qwen/Qwen3.6-35B-A3B"
provider = "deepinfra"
tool_format = "text"

# Baseten Model APIs. GLM-5.x currently emits tool XML in assistant content on
# `tool_choice=required` in Harn smoke probes, so pin GLM aliases to the text
# grammar. Kimi, DeepSeek, GPT-OSS, and Nemotron returned OpenAI `tool_calls`.
[aliases."baseten-glm-5.2"]
id = "baseten/zai-org/GLM-5.2"
provider = "baseten"
tool_format = "text"

# The remaining Baseten aliases pin `native`, matching the probe result above.
[aliases."baseten-kimi-k2.7-code"]
id = "baseten/moonshotai/Kimi-K2.7-Code"
provider = "baseten"
tool_format = "native"

[aliases."baseten-deepseek-v4-pro"]
id = "baseten/deepseek-ai/DeepSeek-V4-Pro"
provider = "baseten"
tool_format = "native"

[aliases."baseten-nemotron-ultra"]
id = "baseten/nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B"
provider = "baseten"
tool_format = "native"

# NOTE(review): this slug is shaped differently from the Ultra id above (no
# `NVIDIA-` prefix or generation number) — confirm against Baseten's model list.
[aliases."baseten-nemotron-super"]
id = "baseten/nvidia/Nemotron-120B-A12B"
provider = "baseten"
tool_format = "native"

[aliases."baseten-gpt-oss-120b"]
id = "baseten/openai/gpt-oss-120b"
provider = "baseten"
tool_format = "native"

# SambaNova Cloud — fast RDU inference.
# DeepSeek and Llama pin `native`; the two aliases after them pin `text` for
# the reasons given in their own comments.
[aliases."sambanova-deepseek"]
id = "sambanova/DeepSeek-V3.2"
provider = "sambanova"
tool_format = "native"

[aliases."sambanova-llama"]
id = "sambanova/Meta-Llama-3.3-70B-Instruct"
provider = "sambanova"
tool_format = "native"

# probed 2026-06-24 (provider-tool-mode-sweep): native AND fenced-JSON both
# corrupt backslash bodies (0/5), heredoc text 5/5 byte-clean. Pin text.
[aliases."sambanova-minimax-m2.7"]
id = "sambanova/MiniMax-M2.7"
provider = "sambanova"
tool_format = "text"

# gpt-oss (Harmony) native channel is a footgun on the SambaNova pay-per-token
# route (empty tool_calls / reasoning-channel-only); the route is pinned to TEXT
# in 37-sambanova.toml, so this alias pins `text` to match (a `native` pin would
# auto-correct anyway).
[aliases."sambanova-gpt-oss-120b"]
id = "sambanova/gpt-oss-120b"
provider = "sambanova"
tool_format = "text"

# Qwen 3.7 via OpenRouter/Together. Keep bare aliases on OpenRouter because it
# exposes both Max and Plus; Together currently serves only Max.
# Bare `qwen3.7` defaults to Max (same id as `qwen3.7-max`).
[aliases."qwen3.7"]
id = "qwen/qwen3.7-max"
provider = "openrouter"

[aliases."qwen3.7-max"]
id = "qwen/qwen3.7-max"
provider = "openrouter"

[aliases."qwen3.7-plus"]
id = "qwen/qwen3.7-plus"
provider = "openrouter"

# Provider-prefixed names for the same OpenRouter routes.
[aliases."openrouter-qwen3.7-max"]
id = "qwen/qwen3.7-max"
provider = "openrouter"

[aliases."openrouter-qwen3.7-plus"]
id = "qwen/qwen3.7-plus"
provider = "openrouter"

# Together's id uses its own casing (`Qwen/Qwen3.7-Max`).
[aliases."together-qwen3.7-max"]
id = "Qwen/Qwen3.7-Max"
provider = "together"

# Qwen 3.6 via OpenRouter. These route through Harn capability rows that keep
# provider-native tools, disable Qwen reasoning automatically for tool-bearing
# turns, and deny the OpenRouter Ambient upstream for structured tool calls.
# Bare `qwen3.6` defaults to Flash, the same id the `mid` tier uses.
[aliases."qwen3.6"]
id = "qwen/qwen3.6-flash"
provider = "openrouter"

[aliases."qwen3.6-flash"]
id = "qwen/qwen3.6-flash"
provider = "openrouter"

[aliases."qwen3.6-plus"]
id = "qwen/qwen3.6-plus"
provider = "openrouter"

[aliases."qwen3.6-35b"]
id = "qwen/qwen3.6-35b-a3b"
provider = "openrouter"

# Provider-prefixed names; each mirrors the bare alias of the same suffix.
[aliases."openrouter-qwen3.6"]
id = "qwen/qwen3.6-flash"
provider = "openrouter"

[aliases."openrouter-qwen3.6-flash"]
id = "qwen/qwen3.6-flash"
provider = "openrouter"

[aliases."openrouter-qwen3.6-plus"]
id = "qwen/qwen3.6-plus"
provider = "openrouter"

[aliases."openrouter-qwen3.6-35b"]
id = "qwen/qwen3.6-35b-a3b"
provider = "openrouter"

# OpenRouter-only coding specialist. The bare and `openrouter-` names resolve
# to the same route.
[aliases."kat-coder-pro-v2"]
id = "kwaipilot/kat-coder-pro-v2"
provider = "openrouter"

[aliases."openrouter-kat-coder-pro-v2"]
id = "kwaipilot/kat-coder-pro-v2"
provider = "openrouter"

# Together mirror of DeepSeek V4 Pro. `together-deepseek` is the short name
# for the version-named alias below it.
[aliases."together-deepseek"]
id = "deepseek-ai/DeepSeek-V4-Pro"
provider = "together"

[aliases."together-deepseek-v4-pro"]
id = "deepseek-ai/DeepSeek-V4-Pro"
provider = "together"

# StepFun long-context agent route via OpenRouter. The bare and `openrouter-`
# names resolve to the same route.
[aliases."step-3.7-flash"]
id = "stepfun/step-3.7-flash"
provider = "openrouter"

[aliases."openrouter-step-3.7-flash"]
id = "stepfun/step-3.7-flash"
provider = "openrouter"

# Together mirrors for current GLM/MiniMax agent routes.
# NOTE(review): the GLM aliases here carry no `tool_format` pin, while GLM-5.2
# is pinned to `text` on Baseten, Z.AI, and OpenRouter — confirm Together's
# route does not need the same pin.
[aliases."together-glm-5.1"]
id = "zai-org/GLM-5.1"
provider = "together"

[aliases."together-glm-5.2"]
id = "zai-org/GLM-5.2"
provider = "together"

[aliases."together-minimax-m2.7"]
id = "MiniMaxAI/MiniMax-M2.7"
provider = "together"
# NVIDIA NIM. Every alias in this group pins `native` tool calls, and every id
# carries an `nvidia/` prefix.
[aliases."nvidia-nemotron-ultra"]
id = "nvidia/nemotron-3-ultra-550b-a55b"
provider = "nvidia"
tool_format = "native"

[aliases."nvidia-nemotron-super"]
id = "nvidia/nemotron-3-super-120b-a12b"
provider = "nvidia"
tool_format = "native"

[aliases."nvidia-nemotron-nano"]
id = "nvidia/nemotron-3-nano-30b-a3b"
provider = "nvidia"
tool_format = "native"

[aliases."nvidia-deepseek-v4-pro"]
id = "nvidia/deepseek-v4-pro"
provider = "nvidia"
tool_format = "native"

[aliases."nvidia-minimax-m3"]
id = "nvidia/minimax-m3"
provider = "nvidia"
tool_format = "native"

[aliases."nvidia-kimi-k2.6"]
id = "nvidia/kimi-k2.6"
provider = "nvidia"
tool_format = "native"

# qwen3.6 has no working Ollama route — Ollama's qwen3.5-family server-side
# tool-call parser 500s on text-tool output (ollama/ollama#14986, #14570).
# Use the llamacpp provider for local qwen3.x.

# llama.cpp — Unsloth Dynamic 2.0 GGUF served by llama-server.
# No `tool_format` pin: this non-native-quant id matches the `*qwen3.6*`
# text-channel capability row's `json` (fenced-JSON) default. json's ```tool
# fence sidesteps the reserved <tool_call> token (so the reserved-token remap
# stays correct) and is delimiter-safe vs heredoc. To force heredoc, set
# `tool_format = "text"`.
[aliases."llamacpp-qwen3.6"]
id = "qwen3.6-35b-a3b"
provider = "llamacpp"

# Unlike the alias above, the `-ud-q4-k-xl` quant id is pinned to `native`.
# `llamacpp-qwen3.6-q4`, `local-qwen3.6`, and `local-qwen3.6-gguf` are three
# names for that one route.
[aliases."llamacpp-qwen3.6-q4"]
id = "qwen3.6-35b-a3b-ud-q4-k-xl"
provider = "llamacpp"
tool_format = "native"

[aliases."local-qwen3.6"]
id = "qwen3.6-35b-a3b-ud-q4-k-xl"
provider = "llamacpp"
tool_format = "native"

[aliases."local-qwen3.6-gguf"]
id = "qwen3.6-35b-a3b-ud-q4-k-xl"
provider = "llamacpp"
tool_format = "native"

# MLX (Apple Silicon). Local MLX routes use the coding-tuned Qwen3.6-35B-A3B
# MoE served via `mlx_lm.server` (burin #2717). Keep every MLX alias pointed at
# live MoE weights so `auto`/preset selection lands on real weights. These
# share the `qwen3.6-35b-a3b` equivalence_group with the llama.cpp GGUF route,
# so eval aggregation compares the two runtimes directly.
[aliases."mlx-qwen3.6"]
id = "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit"
provider = "mlx"

[aliases."mlx-qwen3.6-q4"]
id = "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit"
provider = "mlx"

# Only alias on the 8-bit weights; every other alias in this group uses the
# 4-bit build.
[aliases."mlx-qwen3.6-q8"]
id = "unsloth/Qwen3.6-35B-A3B-UD-MLX-8bit"
provider = "mlx"

# Back-compat: the old 27B alias names resolve to the live 35B-A3B MoE so any
# pinned config keeps working instead of pointing at non-existent weights.
[aliases.mlx-qwen36-27b]
id = "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit"
provider = "mlx"

[aliases."mlx-qwen3.6-27b"]
id = "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit"
provider = "mlx"

[aliases."mlx-qwen3.6-27b-q4"]
id = "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit"
provider = "mlx"

# Resolves to the `mlx` provider, whereas `local-qwen3.6` resolves to
# `llamacpp`.
[aliases."local-qwen3.6-27b"]
id = "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit"
provider = "mlx"

# MiniMax direct API aliases. Bare `minimax` carries the same id as
# `minimax-m3`; the version-named aliases each map to their own id.
[aliases.minimax]
id = "MiniMax-M3"
provider = "minimax"

[aliases."minimax-m2"]
id = "MiniMax-M2"
provider = "minimax"

[aliases."minimax-m2.5"]
id = "MiniMax-M2.5"
provider = "minimax"

[aliases."minimax-m2.7"]
id = "MiniMax-M2.7"
provider = "minimax"

[aliases."minimax-m3"]
id = "MiniMax-M3"
provider = "minimax"

# Z.AI GLM aliases. GLM-5.x native channel emits `<tool_call>` markup instead of
# OpenAI message.tool_calls (see 98-zai.toml + the Baseten probe), so the zai
# GLM-5 rows are pinned to TEXT; every GLM-5.x alias here pins `text` to match.
# Bare `glm` carries the same id as `glm-5.2`.
[aliases.glm]
id = "glm-5.2"
provider = "zai"
tool_format = "text"

[aliases."glm-5"]
id = "glm-5"
provider = "zai"
tool_format = "text"

[aliases."glm-5.1"]
id = "glm-5.1"
provider = "zai"
tool_format = "text"

[aliases."glm-5.2"]
id = "glm-5.2"
provider = "zai"
tool_format = "text"

# GLM-5.2 through OpenRouter, pinned to `text` like the direct aliases.
[aliases."openrouter-glm-5.2"]
id = "z-ai/glm-5.2"
provider = "openrouter"
tool_format = "text"

# DeepSeek V4 direct API aliases. Bare `deepseek` defaults to Flash;
# `deepseek-flash` and `deepseek-pro` are short names for the version-named
# aliases below them.
[aliases.deepseek]
id = "deepseek-v4-flash"
provider = "deepseek"

[aliases."deepseek-flash"]
id = "deepseek-v4-flash"
provider = "deepseek"

[aliases."deepseek-pro"]
id = "deepseek-v4-pro"
provider = "deepseek"

[aliases."deepseek-v4-flash"]
id = "deepseek-v4-flash"
provider = "deepseek"

[aliases."deepseek-v4-pro"]
id = "deepseek-v4-pro"
provider = "deepseek"

# Cohere direct API. `cohere` and `command-a-plus` share one model id.
[aliases.cohere]
id = "command-a-plus-05-2026"
provider = "cohere"

[aliases."command-a-plus"]
id = "command-a-plus-05-2026"
provider = "cohere"

# xAI Grok. Bare `grok` shares its id with `grok-4.3`; both pin `native`.
[aliases.grok]
id = "grok-4.3"
provider = "xai"
tool_format = "native"

[aliases."grok-4.3"]
id = "grok-4.3"
provider = "xai"
tool_format = "native"

# Coding route: `grok-code` and `grok-code-fast` share one id and carry no
# `tool_format` pin.
# NOTE(review): confirm the missing pin is intentional given the `native` pin
# on the two aliases above.
[aliases.grok-code]
id = "grok-build-0.1"
provider = "xai"

[aliases."grok-code-fast"]
id = "grok-build-0.1"
provider = "xai"

# Devstral (Mistral's agentic-coding tune).
# No `tool_format` pin: these aliases inherit the `devstral-small-2*` capability
# row's `json` (fenced-JSON) text-channel default. devstral has no
# reserved-token constraint, so json is delimiter-safe and avoids heredoc's
# `<<EOF` content leak. To force heredoc, set `tool_format = "text"` here or pin
# the capability row.
# `devstral-small-2` and `ollama-devstral-small-2` resolve to the same Ollama
# tag.
[aliases.devstral-small-2]
id = "devstral-small-2:24b"
provider = "ollama"

[aliases.ollama-devstral-small-2]
id = "devstral-small-2:24b"
provider = "ollama"

# NOTE: there is intentionally no `ollama-devstral-small-2-native` alias.
# Devstral Small 2 on Ollama is text-tool-only (see the `devstral-small-2*`
# capability rule: native_tools = false). A `tool_format = "native"` pin here
# would be silently half-supported — accepted by resolution but degraded to
# the text protocol downstream — which the catalog validator rejects.

# Mistral direct code routes.
[aliases.codestral]
id = "codestral-2508"
provider = "mistral"

[aliases."devstral-medium"]
id = "devstral-medium-2512"
provider = "mistral"

# Hosted Mistral route. The Ollama-served Devstral Small 2 is a separate alias
# (`devstral-small-2`).
[aliases."devstral-small"]
id = "devstral-small-2512"
provider = "mistral"

# Current cheap open-weight coder routes. The bare and `openrouter-` names for
# Qwen3 Coder Next resolve to the same route.
[aliases."qwen3-coder-next"]
id = "qwen/qwen3-coder-next"
provider = "openrouter"

[aliases."openrouter-qwen3-coder-next"]
id = "qwen/qwen3-coder-next"
provider = "openrouter"

# Only a provider-prefixed name is defined for this model in this group.
[aliases."openrouter-qwen3.5-397b"]
id = "qwen/qwen3.5-397b-a17b"
provider = "openrouter"

# Groq LPU speed route for Qwen3.6 (native tool calls, ~500 tok/s).
# NOTE(review): this id names a 27B model, while the MLX 27B aliases were
# retargeted to 35B-A3B because their 27B weights did not exist — confirm Groq
# actually serves this id.
[aliases."groq-qwen3.6"]
id = "qwen/qwen3.6-27b"
provider = "groq"
tool_format = "native"

# Z.AI free tier.
# No `tool_format` pin, unlike the `text`-pinned GLM-5.2 aliases on the same
# provider. NOTE(review): confirm GLM-4.7 does not need the same pin.
[aliases."glm-4.7-flash"]
id = "glm-4.7-flash"
provider = "zai"

# Together mirrors for MiniMax M3 / Kimi K2.7 Code. Both use org-prefixed ids
# and carry no `tool_format` pin.
[aliases."together-minimax-m3"]
id = "MiniMaxAI/MiniMax-M3"
provider = "together"

[aliases."together-kimi-k2.7-code"]
id = "moonshotai/Kimi-K2.7-Code"
provider = "together"

# DashScope first-party Qwen routes. Ids carry a `dashscope/` prefix; neither
# alias pins a tool format.
[aliases."dashscope-qwen3-coder-next"]
id = "dashscope/qwen3-coder-next"
provider = "dashscope"

[aliases."dashscope-qwen3.7-max"]
id = "dashscope/qwen3.7-max"
provider = "dashscope"