# harn-vm 0.8.113

# ---------- Together reasoning models ----------------------------------------
#
# Together exposes three reasoning surfaces today:
#   - Qwen uses `chat_template_kwargs.thinking` / `enable_thinking`.
#   - Hybrid Kimi/GLM/Gemma models use `reasoning.enabled`.
#   - GPT-OSS uses top-level `reasoning_effort`.
# Keep each host-specific rule explicit so the generic OpenAI fallback does
# not accidentally claim the wrong wire format.

# gpt-oss on Together uses NATIVE tool calls. The prior `json`/text pin was a
# defensive workaround for an "empty native streaming payload" defect that no
# longer reproduces: 2026-06-12 live probes (raw curl + Harn's streaming path)
# confirm gpt-oss-120b returns clean OpenAI-shape `tool_calls` in both
# non-streaming and SSE modes, and Harn's index-keyed SSE reassembler
# (transport.rs `oai_tool_map`) accumulates the arg-fragment deltas correctly.
# Conversely, BOTH `json` and `text` formats yield ZERO parsed calls because
# gpt-oss does not emit Harn's ```tool/name/args contract — it emits a bare
# `{"tool":..,"arguments":..}` dialect that the fenced-JSON parser does not
# recognize. `native` is therefore the measured-best (and only working) channel.
# Pinned EXPLICITLY (not inherited) because `openai/gpt-oss-120b` is a
# catalogued model and the catalog invariant requires the rule to set both
# fields. To change, set `preferred_tool_format` ("native" | "json" | "text").
#
# `reasoning_required_for_tools = true`: gpt-oss (Harmony) performs tool
# calls INSIDE the chain-of-thought channel, so reasoning-off breaks tool
# calling (billed-noncommittal: 0 tool_calls + tiny completion). This is the
# OPPOSITE of the Qwen3 quirk — gpt-oss must NOT carry an
# `auto_reasoning_overrides = { agent/verify/code = "off" }` override; the
# flag makes reasoning_policy refuse to floor tool tasks to off.
[[provider.together]]
model_match = "openai/gpt-oss-*"
# Tool calling: native channel, pinned explicitly on this rule.
native_tools = true
preferred_tool_format = "native"
prefers_xml_tools = false
# Reasoning: effort-based thinking only; reasoning is flagged as required
# for tool use, so this rule carries no `auto_reasoning_overrides`.
thinking_modes = ["effort"]
reasoning_effort_supported = true
reasoning_required_for_tools = true
thinking_block_style = "reasoning_summary"
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false

# DeepSeek V4 models on Together (`deepseek-ai/deepseek-v4*`).
[[provider.together]]
model_match = "deepseek-ai/deepseek-v4*"
prompt_caching = true
# Tool calling: native channel.
native_tools = true
preferred_tool_format = "native"
prefers_xml_tools = false
# Reasoning: declares both the `enabled` and `effort` thinking modes.
thinking_modes = ["enabled", "effort"]
reasoning_effort_supported = true
thinking_block_style = "reasoning_summary"
# Wire-format handling.
server_parser = "none"
honors_chat_template_kwargs = false
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false

# Moonshot Kimi K2 models on Together (`moonshotai/kimi-k2*`).
[[provider.together]]
model_match = "moonshotai/kimi-k2*"
# Tool calling: native channel.
native_tools = true
preferred_tool_format = "native"
prefers_xml_tools = false
# Reasoning: `enabled` thinking mode only; thinking blocks are styled inline.
thinking_modes = ["enabled"]
thinking_block_style = "inline"
# Wire-format handling.
honors_chat_template_kwargs = false
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false

# Z.ai GLM-5 models on Together (`zai-org/glm-5*`).
[[provider.together]]
model_match = "zai-org/glm-5*"
prompt_caching = true
# Tool calling: native channel.
native_tools = true
preferred_tool_format = "native"
prefers_xml_tools = false
# Reasoning: `enabled` thinking mode only. Auto reasoning is overridden to
# "off" for the `agent`, `verify`, and `code` entries.
thinking_modes = ["enabled"]
auto_reasoning_overrides = { agent = "off", verify = "off", code = "off" }
reasoning_text_promotable = false
thinking_block_style = "inline"
# Wire-format handling.
honors_chat_template_kwargs = false
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false

# MiniMax M2.7 models on Together (`minimaxai/minimax-m2.7*`).
[[provider.together]]
model_match = "minimaxai/minimax-m2.7*"
prompt_caching = true
# Tool calling: native channel.
native_tools = true
preferred_tool_format = "native"
prefers_xml_tools = false
# Reasoning: `enabled` thinking mode only; thinking blocks are styled inline.
thinking_modes = ["enabled"]
reasoning_text_promotable = false
thinking_block_style = "inline"
# Wire-format handling.
honors_chat_template_kwargs = false
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false

# Google Gemma 4 models on Together (`google/gemma-4*`).
[[provider.together]]
model_match = "google/gemma-4*"
vision_supported = true
# Tool calling: native channel.
native_tools = true
preferred_tool_format = "native"
prefers_xml_tools = false
# Reasoning: `enabled` thinking mode only; thinking blocks are styled inline.
thinking_modes = ["enabled"]
thinking_block_style = "inline"
# Wire-format handling.
honors_chat_template_kwargs = false
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false

# Broad rule for `moonshotai/*` models on Together. It declares no thinking
# modes and sets `thinking_block_style = "none"`.
# NOTE(review): presumably a fallback for Moonshot models not covered by the
# more specific `moonshotai/kimi-k2*` rule — confirm rule precedence.
[[provider.together]]
model_match = "moonshotai/*"
# Tool calling: native channel.
native_tools = true
preferred_tool_format = "native"
prefers_xml_tools = false
# Reasoning.
thinking_block_style = "none"
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false