# harn-vm 0.8.141

# ---------- Together reasoning models ----------------------------------------
#
# Together exposes three reasoning surfaces today:
#   - Qwen uses `chat_template_kwargs.thinking` / `enable_thinking`.
#   - Hybrid Kimi/GLM/Gemma models use `reasoning.enabled`.
#   - GPT-OSS uses top-level `reasoning_effort`.
# Keep each host-specific rule explicit so the generic OpenAI fallback does
# not accidentally claim the wrong wire format.

# gpt-oss on Together uses NATIVE tool calls. The prior `json`/text pin was a
# defensive workaround for an "empty native streaming payload" defect that no
# longer reproduces: 2026-06-12 live probes (raw curl + Harn's streaming path)
# confirm gpt-oss-120b returns clean OpenAI-shape `tool_calls` in both
# non-streaming and SSE modes, and Harn's index-keyed SSE reassembler
# (transport.rs `oai_tool_map`) accumulates the arg-fragment deltas correctly.
# Conversely, BOTH `json` and `text` formats yield ZERO parsed calls because
# gpt-oss does not emit Harn's ```tool/name/args contract — it emits a bare
# `{"tool":..,"arguments":..}` dialect that the fenced-JSON parser does not
# recognize. Native is therefore the measured-best (and only working) channel.
# Pinned EXPLICITLY (not inherited) because `openai/gpt-oss-120b` is a
# catalogued model and the catalog invariant requires the rule to set both
# fields. To change, set `preferred_tool_format` ("native" | "json" | "text").
#
# `reasoning_required_for_tools = true`: gpt-oss (Harmony) performs tool
# calls INSIDE the chain-of-thought channel, so reasoning-off breaks tool
# calling (billed-noncommittal: 0 tool_calls + tiny completion). This is the
# OPPOSITE of the Qwen3 quirk — gpt-oss must NOT carry an
# `auto_reasoning_overrides = { agent/verify/code = "off" }` override; the
# flag makes reasoning_policy refuse to floor tool tasks to off.
[[provider.together]]
model_match = "openai/gpt-oss-*"
# Tool calling: native channel, pinned explicitly rather than inherited.
native_tools = true
preferred_tool_format = "native"
prefers_xml_tools = false
# Reasoning: effort-style control; reasoning must stay on for tool tasks.
thinking_modes = ["effort"]
reasoning_effort_supported = true
reasoning_required_for_tools = true
thinking_block_style = "reasoning_summary"
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false

[[provider.together]]
model_match = "deepseek-ai/deepseek-v4*"
# Tool calling: native channel.
native_tools = true
preferred_tool_format = "native"
prefers_xml_tools = false
# Reasoning: both the on/off toggle and effort levels are listed.
thinking_modes = ["enabled", "effort"]
reasoning_effort_supported = true
thinking_block_style = "reasoning_summary"
# Wire-format and caching flags.
server_parser = "none"
honors_chat_template_kwargs = false
prompt_caching = true
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false

[[provider.together]]
model_match = "moonshotai/kimi-k2*"
# Tool calling: native channel.
native_tools = true
preferred_tool_format = "native"
prefers_xml_tools = false
# Reasoning: on/off toggle only; thinking is rendered inline.
thinking_modes = ["enabled"]
thinking_block_style = "inline"
honors_chat_template_kwargs = false
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false

[[provider.together]]
model_match = "zai-org/glm-5*"
# Tool calling: native is flagged unreliable, so the text format is preferred
# (see the parity notes for the smoke-test result behind this pin).
native_tools = true
preferred_tool_format = "text"
prefers_xml_tools = false
tool_mode_parity = "native_unreliable"
tool_mode_parity_notes = "2026-06-20 Harn agent-loop smoke after parser fix: forced native/off emitted no dispatchable tool_calls and hallucinated a result token; heredoc text tools completed the loop."
# Reasoning: on/off toggle only, overridden to "off" for agent, verify, and
# code tasks.
thinking_modes = ["enabled"]
auto_reasoning_overrides = { agent = "off", verify = "off", code = "off" }
reasoning_text_promotable = false
thinking_block_style = "inline"
# Wire-format and caching flags.
honors_chat_template_kwargs = false
prompt_caching = true
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false

# Probed 2026-06-24 (docs/eval/provider-tool-mode-sweep-2026-06-24.md, N=5,
# forced-format single-tool authoring of a backslash-heavy Zig body): heredoc
# text beat fenced-JSON on BOTH dispatch (4/5 vs 3/5) and fidelity (4/5 vs 2/5);
# native was 1/5 fidelity. The prior `json` pin still rode the escaping-prone
# channel — flip to escape-free heredoc text so backslash-heavy code round-trips.
[[provider.together]]
model_match = "minimaxai/minimax-m2.7*"
# Tool calling: native is flagged unreliable, so the text format is preferred
# (the parity notes record the sweep numbers behind this pin).
native_tools = true
preferred_tool_format = "text"
prefers_xml_tools = false
tool_mode_parity = "native_unreliable"
tool_mode_parity_notes = "2026-06-24 forced-format sweep (N=5): Together MiniMax-M2.7 native 1/5 fidelity, fenced-JSON 2/5; heredoc text 4/5 (best on both dispatch and fidelity). Backslash-heavy bodies only round-trip on the escape-free text channel. Supersedes the 2026-06-20 json pin."
# Reasoning: on/off toggle only; thinking is rendered inline.
thinking_modes = ["enabled"]
reasoning_text_promotable = false
thinking_block_style = "inline"
# Wire-format and caching flags.
honors_chat_template_kwargs = false
prompt_caching = true
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false

[[provider.together]]
model_match = "google/gemma-4*"
# Modalities.
vision_supported = true
# Tool calling: native channel.
native_tools = true
preferred_tool_format = "native"
prefers_xml_tools = false
# Reasoning: on/off toggle only; thinking is rendered inline.
thinking_modes = ["enabled"]
thinking_block_style = "inline"
honors_chat_template_kwargs = false
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false

# Broad Moonshot rule. NOTE(review): presumably the fallback for Moonshot
# models not claimed by a more specific `moonshotai/...` rule — confirm the
# matcher's precedence before reordering entries.
[[provider.together]]
model_match = "moonshotai/*"
# Tool calling: native channel.
native_tools = true
preferred_tool_format = "native"
prefers_xml_tools = false
# Reasoning: no thinking modes are declared and no thinking block is expected.
thinking_block_style = "none"
# Structured output.
structured_output = "native"
structured_output_mode = "native_json"
# Prompt-shaping preferences.
prefers_markdown_scaffolding = true
prefers_xml_scaffolding = false
prefers_role_developer = false
supports_assistant_prefill = false