harn-vm 0.8.114

# ---------- DeepInfra — open-weight OpenAI-compatible host -------------------
# DeepInfra serves open weights (DeepSeek, Qwen, Llama, Kimi, GPT-OSS) on a
# standard OpenAI chat-completions surface with native tool calls. Reasoning
# families expose an inline thinking trace; DeepSeek routes honor prompt
# caching. Catalog keys are `deepinfra/<hf-id>`, so patterns match the
# family substring.

# DeepSeek family: native tool calls, an inline thinking trace, and prompt
# caching enabled.
[[provider.deepinfra]]
# Matched against the catalog key (`deepinfra/<hf-id>`); selects any id that
# contains "deepseek".
model_match = "*deepseek*"
# Tools are declared native, and native is also the preferred tool format.
native_tools = true
preferred_tool_format = "native"
structured_output = "native"
# `enabled` is the only thinking mode declared for this family.
thinking_modes = ["enabled"]
# DeepSeek routes honor prompt caching (see the section header).
prompt_caching = true
# NOTE(review): the text tool wire format is marked supported even though the
# preferred format is native — presumably a fallback path; confirm against
# the consumer.
text_tool_wire_format_supported = true
# Scaffolding preferences: Markdown on, XML off.
prefers_xml_scaffolding = false
prefers_markdown_scaffolding = true
# NOTE(review): set alongside `structured_output` above; confirm whether both
# keys are read or one is a legacy alias.
structured_output_mode = "native_json"
supports_assistant_prefill = false
prefers_role_developer = false
prefers_xml_tools = false
# The thinking trace is inline, matching the section header's description of
# reasoning families.
thinking_block_style = "inline"

# Qwen 3.6 family: native tool calls, an inline thinking trace, and vision.
# Unlike the DeepSeek row, this row sets no `prompt_caching` key.
[[provider.deepinfra]]
# Version-specific pattern: only catalog ids containing "qwen3.6" select this
# row.
model_match = "*qwen3.6*"
# Tools are declared native, and native is also the preferred tool format.
native_tools = true
preferred_tool_format = "native"
structured_output = "native"
# `enabled` is the only thinking mode declared for this family.
thinking_modes = ["enabled"]
# NOTE(review): vision is declared under two keys; confirm whether both are
# read or one is a legacy alias that can be dropped.
vision = true
vision_supported = true
# NOTE(review): text tool wire format is marked supported although the
# preferred format is native — presumably a fallback; confirm.
text_tool_wire_format_supported = true
# Scaffolding preferences: Markdown on, XML off.
prefers_xml_scaffolding = false
prefers_markdown_scaffolding = true
# NOTE(review): set alongside `structured_output` above; confirm whether both
# keys are needed.
structured_output_mode = "native_json"
supports_assistant_prefill = false
prefers_role_developer = false
prefers_xml_tools = false
# The thinking trace is inline, matching the section header's description of
# reasoning families.
thinking_block_style = "inline"

# DeepInfra-hosted GPT-OSS is the same Harmony model as the cerebras/fireworks
# gpt-oss-120b. This row is placed BEFORE the catch-all `*` rule (first match
# wins) so that it does not fall through to the non-reasoning default. Without
# it, `reasoning_required_for_tools` resolves OFF and the eval loop bills a
# noncommittal, because gpt-oss calls tools INSIDE the chain-of-thought channel.
# The row mirrors the Cerebras gpt-oss row: NATIVE tools, reasoning-effort
# thinking {low, medium, high}, and `reasoning_required_for_tools = true`. It
# must NOT carry a Qwen-style `auto_reasoning_overrides = "off"`.
[[provider.deepinfra]]
# Selects any catalog id containing "gpt-oss". This row has to precede the
# `*` row because the first matching row wins.
model_match = "*gpt-oss*"
# Tools are declared native, and native is also the preferred tool format.
native_tools = true
preferred_tool_format = "native"
structured_output = "native"
# Thinking is effort-based here, unlike the `enabled` mode declared on the
# DeepSeek and Qwen rows.
thinking_modes = ["effort"]
reasoning_effort_supported = true
reasoning_effort_levels = ["low", "medium", "high"]
# Must stay true: gpt-oss calls tools inside its chain-of-thought channel, so
# tool use depends on reasoning being on.
reasoning_required_for_tools = true
# NOTE(review): text tool wire format is marked supported although the
# preferred format is native — presumably a fallback; confirm.
text_tool_wire_format_supported = true
# Scaffolding preferences: Markdown on, XML off.
prefers_xml_scaffolding = false
prefers_markdown_scaffolding = true
# NOTE(review): set alongside `structured_output` above; confirm whether both
# keys are needed.
structured_output_mode = "native_json"
supports_assistant_prefill = false
prefers_role_developer = false
prefers_xml_tools = false
# Differs from the "inline" style used by the DeepSeek and Qwen rows.
# NOTE(review): confirm this matches the Cerebras gpt-oss row it mirrors.
thinking_block_style = "reasoning_summary"

# Catch-all, non-reasoning default for every DeepInfra model that no earlier
# row selects. Because the first matching row wins, this row must remain the
# last `[[provider.deepinfra]]` entry; any family-specific row added after it
# would never match.
[[provider.deepinfra]]
model_match = "*"
# Tools are declared native, and native is also the preferred tool format.
native_tools = true
preferred_tool_format = "native"
structured_output = "native"
# NOTE(review): text tool wire format is marked supported although the
# preferred format is native — presumably a fallback; confirm.
text_tool_wire_format_supported = true
# Scaffolding preferences: Markdown on, XML off.
prefers_xml_scaffolding = false
prefers_markdown_scaffolding = true
# NOTE(review): set alongside `structured_output` above; confirm whether both
# keys are needed.
structured_output_mode = "native_json"
supports_assistant_prefill = false
prefers_role_developer = false
prefers_xml_tools = false
# No `thinking_modes` key is set on this row and the block style is "none".
thinking_block_style = "none"