harn-vm 0.8.4

Async bytecode virtual machine for the Harn programming language
# Harn provider capability matrix.
#
# One `[[provider.<name>]]` array entry per rule; first match wins per
# (provider, model). Place more specific `model_match` patterns before
# wildcards. `version_min = [major, minor]` narrows the match to a model
# ID whose `(major, minor)` version (parsed from the Anthropic / OpenAI
# naming schemes) is greater than or equal to the given tuple. Rules
# whose `version_min` is unparseable for the given model are skipped.
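#
# Worked example against the Anthropic rules below: for `claude-opus-4.6`
# the `claude-opus-*` rule with `version_min = [4, 7]` is skipped
# (4.6 < 4.7) and the `[4, 6]` rule wins first, yielding
# `thinking_modes = ["enabled"]` plus interleaved thinking; a hypothetical
# `claude-opus-4.8` would instead hit the `[4, 7]` rule and resolve to
# adaptive thinking.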
#
# `[provider_family]` declares the sibling providers that inherit rules
# from a canonical family when they have no rule of their own (OpenRouter
# et al. speak the same Responses API and forward `tool_search` /
# `defer_loading` unchanged — they fall through to `[[provider.openai]]`
# by default).
#
# Users override or extend this table per-project via
# `[[capabilities.provider.<name>]]` entries in `harn.toml`. Project
# overrides are checked before the built-in rules for the same provider
# name and are authoritative on overlap.
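#
# Illustrative `harn.toml` override (hypothetical values) that narrows the
# built-in Sonnet rule for a single project:
#
#   [[capabilities.provider.anthropic]]
#   model_match = "claude-sonnet-*"
#   audio_supported = false
#   max_tools = 512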
#
# Supported per-rule fields:
#   model_match     : glob pattern matched against the lowercased model ID.
#   version_min     : [major, minor] lower bound, provider-aware parse.
#   native_tools    : whether the model accepts native tool-call wire shape.
#   defer_loading   : whether `defer_loading: true` is honored on tool defs.
#   tool_search     : list of native tool-search variants, preferred first.
#                     Anthropic = ["bm25", "regex"];
#                     OpenAI    = ["hosted", "client"].
#   max_tools       : cap on tool-definition count the provider will accept.
#                     Used by harn-lint to warn about oversized registries.
#   prompt_caching  : whether the provider honors cache_control blocks.
#   vision          : whether Harn can send visual input blocks on this route.
#   audio_supported : whether Harn can send audio input blocks on this route.
#   pdf_supported   : whether Harn can send PDF/document input blocks on this route.
#   files_api_supported:
#                     whether file_id references from std/files::upload are accepted.
#   structured_output: structured-output transport: native, tool_use, format_kw, none.
#   thinking_modes  : supported script-facing modes: enabled, adaptive, effort.
#   interleaved_thinking_supported:
#                     whether `thinking` can opt Anthropic Messages API
#                     requests into the interleaved-thinking beta header.
#   anthropic_beta_features:
#                     unconditional Anthropic beta feature names to request
#                     for this route.
#   vision_supported: whether image content blocks are accepted.
#   preserve_thinking: whether prior <think> blocks should be carried forward.
#   server_parser   : server-side response parser that transforms model output.
#   honors_chat_template_kwargs: whether chat_template_kwargs are honored.
#   requires_completion_tokens: whether to send max_completion_tokens instead of max_tokens.
#   reasoning_effort_supported: whether reasoning_effort is accepted.
#   reasoning_none_supported: whether reasoning_effort="none" is accepted.
#   recommended_endpoint: preferred endpoint family for this route.
#   text_tool_wire_format_supported: whether Harn text tool calls survive.
#   thinking_disable_directive:
#                     in-prompt directive (e.g. "/no_think" for Qwen3 chat
#                     templates) that disables the model's thinking mode.
#                     When set, Harn auto-prepends this to the system message
#                     whenever the resolved `thinking` config is `Disabled`,
#                     so script authors don't need to know provider-specific
#                     prompt directives. Idempotent — never injected twice.
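#
# Directive-injection sketch (assumed message shape, not exact wire bytes):
# with `thinking` resolved to `Disabled` on a `qwen3*` route whose rule sets
# `thinking_disable_directive = "/no_think"`, Harn sends "/no_think"
# followed by the original system text, and leaves the prompt untouched if
# the directive is already present.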

# ---------- Anthropic (Claude) ------------------------------------------------

# Claude 4.7+ uses adaptive thinking instead of explicit budgets.
[[provider.anthropic]]
model_match = "claude-haiku-*"
version_min = [4, 7]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["adaptive"]
vision_supported = true

[[provider.anthropic]]
model_match = "claude-opus-*"
version_min = [4, 7]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["adaptive"]
interleaved_thinking_supported = true
vision_supported = true

[[provider.anthropic]]
model_match = "claude-sonnet-*"
version_min = [4, 7]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["adaptive"]
vision_supported = true

# Haiku 4.5+ supports server-side tool search.
[[provider.anthropic]]
model_match = "claude-haiku-*"
version_min = [4, 5]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["enabled"]
vision_supported = true

# Opus 4.6+ supports interleaved thinking.
[[provider.anthropic]]
model_match = "claude-opus-*"
version_min = [4, 6]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["enabled"]
interleaved_thinking_supported = true
vision_supported = true

[[provider.anthropic]]
model_match = "claude-opus-*"
version_min = [4, 0]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["enabled"]
vision_supported = true

# Sonnet 4.0+ supports tool search.
[[provider.anthropic]]
model_match = "claude-sonnet-*"
version_min = [4, 0]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["enabled"]
vision_supported = true

# OpenRouter-style `anthropic/claude-...` prefixes.
[[provider.anthropic]]
model_match = "anthropic/claude-haiku-*"
version_min = [4, 7]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["adaptive"]
vision_supported = true

[[provider.anthropic]]
model_match = "anthropic/claude-opus-*"
version_min = [4, 7]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["adaptive"]
interleaved_thinking_supported = true
vision_supported = true

[[provider.anthropic]]
model_match = "anthropic/claude-sonnet-*"
version_min = [4, 7]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["adaptive"]
vision_supported = true

[[provider.anthropic]]
model_match = "anthropic/claude-haiku-*"
version_min = [4, 5]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["enabled"]
vision_supported = true

[[provider.anthropic]]
model_match = "anthropic/claude-opus-*"
version_min = [4, 6]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["enabled"]
interleaved_thinking_supported = true
vision_supported = true

[[provider.anthropic]]
model_match = "anthropic/claude-opus-*"
version_min = [4, 0]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["enabled"]
vision_supported = true

[[provider.anthropic]]
model_match = "anthropic/claude-sonnet-*"
version_min = [4, 0]
native_tools = true
defer_loading = true
tool_search = ["bm25", "regex"]
max_tools = 10000
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["enabled"]
vision_supported = true

# Catch-all for older Claude models — native tools + prompt caching +
# thinking, but no progressive tool disclosure.
[[provider.anthropic]]
model_match = "claude-*"
native_tools = true
prompt_caching = true
vision = true
audio_supported = true
pdf_supported = true
files_api_supported = true
structured_output = "tool_use"
thinking_modes = ["enabled"]
vision_supported = true

# ---------- OpenAI family -----------------------------------------------------
#
# `provider.openai` rules are inherited by the sibling providers declared
# in `[provider_family]` below (OpenRouter, Together, Groq, DeepSeek,
# Fireworks, HuggingFace, local vLLM/SGLang). Siblings may still add their
# own `[[provider.<name>]]` rules and those win over the openai fallback.
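#
# Shadowing sketch (hypothetical rule, kept commented out): a sibling
# provider can pin its own capabilities, which then win over this openai
# fallback for matching model IDs, e.g.
#
#   [[provider.groq]]
#   model_match = "llama-3.3-*"
#   native_tools = true
#   structured_output = "native"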

# Specific gpt-4o / gpt-4.1 families first, ahead of the versioned gpt-* wildcards.
[[provider.openai]]
model_match = "gpt-4o*"
native_tools = true
vision = true
audio_supported = true
structured_output = "native"
vision_supported = true

[[provider.openai]]
model_match = "gpt-4.1*"
native_tools = true
vision_supported = true
structured_output = "native"

# gpt-5.4+ exposes native `tool_search` on the Responses API.
[[provider.openai]]
model_match = "gpt-*"
version_min = [5, 4]
native_tools = true
defer_loading = true
tool_search = ["hosted", "client"]
vision_supported = true
structured_output = "native"
thinking_modes = ["effort"]
reasoning_effort_supported = true
reasoning_none_supported = true

[[provider.openai]]
model_match = "gpt-*"
version_min = [5, 1]
native_tools = true
vision_supported = true
structured_output = "native"
thinking_modes = ["effort"]
reasoning_effort_supported = true
reasoning_none_supported = true

[[provider.openai]]
model_match = "gpt-*"
version_min = [5, 0]
native_tools = true
vision_supported = true
structured_output = "native"
thinking_modes = ["effort"]
reasoning_effort_supported = true

# Legacy GPT: native tool calls only.
[[provider.openai]]
model_match = "gpt-*"
native_tools = true
structured_output = "native"

# Reasoning family (o1, o3, o4, ...).
[[provider.openai]]
model_match = "o1*"
native_tools = true
structured_output = "native"
thinking_modes = ["effort"]
requires_completion_tokens = true
reasoning_effort_supported = true

[[provider.openai]]
model_match = "o3*"
native_tools = true
structured_output = "native"
thinking_modes = ["effort"]
requires_completion_tokens = true
reasoning_effort_supported = true

[[provider.openai]]
model_match = "o4*"
native_tools = true
structured_output = "native"
thinking_modes = ["effort"]
vision_supported = true
requires_completion_tokens = true
reasoning_effort_supported = true

# OpenRouter-style provider-prefixed IDs.
[[provider.openai]]
model_match = "openai/gpt-4o*"
native_tools = true
vision = true
audio_supported = true
structured_output = "native"
vision_supported = true

[[provider.openai]]
model_match = "openai/gpt-4.1*"
native_tools = true
vision_supported = true
structured_output = "native"

[[provider.openai]]
model_match = "openai/gpt-*"
version_min = [5, 4]
native_tools = true
defer_loading = true
tool_search = ["hosted", "client"]
vision_supported = true
structured_output = "native"
thinking_modes = ["effort"]
reasoning_effort_supported = true
reasoning_none_supported = true

[[provider.openai]]
model_match = "openai/gpt-*"
version_min = [5, 1]
native_tools = true
structured_output = "native"
thinking_modes = ["effort"]
reasoning_effort_supported = true
reasoning_none_supported = true

[[provider.openai]]
model_match = "openai/gpt-*"
version_min = [5, 0]
native_tools = true
structured_output = "native"
thinking_modes = ["effort"]
reasoning_effort_supported = true

[[provider.openai]]
model_match = "openai/gpt-*"
native_tools = true
structured_output = "native"

[[provider.openai]]
model_match = "openai/o1*"
native_tools = true
structured_output = "native"
thinking_modes = ["effort"]
requires_completion_tokens = true
reasoning_effort_supported = true

[[provider.openai]]
model_match = "openai/o3*"
native_tools = true
structured_output = "native"
thinking_modes = ["effort"]
requires_completion_tokens = true
reasoning_effort_supported = true

[[provider.openai]]
model_match = "openai/o4*"
native_tools = true
structured_output = "native"
thinking_modes = ["effort"]
vision_supported = true
requires_completion_tokens = true
reasoning_effort_supported = true

# ---------- Local / Ollama ----------------------------------------------------
#
# Local providers don't advertise native tool_search or prompt caching.
# Native-tools coverage depends on the specific model. The Qwen 3 / 3.5
# packages on Ollama document native tool calling and thinking on the
# official docs and model pages. Qwen 3.6 adds `preserve_thinking`, which
# Alibaba recommends for multi-turn agentic / coding workflows (the
# flagship use case we're building for), so keep it on by default.
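#
# Projects that don't want prior <think> blocks carried forward can turn it
# off per-project (illustrative `harn.toml` override, same mechanism as the
# header describes):
#
#   [[capabilities.provider.ollama]]
#   model_match = "qwen3.6*"
#   preserve_thinking = false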

[[provider.ollama]]
model_match = "llava*"
vision_supported = true

[[provider.ollama]]
model_match = "bakllava*"
vision_supported = true

[[provider.ollama]]
model_match = "llama3.2-vision*"
vision_supported = true

[[provider.ollama]]
model_match = "gemma3*"
vision_supported = true

[[provider.ollama]]
model_match = "gemma4*"
vision_supported = true

[[provider.ollama]]
model_match = "qwen3.6*"
native_tools = true
structured_output = "format_kw"
thinking_modes = ["enabled"]
preserve_thinking = true
server_parser = "ollama_qwen3coder"
honors_chat_template_kwargs = false
recommended_endpoint = "/api/generate-raw"
text_tool_wire_format_supported = false
thinking_disable_directive = "/no_think"

[[provider.ollama]]
model_match = "qwen3*"
native_tools = true
structured_output = "format_kw"
thinking_modes = ["enabled"]
server_parser = "ollama_qwen3coder"
honors_chat_template_kwargs = false
recommended_endpoint = "/api/generate-raw"
text_tool_wire_format_supported = false
thinking_disable_directive = "/no_think"

# llama.cpp / llama-server speaks OpenAI-compat but is a separate
# provider entry from Ollama. Give it the same Qwen3.6 wiring so the
# local GGUF path gets `preserve_thinking` by default too.
[[provider.llamacpp]]
model_match = "*qwen3.6*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
preserve_thinking = true
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

[[provider.llamacpp]]
model_match = "*qwen3*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

# Local vLLM / SGLang — same story. The capability rules here gate
# chat_template_kwargs emission in openai_compat; the actual model has
# to have been launched with the matching --chat-template.
[[provider.local]]
model_match = "*qwen3.6*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
preserve_thinking = true
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

[[provider.local]]
model_match = "*qwen3*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

# Apple Silicon MLX server (mlx-vlm). OpenAI-compat with vision; honors
# the same `chat_template_kwargs.preserve_thinking` knob as vLLM, since
# mlx-vlm reuses HF tokenizers / chat templates.
[[provider.mlx]]
model_match = "*qwen3.6*"
native_tools = true
vision = true
structured_output = "native"
thinking_modes = ["enabled"]
preserve_thinking = true
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

[[provider.mlx]]
model_match = "*qwen3*"
native_tools = true
vision = true
structured_output = "native"
thinking_modes = ["enabled"]
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

# ---------- Qwen3.6 family (routed providers) --------------------------------
#
# DashScope is the authoritative Qwen host; Fireworks and the local
# OpenAI-compatible servers honor Qwen `chat_template_kwargs`.
# OpenRouter forwards thinking through its `reasoning` field instead.
# The sibling fallthrough to `[[provider.openai]]` only contains gpt-* and
# o* patterns, none of which match Qwen model IDs, so we declare Qwen
# explicitly here to prevent a silent fallthrough to empty capabilities.

[[provider.dashscope]]
model_match = "qwen3.6*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
preserve_thinking = true
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

[[provider.dashscope]]
model_match = "qwen*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

[[provider.fireworks]]
model_match = "*qwen3.6*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
preserve_thinking = true
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

# Fireworks's own model IDs squash dots to `p` (`qwen3p6`, `qwen3p5`),
# so we need a second rule to catch their canonical naming.
[[provider.fireworks]]
model_match = "*qwen3p6*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
preserve_thinking = true
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

[[provider.fireworks]]
model_match = "*qwen*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

# OpenRouter: forwards Qwen thinking via its `reasoning` field (not
# chat_template_kwargs — transform_request strips that). Preservation
# works transparently since thinking blocks round-trip as assistant
# content; the reasoning enablement lives in `openrouter_reasoning_config`.
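#
# Wire sketch (illustrative): the "enabled" / "effort" modes on these rules
# surface as OpenRouter's top-level `reasoning` request object, roughly
# `"reasoning": { "effort": "medium" }`, not as chat_template_kwargs.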
[[provider.openrouter]]
model_match = "qwen/qwen3.6*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled", "effort"]
preserve_thinking = true
server_parser = "none"
honors_chat_template_kwargs = false
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

[[provider.openrouter]]
model_match = "qwen/qwen3-coder*"
native_tools = true
structured_output = "native"
server_parser = "none"
honors_chat_template_kwargs = false
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true

[[provider.openrouter]]
model_match = "qwen/*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled", "effort"]
server_parser = "none"
honors_chat_template_kwargs = false
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

# HuggingFace router + Together pass Qwen chat_template_kwargs through
# unchanged. Match the qwen3.6 model card shape (`Qwen/Qwen3.6-*`).
[[provider.huggingface]]
model_match = "qwen/qwen3.6*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
preserve_thinking = true
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

[[provider.huggingface]]
model_match = "qwen/qwen3-coder*"
native_tools = true
structured_output = "native"
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true

[[provider.huggingface]]
model_match = "qwen/*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

[[provider.together]]
model_match = "qwen/qwen3.6*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
preserve_thinking = true
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

[[provider.together]]
model_match = "qwen/qwen3-coder*"
native_tools = true
structured_output = "native"
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true

[[provider.together]]
model_match = "qwen/*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
server_parser = "none"
honors_chat_template_kwargs = true
recommended_endpoint = "/v1/chat/completions"
text_tool_wire_format_supported = true
thinking_disable_directive = "/no_think"

# ---------- DeepSeek reasoning (across host providers) -----------------------
#
# DeepSeek V3.1+ exposes reasoning via `chat_template_kwargs.enable_thinking`
# when routed through vLLM-style hosts (HuggingFace router, Together). On
# OpenRouter it surfaces as the `reasoning` field like other models on
# that platform. Prompt caching is DeepSeek-native (automatic, billed as
# cache_read tokens) so declare it wherever the host forwards the usage.
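#
# Request sketch implied by `honors_chat_template_kwargs = true` on these
# routes (field name per the note above; the exact emission lives in the
# openai_compat layer): `"chat_template_kwargs": { "enable_thinking": true }`.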

[[provider.together]]
model_match = "deepseek-ai/deepseek-v3*"
native_tools = true
thinking_modes = ["enabled"]
prompt_caching = true
structured_output = "native"
server_parser = "none"
honors_chat_template_kwargs = true

[[provider.huggingface]]
model_match = "deepseek-ai/deepseek-v3*"
native_tools = true
thinking_modes = ["enabled"]
prompt_caching = true
structured_output = "native"
server_parser = "none"
honors_chat_template_kwargs = true

[[provider.openrouter]]
model_match = "deepseek/deepseek-v3*"
native_tools = true
thinking_modes = ["enabled", "effort"]
prompt_caching = true
structured_output = "native"
server_parser = "none"
honors_chat_template_kwargs = false

# ---------- Google Gemini + Gemma 4 (via OpenRouter) -------------------------
#
# Gemini 2.5 exposes thinking budgets through OpenRouter's `reasoning`
# field. Gemma 4 does not; it's a plain instruction-tuned dense / MoE
# model with native tool calling.

[[provider.openrouter]]
model_match = "google/gemini-2.5*"
native_tools = true
thinking_modes = ["enabled", "effort"]
prompt_caching = true
vision_supported = true
vision = true
audio_supported = true
pdf_supported = true
structured_output = "native"

[[provider.gemini]]
model_match = "gemini-2.5*"
thinking_modes = ["enabled", "adaptive", "effort"]
vision_supported = true
audio_supported = true
pdf_supported = true
files_api_supported = true

[[provider.gemini]]
model_match = "models/gemini-2.5*"
thinking_modes = ["enabled", "adaptive", "effort"]
vision_supported = true
audio_supported = true
pdf_supported = true
files_api_supported = true

[[provider.gemini]]
model_match = "gemini-*"
vision_supported = true
audio_supported = true
pdf_supported = true
files_api_supported = true

[[provider.gemini]]
model_match = "models/gemini-*"
vision_supported = true
audio_supported = true
pdf_supported = true
files_api_supported = true

[[provider.openrouter]]
model_match = "google/gemma-4*"
native_tools = true
structured_output = "native"

# ---------- Together reasoning models ----------------------------------------
#
# Together exposes three reasoning surfaces today:
#   - Qwen uses `chat_template_kwargs.thinking` / `enable_thinking`.
#   - Hybrid Kimi/GLM/Gemma models use `reasoning.enabled`.
#   - GPT-OSS uses top-level `reasoning_effort`.
# Keep each host-specific rule explicit so the generic OpenAI fallback does
# not accidentally claim the wrong wire format.
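#
# Rough wire shapes per surface (illustrative values, field names from the
# list above):
#   Qwen           -> "chat_template_kwargs": { "enable_thinking": true }
#   Kimi/GLM/Gemma -> "reasoning": { "enabled": true }
#   GPT-OSS        -> "reasoning_effort": "high"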

[[provider.together]]
model_match = "openai/gpt-oss-*"
structured_output = "native"
thinking_modes = ["effort"]
reasoning_effort_supported = true

[[provider.together]]
model_match = "deepseek-ai/deepseek-v4*"
native_tools = true
thinking_modes = ["enabled", "effort"]
reasoning_effort_supported = true
prompt_caching = true
structured_output = "native"
server_parser = "none"
honors_chat_template_kwargs = false

[[provider.together]]
model_match = "moonshotai/kimi-k2*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
honors_chat_template_kwargs = false

[[provider.together]]
model_match = "zai-org/glm-5*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
honors_chat_template_kwargs = false

[[provider.together]]
model_match = "google/gemma-4*"
native_tools = true
structured_output = "native"
thinking_modes = ["enabled"]
honors_chat_template_kwargs = false

[[provider.together]]
model_match = "moonshotai/*"
native_tools = true
structured_output = "native"

# ---------- Enterprise cloud providers ---------------------------------------

# Bedrock Converse normalizes native tool calling across Anthropic, Meta,
# Amazon, Mistral, and other Bedrock-hosted model families. Tool-search and
# prompt-cache semantics remain provider/model-specific, so don't advertise
# those through the generic Bedrock route yet.
[[provider.bedrock]]
model_match = "*"
native_tools = true
recommended_endpoint = "/model/{model}/converse"
text_tool_wire_format_supported = true

# Azure OpenAI uses deployment-name routing but keeps the OpenAI chat
# completions tool-call body. Do not inherit OpenAI Responses API
# tool_search flags: this shim targets Azure's chat-completions endpoint.
[[provider.azure_openai]]
model_match = "gpt-*"
native_tools = true
recommended_endpoint = "/openai/deployments/{deployment}/chat/completions"
text_tool_wire_format_supported = true

[[provider.azure_openai]]
model_match = "o1*"
native_tools = true
recommended_endpoint = "/openai/deployments/{deployment}/chat/completions"
text_tool_wire_format_supported = true

[[provider.azure_openai]]
model_match = "o3*"
native_tools = true
recommended_endpoint = "/openai/deployments/{deployment}/chat/completions"
text_tool_wire_format_supported = true

[[provider.azure_openai]]
model_match = "o4*"
native_tools = true
recommended_endpoint = "/openai/deployments/{deployment}/chat/completions"
text_tool_wire_format_supported = true

# Vertex Gemini exposes function declarations in generateContent. Harn maps
# native tool schemas to that shape, but leaves provider-specific cached
# content / thinking flags off until they are wired as first-class options.
[[provider.vertex]]
model_match = "gemini-*"
native_tools = true
recommended_endpoint = "/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent"
text_tool_wire_format_supported = true

# ---------- Mock --------------------------------------------------------------
#
# Mock spoofs either Anthropic or OpenAI shape depending on the model ID.
# Handled specially in the loader (see `capabilities::lookup`): Claude-
# shape model strings route to the `anthropic` rule list first, otherwise
# fall through to the `openai` rule list.
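#
# e.g. provider = "mock" with model "claude-sonnet-4.7" resolves against the
# anthropic rules above, while "gpt-5.4" resolves against the openai rules
# (illustrative model IDs).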

# ---------- provider_family aliases ------------------------------------------

[provider_family]
openrouter = "openai"
together = "openai"
groq = "openai"
deepseek = "openai"
fireworks = "openai"
huggingface = "openai"
local = "openai"
# New April 2026 routes for Qwen3.6:
dashscope = "openai"
llamacpp = "openai"
mlx = "openai"