# harn-vm 0.8.128

# NVIDIA NIM hosted LLM routes. The API Catalog exposes 100+ OpenAI-compatible
# model IDs; Harn catalogues the current agent/coding routes that are useful as
# first-class selectors and lets the live refresh workflow surface the rest.

# Nemotron 3 Ultra served through NVIDIA NIM (OpenAI-style chat dialect).
# NOTE(review): no wire_model is set — presumably the table key doubles as the
# ID sent on the wire; confirm against the catalog loader.
[models."nvidia/nemotron-3-ultra-550b-a55b"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
name = "Nemotron 3 Ultra 550B A55B (NVIDIA NIM)"
logical_model = "nemotron-3-ultra-550b-a55b"
equivalence_group = "nemotron-3-ultra-550b-a55b"
context_window = 1_048_576  # 2^20
tier = "frontier"
open_weight = true
capabilities = ["tools", "streaming", "thinking"]
strengths = ["agentic", "reasoning", "coding", "tool_use", "long_context"]
complementary_with = [
    "anthropic-claude",
    "openai-gpt",
    "google-gemini",
    "qwen",
    "deepseek",
    "kimi",
]

# Mixture-of-experts sizing (550B total / 55B active) plus where and when the
# figures were recorded.
[models."nvidia/nemotron-3-ultra-550b-a55b".architecture]
moe = true
parameter_count_b = 550.0
active_parameter_count_b = 55.0
source_url = "https://build.nvidia.com/nvidia/nemotron-3-ultra-550b-a55b/modelcard"
last_verified = "2026-06-20"

# Nemotron 3 Super served through NVIDIA NIM (OpenAI-style chat dialect).
# NOTE(review): no wire_model is set — presumably the table key doubles as the
# ID sent on the wire; confirm against the catalog loader.
[models."nvidia/nemotron-3-super-120b-a12b"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
name = "Nemotron 3 Super 120B A12B (NVIDIA NIM)"
logical_model = "nemotron-3-super-120b-a12b"
equivalence_group = "nemotron-3-super-120b-a12b"
context_window = 1_048_576  # 2^20
tier = "frontier"
open_weight = true
capabilities = ["tools", "streaming", "thinking"]
strengths = [
    "agentic",
    "reasoning",
    "coding",
    "tool_use",
    "long_context",
    "speed",
]
complementary_with = [
    "anthropic-claude",
    "openai-gpt",
    "google-gemini",
    "qwen",
    "deepseek",
    "kimi",
]

# Mixture-of-experts sizing (120B total / 12B active) plus where and when the
# figures were recorded.
[models."nvidia/nemotron-3-super-120b-a12b".architecture]
moe = true
parameter_count_b = 120.0
active_parameter_count_b = 12.0
source_url = "https://build.nvidia.com/nvidia/nemotron-3-super-120b-a12b/modelcard"
last_verified = "2026-06-20"

# Nemotron 3 Nano served through NVIDIA NIM; catalogued as a "mid" tier route.
# NOTE(review): no wire_model is set — presumably the table key doubles as the
# ID sent on the wire; confirm against the catalog loader.
[models."nvidia/nemotron-3-nano-30b-a3b"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
name = "Nemotron 3 Nano 30B A3B (NVIDIA NIM)"
logical_model = "nemotron-3-nano-30b-a3b"
equivalence_group = "nemotron-3-nano-30b-a3b"
context_window = 262_144  # 2^18
tier = "mid"
open_weight = true
capabilities = ["tools", "streaming", "thinking"]
strengths = [
    "speed",
    "cheap",
    "reasoning",
    "coding",
    "tool_use",
    "long_context",
]

# Mixture-of-experts sizing (30B total / 3B active) plus where and when the
# figures were recorded.
[models."nvidia/nemotron-3-nano-30b-a3b".architecture]
moe = true
parameter_count_b = 30.0
active_parameter_count_b = 3.0
source_url = "https://build.nvidia.com/nvidia/nemotron-3-nano-30b-a3b/modelcard"
last_verified = "2026-06-20"

# Nemotron 3 Nano Omni (reasoning) served through NVIDIA NIM. The capability
# list adds vision, video and audio on top of tools/streaming/thinking.
# NOTE(review): no wire_model is set — presumably the table key doubles as the
# ID sent on the wire; confirm against the catalog loader.
[models."nvidia/nemotron-3-nano-omni-30b-a3b-reasoning"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
name = "Nemotron 3 Nano Omni 30B A3B Reasoning (NVIDIA NIM)"
logical_model = "nemotron-3-nano-omni-30b-a3b-reasoning"
equivalence_group = "nemotron-3-nano-omni-30b-a3b-reasoning"
context_window = 262_144  # 2^18
tier = "mid"
open_weight = true
capabilities = [
    "tools",
    "vision",
    "video",
    "audio",
    "streaming",
    "thinking",
]
strengths = [
    "vision",
    "reasoning",
    "tool_use",
    "summarization",
    "long_context",
]

# Mixture-of-experts sizing (30B total / 3B active) plus where and when the
# figures were recorded.
[models."nvidia/nemotron-3-nano-omni-30b-a3b-reasoning".architecture]
moe = true
parameter_count_b = 30.0
active_parameter_count_b = 3.0
source_url = "https://build.nvidia.com/nvidia/nemotron-3-nano-omni-30b-a3b-reasoning/modelcard"
last_verified = "2026-06-20"

# DeepSeek V4 Pro as hosted on NVIDIA NIM. The catalog key is namespaced under
# "nvidia/", while wire_model carries the "deepseek-ai/" ID.
[models."nvidia/deepseek-v4-pro"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
wire_model = "deepseek-ai/deepseek-v4-pro"
name = "DeepSeek V4 Pro (NVIDIA NIM)"
logical_model = "deepseek-v4-pro"
equivalence_group = "deepseek-v4-pro"
context_window = 1_048_576  # 2^20
tier = "frontier"
open_weight = true
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
strengths = ["coding", "agentic", "tool_use", "reasoning", "long_context"]
complementary_with = [
    "anthropic-claude",
    "openai-gpt",
    "google-gemini",
    "qwen",
    "kimi",
]

# DeepSeek V4 Flash as hosted on NVIDIA NIM; catalogued as a "mid" tier route.
# The catalog key is namespaced under "nvidia/", while wire_model carries the
# "deepseek-ai/" ID.
[models."nvidia/deepseek-v4-flash"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
wire_model = "deepseek-ai/deepseek-v4-flash"
name = "DeepSeek V4 Flash (NVIDIA NIM)"
logical_model = "deepseek-v4-flash"
equivalence_group = "deepseek-v4-flash"
context_window = 1_048_576  # 2^20
tier = "mid"
open_weight = true
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
strengths = [
    "speed",
    "coding",
    "agentic",
    "tool_use",
    "reasoning",
    "long_context",
]
complementary_with = [
    "anthropic-claude",
    "openai-gpt",
    "google-gemini",
    "qwen",
    "kimi",
]

# MiniMax M3 as hosted on NVIDIA NIM. Marked open_weight = false, and the
# capability list includes vision and video input.
# NOTE(review): pricing currency/unit is not stated in this file — presumably
# USD per million tokens; confirm.
[models."nvidia/minimax-m3"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
wire_model = "minimaxai/minimax-m3"
name = "MiniMax M3 (NVIDIA NIM)"
logical_model = "minimax-m3"
equivalence_group = "minimax-m3"
context_window = 1_048_576  # 2^20
tier = "frontier"
open_weight = false
capabilities = [
    "tools",
    "vision",
    "video",
    "streaming",
    "thinking",
    "prompt_caching",
]
strengths = [
    "coding",
    "agentic",
    "tool_use",
    "reasoning",
    "long_context",
    "vision",
]
pricing = { input_per_mtok = 0.60, output_per_mtok = 2.40, cache_read_per_mtok = 0.12 }

# MiniMax M2.7 as hosted on NVIDIA NIM.
# NOTE(review): pricing currency/unit is not stated in this file — presumably
# USD per million tokens; confirm.
[models."nvidia/minimax-m2.7"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
wire_model = "minimaxai/minimax-m2.7"
name = "MiniMax M2.7 (NVIDIA NIM)"
logical_model = "minimax-m2.7"
equivalence_group = "minimax-m2.7"
context_window = 196_608  # 192 * 1024
tier = "frontier"
open_weight = true
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
strengths = [
    "speed",
    "coding",
    "agentic",
    "tool_use",
    "reasoning",
    "long_context",
]
pricing = { input_per_mtok = 0.30, output_per_mtok = 1.20, cache_read_per_mtok = 0.06 }

# Kimi K2.6 as hosted on NVIDIA NIM. Note that logical_model and
# equivalence_group use the "moonshot-" prefixed ID, unlike the catalog key.
# NOTE(review): pricing currency/unit is not stated in this file — presumably
# USD per million tokens; confirm.
[models."nvidia/kimi-k2.6"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
wire_model = "moonshotai/kimi-k2.6"
name = "Kimi K2.6 (NVIDIA NIM)"
logical_model = "moonshot-kimi-k2.6"
equivalence_group = "moonshot-kimi-k2.6"
context_window = 262_144  # 2^18
tier = "frontier"
open_weight = true
capabilities = ["tools", "vision", "streaming", "thinking", "prompt_caching"]
strengths = [
    "coding",
    "agentic",
    "long_context",
    "tool_use",
    "reasoning",
    "vision",
]
complementary_with = [
    "anthropic-claude",
    "openai-gpt",
    "google-gemini",
    "qwen",
    "deepseek",
]
pricing = { input_per_mtok = 0.60, output_per_mtok = 2.50, cache_read_per_mtok = 0.15 }

# Mistral Medium 3.5 128B as hosted on NVIDIA NIM. Capabilities list only
# tools and streaming (no "thinking" entry).
[models."nvidia/mistral-medium-3.5-128b"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
wire_model = "mistralai/mistral-medium-3.5-128b"
name = "Mistral Medium 3.5 128B (NVIDIA NIM)"
logical_model = "mistral-medium-3.5-128b"
equivalence_group = "mistral-medium-3.5-128b"
context_window = 262_144  # 2^18
tier = "frontier"
open_weight = true
capabilities = ["tools", "streaming"]
strengths = ["coding", "agentic", "tool_use"]

# Only the total parameter count is recorded; source_url points at the general
# model listing rather than a per-model card.
[models."nvidia/mistral-medium-3.5-128b".architecture]
parameter_count_b = 128.0
source_url = "https://build.nvidia.com/models"
last_verified = "2026-06-20"

# GPT-OSS 120B as hosted on NVIDIA NIM. The catalog key is the wire_model ID
# with an added "nvidia/" prefix.
[models."nvidia/openai/gpt-oss-120b"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
wire_model = "openai/gpt-oss-120b"
name = "GPT-OSS 120B (NVIDIA NIM)"
logical_model = "openai-gpt-oss-120b"
equivalence_group = "openai-gpt-oss-120b"
context_window = 131_072  # 2^17
tier = "frontier"
open_weight = true
capabilities = ["tools", "streaming", "thinking"]
strengths = ["cheap", "tool_use", "reasoning"]

# Mixture-of-experts sizing (117B total / 5.1B active), license, and where and
# when the figures were recorded.
[models."nvidia/openai/gpt-oss-120b".architecture]
moe = true
parameter_count_b = 117.0
active_parameter_count_b = 5.1
license = "Apache-2.0"
source_url = "https://developers.openai.com/api/docs/models/gpt-oss-120b"
last_verified = "2026-06-20"

# GPT-OSS 20B as hosted on NVIDIA NIM; catalogued as a "mid" tier route. The
# catalog key is the wire_model ID with an added "nvidia/" prefix.
[models."nvidia/openai/gpt-oss-20b"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
wire_model = "openai/gpt-oss-20b"
name = "GPT-OSS 20B (NVIDIA NIM)"
logical_model = "openai-gpt-oss-20b"
equivalence_group = "openai-gpt-oss-20b"
context_window = 131_072  # 2^17
tier = "mid"
open_weight = true
capabilities = ["tools", "streaming", "thinking"]
strengths = ["speed", "cheap", "tool_use", "reasoning"]

# Mixture-of-experts sizing (21B total / 3.6B active), license, and where and
# when the figures were recorded.
[models."nvidia/openai/gpt-oss-20b".architecture]
moe = true
parameter_count_b = 21.0
active_parameter_count_b = 3.6
license = "Apache-2.0"
source_url = "https://developers.openai.com/api/docs/models/gpt-oss-20b"
last_verified = "2026-06-20"

# GLM 5.1 as hosted on NVIDIA NIM. The catalog key is the wire_model ID with an
# added "nvidia/" prefix.
# NOTE(review): pricing currency/unit is not stated in this file — presumably
# USD per million tokens; confirm.
[models."nvidia/z-ai/glm-5.1"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
wire_model = "z-ai/glm-5.1"
name = "GLM 5.1 (NVIDIA NIM)"
logical_model = "glm-5.1"
equivalence_group = "glm-5.1"
context_window = 202_752  # 198 * 1024
tier = "frontier"
open_weight = true
capabilities = ["tools", "streaming", "thinking", "prompt_caching"]
strengths = ["coding", "agentic", "tool_use", "reasoning", "long_context"]
pricing = { input_per_mtok = 1.40, output_per_mtok = 4.40, cache_read_per_mtok = 0.26 }

# Step 3.7 Flash as hosted on NVIDIA NIM. Marked open_weight = false; the
# catalog key is namespaced under "nvidia/", while wire_model carries the
# "stepfun-ai/" ID.
[models."nvidia/step-3.7-flash"]
provider = "nvidia"
served_variant = "nvidia-nim"
api_dialect = "openai_chat"
wire_model = "stepfun-ai/step-3.7-flash"
name = "Step 3.7 Flash (NVIDIA NIM)"
logical_model = "step-3.7-flash"
equivalence_group = "step-3.7-flash"
context_window = 131_072  # 2^17
tier = "frontier"
open_weight = false
capabilities = ["tools", "vision", "streaming", "thinking", "prompt_caching"]
strengths = [
    "coding",
    "agentic",
    "long_context",
    "tool_use",
    "reasoning",
    "vision",
]