Skip to main content

harn_vm/llm/capabilities/
model.rs

1//! Capability DTOs and the wire-dialect model.
2//!
3//! Pure data types: the on-disk [`CapabilitiesFile`] schema, per-provider
4//! [`ProviderDefaults`], the resolved [`Capabilities`] struct callers consume,
5//! and the [`WireDialect`] enum that types a route's message wire format. The
6//! `ProviderRule` matrix row and the resolution engine that turns these DTOs
7//! into a `Capabilities` live in `super::rule`.
8
9use std::collections::BTreeMap;
10
11use serde::Deserialize;
12
13use super::rule::ProviderRule;
14
15/// Parsed on-disk capabilities schema. Public so harn-cli can
16/// construct one directly when wiring harn.toml overrides.
17#[derive(Debug, Clone, Deserialize, Default)]
18pub struct CapabilitiesFile {
19    /// Per-provider ordered rule lists. The first matching rule wins; a
20    /// matching rule with `extends = true` contributes only the fields it
21    /// sets and lets resolution continue to later matching rules (see
22    /// [`ProviderRule::extends`]).
23    #[serde(default)]
24    pub provider: BTreeMap<String, Vec<ProviderRule>>,
25    /// Per-provider defaults applied to every matching row and to
26    /// provider/model pairs that have no model-specific row. This keeps
27    /// transport-shape facts in data without repeating them on every
28    /// generation-specific capability row.
29    #[serde(default)]
30    pub provider_defaults: BTreeMap<String, ProviderDefaults>,
31    /// Sibling → canonical family mapping. Providers with no rule of
32    /// their own fall through to the named family (recursively).
33    #[serde(default)]
34    pub provider_family: BTreeMap<String, String>,
35    /// Per-provider adaptive rate/concurrency governor limits, keyed by
36    /// provider id. Consumed by `crate::llm::rate_governor` when the
37    /// `llm.rate_governor` flag is enabled, so provider limits stay catalog
38    /// data instead of call-site branches.
39    #[serde(default)]
40    pub provider_limits: BTreeMap<String, ProviderLimits>,
41}
42
43/// Adaptive-governor limits for one provider. Every field is optional so a
44/// catalog fragment can pin just the axes it knows; unset axes fall back to the
45/// governor's conservative built-in defaults.
46#[derive(Debug, Clone, Deserialize, Default, PartialEq)]
47pub struct ProviderLimits {
48    /// Ceiling the AIMD concurrency limiter additively climbs toward on
49    /// sustained success.
50    #[serde(default)]
51    pub max_concurrency: Option<u32>,
52    /// Floor the AIMD limiter multiplicatively decreases toward on a throttle
53    /// signal.
54    #[serde(default)]
55    pub min_concurrency: Option<u32>,
56    /// Requests-per-minute token bucket. `None` disables the RPM bucket.
57    #[serde(default)]
58    pub rpm: Option<u32>,
59    /// Tokens-per-minute token bucket, charged by estimated input + output
60    /// tokens. `None` disables the TPM bucket.
61    #[serde(default)]
62    pub tpm: Option<u64>,
63    /// Whether the AIMD adaptive concurrency loop is active. When `false`, the
64    /// concurrency limit is pinned at `max_concurrency`.
65    #[serde(default)]
66    pub adaptive: Option<bool>,
67    /// Circuit-breaker / backoff parameters. Absent means built-in defaults.
68    #[serde(default)]
69    pub backoff: Option<GovernorBackoff>,
70}
71
/// Exponential-backoff-with-jitter parameters for the governor circuit breaker.
/// Provider `Retry-After` values always take precedence over the computed
/// window.
///
/// Every field is optional; a key missing from the catalog deserializes to
/// `None` (each field carries `#[serde(default)]`).
#[derive(Debug, Clone, Deserialize, PartialEq)]
pub struct GovernorBackoff {
    /// First OPEN window, in milliseconds.
    #[serde(default)]
    pub base_ms: Option<u64>,
    /// Ceiling for the OPEN window, in milliseconds.
    #[serde(default)]
    pub max_ms: Option<u64>,
    /// Growth factor applied per consecutive OPEN cycle.
    #[serde(default)]
    pub multiplier: Option<f64>,
    /// Full-jitter toggle.
    #[serde(default)]
    pub jitter: Option<bool>,
}
90
91/// Provider-wide default fields merged into matching rules.
92#[derive(Debug, Clone, Deserialize, Default)]
93pub struct ProviderDefaults {
94    /// Message/request/response wire format used by shared helpers.
95    /// Known values are `openai`, `anthropic`, `gemini`, and `ollama`.
96    #[serde(default)]
97    pub message_wire_format: Option<String>,
98    /// Native tool definition wire shape. Known values are `openai`
99    /// and `anthropic`.
100    #[serde(default)]
101    pub native_tool_wire_format: Option<String>,
102    /// Whether image content blocks may reference remote URLs.
103    #[serde(default)]
104    pub image_url_input_supported: Option<bool>,
105    /// File-upload transport used by `std/files.upload`. Known values
106    /// are `anthropic` and `gemini`.
107    #[serde(default)]
108    pub file_upload_wire_format: Option<String>,
109    /// Provider-specific reasoning request shape for OpenAI-compatible
110    /// transports. Known values are `openrouter` and `enabled`.
111    #[serde(default)]
112    pub reasoning_wire_format: Option<String>,
113    #[serde(default)]
114    pub files_api_supported: Option<bool>,
115    #[serde(default)]
116    pub batch_api: Option<bool>,
117    #[serde(default)]
118    pub batch_wire_format: Option<String>,
119    #[serde(default)]
120    pub batch_input_mode: Option<String>,
121    #[serde(default)]
122    pub batch_discount_percent: Option<u32>,
123    #[serde(default)]
124    pub batch_turnaround_hours: Option<u32>,
125    #[serde(default)]
126    pub seed_supported: Option<bool>,
127    #[serde(default)]
128    pub top_k_supported: Option<bool>,
129    #[serde(default)]
130    pub temperature_supported: Option<bool>,
131    #[serde(default)]
132    pub top_p_supported: Option<bool>,
133    #[serde(default)]
134    pub frequency_penalty_supported: Option<bool>,
135    #[serde(default)]
136    pub presence_penalty_supported: Option<bool>,
137}
138
139/// Copies `src` into `dst` when `src` is set (last-writer-wins overlay).
140pub(super) fn overlay_opt<T: Clone>(dst: &mut Option<T>, src: &Option<T>) {
141    if src.is_some() {
142        dst.clone_from(src);
143    }
144}
145
146/// Copies `src` into `dst` only when `dst` is still unset (fill-the-gaps).
147pub(super) fn fill_opt<T: Clone>(dst: &mut Option<T>, src: &Option<T>) {
148    if dst.is_none() {
149        dst.clone_from(src);
150    }
151}
152
/// Applies one merge rule to every `ProviderDefaults` field, exactly once and
/// in declaration order.
///
/// `$op` is the path of a function shaped like `overlay_opt` / `fill_opt`; it
/// is called as `$op(&mut $dst.field, &$src.field)` for each field. The merge
/// roster is spelled out only in the public rule below, so `overlay` and
/// `fill_missing_from` differ solely in the rule they pass.
macro_rules! merge_provider_defaults {
    // Internal rule: expand a single `$op` call per listed field.
    (@fields $dst:expr, $src:expr, $op:path; $($field:ident),+ $(,)?) => {{
        $(
            $op(&mut $dst.$field, &$src.$field);
        )+
    }};
    // Public entry point: forwards to the internal rule with the full roster.
    ($dst:expr, $src:expr, $op:path) => {
        merge_provider_defaults!(
            @fields $dst, $src, $op;
            message_wire_format,
            native_tool_wire_format,
            image_url_input_supported,
            file_upload_wire_format,
            reasoning_wire_format,
            files_api_supported,
            batch_api,
            batch_wire_format,
            batch_input_mode,
            batch_discount_percent,
            batch_turnaround_hours,
            seed_supported,
            top_k_supported,
            temperature_supported,
            top_p_supported,
            frequency_penalty_supported,
            presence_penalty_supported,
        )
    };
}
198
199impl ProviderDefaults {
200    pub(super) fn overlay(&mut self, other: &ProviderDefaults) {
201        merge_provider_defaults!(self, other, overlay_opt);
202    }
203
204    pub(super) fn fill_missing_from(&mut self, other: &ProviderDefaults) {
205        merge_provider_defaults!(self, other, fill_opt);
206    }
207
208    pub(super) fn has_any_field(&self) -> bool {
209        self.message_wire_format.is_some()
210            || self.native_tool_wire_format.is_some()
211            || self.image_url_input_supported.is_some()
212            || self.file_upload_wire_format.is_some()
213            || self.reasoning_wire_format.is_some()
214            || self.files_api_supported.is_some()
215            || self.batch_api.is_some()
216            || self.batch_wire_format.is_some()
217            || self.batch_input_mode.is_some()
218            || self.batch_discount_percent.is_some()
219            || self.batch_turnaround_hours.is_some()
220            || self.seed_supported.is_some()
221            || self.top_k_supported.is_some()
222            || self.temperature_supported.is_some()
223            || self.top_p_supported.is_some()
224            || self.frequency_penalty_supported.is_some()
225            || self.presence_penalty_supported.is_some()
226    }
227}
228
/// Wire dialect (message/request/response format) spoken by a route.
///
/// One typed value replaces two older, drift-prone encodings: the stringly
/// `Capabilities.message_wire_format` field (compared against
/// `"anthropic"`/`"gemini"`/`"ollama"` literals at a dozen call sites) and the
/// `(is_anthropic_style, is_ollama)` boolean pair threaded independently
/// through the transport/response layers. With a closed enum an unhandled or
/// mistyped dialect becomes a compile error, and two `bool`s can no longer
/// silently disagree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WireDialect {
    /// Anthropic native Messages API (`/v1/messages`); the only dialect that
    /// surfaces Claude's extended-thinking stream.
    /// `message_wire_format = "anthropic"`.
    Anthropic,
    /// OpenAI-compatible Chat Completions (`/v1/chat/completions`); the
    /// default for hosted/openai-shape routes.
    /// `message_wire_format = "openai"`.
    OpenAiCompat,
    /// Ollama native `/api/chat`. `message_wire_format = "ollama"`.
    Ollama,
    /// Google Gemini `generateContent`. `message_wire_format = "gemini"`.
    Gemini,
}

impl WireDialect {
    /// Parses the catalog's `message_wire_format` string.
    ///
    /// Only the exact strings `"anthropic"`, `"ollama"` and `"gemini"` select
    /// their own dialect. Everything else, the explicit `"openai"` included,
    /// resolves to [`WireDialect::OpenAiCompat`]. That matches the pre-cutover
    /// behavior, where every `== "anthropic"/"gemini"/"ollama"` check fell
    /// through to the OpenAI-compatible path.
    pub fn from_message_wire_format(value: &str) -> WireDialect {
        // Non-default dialects only: any miss lands on the OpenAI-compatible
        // fallback, so `OpenAiCompat` needs no entry of its own.
        [Self::Anthropic, Self::Ollama, Self::Gemini]
            .iter()
            .copied()
            .find(|dialect| dialect.as_str() == value)
            .unwrap_or(Self::OpenAiCompat)
    }

    /// Canonical `message_wire_format` string, for display and round-trip.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Anthropic => "anthropic",
            Self::OpenAiCompat => "openai",
            Self::Ollama => "ollama",
            Self::Gemini => "gemini",
        }
    }

    /// True when the route speaks Anthropic's native Messages shape.
    pub fn is_anthropic(self) -> bool {
        self == Self::Anthropic
    }

    /// True when the route speaks Ollama's native `/api/chat` shape.
    pub fn is_ollama(self) -> bool {
        self == Self::Ollama
    }

    /// True when the route speaks Google Gemini's `generateContent` shape.
    pub fn is_gemini(self) -> bool {
        self == Self::Gemini
    }
}
293
/// How the neutral `computer` tool projects onto a route's native computer-use
/// surface (the `computer_use_style` capability). A typed enum rather than a raw
/// string so an unknown value in a capability source is a load-time
/// deserialize error instead of a silently-disabled computer tool.
///
/// Variants (de)serialize under their snake_case names
/// (`rename_all = "snake_case"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ComputerUseStyle {
    /// Anthropic `computer_20251124` native tool. Wire name: `native_anthropic`.
    NativeAnthropic,
    /// OpenAI Responses `computer` native tool. Wire name: `native_openai`.
    NativeOpenai,
    /// Accessibility / set-of-marks grounding over the universal function tool.
    /// Wire name: `grounded`.
    Grounded,
    /// The plain function-schema `computer` tool (the universal default).
    /// Wire name: `function`.
    Function,
}
310
/// Screenshot downscaling policy applied before an image reaches the model (the
/// `screenshot_scaling` capability). Typed for the same reason as
/// [`ComputerUseStyle`] — an unknown value fails the capability load loudly.
///
/// Variants (de)serialize under their snake_case names
/// (`rename_all = "snake_case"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ScreenshotScaling {
    /// Fit within Anthropic's XGA (1024x768), preserving aspect ratio.
    /// Wire name: `xga`.
    Xga,
    /// Send the capture at its native resolution (OpenAI et al.).
    /// Wire name: `original`.
    Original,
}
322
/// Resolved capabilities for a `(provider, model)` pair. Unset rule
/// fields resolve to `false` / empty / `None` so callers never have to
/// unwrap an `Option<bool>` for what are really boolean gates.
///
/// NOTE(review): the `Default` impl in this file is not uniformly
/// `false`/empty — it sets several gates to `true` (flagged on the fields
/// below) and uses the string sentinels `"openai"`, `"none"` and
/// `"unverified"`. Whether unset rule fields resolve through `Default` is
/// decided by the resolution engine in `super::rule`; confirm there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Capabilities {
    pub native_tools: bool,
    /// Typed message wire dialect. `Default` uses `WireDialect::OpenAiCompat`.
    pub message_wire_format: WireDialect,
    /// `Default` uses `"openai"`.
    pub native_tool_wire_format: String,
    pub defer_loading: bool,
    pub tool_search: Vec<String>,
    pub responses_api: bool,
    pub hosted_tools: Vec<String>,
    pub remote_mcp: bool,
    pub conversation_state: bool,
    pub compaction: bool,
    pub background_mode: bool,
    pub batch_api: bool,
    pub batch_wire_format: Option<String>,
    pub batch_input_mode: Option<String>,
    pub batch_discount_percent: Option<u32>,
    pub batch_turnaround_hours: Option<u32>,
    pub tool_approval_policy: Option<String>,
    pub max_tools: Option<u32>,
    pub prompt_caching: bool,
    /// `Default` uses `"none"`.
    pub cache_breakpoint_style: String,
    pub vision: bool,
    pub audio: bool,
    pub pdf: bool,
    pub video: bool,
    pub files_api_supported: bool,
    pub file_upload_wire_format: Option<String>,
    pub structured_output: Option<String>,
    /// Legacy mirror for CLI display and older callers.
    pub json_schema: Option<String>,
    pub prefers_xml_scaffolding: bool,
    /// See [`ProviderRule::reserved_tool_call_token`].
    pub reserved_tool_call_token: bool,
    pub prefers_markdown_scaffolding: bool,
    /// `Default` uses `"none"`.
    pub structured_output_mode: String,
    pub supports_assistant_prefill: bool,
    pub prefers_role_developer: bool,
    pub prefers_xml_tools: bool,
    /// `Default` uses `"none"`.
    pub thinking_block_style: String,
    /// Whether this route emits its reasoning INLINE in the text channel as
    /// `<think>...</think>` blocks (local Ollama/llama.cpp reasoning models,
    /// Qwen3 via vLLM, Kimi) rather than in a separate provider reasoning
    /// field. When true, the `llm_call` envelope builder splits those blocks
    /// out of `text`/`prose`/`visible_text` and folds them into the reasoning
    /// channel, mirroring how hosted providers surface a dedicated thinking
    /// field. Derived from `thinking_block_style == "inline"` — the same
    /// population that represents reasoning as inline `<think>` in prompt
    /// context is the one that emits it that way in responses.
    pub emits_inline_reasoning: bool,
    pub thinking_modes: Vec<String>,
    pub interleaved_thinking_supported: bool,
    pub anthropic_beta_features: Vec<String>,
    pub vision_supported: bool,
    /// `Default` sets this to `true`.
    pub image_url_input_supported: bool,
    pub preserve_thinking: bool,
    /// `Default` uses `"none"`.
    pub server_parser: String,
    pub honors_chat_template_kwargs: bool,
    pub chat_template_options_field: Option<String>,
    pub requires_completion_tokens: bool,
    /// True when the route is served ONLY by the provider Responses API and
    /// rejects `/v1/chat/completions` (OpenAI `*-codex` models). Harn routes
    /// such calls through the Responses provider automatically.
    pub chat_completions_unsupported: bool,
    pub requires_streaming: bool,
    pub reasoning_effort_supported: bool,
    pub reasoning_effort_levels: Vec<String>,
    pub reasoning_none_supported: bool,
    /// See [`ProviderRule::max_thinking_budget`]. `None` means the model uses
    /// the provider's own default ceiling.
    pub max_thinking_budget: Option<i64>,
    /// `Default` sets this to `true`.
    pub reasoning_disable_supported: bool,
    /// See [`ProviderRule::reasoning_required_for_tools`].
    pub reasoning_required_for_tools: bool,
    /// `Default` sets this to `true`.
    pub reasoning_text_promotable: bool,
    pub reasoning_wire_format: Option<String>,
    /// `Default` sets this to `true`.
    pub seed_supported: bool,
    /// `Default` sets this to `true`.
    pub top_k_supported: bool,
    /// `Default` sets this to `true`.
    pub temperature_supported: bool,
    /// `Default` sets this to `true`.
    pub top_p_supported: bool,
    /// `Default` sets this to `true`.
    pub frequency_penalty_supported: bool,
    /// `Default` sets this to `true`.
    pub presence_penalty_supported: bool,
    pub allowed_tool_choice_modes: Vec<String>,
    pub requires_tool_result_adjacency: bool,
    /// `Default` sets this to `true`.
    pub supports_parallel_tool_calls: bool,
    pub tools_exclude_response_format: bool,
    pub recommended_endpoint: Option<String>,
    /// `Default` sets this to `true`.
    pub text_tool_wire_format_supported: bool,
    pub preferred_tool_format: Option<String>,
    pub tool_mode_parity: Option<String>,
    pub tool_mode_parity_notes: Option<String>,
    pub thinking_disable_directive: Option<String>,
    /// Per-task auto-policy reasoning-level overrides for this route.
    /// See [`ProviderRule::auto_reasoning_overrides`].
    pub auto_reasoning_overrides: BTreeMap<String, String>,
    /// OpenRouter upstream provider names to exclude from routing for this
    /// row. See [`ProviderRule::provider_route_denylist`]. Empty means "no
    /// route restriction".
    pub provider_route_denylist: Vec<String>,
    /// OpenRouter upstream provider names this row is PINNED to (allowlist), in
    /// preference order. See [`ProviderRule::openrouter_provider_order`]. Empty
    /// means "no pin" (free OpenRouter routing).
    pub openrouter_provider_order: Vec<String>,
    /// Serving-quality / precision trust verdict for this route. See
    /// [`ProviderRule::serving_precision`]. `"unverified"` when unset.
    pub serving_precision: String,
    /// How the neutral `computer` tool projects onto this route's native
    /// computer-use surface. `None` means the route exposes no computer-use
    /// surface. See [`ComputerUseStyle`].
    pub computer_use_style: Option<ComputerUseStyle>,
    /// Screenshot downscaling policy applied before the image reaches the
    /// model. `None` means unset. See [`ScreenshotScaling`].
    pub screenshot_scaling: Option<ScreenshotScaling>,
    /// Whether this route requires echoing acknowledged safety checks on the
    /// computer-use follow-up turn (OpenAI Responses `pending_safety_checks`
    /// → `acknowledged_safety_checks`). See [`ProviderRule::safety_ack_flow`].
    pub safety_ack_flow: bool,
}
444
445impl Default for Capabilities {
446    fn default() -> Self {
447        Self {
448            native_tools: false,
449            message_wire_format: WireDialect::OpenAiCompat,
450            native_tool_wire_format: "openai".to_string(),
451            defer_loading: false,
452            tool_search: Vec::new(),
453            responses_api: false,
454            hosted_tools: Vec::new(),
455            remote_mcp: false,
456            conversation_state: false,
457            compaction: false,
458            background_mode: false,
459            batch_api: false,
460            batch_wire_format: None,
461            batch_input_mode: None,
462            batch_discount_percent: None,
463            batch_turnaround_hours: None,
464            tool_approval_policy: None,
465            max_tools: None,
466            prompt_caching: false,
467            cache_breakpoint_style: "none".to_string(),
468            vision: false,
469            audio: false,
470            pdf: false,
471            video: false,
472            files_api_supported: false,
473            file_upload_wire_format: None,
474            structured_output: None,
475            json_schema: None,
476            prefers_xml_scaffolding: false,
477            reserved_tool_call_token: false,
478            prefers_markdown_scaffolding: false,
479            structured_output_mode: "none".to_string(),
480            supports_assistant_prefill: false,
481            prefers_role_developer: false,
482            prefers_xml_tools: false,
483            thinking_block_style: "none".to_string(),
484            emits_inline_reasoning: false,
485            thinking_modes: Vec::new(),
486            interleaved_thinking_supported: false,
487            anthropic_beta_features: Vec::new(),
488            vision_supported: false,
489            image_url_input_supported: true,
490            preserve_thinking: false,
491            server_parser: "none".to_string(),
492            honors_chat_template_kwargs: false,
493            chat_template_options_field: None,
494            requires_completion_tokens: false,
495            chat_completions_unsupported: false,
496            requires_streaming: false,
497            reasoning_effort_supported: false,
498            reasoning_effort_levels: Vec::new(),
499            reasoning_none_supported: false,
500            max_thinking_budget: None,
501            reasoning_disable_supported: true,
502            reasoning_required_for_tools: false,
503            reasoning_text_promotable: true,
504            reasoning_wire_format: None,
505            seed_supported: true,
506            top_k_supported: true,
507            temperature_supported: true,
508            top_p_supported: true,
509            frequency_penalty_supported: true,
510            presence_penalty_supported: true,
511            allowed_tool_choice_modes: Vec::new(),
512            requires_tool_result_adjacency: false,
513            supports_parallel_tool_calls: true,
514            tools_exclude_response_format: false,
515            recommended_endpoint: None,
516            text_tool_wire_format_supported: true,
517            preferred_tool_format: None,
518            tool_mode_parity: None,
519            tool_mode_parity_notes: None,
520            thinking_disable_directive: None,
521            auto_reasoning_overrides: BTreeMap::new(),
522            provider_route_denylist: Vec::new(),
523            openrouter_provider_order: Vec::new(),
524            serving_precision: "unverified".to_string(),
525            computer_use_style: None,
526            screenshot_scaling: None,
527            safety_ack_flow: false,
528        }
529    }
530}