harn-vm 0.9.11

Async bytecode virtual machine for the Harn programming language
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
//! Capability DTOs and the wire-dialect model.
//!
//! Pure data types: the on-disk [`CapabilitiesFile`] schema, per-provider
//! [`ProviderDefaults`], the resolved [`Capabilities`] struct callers consume,
//! and the [`WireDialect`] enum that types a route's message wire format. The
//! `ProviderRule` matrix row and the resolution engine that turns these DTOs
//! into a `Capabilities` live in `super::rule`.

use std::collections::BTreeMap;

use serde::Deserialize;

use super::rule::ProviderRule;

/// Parsed on-disk capabilities schema. Public so harn-cli can
/// construct one directly when wiring harn.toml overrides.
///
/// Every table carries `#[serde(default)]`, so a file or override fragment
/// may omit any of them; an omitted table deserializes as an empty map.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct CapabilitiesFile {
    /// Per-provider ordered rule lists. The first matching rule wins; a
    /// matching rule with `extends = true` contributes only the fields it
    /// sets and lets resolution continue to later matching rules (see
    /// [`ProviderRule::extends`]).
    #[serde(default)]
    pub provider: BTreeMap<String, Vec<ProviderRule>>,
    /// Per-provider defaults applied to every matching row and to
    /// provider/model pairs that have no model-specific row. This keeps
    /// transport-shape facts in data without repeating them on every
    /// generation-specific capability row.
    #[serde(default)]
    pub provider_defaults: BTreeMap<String, ProviderDefaults>,
    /// Sibling → canonical family mapping. Providers with no rule of
    /// their own fall through to the named family (recursively).
    #[serde(default)]
    pub provider_family: BTreeMap<String, String>,
    /// Per-provider adaptive rate/concurrency governor limits, keyed by
    /// provider id. Consumed by `crate::llm::rate_governor` when the
    /// `llm.rate_governor` flag is enabled, so provider limits stay catalog
    /// data instead of call-site branches.
    #[serde(default)]
    pub provider_limits: BTreeMap<String, ProviderLimits>,
}

/// Adaptive-governor limits for one provider. Every field is optional so a
/// catalog fragment can pin just the axes it knows; unset axes fall back to the
/// governor's conservative built-in defaults.
///
/// This is a plain data carrier: nothing in this type validates the values.
/// NOTE(review): in particular `min_concurrency <= max_concurrency` is not
/// enforced here — presumably the governor clamps or rejects; confirm.
#[derive(Debug, Clone, Deserialize, Default, PartialEq)]
pub struct ProviderLimits {
    /// Ceiling the AIMD concurrency limiter additively climbs toward on
    /// sustained success.
    #[serde(default)]
    pub max_concurrency: Option<u32>,
    /// Floor the AIMD limiter multiplicatively decreases toward on a throttle
    /// signal.
    #[serde(default)]
    pub min_concurrency: Option<u32>,
    /// Requests-per-minute token bucket. `None` disables the RPM bucket.
    #[serde(default)]
    pub rpm: Option<u32>,
    /// Tokens-per-minute token bucket, charged by estimated input + output
    /// tokens. `None` disables the TPM bucket.
    #[serde(default)]
    pub tpm: Option<u64>,
    /// Whether the AIMD adaptive concurrency loop is active. When `false`, the
    /// concurrency limit is pinned at `max_concurrency`.
    #[serde(default)]
    pub adaptive: Option<bool>,
    /// Circuit-breaker / backoff parameters. Absent means built-in defaults.
    #[serde(default)]
    pub backoff: Option<GovernorBackoff>,
}

/// Exponential-backoff-with-jitter parameters for the governor circuit breaker.
/// Provider `Retry-After` values always take precedence over the computed
/// window.
///
/// Every knob is optional. NOTE(review): an unset knob presumably falls back
/// to the governor's built-in value, as for `ProviderLimits` — confirm.
///
/// `multiplier` is an `f64`, which is why this type (and `ProviderLimits`,
/// which embeds it) derives `PartialEq` but not `Eq`.
#[derive(Debug, Clone, Deserialize, PartialEq)]
pub struct GovernorBackoff {
    /// First OPEN window, in milliseconds.
    #[serde(default)]
    pub base_ms: Option<u64>,
    /// Ceiling for the OPEN window, in milliseconds.
    #[serde(default)]
    pub max_ms: Option<u64>,
    /// Growth factor applied per consecutive OPEN cycle.
    #[serde(default)]
    pub multiplier: Option<f64>,
    /// Full-jitter toggle.
    #[serde(default)]
    pub jitter: Option<bool>,
}

/// Provider-wide default fields merged into matching rules.
///
/// Every field is an `Option`: `None` means "this defaults entry has no
/// opinion", which is what the `overlay` / `fill_missing_from` merges and
/// `has_any_field` in this file key off.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProviderDefaults {
    /// Message/request/response wire format used by shared helpers.
    /// Known values are `openai`, `anthropic`, `gemini`, and `ollama`.
    #[serde(default)]
    pub message_wire_format: Option<String>,
    /// Native tool definition wire shape. Known values are `openai`
    /// and `anthropic`.
    #[serde(default)]
    pub native_tool_wire_format: Option<String>,
    /// Whether image content blocks may reference remote URLs.
    #[serde(default)]
    pub image_url_input_supported: Option<bool>,
    /// File-upload transport used by `std/files.upload`. Known values
    /// are `anthropic` and `gemini`.
    #[serde(default)]
    pub file_upload_wire_format: Option<String>,
    /// Provider-specific reasoning request shape for OpenAI-compatible
    /// transports. Known values are `openrouter` and `enabled`.
    #[serde(default)]
    pub reasoning_wire_format: Option<String>,
    /// Optional counterpart of the same-named `Capabilities` gate.
    /// NOTE(review): presumably whether the provider exposes a files API at
    /// all — confirm against the resolver in `super::rule`.
    #[serde(default)]
    pub files_api_supported: Option<bool>,
    /// Optional counterpart of the same-named `Capabilities` gate.
    /// NOTE(review): presumably whether a `seed` request parameter is
    /// accepted — confirm.
    #[serde(default)]
    pub seed_supported: Option<bool>,
    /// Optional counterpart of the same-named `Capabilities` gate.
    /// NOTE(review): presumably whether a `top_k` request parameter is
    /// accepted — confirm.
    #[serde(default)]
    pub top_k_supported: Option<bool>,
    /// Optional counterpart of the same-named `Capabilities` gate.
    /// NOTE(review): presumably whether a `temperature` request parameter is
    /// accepted — confirm.
    #[serde(default)]
    pub temperature_supported: Option<bool>,
    /// Optional counterpart of the same-named `Capabilities` gate.
    /// NOTE(review): presumably whether a `top_p` request parameter is
    /// accepted — confirm.
    #[serde(default)]
    pub top_p_supported: Option<bool>,
    /// Optional counterpart of the same-named `Capabilities` gate.
    /// NOTE(review): presumably whether a `frequency_penalty` request
    /// parameter is accepted — confirm.
    #[serde(default)]
    pub frequency_penalty_supported: Option<bool>,
    /// Optional counterpart of the same-named `Capabilities` gate.
    /// NOTE(review): presumably whether a `presence_penalty` request
    /// parameter is accepted — confirm.
    #[serde(default)]
    pub presence_penalty_supported: Option<bool>,
}

/// Last-writer-wins overlay: a set `src` replaces whatever `dst` held, while
/// an unset `src` leaves `dst` exactly as it was.
pub(super) fn overlay_opt<T: Clone>(dst: &mut Option<T>, src: &Option<T>) {
    if let Some(incoming) = src {
        *dst = Some(incoming.clone());
    }
}

/// Fill-the-gaps merge: an already-set `dst` is never touched; an unset `dst`
/// takes a clone of `src` (which may itself be `None`).
pub(super) fn fill_opt<T: Clone>(dst: &mut Option<T>, src: &Option<T>) {
    if dst.is_some() {
        return;
    }
    *dst = src.clone();
}

/// Applies `$op` — in this file either `overlay_opt` or `fill_opt` — to the
/// `(dst, src)` pair of every `ProviderDefaults` field, one call per field in
/// the order listed below.
///
/// The merge roster is the single identifier list in the entry arm, so
/// `overlay` and `fill_missing_from` can differ only in the merge rule they
/// pass. `has_any_field` keeps its own parallel list of the same fields; a
/// field added to the roster here must be added there as well.
macro_rules! merge_provider_defaults {
    // Internal arm: expand to one `$op(&mut dst.f, &src.f)` call per field.
    (@each $dst:expr, $src:expr, $op:path; $($field:ident),+) => {{
        $( $op(&mut $dst.$field, &$src.$field); )+
    }};
    // Entry point: `merge_provider_defaults!(dst, src, op)`.
    ($dst:expr, $src:expr, $op:path) => {
        merge_provider_defaults!(
            @each $dst, $src, $op;
            message_wire_format,
            native_tool_wire_format,
            image_url_input_supported,
            file_upload_wire_format,
            reasoning_wire_format,
            files_api_supported,
            seed_supported,
            top_k_supported,
            temperature_supported,
            top_p_supported,
            frequency_penalty_supported,
            presence_penalty_supported
        )
    };
}

impl ProviderDefaults {
    /// Layers `other` on top of `self`: every field `other` sets replaces the
    /// value held here, and fields `other` leaves unset are kept as they are.
    pub(super) fn overlay(&mut self, other: &Self) {
        merge_provider_defaults!(self, other, overlay_opt);
    }

    /// Back-fills `self` from `other`: only fields that are still unset here
    /// are copied; anything already set on `self` is left alone.
    pub(super) fn fill_missing_from(&mut self, other: &Self) {
        merge_provider_defaults!(self, other, fill_opt);
    }

    /// Returns `true` when at least one field is set.
    ///
    /// The destructuring below is deliberately exhaustive (no `..`), so a new
    /// struct field cannot be forgotten here without a compile error. The
    /// list mirrors the roster in `merge_provider_defaults!`.
    pub(super) fn has_any_field(&self) -> bool {
        let Self {
            message_wire_format,
            native_tool_wire_format,
            image_url_input_supported,
            file_upload_wire_format,
            reasoning_wire_format,
            files_api_supported,
            seed_supported,
            top_k_supported,
            temperature_supported,
            top_p_supported,
            frequency_penalty_supported,
            presence_penalty_supported,
        } = self;

        [
            message_wire_format.is_some(),
            native_tool_wire_format.is_some(),
            image_url_input_supported.is_some(),
            file_upload_wire_format.is_some(),
            reasoning_wire_format.is_some(),
            files_api_supported.is_some(),
            seed_supported.is_some(),
            top_k_supported.is_some(),
            temperature_supported.is_some(),
            top_p_supported.is_some(),
            frequency_penalty_supported.is_some(),
            presence_penalty_supported.is_some(),
        ]
        .iter()
        .any(|&is_set| is_set)
    }
}

/// The message/request/response wire dialect a route speaks.
///
/// One typed value replaces two older, drift-prone encodings of the same
/// fact: the stringly `Capabilities.message_wire_format` field (compared
/// against `"anthropic"`/`"gemini"`/`"ollama"` literals at a dozen call
/// sites) and the `(is_anthropic_style, is_ollama)` boolean pair threaded
/// independently through the transport/response layers. With a closed enum an
/// unhandled or mistyped dialect is a compile error, and two `bool`s can no
/// longer silently disagree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WireDialect {
    /// Anthropic native Messages API (`/v1/messages`). The only dialect that
    /// surfaces Claude's extended-thinking stream. `message_wire_format =
    /// "anthropic"`.
    Anthropic,
    /// OpenAI-compatible Chat Completions (`/v1/chat/completions`). The default
    /// for hosted/openai-shape routes. `message_wire_format = "openai"`.
    OpenAiCompat,
    /// Ollama native `/api/chat`. `message_wire_format = "ollama"`.
    Ollama,
    /// Google Gemini `generateContent`. `message_wire_format = "gemini"`.
    Gemini,
}

impl WireDialect {
    /// Parse the catalog's `message_wire_format` string.
    ///
    /// Matching is exact (case- and whitespace-sensitive). Anything that is
    /// not one of the three non-default names — including the explicit
    /// `"openai"` — resolves to [`WireDialect::OpenAiCompat`], which matches
    /// the pre-cutover behavior where every
    /// `== "anthropic"/"gemini"/"ollama"` check fell through to the
    /// OpenAI-compatible path.
    pub fn from_message_wire_format(value: &str) -> WireDialect {
        // Only the non-default dialects are probed; their names come from
        // `as_str`, so parsing and printing share one set of literals.
        [Self::Anthropic, Self::Ollama, Self::Gemini]
            .iter()
            .copied()
            .find(|dialect| dialect.as_str() == value)
            .unwrap_or(Self::OpenAiCompat)
    }

    /// The canonical `message_wire_format` string for display and round-trip.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::OpenAiCompat => "openai",
            Self::Anthropic => "anthropic",
            Self::Gemini => "gemini",
            Self::Ollama => "ollama",
        }
    }

    /// Whether this route speaks Anthropic's native Messages shape.
    pub fn is_anthropic(self) -> bool {
        self == Self::Anthropic
    }

    /// Whether this route speaks Ollama's native `/api/chat` shape.
    pub fn is_ollama(self) -> bool {
        self == Self::Ollama
    }

    /// Whether this route speaks Google Gemini's `generateContent` shape.
    pub fn is_gemini(self) -> bool {
        self == Self::Gemini
    }
}

/// Resolved capabilities for a `(provider, model)` pair. Unset rule
/// fields resolve to `false` / empty / `None` so callers never have to
/// unwrap an `Option<bool>` for what are really boolean gates.
///
/// NOTE(review): the `Default` impl in this file is not uniformly `false`.
/// It starts `image_url_input_supported`, `reasoning_disable_supported`,
/// `reasoning_text_promotable`, the six sampling-parameter gates,
/// `supports_parallel_tool_calls` and `text_tool_wire_format_supported` as
/// `true`, and uses `"none"` / `"openai"` / `"unverified"` for several string
/// fields. Whether an unset rule field resolves to those defaults or to
/// `false` is decided in `super::rule` and is not visible here — confirm
/// before relying on the sentence above.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Capabilities {
    pub native_tools: bool,
    /// Typed message wire dialect for the route; `Default` uses
    /// [`WireDialect::OpenAiCompat`].
    pub message_wire_format: WireDialect,
    /// Native tool definition wire shape; `Default` uses `"openai"`.
    pub native_tool_wire_format: String,
    pub defer_loading: bool,
    pub tool_search: Vec<String>,
    pub responses_api: bool,
    pub hosted_tools: Vec<String>,
    pub remote_mcp: bool,
    pub conversation_state: bool,
    pub compaction: bool,
    pub background_mode: bool,
    pub tool_approval_policy: Option<String>,
    pub max_tools: Option<u32>,
    pub prompt_caching: bool,
    pub cache_breakpoint_style: String,
    // NOTE(review): both `vision` and `vision_supported` (further down) exist;
    // how they relate is not visible in this file — confirm before merging
    // or deprecating either.
    pub vision: bool,
    pub audio: bool,
    pub pdf: bool,
    pub video: bool,
    pub files_api_supported: bool,
    pub file_upload_wire_format: Option<String>,
    pub structured_output: Option<String>,
    /// Legacy mirror for CLI display and older callers.
    pub json_schema: Option<String>,
    pub prefers_xml_scaffolding: bool,
    /// See [`ProviderRule::reserved_tool_call_token`].
    pub reserved_tool_call_token: bool,
    pub prefers_markdown_scaffolding: bool,
    pub structured_output_mode: String,
    pub supports_assistant_prefill: bool,
    pub prefers_role_developer: bool,
    pub prefers_xml_tools: bool,
    pub thinking_block_style: String,
    pub thinking_modes: Vec<String>,
    pub interleaved_thinking_supported: bool,
    pub anthropic_beta_features: Vec<String>,
    pub vision_supported: bool,
    /// `Default` is `true`. NOTE(review): presumably the resolved form of
    /// `ProviderDefaults::image_url_input_supported` (whether image content
    /// blocks may reference remote URLs) — confirm.
    pub image_url_input_supported: bool,
    pub preserve_thinking: bool,
    pub server_parser: String,
    pub honors_chat_template_kwargs: bool,
    pub chat_template_options_field: Option<String>,
    pub requires_completion_tokens: bool,
    /// True when the route is served ONLY by the provider Responses API and
    /// rejects `/v1/chat/completions` (OpenAI `*-codex` models). Harn routes
    /// such calls through the Responses provider automatically.
    pub chat_completions_unsupported: bool,
    pub requires_streaming: bool,
    pub reasoning_effort_supported: bool,
    pub reasoning_effort_levels: Vec<String>,
    pub reasoning_none_supported: bool,
    /// See [`ProviderRule::max_thinking_budget`]. `None` means the model uses
    /// the provider's own default ceiling.
    pub max_thinking_budget: Option<i64>,
    pub reasoning_disable_supported: bool,
    /// See [`ProviderRule::reasoning_required_for_tools`].
    pub reasoning_required_for_tools: bool,
    pub reasoning_text_promotable: bool,
    pub reasoning_wire_format: Option<String>,
    // The six sampling-parameter gates below all start as `true` in `Default`.
    pub seed_supported: bool,
    pub top_k_supported: bool,
    pub temperature_supported: bool,
    pub top_p_supported: bool,
    pub frequency_penalty_supported: bool,
    pub presence_penalty_supported: bool,
    pub allowed_tool_choice_modes: Vec<String>,
    pub requires_tool_result_adjacency: bool,
    pub supports_parallel_tool_calls: bool,
    pub tools_exclude_response_format: bool,
    pub recommended_endpoint: Option<String>,
    pub text_tool_wire_format_supported: bool,
    pub preferred_tool_format: Option<String>,
    pub tool_mode_parity: Option<String>,
    pub tool_mode_parity_notes: Option<String>,
    pub thinking_disable_directive: Option<String>,
    /// Per-task auto-policy reasoning-level overrides for this route.
    /// See [`ProviderRule::auto_reasoning_overrides`].
    pub auto_reasoning_overrides: BTreeMap<String, String>,
    /// OpenRouter upstream provider names to exclude from routing for this
    /// row. See [`ProviderRule::provider_route_denylist`]. Empty means "no
    /// route restriction".
    pub provider_route_denylist: Vec<String>,
    /// OpenRouter upstream provider names this row is PINNED to (allowlist), in
    /// preference order. See [`ProviderRule::openrouter_provider_order`]. Empty
    /// means "no pin" (free OpenRouter routing).
    pub openrouter_provider_order: Vec<String>,
    /// Serving-quality / precision trust verdict for this route. See
    /// [`ProviderRule::serving_precision`]. `"unverified"` when unset.
    pub serving_precision: String,
}

impl Default for Capabilities {
    /// Baseline capabilities before any rule or provider default is applied.
    ///
    /// Fields are grouped by the kind of default they take rather than in
    /// declaration order, so the non-`false` baselines stand out.
    fn default() -> Self {
        Self {
            // Wire shapes: the OpenAI-compatible dialect is the baseline.
            message_wire_format: WireDialect::OpenAiCompat,
            native_tool_wire_format: String::from("openai"),

            // Sentinel strings used when no rule supplies a value.
            cache_breakpoint_style: String::from("none"),
            structured_output_mode: String::from("none"),
            thinking_block_style: String::from("none"),
            server_parser: String::from("none"),
            serving_precision: String::from("unverified"),

            // Gates that start open.
            image_url_input_supported: true,
            reasoning_disable_supported: true,
            reasoning_text_promotable: true,
            seed_supported: true,
            top_k_supported: true,
            temperature_supported: true,
            top_p_supported: true,
            frequency_penalty_supported: true,
            presence_penalty_supported: true,
            supports_parallel_tool_calls: true,
            text_tool_wire_format_supported: true,

            // Gates that start closed.
            native_tools: false,
            defer_loading: false,
            responses_api: false,
            remote_mcp: false,
            conversation_state: false,
            compaction: false,
            background_mode: false,
            prompt_caching: false,
            vision: false,
            audio: false,
            pdf: false,
            video: false,
            files_api_supported: false,
            prefers_xml_scaffolding: false,
            reserved_tool_call_token: false,
            prefers_markdown_scaffolding: false,
            supports_assistant_prefill: false,
            prefers_role_developer: false,
            prefers_xml_tools: false,
            interleaved_thinking_supported: false,
            vision_supported: false,
            preserve_thinking: false,
            honors_chat_template_kwargs: false,
            requires_completion_tokens: false,
            chat_completions_unsupported: false,
            requires_streaming: false,
            reasoning_effort_supported: false,
            reasoning_none_supported: false,
            reasoning_required_for_tools: false,
            requires_tool_result_adjacency: false,
            tools_exclude_response_format: false,

            // Collections start empty.
            tool_search: Vec::new(),
            hosted_tools: Vec::new(),
            thinking_modes: Vec::new(),
            anthropic_beta_features: Vec::new(),
            reasoning_effort_levels: Vec::new(),
            allowed_tool_choice_modes: Vec::new(),
            provider_route_denylist: Vec::new(),
            openrouter_provider_order: Vec::new(),
            auto_reasoning_overrides: BTreeMap::new(),

            // Optional values start absent.
            tool_approval_policy: None,
            max_tools: None,
            file_upload_wire_format: None,
            structured_output: None,
            json_schema: None,
            chat_template_options_field: None,
            max_thinking_budget: None,
            reasoning_wire_format: None,
            recommended_endpoint: None,
            preferred_tool_format: None,
            tool_mode_parity: None,
            tool_mode_parity_notes: None,
            thinking_disable_directive: None,
        }
    }
}