harn_vm/llm_config/
model_def.rs

1//! Model catalog DTOs: per-route serving definitions and the sub-records
2//! (pricing, rate limits, serving performance, architecture, fast mode,
3//! local runtime/memory, and aliases) that make up a `ModelDef`.
4use std::collections::BTreeMap;
5
6use serde::{Deserialize, Serialize};
7
/// Healthcheck probe description read from configuration.
///
/// Deserialize-only: the type derives `Deserialize` but not `Serialize`.
/// `method` is the only required key; the remaining fields fall back to
/// `None` when they are absent from the input.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// Request method for the probe (required).
    /// NOTE(review): presumably an HTTP verb such as `GET` — confirm against
    /// the code that issues the probe.
    pub method: String,
    /// Optional request path.
    /// NOTE(review): presumably resolved against a provider base URL — confirm.
    #[serde(default)]
    pub path: Option<String>,
    /// Optional URL for the probe.
    /// NOTE(review): precedence relative to `path` is not visible in this file.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional request body.
    #[serde(default)]
    pub body: Option<String>,
}
18
/// Launch and lifecycle description for a local model runtime.
///
/// Every field is optional in the serialized form: missing keys deserialize
/// to `None` or an empty list, and unset/empty fields are omitted again when
/// the value is serialized.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct LocalRuntimeDef {
    /// Lifecycle style: `daemon_api` for runtimes with their own resident
    /// daemon (Ollama), `managed_process` for Harn-spawned servers.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kind: Option<String>,
    /// Command Harn should execute for managed-process runtimes.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub command: Option<String>,
    /// Arguments that must appear immediately after the command, before model
    /// and server flags. Used by CLIs such as `vllm serve ...`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub prefix_args: Vec<String>,
    /// Default model source/path/repo. User overlays may set this; embedded
    /// catalog rows avoid machine-specific absolute paths except examples.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source: Option<String>,
    /// Environment variable that can provide a model source.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source_env: Option<String>,
    /// Default port when the provider base URL has none.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_port: Option<u16>,
    /// Argument names used by the runtime CLI.
    ///
    /// This field and the `*_arg` fields that follow each hold the name of
    /// one CLI argument. NOTE(review): the per-field descriptions in this
    /// group are inferred from the field names — confirm against the
    /// launcher that consumes them.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_arg: Option<String>,
    /// Argument name for the served model name.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub served_model_arg: Option<String>,
    /// Argument name for the host.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub host_arg: Option<String>,
    /// Argument name for the port.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub port_arg: Option<String>,
    /// Argument name for the context size.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ctx_arg: Option<String>,
    /// Argument name for the parallelism setting.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parallel_arg: Option<String>,
    /// Argument name for the GPU layer count.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gpu_layers_arg: Option<String>,
    /// Argument name for the K cache type.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_k_arg: Option<String>,
    /// Argument name for the V cache type.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_v_arg: Option<String>,
    /// Argument name for the cache RAM setting.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_ram_arg: Option<String>,
    /// Flag that enables adapter-aware serving for LoRA-capable runtimes.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub enable_lora_arg: Option<String>,
    /// Flag that accepts one or more LoRA module specs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lora_modules_arg: Option<String>,
    /// Runtime value shape for LoRA module specs. Defaults to `name_path`.
    /// (That default is applied by consumers; this struct only stores `None`.)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lora_modules_value_format: Option<String>,
    /// Optional rank-limit flag for runtimes that need an explicit ceiling.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_lora_rank_arg: Option<String>,
    /// Extra arguments Harn applies by default when launching this runtime.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub default_args: Vec<String>,
    /// Stop strategy: `keep_alive_zero`, `pid`, or `external`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop: Option<String>,
    /// Official docs/source URL for the lifecycle contract.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when the local runtime row was last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    /// Short operational note surfaced by CLI docs/help.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
91
/// Memory-sizing hints for running a model route locally.
///
/// Every field is optional: missing keys deserialize to `None` (or an empty
/// map for `cache_type_multipliers`), and unset/empty fields are omitted
/// when the value is serialized. Only `PartialEq` is derived (not `Eq`)
/// because the struct carries `f64` values.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct LocalMemoryDef {
    /// Empirical resident memory observed for this route/runtime.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_resident_gib: Option<f64>,
    /// Context size used for the empirical measurement.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_context_window: Option<u64>,
    /// KV-cache type used for the empirical measurement.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_cache_type: Option<String>,
    /// Approximate non-context resident footprint for this model/runtime.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub base_resident_gib: Option<f64>,
    /// Approximate GiB consumed by KV cache per 1,000 context tokens at the
    /// default cache type.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kv_cache_gib_per_1k_ctx: Option<f64>,
    /// Cache-type multiplier relative to `kv_cache_gib_per_1k_ctx`.
    /// Keyed by cache-type name; stored in a `BTreeMap`, so iteration and
    /// serialization follow sorted key order.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub cache_type_multipliers: BTreeMap<String, f64>,
    /// Cache type assumed when the launch command does not set K/V cache.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_cache_type: Option<String>,
    /// Minimum headroom Harn should leave for the OS and other apps.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub safety_margin_gib: Option<f64>,
    /// Highest context Harn should recommend automatically from this row.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_recommended_context: Option<u64>,
    /// Official or empirical source for the sizing row.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when the sizing row was last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    /// Short operational note surfaced by CLI diagnostics/docs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
132
133impl LocalMemoryDef {
134    pub fn is_empty(&self) -> bool {
135        self.measured_resident_gib.is_none()
136            && self.measured_context_window.is_none()
137            && self.measured_cache_type.is_none()
138            && self.base_resident_gib.is_none()
139            && self.kv_cache_gib_per_1k_ctx.is_none()
140            && self.cache_type_multipliers.is_empty()
141            && self.default_cache_type.is_none()
142            && self.safety_margin_gib.is_none()
143            && self.max_recommended_context.is_none()
144            && self.source_url.is_none()
145            && self.last_verified.is_none()
146            && self.notes.is_none()
147    }
148}
149
/// Alias entry: a model `id` paired with a `provider`, plus an optional
/// per-model tool-format override.
///
/// `id` and `provider` are required keys; `tool_format` defaults to `None`.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Model id for this alias entry (required).
    /// NOTE(review): presumably the id the alias resolves to — confirm
    /// against the alias resolver.
    pub id: String,
    /// Provider for this alias entry (required).
    pub provider: String,
    /// Per-model tool format override: "native" or "text". When set, this
    /// takes precedence over the provider-level default. Models with strong
    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
    /// models better served by text-based tool calling use "text".
    #[serde(default)]
    pub tool_format: Option<String>,
}
161
162#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
163pub struct AliasToolCallingDef {
164    #[serde(default)]
165    #[serde(skip_serializing_if = "Option::is_none")]
166    pub native: Option<String>,
167    #[serde(default)]
168    #[serde(skip_serializing_if = "Option::is_none")]
169    pub text: Option<String>,
170    #[serde(default)]
171    #[serde(skip_serializing_if = "Option::is_none")]
172    pub streaming_native: Option<String>,
173    #[serde(default)]
174    #[serde(skip_serializing_if = "Option::is_none")]
175    pub fallback_mode: Option<String>,
176    #[serde(default)]
177    #[serde(skip_serializing_if = "Option::is_none")]
178    pub failure_reason: Option<String>,
179    #[serde(default)]
180    #[serde(skip_serializing_if = "Option::is_none")]
181    pub last_probe_at: Option<String>,
182}
183
/// Pricing rates expressed per MTok.
///
/// Input and output rates are required keys; the cache read/write rates
/// default to `None` when absent.
/// NOTE(review): the currency is not stated in this file — confirm
/// (presumably USD).
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// Rate charged for input tokens, per MTok (required).
    pub input_per_mtok: f64,
    /// Rate charged for output tokens, per MTok (required).
    pub output_per_mtok: f64,
    /// Rate for cache reads, per MTok, when one is recorded.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// Rate for cache writes, per MTok, when one is recorded.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
193
/// Provider or model quota metadata. Providers publish these along several
/// axes, and any one exhausted bucket can trigger throttling.
///
/// Every field is optional: absent keys deserialize to `None`, and unset
/// fields are omitted when the value is serialized.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct RateLimitsDef {
    /// Requests per minute.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpm: Option<u32>,
    /// Requests per hour.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rph: Option<u32>,
    /// Requests per day.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpd: Option<u32>,
    /// Total tokens per minute.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpm: Option<u64>,
    /// Total tokens per hour.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tph: Option<u64>,
    /// Total tokens per day.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpd: Option<u64>,
    /// Input tokens per minute, when the provider splits input/output quotas.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tpm: Option<u64>,
    /// Output tokens per minute, when the provider splits input/output quotas.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tpm: Option<u64>,
    /// Concurrent in-flight requests, if published.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub concurrency: Option<u32>,
    /// Account tier or route class these limits describe.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tier: Option<String>,
    /// Official source URL for the row.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when the row was last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    /// Free-text caveat for account-dependent or burst limits.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
238
239impl RateLimitsDef {
240    pub fn is_empty(&self) -> bool {
241        self.rpm.is_none()
242            && self.rph.is_none()
243            && self.rpd.is_none()
244            && self.tpm.is_none()
245            && self.tph.is_none()
246            && self.tpd.is_none()
247            && self.input_tpm.is_none()
248            && self.output_tpm.is_none()
249            && self.concurrency.is_none()
250            && self.tier.is_none()
251            && self.source_url.is_none()
252            && self.last_verified.is_none()
253            && self.notes.is_none()
254    }
255
256    pub fn with_rpm_fallback(mut self, rpm: Option<u32>) -> Option<Self> {
257        if self.rpm.is_none() {
258            self.rpm = rpm;
259        }
260        (!self.is_empty()).then_some(self)
261    }
262}
263
/// Optional provider/model serving-performance observation. This records
/// benchmark or live-probe facts, not a hard runtime contract; callers should
/// treat missing fields as unknown and stale dates as advisory.
///
/// Every field is optional: absent keys deserialize to `None`, and unset
/// fields are omitted when the value is serialized.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ServingPerformanceDef {
    /// Observed time-to-first-token in milliseconds.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub observed_ttft_ms: Option<u64>,
    /// Observed output generation rate in tokens per second.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens_per_sec: Option<f64>,
    /// End-to-end time-to-answer in seconds for the cited benchmark, when
    /// reported separately from TTFT/generation rate.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_to_answer_s: Option<f64>,
    /// Source label, e.g. `artificial_analysis`, `harn_probe`, or
    /// `provider_blog`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    /// Source URL for the observation.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when the observation was last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    /// Number of requests or benchmark samples behind this row, if known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sample_size: Option<u32>,
    /// Short caveat such as streaming mode, warm/cold route, or prompt shape.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
296
297impl ServingPerformanceDef {
298    pub fn is_empty(&self) -> bool {
299        self.observed_ttft_ms.is_none()
300            && self.output_tokens_per_sec.is_none()
301            && self.time_to_answer_s.is_none()
302            && self.source.is_none()
303            && self.source_url.is_none()
304            && self.last_verified.is_none()
305            && self.sample_size.is_none()
306            && self.notes.is_none()
307    }
308}
309
/// Logical-model facts separated from provider serving routes. These fields
/// describe the underlying weights or public model family, not Harn's alias or
/// provider/model selector.
///
/// Every field is optional: absent keys deserialize to `None`, and unset
/// fields are omitted when the value is serialized.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ModelArchitectureDef {
    /// Total parameter count in billions.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parameter_count_b: Option<f64>,
    /// Active parameter count in billions for MoE models.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub active_parameter_count_b: Option<f64>,
    /// True for mixture-of-experts models.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub moe: Option<bool>,
    /// Quantization advertised by this route, if route-specific.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub quantization: Option<String>,
    /// Numeric precision advertised by this route, if known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub precision: Option<String>,
    /// License identifier or short label.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    /// Tokenizer family or implementation hint.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tokenizer: Option<String>,
    /// Public knowledge cutoff claim, when published.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub knowledge_cutoff: Option<String>,
    /// Official source URL for these facts.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when these facts were last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
}
346
347impl ModelArchitectureDef {
348    pub fn is_empty(&self) -> bool {
349        self.parameter_count_b.is_none()
350            && self.active_parameter_count_b.is_none()
351            && self.moe.is_none()
352            && self.quantization.is_none()
353            && self.precision.is_none()
354            && self.license.is_none()
355            && self.tokenizer.is_none()
356            && self.knowledge_cutoff.is_none()
357            && self.source_url.is_none()
358            && self.last_verified.is_none()
359    }
360}
361
/// Optional accelerated-serving ("fast mode") tier for a model. Off by
/// default: its presence only *describes* that the provider offers a
/// faster, premium-priced serving path running the same weights — callers
/// must explicitly opt in via the provider's request knob, so nothing here
/// changes default behavior. Deliberately provider-agnostic: Anthropic
/// exposes the tier as `speed = "fast"` (beta-gated), while OpenAI uses
/// `service_tier = "fast"` / `"priority"`. Premium pricing is stored as
/// absolute per-MTok rates rather than a single multiplier because
/// providers price the tier asymmetrically (Anthropic Opus 4.8 is 2x
/// standard; Opus 4.7 fast mode is 6x).
///
/// `param` and `value` are required keys; every other field defaults to
/// `None` when absent.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct FastModeDef {
    /// Request field that opts into the fast tier (e.g. "speed" for
    /// Anthropic, "service_tier" for OpenAI).
    pub param: String,
    /// Value to send on `param` (e.g. "fast", "priority").
    pub value: String,
    /// Provider beta/feature header required to use the tier, if any
    /// (e.g. Anthropic "fast-mode-2026-02-01").
    #[serde(default)]
    pub beta_header: Option<String>,
    /// Output-tokens-per-second speedup vs standard serving (e.g. 2.5).
    #[serde(default)]
    pub otps_speedup: Option<f64>,
    /// Lifecycle of the fast tier: "ga" | "research_preview" |
    /// "deprecated". None when unspecified.
    #[serde(default)]
    pub status: Option<String>,
    /// Premium pricing charged while the fast tier is active (absolute
    /// per-MTok rates, not a multiplier on standard pricing).
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    /// Free-text note: constraints, deprecation timeline, etc.
    #[serde(default)]
    pub note: Option<String>,
}
398
/// A named model-fallback ladder declared in the catalog under
/// `[model_ladders.<name>]`. A `models`/`ladder` option on `llm_call`
/// lowers a ladder onto the first-class `routing_policy` chain: each step
/// is one transport attempt, and the loop advances to the next step ONLY
/// on transport-class failures (connection/timeout/429/5xx/throttled).
///
/// This data-driven declaration follows the same spirit as `fast_mode`
/// (#4017): a capability/behavior encoded as catalog data rather than
/// hand-rolled at each downstream call site (harn-bump-fleet,
/// harn-cloud free_tier_pool, burin-code all shipped their own copy).
///
/// Both fields are optional when deserializing: a missing `steps` key
/// yields an empty ladder and a missing `label` yields `None`.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct ModelLadderDef {
    /// Ordered ladder steps, cheapest/first to most-capable/last.
    #[serde(default)]
    pub steps: Vec<ModelLadderStepDef>,
    /// Optional human-readable label surfaced on the routing envelope.
    #[serde(default)]
    pub label: Option<String>,
}
418
/// One rung of a [`ModelLadderDef`]. Mirrors the `{model, provider?}` shape
/// accepted by the `models:` option and the `model_ladder(...)` std
/// constructor. Provider is optional: when omitted it is inferred from the
/// model id (or the call's base provider) at lowering time.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct ModelLadderStepDef {
    /// Model id for this step. Required when deserializing (no serde
    /// default); the derived `Default` leaves it as an empty string.
    pub model: String,
    /// Provider for this step; `None` means it is inferred at lowering time.
    #[serde(default)]
    pub provider: Option<String>,
    /// Optional label for this step.
    /// NOTE(review): where this label is surfaced is not visible in this file.
    #[serde(default)]
    pub label: Option<String>,
}
431
/// Catalog row describing one model serving route.
///
/// `name`, `provider`, and `context_window` are required keys. Every other
/// field carries `#[serde(default)]`, so a missing key deserializes to
/// `None`, an empty collection, `false`, or the default
/// [`ModelAvailability`].
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Model name for this row (required).
    pub name: String,
    /// Provider for this row (required).
    pub provider: String,
    /// Context window size (required).
    /// NOTE(review): presumably counted in tokens — confirm.
    pub context_window: u64,
    /// Provider-independent logical model id, when multiple serving routes map
    /// to the same weights or model family.
    #[serde(default)]
    pub logical_model: Option<String>,
    /// Equivalence class for failover/escalation candidates. Entries in the
    /// same group are capability-compatible alternatives, not byte-identical
    /// APIs; callers must still re-render transcripts for the target provider.
    #[serde(default)]
    pub equivalence_group: Option<String>,
    /// Serving-route detail such as "serverless", "priority", "fp8", or a
    /// provider route slug. This is intentionally separate from `name`.
    #[serde(default)]
    pub served_variant: Option<String>,
    /// Provider-native model id to send on the wire. Defaults to the catalog
    /// key. Required when two providers expose the same native id and Harn
    /// needs a unique catalog key for each route.
    #[serde(default)]
    pub wire_model: Option<String>,
    /// Preferred API dialect for the route, e.g. `openai_chat`,
    /// `openai_responses`, `anthropic_messages`, `gemini_generate_content`.
    #[serde(default)]
    pub api_dialect: Option<String>,
    /// Route-specific token/request quota metadata.
    #[serde(default)]
    pub rate_limits: Option<RateLimitsDef>,
    /// Optional route-level serving performance observations.
    #[serde(default)]
    pub performance: Option<ServingPerformanceDef>,
    /// Underlying model architecture facts separated from the provider id.
    #[serde(default)]
    pub architecture: Option<ModelArchitectureDef>,
    /// Local launch memory-sizing hints used by `harn local launch`.
    #[serde(default)]
    pub local_memory: Option<LocalMemoryDef>,
    /// Optional runtime context window.
    /// NOTE(review): how this relates to `context_window` is decided by
    /// consumers that are not visible in this file.
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    /// Optional stream timeout.
    /// NOTE(review): units are not stated here (presumably seconds) — confirm.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Capability tags for the route; empty when the key is omitted.
    /// NOTE(review): the tag vocabulary is not defined in this file.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Pricing for the route, when recorded.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    /// Whether the row is marked deprecated; defaults to `false`.
    #[serde(default)]
    pub deprecated: bool,
    /// Free-text deprecation note.
    #[serde(default)]
    pub deprecation_note: Option<String>,
    /// Structured replacement pointer: the catalog id of the model that
    /// supersedes this one (e.g. an older Opus row points at the newest
    /// Opus). Lets release tooling express "migrate to X" in a
    /// machine-readable way instead of burying it in `deprecation_note`
    /// free text. A model may be superseded without being `deprecated`
    /// (a newer option exists but this one is still fully supported);
    /// pair it with `deprecated = true` once a sunset is announced.
    #[serde(default)]
    pub superseded_by: Option<String>,
    /// Accelerated-serving ("fast mode") tier metadata, when the model's
    /// provider offers one. Off by default — see [`FastModeDef`]. None for
    /// models with no faster serving path.
    #[serde(default)]
    pub fast_mode: Option<FastModeDef>,
    /// Quality tags for the route; empty when the key is omitted.
    /// NOTE(review): the tag vocabulary is not defined in this file.
    #[serde(default)]
    pub quality_tags: Vec<String>,
    /// Whether the model can be reached over a normal API-key serverless call,
    /// or only via a dedicated/provisioned endpoint that the caller must spin
    /// up out-of-band. Providers like Together list dedicated-only routes
    /// alongside serverless ones in `/v1/models`, so this metadata lets clients
    /// avoid presenting them as one-click options.
    #[serde(default)]
    pub availability: ModelAvailability,
    /// Popular-consensus tier label. Enum-typed string: "small" | "mid" |
    /// "frontier" | "reasoning". Self-declared per model (no pattern-matched
    /// rule table) so the catalog is the single source of truth. When None
    /// the resolver returns the catalog default ("mid"). Use the richer
    /// `strengths` + `benchmarks` fields to pick models for specific
    /// workloads — `tier` exists only as a coarse popular-consensus shortcut.
    #[serde(default)]
    pub tier: Option<String>,
    /// True when the model weights are downloadable / self-hostable
    /// (open-weight / open-source license, regardless of commercial-use
    /// restrictions). False when weights are closed (Anthropic, OpenAI,
    /// Google, etc.). None when the catalog row predates the migration.
    #[serde(default)]
    pub open_weight: Option<bool>,
    /// Workload-shaped strength tags. Conventional values include
    /// `coding`, `summarization`, `long_context`, `tool_use`, `reasoning`,
    /// `vision`, `speed`, `cheap`, `agentic`. Selectors should treat
    /// missing entries as "no claim" rather than "no strength."
    #[serde(default)]
    pub strengths: Vec<String>,
    /// Public benchmark numbers, keyed by a snake_case identifier
    /// (`swe_bench_verified`, `humaneval`, `aa_intelligence_index`, etc.).
    /// Values are the raw published scores. The selector layer is free
    /// to normalize per benchmark; the catalog records the canonical
    /// score so future readers can audit the source.
    #[serde(default)]
    pub benchmarks: BTreeMap<String, f64>,
    /// Normalized model-family token used as a diversity signal for
    /// reviewer selection. Distinct from provider: hosted wrappers should
    /// keep the underlying family (for example OpenRouter-hosted Claude
    /// still uses `anthropic-claude`).
    #[serde(default)]
    pub family: Option<String>,
    /// Narrower family lineage used by option-pack calibration.
    #[serde(default)]
    pub lineage: Option<String>,
    /// Preferred reviewer families for critique/review workloads.
    #[serde(default)]
    pub complementary_with: Vec<String>,
    /// Author families, lineages, model ids, or provider/model selectors
    /// this row should not review.
    #[serde(default)]
    pub avoid_as_reviewer_for: Vec<String>,
}
550
/// How a catalog route can be reached.
///
/// Serialized in snake_case (`serverless`, `dedicated`, `unknown`).
/// `Serverless` is the `Default`, so it is the value a `#[serde(default)]`
/// field of this type takes when its key is absent.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ModelAvailability {
    /// Reachable through the provider's normal API-key path with no extra
    /// setup. The default for cataloged hosted/local models: by cataloging a
    /// row we are claiming the route works out of the box.
    #[default]
    Serverless,
    /// Requires the caller to provision a dedicated endpoint before requests
    /// will succeed. The catalog row exists for selection/pricing UI, but
    /// hosts must not auto-route to it.
    Dedicated,
    /// Availability is not known ahead of time. Used for routes that were
    /// surfaced dynamically (e.g. through `/v1/models`) without a static
    /// claim from Harn or the user.
    Unknown,
}
568
569impl ModelAvailability {
570    pub fn as_str(self) -> &'static str {
571        match self {
572            Self::Serverless => "serverless",
573            Self::Dedicated => "dedicated",
574            Self::Unknown => "unknown",
575        }
576    }
577
578    pub fn parse(value: &str) -> Option<Self> {
579        match value {
580            "serverless" => Some(Self::Serverless),
581            "dedicated" => Some(Self::Dedicated),
582            "unknown" => Some(Self::Unknown),
583            _ => None,
584        }
585    }
586}
harn_vm/llm_config/model_def.rs

harn_vm/llm_config/
model_def.rs