car_inference/schema.rs
1//! Model schema — declarative metadata for models, analogous to ToolSchema for tools.
2//!
3//! Every model (local GGUF, remote API, Ollama) is described by a `ModelSchema`
4//! that declares identity, capabilities, constraints, cost, and source.
5//! The router uses this schema for initial routing; observed outcomes refine it.
6
7use serde::{Deserialize, Serialize};
8
/// What a model can do.
///
/// Serialized as a snake_case string (e.g. `ToolUse` ↔ `"tool_use"`), so
/// variant names are part of the on-disk / wire format. A model declares any
/// number of these in `ModelSchema::capabilities`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ModelCapability {
    /// Text completion / chat generation.
    Generate,
    /// Vector embeddings.
    Embed,
    /// Cross-encoder relevance scoring (query + document → relevance
    /// score). Qwen3-Reranker is the canonical local implementation.
    Rerank,
    /// Label assignment / classification.
    Classify,
    /// Code generation, repair, refactoring.
    Code,
    /// Chain-of-thought, planning, analysis.
    Reasoning,
    /// Text condensation.
    Summarize,
    /// Function/tool calling.
    ToolUse,
    /// Multiple tool calls in a single response (parallel tool execution).
    MultiToolCall,
    /// Vision / image understanding.
    Vision,
    /// Video understanding (multi-frame sampling + temporal tokens).
    /// Distinct from `Vision` so routing can prefer video-trained
    /// models when the caller attaches a video content block.
    VideoUnderstanding,
    /// Audio understanding (speech + non-speech audio as an input to
    /// a chat/reasoning model). Distinct from `SpeechToText` which is
    /// the transcription-only task. Gemma 4 E2B/E4B and Gemini do
    /// this; Qwen2.5-VL does not.
    AudioUnderstanding,
    /// Visual grounding — structured object-localization output
    /// (bounding boxes keyed to object labels) in addition to text.
    Grounding,
    /// Speech recognition / transcription.
    SpeechToText,
    /// Speech synthesis / text-to-speech.
    TextToSpeech,
    /// Image generation.
    ImageGeneration,
    /// Video generation.
    VideoGeneration,
}
55
/// How much the project vouches for a model. Gates automatic upgrades and
/// is surfaced in recommendation rationale. Closed enum — a new tier is a
/// deliberate FFI-visible change, never a silent string fallback.
///
/// Serialized as `"curated"` / `"community"`. `Default` is `Curated`, which
/// is also what a `#[serde(default)]` field of this type receives when the
/// key is missing from the input.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum TrustTier {
    /// Vetted by the project — the built-in catalog and verified upgrades.
    /// Eligible for background auto-apply when the user opts in.
    #[default]
    Curated,
    /// User-registered or upstream-discovered, not project-vetted. Always
    /// notify-only; never auto-applied regardless of update policy.
    Community,
}
70
/// How to access the model.
///
/// Serialized as an internally tagged enum: a `type` field carries the
/// snake_case variant name (e.g. `"local"`, `"remote_api"`, `"ollama"`) and
/// the variant's own fields sit next to it in the same object.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ModelSource {
    /// Local GGUF file via Candle backend.
    Local {
        /// HuggingFace repo holding the GGUF weights
        /// (e.g., "Qwen/Qwen3-4B-GGUF").
        hf_repo: String,
        /// Weight filename inside `hf_repo` (e.g., "Qwen3-4B-Q4_K_M.gguf").
        hf_filename: String,
        /// Repo to fetch the tokenizer from (e.g., "Qwen/Qwen3-4B"); may
        /// differ from `hf_repo`.
        tokenizer_repo: String,
    },
    /// Remote API endpoint (OpenAI-compatible, Anthropic, etc.)
    RemoteApi {
        /// URL of the API endpoint.
        endpoint: String,
        /// Environment variable name containing the API key (never the key itself).
        /// The env var value may contain comma-separated keys for load balancing.
        api_key_env: String,
        /// Additional environment variable names for load balancing across multiple keys.
        /// Each env var may also contain comma-separated keys.
        /// Defaults to an empty list when omitted.
        #[serde(default)]
        api_key_envs: Vec<String>,
        /// Optional API version string (e.g., "2023-06-01"). Defaults to
        /// `None` when omitted.
        #[serde(default)]
        api_version: Option<String>,
        /// Wire protocol the endpoint speaks.
        protocol: ApiProtocol,
    },
    /// Ollama local server.
    Ollama {
        /// Model tag as known to the Ollama server.
        model_tag: String,
        /// Server base URL. Defaults to "http://localhost:11434" when omitted.
        #[serde(default = "default_ollama_host")]
        host: String,
    },
    /// Local MLX model via mlx-rs backend (Apple Silicon, safetensors format).
    /// Models from mlx-community on HuggingFace.
    Mlx {
        /// HuggingFace repo (e.g., "mlx-community/Qwen3-4B-4bit").
        hf_repo: String,
        /// Optional specific weight filename. If None, auto-discovers safetensors files.
        #[serde(default)]
        hf_weight_file: Option<String>,
    },
    /// Local vLLM-MLX server (Apple Silicon, OpenAI-compatible API).
    /// Routes through RemoteBackend with OpenAI protocol handler.
    VllmMlx {
        /// Server endpoint (e.g., "http://localhost:8000").
        endpoint: String,
        /// The model name as known to vLLM-MLX (e.g., "mlx-community/Qwen3-4B-4bit").
        model_name: String,
    },
    /// Apple's on-device system model via the FoundationModels framework
    /// (macOS 26+, Apple Silicon). Inference happens in-process through a
    /// Swift shim — there is no HTTP, no API key, and no model file: the
    /// OS owns the weights. Availability is checked at runtime via
    /// `@available(macOS 26.0, *)`; on older macOS or non-Apple-Silicon
    /// hosts the backend reports `UnsupportedMode` and the router falls
    /// through to the next candidate.
    AppleFoundationModels {
        /// Optional Apple use-case hint passed through to
        /// `LanguageModelSession`. Apple's framework tunes its prompt and
        /// safety scaffolding per use case (e.g. "general", "summarize").
        /// `None` uses the default.
        #[serde(default)]
        use_case: Option<String>,
    },
    /// Proprietary provider with custom auth and protocol.
    ///
    /// For vendor-specific APIs that aren't generic OpenAI-compatible endpoints.
    /// Parslee is the first proprietary provider — custom auth (OAuth2),
    /// custom response format, multi-provider routing built into the API.
    Proprietary {
        /// Provider identifier (e.g., "parslee").
        provider: String,
        /// Base URL for the API.
        endpoint: String,
        /// Auth configuration.
        auth: ProprietaryAuth,
        /// Custom protocol details.
        protocol: ProprietaryProtocol,
    },
    /// Inference is delegated to a host-registered runner. CAR does
    /// not own the wire format — the runner (typically a JS / Python
    /// host) translates the `GenerateRequest` to its provider's API,
    /// streams chunks back through the runner's event callback, and
    /// returns the final aggregated result.
    ///
    /// Closes Parslee-ai/car-releases#24. Use this when the host
    /// already has an SDK relationship with a provider (Anthropic,
    /// OpenAI, GitHub Models, Vercel AI SDK) and wants CAR to sit in
    /// the lifecycle / policy / replay path without learning every
    /// provider's wire format.
    ///
    /// Routing requires that a runner has been registered via
    /// [`crate::set_inference_runner`] (or its FFI equivalent —
    /// `registerInferenceRunner` on JS, `register_inference_runner`
    /// on Python, the `InferenceRunner` foreign trait on UniFFI,
    /// `inference.register_runner` on the WebSocket protocol).
    /// Without a runner, dispatch fails with `InferenceFailed`.
    Delegated {
        /// Opaque hint passed through to the runner — typically the
        /// provider id (`"anthropic"`, `"openai"`, `"vercel-ai-sdk"`)
        /// so a multi-provider runner can dispatch internally. CAR
        /// does not interpret this string.
        #[serde(default)]
        hint: Option<String>,
    },
}
175
/// Authentication method for proprietary providers.
///
/// Internally tagged: the variant is chosen by a `type` field holding the
/// snake_case variant name.
///
/// NOTE(review): serde's snake_case rule inserts an underscore before every
/// interior capital, so `OAuth2Pkce` is expected to serialize as
/// `"o_auth2_pkce"` (not `"oauth2_pkce"`) — confirm against existing configs
/// before renaming anything here.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ProprietaryAuth {
    /// OAuth2 PKCE flow (e.g., Azure AD for Parslee).
    OAuth2Pkce {
        /// Authority (issuer) used for the OAuth2 flow.
        authority: String,
        /// OAuth2 client identifier.
        client_id: String,
        /// Scopes to request.
        scopes: Vec<String>,
    },
    /// Static API key from environment variable. `env_var` is the variable's
    /// name, not the key itself.
    ApiKeyEnv { env_var: String },
    /// Bearer token from environment variable. `env_var` is the variable's
    /// name, not the token itself.
    BearerTokenEnv { env_var: String },
}
191
/// Protocol configuration for proprietary providers.
///
/// Every field has a serde default, so an empty object deserializes to the
/// same value as `ProprietaryProtocol::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProprietaryProtocol {
    /// Chat/completion endpoint path (appended to base URL).
    /// Defaults to "/chat".
    #[serde(default = "default_chat_path")]
    pub chat_path: String,
    /// Content type for requests. Defaults to "application/json".
    #[serde(default = "default_content_type")]
    pub content_type: String,
    /// Whether the API streams responses via SSE. Defaults to `false`.
    #[serde(default)]
    pub streaming: bool,
    /// Custom headers to include in every request. Defaults to empty.
    #[serde(default)]
    pub extra_headers: std::collections::HashMap<String, String>,
}
208
209impl Default for ProprietaryProtocol {
210 fn default() -> Self {
211 Self {
212 chat_path: default_chat_path(),
213 content_type: default_content_type(),
214 streaming: false,
215 extra_headers: std::collections::HashMap::new(),
216 }
217 }
218}
219
/// Serde fallback for `ProprietaryProtocol::chat_path`: the `/chat` route.
fn default_chat_path() -> String {
    String::from("/chat")
}
223
/// Serde fallback for `ProprietaryProtocol::content_type`: JSON bodies.
fn default_content_type() -> String {
    "application/json".to_owned()
}
227
/// Serde fallback for the `host` field of `ModelSource::Ollama`: a server on
/// localhost, port 11434.
fn default_ollama_host() -> String {
    String::from("http://localhost:11434")
}
231
/// Wire protocol spoken by a `ModelSource::RemoteApi` endpoint.
///
/// Serialized as a snake_case string.
///
/// NOTE(review): serde's snake_case rule splits on every interior capital,
/// so the expected names are `"open_ai_compat"`, `"open_ai_responses"`,
/// `"anthropic"`, `"google"`, `"azure_open_ai"` — confirm against existing
/// configs.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ApiProtocol {
    /// Generic OpenAI-compatible API.
    OpenAiCompat,
    /// OpenAI Responses API (/v1/responses) — works with all OpenAI models including codex.
    OpenAiResponses,
    /// Anthropic's API.
    Anthropic,
    /// Google's API.
    Google,
    /// Azure OpenAI — uses api-key header and deployment-based URLs.
    /// Endpoint format: {base}/openai/deployments/{model}/chat/completions?api-version={version}
    AzureOpenAi,
}
244
/// Declared performance expectations. Overridden by observed data once available.
///
/// All fields are optional; both `Default` and a missing key during
/// deserialization leave a field as `None` (nothing declared).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PerformanceEnvelope {
    /// Median latency in milliseconds (declared/estimated).
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
    /// 99th percentile latency in milliseconds.
    #[serde(default)]
    pub latency_p99_ms: Option<u64>,
    /// Tokens per second throughput.
    #[serde(default)]
    pub tokens_per_second: Option<f64>,
}
258
/// Generation parameters that a model may or may not support.
/// Models declare which params they accept. The inference layer
/// strips unsupported params before sending to the API.
///
/// Serialized as a snake_case string (e.g. `TopP` ↔ `"top_p"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GenerateParam {
    Temperature,
    TopP,
    TopK,
    MaxTokens,
    StopSequences,
    FrequencyPenalty,
    PresencePenalty,
    Seed,
    ResponseFormat,
    /// Extended thinking / internal reasoning before responding.
    ExtendedThinking,
}
278
279/// Standard parameter set for most models.
280pub fn standard_params() -> Vec<GenerateParam> {
281 vec![
282 GenerateParam::Temperature,
283 GenerateParam::TopP,
284 GenerateParam::MaxTokens,
285 GenerateParam::StopSequences,
286 GenerateParam::FrequencyPenalty,
287 GenerateParam::PresencePenalty,
288 GenerateParam::Seed,
289 ]
290}
291
292/// Parameter set for reasoning models (no temperature, no top_p).
293pub fn reasoning_params() -> Vec<GenerateParam> {
294 vec![GenerateParam::MaxTokens, GenerateParam::StopSequences]
295}
296
/// Cost model for routing optimization.
///
/// Covers both monetary cost (per-token pricing, relevant to remote models)
/// and resource cost (disk and RAM footprint, relevant to local models).
/// Every field is optional and defaults to `None` when not declared.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CostModel {
    /// USD per 1M input tokens (remote models).
    #[serde(default)]
    pub input_per_mtok: Option<f64>,
    /// USD per 1M output tokens (remote models).
    #[serde(default)]
    pub output_per_mtok: Option<f64>,
    /// On-disk size in MB (local models).
    #[serde(default)]
    pub size_mb: Option<u64>,
    /// RAM required during inference in MB.
    #[serde(default)]
    pub ram_mb: Option<u64>,
}
312
/// A score on a public benchmark from a published source (model card,
/// paper, leaderboard). The schema is deliberately permissive — no enum
/// of benchmark names — so the catalog can carry whichever benchmarks
/// the upstream provider chose to publish, and new ones can be added
/// without a code change. Scores are stored on a 0.0–1.0 scale (e.g.
/// 73.5% accuracy → 0.735) so they compare cleanly across benchmarks
/// and so `routing_ext::apply_benchmark_priors` can consume them
/// directly when wired in later.
///
/// The 0.0–1.0 range is a convention: `score` is a plain `f64` and this
/// type performs no validation of it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkScore {
    /// Benchmark name as published (e.g., "MMLU-Pro", "GPQA-Diamond",
    /// "SWE-bench-Verified", "HumanEval", "MATH").
    pub name: String,
    /// Score on a 0.0–1.0 scale.
    pub score: f64,
    /// Evaluation harness or setup label (e.g., "5-shot", "0-shot CoT",
    /// "agentic", "pass@1"). Optional but strongly recommended — the
    /// same benchmark name can mean different things under different
    /// harnesses.
    #[serde(default)]
    pub harness: Option<String>,
    /// Where the score came from (model card URL, paper, leaderboard
    /// snapshot). `None` when the source is the upstream provider's
    /// announcement and a stable URL is not yet known.
    #[serde(default)]
    pub source_url: Option<String>,
    /// ISO 8601 date of the score snapshot (e.g., "2025-08-12"). Lets
    /// downstream code judge how stale a number is. Stored as a plain
    /// string; the format is not checked by this type.
    #[serde(default)]
    pub measured_at: Option<String>,
}
344
/// The full declarative schema for a model.
///
/// Analogous to `ToolSchema` — describes what a model is, what it can do,
/// and how to access it. The router uses this for constraint-based filtering
/// and cold-start scoring before observed performance data is available.
///
/// Required on deserialization: `id`, `name`, `provider`, `family`,
/// `capabilities`, `context_length`, and `source`. Every other serialized
/// field carries `#[serde(default)]` and may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelSchema {
    /// Unique identifier: "provider/model-name:variant" (e.g., "qwen/qwen3-4b:q4_k_m").
    pub id: String,
    /// Human-readable display name.
    pub name: String,
    /// Provider (qwen, openai, anthropic, google, meta, ollama, custom).
    pub provider: String,
    /// Model family for grouping (qwen3, gpt-4, claude-4, llama-3).
    pub family: String,
    /// Semantic version or checkpoint label. Empty string when omitted.
    #[serde(default)]
    pub version: String,
    /// What this model can do — ordered by primary capability first.
    pub capabilities: Vec<ModelCapability>,
    /// Context window in tokens.
    pub context_length: usize,
    /// Parameter count as human-readable string (e.g., "4B", "30B (3B active)").
    /// Empty string when omitted.
    #[serde(default)]
    pub param_count: String,
    /// Quantization (Q4_K_M, Q8_0, F16, none).
    #[serde(default)]
    pub quantization: Option<String>,
    /// Declared performance envelope (initial estimate, overridden by observed data).
    #[serde(default)]
    pub performance: PerformanceEnvelope,
    /// Cost structure.
    #[serde(default)]
    pub cost: CostModel,
    /// How to access this model.
    pub source: ModelSource,
    /// Free-form tags for filtering (e.g., "fast", "multilingual", "moe").
    #[serde(default)]
    pub tags: Vec<String>,
    /// Supported generation parameters. The inference layer strips any parameter
    /// not in this set before sending to the API. Empty = all supported.
    #[serde(default)]
    pub supported_params: Vec<GenerateParam>,
    /// Public benchmark scores as published by the model provider or
    /// reproduced on a public leaderboard (MMLU-Pro, GPQA-Diamond,
    /// SWE-bench, HumanEval, etc.). The built-in catalog ships this
    /// empty — population is a curation step, not a code change. See
    /// `BenchmarkScore` for the field shape and the 0.0–1.0 scoring
    /// convention.
    #[serde(default)]
    pub public_benchmarks: Vec<BenchmarkScore>,
    /// How much the project vouches for this model. Gates auto-apply
    /// (task #8) and is surfaced in recommendation rationale.
    ///
    /// When the key is absent from a serialized config, `#[serde(default)]`
    /// yields `TrustTier::default()`, which is `Curated`.
    ///
    /// NOTE(review): if user-registered or upstream-discovered models are
    /// meant to be `Community`, deserialization alone will not do that —
    /// the registration path has to set the tier explicitly. Confirm that
    /// it does, since a config that omits this key otherwise becomes
    /// `Curated`.
    #[serde(default)]
    pub trust_tier: TrustTier,
    /// Superseded models stay listed if installed but are excluded from
    /// fresh recommendations. `#[serde(default)]` → not deprecated.
    #[serde(default)]
    pub deprecated: bool,
    /// Whether this model is currently available (downloaded / reachable).
    /// Not serialized — computed at runtime. Always `false` right after
    /// deserialization.
    #[serde(skip)]
    pub available: bool,
}
411
412impl ModelSchema {
413 /// Check if this model has a given capability.
414 pub fn has_capability(&self, cap: ModelCapability) -> bool {
415 self.capabilities.contains(&cap)
416 }
417
418 /// Check if this model is local (runs on-device).
419 pub fn is_local(&self) -> bool {
420 matches!(
421 self.source,
422 ModelSource::Local { .. }
423 | ModelSource::Mlx { .. }
424 | ModelSource::VllmMlx { .. }
425 | ModelSource::AppleFoundationModels { .. }
426 )
427 }
428
429 /// Check if this model delegates inference to a host-registered
430 /// runner (closes Parslee-ai/car-releases#24).
431 pub fn is_delegated(&self) -> bool {
432 matches!(self.source, ModelSource::Delegated { .. })
433 }
434
435 /// Check if this model uses the MLX backend.
436 pub fn is_mlx(&self) -> bool {
437 matches!(self.source, ModelSource::Mlx { .. })
438 }
439
440 /// Check if this model routes to Apple's on-device FoundationModels
441 /// framework. True only for `ModelSource::AppleFoundationModels`;
442 /// callers must still verify runtime availability before dispatch
443 /// (the schema can describe the model on any host, but execution
444 /// requires macOS 26+ on Apple Silicon).
445 pub fn is_foundation_models(&self) -> bool {
446 matches!(self.source, ModelSource::AppleFoundationModels { .. })
447 }
448
449 /// Check if this model uses vLLM-MLX backend.
450 pub fn is_vllm_mlx(&self) -> bool {
451 matches!(self.source, ModelSource::VllmMlx { .. })
452 }
453
454 /// Whether this model can only run on Apple Silicon (Metal). True for
455 /// MLX, vLLM-MLX, and Apple FoundationModels sources. The recommender
456 /// uses this to exclude Metal-only picks on CPU-only / CUDA hosts rather
457 /// than storing a redundant backend-compatibility field that could drift
458 /// from `source`.
459 pub fn requires_apple_silicon(&self) -> bool {
460 self.is_mlx() || self.is_vllm_mlx() || self.is_foundation_models()
461 }
462
463 /// Check if this model is remote (requires API call).
464 pub fn is_remote(&self) -> bool {
465 matches!(
466 self.source,
467 ModelSource::RemoteApi { .. } | ModelSource::Proprietary { .. }
468 )
469 }
470
471 /// Collect all API key env var names for this model (primary + extras).
472 /// Returns empty vec for non-remote models.
473 pub fn all_api_key_envs(&self) -> Vec<String> {
474 match &self.source {
475 ModelSource::RemoteApi {
476 api_key_env,
477 api_key_envs,
478 ..
479 } => {
480 let mut all = vec![api_key_env.clone()];
481 all.extend(api_key_envs.iter().cloned());
482 all
483 }
484 ModelSource::Proprietary {
485 auth: ProprietaryAuth::ApiKeyEnv { env_var },
486 ..
487 }
488 | ModelSource::Proprietary {
489 auth: ProprietaryAuth::BearerTokenEnv { env_var },
490 ..
491 } => vec![env_var.clone()],
492 _ => vec![],
493 }
494 }
495
496 /// Get the size in MB (from cost model or 0 if unknown).
497 pub fn size_mb(&self) -> u64 {
498 self.cost.size_mb.unwrap_or(0)
499 }
500
501 /// Get the RAM requirement in MB (from cost model, falls back to size_mb).
502 pub fn ram_mb(&self) -> u64 {
503 self.cost.ram_mb.unwrap_or_else(|| self.size_mb())
504 }
505
506 /// Estimated cost per 1K output tokens in USD. Returns 0.0 for local models.
507 pub fn cost_per_1k_output(&self) -> f64 {
508 self.cost.output_per_mtok.map(|c| c / 1000.0).unwrap_or(0.0)
509 }
510}
511
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a curated local GGUF model with declared size/RAM and no
    /// per-token pricing.
    fn sample_local() -> ModelSchema {
        ModelSchema {
            id: "qwen/qwen3-4b:q4_k_m".into(),
            name: "Qwen3-4B".into(),
            provider: "qwen".into(),
            family: "qwen3".into(),
            version: "1.0".into(),
            capabilities: vec![ModelCapability::Generate, ModelCapability::Code],
            context_length: 32768,
            param_count: "4B".into(),
            quantization: Some("Q4_K_M".into()),
            performance: PerformanceEnvelope {
                tokens_per_second: Some(45.0),
                ..Default::default()
            },
            cost: CostModel {
                size_mb: Some(2500),
                ram_mb: Some(2500),
                ..Default::default()
            },
            source: ModelSource::Local {
                hf_repo: "Qwen/Qwen3-4B-GGUF".into(),
                hf_filename: "Qwen3-4B-Q4_K_M.gguf".into(),
                tokenizer_repo: "Qwen/Qwen3-4B".into(),
            },
            tags: vec!["code".into(), "fast".into()],
            supported_params: vec![],
            public_benchmarks: vec![],
            trust_tier: TrustTier::Curated,
            deprecated: false,
            available: false,
        }
    }

    /// Fixture: a curated remote API model with per-token pricing and no
    /// declared size.
    fn sample_remote() -> ModelSchema {
        ModelSchema {
            id: "anthropic/claude-sonnet-4-6:latest".into(),
            name: "Claude Sonnet 4.6".into(),
            provider: "anthropic".into(),
            family: "claude-4".into(),
            version: "latest".into(),
            capabilities: vec![
                ModelCapability::Generate,
                ModelCapability::Code,
                ModelCapability::Reasoning,
                ModelCapability::ToolUse,
                ModelCapability::Vision,
            ],
            context_length: 200000,
            param_count: String::new(),
            quantization: None,
            performance: PerformanceEnvelope {
                latency_p50_ms: Some(2000),
                latency_p99_ms: Some(8000),
                tokens_per_second: Some(80.0),
            },
            cost: CostModel {
                input_per_mtok: Some(3.0),
                output_per_mtok: Some(15.0),
                ..Default::default()
            },
            source: ModelSource::RemoteApi {
                endpoint: "https://api.anthropic.com/v1/messages".into(),
                api_key_env: "ANTHROPIC_API_KEY".into(),
                api_key_envs: vec![],
                api_version: Some("2023-06-01".into()),
                protocol: ApiProtocol::Anthropic,
            },
            tags: vec!["reasoning".into(), "tool_use".into()],
            supported_params: vec![],
            public_benchmarks: vec![],
            trust_tier: TrustTier::Curated,
            deprecated: false,
            available: false,
        }
    }

    #[test]
    fn capabilities() {
        let m = sample_local();
        assert!(m.has_capability(ModelCapability::Code));
        assert!(!m.has_capability(ModelCapability::Vision));
    }

    #[test]
    fn local_vs_remote() {
        assert!(sample_local().is_local());
        assert!(!sample_local().is_remote());
        assert!(sample_remote().is_remote());
        assert!(!sample_remote().is_local());
    }

    #[test]
    fn cost() {
        let local = sample_local();
        assert_eq!(local.cost_per_1k_output(), 0.0);

        let remote = sample_remote();
        assert!(remote.cost_per_1k_output() > 0.0);
    }

    #[test]
    fn serde_roundtrip() {
        let local = sample_local();
        let json = serde_json::to_string(&local).unwrap();
        let parsed: ModelSchema = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.id, local.id);
        assert_eq!(parsed.capabilities, local.capabilities);

        let remote = sample_remote();
        let json = serde_json::to_string(&remote).unwrap();
        let parsed: ModelSchema = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.id, remote.id);
        // available is skip-serialized, defaults to false
        assert!(!parsed.available);
    }

    #[test]
    fn trust_tier_and_deprecated_default_when_absent() {
        // Pre-existing ~/.car/models.json configs omit the new fields.
        // They must deserialize to Curated / not-deprecated, not error.
        let json = serde_json::to_string(&sample_local()).unwrap();
        let stripped = json
            .replace(",\"trust_tier\":\"curated\"", "")
            .replace(",\"deprecated\":false", "");
        // `str::replace` is a silent no-op when the needle is missing (for
        // example if field order or formatting changes). Without these
        // guards the test would pass while the keys were still present and
        // never exercise the defaulting path.
        assert!(
            !stripped.contains("\"trust_tier\""),
            "fixture still carries trust_tier: {}",
            stripped
        );
        assert!(
            !stripped.contains("\"deprecated\""),
            "fixture still carries deprecated: {}",
            stripped
        );
        let parsed: ModelSchema = serde_json::from_str(&stripped).unwrap();
        assert_eq!(parsed.trust_tier, TrustTier::Curated);
        assert!(!parsed.deprecated);
    }

    #[test]
    fn trust_tier_serializes_snake_case() {
        assert_eq!(
            serde_json::to_string(&TrustTier::Community).unwrap(),
            "\"community\""
        );
        assert_eq!(TrustTier::default(), TrustTier::Curated);
    }

    #[test]
    fn requires_apple_silicon_only_for_metal_backends() {
        // GGUF/Candle local and remote models run anywhere CAR builds for.
        assert!(!sample_local().requires_apple_silicon());
        assert!(!sample_remote().requires_apple_silicon());

        let mlx = ModelSchema {
            source: ModelSource::Mlx {
                hf_repo: "mlx-community/Qwen3-4B-4bit".into(),
                hf_weight_file: None,
            },
            ..sample_local()
        };
        assert!(mlx.requires_apple_silicon());

        // vLLM-MLX and Apple FoundationModels are equally Metal-bound.
        let vllm = ModelSchema {
            source: ModelSource::VllmMlx {
                endpoint: "http://localhost:8000".into(),
                model_name: "mlx-community/Qwen3-4B-4bit".into(),
            },
            ..sample_local()
        };
        assert!(vllm.requires_apple_silicon());

        let foundation = ModelSchema {
            source: ModelSource::AppleFoundationModels { use_case: None },
            ..sample_local()
        };
        assert!(foundation.requires_apple_silicon());
    }
}