car_inference/
schema.rs

//! Model schema — declarative metadata for models, analogous to `ToolSchema` for tools.
//!
//! Every model (local GGUF, remote API, Ollama, ...) is described by a `ModelSchema`
//! that declares its identity, capabilities, constraints, cost, and source.
//! The router uses this schema for initial routing; observed outcomes refine it.
6
7use serde::{Deserialize, Serialize};
8
9/// What a model can do.
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
11#[serde(rename_all = "snake_case")]
12pub enum ModelCapability {
13    /// Text completion / chat generation
14    Generate,
15    /// Vector embeddings
16    Embed,
17    /// Cross-encoder relevance scoring (query + document → relevance
18    /// score). Qwen3-Reranker is the canonical local implementation.
19    Rerank,
20    /// Label assignment / classification
21    Classify,
22    /// Code generation, repair, refactoring
23    Code,
24    /// Chain-of-thought, planning, analysis
25    Reasoning,
26    /// Text condensation
27    Summarize,
28    /// Function/tool calling
29    ToolUse,
30    /// Multiple tool calls in a single response (parallel tool execution)
31    MultiToolCall,
32    /// Vision / image understanding
33    Vision,
34    /// Video understanding (multi-frame sampling + temporal tokens).
35    /// Distinct from `Vision` so routing can prefer video-trained
36    /// models when the caller attaches a video content block.
37    VideoUnderstanding,
38    /// Audio understanding (speech + non-speech audio as an input to
39    /// a chat/reasoning model). Distinct from `SpeechToText` which is
40    /// the transcription-only task. Gemma 4 E2B/E4B and Gemini do
41    /// this; Qwen2.5-VL does not.
42    AudioUnderstanding,
43    /// Visual grounding — structured object-localization output
44    /// (bounding boxes keyed to object labels) in addition to text.
45    Grounding,
46    /// Speech recognition / transcription
47    SpeechToText,
48    /// Speech synthesis / text-to-speech
49    TextToSpeech,
50    /// Image generation
51    ImageGeneration,
52    /// Video generation
53    VideoGeneration,
54}
55
56/// How much the project vouches for a model. Gates automatic upgrades and
57/// is surfaced in recommendation rationale. Closed enum — a new tier is a
58/// deliberate FFI-visible change, never a silent string fallback.
59#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
60#[serde(rename_all = "snake_case")]
61pub enum TrustTier {
62    /// Vetted by the project — the built-in catalog and verified upgrades.
63    /// Eligible for background auto-apply when the user opts in.
64    #[default]
65    Curated,
66    /// User-registered or upstream-discovered, not project-vetted. Always
67    /// notify-only; never auto-applied regardless of update policy.
68    Community,
69}
70
71/// How to access the model.
72#[derive(Debug, Clone, Serialize, Deserialize)]
73#[serde(tag = "type", rename_all = "snake_case")]
74pub enum ModelSource {
75    /// Local GGUF file via Candle backend.
76    Local {
77        hf_repo: String,
78        hf_filename: String,
79        tokenizer_repo: String,
80    },
81    /// Remote API endpoint (OpenAI-compatible, Anthropic, etc.)
82    RemoteApi {
83        endpoint: String,
84        /// Environment variable name containing the API key (never the key itself).
85        /// The env var value may contain comma-separated keys for load balancing.
86        api_key_env: String,
87        /// Additional environment variable names for load balancing across multiple keys.
88        /// Each env var may also contain comma-separated keys.
89        #[serde(default)]
90        api_key_envs: Vec<String>,
91        #[serde(default)]
92        api_version: Option<String>,
93        protocol: ApiProtocol,
94    },
95    /// Ollama local server.
96    Ollama {
97        model_tag: String,
98        #[serde(default = "default_ollama_host")]
99        host: String,
100    },
101    /// Local MLX model via mlx-rs backend (Apple Silicon, safetensors format).
102    /// Models from mlx-community on HuggingFace.
103    Mlx {
104        /// HuggingFace repo (e.g., "mlx-community/Qwen3-4B-4bit").
105        hf_repo: String,
106        /// Optional specific weight filename. If None, auto-discovers safetensors files.
107        #[serde(default)]
108        hf_weight_file: Option<String>,
109    },
110    /// Local vLLM-MLX server (Apple Silicon, OpenAI-compatible API).
111    /// Routes through RemoteBackend with OpenAI protocol handler.
112    VllmMlx {
113        /// Server endpoint (e.g., "http://localhost:8000").
114        endpoint: String,
115        /// The model name as known to vLLM-MLX (e.g., "mlx-community/Qwen3-4B-4bit").
116        model_name: String,
117    },
118    /// Apple's on-device system model via the FoundationModels framework
119    /// (macOS 26+, Apple Silicon). Inference happens in-process through a
120    /// Swift shim — there is no HTTP, no API key, and no model file: the
121    /// OS owns the weights. Availability is checked at runtime via
122    /// `@available(macOS 26.0, *)`; on older macOS or non-Apple-Silicon
123    /// hosts the backend reports `UnsupportedMode` and the router falls
124    /// through to the next candidate.
125    AppleFoundationModels {
126        /// Optional Apple use-case hint passed through to
127        /// `LanguageModelSession`. Apple's framework tunes its prompt and
128        /// safety scaffolding per use case (e.g. "general", "summarize").
129        /// `None` uses the default.
130        #[serde(default)]
131        use_case: Option<String>,
132    },
133    /// Proprietary provider with custom auth and protocol.
134    ///
135    /// For vendor-specific APIs that aren't generic OpenAI-compatible endpoints.
136    /// Parslee is the first proprietary provider — custom auth (OAuth2),
137    /// custom response format, multi-provider routing built into the API.
138    Proprietary {
139        /// Provider identifier (e.g., "parslee").
140        provider: String,
141        /// Base URL for the API.
142        endpoint: String,
143        /// Auth configuration.
144        auth: ProprietaryAuth,
145        /// Custom protocol details.
146        protocol: ProprietaryProtocol,
147    },
148    /// Inference is delegated to a host-registered runner. CAR does
149    /// not own the wire format — the runner (typically a JS / Python
150    /// host) translates the `GenerateRequest` to its provider's API,
151    /// streams chunks back through the runner's event callback, and
152    /// returns the final aggregated result.
153    ///
154    /// Closes Parslee-ai/car-releases#24. Use this when the host
155    /// already has an SDK relationship with a provider (Anthropic,
156    /// OpenAI, GitHub Models, Vercel AI SDK) and wants CAR to sit in
157    /// the lifecycle / policy / replay path without learning every
158    /// provider's wire format.
159    ///
160    /// Routing requires that a runner has been registered via
161    /// [`crate::set_inference_runner`] (or its FFI equivalent —
162    /// `registerInferenceRunner` on JS, `register_inference_runner`
163    /// on Python, the `InferenceRunner` foreign trait on UniFFI,
164    /// `inference.register_runner` on the WebSocket protocol).
165    /// Without a runner, dispatch fails with `InferenceFailed`.
166    Delegated {
167        /// Opaque hint passed through to the runner — typically the
168        /// provider id (`"anthropic"`, `"openai"`, `"vercel-ai-sdk"`)
169        /// so a multi-provider runner can dispatch internally. CAR
170        /// does not interpret this string.
171        #[serde(default)]
172        hint: Option<String>,
173    },
174}
175
176/// Authentication method for proprietary providers.
177#[derive(Debug, Clone, Serialize, Deserialize)]
178#[serde(tag = "type", rename_all = "snake_case")]
179pub enum ProprietaryAuth {
180    /// OAuth2 PKCE flow (e.g., Azure AD for Parslee).
181    OAuth2Pkce {
182        authority: String,
183        client_id: String,
184        scopes: Vec<String>,
185    },
186    /// Static API key from environment variable.
187    ApiKeyEnv { env_var: String },
188    /// Bearer token from environment variable.
189    BearerTokenEnv { env_var: String },
190}
191
192/// Protocol configuration for proprietary providers.
193#[derive(Debug, Clone, Serialize, Deserialize)]
194pub struct ProprietaryProtocol {
195    /// Chat/completion endpoint path (appended to base URL).
196    #[serde(default = "default_chat_path")]
197    pub chat_path: String,
198    /// Content type for requests.
199    #[serde(default = "default_content_type")]
200    pub content_type: String,
201    /// Whether the API streams responses via SSE.
202    #[serde(default)]
203    pub streaming: bool,
204    /// Custom headers to include in every request.
205    #[serde(default)]
206    pub extra_headers: std::collections::HashMap<String, String>,
207}
208
209impl Default for ProprietaryProtocol {
210    fn default() -> Self {
211        Self {
212            chat_path: default_chat_path(),
213            content_type: default_content_type(),
214            streaming: false,
215            extra_headers: std::collections::HashMap::new(),
216        }
217    }
218}
219
/// Serde default for `ProprietaryProtocol::chat_path`: `"/chat"`.
fn default_chat_path() -> String {
    String::from("/chat")
}
223
/// Serde default for `ProprietaryProtocol::content_type`: `"application/json"`.
fn default_content_type() -> String {
    String::from("application/json")
}
227
/// Serde default for `ModelSource::Ollama::host`: `"http://localhost:11434"`.
fn default_ollama_host() -> String {
    String::from("http://localhost:11434")
}
231
232#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
233#[serde(rename_all = "snake_case")]
234pub enum ApiProtocol {
235    OpenAiCompat,
236    /// OpenAI Responses API (/v1/responses) — works with all OpenAI models including codex.
237    OpenAiResponses,
238    Anthropic,
239    Google,
240    /// Azure OpenAI — uses api-key header and deployment-based URLs.
241    /// Endpoint format: {base}/openai/deployments/{model}/chat/completions?api-version={version}
242    AzureOpenAi,
243}
244
245/// Declared performance expectations. Overridden by observed data once available.
246#[derive(Debug, Clone, Default, Serialize, Deserialize)]
247pub struct PerformanceEnvelope {
248    /// Median latency in milliseconds (declared/estimated).
249    #[serde(default)]
250    pub latency_p50_ms: Option<u64>,
251    /// 99th percentile latency in milliseconds.
252    #[serde(default)]
253    pub latency_p99_ms: Option<u64>,
254    /// Tokens per second throughput.
255    #[serde(default)]
256    pub tokens_per_second: Option<f64>,
257}
258
259/// Cost model for routing optimization.
260/// Generation parameters that a model may or may not support.
261/// Models declare which params they accept. The inference layer
262/// strips unsupported params before sending to the API.
263#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
264#[serde(rename_all = "snake_case")]
265pub enum GenerateParam {
266    Temperature,
267    TopP,
268    TopK,
269    MaxTokens,
270    StopSequences,
271    FrequencyPenalty,
272    PresencePenalty,
273    Seed,
274    ResponseFormat,
275    /// Extended thinking / internal reasoning before responding.
276    ExtendedThinking,
277}
278
279/// Standard parameter set for most models.
280pub fn standard_params() -> Vec<GenerateParam> {
281    vec![
282        GenerateParam::Temperature,
283        GenerateParam::TopP,
284        GenerateParam::MaxTokens,
285        GenerateParam::StopSequences,
286        GenerateParam::FrequencyPenalty,
287        GenerateParam::PresencePenalty,
288        GenerateParam::Seed,
289    ]
290}
291
292/// Parameter set for reasoning models (no temperature, no top_p).
293pub fn reasoning_params() -> Vec<GenerateParam> {
294    vec![GenerateParam::MaxTokens, GenerateParam::StopSequences]
295}
296
297#[derive(Debug, Clone, Default, Serialize, Deserialize)]
298pub struct CostModel {
299    /// USD per 1M input tokens (remote models).
300    #[serde(default)]
301    pub input_per_mtok: Option<f64>,
302    /// USD per 1M output tokens (remote models).
303    #[serde(default)]
304    pub output_per_mtok: Option<f64>,
305    /// On-disk size in MB (local models).
306    #[serde(default)]
307    pub size_mb: Option<u64>,
308    /// RAM required during inference in MB.
309    #[serde(default)]
310    pub ram_mb: Option<u64>,
311}
312
313/// A score on a public benchmark from a published source (model card,
314/// paper, leaderboard). The schema is deliberately permissive — no enum
315/// of benchmark names — so the catalog can carry whichever benchmarks
316/// the upstream provider chose to publish, and new ones can be added
317/// without a code change. Scores are stored on a 0.0–1.0 scale (e.g.
318/// 73.5% accuracy → 0.735) so they compare cleanly across benchmarks
319/// and so `routing_ext::apply_benchmark_priors` can consume them
320/// directly when wired in later.
321#[derive(Debug, Clone, Serialize, Deserialize)]
322pub struct BenchmarkScore {
323    /// Benchmark name as published (e.g., "MMLU-Pro", "GPQA-Diamond",
324    /// "SWE-bench-Verified", "HumanEval", "MATH").
325    pub name: String,
326    /// Score on a 0.0–1.0 scale.
327    pub score: f64,
328    /// Evaluation harness or setup label (e.g., "5-shot", "0-shot CoT",
329    /// "agentic", "pass@1"). Optional but strongly recommended — the
330    /// same benchmark name can mean different things under different
331    /// harnesses.
332    #[serde(default)]
333    pub harness: Option<String>,
334    /// Where the score came from (model card URL, paper, leaderboard
335    /// snapshot). Empty when the source is the upstream provider's
336    /// announcement and a stable URL is not yet known.
337    #[serde(default)]
338    pub source_url: Option<String>,
339    /// ISO 8601 date of the score snapshot (e.g., "2025-08-12"). Lets
340    /// downstream code judge how stale a number is.
341    #[serde(default)]
342    pub measured_at: Option<String>,
343}
344
345/// The full declarative schema for a model.
346///
347/// Analogous to `ToolSchema` — describes what a model is, what it can do,
348/// and how to access it. The router uses this for constraint-based filtering
349/// and cold-start scoring before observed performance data is available.
350#[derive(Debug, Clone, Serialize, Deserialize)]
351pub struct ModelSchema {
352    /// Unique identifier: "provider/model-name:variant" (e.g., "qwen/qwen3-4b:q4_k_m").
353    pub id: String,
354    /// Human-readable display name.
355    pub name: String,
356    /// Provider (qwen, openai, anthropic, google, meta, ollama, custom).
357    pub provider: String,
358    /// Model family for grouping (qwen3, gpt-4, claude-4, llama-3).
359    pub family: String,
360    /// Semantic version or checkpoint label.
361    #[serde(default)]
362    pub version: String,
363    /// What this model can do — ordered by primary capability first.
364    pub capabilities: Vec<ModelCapability>,
365    /// Context window in tokens.
366    pub context_length: usize,
367    /// Parameter count as human-readable string (e.g., "4B", "30B (3B active)").
368    #[serde(default)]
369    pub param_count: String,
370    /// Quantization (Q4_K_M, Q8_0, F16, none).
371    #[serde(default)]
372    pub quantization: Option<String>,
373    /// Declared performance envelope (initial estimate, overridden by observed data).
374    #[serde(default)]
375    pub performance: PerformanceEnvelope,
376    /// Cost structure.
377    #[serde(default)]
378    pub cost: CostModel,
379    /// How to access this model.
380    pub source: ModelSource,
381    /// Free-form tags for filtering (e.g., "fast", "multilingual", "moe").
382    #[serde(default)]
383    pub tags: Vec<String>,
384    /// Supported generation parameters. The inference layer strips any parameter
385    /// not in this set before sending to the API. Empty = all supported.
386    #[serde(default)]
387    pub supported_params: Vec<GenerateParam>,
388    /// Public benchmark scores as published by the model provider or
389    /// reproduced on a public leaderboard (MMLU-Pro, GPQA-Diamond,
390    /// SWE-bench, HumanEval, etc.). The built-in catalog ships this
391    /// empty — population is a curation step, not a code change. See
392    /// `BenchmarkScore` for the field shape and the 0.0–1.0 scoring
393    /// convention.
394    #[serde(default)]
395    pub public_benchmarks: Vec<BenchmarkScore>,
396    /// How much the project vouches for this model. The built-in catalog is
397    /// `Curated`; user-registered and upstream-discovered models default to
398    /// `Community` when their config omits this. Gates auto-apply (task #8)
399    /// and is surfaced in recommendation rationale.
400    #[serde(default)]
401    pub trust_tier: TrustTier,
402    /// Superseded models stay listed if installed but are excluded from
403    /// fresh recommendations. `#[serde(default)]` → not deprecated.
404    #[serde(default)]
405    pub deprecated: bool,
406    /// Whether this model is currently available (downloaded / reachable).
407    /// Not serialized — computed at runtime.
408    #[serde(skip)]
409    pub available: bool,
410}
411
412impl ModelSchema {
413    /// Check if this model has a given capability.
414    pub fn has_capability(&self, cap: ModelCapability) -> bool {
415        self.capabilities.contains(&cap)
416    }
417
418    /// Check if this model is local (runs on-device).
419    pub fn is_local(&self) -> bool {
420        matches!(
421            self.source,
422            ModelSource::Local { .. }
423                | ModelSource::Mlx { .. }
424                | ModelSource::VllmMlx { .. }
425                | ModelSource::AppleFoundationModels { .. }
426        )
427    }
428
429    /// Check if this model delegates inference to a host-registered
430    /// runner (closes Parslee-ai/car-releases#24).
431    pub fn is_delegated(&self) -> bool {
432        matches!(self.source, ModelSource::Delegated { .. })
433    }
434
435    /// Check if this model uses the MLX backend.
436    pub fn is_mlx(&self) -> bool {
437        matches!(self.source, ModelSource::Mlx { .. })
438    }
439
440    /// Check if this model routes to Apple's on-device FoundationModels
441    /// framework. True only for `ModelSource::AppleFoundationModels`;
442    /// callers must still verify runtime availability before dispatch
443    /// (the schema can describe the model on any host, but execution
444    /// requires macOS 26+ on Apple Silicon).
445    pub fn is_foundation_models(&self) -> bool {
446        matches!(self.source, ModelSource::AppleFoundationModels { .. })
447    }
448
449    /// Check if this model uses vLLM-MLX backend.
450    pub fn is_vllm_mlx(&self) -> bool {
451        matches!(self.source, ModelSource::VllmMlx { .. })
452    }
453
454    /// Whether this model can only run on Apple Silicon (Metal). True for
455    /// MLX, vLLM-MLX, and Apple FoundationModels sources. The recommender
456    /// uses this to exclude Metal-only picks on CPU-only / CUDA hosts rather
457    /// than storing a redundant backend-compatibility field that could drift
458    /// from `source`.
459    pub fn requires_apple_silicon(&self) -> bool {
460        self.is_mlx() || self.is_vllm_mlx() || self.is_foundation_models()
461    }
462
463    /// Check if this model is remote (requires API call).
464    pub fn is_remote(&self) -> bool {
465        matches!(
466            self.source,
467            ModelSource::RemoteApi { .. } | ModelSource::Proprietary { .. }
468        )
469    }
470
471    /// Collect all API key env var names for this model (primary + extras).
472    /// Returns empty vec for non-remote models.
473    pub fn all_api_key_envs(&self) -> Vec<String> {
474        match &self.source {
475            ModelSource::RemoteApi {
476                api_key_env,
477                api_key_envs,
478                ..
479            } => {
480                let mut all = vec![api_key_env.clone()];
481                all.extend(api_key_envs.iter().cloned());
482                all
483            }
484            ModelSource::Proprietary {
485                auth: ProprietaryAuth::ApiKeyEnv { env_var },
486                ..
487            }
488            | ModelSource::Proprietary {
489                auth: ProprietaryAuth::BearerTokenEnv { env_var },
490                ..
491            } => vec![env_var.clone()],
492            _ => vec![],
493        }
494    }
495
496    /// Get the size in MB (from cost model or 0 if unknown).
497    pub fn size_mb(&self) -> u64 {
498        self.cost.size_mb.unwrap_or(0)
499    }
500
501    /// Get the RAM requirement in MB (from cost model, falls back to size_mb).
502    pub fn ram_mb(&self) -> u64 {
503        self.cost.ram_mb.unwrap_or_else(|| self.size_mb())
504    }
505
506    /// Estimated cost per 1K output tokens in USD. Returns 0.0 for local models.
507    pub fn cost_per_1k_output(&self) -> f64 {
508        self.cost.output_per_mtok.map(|c| c / 1000.0).unwrap_or(0.0)
509    }
510}
511
#[cfg(test)]
mod tests {
    use super::*;

    /// A curated local GGUF model used as the baseline fixture.
    fn sample_local() -> ModelSchema {
        ModelSchema {
            id: "qwen/qwen3-4b:q4_k_m".into(),
            name: "Qwen3-4B".into(),
            provider: "qwen".into(),
            family: "qwen3".into(),
            version: "1.0".into(),
            capabilities: vec![ModelCapability::Generate, ModelCapability::Code],
            context_length: 32768,
            param_count: "4B".into(),
            quantization: Some("Q4_K_M".into()),
            performance: PerformanceEnvelope {
                tokens_per_second: Some(45.0),
                ..Default::default()
            },
            cost: CostModel {
                size_mb: Some(2500),
                ram_mb: Some(2500),
                ..Default::default()
            },
            source: ModelSource::Local {
                hf_repo: "Qwen/Qwen3-4B-GGUF".into(),
                hf_filename: "Qwen3-4B-Q4_K_M.gguf".into(),
                tokenizer_repo: "Qwen/Qwen3-4B".into(),
            },
            tags: vec!["code".into(), "fast".into()],
            supported_params: vec![],
            public_benchmarks: vec![],
            trust_tier: TrustTier::Curated,
            deprecated: false,
            available: false,
        }
    }

    /// A curated remote-API model with declared per-token pricing.
    fn sample_remote() -> ModelSchema {
        ModelSchema {
            id: "anthropic/claude-sonnet-4-6:latest".into(),
            name: "Claude Sonnet 4.6".into(),
            provider: "anthropic".into(),
            family: "claude-4".into(),
            version: "latest".into(),
            capabilities: vec![
                ModelCapability::Generate,
                ModelCapability::Code,
                ModelCapability::Reasoning,
                ModelCapability::ToolUse,
                ModelCapability::Vision,
            ],
            context_length: 200000,
            param_count: String::new(),
            quantization: None,
            performance: PerformanceEnvelope {
                latency_p50_ms: Some(2000),
                latency_p99_ms: Some(8000),
                tokens_per_second: Some(80.0),
            },
            cost: CostModel {
                input_per_mtok: Some(3.0),
                output_per_mtok: Some(15.0),
                ..Default::default()
            },
            source: ModelSource::RemoteApi {
                endpoint: "https://api.anthropic.com/v1/messages".into(),
                api_key_env: "ANTHROPIC_API_KEY".into(),
                api_key_envs: vec![],
                api_version: Some("2023-06-01".into()),
                protocol: ApiProtocol::Anthropic,
            },
            tags: vec!["reasoning".into(), "tool_use".into()],
            supported_params: vec![],
            public_benchmarks: vec![],
            trust_tier: TrustTier::Curated,
            deprecated: false,
            available: false,
        }
    }

    #[test]
    fn capabilities() {
        let m = sample_local();
        assert!(m.has_capability(ModelCapability::Code));
        assert!(!m.has_capability(ModelCapability::Vision));
    }

    #[test]
    fn local_vs_remote() {
        assert!(sample_local().is_local());
        assert!(!sample_local().is_remote());
        assert!(sample_remote().is_remote());
        assert!(!sample_remote().is_local());
    }

    #[test]
    fn cost() {
        let local = sample_local();
        assert_eq!(local.cost_per_1k_output(), 0.0);

        // 15 USD per 1M output tokens is 0.015 USD per 1K.
        let remote = sample_remote();
        assert!(remote.cost_per_1k_output() > 0.0);
        assert!((remote.cost_per_1k_output() - 0.015).abs() < 1e-12);
    }

    #[test]
    fn api_key_envs_primary_then_extras() {
        assert!(sample_local().all_api_key_envs().is_empty());
        assert_eq!(
            sample_remote().all_api_key_envs(),
            vec!["ANTHROPIC_API_KEY".to_string()]
        );

        let multi = ModelSchema {
            source: ModelSource::RemoteApi {
                endpoint: "https://api.anthropic.com/v1/messages".into(),
                api_key_env: "PRIMARY_KEY".into(),
                api_key_envs: vec!["EXTRA_KEY".into()],
                api_version: None,
                protocol: ApiProtocol::Anthropic,
            },
            ..sample_remote()
        };
        assert_eq!(
            multi.all_api_key_envs(),
            vec!["PRIMARY_KEY".to_string(), "EXTRA_KEY".to_string()]
        );
    }

    #[test]
    fn serde_roundtrip() {
        let local = sample_local();
        let json = serde_json::to_string(&local).unwrap();
        let parsed: ModelSchema = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.id, local.id);
        assert_eq!(parsed.capabilities, local.capabilities);

        let remote = sample_remote();
        let json = serde_json::to_string(&remote).unwrap();
        let parsed: ModelSchema = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.id, remote.id);
        // available is skip-serialized, defaults to false
        assert!(!parsed.available);
    }

    #[test]
    fn trust_tier_and_deprecated_default_when_absent() {
        // Pre-existing ~/.car/models.json configs omit the new fields.
        // They must deserialize to Curated / not-deprecated, not error.
        let json = serde_json::to_string(&sample_local()).unwrap();
        let stripped = json
            .replace(",\"trust_tier\":\"curated\"", "")
            .replace(",\"deprecated\":false", "");
        // Guard against a vacuous pass: if the serialized form ever changes
        // and the replacements stop matching, fail here instead of silently
        // testing the fields-present path.
        assert!(!stripped.contains("trust_tier"));
        assert!(!stripped.contains("deprecated"));
        let parsed: ModelSchema = serde_json::from_str(&stripped).unwrap();
        assert_eq!(parsed.trust_tier, TrustTier::Curated);
        assert!(!parsed.deprecated);
    }

    #[test]
    fn trust_tier_serializes_snake_case() {
        assert_eq!(
            serde_json::to_string(&TrustTier::Community).unwrap(),
            "\"community\""
        );
        assert_eq!(TrustTier::default(), TrustTier::Curated);
    }

    #[test]
    fn requires_apple_silicon_only_for_metal_backends() {
        // GGUF/Candle local and remote models run anywhere CAR builds for.
        assert!(!sample_local().requires_apple_silicon());
        assert!(!sample_remote().requires_apple_silicon());

        let mlx = ModelSchema {
            source: ModelSource::Mlx {
                hf_repo: "mlx-community/Qwen3-4B-4bit".into(),
                hf_weight_file: None,
            },
            ..sample_local()
        };
        assert!(mlx.requires_apple_silicon());

        // vLLM-MLX and Apple FoundationModels are equally Metal-bound.
        let vllm = ModelSchema {
            source: ModelSource::VllmMlx {
                endpoint: "http://localhost:8000".into(),
                model_name: "mlx-community/Qwen3-4B-4bit".into(),
            },
            ..sample_local()
        };
        assert!(vllm.requires_apple_silicon());

        let foundation = ModelSchema {
            source: ModelSource::AppleFoundationModels { use_case: None },
            ..sample_local()
        };
        assert!(foundation.requires_apple_silicon());
    }
}
car_inference/schema.rs

car_inference/
schema.rs