// spn_core/model.rs
1//! Model types and registry for native inference.
2//!
3//! This module provides:
4//! - [`ModelType`]: Capability types (Text, Vision, Embedding, etc.)
5//! - [`ModelArchitecture`]: Architectures supported by mistral.rs
6//! - [`KnownModel`]: Curated model definitions
7//! - [`KNOWN_MODELS`]: Registry of pre-validated models
8//! - [`resolve_model`]: Resolve model ID to curated or HuggingFace model
9//! - [`auto_select_quantization`]: RAM-based quantization selection
10//!
11//! # Example
12//!
13//! ```
14//! use spn_core::{resolve_model, ResolvedModel, KNOWN_MODELS};
15//!
16//! // Resolve a curated model
17//! let model = resolve_model("qwen3:8b");
18//! assert!(matches!(model, Some(ResolvedModel::Curated(_))));
19//!
20//! // Resolve a HuggingFace model
21//! let model = resolve_model("hf:bartowski/Qwen3-30B-GGUF");
22//! assert!(matches!(model, Some(ResolvedModel::HuggingFace { .. })));
23//! ```
24
25use crate::backend::Quantization;
26
27#[cfg(feature = "serde")]
28use serde::{Deserialize, Serialize};
29
30// ============================================================================
31// Model Types
32// ============================================================================
33
/// Model capability type.
///
/// Determines which mistral.rs builder family is used to load a model.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ModelType {
    /// Text generation (LLM).
    Text,
    /// Vision-language model (VLM).
    Vision,
    /// Embedding model for vector representations.
    Embedding,
    /// Audio/speech model.
    Audio,
    /// Image generation (diffusion).
    Diffusion,
}

impl ModelType {
    /// Human-readable name for this model type.
    #[must_use]
    pub const fn name(&self) -> &'static str {
        // Arms sorted alphabetically; the mapping itself is unchanged.
        match self {
            Self::Audio => "Audio",
            Self::Diffusion => "Diffusion",
            Self::Embedding => "Embedding",
            Self::Text => "Text",
            Self::Vision => "Vision",
        }
    }

    /// Builder type name in mistral.rs.
    #[must_use]
    pub const fn builder_name(&self) -> &'static str {
        match self {
            Self::Audio => "AudioModelBuilder",
            Self::Diffusion => "DiffusionModelBuilder",
            Self::Embedding => "EmbeddingModelBuilder",
            Self::Text => "TextModelBuilder / GgufModelBuilder",
            Self::Vision => "VisionModelBuilder",
        }
    }
}

// ============================================================================
// Model Architecture
// ============================================================================

/// Architecture supported by mistral.rs v0.7.0.
///
/// See: <https://github.com/EricLBuehler/mistral.rs#supported-models>
///
/// **Note:** Architecture names must match mistral.rs exactly (case-sensitive).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[allow(non_camel_case_types)]
pub enum ModelArchitecture {
    // =========================================================================
    // TEXT MODELS (loaded via TextModelBuilder or GgufModelBuilder)
    // =========================================================================
    /// Mistral 7B and variants.
    Mistral,
    /// Gemma (first generation).
    Gemma,
    /// Gemma 2.
    Gemma2,
    /// Gemma 3.
    Gemma3,
    /// Mixtral MoE models.
    Mixtral,
    /// Llama (1, 2, 3).
    Llama,
    /// Llama 4.
    Llama4,
    /// Phi-2.
    Phi2,
    /// Phi-3.
    Phi3,
    /// Phi-3.5 MoE variant.
    Phi3_5MoE,
    /// Qwen 2.
    Qwen2,
    /// Qwen 3.
    Qwen3,
    /// Qwen 3 MoE variant.
    Qwen3Moe,
    /// GLM-4 (ChatGLM).
    GLM4,
    /// StarCoder 2.
    Starcoder2,
    /// DeepSeek V2.
    DeepseekV2,
    /// DeepSeek V3.
    DeepseekV3,
    /// SmolLM 3.
    SmolLM3,

    // =========================================================================
    // VISION MODELS (loaded via VisionModelBuilder)
    // =========================================================================
    /// Phi-3 Vision.
    Phi3V,
    /// Phi-4 Multimodal.
    Phi4MM,
    /// IDEFICS 2.
    Idefics2,
    /// IDEFICS 3.
    Idefics3,
    /// LLaVA-NeXT.
    LlavaNext,
    /// LLaVA.
    Llava,
    /// Vision Llama.
    VLlama,
    /// Qwen2 Vision-Language.
    Qwen2VL,
    /// Qwen 2.5 Vision-Language.
    Qwen2_5VL,
    /// MiniCPM-O (multimodal).
    MiniCPM_O,
    /// Gemma 3 with native vision.
    Gemma3n,
    /// Mistral 3 with vision.
    Mistral3,

    // =========================================================================
    // EMBEDDING MODELS (loaded via EmbeddingModelBuilder)
    // =========================================================================
    /// Nomic Embed architecture.
    NomicEmbed,
    /// BAAI BGE embedding models.
    BGE,
    /// Snowflake Arctic embedding.
    Arctic,

    // =========================================================================
    // DIFFUSION (loaded via DiffusionModelBuilder)
    // =========================================================================
    /// Flux diffusion model.
    Flux,

    // =========================================================================
    // AUDIO (future support)
    // =========================================================================
    /// Dia audio model.
    Dia,
}

impl ModelArchitecture {
    /// Returns the model type for this architecture.
    #[must_use]
    pub const fn model_type(&self) -> ModelType {
        use ModelArchitecture as A;
        match self {
            // Text models.
            A::Mistral | A::Gemma | A::Gemma2 | A::Gemma3 | A::Mixtral | A::Llama
            | A::Llama4 | A::Phi2 | A::Phi3 | A::Phi3_5MoE | A::Qwen2 | A::Qwen3
            | A::Qwen3Moe | A::GLM4 | A::Starcoder2 | A::DeepseekV2 | A::DeepseekV3
            | A::SmolLM3 => ModelType::Text,

            // Vision models.
            A::Phi3V | A::Phi4MM | A::Idefics2 | A::Idefics3 | A::LlavaNext
            | A::Llava | A::VLlama | A::Qwen2VL | A::Qwen2_5VL | A::MiniCPM_O
            | A::Gemma3n | A::Mistral3 => ModelType::Vision,

            // Embedding models.
            A::NomicEmbed | A::BGE | A::Arctic => ModelType::Embedding,

            // Diffusion.
            A::Flux => ModelType::Diffusion,

            // Audio.
            A::Dia => ModelType::Audio,
        }
    }

    /// String representation matching mistral.rs enum names.
    ///
    /// Each arm returns exactly the variant's identifier; mistral.rs matches
    /// these case-sensitively.
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match self {
            Self::Mistral => "Mistral",
            Self::Gemma => "Gemma",
            Self::Gemma2 => "Gemma2",
            Self::Gemma3 => "Gemma3",
            Self::Mixtral => "Mixtral",
            Self::Llama => "Llama",
            Self::Llama4 => "Llama4",
            Self::Phi2 => "Phi2",
            Self::Phi3 => "Phi3",
            Self::Phi3_5MoE => "Phi3_5MoE",
            Self::Qwen2 => "Qwen2",
            Self::Qwen3 => "Qwen3",
            Self::Qwen3Moe => "Qwen3Moe",
            Self::GLM4 => "GLM4",
            Self::Starcoder2 => "Starcoder2",
            Self::DeepseekV2 => "DeepseekV2",
            Self::DeepseekV3 => "DeepseekV3",
            Self::SmolLM3 => "SmolLM3",
            Self::Phi3V => "Phi3V",
            Self::Phi4MM => "Phi4MM",
            Self::Idefics2 => "Idefics2",
            Self::Idefics3 => "Idefics3",
            Self::LlavaNext => "LlavaNext",
            Self::Llava => "Llava",
            Self::VLlama => "VLlama",
            Self::Qwen2VL => "Qwen2VL",
            Self::Qwen2_5VL => "Qwen2_5VL",
            Self::MiniCPM_O => "MiniCPM_O",
            Self::Gemma3n => "Gemma3n",
            Self::Mistral3 => "Mistral3",
            Self::NomicEmbed => "NomicEmbed",
            Self::BGE => "BGE",
            Self::Arctic => "Arctic",
            Self::Flux => "Flux",
            Self::Dia => "Dia",
        }
    }
}

impl std::fmt::Display for ModelArchitecture {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
278
279// ============================================================================
280// Known Model Definition
281// ============================================================================
282
/// A curated model in the registry.
///
/// These models are pre-validated for compatibility with mistral.rs and have
/// tested quantization options. Entries are declared in [`KNOWN_MODELS`] and
/// looked up by [`find_model`] / [`resolve_model`].
#[derive(Debug, Clone)]
pub struct KnownModel {
    /// Short ID used in YAML (e.g., "qwen3:8b").
    pub id: &'static str,

    /// Human-readable name.
    pub name: &'static str,

    /// Model type (Text, Vision, Embedding, etc.).
    pub model_type: ModelType,

    /// Architecture for mistral.rs.
    pub architecture: ModelArchitecture,

    /// HuggingFace repo (e.g., "Qwen/Qwen3-8B-GGUF").
    pub hf_repo: &'static str,

    /// Default GGUF filename.
    pub default_file: &'static str,

    /// Available quantizations with filenames.
    ///
    /// NOTE(review): registry entries appear ordered smallest/lowest-quality
    /// first — `default_quantization` takes the first entry and
    /// `auto_select_quantization` walks the list in reverse to prefer the
    /// highest quality. New entries should preserve that ordering; TODO
    /// confirm and document as a hard invariant.
    pub quantizations: &'static [(Quantization, &'static str)],

    /// Model size in billions of parameters.
    pub param_billions: f32,

    /// Minimum RAM in GB for Q4_K_M quantization.
    pub min_ram_gb: u32,

    /// Description.
    pub description: &'static str,
}
319
320impl KnownModel {
321    /// Get the filename for a specific quantization.
322    #[must_use]
323    pub fn filename_for_quant(&self, quant: Quantization) -> Option<&'static str> {
324        self.quantizations
325            .iter()
326            .find(|(q, _)| *q == quant)
327            .map(|(_, f)| *f)
328    }
329
330    /// Get the default quantization (first in list).
331    #[must_use]
332    pub fn default_quantization(&self) -> Option<Quantization> {
333        self.quantizations.first().map(|(q, _)| *q)
334    }
335
336    /// Check if this model supports a specific quantization.
337    #[must_use]
338    pub fn supports_quant(&self, quant: Quantization) -> bool {
339        self.quantizations.iter().any(|(q, _)| *q == quant)
340    }
341}
342
343// ============================================================================
344// Known Models Registry
345// ============================================================================
346
/// Curated model registry.
///
/// These models are pre-validated for:
/// - Compatibility with mistral.rs
/// - Correct architecture mapping
/// - Working quantizations
/// - Accurate memory requirements
///
/// Each entry's `quantizations` list is ordered smallest file first;
/// `auto_select_quantization` iterates it in reverse to prefer the highest
/// quality that fits in RAM.
pub static KNOWN_MODELS: &[KnownModel] = &[
    // =========================================================================
    // TEXT MODELS
    // =========================================================================
    KnownModel {
        id: "qwen3:0.6b",
        name: "Qwen3 0.6B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Qwen3,
        hf_repo: "Qwen/Qwen3-0.6B-GGUF",
        default_file: "qwen3-0.6b-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "qwen3-0.6b-q4_k_m.gguf"),
            (Quantization::Q8_0, "qwen3-0.6b-q8_0.gguf"),
            (Quantization::F16, "qwen3-0.6b-f16.gguf"),
        ],
        param_billions: 0.6,
        min_ram_gb: 2,
        description: "Ultra-lightweight model for edge devices",
    },
    KnownModel {
        id: "qwen3:8b",
        name: "Qwen3 8B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Qwen3,
        hf_repo: "Qwen/Qwen3-8B-GGUF",
        default_file: "qwen3-8b-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "qwen3-8b-q4_k_m.gguf"),
            (Quantization::Q5_K_M, "qwen3-8b-q5_k_m.gguf"),
            (Quantization::Q8_0, "qwen3-8b-q8_0.gguf"),
        ],
        param_billions: 8.0,
        min_ram_gb: 8,
        description: "Best balance of speed and quality for most tasks",
    },
    KnownModel {
        id: "qwen3:32b",
        name: "Qwen3 32B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Qwen3,
        hf_repo: "Qwen/Qwen3-32B-GGUF",
        default_file: "qwen3-32b-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "qwen3-32b-q4_k_m.gguf"),
            (Quantization::Q5_K_M, "qwen3-32b-q5_k_m.gguf"),
        ],
        param_billions: 32.0,
        min_ram_gb: 24,
        description: "High-quality reasoning, requires 24GB+ RAM",
    },
    KnownModel {
        id: "llama4:8b",
        name: "Llama 4 8B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Llama4,
        // TODO: Verify HF repo exists when Llama 4 GGUF is released
        hf_repo: "meta-llama/Llama-4-8B-GGUF",
        default_file: "llama-4-8b-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "llama-4-8b-q4_k_m.gguf"),
            (Quantization::Q8_0, "llama-4-8b-q8_0.gguf"),
        ],
        param_billions: 8.0,
        min_ram_gb: 8,
        description: "Meta's latest Llama model",
    },
    KnownModel {
        id: "phi4:14b",
        name: "Phi-4 14B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Phi3, // Phi4 uses Phi3 arch
        hf_repo: "microsoft/Phi-4-GGUF",
        default_file: "phi-4-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "phi-4-q4_k_m.gguf"),
            (Quantization::Q8_0, "phi-4-q8_0.gguf"),
        ],
        param_billions: 14.0,
        min_ram_gb: 12,
        description: "Microsoft's reasoning-focused model",
    },
    KnownModel {
        id: "gemma3:4b",
        name: "Gemma 3 4B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Gemma3,
        hf_repo: "google/gemma-3-4b-gguf",
        default_file: "gemma-3-4b-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "gemma-3-4b-q4_k_m.gguf"),
            (Quantization::Q8_0, "gemma-3-4b-q8_0.gguf"),
        ],
        param_billions: 4.0,
        min_ram_gb: 6,
        description: "Google's efficient small model",
    },
    KnownModel {
        id: "gemma3:12b",
        name: "Gemma 3 12B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Gemma3,
        hf_repo: "google/gemma-3-12b-gguf",
        default_file: "gemma-3-12b-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "gemma-3-12b-q4_k_m.gguf"),
            (Quantization::Q5_K_M, "gemma-3-12b-q5_k_m.gguf"),
        ],
        param_billions: 12.0,
        min_ram_gb: 10,
        description: "Google's mid-size model",
    },
    KnownModel {
        id: "mistral:7b",
        name: "Mistral 7B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Mistral,
        hf_repo: "mistralai/Mistral-7B-v0.3-GGUF",
        default_file: "mistral-7b-v0.3-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "mistral-7b-v0.3-q4_k_m.gguf"),
            (Quantization::Q8_0, "mistral-7b-v0.3-q8_0.gguf"),
        ],
        param_billions: 7.0,
        min_ram_gb: 8,
        description: "Mistral's flagship 7B model",
    },
    KnownModel {
        id: "deepseek:7b",
        name: "DeepSeek V3 7B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::DeepseekV3,
        // TODO: Verify HF repo path - DeepSeek V3 may use different naming
        hf_repo: "deepseek-ai/DeepSeek-V3-7B-GGUF",
        default_file: "deepseek-v3-7b-q4_k_m.gguf",
        quantizations: &[(Quantization::Q4_K_M, "deepseek-v3-7b-q4_k_m.gguf")],
        param_billions: 7.0,
        min_ram_gb: 8,
        description: "DeepSeek's latest architecture",
    },
    // =========================================================================
    // VISION MODELS
    // =========================================================================
    KnownModel {
        id: "qwen3-vision:8b",
        name: "Qwen3 Vision 8B",
        model_type: ModelType::Vision,
        // NOTE: backed by the Qwen2.5-VL architecture/weights despite the ID.
        architecture: ModelArchitecture::Qwen2_5VL,
        hf_repo: "Qwen/Qwen2.5-VL-8B-GGUF",
        default_file: "qwen2.5-vl-8b-q4_k_m.gguf",
        quantizations: &[(Quantization::Q4_K_M, "qwen2.5-vl-8b-q4_k_m.gguf")],
        param_billions: 8.0,
        min_ram_gb: 12,
        description: "Vision-language model for image understanding",
    },
    KnownModel {
        id: "llama4-vision:8b",
        name: "Llama 4 Vision 8B",
        model_type: ModelType::Vision,
        architecture: ModelArchitecture::VLlama, // Vision uses VLlama, not Llama4
        // TODO: Verify HF repo exists when Llama 4 Vision is released
        hf_repo: "meta-llama/Llama-4-Vision-8B-GGUF",
        default_file: "llama-4-vision-8b-q4_k_m.gguf",
        quantizations: &[(Quantization::Q4_K_M, "llama-4-vision-8b-q4_k_m.gguf")],
        param_billions: 8.0,
        min_ram_gb: 12,
        description: "Meta's multimodal Llama 4",
    },
    KnownModel {
        id: "phi4-vision:14b",
        name: "Phi-4 Vision 14B",
        model_type: ModelType::Vision,
        architecture: ModelArchitecture::Phi4MM,
        hf_repo: "microsoft/Phi-4-MM-GGUF",
        default_file: "phi-4-mm-q4_k_m.gguf",
        quantizations: &[(Quantization::Q4_K_M, "phi-4-mm-q4_k_m.gguf")],
        param_billions: 14.0,
        min_ram_gb: 16,
        description: "Microsoft's multimodal Phi-4",
    },
    KnownModel {
        id: "gemma3-vision:12b",
        name: "Gemma 3 Vision 12B",
        model_type: ModelType::Vision,
        architecture: ModelArchitecture::Gemma3n, // Vision uses Gemma3n (native vision)
        // TODO: Verify HF repo exists when Gemma 3 Vision GGUF is released
        hf_repo: "google/gemma-3-12b-vision-gguf",
        default_file: "gemma-3-12b-vision-q4_k_m.gguf",
        quantizations: &[(Quantization::Q4_K_M, "gemma-3-12b-vision-q4_k_m.gguf")],
        param_billions: 12.0,
        min_ram_gb: 14,
        description: "Google's vision-enabled Gemma",
    },
    // =========================================================================
    // EMBEDDING MODELS
    // =========================================================================
    KnownModel {
        id: "nomic-embed",
        name: "Nomic Embed Text v1.5",
        model_type: ModelType::Embedding,
        architecture: ModelArchitecture::NomicEmbed,
        hf_repo: "nomic-ai/nomic-embed-text-v1.5-GGUF",
        default_file: "nomic-embed-text-v1.5-f16.gguf",
        quantizations: &[
            (Quantization::F16, "nomic-embed-text-v1.5-f16.gguf"),
            (Quantization::Q8_0, "nomic-embed-text-v1.5-q8_0.gguf"),
        ],
        param_billions: 0.137,
        min_ram_gb: 1,
        description: "High-quality 768-dim embeddings",
    },
    KnownModel {
        id: "bge-m3",
        name: "BGE-M3",
        model_type: ModelType::Embedding,
        architecture: ModelArchitecture::BGE,
        hf_repo: "BAAI/bge-m3-GGUF",
        default_file: "bge-m3-f16.gguf",
        quantizations: &[(Quantization::F16, "bge-m3-f16.gguf")],
        param_billions: 0.568,
        min_ram_gb: 2,
        description: "Multilingual embedding model",
    },
    KnownModel {
        id: "snowflake-arctic",
        name: "Snowflake Arctic Embed",
        model_type: ModelType::Embedding,
        architecture: ModelArchitecture::Arctic,
        hf_repo: "Snowflake/snowflake-arctic-embed-m-GGUF",
        default_file: "snowflake-arctic-embed-m-f16.gguf",
        quantizations: &[(Quantization::F16, "snowflake-arctic-embed-m-f16.gguf")],
        param_billions: 0.335,
        min_ram_gb: 1,
        description: "Enterprise-grade embeddings",
    },
];
590
591// ============================================================================
592// Model Resolution
593// ============================================================================
594
/// Result of model resolution.
///
/// Produced by [`resolve_model`]: either a borrow into the curated registry
/// or a raw HuggingFace repo string passed through without validation.
#[derive(Debug)]
pub enum ResolvedModel<'a> {
    /// Curated model from KNOWN_MODELS.
    Curated(&'a KnownModel),
    /// HuggingFace passthrough.
    HuggingFace {
        /// HuggingFace repository (e.g., "bartowski/Qwen3-30B-GGUF").
        repo: String,
    },
}
606
607/// Resolve a model ID to a [`KnownModel`] or HuggingFace passthrough.
608///
609/// Supports:
610/// - Curated IDs: `"qwen3:8b"` → [`ResolvedModel::Curated`]
611/// - HuggingFace: `"hf:bartowski/Qwen3-30B-GGUF"` → [`ResolvedModel::HuggingFace`]
612///
613/// # Example
614///
615/// ```
616/// use spn_core::{resolve_model, ResolvedModel};
617///
618/// // Curated model
619/// if let Some(ResolvedModel::Curated(model)) = resolve_model("qwen3:8b") {
620///     assert_eq!(model.param_billions, 8.0);
621/// }
622///
623/// // HuggingFace passthrough
624/// if let Some(ResolvedModel::HuggingFace { repo }) = resolve_model("hf:bartowski/Model") {
625///     assert_eq!(repo, "bartowski/Model");
626/// }
627/// ```
628#[must_use]
629pub fn resolve_model(id: &str) -> Option<ResolvedModel<'_>> {
630    if let Some(hf_repo) = id.strip_prefix("hf:") {
631        // HuggingFace passthrough
632        Some(ResolvedModel::HuggingFace {
633            repo: hf_repo.to_string(),
634        })
635    } else {
636        // Curated model lookup
637        KNOWN_MODELS
638            .iter()
639            .find(|m| m.id == id)
640            .map(ResolvedModel::Curated)
641    }
642}
643
644/// Find a curated model by ID.
645///
646/// Unlike [`resolve_model`], this only searches curated models.
647#[must_use]
648pub fn find_model(id: &str) -> Option<&'static KnownModel> {
649    KNOWN_MODELS.iter().find(|m| m.id == id)
650}
651
652/// List all models of a specific type.
653pub fn models_by_type(model_type: ModelType) -> impl Iterator<Item = &'static KnownModel> {
654    KNOWN_MODELS.iter().filter(move |m| m.model_type == model_type)
655}
656
657// ============================================================================
658// Auto-Quantization Selection
659// ============================================================================
660
661/// Auto-select quantization based on available RAM.
662///
663/// Returns the best (highest quality) quantization that fits in available RAM.
664/// Falls back to the smallest quantization if nothing fits.
665///
666/// # Arguments
667///
668/// * `model` - The model to select quantization for
669/// * `available_ram_gb` - Available system RAM in gigabytes
670///
671/// # Example
672///
673/// ```
674/// use spn_core::{auto_select_quantization, find_model, Quantization};
675///
676/// let model = find_model("qwen3:8b").unwrap();
677///
678/// // With 16GB RAM, should select Q8_0 (high quality)
679/// let quant = auto_select_quantization(model, 16);
680/// assert_eq!(quant, Quantization::Q8_0);
681///
682/// // With 8GB RAM, Q5_K_M fits
683/// let quant = auto_select_quantization(model, 8);
684/// assert_eq!(quant, Quantization::Q5_K_M);
685///
686/// // For larger models, falls back to smaller quantization
687/// let large_model = find_model("qwen3:32b").unwrap();
688/// let quant = auto_select_quantization(large_model, 16);
689/// assert_eq!(quant, Quantization::Q4_K_M);
690/// ```
691#[must_use]
692pub fn auto_select_quantization(model: &KnownModel, available_ram_gb: u32) -> Quantization {
693    // Iterate from highest quality to lowest
694    for (quant, _filename) in model.quantizations.iter().rev() {
695        // Model memory + 2GB overhead for KV cache and runtime
696        let required_gb = (model.param_billions * quant.memory_multiplier()) as u32 + 2;
697
698        if required_gb <= available_ram_gb {
699            return *quant;
700        }
701    }
702
703    // Fallback to smallest quantization
704    model
705        .quantizations
706        .first()
707        .map(|(q, _)| *q)
708        .unwrap_or(Quantization::Q4_K_M)
709}
710
711// ============================================================================
712// RAM Detection
713// ============================================================================
714
/// Detect available system RAM in gigabytes.
///
/// Returns a conservative estimate if detection fails.
#[cfg(target_os = "macos")]
#[must_use]
pub fn detect_available_ram_gb() -> u32 {
    use std::process::Command;

    // `sysctl -n hw.memsize` prints total physical memory in bytes.
    // NOTE(review): this is *total* RAM, not currently-free RAM — the flat
    // truncating division below keeps the estimate conservative.
    let total_bytes = Command::new("sysctl")
        .args(["-n", "hw.memsize"])
        .output()
        .ok()
        .and_then(|out| String::from_utf8(out.stdout).ok())
        .and_then(|text| text.trim().parse::<u64>().ok());

    match total_bytes {
        Some(bytes) => (bytes / 1_073_741_824) as u32, // bytes to GB
        None => 8,                                     // conservative default
    }
}
734
/// Detect available system RAM in gigabytes.
///
/// Reads `MemTotal` from `/proc/meminfo`; falls back to a conservative 8 GB
/// if the file is missing or unparsable.
#[cfg(target_os = "linux")]
#[must_use]
pub fn detect_available_ram_gb() -> u32 {
    use std::fs;

    // /proc/meminfo lines look like "MemTotal:       16331712 kB";
    // the second whitespace-separated field is the value in kB.
    let total_kb = fs::read_to_string("/proc/meminfo").ok().and_then(|meminfo| {
        let line = meminfo.lines().find(|l| l.starts_with("MemTotal:"))?;
        line.split_whitespace().nth(1)?.parse::<u64>().ok()
    });

    // KB to GB (1024 * 1024), truncating; 8 GB default on any failure.
    total_kb.map_or(8, |kb| (kb / 1_048_576) as u32)
}
756
/// Detect available system RAM in gigabytes.
///
/// Windows stub: detection is not implemented yet, so this returns a fixed
/// assumption rather than a measured value.
#[cfg(target_os = "windows")]
#[must_use]
pub fn detect_available_ram_gb() -> u32 {
    // TODO: Use winapi to get actual RAM (e.g. GlobalMemoryStatusEx).
    16 // Assume 16GB on Windows for now
}
764
/// Detect available system RAM in gigabytes.
///
/// Fallback for platforms without a dedicated detection path; returns a
/// fixed conservative value so quantization auto-selection stays safe.
#[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
#[must_use]
pub fn detect_available_ram_gb() -> u32 {
    8 // Conservative default
}
771
772// ============================================================================
773// Tests
774// ============================================================================
775
#[cfg(test)]
mod tests {
    use super::*;

    // --- Resolution ---------------------------------------------------------

    #[test]
    fn test_resolve_curated_model() {
        let result = resolve_model("qwen3:8b");
        assert!(matches!(result, Some(ResolvedModel::Curated(_))));

        if let Some(ResolvedModel::Curated(model)) = result {
            assert_eq!(model.id, "qwen3:8b");
            assert_eq!(model.param_billions, 8.0);
        }
    }

    #[test]
    fn test_resolve_huggingface_model() {
        let result = resolve_model("hf:bartowski/Qwen3-30B-GGUF");
        assert!(matches!(result, Some(ResolvedModel::HuggingFace { .. })));

        // The "hf:" prefix must be stripped from the stored repo.
        if let Some(ResolvedModel::HuggingFace { repo }) = result {
            assert_eq!(repo, "bartowski/Qwen3-30B-GGUF");
        }
    }

    #[test]
    fn test_resolve_unknown_model() {
        let result = resolve_model("unknown:model");
        assert!(result.is_none());
    }

    #[test]
    fn test_find_model() {
        let model = find_model("qwen3:8b");
        assert!(model.is_some());
        assert_eq!(model.unwrap().name, "Qwen3 8B");
    }

    // --- Registry shape -----------------------------------------------------

    #[test]
    fn test_models_by_type() {
        let text_models: Vec<_> = models_by_type(ModelType::Text).collect();
        assert!(text_models.len() >= 9);
        assert!(text_models.iter().all(|m| m.model_type == ModelType::Text));

        let vision_models: Vec<_> = models_by_type(ModelType::Vision).collect();
        assert!(vision_models.len() >= 4);

        let embed_models: Vec<_> = models_by_type(ModelType::Embedding).collect();
        assert!(embed_models.len() >= 3);
    }

    // --- Quantization selection ---------------------------------------------

    #[test]
    fn test_auto_select_quantization_high_ram() {
        let model = find_model("qwen3:8b").unwrap();
        // With 32GB RAM, should select highest quality available
        let quant = auto_select_quantization(model, 32);
        assert_eq!(quant, Quantization::Q8_0);
    }

    #[test]
    fn test_auto_select_quantization_low_ram() {
        let model = find_model("qwen3:32b").unwrap();
        // With 16GB RAM, 32B model should fall back to Q4_K_M
        let quant = auto_select_quantization(model, 16);
        assert_eq!(quant, Quantization::Q4_K_M);
    }

    #[test]
    fn test_detect_ram() {
        let ram = detect_available_ram_gb();
        // Should return a reasonable value (at least 1GB, no more than 1TB)
        assert!(ram >= 1);
        assert!(ram <= 1024);
    }

    #[test]
    fn test_known_models_count() {
        // Ensure we have the expected number of models
        // 9 text + 4 vision + 3 embedding = 16 curated models
        assert!(
            KNOWN_MODELS.len() >= 16,
            "Expected at least 16 models, got {}",
            KNOWN_MODELS.len()
        );
    }

    // --- Architecture mapping -----------------------------------------------

    #[test]
    fn test_model_architecture_model_type() {
        // Text architectures
        assert_eq!(
            ModelArchitecture::Qwen3.model_type(),
            ModelType::Text
        );
        assert_eq!(
            ModelArchitecture::Llama4.model_type(),
            ModelType::Text
        );

        // Vision architectures
        assert_eq!(
            ModelArchitecture::Phi4MM.model_type(),
            ModelType::Vision
        );
        assert_eq!(
            ModelArchitecture::Qwen2_5VL.model_type(),
            ModelType::Vision
        );

        // Embedding architectures
        assert_eq!(
            ModelArchitecture::NomicEmbed.model_type(),
            ModelType::Embedding
        );
        assert_eq!(
            ModelArchitecture::BGE.model_type(),
            ModelType::Embedding
        );
    }

    // --- KnownModel helpers --------------------------------------------------

    #[test]
    fn test_quantization_memory_multiplier() {
        // Ordering sanity: higher-precision quants must cost more memory.
        assert!(Quantization::Q4_K_M.memory_multiplier() < Quantization::Q8_0.memory_multiplier());
        assert!(Quantization::Q8_0.memory_multiplier() < Quantization::F16.memory_multiplier());
    }

    #[test]
    fn test_known_model_filename_for_quant() {
        let model = find_model("qwen3:8b").unwrap();

        let q4_file = model.filename_for_quant(Quantization::Q4_K_M);
        assert!(q4_file.is_some());
        assert!(q4_file.unwrap().contains("q4_k_m"));

        // F16 is not available for this model
        let f16_file = model.filename_for_quant(Quantization::F16);
        assert!(f16_file.is_none());
    }
}