1use crate::backend::Quantization;
26
27#[cfg(feature = "serde")]
28use serde::{Deserialize, Serialize};
29
/// High-level capability category a model belongs to.
///
/// Used to route a model id to the appropriate builder API and to
/// filter the curated registry (see `models_by_type`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ModelType {
    /// Text generation (chat / completion) models.
    Text,
    /// Vision-language models that accept image input.
    Vision,
    /// Embedding models producing dense vectors.
    Embedding,
    /// Audio models.
    Audio,
    /// Diffusion (image generation) models.
    Diffusion,
}

impl ModelType {
    /// Human-readable name of the category (e.g. `"Text"`).
    #[must_use]
    pub const fn name(&self) -> &'static str {
        match self {
            Self::Text => "Text",
            Self::Vision => "Vision",
            Self::Embedding => "Embedding",
            Self::Audio => "Audio",
            Self::Diffusion => "Diffusion",
        }
    }

    /// Name of the builder type(s) used to load models of this
    /// category. Intended for diagnostics / help text, not dispatch.
    #[must_use]
    pub const fn builder_name(&self) -> &'static str {
        match self {
            Self::Text => "TextModelBuilder / GgufModelBuilder",
            Self::Vision => "VisionModelBuilder",
            Self::Embedding => "EmbeddingModelBuilder",
            Self::Audio => "AudioModelBuilder",
            Self::Diffusion => "DiffusionModelBuilder",
        }
    }
}

/// Delegates to [`ModelType::name`], mirroring the `Display` impl that
/// `ModelArchitecture` already provides (consistency fix: previously
/// only `ModelArchitecture` was `Display`).
impl std::fmt::Display for ModelType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.name())
    }
}
75
/// Concrete model architecture identifiers used by the curated
/// registry. Variants are grouped by capability; `model_type()` maps
/// each variant to its [`ModelType`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
// Variants such as `Phi3_5MoE` / `Qwen2_5VL` keep upstream model naming.
#[allow(non_camel_case_types)]
pub enum ModelArchitecture {
    // --- Text generation architectures ---
    Mistral,
    Gemma,
    Gemma2,
    Gemma3,
    Mixtral,
    Llama,
    Llama4,
    Phi2,
    Phi3,
    Phi3_5MoE,
    Qwen2,
    Qwen3,
    Qwen3Moe,
    GLM4,
    Starcoder2,
    DeepseekV2,
    DeepseekV3,
    SmolLM3,

    // --- Vision-language architectures ---
    Phi3V,
    Phi4MM,
    Idefics2,
    Idefics3,
    LlavaNext,
    Llava,
    VLlama,
    Qwen2VL,
    Qwen2_5VL,
    MiniCPM_O,
    Gemma3n,
    Mistral3,

    // --- Embedding architectures ---
    NomicEmbed,
    BGE,
    Arctic,

    // --- Diffusion architectures ---
    Flux,

    // --- Audio architectures ---
    Dia,
}
179
impl ModelArchitecture {
    /// Maps this architecture to its capability category.
    ///
    /// The match is exhaustive on purpose: adding a new variant forces
    /// a compile error here until it is categorized.
    #[must_use]
    pub const fn model_type(&self) -> ModelType {
        match self {
            Self::Mistral
            | Self::Gemma
            | Self::Gemma2
            | Self::Gemma3
            | Self::Mixtral
            | Self::Llama
            | Self::Llama4
            | Self::Phi2
            | Self::Phi3
            | Self::Phi3_5MoE
            | Self::Qwen2
            | Self::Qwen3
            | Self::Qwen3Moe
            | Self::GLM4
            | Self::Starcoder2
            | Self::DeepseekV2
            | Self::DeepseekV3
            | Self::SmolLM3 => ModelType::Text,

            Self::Phi3V
            | Self::Phi4MM
            | Self::Idefics2
            | Self::Idefics3
            | Self::LlavaNext
            | Self::Llava
            | Self::VLlama
            | Self::Qwen2VL
            | Self::Qwen2_5VL
            | Self::MiniCPM_O
            | Self::Gemma3n
            | Self::Mistral3 => ModelType::Vision,

            Self::NomicEmbed | Self::BGE | Self::Arctic => ModelType::Embedding,

            Self::Flux => ModelType::Diffusion,

            Self::Dia => ModelType::Audio,
        }
    }

    /// Canonical string name of the architecture (matches the variant
    /// identifier exactly); used by the `Display` impl below.
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match self {
            Self::Mistral => "Mistral",
            Self::Gemma => "Gemma",
            Self::Gemma2 => "Gemma2",
            Self::Gemma3 => "Gemma3",
            Self::Mixtral => "Mixtral",
            Self::Llama => "Llama",
            Self::Llama4 => "Llama4",
            Self::Phi2 => "Phi2",
            Self::Phi3 => "Phi3",
            Self::Phi3_5MoE => "Phi3_5MoE",
            Self::Qwen2 => "Qwen2",
            Self::Qwen3 => "Qwen3",
            Self::Qwen3Moe => "Qwen3Moe",
            Self::GLM4 => "GLM4",
            Self::Starcoder2 => "Starcoder2",
            Self::DeepseekV2 => "DeepseekV2",
            Self::DeepseekV3 => "DeepseekV3",
            Self::SmolLM3 => "SmolLM3",
            Self::Phi3V => "Phi3V",
            Self::Phi4MM => "Phi4MM",
            Self::Idefics2 => "Idefics2",
            Self::Idefics3 => "Idefics3",
            Self::LlavaNext => "LlavaNext",
            Self::Llava => "Llava",
            Self::VLlama => "VLlama",
            Self::Qwen2VL => "Qwen2VL",
            Self::Qwen2_5VL => "Qwen2_5VL",
            Self::MiniCPM_O => "MiniCPM_O",
            Self::Gemma3n => "Gemma3n",
            Self::Mistral3 => "Mistral3",
            Self::NomicEmbed => "NomicEmbed",
            Self::BGE => "BGE",
            Self::Arctic => "Arctic",
            Self::Flux => "Flux",
            Self::Dia => "Dia",
        }
    }
}
272
273impl std::fmt::Display for ModelArchitecture {
274 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
275 write!(f, "{}", self.as_str())
276 }
277}
278
/// A curated, downloadable model entry in [`KNOWN_MODELS`].
///
/// All fields are `&'static` because entries are baked into the binary.
#[derive(Debug, Clone)]
pub struct KnownModel {
    /// Registry id users type, e.g. `"qwen3:8b"` or `"nomic-embed"`.
    pub id: &'static str,

    /// Human-readable display name.
    pub name: &'static str,

    /// Capability category (text / vision / embedding / ...).
    pub model_type: ModelType,

    /// Underlying architecture variant.
    pub architecture: ModelArchitecture,

    /// Hugging Face repository the GGUF files are fetched from.
    pub hf_repo: &'static str,

    /// Filename downloaded when no quantization is specified.
    pub default_file: &'static str,

    /// Available (quantization, filename) pairs.
    /// The first entry is treated as the default by `default_quantization`.
    pub quantizations: &'static [(Quantization, &'static str)],

    /// Parameter count in billions (used for RAM estimates).
    pub param_billions: f32,

    /// Rough minimum system RAM (GB) to run the default quantization.
    pub min_ram_gb: u32,

    /// One-line description shown in listings.
    pub description: &'static str,
}
319
320impl KnownModel {
321 #[must_use]
323 pub fn filename_for_quant(&self, quant: Quantization) -> Option<&'static str> {
324 self.quantizations
325 .iter()
326 .find(|(q, _)| *q == quant)
327 .map(|(_, f)| *f)
328 }
329
330 #[must_use]
332 pub fn default_quantization(&self) -> Option<Quantization> {
333 self.quantizations.first().map(|(q, _)| *q)
334 }
335
336 #[must_use]
338 pub fn supports_quant(&self, quant: Quantization) -> bool {
339 self.quantizations.iter().any(|(q, _)| *q == quant)
340 }
341}
342
/// Curated registry of known models, grouped by capability.
///
/// NOTE(review): `auto_select_quantization` iterates `quantizations`
/// in reverse assuming entries are ordered smallest/fastest first.
/// Text entries follow that order, but embedding entries list F16
/// first — confirm that ordering is intentional for embeddings.
pub static KNOWN_MODELS: &[KnownModel] = &[
    // ----- Text generation models -----
    KnownModel {
        id: "qwen3:0.6b",
        name: "Qwen3 0.6B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Qwen3,
        hf_repo: "Qwen/Qwen3-0.6B-GGUF",
        default_file: "qwen3-0.6b-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "qwen3-0.6b-q4_k_m.gguf"),
            (Quantization::Q8_0, "qwen3-0.6b-q8_0.gguf"),
            (Quantization::F16, "qwen3-0.6b-f16.gguf"),
        ],
        param_billions: 0.6,
        min_ram_gb: 2,
        description: "Ultra-lightweight model for edge devices",
    },
    KnownModel {
        id: "qwen3:8b",
        name: "Qwen3 8B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Qwen3,
        hf_repo: "Qwen/Qwen3-8B-GGUF",
        default_file: "qwen3-8b-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "qwen3-8b-q4_k_m.gguf"),
            (Quantization::Q5_K_M, "qwen3-8b-q5_k_m.gguf"),
            (Quantization::Q8_0, "qwen3-8b-q8_0.gguf"),
        ],
        param_billions: 8.0,
        min_ram_gb: 8,
        description: "Best balance of speed and quality for most tasks",
    },
    KnownModel {
        id: "qwen3:32b",
        name: "Qwen3 32B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Qwen3,
        hf_repo: "Qwen/Qwen3-32B-GGUF",
        default_file: "qwen3-32b-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "qwen3-32b-q4_k_m.gguf"),
            (Quantization::Q5_K_M, "qwen3-32b-q5_k_m.gguf"),
        ],
        param_billions: 32.0,
        min_ram_gb: 24,
        description: "High-quality reasoning, requires 24GB+ RAM",
    },
    KnownModel {
        // NOTE(review): verify "meta-llama/Llama-4-8B-GGUF" exists —
        // repo name and files look speculative.
        id: "llama4:8b",
        name: "Llama 4 8B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Llama4,
        hf_repo: "meta-llama/Llama-4-8B-GGUF",
        default_file: "llama-4-8b-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "llama-4-8b-q4_k_m.gguf"),
            (Quantization::Q8_0, "llama-4-8b-q8_0.gguf"),
        ],
        param_billions: 8.0,
        min_ram_gb: 8,
        description: "Meta's latest Llama model",
    },
    KnownModel {
        id: "phi4:14b",
        name: "Phi-4 14B",
        model_type: ModelType::Text,
        // Phi-4 reuses the Phi3 architecture variant — presumably the
        // same implementation handles both; TODO confirm.
        architecture: ModelArchitecture::Phi3,
        hf_repo: "microsoft/Phi-4-GGUF",
        default_file: "phi-4-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "phi-4-q4_k_m.gguf"),
            (Quantization::Q8_0, "phi-4-q8_0.gguf"),
        ],
        param_billions: 14.0,
        min_ram_gb: 12,
        description: "Microsoft's reasoning-focused model",
    },
    KnownModel {
        id: "gemma3:4b",
        name: "Gemma 3 4B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Gemma3,
        hf_repo: "google/gemma-3-4b-gguf",
        default_file: "gemma-3-4b-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "gemma-3-4b-q4_k_m.gguf"),
            (Quantization::Q8_0, "gemma-3-4b-q8_0.gguf"),
        ],
        param_billions: 4.0,
        min_ram_gb: 6,
        description: "Google's efficient small model",
    },
    KnownModel {
        id: "gemma3:12b",
        name: "Gemma 3 12B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Gemma3,
        hf_repo: "google/gemma-3-12b-gguf",
        default_file: "gemma-3-12b-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "gemma-3-12b-q4_k_m.gguf"),
            (Quantization::Q5_K_M, "gemma-3-12b-q5_k_m.gguf"),
        ],
        param_billions: 12.0,
        min_ram_gb: 10,
        description: "Google's mid-size model",
    },
    KnownModel {
        id: "mistral:7b",
        name: "Mistral 7B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::Mistral,
        hf_repo: "mistralai/Mistral-7B-v0.3-GGUF",
        default_file: "mistral-7b-v0.3-q4_k_m.gguf",
        quantizations: &[
            (Quantization::Q4_K_M, "mistral-7b-v0.3-q4_k_m.gguf"),
            (Quantization::Q8_0, "mistral-7b-v0.3-q8_0.gguf"),
        ],
        param_billions: 7.0,
        min_ram_gb: 8,
        description: "Mistral's flagship 7B model",
    },
    KnownModel {
        // NOTE(review): verify "deepseek-ai/DeepSeek-V3-7B-GGUF" — a
        // 7B DeepSeek-V3 release may not exist upstream.
        id: "deepseek:7b",
        name: "DeepSeek V3 7B",
        model_type: ModelType::Text,
        architecture: ModelArchitecture::DeepseekV3,
        hf_repo: "deepseek-ai/DeepSeek-V3-7B-GGUF",
        default_file: "deepseek-v3-7b-q4_k_m.gguf",
        quantizations: &[(Quantization::Q4_K_M, "deepseek-v3-7b-q4_k_m.gguf")],
        param_billions: 7.0,
        min_ram_gb: 8,
        description: "DeepSeek's latest architecture",
    },
    // ----- Vision-language models -----
    KnownModel {
        // NOTE(review): id says "qwen3-vision" but repo/architecture
        // are Qwen2.5-VL — confirm the naming is intentional.
        id: "qwen3-vision:8b",
        name: "Qwen3 Vision 8B",
        model_type: ModelType::Vision,
        architecture: ModelArchitecture::Qwen2_5VL,
        hf_repo: "Qwen/Qwen2.5-VL-8B-GGUF",
        default_file: "qwen2.5-vl-8b-q4_k_m.gguf",
        quantizations: &[(Quantization::Q4_K_M, "qwen2.5-vl-8b-q4_k_m.gguf")],
        param_billions: 8.0,
        min_ram_gb: 12,
        description: "Vision-language model for image understanding",
    },
    KnownModel {
        id: "llama4-vision:8b",
        name: "Llama 4 Vision 8B",
        model_type: ModelType::Vision,
        architecture: ModelArchitecture::VLlama,
        hf_repo: "meta-llama/Llama-4-Vision-8B-GGUF",
        default_file: "llama-4-vision-8b-q4_k_m.gguf",
        quantizations: &[(Quantization::Q4_K_M, "llama-4-vision-8b-q4_k_m.gguf")],
        param_billions: 8.0,
        min_ram_gb: 12,
        description: "Meta's multimodal Llama 4",
    },
    KnownModel {
        id: "phi4-vision:14b",
        name: "Phi-4 Vision 14B",
        model_type: ModelType::Vision,
        architecture: ModelArchitecture::Phi4MM,
        hf_repo: "microsoft/Phi-4-MM-GGUF",
        default_file: "phi-4-mm-q4_k_m.gguf",
        quantizations: &[(Quantization::Q4_K_M, "phi-4-mm-q4_k_m.gguf")],
        param_billions: 14.0,
        min_ram_gb: 16,
        description: "Microsoft's multimodal Phi-4",
    },
    KnownModel {
        id: "gemma3-vision:12b",
        name: "Gemma 3 Vision 12B",
        model_type: ModelType::Vision,
        // NOTE(review): uses the Gemma3n architecture variant for a
        // Gemma 3 vision entry — confirm this mapping.
        architecture: ModelArchitecture::Gemma3n,
        hf_repo: "google/gemma-3-12b-vision-gguf",
        default_file: "gemma-3-12b-vision-q4_k_m.gguf",
        quantizations: &[(Quantization::Q4_K_M, "gemma-3-12b-vision-q4_k_m.gguf")],
        param_billions: 12.0,
        min_ram_gb: 14,
        description: "Google's vision-enabled Gemma",
    },
    // ----- Embedding models -----
    KnownModel {
        id: "nomic-embed",
        name: "Nomic Embed Text v1.5",
        model_type: ModelType::Embedding,
        architecture: ModelArchitecture::NomicEmbed,
        hf_repo: "nomic-ai/nomic-embed-text-v1.5-GGUF",
        default_file: "nomic-embed-text-v1.5-f16.gguf",
        quantizations: &[
            (Quantization::F16, "nomic-embed-text-v1.5-f16.gguf"),
            (Quantization::Q8_0, "nomic-embed-text-v1.5-q8_0.gguf"),
        ],
        param_billions: 0.137,
        min_ram_gb: 1,
        description: "High-quality 768-dim embeddings",
    },
    KnownModel {
        id: "bge-m3",
        name: "BGE-M3",
        model_type: ModelType::Embedding,
        architecture: ModelArchitecture::BGE,
        hf_repo: "BAAI/bge-m3-GGUF",
        default_file: "bge-m3-f16.gguf",
        quantizations: &[(Quantization::F16, "bge-m3-f16.gguf")],
        param_billions: 0.568,
        min_ram_gb: 2,
        description: "Multilingual embedding model",
    },
    KnownModel {
        id: "snowflake-arctic",
        name: "Snowflake Arctic Embed",
        model_type: ModelType::Embedding,
        architecture: ModelArchitecture::Arctic,
        hf_repo: "Snowflake/snowflake-arctic-embed-m-GGUF",
        default_file: "snowflake-arctic-embed-m-f16.gguf",
        quantizations: &[(Quantization::F16, "snowflake-arctic-embed-m-f16.gguf")],
        param_billions: 0.335,
        min_ram_gb: 1,
        description: "Enterprise-grade embeddings",
    },
];
590
/// Result of resolving a user-supplied model id (see `resolve_model`).
#[derive(Debug)]
pub enum ResolvedModel<'a> {
    /// A model found in the curated `KNOWN_MODELS` registry.
    Curated(&'a KnownModel),
    /// An explicit Hugging Face repo, from an id of the form `hf:<repo>`.
    HuggingFace {
        /// Repo path, e.g. `"bartowski/Qwen3-30B-GGUF"`.
        repo: String,
    },
}
606
607#[must_use]
629pub fn resolve_model(id: &str) -> Option<ResolvedModel<'_>> {
630 if let Some(hf_repo) = id.strip_prefix("hf:") {
631 Some(ResolvedModel::HuggingFace {
633 repo: hf_repo.to_string(),
634 })
635 } else {
636 KNOWN_MODELS
638 .iter()
639 .find(|m| m.id == id)
640 .map(ResolvedModel::Curated)
641 }
642}
643
644#[must_use]
648pub fn find_model(id: &str) -> Option<&'static KnownModel> {
649 KNOWN_MODELS.iter().find(|m| m.id == id)
650}
651
652pub fn models_by_type(model_type: ModelType) -> impl Iterator<Item = &'static KnownModel> {
654 KNOWN_MODELS.iter().filter(move |m| m.model_type == model_type)
655}
656
657#[must_use]
692pub fn auto_select_quantization(model: &KnownModel, available_ram_gb: u32) -> Quantization {
693 for (quant, _filename) in model.quantizations.iter().rev() {
695 let required_gb = (model.param_billions * quant.memory_multiplier()) as u32 + 2;
697
698 if required_gb <= available_ram_gb {
699 return *quant;
700 }
701 }
702
703 model
705 .quantizations
706 .first()
707 .map(|(q, _)| *q)
708 .unwrap_or(Quantization::Q4_K_M)
709}
710
/// Total system RAM in whole GiB on macOS, read via `sysctl -n hw.memsize`.
///
/// Falls back to 8 GiB if the command fails or its output cannot be
/// parsed. NOTE(review): despite the name, this reports *total*
/// memory, not currently free memory — confirm callers expect that.
#[cfg(target_os = "macos")]
#[must_use]
pub fn detect_available_ram_gb() -> u32 {
    use std::process::Command;

    let bytes = Command::new("sysctl")
        .args(["-n", "hw.memsize"])
        .output()
        .ok()
        .and_then(|out| String::from_utf8(out.stdout).ok())
        .and_then(|text| text.trim().parse::<u64>().ok());

    match bytes {
        // hw.memsize is in bytes; 1 GiB = 1_073_741_824 bytes.
        Some(b) => (b / 1_073_741_824) as u32,
        None => 8,
    }
}
734
/// Total system RAM in whole GiB on Linux, parsed from `/proc/meminfo`.
///
/// Reads the `MemTotal:` line (value in KiB) and falls back to 8 GiB
/// if the file is missing or malformed. NOTE(review): despite the
/// name, this reports *total* memory, not currently free memory.
#[cfg(target_os = "linux")]
#[must_use]
pub fn detect_available_ram_gb() -> u32 {
    use std::fs;

    let meminfo = match fs::read_to_string("/proc/meminfo") {
        Ok(contents) => contents,
        Err(_) => return 8,
    };

    for line in meminfo.lines() {
        if let Some(rest) = line.strip_prefix("MemTotal:") {
            // Line format: "MemTotal:       16384000 kB" — the first
            // token after the prefix is the KiB count.
            return rest
                .split_whitespace()
                .next()
                .and_then(|kb| kb.parse::<u64>().ok())
                .map_or(8, |kb| (kb / 1_048_576) as u32);
        }
    }

    8
}
756
/// RAM estimate on Windows.
///
/// Hardcoded conservative guess — no actual detection is implemented
/// for this platform yet.
// TODO(review): query real memory (e.g. via GlobalMemoryStatusEx)
// instead of assuming 16 GB.
#[cfg(target_os = "windows")]
#[must_use]
pub fn detect_available_ram_gb() -> u32 {
    16 }
764
/// RAM estimate on all other platforms: a conservative 8 GB default,
/// since no detection mechanism is implemented for them.
#[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
#[must_use]
pub fn detect_available_ram_gb() -> u32 {
    8 }
771
#[cfg(test)]
mod tests {
    use super::*;

    // Curated ids resolve to the matching registry entry.
    #[test]
    fn test_resolve_curated_model() {
        let result = resolve_model("qwen3:8b");
        assert!(matches!(result, Some(ResolvedModel::Curated(_))));

        if let Some(ResolvedModel::Curated(model)) = result {
            assert_eq!(model.id, "qwen3:8b");
            // Exact float literal from the registry — bitwise equality
            // is intentional here.
            assert_eq!(model.param_billions, 8.0);
        }
    }

    // "hf:" ids bypass the registry and keep the repo path verbatim.
    #[test]
    fn test_resolve_huggingface_model() {
        let result = resolve_model("hf:bartowski/Qwen3-30B-GGUF");
        assert!(matches!(result, Some(ResolvedModel::HuggingFace { .. })));

        if let Some(ResolvedModel::HuggingFace { repo }) = result {
            assert_eq!(repo, "bartowski/Qwen3-30B-GGUF");
        }
    }

    // Unknown ids without the "hf:" prefix resolve to None.
    #[test]
    fn test_resolve_unknown_model() {
        let result = resolve_model("unknown:model");
        assert!(result.is_none());
    }

    #[test]
    fn test_find_model() {
        let model = find_model("qwen3:8b");
        assert!(model.is_some());
        assert_eq!(model.unwrap().name, "Qwen3 8B");
    }

    // Lower bounds only, so adding registry entries never breaks this.
    #[test]
    fn test_models_by_type() {
        let text_models: Vec<_> = models_by_type(ModelType::Text).collect();
        assert!(text_models.len() >= 9);
        assert!(text_models.iter().all(|m| m.model_type == ModelType::Text));

        let vision_models: Vec<_> = models_by_type(ModelType::Vision).collect();
        assert!(vision_models.len() >= 4);

        let embed_models: Vec<_> = models_by_type(ModelType::Embedding).collect();
        assert!(embed_models.len() >= 3);
    }

    // With ample RAM, the highest-quality listed quantization wins.
    #[test]
    fn test_auto_select_quantization_high_ram() {
        let model = find_model("qwen3:8b").unwrap();
        let quant = auto_select_quantization(model, 32);
        assert_eq!(quant, Quantization::Q8_0);
    }

    // When nothing fits, the smallest listed quantization is returned.
    #[test]
    fn test_auto_select_quantization_low_ram() {
        let model = find_model("qwen3:32b").unwrap();
        let quant = auto_select_quantization(model, 16);
        assert_eq!(quant, Quantization::Q4_K_M);
    }

    // Sanity bounds only — actual value is machine-dependent.
    #[test]
    fn test_detect_ram() {
        let ram = detect_available_ram_gb();
        assert!(ram >= 1);
        assert!(ram <= 1024);
    }

    #[test]
    fn test_known_models_count() {
        assert!(
            KNOWN_MODELS.len() >= 16,
            "Expected at least 16 models, got {}",
            KNOWN_MODELS.len()
        );
    }

    // Spot-check the architecture -> capability mapping per category.
    #[test]
    fn test_model_architecture_model_type() {
        assert_eq!(
            ModelArchitecture::Qwen3.model_type(),
            ModelType::Text
        );
        assert_eq!(
            ModelArchitecture::Llama4.model_type(),
            ModelType::Text
        );

        assert_eq!(
            ModelArchitecture::Phi4MM.model_type(),
            ModelType::Vision
        );
        assert_eq!(
            ModelArchitecture::Qwen2_5VL.model_type(),
            ModelType::Vision
        );

        assert_eq!(
            ModelArchitecture::NomicEmbed.model_type(),
            ModelType::Embedding
        );
        assert_eq!(
            ModelArchitecture::BGE.model_type(),
            ModelType::Embedding
        );
    }

    // Multipliers must be monotone in quantization size, since
    // auto_select_quantization relies on that ordering.
    #[test]
    fn test_quantization_memory_multiplier() {
        assert!(Quantization::Q4_K_M.memory_multiplier() < Quantization::Q8_0.memory_multiplier());
        assert!(Quantization::Q8_0.memory_multiplier() < Quantization::F16.memory_multiplier());
    }

    #[test]
    fn test_known_model_filename_for_quant() {
        let model = find_model("qwen3:8b").unwrap();

        let q4_file = model.filename_for_quant(Quantization::Q4_K_M);
        assert!(q4_file.is_some());
        assert!(q4_file.unwrap().contains("q4_k_m"));

        // qwen3:8b ships no F16 variant, so the lookup must miss.
        let f16_file = model.filename_for_quant(Quantization::F16);
        assert!(f16_file.is_none());
    }
}