use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Catalog entry describing one recommended model.
///
/// Entries are produced by `get_recommended_models` and looked up through
/// `get_model`. The type derives serde's `Serialize`/`Deserialize`, so field
/// names are part of the serialized representation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelDefinition {
/// Repository-style model identifier, e.g. `Qwen/Qwen2.5-14B-Instruct-GGUF`
/// (presumably a Hugging Face repo id, going by the field name).
pub hf_id: String,
/// Short name for the model; `get_model` tries an exact match on this first.
pub alias: String,
/// Human-readable model name, e.g. `Qwen2.5-14B-Instruct`.
pub name: String,
/// Architecture family label; the built-in catalog uses values such as
/// `qwen`, `mistral`, `phi` and `llama`.
pub architecture: String,
/// Parameter count in billions (e.g. `14.0` for the 14B model).
pub params_b: f32,
/// Free-form description of what the model is suited for.
pub use_case: String,
/// Name of the suggested quantization, e.g. `Q4_K_M`.
pub recommended_quant: String,
/// Memory figure in gigabytes. NOTE(review): presumably the approximate
/// footprint at `recommended_quant` -- confirm how these values were derived.
pub memory_gb: f32,
/// Context window size. NOTE(review): presumably counted in tokens -- confirm.
pub context_length: usize,
/// Free-form remarks about the model.
pub notes: String,
}
/// Builds the built-in catalog of recommended models.
///
/// A fresh `Vec` is allocated on every call. The order of the entries is
/// significant: lookups that scan the catalog return the first entry that
/// matches, so earlier entries take precedence over later ones.
pub fn get_recommended_models() -> Vec<ModelDefinition> {
    vec![
        // General-purpose 14B Qwen model.
        ModelDefinition {
            hf_id: String::from("Qwen/Qwen2.5-14B-Instruct-GGUF"),
            alias: String::from("qwen"),
            name: String::from("Qwen2.5-14B-Instruct"),
            architecture: String::from("qwen"),
            params_b: 14.0,
            use_case: String::from("Primary reasoning, code generation, complex tasks"),
            recommended_quant: String::from("Q4_K_M"),
            memory_gb: 9.5,
            context_length: 32768,
            notes: String::from("Best overall performance for reasoning tasks on M4 Pro"),
        },
        // 7B Mistral instruct model.
        ModelDefinition {
            hf_id: String::from("mistralai/Mistral-7B-Instruct-v0.3"),
            alias: String::from("mistral"),
            name: String::from("Mistral-7B-Instruct-v0.3"),
            architecture: String::from("mistral"),
            params_b: 7.0,
            use_case: String::from("Fast instruction following, general chat"),
            recommended_quant: String::from("Q4_K_M"),
            memory_gb: 4.5,
            context_length: 32768,
            notes: String::from("Excellent speed/quality tradeoff with sliding window attention"),
        },
        // Small Phi model.
        ModelDefinition {
            hf_id: String::from("microsoft/Phi-4-mini-instruct"),
            alias: String::from("phi"),
            name: String::from("Phi-4-mini"),
            architecture: String::from("phi"),
            params_b: 3.8,
            use_case: String::from("Testing, quick prototyping, resource-constrained"),
            recommended_quant: String::from("Q4_K_M"),
            memory_gb: 2.5,
            context_length: 16384,
            notes: String::from("Surprisingly capable for its size, fast inference"),
        },
        // Small Llama model; the entry with the largest context_length.
        ModelDefinition {
            hf_id: String::from("meta-llama/Llama-3.2-3B-Instruct"),
            alias: String::from("llama"),
            name: String::from("Llama-3.2-3B-Instruct"),
            architecture: String::from("llama"),
            params_b: 3.2,
            use_case: String::from("Tool use, function calling, structured output"),
            recommended_quant: String::from("Q4_K_M"),
            memory_gb: 2.2,
            context_length: 131072,
            notes: String::from("Optimized for tool use and function calling"),
        },
        // Code-focused 7B Qwen model.
        ModelDefinition {
            hf_id: String::from("Qwen/Qwen2.5-Coder-7B-Instruct-GGUF"),
            alias: String::from("qwen-coder"),
            name: String::from("Qwen2.5-Coder-7B-Instruct"),
            architecture: String::from("qwen"),
            params_b: 7.0,
            use_case: String::from("Code generation, code review, debugging"),
            recommended_quant: String::from("Q4_K_M"),
            memory_gb: 4.8,
            context_length: 32768,
            notes: String::from("Specialized for coding tasks, excellent at code completion"),
        },
        // Largest entry in the catalog: 32B Qwen model.
        ModelDefinition {
            hf_id: String::from("Qwen/Qwen2.5-32B-Instruct-GGUF"),
            alias: String::from("qwen-large"),
            name: String::from("Qwen2.5-32B-Instruct"),
            architecture: String::from("qwen"),
            params_b: 32.0,
            use_case: String::from("Complex reasoning, research, highest quality output"),
            recommended_quant: String::from("Q4_K_M"),
            memory_gb: 20.0,
            context_length: 32768,
            notes: String::from("Requires significant memory, but provides best quality"),
        },
    ]
}
/// Looks up a recommended model by alias or model id.
///
/// Matching strategies are tried in decreasing order of specificity, and the
/// first strategy that produces a hit wins:
///
/// 1. exact match on `alias`,
/// 2. exact match on `hf_id`,
/// 3. `hf_id` containing `identifier` as a substring (the first catalog
///    entry that matches is returned).
///
/// All comparisons are case-sensitive.
///
/// Returns a clone of the matching catalog entry, or `None` when nothing
/// matches. An empty `identifier` always yields `None`.
pub fn get_model(identifier: &str) -> Option<ModelDefinition> {
    // Every string contains the empty string, so without this guard the
    // substring fallback would silently return the first catalog entry.
    if identifier.is_empty() {
        return None;
    }

    let models = get_recommended_models();
    models
        .iter()
        .find(|m| m.alias == identifier)
        .or_else(|| models.iter().find(|m| m.hf_id == identifier))
        .or_else(|| models.iter().find(|m| m.hf_id.contains(identifier)))
        .cloned()
}
/// Resolves an alias or model id to the `hf_id` of a catalog entry.
///
/// When `get_model` finds an entry for `identifier`, that entry's `hf_id` is
/// returned. Otherwise `identifier` is passed through unchanged as an owned
/// `String`, so ids that are not in the catalog still work.
pub fn resolve_model_id(identifier: &str) -> String {
    match get_model(identifier) {
        Some(entry) => entry.hf_id,
        None => identifier.to_string(),
    }
}
/// Returns a map from each catalog entry's `alias` to its `hf_id`.
///
/// The map is rebuilt from `get_recommended_models` on every call. If two
/// entries ever shared an alias, the later entry would overwrite the earlier.
pub fn get_aliases() -> HashMap<String, String> {
    let mut aliases = HashMap::new();
    for entry in get_recommended_models() {
        aliases.insert(entry.alias, entry.hf_id);
    }
    aliases
}
/// Weight-precision presets that can be selected by name.
///
/// Each preset has a display label, a GGUF file-name suffix and a nominal
/// per-weight storage size.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuantPreset {
    /// Displayed as `Q4_K_M`; counted as 0.5 bytes per weight.
    Q4K,
    /// Displayed as `Q8_0`; counted as 1 byte per weight.
    Q8,
    /// Displayed as `F16`; counted as 2 bytes per weight.
    F16,
    /// No quantization. Displayed as `F32`; counted as 4 bytes per weight.
    None,
}

impl QuantPreset {
    /// Parses a preset from its name, ignoring letter case.
    ///
    /// Accepted spellings (after lowercasing):
    ///
    /// * `q4`, `q4k`, `q4_k`, `q4_k_m` -> [`QuantPreset::Q4K`]
    /// * `q8`, `q8_0` -> [`QuantPreset::Q8`]
    /// * `f16`, `fp16` -> [`QuantPreset::F16`]
    /// * `none`, `f32`, `fp32` -> [`QuantPreset::None`]
    ///
    /// Returns `Option::None` for anything else. The input is not trimmed,
    /// so surrounding whitespace makes a name unrecognized.
    pub fn from_str(s: &str) -> Option<Self> {
        let key = s.to_lowercase();
        let preset = match key.as_str() {
            "q4" | "q4k" | "q4_k" | "q4_k_m" => QuantPreset::Q4K,
            "q8" | "q8_0" => QuantPreset::Q8,
            "f16" | "fp16" => QuantPreset::F16,
            "none" | "f32" | "fp32" => QuantPreset::None,
            // `None` here is `Option::None`, not the enum variant.
            _ => return None,
        };
        Some(preset)
    }

    /// Returns the file-name suffix for this preset: the display label
    /// followed by `.gguf`.
    pub fn gguf_suffix(&self) -> &'static str {
        match *self {
            QuantPreset::Q4K => "Q4_K_M.gguf",
            QuantPreset::Q8 => "Q8_0.gguf",
            QuantPreset::F16 => "F16.gguf",
            QuantPreset::None => "F32.gguf",
        }
    }

    /// Returns the nominal number of bytes used to store one weight.
    pub fn bytes_per_weight(&self) -> f32 {
        match *self {
            QuantPreset::Q4K => 0.5,
            QuantPreset::Q8 => 1.0,
            QuantPreset::F16 => 2.0,
            QuantPreset::None => 4.0,
        }
    }

    /// Estimates memory use for a model with `params_b` billion parameters.
    ///
    /// The estimate is the raw weight size (`params_b * bytes_per_weight`)
    /// scaled by a flat 20% margin. With `params_b` in billions, the result
    /// is in gigabytes.
    pub fn estimate_memory_gb(&self, params_b: f32) -> f32 {
        // Flat multiplier applied on top of the raw weight size.
        const OVERHEAD_FACTOR: f32 = 1.2;
        params_b * self.bytes_per_weight() * OVERHEAD_FACTOR
    }
}

/// Formats the preset as its label (`Q4_K_M`, `Q8_0`, `F16` or `F32`).
/// Width and alignment flags in the format spec are not applied.
impl std::fmt::Display for QuantPreset {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            QuantPreset::Q4K => "Q4_K_M",
            QuantPreset::Q8 => "Q8_0",
            QuantPreset::F16 => "F16",
            QuantPreset::None => "F32",
        };
        f.write_str(label)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An exact alias resolves to its catalog entry.
    #[test]
    fn test_get_model_by_alias() {
        let model = get_model("qwen").unwrap();
        assert!(model.hf_id.contains("Qwen2.5-14B"));
    }

    /// A full `hf_id` resolves to its own entry, even when its text overlaps
    /// with other entries from the same family.
    #[test]
    fn test_get_model_by_hf_id() {
        let model = get_model("Qwen/Qwen2.5-32B-Instruct-GGUF").unwrap();
        assert_eq!(model.alias, "qwen-large");
    }

    /// A fragment of an `hf_id` falls through to the substring match.
    #[test]
    fn test_get_model_by_partial_hf_id() {
        let model = get_model("Phi-4-mini").unwrap();
        assert_eq!(model.alias, "phi");
    }

    /// Identifiers that match nothing in the catalog yield `None`.
    #[test]
    fn test_get_model_unknown() {
        assert!(get_model("does-not-exist").is_none());
    }

    /// Known aliases resolve to an `hf_id`; unknown ids pass through as-is.
    #[test]
    fn test_resolve_model_id() {
        assert!(resolve_model_id("mistral").contains("Mistral-7B"));
        assert_eq!(resolve_model_id("custom/model"), "custom/model");
    }

    /// The alias map has one entry per catalog model (aliases are unique).
    #[test]
    fn test_get_aliases() {
        let aliases = get_aliases();
        assert_eq!(aliases.len(), get_recommended_models().len());
        assert_eq!(
            aliases.get("llama").map(String::as_str),
            Some("meta-llama/Llama-3.2-3B-Instruct")
        );
    }

    /// Preset parsing is case-insensitive and rejects unknown names.
    #[test]
    fn test_quant_preset() {
        assert_eq!(QuantPreset::from_str("q4k"), Some(QuantPreset::Q4K));
        assert_eq!(QuantPreset::from_str("Q4_K_M"), Some(QuantPreset::Q4K));
        assert_eq!(QuantPreset::from_str("q8_0"), Some(QuantPreset::Q8));
        assert_eq!(QuantPreset::from_str("FP16"), Some(QuantPreset::F16));
        assert_eq!(QuantPreset::from_str("fp32"), Some(QuantPreset::None));
        assert_eq!(QuantPreset::from_str("bogus"), None);
        assert_eq!(QuantPreset::Q4K.bytes_per_weight(), 0.5);
    }

    /// The display label parses back to the same preset, and the GGUF suffix
    /// is the label followed by `.gguf`.
    #[test]
    fn test_quant_preset_labels_agree() {
        let presets = [
            QuantPreset::Q4K,
            QuantPreset::Q8,
            QuantPreset::F16,
            QuantPreset::None,
        ];
        for preset in presets.iter() {
            let label = preset.to_string();
            assert_eq!(QuantPreset::from_str(&label), Some(*preset));
            assert_eq!(preset.gguf_suffix(), format!("{}.gguf", label));
        }
    }

    /// The memory estimate is the weight size plus a 20% margin.
    #[test]
    fn test_estimate_memory_gb() {
        // Compared with a tolerance: 1.2 is not exactly representable in f32.
        let estimate = QuantPreset::Q8.estimate_memory_gb(10.0);
        assert!((estimate - 12.0).abs() < 1e-3);
    }
}