use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use tracing::info;
use crate::InferenceError;
/// Coarse capability tier of a catalog model.
///
/// Serialized in snake_case (e.g. `"small"`, `"embedding"`) for
/// config/API payloads.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ModelRole {
/// Smallest generative tier (0.6B in the built-in catalog).
Small,
/// Mid-size generative tier (1.7B-4B in the built-in catalog).
Medium,
/// Large generative tier (8B in the built-in catalog).
Large,
/// Highest-capability tier (the 30B MoE entry in the built-in catalog).
Expert,
/// Embedding (non-generative) model.
Embedding,
}
/// A catalog entry enriched with local download state, as returned by
/// `ModelRegistry::list_models`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelInfo {
// Canonical catalog name; doubles as the on-disk directory name.
pub name: String,
// Hugging Face repo holding the GGUF weights.
pub hf_repo: String,
// Weights filename within `hf_repo`.
pub hf_filename: String,
// Hugging Face repo holding `tokenizer.json`.
pub tokenizer_repo: String,
pub role: ModelRole,
// Human-readable parameter-count label (e.g. "1.7B").
// NOTE(review): a `&'static str` field makes the derived `Deserialize`
// require `'de: 'static`, so this struct can effectively only be
// deserialized from static input — consider `String` if deserialization
// from files/network is ever needed; confirm against callers.
pub param_count: &'static str,
// Approximate size of the quantized weights in megabytes.
pub quantized_size_mb: u64,
// Whether the weights file already exists locally (see `list_models`).
pub downloaded: bool,
}
/// Manages a directory of locally downloaded models backed by a static
/// built-in catalog (see `builtin_catalog`).
pub struct ModelRegistry {
// Root directory; each model lives in `<models_dir>/<spec.name>/`.
models_dir: PathBuf,
// Static list of known models, populated at construction.
catalog: Vec<ModelSpec>,
}
/// Internal, fully-static description of one downloadable model.
/// Mirrors `ModelInfo` minus the computed `downloaded` flag.
struct ModelSpec {
name: &'static str,
hf_repo: &'static str,
hf_filename: &'static str,
tokenizer_repo: &'static str,
role: ModelRole,
param_count: &'static str,
quantized_size_mb: u64,
}
impl ModelRegistry {
    /// Creates a registry rooted at `models_dir`, populated with the
    /// built-in model catalog.
    pub fn new(models_dir: PathBuf) -> Self {
        Self {
            models_dir,
            catalog: builtin_catalog(),
        }
    }

    /// Returns every catalog entry together with its local download state.
    pub fn list_models(&self) -> Vec<ModelInfo> {
        self.catalog
            .iter()
            .map(|spec| {
                let model_dir = self.models_dir.join(spec.name);
                ModelInfo {
                    name: spec.name.to_string(),
                    hf_repo: spec.hf_repo.to_string(),
                    hf_filename: spec.hf_filename.to_string(),
                    tokenizer_repo: spec.tokenizer_repo.to_string(),
                    role: spec.role,
                    param_count: spec.param_count,
                    quantized_size_mb: spec.quantized_size_mb,
                    // Consistent with `ensure_model`'s "already present"
                    // check: a model is only usable once both the weights
                    // and the tokenizer exist locally.
                    downloaded: model_dir.join("model.gguf").exists()
                        && model_dir.join("tokenizer.json").exists(),
                }
            })
            .collect()
    }

    /// Case-insensitive lookup of a catalog entry by name.
    fn find_spec(&self, name: &str) -> Option<&ModelSpec> {
        self.catalog
            .iter()
            .find(|s| s.name.eq_ignore_ascii_case(name))
    }

    /// Ensures the named model's weights and tokenizer exist locally,
    /// downloading whichever pieces are missing, and returns the model's
    /// directory.
    ///
    /// # Errors
    /// * `InferenceError::ModelNotFound` if `name` is not in the catalog.
    /// * `InferenceError::DownloadFailed` (or I/O errors) if a download or
    ///   directory creation fails.
    pub async fn ensure_model(&self, name: &str) -> Result<PathBuf, InferenceError> {
        let spec = self
            .find_spec(name)
            .ok_or_else(|| InferenceError::ModelNotFound(name.to_string()))?;
        // Always derive paths from the canonical catalog name, since
        // `find_spec` matches case-insensitively.
        let model_dir = self.models_dir.join(spec.name);
        let model_path = model_dir.join("model.gguf");
        let tokenizer_path = model_dir.join("tokenizer.json");
        if model_path.exists() && tokenizer_path.exists() {
            return Ok(model_dir);
        }
        std::fs::create_dir_all(&model_dir)?;
        if !model_path.exists() {
            info!(
                model = spec.name,
                repo = spec.hf_repo,
                "downloading model weights"
            );
            download_file(spec.hf_repo, spec.hf_filename, &model_path).await?;
        }
        if !tokenizer_path.exists() {
            info!(
                model = spec.name,
                repo = spec.tokenizer_repo,
                "downloading tokenizer"
            );
            download_file(spec.tokenizer_repo, "tokenizer.json", &tokenizer_path).await?;
        }
        Ok(model_dir)
    }

    /// Deletes the named model's local directory, if it exists.
    ///
    /// # Errors
    /// * `InferenceError::ModelNotFound` if `name` is not in the catalog.
    /// * I/O errors from removing the directory tree.
    pub fn remove_model(&self, name: &str) -> Result<(), InferenceError> {
        let spec = self
            .find_spec(name)
            .ok_or_else(|| InferenceError::ModelNotFound(name.to_string()))?;
        // Bug fix: join the canonical `spec.name`, not the caller-supplied
        // `name`. `find_spec` is case-insensitive, so e.g. "qwen3-8b"
        // previously targeted a "qwen3-8b" directory that never exists on
        // case-sensitive filesystems, and the method silently removed
        // nothing (while `ensure_model` writes to "Qwen3-8B").
        let model_dir = self.models_dir.join(spec.name);
        if model_dir.exists() {
            std::fs::remove_dir_all(&model_dir)?;
            info!(model = spec.name, "removed model");
        }
        Ok(())
    }
}
/// Fetches `filename` from the Hugging Face model `repo` into `dest`.
///
/// The file is first materialized in the hf-hub cache; it is then symlinked
/// (Unix) or copied into `dest` so large weights are not duplicated on disk.
///
/// # Errors
/// Returns `InferenceError::DownloadFailed` if the hub API cannot be
/// initialized, the fetch fails, or the final copy fails.
async fn download_file(repo: &str, filename: &str, dest: &Path) -> Result<(), InferenceError> {
    // Bug fix: this early-return used to sit *after* the download, paying
    // for a hub round-trip whose result was then discarded whenever `dest`
    // already existed.
    if dest.exists() {
        return Ok(());
    }
    let api = hf_hub::api::tokio::Api::new()
        .map_err(|e| InferenceError::DownloadFailed(e.to_string()))?;
    let cached = api
        .model(repo.to_string())
        .get(filename)
        .await
        // Bug fix: the message previously contained the placeholder
        // "(unknown)" instead of identifying what failed to download.
        .map_err(|e| InferenceError::DownloadFailed(format!("{repo}/{filename}: {e}")))?;
    // Prefer a symlink into the hf-hub cache; fall back to a full copy
    // (non-Unix platforms, or if the symlink cannot be created).
    #[cfg(unix)]
    {
        if std::os::unix::fs::symlink(&cached, dest).is_ok() {
            return Ok(());
        }
    }
    std::fs::copy(&cached, dest)
        .map_err(|e| InferenceError::DownloadFailed(format!("copy to {}: {e}", dest.display())))?;
    Ok(())
}
fn builtin_catalog() -> Vec<ModelSpec> {
vec![
ModelSpec {
name: "Qwen3-Embedding-0.6B",
hf_repo: "Qwen/Qwen3-Embedding-0.6B-GGUF",
hf_filename: "Qwen3-Embedding-0.6B-Q8_0.gguf",
tokenizer_repo: "Qwen/Qwen3-Embedding-0.6B",
role: ModelRole::Embedding,
param_count: "0.6B",
quantized_size_mb: 639,
},
ModelSpec {
name: "Qwen3-0.6B",
hf_repo: "Qwen/Qwen3-0.6B-GGUF",
hf_filename: "Qwen3-0.6B-Q8_0.gguf",
tokenizer_repo: "Qwen/Qwen3-0.6B",
role: ModelRole::Small,
param_count: "0.6B",
quantized_size_mb: 650,
},
ModelSpec {
name: "Qwen3-1.7B",
hf_repo: "Qwen/Qwen3-1.7B-GGUF",
hf_filename: "Qwen3-1.7B-Q8_0.gguf",
tokenizer_repo: "Qwen/Qwen3-1.7B",
role: ModelRole::Medium,
param_count: "1.7B",
quantized_size_mb: 1800,
},
ModelSpec {
name: "Qwen3-4B",
hf_repo: "Qwen/Qwen3-4B-GGUF",
hf_filename: "Qwen3-4B-Q4_K_M.gguf",
tokenizer_repo: "Qwen/Qwen3-4B",
role: ModelRole::Medium,
param_count: "4B",
quantized_size_mb: 2500,
},
ModelSpec {
name: "Qwen3-8B",
hf_repo: "Qwen/Qwen3-8B-GGUF",
hf_filename: "Qwen3-8B-Q4_K_M.gguf",
tokenizer_repo: "Qwen/Qwen3-8B",
role: ModelRole::Large,
param_count: "8B",
quantized_size_mb: 4900,
},
ModelSpec {
name: "Qwen3-30B-A3B",
hf_repo: "Qwen/Qwen3-30B-A3B-GGUF",
hf_filename: "Qwen3-30B-A3B-Q4_K_M.gguf",
tokenizer_repo: "Qwen/Qwen3-30B-A3B",
role: ModelRole::Expert,
param_count: "30B (3B active)",
quantized_size_mb: 17000,
},
]
}