use crate::types::{Error, Result};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Task category a model serves; drives which inference path is used.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum ModelType {
/// Produces dense vector embeddings for text.
Embedding,
/// Scores query/document pairs for reranking.
Rerank,
/// Autoregressive text generation (chat/instruct models).
Generator,
}
/// Concrete model architecture, used to select the matching loader/runtime.
///
/// Covers BERT-style encoders, cross-encoder rerankers, and several decoder
/// families (Qwen, Mistral/Mixtral, GLM, Phi, DeepSeek, ...). Mapping from
/// repo names to variants lives in `ModelRegistry`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum Architecture {
// Encoder-style architectures.
Bert,
CrossEncoder,
Qwen2Embedding,
MistralEmbedding,
// Decoder (generation) architectures.
Qwen2Generator,
MistralGenerator,
Qwen3Embedding,
Qwen3Reranker,
Qwen3Generator,
Phi3Generator,
SmolLM3Generator,
InternLM3Generator,
// Jina embedding / reranking families.
JinaV4,
JinaRerankerV3,
// Used for nvidia/llama-embed-nemotron-8b.
NVIDIANemotron,
Gemma3n,
GLM4,
// Mixture-of-experts variants.
GLM4MoE,
Qwen3MoE,
Mixtral,
DeepSeekV3,
}
/// Weight quantization scheme for a model checkpoint.
///
/// `None` (the default, also used when the field is absent during
/// deserialization) selects the full-precision base repo; every other
/// variant maps to a repo-name suffix via `repo_suffix`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
pub enum Quantization {
#[default]
None,
Int4,
Int8,
/// Activation-aware weight quantization.
AWQ,
/// GPTQ post-training quantization.
GPTQ,
/// GGUF single-file checkpoint format.
GGUF,
/// bitsandbytes 4-bit.
BNB4,
/// bitsandbytes 8-bit.
BNB8,
FP8,
}
impl Quantization {
    /// Parses a quantization scheme from an alias suffix (case-insensitive),
    /// e.g. the `int4` in `model:int4`. Returns `None` for unknown suffixes.
    pub fn from_suffix(s: &str) -> Option<Self> {
        let normalized = s.to_ascii_lowercase();
        let scheme = match normalized.as_str() {
            "int4" | "4bit" => Self::Int4,
            "int8" | "8bit" => Self::Int8,
            "awq" => Self::AWQ,
            "gptq" => Self::GPTQ,
            "gguf" => Self::GGUF,
            "bnb4" | "bnb-4bit" => Self::BNB4,
            "bnb8" | "bnb-8bit" => Self::BNB8,
            "fp8" => Self::FP8,
            _ => return None,
        };
        Some(scheme)
    }

    /// Suffix appended to a base repo name to form the quantized repo id;
    /// empty for the full-precision `None` variant.
    pub fn repo_suffix(&self) -> &'static str {
        match self {
            Self::Int4 => "-Int4",
            Self::Int8 => "-Int8",
            Self::AWQ => "-AWQ",
            Self::GPTQ => "-GPTQ",
            Self::GGUF => "-GGUF",
            Self::BNB4 => "-bnb-4bit",
            Self::BNB8 => "-bnb-8bit",
            Self::FP8 => "-FP8",
            Self::None => "",
        }
    }

    /// True for every scheme except `None`.
    pub fn is_quantized(&self) -> bool {
        *self != Self::None
    }
}
/// Fully resolved description of a model: where to fetch it and how to run it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelInfo {
/// Name the model was requested under; may include the `:quantization`
/// suffix it was resolved with (see `ModelRegistry::resolve`).
pub alias: String,
/// Repository id, typically `org/name`, possibly with a quantization suffix.
pub repo_id: String,
pub model_type: ModelType,
pub architecture: Architecture,
// Defaults to `Quantization::None` when absent in serialized data.
#[serde(default)]
pub quantization: Quantization,
}
/// Internal catalog entry. The repo id is stored split into `org` and
/// `base_name` so a quantization suffix can be appended when building a
/// `ModelInfo`.
#[derive(Debug, Clone)]
struct RegistryEntry {
org: String,
base_name: String,
model_type: ModelType,
architecture: Architecture,
// Whether a `:quant` request is honored for this entry (controls suffix
// appending in `to_model_info`; entries with `false` silently fall back
// to the base repo).
supports_quantization: bool,
}
impl RegistryEntry {
    /// Builds an entry from a repo id, splitting at the first `/` into
    /// `org` and `base_name`. Ids without a `/` keep the whole string as
    /// `base_name` with an empty `org`.
    fn new(
        repo_id: &str,
        model_type: ModelType,
        architecture: Architecture,
        supports_quantization: bool,
    ) -> Self {
        // `split_once` keeps any further slashes inside `base_name`; the
        // previous `split('/')` + `len() == 2` check dropped the org entirely
        // for ids containing more than one slash.
        let (org, base_name) = match repo_id.split_once('/') {
            Some((org, name)) => (org.to_string(), name.to_string()),
            None => (String::new(), repo_id.to_string()),
        };
        Self {
            org,
            base_name,
            model_type,
            architecture,
            supports_quantization,
        }
    }

    /// Materializes a `ModelInfo` for `alias`, appending the quantization
    /// repo suffix only when this entry publishes quantized variants.
    fn to_model_info(&self, alias: &str, quantization: Quantization) -> ModelInfo {
        let suffix = if quantization.is_quantized() && self.supports_quantization {
            quantization.repo_suffix()
        } else {
            ""
        };
        // Avoid a spurious leading '/' for org-less repo ids (previously
        // "name" would become "/name").
        let repo_id = if self.org.is_empty() {
            format!("{}{}", self.base_name, suffix)
        } else {
            format!("{}/{}{}", self.org, self.base_name, suffix)
        };
        ModelInfo {
            alias: alias.to_string(),
            repo_id,
            model_type: self.model_type,
            architecture: self.architecture,
            quantization,
        }
    }
}
/// Registry of built-in models, resolving user-supplied names to `ModelInfo`.
pub struct ModelRegistry {
// Keyed by lowercased alias for case-insensitive lookup.
entries: HashMap<String, RegistryEntry>,
}
impl ModelRegistry {
pub fn new() -> Self {
let mut entries = HashMap::new();
let model_entries = [
("bge-small-zh", "BAAI/bge-small-zh-v1.5", ModelType::Embedding, Architecture::Bert, false),
("bge-small-en", "BAAI/bge-small-en-v1.5", ModelType::Embedding, Architecture::Bert, false),
("bge-base-en", "BAAI/bge-base-en-v1.5", ModelType::Embedding, Architecture::Bert, false),
("bge-large-en", "BAAI/bge-large-en-v1.5", ModelType::Embedding, Architecture::Bert, false),
("all-MiniLM-L6-v2", "sentence-transformers/all-MiniLM-L6-v2", ModelType::Embedding, Architecture::Bert, false),
("all-mpnet-base-v2", "sentence-transformers/all-mpnet-base-v2", ModelType::Embedding, Architecture::Bert, false),
("paraphrase-MiniLM-L6-v2", "sentence-transformers/paraphrase-MiniLM-L6-v2", ModelType::Embedding, Architecture::Bert, false),
("multi-qa-mpnet-base-dot-v1", "sentence-transformers/multi-qa-mpnet-base-dot-v1", ModelType::Embedding, Architecture::Bert, false),
("e5-large", "intfloat/e5-large", ModelType::Embedding, Architecture::Bert, false),
("e5-base", "intfloat/e5-base", ModelType::Embedding, Architecture::Bert, false),
("e5-small", "intfloat/e5-small", ModelType::Embedding, Architecture::Bert, false),
("jina-embeddings-v2-base-en", "jinaai/jina-embeddings-v2-base-en", ModelType::Embedding, Architecture::Bert, false),
("jina-embeddings-v2-small-en", "jinaai/jina-embeddings-v2-small-en", ModelType::Embedding, Architecture::Bert, false),
("jina-embeddings-v4", "jinaai/jina-embeddings-v4", ModelType::Embedding, Architecture::JinaV4, true),
("m3e-base", "moka-ai/m3e-base", ModelType::Embedding, Architecture::Bert, false),
("multilingual-MiniLM-L12-v2", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", ModelType::Embedding, Architecture::Bert, false),
("distiluse-base-multilingual-cased-v1", "sentence-transformers/distiluse-base-multilingual-cased-v1", ModelType::Embedding, Architecture::Bert, false),
("codebert-base", "claudios/codebert-base", ModelType::Embedding, Architecture::Bert, false),
("starencoder", "bigcode/starencoder", ModelType::Embedding, Architecture::Bert, false),
("graphcodebert-base", "claudios/graphcodebert-base", ModelType::Embedding, Architecture::Bert, false),
("unixcoder-base", "claudios/unixcoder-base", ModelType::Embedding, Architecture::Bert, false),
("codexembed-400m", "Salesforce/SFR-Embedding-Code-400M_R", ModelType::Embedding, Architecture::Bert, false),
("sfr-embedding-code-400m", "Salesforce/SFR-Embedding-Code-400M_R", ModelType::Embedding, Architecture::Bert, false),
("codexembed-2b", "Salesforce/SFR-Embedding-Code-2B_R", ModelType::Embedding, Architecture::Qwen2Embedding, false),
("codexembed-7b", "Salesforce/SFR-Embedding-Code-7B_R", ModelType::Embedding, Architecture::MistralEmbedding, false),
("sfr-embedding-code-2b", "Salesforce/SFR-Embedding-Code-2B_R", ModelType::Embedding, Architecture::Qwen2Embedding, false),
("sfr-embedding-code-7b", "Salesforce/SFR-Embedding-Code-7B_R", ModelType::Embedding, Architecture::MistralEmbedding, false),
("qwen2-7b-instruct", "Qwen/Qwen2-7B-Instruct", ModelType::Generator, Architecture::Qwen2Generator, false),
("qwen2.5-0.5b-instruct", "Qwen/Qwen2.5-0.5B-Instruct", ModelType::Generator, Architecture::Qwen2Generator, false),
("qwen2.5-1.5b-instruct", "Qwen/Qwen2.5-1.5B-Instruct", ModelType::Generator, Architecture::Qwen2Generator, false),
("qwen2.5-3b-instruct", "Qwen/Qwen2.5-3B-Instruct", ModelType::Generator, Architecture::Qwen2Generator, false),
("qwen2.5-7b-instruct", "Qwen/Qwen2.5-7B-Instruct", ModelType::Generator, Architecture::Qwen2Generator, false),
("qwen2.5-14b-instruct", "Qwen/Qwen2.5-14B-Instruct", ModelType::Generator, Architecture::Qwen2Generator, false),
("qwen2.5-32b-instruct", "Qwen/Qwen2.5-32B-Instruct", ModelType::Generator, Architecture::Qwen2Generator, false),
("qwen2.5-72b-instruct", "Qwen/Qwen2.5-72B-Instruct", ModelType::Generator, Architecture::Qwen2Generator, false),
("qwen3-0.6b", "Qwen/Qwen3-0.6B", ModelType::Generator, Architecture::Qwen3Generator, false),
("qwen3-1.7b", "Qwen/Qwen3-1.7B", ModelType::Generator, Architecture::Qwen3Generator, false),
("qwen3-4b", "Qwen/Qwen3-4B", ModelType::Generator, Architecture::Qwen3Generator, false),
("qwen3-8b", "Qwen/Qwen3-8B", ModelType::Generator, Architecture::Qwen3Generator, false),
("qwen3-14b", "Qwen/Qwen3-14B", ModelType::Generator, Architecture::Qwen3Generator, false),
("qwen3-32b", "Qwen/Qwen3-32B", ModelType::Generator, Architecture::Qwen3Generator, false),
("qwen3-30b-a3b", "Qwen/Qwen3-30B-A3B", ModelType::Generator, Architecture::Qwen3MoE, false),
("qwen3-235b-a22b", "Qwen/Qwen3-235B-A22B", ModelType::Generator, Architecture::Qwen3MoE, false),
("mistral-7b-instruct", "mistralai/Mistral-7B-Instruct-v0.2", ModelType::Generator, Architecture::MistralGenerator, false),
("mixtral-8x7b-instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1", ModelType::Generator, Architecture::Mixtral, false),
("mixtral-8x22b-instruct", "mistralai/Mixtral-8x22B-Instruct-v0.1", ModelType::Generator, Architecture::Mixtral, false),
("glm-4-9b-chat", "THUDM/glm-4-9b-chat-hf", ModelType::Generator, Architecture::GLM4, false),
("glm-4.7", "zai-org/GLM-4.7", ModelType::Generator, Architecture::GLM4MoE, false),
("deepseek-v3", "deepseek-ai/DeepSeek-V3", ModelType::Generator, Architecture::DeepSeekV3, false),
("phi-4", "microsoft/phi-4", ModelType::Generator, Architecture::Phi3Generator, false),
("phi-4-mini-instruct", "microsoft/phi-4-mini-instruct", ModelType::Generator, Architecture::Phi3Generator, false),
("smollm3-3b", "HuggingFaceTB/SmolLM3-3B", ModelType::Generator, Architecture::SmolLM3Generator, false),
("internlm3-8b-instruct", "internlm/internlm3-8b-instruct", ModelType::Generator, Architecture::InternLM3Generator, false),
("all-MiniLM-L12-v2", "sentence-transformers/all-MiniLM-L12-v2", ModelType::Embedding, Architecture::Bert, false),
("all-distilroberta-v1", "sentence-transformers/all-distilroberta-v1", ModelType::Embedding, Architecture::Bert, false),
("qwen3-embedding-0.6b", "Qwen/Qwen3-Embedding-0.6B", ModelType::Embedding, Architecture::Qwen3Embedding, true),
("qwen3-embedding-4b", "Qwen/Qwen3-Embedding-4B", ModelType::Embedding, Architecture::Qwen3Embedding, true),
("qwen3-embedding-8b", "Qwen/Qwen3-Embedding-8B", ModelType::Embedding, Architecture::Qwen3Embedding, true),
("llama-embed-nemotron-8b", "nvidia/llama-embed-nemotron-8b", ModelType::Embedding, Architecture::NVIDIANemotron, true),
("bge-reranker-v2", "BAAI/bge-reranker-v2-m3", ModelType::Rerank, Architecture::CrossEncoder, false),
("bge-reranker-large", "BAAI/bge-reranker-large", ModelType::Rerank, Architecture::CrossEncoder, false),
("bge-reranker-base", "BAAI/bge-reranker-base", ModelType::Rerank, Architecture::CrossEncoder, false),
("ms-marco-MiniLM-L-6-v2", "cross-encoder/ms-marco-MiniLM-L-6-v2", ModelType::Rerank, Architecture::CrossEncoder, false),
("ms-marco-MiniLM-L-12-v2", "cross-encoder/ms-marco-MiniLM-L-12-v2", ModelType::Rerank, Architecture::CrossEncoder, false),
("ms-marco-TinyBERT-L-2-v2", "cross-encoder/ms-marco-TinyBERT-L-2-v2", ModelType::Rerank, Architecture::CrossEncoder, false),
("ms-marco-electra-base", "cross-encoder/ms-marco-electra-base", ModelType::Rerank, Architecture::CrossEncoder, false),
("quora-distilroberta-base", "cross-encoder/quora-distilroberta-base", ModelType::Rerank, Architecture::CrossEncoder, false),
("qwen3-reranker-0.6b", "Qwen/Qwen3-Reranker-0.6B", ModelType::Rerank, Architecture::Qwen3Reranker, true),
("qwen3-reranker-4b", "Qwen/Qwen3-Reranker-4B", ModelType::Rerank, Architecture::Qwen3Reranker, true),
("qwen3-reranker-8b", "Qwen/Qwen3-Reranker-8B", ModelType::Rerank, Architecture::Qwen3Reranker, true),
("jina-reranker-v3", "jinaai/jina-reranker-v3", ModelType::Rerank, Architecture::JinaRerankerV3, true),
];
for (alias, repo_id, model_type, architecture, supports_quant) in model_entries {
let alias_key = alias.to_ascii_lowercase();
entries.insert(
alias_key,
RegistryEntry::new(repo_id, model_type, architecture, supports_quant),
);
}
Self { entries }
}
/// Resolves a user-supplied model name to a `ModelInfo`.
///
/// Accepts a registered alias (case-insensitive), an alias with a
/// `:quantization` suffix (e.g. `qwen3-embedding-8b:int4`), or a raw
/// repo id containing `/`, which is resolved heuristically.
///
/// # Errors
/// Returns `Error::ModelNotFound` when the name matches no alias and is
/// not a repo id.
pub fn resolve(&self, name: &str) -> Result<ModelInfo> {
    let name = name.trim();
    // Alias with an explicit quantization suffix, e.g. "model:int4".
    if let Some((base, quant_str)) = name.rsplit_once(':') {
        if let Some(quantization) = Quantization::from_suffix(quant_str) {
            if let Some(entry) = self.entries.get(&base.to_ascii_lowercase()) {
                // The suffix is silently dropped for entries without
                // published quantized variants.
                return if entry.supports_quantization {
                    Ok(entry.to_model_info(&format!("{}:{}", base, quant_str), quantization))
                } else {
                    Ok(entry.to_model_info(base, Quantization::None))
                };
            }
        }
    }
    // Plain alias lookup.
    if let Some(entry) = self.entries.get(&name.to_ascii_lowercase()) {
        return Ok(entry.to_model_info(name, Quantization::None));
    }
    // Anything containing '/' is treated as a direct repo id.
    if name.contains('/') {
        return Ok(self.infer_from_repo(name));
    }
    Err(Error::ModelNotFound(name.to_string()))
}
/// Returns all registered aliases (lowercased; iteration order is
/// unspecified).
pub fn list_aliases(&self) -> Vec<&str> {
    self.entries.keys().map(String::as_str).collect()
}
/// Whether `alias` (case-insensitive) has published quantized variants.
/// Unknown aliases report `false`.
pub fn supports_quantization(&self, alias: &str) -> bool {
    self.entries
        .get(&alias.to_ascii_lowercase())
        .map_or(false, |entry| entry.supports_quantization)
}
/// Lists the quantization schemes `resolve` will honor for `alias`.
///
/// Models without quantized variants only accept `Quantization::None`.
pub fn available_quantizations(&self, alias: &str) -> Vec<Quantization> {
    if self.supports_quantization(alias) {
        // Keep in sync with `Quantization::from_suffix`: every scheme it
        // parses is honored by `resolve` for quantization-capable models,
        // so all of them are listed here (the previous list omitted GGUF,
        // BNB4, BNB8, and FP8 despite `resolve` accepting them).
        vec![
            Quantization::None,
            Quantization::Int4,
            Quantization::Int8,
            Quantization::AWQ,
            Quantization::GPTQ,
            Quantization::GGUF,
            Quantization::BNB4,
            Quantization::BNB8,
            Quantization::FP8,
        ]
    } else {
        vec![Quantization::None]
    }
}
/// Heuristically builds a `ModelInfo` for a repo id not present in the
/// registry, inferring quantization, model type, and architecture from
/// well-known substrings of the lowercased repo name.
fn infer_from_repo(&self, repo_id: &str) -> ModelInfo {
    let lower = repo_id.to_ascii_lowercase();
    // Quantization from repo-name markers. bitsandbytes names must be
    // tested before the generic "-4bit"/"-8bit" markers because
    // "-bnb-4bit" also contains "-4bit" and would otherwise be misread
    // as plain Int4 (previously BNB repos were never detected at all).
    let quantization = if lower.contains("-bnb-4bit") || lower.contains("-bnb4") {
        Quantization::BNB4
    } else if lower.contains("-bnb-8bit") || lower.contains("-bnb8") {
        Quantization::BNB8
    } else if lower.contains("-int4") || lower.contains("-4bit") {
        Quantization::Int4
    } else if lower.contains("-int8") || lower.contains("-8bit") {
        Quantization::Int8
    } else if lower.contains("-awq") {
        Quantization::AWQ
    } else if lower.contains("-gptq") {
        Quantization::GPTQ
    } else if lower.contains("-gguf") {
        Quantization::GGUF
    } else if lower.contains("-fp8") {
        Quantization::FP8
    } else {
        Quantization::None
    };
    // Known generator families; Qwen3 counts as a generator unless the
    // name marks it as an embedding or reranker model.
    let is_generator = lower.contains("generator")
        || lower.contains("instruct")
        || lower.contains("chat")
        || lower.contains("glm-4.7")
        || lower.contains("glm4_moe")
        || lower.contains("glm4-moe")
        || lower.contains("mixtral")
        || lower.contains("deepseek")
        || lower.contains("phi-4")
        || lower.contains("phi4")
        || lower.contains("smollm3")
        || lower.contains("internlm3")
        || ((lower.contains("qwen3") || lower.contains("qwen-3"))
            && !lower.contains("embedding")
            && !lower.contains("reranker"));
    let model_type = if lower.contains("reranker") {
        ModelType::Rerank
    } else if is_generator {
        ModelType::Generator
    } else {
        ModelType::Embedding
    };
    // Architecture mapping: most specific name patterns first, broader
    // family markers later, falling back by model type at the end.
    let architecture = if lower.contains("sfr-embedding-code-2b")
        || lower.contains("codexembed-2b")
    {
        Architecture::Qwen2Embedding
    } else if lower.contains("sfr-embedding-code-7b") || lower.contains("codexembed-7b") {
        Architecture::MistralEmbedding
    } else if lower.contains("qwen2.5")
        || lower.contains("qwen-2.5")
        || lower.contains("qwen2_5")
    {
        match model_type {
            ModelType::Generator => Architecture::Qwen2Generator,
            _ => Architecture::Qwen2Embedding,
        }
    } else if lower.contains("qwen2") || lower.contains("qwen-2") {
        match model_type {
            ModelType::Generator => Architecture::Qwen2Generator,
            _ => Architecture::Qwen2Embedding,
        }
    } else if lower.contains("mistral") {
        match model_type {
            ModelType::Generator => Architecture::MistralGenerator,
            _ => Architecture::MistralEmbedding,
        }
    } else if lower.contains("mixtral") {
        Architecture::Mixtral
    } else if lower.contains("deepseek") && lower.contains("v3") {
        Architecture::DeepSeekV3
    } else if lower.contains("phi-4") || lower.contains("phi4") {
        Architecture::Phi3Generator
    } else if lower.contains("smollm3") {
        Architecture::SmolLM3Generator
    } else if lower.contains("internlm3") {
        Architecture::InternLM3Generator
    } else if lower.contains("qwen3") || lower.contains("qwen-3") {
        match model_type {
            ModelType::Embedding => Architecture::Qwen3Embedding,
            ModelType::Rerank => Architecture::Qwen3Reranker,
            ModelType::Generator => {
                // NOTE(review): the `contains("-a") && contains("b")` MoE
                // check is loose — it matches far more than names like
                // "Qwen3-30B-A3B"; confirm against real Qwen3 repo names.
                if lower.contains("moe") || (lower.contains("-a") && lower.contains("b")) {
                    Architecture::Qwen3MoE
                } else {
                    Architecture::Qwen3Generator
                }
            }
        }
    } else if lower.contains("jina") {
        if lower.contains("reranker") {
            Architecture::JinaRerankerV3
        } else {
            Architecture::JinaV4
        }
    } else if lower.contains("nemotron") {
        Architecture::NVIDIANemotron
    } else if lower.contains("glm-4.7")
        || lower.contains("glm4_moe")
        || lower.contains("glm4-moe")
    {
        Architecture::GLM4MoE
    } else if lower.contains("glm") {
        Architecture::GLM4
    } else if lower.contains("gemma") {
        Architecture::Gemma3n
    } else {
        // No family marker matched: fall back by model type.
        match model_type {
            ModelType::Embedding => Architecture::Bert,
            ModelType::Rerank => Architecture::CrossEncoder,
            ModelType::Generator => Architecture::Qwen3Generator,
        }
    };
    // The alias is the lowercased final path segment of the repo id.
    let alias = repo_id
        .rsplit('/')
        .next()
        .unwrap_or(repo_id)
        .to_ascii_lowercase();
    ModelInfo {
        alias,
        repo_id: repo_id.to_string(),
        model_type,
        architecture,
        quantization,
    }
}
}
impl Default for ModelRegistry {
/// Equivalent to [`ModelRegistry::new`]: a registry pre-populated with
/// the built-in catalog.
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
// A built-in alias resolves to its pinned repo with no quantization.
#[test]
fn resolves_builtin_model() {
let registry = ModelRegistry::new();
let info = registry.resolve("bge-small-en").unwrap();
assert_eq!(info.repo_id, "BAAI/bge-small-en-v1.5");
assert!(matches!(info.architecture, Architecture::Bert));
assert!(matches!(info.quantization, Quantization::None));
}
// An unknown `org/name` string is accepted as a direct repo id;
// the alias becomes the final path segment, lowercased.
#[test]
fn resolves_repo_id_directly() {
let registry = ModelRegistry::new();
let repo = "BAAI/custom-model";
let info = registry.resolve(repo).unwrap();
assert_eq!(info.repo_id, repo);
assert_eq!(info.alias, "custom-model");
}
// For quantization-capable models, `alias:quant` appends the matching
// repo suffix and records the scheme.
#[test]
fn resolves_quantized_model() {
let registry = ModelRegistry::new();
let info = registry.resolve("qwen3-embedding-0.6b:int4").unwrap();
assert_eq!(info.repo_id, "Qwen/Qwen3-Embedding-0.6B-Int4");
assert!(matches!(info.quantization, Quantization::Int4));
let info = registry.resolve("qwen3-embedding-8b:awq").unwrap();
assert_eq!(info.repo_id, "Qwen/Qwen3-Embedding-8B-AWQ");
assert!(matches!(info.quantization, Quantization::AWQ));
let info = registry.resolve("qwen3-reranker-4b:gptq").unwrap();
assert_eq!(info.repo_id, "Qwen/Qwen3-Reranker-4B-GPTQ");
assert!(matches!(info.quantization, Quantization::GPTQ));
}
// A `:quant` suffix on a model without quantized variants is dropped
// silently rather than raising an error.
#[test]
fn quantization_ignored_for_unsupported_models() {
let registry = ModelRegistry::new();
let info = registry.resolve("bge-small-en:int4").unwrap();
assert_eq!(info.repo_id, "BAAI/bge-small-en-v1.5");
assert!(matches!(info.quantization, Quantization::None));
}
// Raw repo ids have their quantization inferred from name suffixes.
#[test]
fn infers_quantization_from_repo_id() {
let registry = ModelRegistry::new();
let info = registry.resolve("Qwen/Qwen3-Embedding-8B-Int4").unwrap();
assert!(matches!(info.quantization, Quantization::Int4));
let info = registry.resolve("some-org/model-name-AWQ").unwrap();
assert!(matches!(info.quantization, Quantization::AWQ));
}
// `supports_quantization` reflects the per-entry catalog flag.
#[test]
fn supports_quantization_check() {
let registry = ModelRegistry::new();
assert!(registry.supports_quantization("qwen3-embedding-0.6b"));
assert!(registry.supports_quantization("qwen3-reranker-8b"));
assert!(registry.supports_quantization("jina-embeddings-v4"));
assert!(!registry.supports_quantization("bge-small-en"));
assert!(!registry.supports_quantization("all-MiniLM-L6-v2"));
}
// Exhaustive check: each built-in alias maps to the expected repo id,
// model type, and architecture.
#[test]
fn resolves_expanded_models() {
let registry = ModelRegistry::new();
let cases = [
("qwen3-embedding-0.6b", "Qwen/Qwen3-Embedding-0.6B", ModelType::Embedding, Architecture::Qwen3Embedding),
("qwen3-embedding-4b", "Qwen/Qwen3-Embedding-4B", ModelType::Embedding, Architecture::Qwen3Embedding),
("qwen3-embedding-8b", "Qwen/Qwen3-Embedding-8B", ModelType::Embedding, Architecture::Qwen3Embedding),
("codexembed-2b", "Salesforce/SFR-Embedding-Code-2B_R", ModelType::Embedding, Architecture::Qwen2Embedding),
("codexembed-7b", "Salesforce/SFR-Embedding-Code-7B_R", ModelType::Embedding, Architecture::MistralEmbedding),
("sfr-embedding-code-2b", "Salesforce/SFR-Embedding-Code-2B_R", ModelType::Embedding, Architecture::Qwen2Embedding),
("sfr-embedding-code-7b", "Salesforce/SFR-Embedding-Code-7B_R", ModelType::Embedding, Architecture::MistralEmbedding),
("qwen2-7b-instruct", "Qwen/Qwen2-7B-Instruct", ModelType::Generator, Architecture::Qwen2Generator),
("qwen2.5-0.5b-instruct", "Qwen/Qwen2.5-0.5B-Instruct", ModelType::Generator, Architecture::Qwen2Generator),
("qwen2.5-1.5b-instruct", "Qwen/Qwen2.5-1.5B-Instruct", ModelType::Generator, Architecture::Qwen2Generator),
("qwen2.5-3b-instruct", "Qwen/Qwen2.5-3B-Instruct", ModelType::Generator, Architecture::Qwen2Generator),
("qwen2.5-7b-instruct", "Qwen/Qwen2.5-7B-Instruct", ModelType::Generator, Architecture::Qwen2Generator),
("qwen2.5-14b-instruct", "Qwen/Qwen2.5-14B-Instruct", ModelType::Generator, Architecture::Qwen2Generator),
("qwen2.5-32b-instruct", "Qwen/Qwen2.5-32B-Instruct", ModelType::Generator, Architecture::Qwen2Generator),
("qwen2.5-72b-instruct", "Qwen/Qwen2.5-72B-Instruct", ModelType::Generator, Architecture::Qwen2Generator),
("qwen3-0.6b", "Qwen/Qwen3-0.6B", ModelType::Generator, Architecture::Qwen3Generator),
("qwen3-1.7b", "Qwen/Qwen3-1.7B", ModelType::Generator, Architecture::Qwen3Generator),
("qwen3-4b", "Qwen/Qwen3-4B", ModelType::Generator, Architecture::Qwen3Generator),
("qwen3-8b", "Qwen/Qwen3-8B", ModelType::Generator, Architecture::Qwen3Generator),
("qwen3-14b", "Qwen/Qwen3-14B", ModelType::Generator, Architecture::Qwen3Generator),
("qwen3-32b", "Qwen/Qwen3-32B", ModelType::Generator, Architecture::Qwen3Generator),
("qwen3-30b-a3b", "Qwen/Qwen3-30B-A3B", ModelType::Generator, Architecture::Qwen3MoE),
("qwen3-235b-a22b", "Qwen/Qwen3-235B-A22B", ModelType::Generator, Architecture::Qwen3MoE),
("mistral-7b-instruct", "mistralai/Mistral-7B-Instruct-v0.2", ModelType::Generator, Architecture::MistralGenerator),
("mixtral-8x7b-instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1", ModelType::Generator, Architecture::Mixtral),
("mixtral-8x22b-instruct", "mistralai/Mixtral-8x22B-Instruct-v0.1", ModelType::Generator, Architecture::Mixtral),
("glm-4-9b-chat", "THUDM/glm-4-9b-chat-hf", ModelType::Generator, Architecture::GLM4),
("glm-4.7", "zai-org/GLM-4.7", ModelType::Generator, Architecture::GLM4MoE),
("deepseek-v3", "deepseek-ai/DeepSeek-V3", ModelType::Generator, Architecture::DeepSeekV3),
("phi-4", "microsoft/phi-4", ModelType::Generator, Architecture::Phi3Generator),
("phi-4-mini-instruct", "microsoft/phi-4-mini-instruct", ModelType::Generator, Architecture::Phi3Generator),
("smollm3-3b", "HuggingFaceTB/SmolLM3-3B", ModelType::Generator, Architecture::SmolLM3Generator),
("internlm3-8b-instruct", "internlm/internlm3-8b-instruct", ModelType::Generator, Architecture::InternLM3Generator),
("qwen3-reranker-0.6b", "Qwen/Qwen3-Reranker-0.6B", ModelType::Rerank, Architecture::Qwen3Reranker),
("qwen3-reranker-4b", "Qwen/Qwen3-Reranker-4B", ModelType::Rerank, Architecture::Qwen3Reranker),
("qwen3-reranker-8b", "Qwen/Qwen3-Reranker-8B", ModelType::Rerank, Architecture::Qwen3Reranker),
("llama-embed-nemotron-8b", "nvidia/llama-embed-nemotron-8b", ModelType::Embedding, Architecture::NVIDIANemotron),
("jina-embeddings-v4", "jinaai/jina-embeddings-v4", ModelType::Embedding, Architecture::JinaV4),
("jina-reranker-v3", "jinaai/jina-reranker-v3", ModelType::Rerank, Architecture::JinaRerankerV3),
];
for (alias, repo_id, model_type, architecture) in cases {
let info = registry.resolve(alias).expect("resolve model");
assert_eq!(info.repo_id, repo_id);
assert_eq!(info.model_type, model_type);
assert_eq!(info.architecture, architecture);
}
}
// Quantization-capable models list multiple schemes; others only `None`.
#[test]
fn list_available_quantizations() {
let registry = ModelRegistry::new();
let quants = registry.available_quantizations("qwen3-embedding-8b");
assert!(quants.len() > 1);
assert!(quants.contains(&Quantization::Int4));
let quants = registry.available_quantizations("bge-small-en");
assert_eq!(quants.len(), 1);
assert!(quants.contains(&Quantization::None));
}
}