use gllm::{ModelRegistry, Quantization};
/// Demo walking through gllm's quantization support: alias resolution,
/// `alias:quant` variant syntax, repo-ID auto-detection, non-quantizable
/// fallback, capability queries, and the full suffix table.
fn main() {
    println!("=== gllm Quantization Support Demo ===\n");
    let registry = ModelRegistry::new();

    // 1. Resolve a bare alias and show its default quantization.
    println!("1. Basic Resolution:");
    let resolved = registry.resolve("qwen3-embedding-0.6b").unwrap();
    println!(" qwen3-embedding-0.6b -> {}", resolved.repo_id);
    println!(" Quantization: {:?}\n", resolved.quantization);

    // 2. The `alias:quant` suffix selects a quantized variant.
    println!("2. Quantized Variants (alias:quant syntax):");
    let quantized_aliases = [
        "qwen3-embedding-0.6b:int4",
        "qwen3-embedding-0.6b:int8",
        "qwen3-embedding-8b:awq",
        "qwen3-reranker-4b:gptq",
    ];
    for &alias in &quantized_aliases {
        let resolved = registry.resolve(alias).unwrap();
        println!(" {} -> {} ({:?})", alias, resolved.repo_id, resolved.quantization);
    }
    println!();

    // 3. A raw repo ID has its quantization inferred from the name suffix.
    println!("3. Direct Repo ID (auto-detect quantization):");
    let repo_ids = [
        "Qwen/Qwen3-Embedding-8B-Int4",
        "some-org/model-AWQ",
        "nvidia/llama-embed-nemotron-8b-GPTQ",
    ];
    for &repo_id in &repo_ids {
        let resolved = registry.resolve(repo_id).unwrap();
        println!(" {} -> {:?}", repo_id, resolved.quantization);
    }
    println!();

    // 4. A quant suffix on a model without quantized variants is ignored.
    println!("4. Non-quantizable Models (suffix ignored):");
    let resolved = registry.resolve("bge-small-en:int4").unwrap();
    println!(" bge-small-en:int4 -> {} (quant: {:?})", resolved.repo_id, resolved.quantization);
    println!();

    // 5. Query per-model quantization capability and available variants.
    println!("5. Quantization Support Check:");
    for &name in &["qwen3-embedding-8b", "bge-small-en", "jina-embeddings-v4"] {
        let has_quant = registry.supports_quantization(name);
        let variants = registry.available_quantizations(name);
        println!(" {}: supports_quant={}, variants={:?}", name, has_quant, variants);
    }
    println!();

    // 6. Enumerate every recognized `:suffix` and its canonical repo suffix.
    println!("6. Supported Quantization Types:");
    for &tag in &["int4", "int8", "awq", "gptq", "gguf", "fp8", "bnb4", "bnb8"] {
        if let Some(quant) = Quantization::from_suffix(tag) {
            println!(" :{} -> {:?} (repo_suffix: \"{}\")", tag, quant, quant.repo_suffix());
        }
    }

    println!("\n=== Demo Complete ===");
}