use super::HF_MODELS_BASE;
/// Quantization label recognised in a model filename.
///
/// Every named variant corresponds to one label that
/// `QuantizationType::from_filename` searches for (case-insensitively).
/// Anything it does not recognise is carried in `Other`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
// Variant names keep their underscores (e.g. `Q4_K_M`) so they read exactly
// like the labels they stand for; the type-naming lint would otherwise flag them.
#[allow(non_camel_case_types)]
pub enum QuantizationType {
// 32 and 16 bits per weight according to `bits_per_weight`.
F32,
F16,
BF16,
// `Q*` labels, listed from most to fewest bits per weight.
Q8_0,
Q6_K,
Q5_K_M,
Q5_K_S,
Q5_0,
Q5_1,
Q4_K_M,
Q4_K_S,
Q4_0,
Q4_1,
Q3_K_M,
Q3_K_S,
Q3_K_L,
Q2_K,
// `IQ*` labels.
IQ2_XXS,
IQ2_XS,
IQ2_S,
IQ3_XXS,
IQ3_XS,
IQ3_S,
IQ4_XS,
IQ4_NL,
/// Fallback for unrecognised input; `from_filename` stores the complete,
/// unmodified filename here.
Other(String),
}
impl QuantizationType {
pub fn from_filename(filename: &str) -> Self {
let lower = filename.to_lowercase();
if lower.contains("f32") {
return Self::F32;
}
if lower.contains("bf16") {
return Self::BF16;
}
if lower.contains("f16") {
return Self::F16;
}
if lower.contains("q8_0") {
return Self::Q8_0;
}
if lower.contains("q6_k") {
return Self::Q6_K;
}
if lower.contains("q5_k_m") {
return Self::Q5_K_M;
}
if lower.contains("q5_k_s") {
return Self::Q5_K_S;
}
if lower.contains("q5_0") {
return Self::Q5_0;
}
if lower.contains("q5_1") {
return Self::Q5_1;
}
if lower.contains("q4_k_m") {
return Self::Q4_K_M;
}
if lower.contains("q4_k_s") {
return Self::Q4_K_S;
}
if lower.contains("q4_0") {
return Self::Q4_0;
}
if lower.contains("q4_1") {
return Self::Q4_1;
}
if lower.contains("q3_k_m") {
return Self::Q3_K_M;
}
if lower.contains("q3_k_s") {
return Self::Q3_K_S;
}
if lower.contains("q3_k_l") {
return Self::Q3_K_L;
}
if lower.contains("q2_k") {
return Self::Q2_K;
}
if lower.contains("iq2_xxs") {
return Self::IQ2_XXS;
}
if lower.contains("iq2_xs") {
return Self::IQ2_XS;
}
if lower.contains("iq2_s") {
return Self::IQ2_S;
}
if lower.contains("iq3_xxs") {
return Self::IQ3_XXS;
}
if lower.contains("iq3_xs") {
return Self::IQ3_XS;
}
if lower.contains("iq3_s") {
return Self::IQ3_S;
}
if lower.contains("iq4_xs") {
return Self::IQ4_XS;
}
if lower.contains("iq4_nl") {
return Self::IQ4_NL;
}
Self::Other(filename.to_string())
}
pub fn bits_per_weight(&self) -> f32 {
match self {
Self::F32 => 32.0,
Self::F16 | Self::BF16 => 16.0,
Self::Q8_0 => 8.0,
Self::Q6_K => 6.5,
Self::Q5_K_M | Self::Q5_K_S | Self::Q5_0 | Self::Q5_1 => 5.5,
Self::Q4_K_M | Self::Q4_K_S | Self::Q4_0 | Self::Q4_1 => 4.5,
Self::Q3_K_M | Self::Q3_K_S | Self::Q3_K_L => 3.5,
Self::Q2_K => 2.5,
Self::IQ2_XXS | Self::IQ2_XS | Self::IQ2_S => 2.5,
Self::IQ3_XXS | Self::IQ3_XS | Self::IQ3_S => 3.5,
Self::IQ4_XS | Self::IQ4_NL => 4.5,
Self::Other(_) => 4.0, }
}
pub fn quality_rating(&self) -> u8 {
match self {
Self::F32 => 10,
Self::F16 | Self::BF16 => 10,
Self::Q8_0 => 9,
Self::Q6_K => 8,
Self::Q5_K_M => 7,
Self::Q5_K_S | Self::Q5_0 | Self::Q5_1 => 7,
Self::Q4_K_M => 6,
Self::Q4_K_S | Self::Q4_0 | Self::Q4_1 => 5,
Self::Q3_K_M | Self::Q3_K_L => 4,
Self::Q3_K_S => 3,
Self::Q2_K => 2,
Self::IQ2_XXS | Self::IQ2_XS | Self::IQ2_S => 2,
Self::IQ3_XXS | Self::IQ3_XS | Self::IQ3_S => 4,
Self::IQ4_XS | Self::IQ4_NL => 5,
Self::Other(_) => 5,
}
}
}
impl std::fmt::Display for QuantizationType {
    /// Writes the upper-case label of the variant; `Other` writes its stored
    /// string verbatim. Width and fill flags on the formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label: &str = match self {
            Self::F32 => "F32",
            Self::F16 => "F16",
            Self::BF16 => "BF16",
            Self::Q8_0 => "Q8_0",
            Self::Q6_K => "Q6_K",
            Self::Q5_K_M => "Q5_K_M",
            Self::Q5_K_S => "Q5_K_S",
            Self::Q5_0 => "Q5_0",
            Self::Q5_1 => "Q5_1",
            Self::Q4_K_M => "Q4_K_M",
            Self::Q4_K_S => "Q4_K_S",
            Self::Q4_0 => "Q4_0",
            Self::Q4_1 => "Q4_1",
            Self::Q3_K_M => "Q3_K_M",
            Self::Q3_K_S => "Q3_K_S",
            Self::Q3_K_L => "Q3_K_L",
            Self::Q2_K => "Q2_K",
            Self::IQ2_XXS => "IQ2_XXS",
            Self::IQ2_XS => "IQ2_XS",
            Self::IQ2_S => "IQ2_S",
            Self::IQ3_XXS => "IQ3_XXS",
            Self::IQ3_XS => "IQ3_XS",
            Self::IQ3_S => "IQ3_S",
            Self::IQ4_XS => "IQ4_XS",
            Self::IQ4_NL => "IQ4_NL",
            Self::Other(raw) => raw.as_str(),
        };
        f.write_str(label)
    }
}
/// One downloadable model file together with its detected quantization.
#[derive(Debug, Clone)]
pub struct GGUFFile {
/// File name of the model file.
pub filename: String,
/// File size in bytes (`size_human` prints the raw value with a "bytes" suffix).
pub size: u64,
/// Quantization associated with this file.
pub quantization: QuantizationType,
/// Location the file can be fetched from.
pub download_url: String,
/// Optional SHA-256 digest of the file — presumably hex-encoded; confirm
/// against the code that fills it in.
pub sha256: Option<String>,
}
impl GGUFFile {
    /// Renders `size` as a human-readable string.
    ///
    /// Uses 1024-based steps and two decimals for the `KB`, `MB` and `GB`
    /// forms; sizes below 1024 are printed as a whole number of `bytes`.
    pub fn size_human(&self) -> String {
        const KIB: u64 = 1 << 10;
        const MIB: u64 = 1 << 20;
        const GIB: u64 = 1 << 30;

        match self.size {
            bytes if bytes >= GIB => format!("{:.2} GB", bytes as f64 / GIB as f64),
            bytes if bytes >= MIB => format!("{:.2} MB", bytes as f64 / MIB as f64),
            bytes if bytes >= KIB => format!("{:.2} KB", bytes as f64 / KIB as f64),
            bytes => format!("{} bytes", bytes),
        }
    }

    /// Estimates the VRAM needed for this file, in megabytes.
    ///
    /// The estimate is the file size in whole megabytes (rounded down) plus a
    /// flat 512 MB allowance.
    pub fn estimated_vram_mb(&self) -> u64 {
        const BYTES_PER_MB: u64 = 1024 * 1024;
        const FLAT_ALLOWANCE_MB: u64 = 512;

        self.size / BYTES_PER_MB + FLAT_ALLOWANCE_MB
    }
}
/// Metadata describing one model repository and the model files found in it.
#[derive(Debug, Clone)]
pub struct HFModelInfo {
/// Repository identifier; `url()` appends it to `HF_MODELS_BASE`.
pub model_id: String,
/// Repository owner — presumably the part of `model_id` before the slash; confirm.
pub author: String,
/// Model name — presumably the part of `model_id` after the slash; confirm.
pub name: String,
/// Optional free-text description.
pub description: Option<String>,
/// Download counter.
pub downloads: u64,
/// Like counter.
pub likes: u64,
/// Repository tags; `is_gguf()` looks for a case-insensitive "gguf" entry.
pub tags: Vec<String>,
/// Optional last-modified timestamp kept as text (format not visible here).
pub last_modified: Option<String>,
/// Model files known for this repository; the selection helpers choose from this list.
pub gguf_files: Vec<GGUFFile>,
/// Optional pipeline tag.
pub pipeline_tag: Option<String>,
/// Optional license identifier.
pub license: Option<String>,
}
impl HFModelInfo {
    /// Returns the model URL: `HF_MODELS_BASE`, a slash, then `model_id`.
    pub fn url(&self) -> String {
        format!("{}/{}", HF_MODELS_BASE, self.model_id)
    }

    /// Reports whether this repository looks like a GGUF model.
    ///
    /// True when any file is listed in `gguf_files`, when a tag equals
    /// "gguf", or when `model_id` contains "gguf"; both text checks ignore
    /// ASCII case.
    pub fn is_gguf(&self) -> bool {
        !self.gguf_files.is_empty()
            // Compare in place instead of allocating a lowercase copy per tag.
            || self.tags.iter().any(|tag| tag.eq_ignore_ascii_case("gguf"))
            || self.model_id.to_ascii_lowercase().contains("gguf")
    }

    /// Picks the best file whose `GGUFFile::estimated_vram_mb` fits in `vram_mb`.
    ///
    /// "Best" means the highest `quality_rating`; files with the same rating
    /// are ranked by size, larger first, so the answer does not depend on the
    /// order of `gguf_files`. Returns `None` when nothing fits.
    pub fn best_quantization_for_vram(&self, vram_mb: u64) -> Option<&GGUFFile> {
        Self::best_of(
            self.gguf_files
                .iter()
                .filter(|file| file.estimated_vram_mb() <= vram_mb),
        )
    }

    /// Returns the file with the smallest `size`, or `None` when there are no files.
    pub fn smallest_quantization(&self) -> Option<&GGUFFile> {
        self.gguf_files.iter().min_by_key(|file| file.size)
    }

    /// Returns the best file regardless of memory requirements.
    ///
    /// Uses the same ranking as [`HFModelInfo::best_quantization_for_vram`],
    /// so both methods agree whenever every file fits the budget.
    pub fn highest_quality(&self) -> Option<&GGUFFile> {
        Self::best_of(self.gguf_files.iter())
    }

    /// Single-pass selection shared by the quality-based pickers: highest
    /// rating first, larger size as the tie-breaker.
    fn best_of<'a>(candidates: impl Iterator<Item = &'a GGUFFile>) -> Option<&'a GGUFFile> {
        candidates.max_by_key(|file| (file.quantization.quality_rating(), file.size))
    }
}
/// Search criteria assembled through the chained builder methods on this type.
///
/// The derived `Default` leaves every option unset, the tag list empty and
/// `gguf_only` false.
#[derive(Debug, Clone, Default)]
pub struct ModelSearchFilters {
/// Optional free-text query.
pub query: Option<String>,
/// Optional author to restrict results to.
pub author: Option<String>,
/// Tags to filter by, in the order they were added.
pub tags: Vec<String>,
/// Set by the `gguf_only()` builder, which also adds the "gguf" tag.
pub gguf_only: bool,
/// Optional minimum download count.
pub min_downloads: Option<u64>,
/// Optional sort key; the builders set "downloads" or "likes".
pub sort: Option<String>,
/// Optional cap on the number of results.
pub limit: Option<usize>,
}
impl ModelSearchFilters {
    /// Creates an empty filter set (same as `Default::default()`).
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the free-text query, replacing any previous one.
    pub fn with_query(mut self, query: &str) -> Self {
        self.query = Some(query.to_string());
        self
    }

    /// Sets the author filter, replacing any previous one.
    pub fn with_author(mut self, author: &str) -> Self {
        self.author = Some(author.to_string());
        self
    }

    /// Appends `tag` to the tag list.
    pub fn with_tag(mut self, tag: &str) -> Self {
        self.tags.push(tag.to_string());
        self
    }

    /// Marks the search as GGUF-only and makes sure the "gguf" tag is present.
    ///
    /// The tag is added at most once, so calling this repeatedly, or after
    /// `with_tag("gguf")`, does not leave duplicate entries in `tags`.
    pub fn gguf_only(mut self) -> Self {
        self.gguf_only = true;
        if !self.tags.iter().any(|tag| tag == "gguf") {
            self.tags.push("gguf".to_string());
        }
        self
    }

    /// Sets the minimum download count.
    pub fn with_min_downloads(mut self, min: u64) -> Self {
        self.min_downloads = Some(min);
        self
    }

    /// Selects "downloads" as the sort key.
    pub fn sort_by_downloads(mut self) -> Self {
        self.sort = Some("downloads".to_string());
        self
    }

    /// Selects "likes" as the sort key.
    pub fn sort_by_likes(mut self) -> Self {
        self.sort = Some("likes".to_string());
        self
    }

    /// Caps the number of results.
    pub fn with_limit(mut self, limit: usize) -> Self {
        self.limit = Some(limit);
        self
    }
}
/// Boxed closure type that takes a `DownloadProgress` by value and returns nothing.
///
/// The `Send + Sync` bounds let the callback be moved to, and shared between, threads.
pub type ProgressCallback = Box<dyn Fn(DownloadProgress) + Send + Sync>;
/// Snapshot of a download's progress.
#[derive(Debug, Clone)]
pub struct DownloadProgress {
/// Amount transferred so far — presumably bytes, in the same unit as `total`.
pub downloaded: u64,
/// Expected total amount; `percentage()` reports 0 when this is 0.
pub total: u64,
/// Transfer speed in bytes per second (`speed_human` prints small values as "B/s").
pub speed_bps: u64,
/// Estimated time remaining, in seconds.
pub eta_seconds: u64,
/// Name of the file this snapshot refers to.
pub filename: String,
}
impl DownloadProgress {
    /// Returns `downloaded` as a percentage of `total`.
    ///
    /// A `total` of zero yields `0.0` rather than dividing by zero. The value
    /// is not clamped, so it exceeds 100 when `downloaded > total`.
    pub fn percentage(&self) -> f32 {
        match self.total {
            0 => 0.0,
            total => (self.downloaded as f64 / total as f64 * 100.0) as f32,
        }
    }

    /// Renders `speed_bps` as `B/s`, `KB/s` or `MB/s` using 1024-based steps.
    ///
    /// The `KB/s` and `MB/s` forms carry two decimals; `B/s` is a whole number.
    pub fn speed_human(&self) -> String {
        const KIB: u64 = 1 << 10;
        const MIB: u64 = 1 << 20;

        let bps = self.speed_bps;
        if bps < KIB {
            return format!("{} B/s", bps);
        }
        if bps < MIB {
            return format!("{:.2} KB/s", bps as f64 / KIB as f64);
        }
        format!("{:.2} MB/s", bps as f64 / MIB as f64)
    }

    /// Renders `eta_seconds` as `"{h}h {m}m"`, `"{m}m {s}s"` or `"{s}s"`.
    ///
    /// Only the two most significant units are shown; the rest is truncated.
    pub fn eta_human(&self) -> String {
        let remaining = self.eta_seconds;
        let hours = remaining / 3600;
        let minutes = remaining % 3600 / 60;
        let seconds = remaining % 60;

        match (hours, minutes) {
            (0, 0) => format!("{}s", seconds),
            (0, m) => format!("{}m {}s", m, seconds),
            (h, m) => format!("{}h {}m", h, m),
        }
    }
}
/// Outcome of testing a model.
///
/// NOTE(review): nothing in this part of the file reads or writes these
/// fields, so the descriptions below are inferred from the field names —
/// confirm against the code that builds this struct.
#[derive(Debug, Clone)]
pub struct ModelTestResult {
/// Whether the model loaded.
pub load_success: bool,
/// Load duration in milliseconds.
pub load_time_ms: u64,
/// Whether the tokenization check passed.
pub tokenization_works: bool,
/// Whether the generation check passed.
pub generation_works: bool,
/// Optional sample of generated text.
pub sample_output: Option<String>,
/// Presumably the model's parameter count.
pub n_params: u64,
/// Presumably the context length.
pub n_ctx: u32,
/// Presumably the embedding dimension.
pub n_embd: u32,
/// Presumably the number of layers.
pub n_layers: u32,
/// Presumably the vocabulary size.
pub vocab_size: u32,
/// Optional error message.
pub error: Option<String>,
}