use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Coarse size class of a model, used for filtering/listing in the registry.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ModelSize {
// Smallest class (used by LoRA adapters in this registry).
Tiny,
Small,
Medium,
Large,
}
/// Weight quantization level of a model file.
///
/// Lower-bit quantizations trade accuracy for a smaller memory footprint;
/// see `size_multiplier` / `memory_reduction` for the size relationships.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum QuantizationLevel {
// 4-bit quantization.
Q4,
// 5-bit quantization.
Q5,
// 8-bit quantization.
Q8,
// Unquantized 16-bit floats (baseline).
FP16,
}
impl QuantizationLevel {
    /// Fraction of the FP16 (baseline) model size this quantization occupies.
    ///
    /// E.g. Q4 stores roughly 4 bits per weight vs. 16 for FP16 → 0.25.
    pub fn size_multiplier(&self) -> f32 {
        match self {
            Self::Q4 => 0.25,
            Self::Q5 => 0.3125,
            Self::Q8 => 0.5,
            Self::FP16 => 1.0,
        }
    }

    /// Fraction of memory saved relative to FP16.
    ///
    /// Derived from `size_multiplier` so the two values can never disagree.
    /// (The previous hand-written table listed 0.69 for Q5 while the
    /// multiplier implies exactly 0.6875.)
    pub fn memory_reduction(&self) -> f32 {
        1.0 - self.size_multiplier()
    }
}
/// Hardware needed to run a model locally.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareRequirements {
// Absolute minimum system RAM, in GB (used by `ModelInfo::fits_in_ram`).
pub min_ram_gb: f32,
// RAM for comfortable operation, in GB.
pub recommended_ram_gb: f32,
// Whether the model can run on the Apple Neural Engine.
pub supports_ane: bool,
// Whether the model can run via Apple's Metal backend.
pub supports_metal: bool,
// Whether the model can run via CUDA.
pub supports_cuda: bool,
// Minimum GPU VRAM in GB; `None` when GPU memory is not a constraint
// (e.g. the small LoRA adapter entry).
pub min_vram_gb: Option<f32>,
}
/// Metadata describing one downloadable model (or LoRA adapter) in the registry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelInfo {
// Registry key, e.g. "ruvltra-small".
pub id: String,
// Human-readable display name.
pub name: String,
// Hugging Face repository path ("owner/name"); combined with `filename`
// to build the download URL.
pub repo: String,
// File name inside the repository (GGUF weights or safetensors adapter).
pub filename: String,
pub size: ModelSize,
pub quantization: QuantizationLevel,
// Download size in bytes.
pub size_bytes: u64,
// Optional content checksum; format unspecified here — presumably
// SHA-256 hex, verify against the downloader. Currently always `None`.
pub checksum: Option<String>,
// Parameter count in billions (e.g. 0.5 for a 0.5B model).
pub params_b: f32,
// Context window size — presumably in tokens.
pub context_length: usize,
pub hardware: HardwareRequirements,
pub description: String,
// `true` for LoRA adapters that require a separate base model.
pub is_adapter: bool,
// Registry id of the required base model when `is_adapter` is true.
pub base_model: Option<String>,
// Whether the file ships SONA pre-trained weights
// (SONA semantics are not defined in this file — see project docs).
pub has_sona_weights: bool,
}
impl ModelInfo {
    /// Direct-download URL for the model file on Hugging Face.
    pub fn download_url(&self) -> String {
        format!("https://huggingface.co/{}/resolve/main/{}", self.repo, self.filename)
    }

    /// Landing-page URL of the model's Hugging Face repository.
    pub fn hub_url(&self) -> String {
        format!("https://huggingface.co/{}", self.repo)
    }

    /// Rough download time for the model file at the given speed.
    ///
    /// NOTE(review): despite the `_mbps` name, the math divides mebibytes by
    /// the speed, so the parameter is effectively MiB per second (and the
    /// result is seconds at that interpretation) — confirm intended units.
    pub fn estimate_download_time(&self, speed_mbps: f32) -> f32 {
        const BYTES_PER_MIB: f32 = 1024.0 * 1024.0;
        let megabytes = self.size_bytes as f32 / BYTES_PER_MIB;
        megabytes / speed_mbps
    }

    /// Whether the model's minimum RAM requirement is satisfied.
    pub fn fits_in_ram(&self, available_gb: f32) -> bool {
        self.hardware.min_ram_gb <= available_gb
    }
}
/// In-memory catalog of the known RuvLTRA models, keyed by model id.
pub struct RuvLtraRegistry {
// Map from registry id (e.g. "ruvltra-small") to its metadata.
models: HashMap<String, ModelInfo>,
}
impl RuvLtraRegistry {
    /// Builds the registry with its built-in model catalog.
    pub fn new() -> Self {
        let mut models = HashMap::new();
        models.insert(
            "ruvltra-small".to_string(),
            ModelInfo {
                id: "ruvltra-small".to_string(),
                name: "RuvLTRA Small (0.5B Q4)".to_string(),
                repo: "ruv/ruvltra".to_string(),
                filename: "ruvltra-small-0.5b-q4_k_m.gguf".to_string(),
                size: ModelSize::Small,
                quantization: QuantizationLevel::Q4,
                size_bytes: 662_000_000,
                checksum: None,
                params_b: 0.5,
                context_length: 4096,
                hardware: HardwareRequirements {
                    min_ram_gb: 1.0,
                    recommended_ram_gb: 2.0,
                    supports_ane: true,
                    supports_metal: true,
                    supports_cuda: true,
                    min_vram_gb: Some(1.0),
                },
                description: "Compact RuvLTRA model optimized for edge devices. \
                    Includes SONA pre-trained weights for adaptive learning."
                    .to_string(),
                is_adapter: false,
                base_model: None,
                has_sona_weights: true,
            },
        );
        models.insert(
            "ruvltra-small-q8".to_string(),
            ModelInfo {
                id: "ruvltra-small-q8".to_string(),
                name: "RuvLTRA Small (0.5B Q8)".to_string(),
                repo: "ruv/ruvltra".to_string(),
                filename: "ruvltra-small-0.5b-q8_0.gguf".to_string(),
                size: ModelSize::Small,
                quantization: QuantizationLevel::Q8,
                size_bytes: 1_324_000_000,
                checksum: None,
                params_b: 0.5,
                context_length: 4096,
                hardware: HardwareRequirements {
                    min_ram_gb: 2.0,
                    recommended_ram_gb: 4.0,
                    supports_ane: true,
                    supports_metal: true,
                    supports_cuda: true,
                    min_vram_gb: Some(2.0),
                },
                description: "High-quality Q8 quantization for better accuracy.".to_string(),
                is_adapter: false,
                base_model: None,
                has_sona_weights: true,
            },
        );
        // NOTE(review): the display name says "3B" and params_b is 3.0, but the
        // filename says "1.1b" — confirm which figure is correct upstream.
        models.insert(
            "ruvltra-medium".to_string(),
            ModelInfo {
                id: "ruvltra-medium".to_string(),
                name: "RuvLTRA Medium (3B Q4)".to_string(),
                repo: "ruv/ruvltra".to_string(),
                filename: "ruvltra-medium-1.1b-q4_k_m.gguf".to_string(),
                size: ModelSize::Medium,
                quantization: QuantizationLevel::Q4,
                size_bytes: 2_100_000_000,
                checksum: None,
                params_b: 3.0,
                context_length: 8192,
                hardware: HardwareRequirements {
                    min_ram_gb: 4.0,
                    recommended_ram_gb: 8.0,
                    supports_ane: true,
                    supports_metal: true,
                    supports_cuda: true,
                    min_vram_gb: Some(4.0),
                },
                description: "Balanced RuvLTRA model for general-purpose tasks. \
                    Extended context window with SONA learning."
                    .to_string(),
                is_adapter: false,
                base_model: None,
                has_sona_weights: true,
            },
        );
        models.insert(
            "ruvltra-medium-q8".to_string(),
            ModelInfo {
                id: "ruvltra-medium-q8".to_string(),
                name: "RuvLTRA Medium (3B Q8)".to_string(),
                repo: "ruv/ruvltra".to_string(),
                filename: "ruvltra-medium-1.1b-q8_0.gguf".to_string(),
                size: ModelSize::Medium,
                quantization: QuantizationLevel::Q8,
                size_bytes: 4_200_000_000,
                checksum: None,
                params_b: 3.0,
                context_length: 8192,
                hardware: HardwareRequirements {
                    min_ram_gb: 6.0,
                    recommended_ram_gb: 12.0,
                    supports_ane: true,
                    supports_metal: true,
                    supports_cuda: true,
                    min_vram_gb: Some(6.0),
                },
                description: "High-quality Medium model with Q8 quantization.".to_string(),
                is_adapter: false,
                base_model: None,
                has_sona_weights: true,
            },
        );
        models.insert(
            "ruvltra-small-coder".to_string(),
            ModelInfo {
                id: "ruvltra-small-coder".to_string(),
                name: "RuvLTRA Small Coder (LoRA)".to_string(),
                repo: "ruv/ruvltra".to_string(),
                filename: "ruvltra-small-coder-lora.safetensors".to_string(),
                size: ModelSize::Tiny,
                quantization: QuantizationLevel::FP16,
                size_bytes: 50_000_000,
                checksum: None,
                params_b: 0.05,
                context_length: 4096,
                hardware: HardwareRequirements {
                    min_ram_gb: 0.1,
                    recommended_ram_gb: 0.5,
                    supports_ane: true,
                    supports_metal: true,
                    supports_cuda: true,
                    // Adapter weights are tiny; no VRAM floor of their own.
                    min_vram_gb: None,
                },
                description: "LoRA adapter for code completion. \
                    Requires ruvltra-small or ruvltra-small-q8 base model."
                    .to_string(),
                is_adapter: true,
                base_model: Some("ruvltra-small".to_string()),
                has_sona_weights: false,
            },
        );
        Self { models }
    }

    /// Looks up a model by its registry id.
    pub fn get(&self, id: &str) -> Option<&ModelInfo> {
        self.models.get(id)
    }

    /// All registered models (base models and adapters), in arbitrary order.
    pub fn list_all(&self) -> Vec<&ModelInfo> {
        self.models.values().collect()
    }

    /// All models of the given size class.
    pub fn list_by_size(&self, size: ModelSize) -> Vec<&ModelInfo> {
        self.models.values().filter(|m| m.size == size).collect()
    }

    /// All non-adapter (standalone) models.
    pub fn list_base_models(&self) -> Vec<&ModelInfo> {
        self.models.values().filter(|m| !m.is_adapter).collect()
    }

    /// All adapters declaring `base_model` as their required base.
    pub fn list_adapters(&self, base_model: &str) -> Vec<&ModelInfo> {
        self.models
            .values()
            .filter(|m| m.is_adapter && m.base_model.as_deref() == Some(base_model))
            .collect()
    }

    /// Recommends the largest base model whose minimum RAM requirement fits
    /// within `available_gb`, or `None` if nothing fits.
    ///
    /// Ties on `params_b` are broken by lexicographically smallest id so the
    /// result is deterministic (previously it depended on `HashMap` iteration
    /// order). `total_cmp` avoids the panic path of `partial_cmp().unwrap()`.
    pub fn recommend_for_ram(&self, available_gb: f32) -> Option<&ModelInfo> {
        self.models
            .values()
            .filter(|m| !m.is_adapter && m.fits_in_ram(available_gb))
            // "Smallest" under this ordering = largest params_b, then smallest id.
            .min_by(|a, b| b.params_b.total_cmp(&a.params_b).then_with(|| a.id.cmp(&b.id)))
    }

    /// Owned copies of all registry ids, in arbitrary order.
    pub fn model_ids(&self) -> Vec<String> {
        self.models.keys().cloned().collect()
    }
}
// `Default` simply delegates to `new()`, so the registry can be created via
// `RuvLtraRegistry::default()` (e.g. in derived `Default` contexts).
impl Default for RuvLtraRegistry {
fn default() -> Self {
Self::new()
}
}
/// Convenience lookup: returns an owned copy of the metadata for `id`, if known.
///
/// Note: this builds the entire registry on every call; callers doing repeated
/// lookups should construct and hold a `RuvLtraRegistry` instead.
pub fn get_model_info(id: &str) -> Option<ModelInfo> {
    let registry = RuvLtraRegistry::default();
    registry.get(id).cloned()
}
#[cfg(test)]
mod tests {
use super::*;
// The built-in catalog contains the expected ids and nothing unknown.
#[test]
fn test_registry_initialization() {
let registry = RuvLtraRegistry::new();
assert!(registry.get("ruvltra-small").is_some());
assert!(registry.get("ruvltra-medium").is_some());
assert!(registry.get("nonexistent").is_none());
}
// Spot-check the metadata of the small Q4 entry.
#[test]
fn test_model_info() {
let registry = RuvLtraRegistry::new();
let model = registry.get("ruvltra-small").unwrap();
assert_eq!(model.params_b, 0.5);
assert_eq!(model.quantization, QuantizationLevel::Q4);
assert!(model.has_sona_weights);
assert!(!model.is_adapter);
}
// Size-class filtering returns the Small models.
#[test]
fn test_list_by_size() {
let registry = RuvLtraRegistry::new();
let small_models = registry.list_by_size(ModelSize::Small);
assert!(!small_models.is_empty());
}
// The coder LoRA is listed as an adapter of ruvltra-small.
#[test]
fn test_adapters() {
let registry = RuvLtraRegistry::new();
let adapters = registry.list_adapters("ruvltra-small");
assert!(!adapters.is_empty());
assert!(adapters[0].is_adapter);
}
// With 2 GB only the 0.5B models fit; with 8 GB something is still recommended.
#[test]
fn test_ram_recommendation() {
let registry = RuvLtraRegistry::new();
let model = registry.recommend_for_ram(2.0);
assert!(model.is_some());
assert!(model.unwrap().params_b <= 1.0);
let model = registry.recommend_for_ram(8.0);
assert!(model.is_some());
}
// Quantization size multipliers relative to FP16.
#[test]
fn test_quantization_multipliers() {
assert_eq!(QuantizationLevel::Q4.size_multiplier(), 0.25);
assert_eq!(QuantizationLevel::Q8.size_multiplier(), 0.5);
assert_eq!(QuantizationLevel::FP16.size_multiplier(), 1.0);
}
// URL construction points at the Hugging Face repo and GGUF file.
#[test]
fn test_model_urls() {
let registry = RuvLtraRegistry::new();
let model = registry.get("ruvltra-small").unwrap();
let url = model.download_url();
assert!(url.contains("huggingface.co"));
assert!(url.contains("ruv/ruvltra"));
assert!(url.contains(".gguf"));
let hub_url = model.hub_url();
assert_eq!(hub_url, "https://huggingface.co/ruv/ruvltra");
}
// 662 MB at "10" (MiB/s — see estimate_download_time) is ~63 units.
#[test]
fn test_download_time_estimation() {
let registry = RuvLtraRegistry::new();
let model = registry.get("ruvltra-small").unwrap();
let time = model.estimate_download_time(10.0);
assert!(time > 60.0 && time < 70.0);
}
}