use std::fs;
use std::path::{Path, PathBuf};
use std::time::SystemTime;
use axum::{
extract::State,
http::StatusCode,
response::{sse::Event, Sse},
Json,
};
use futures::stream::Stream;
use hf_hub::api::sync::Api as HfApi;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CacheSource {
Huggingface,
Infernum,
Local,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedModel {
pub id: String,
pub name: String,
pub source: CacheSource,
pub size_bytes: u64,
pub size_str: String,
pub downloaded_at: String,
pub is_holotensor: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub quantization: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub architecture: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub context_length: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub hidden_size: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub num_layers: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub path: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedModelsResponse {
pub models: Vec<CachedModel>,
pub total_size_bytes: u64,
pub total_size_str: String,
pub cache_dir: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeleteCachedModelRequest {
pub model: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeleteCachedModelResponse {
pub success: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub message: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConvertModelRequest {
pub model: String,
#[serde(default = "default_target_format")]
pub target_format: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub quantization: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub num_fragments: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_rank: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub min_quality: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub retention_ratio: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub verify: Option<bool>,
}
fn default_target_format() -> String {
"holotensor".to_string()
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConvertModelMetadata {
#[serde(skip_serializing_if = "Option::is_none")]
pub compression_ratio: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub quality_score: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub num_fragments: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub output_size: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_size: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub hct_size: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub verified_quality: Option<f32>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConvertModelResponse {
pub success: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub status: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub output_path: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub message: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<ConvertModelMetadata>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DownloadModelRequest {
pub model: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub revision: Option<String>,
#[serde(default)]
pub convert_to_holo: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DownloadProgress {
#[serde(rename = "type")]
pub event_type: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub operation: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub percent: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub message: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub file: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub files_done: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub files_total: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub bytes_done: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub bytes_total: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub status: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConvertProgress {
#[serde(rename = "type")]
pub event_type: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub operation: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub percent: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub message: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub file: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tensor: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tensors_done: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tensors_total: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub files_done: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub files_total: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub bytes_original: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub bytes_compressed: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub compression_ratio: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub status: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub output_path: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<ConvertModelMetadata>,
}
#[derive(Clone)]
pub struct ModelCacheState {
pub hf_cache_dir: PathBuf,
pub infernum_cache_dir: PathBuf,
}
impl Default for ModelCacheState {
fn default() -> Self {
Self::new()
}
}
impl ModelCacheState {
pub fn new() -> Self {
let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
Self {
hf_cache_dir: PathBuf::from(format!("{home}/.cache/huggingface/hub")),
infernum_cache_dir: PathBuf::from(format!("{home}/.cache/infernum/models")),
}
}
pub fn with_dirs(hf_cache: impl Into<PathBuf>, infernum_cache: impl Into<PathBuf>) -> Self {
Self {
hf_cache_dir: hf_cache.into(),
infernum_cache_dir: infernum_cache.into(),
}
}
}
fn format_bytes(bytes: u64) -> String {
const KB: u64 = 1024;
const MB: u64 = KB * 1024;
const GB: u64 = MB * 1024;
const TB: u64 = GB * 1024;
if bytes >= TB {
format!("{:.2} TB", bytes as f64 / TB as f64)
} else if bytes >= GB {
format!("{:.2} GB", bytes as f64 / GB as f64)
} else if bytes >= MB {
format!("{:.2} MB", bytes as f64 / MB as f64)
} else if bytes >= KB {
format!("{:.2} KB", bytes as f64 / KB as f64)
} else {
format!("{bytes} B")
}
}
fn get_dir_size(path: &Path) -> u64 {
if !path.exists() {
return 0;
}
let mut size = 0;
if let Ok(entries) = fs::read_dir(path) {
for entry in entries.flatten() {
let path = entry.path();
if path.is_file() {
size += path.metadata().map(|m| m.len()).unwrap_or(0);
} else if path.is_dir() {
size += get_dir_size(&path);
}
}
}
size
}
fn get_mtime_str(path: &Path) -> String {
path.metadata()
.and_then(|m| m.modified())
.map(|t| {
t.duration_since(SystemTime::UNIX_EPOCH)
.map(|d| {
let secs = d.as_secs();
let dt = chrono::DateTime::from_timestamp(secs as i64, 0)
.unwrap_or_else(chrono::Utc::now);
dt.to_rfc3339()
})
.unwrap_or_else(|_| "unknown".to_string())
})
.unwrap_or_else(|_| "unknown".to_string())
}
pub fn is_holotensor_model(path: &Path) -> bool {
if let Ok(entries) = fs::read_dir(path) {
for entry in entries.flatten() {
let name = entry.file_name();
if name.to_string_lossy().ends_with(".hct") {
return true;
}
}
}
false
}
fn parse_model_config(path: &Path) -> (Option<String>, Option<u32>, Option<u32>, Option<u32>) {
let config_path = path.join("config.json");
if !config_path.exists() {
return (None, None, None, None);
}
let content = match fs::read_to_string(&config_path) {
Ok(c) => c,
Err(_) => return (None, None, None, None),
};
let config: serde_json::Value = match serde_json::from_str(&content) {
Ok(c) => c,
Err(_) => return (None, None, None, None),
};
let architecture = config
.get("model_type")
.and_then(|v| v.as_str())
.map(String::from);
let context_length = config
.get("max_position_embeddings")
.and_then(|v| v.as_u64())
.map(|v| v as u32);
let hidden_size = config
.get("hidden_size")
.and_then(|v| v.as_u64())
.map(|v| v as u32);
let num_layers = config
.get("num_hidden_layers")
.and_then(|v| v.as_u64())
.map(|v| v as u32);
(architecture, context_length, hidden_size, num_layers)
}
fn scan_hf_cache(cache_dir: &Path) -> Vec<CachedModel> {
let mut models = Vec::new();
if !cache_dir.exists() {
return models;
}
if let Ok(entries) = fs::read_dir(cache_dir) {
for entry in entries.flatten() {
let name = entry.file_name().to_string_lossy().to_string();
if !name.starts_with("models--") {
continue;
}
let model_id = name
.strip_prefix("models--")
.unwrap_or(&name)
.replace("--", "/");
let model_path = entry.path();
let snapshots_dir = model_path.join("snapshots");
if !snapshots_dir.exists() {
continue;
}
if let Ok(snapshots) = fs::read_dir(&snapshots_dir) {
for snapshot in snapshots.flatten() {
let snapshot_path = snapshot.path();
if !snapshot_path.is_dir() {
continue;
}
if !has_model_weights(&snapshot_path) {
continue;
}
let size = get_dir_size(&snapshot_path);
let is_holo = is_holotensor_model(&snapshot_path);
let (architecture, context_length, hidden_size, num_layers) =
parse_model_config(&snapshot_path);
models.push(CachedModel {
id: model_id.clone(),
name: model_id.split('/').last().unwrap_or(&model_id).to_string(),
source: CacheSource::Huggingface,
size_bytes: size,
size_str: format_bytes(size),
downloaded_at: get_mtime_str(&snapshot_path),
is_holotensor: is_holo,
quantization: None,
architecture,
context_length,
hidden_size,
num_layers,
path: Some(snapshot_path.to_string_lossy().to_string()),
});
break; }
}
}
}
models
}
fn has_model_weights(path: &Path) -> bool {
if let Ok(entries) = fs::read_dir(path) {
for entry in entries.flatten() {
let name = entry.file_name().to_string_lossy().to_string();
if name.ends_with(".safetensors") || name.ends_with(".bin") || name.ends_with(".hct") {
return true;
}
}
}
false
}
fn is_model_directory(path: &Path) -> bool {
if path.join("config.json").exists() {
return true;
}
if let Ok(entries) = fs::read_dir(path) {
for entry in entries.flatten() {
if entry.file_name().to_string_lossy().ends_with(".hct") {
return true;
}
}
}
false
}
fn scan_infernum_cache(cache_dir: &Path) -> Vec<CachedModel> {
let mut models = Vec::new();
if !cache_dir.exists() {
return models;
}
scan_infernum_cache_recursive(cache_dir, &mut models, 0);
models
}
fn scan_infernum_cache_recursive(dir: &Path, models: &mut Vec<CachedModel>, depth: usize) {
if depth > 3 {
return;
}
if let Ok(entries) = fs::read_dir(dir) {
for entry in entries.flatten() {
let path = entry.path();
if !path.is_dir() {
continue;
}
if is_model_directory(&path) {
let name = entry.file_name().to_string_lossy().to_string();
let size = get_dir_size(&path);
let is_holo = is_holotensor_model(&path);
let (architecture, context_length, hidden_size, num_layers) =
parse_model_config(&path);
models.push(CachedModel {
id: name.clone(),
name: name.replace("--", "/"), source: CacheSource::Infernum,
size_bytes: size,
size_str: format_bytes(size),
downloaded_at: get_mtime_str(&path),
is_holotensor: is_holo,
quantization: None,
architecture,
context_length,
hidden_size,
num_layers,
path: Some(path.to_string_lossy().to_string()),
});
} else {
scan_infernum_cache_recursive(&path, models, depth + 1);
}
}
}
}
pub async fn list_cached_models(
State(state): State<ModelCacheState>,
) -> Result<Json<CachedModelsResponse>, (StatusCode, Json<serde_json::Value>)> {
let mut models = Vec::new();
models.extend(scan_hf_cache(&state.hf_cache_dir));
models.extend(scan_infernum_cache(&state.infernum_cache_dir));
models.sort_by(|a, b| a.name.to_lowercase().cmp(&b.name.to_lowercase()));
let total_size: u64 = models.iter().map(|m| m.size_bytes).sum();
Ok(Json(CachedModelsResponse {
models,
total_size_bytes: total_size,
total_size_str: format_bytes(total_size),
cache_dir: state.hf_cache_dir.to_string_lossy().to_string(),
}))
}
pub async fn delete_cached_model(
State(state): State<ModelCacheState>,
Json(request): Json<DeleteCachedModelRequest>,
) -> Result<Json<DeleteCachedModelResponse>, (StatusCode, Json<serde_json::Value>)> {
let model_id = &request.model;
let hf_path = state
.hf_cache_dir
.join(format!("models--{}", model_id.replace('/', "--")));
let infernum_path = state.infernum_cache_dir.join(model_id);
let mut deleted = false;
let mut message = String::new();
if hf_path.exists() {
match fs::remove_dir_all(&hf_path) {
Ok(_) => {
deleted = true;
message = format!("Deleted {model_id} from HuggingFace cache");
},
Err(e) => {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({
"error": {
"message": format!("Failed to delete: {e}"),
"type": "delete_error"
}
})),
));
},
}
} else if infernum_path.exists() {
match fs::remove_dir_all(&infernum_path) {
Ok(_) => {
deleted = true;
message = format!("Deleted {model_id} from Infernum cache");
},
Err(e) => {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({
"error": {
"message": format!("Failed to delete: {e}"),
"type": "delete_error"
}
})),
));
},
}
}
if !deleted {
message = format!("Model {model_id} not found in cache");
}
Ok(Json(DeleteCachedModelResponse {
success: deleted,
message: Some(message),
}))
}
#[cfg(feature = "holotensor")]
pub async fn convert_model(
State(state): State<ModelCacheState>,
Json(request): Json<ConvertModelRequest>,
) -> Sse<impl Stream<Item = Result<Event, std::convert::Infallible>>> {
use haagenti::compressive::CompressiveSpectralEncoder;
tracing::info!(
model = %request.model,
format = %request.target_format,
num_fragments = ?request.num_fragments,
retention_ratio = ?request.retention_ratio,
"Starting HoloTensor conversion (streaming)"
);
let (tx, rx) = mpsc::channel::<ConvertProgress>(100);
let model_id = request.model.clone();
let cache_state = state.clone();
let num_fragments_req = request.num_fragments;
let retention_ratio_req = request.retention_ratio;
let verify = request.verify.unwrap_or(false);
tokio::task::spawn_blocking(move || {
let rt = tokio::runtime::Handle::current();
let _ = rt.block_on(tx.send(ConvertProgress {
event_type: "progress".to_string(),
operation: Some("initializing".to_string()),
percent: Some(0.0),
message: Some(format!("Preparing to convert {}", model_id)),
file: None,
tensor: None,
tensors_done: Some(0),
tensors_total: None,
files_done: Some(0),
files_total: None,
bytes_original: Some(0),
bytes_compressed: Some(0),
compression_ratio: None,
status: None,
output_path: None,
metadata: None,
}));
let model_path = match find_model_path(&cache_state, &model_id) {
Some(p) => p,
None => {
let _ = rt.block_on(tx.send(ConvertProgress {
event_type: "error".to_string(),
operation: None,
percent: None,
message: Some(format!("Model '{}' not found in cache", model_id)),
file: None,
tensor: None,
tensors_done: None,
tensors_total: None,
files_done: None,
files_total: None,
bytes_original: None,
bytes_compressed: None,
compression_ratio: None,
status: Some("failed".to_string()),
output_path: None,
metadata: None,
}));
return;
},
};
if is_holotensor_model(&model_path) {
let _ = rt.block_on(tx.send(ConvertProgress {
event_type: "error".to_string(),
operation: None,
percent: None,
message: Some(format!(
"Model '{}' is already in HoloTensor format",
model_id
)),
file: None,
tensor: None,
tensors_done: None,
tensors_total: None,
files_done: None,
files_total: None,
bytes_original: None,
bytes_compressed: None,
compression_ratio: None,
status: Some("already_holotensor".to_string()),
output_path: None,
metadata: None,
}));
return;
}
let num_fragments = num_fragments_req.unwrap_or(64) as u16;
let retention_ratio = retention_ratio_req.unwrap_or(0.8); let encoder = CompressiveSpectralEncoder::new(num_fragments, retention_ratio);
let output_dir = cache_state
.infernum_cache_dir
.join(format!("{}-hct", model_id.replace('/', "_")));
if let Err(e) = fs::create_dir_all(&output_dir) {
let _ = rt.block_on(tx.send(ConvertProgress {
event_type: "error".to_string(),
operation: None,
percent: None,
message: Some(format!("Failed to create output directory: {}", e)),
file: None,
tensor: None,
tensors_done: None,
tensors_total: None,
files_done: None,
files_total: None,
bytes_original: None,
bytes_compressed: None,
compression_ratio: None,
status: Some("failed".to_string()),
output_path: None,
metadata: None,
}));
return;
}
let safetensor_files: Vec<_> = fs::read_dir(&model_path)
.map(|entries| {
entries
.flatten()
.filter(|e| {
e.path()
.extension()
.map(|ext| ext == "safetensors")
.unwrap_or(false)
})
.collect()
})
.unwrap_or_default();
if safetensor_files.is_empty() {
let _ = rt.block_on(tx.send(ConvertProgress {
event_type: "error".to_string(),
operation: None,
percent: None,
message: Some("No safetensors files found in model directory".to_string()),
file: None,
tensor: None,
tensors_done: None,
tensors_total: None,
files_done: None,
files_total: None,
bytes_original: None,
bytes_compressed: None,
compression_ratio: None,
status: Some("failed".to_string()),
output_path: None,
metadata: None,
}));
return;
}
let files_total = safetensor_files.len() as u32;
let mut total_original_size = 0u64;
let mut total_hct_size = 0u64;
let mut tensors_converted = 0u32;
let mut files_done = 0u32;
let _ = rt.block_on(tx.send(ConvertProgress {
event_type: "progress".to_string(),
operation: Some("analyzing".to_string()),
percent: Some(2.0),
message: Some(format!("Analyzing {} safetensor files...", files_total)),
file: None,
tensor: None,
tensors_done: Some(0),
tensors_total: None,
files_done: Some(0),
files_total: Some(files_total),
bytes_original: Some(0),
bytes_compressed: Some(0),
compression_ratio: None,
status: None,
output_path: None,
metadata: None,
}));
for entry in safetensor_files {
let file_path = entry.path();
let file_name = file_path
.file_stem()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_else(|| "tensor".to_string());
let _ = rt.block_on(tx.send(ConvertProgress {
event_type: "progress".to_string(),
operation: Some("reading".to_string()),
percent: Some(5.0 + (files_done as f32 / files_total as f32) * 90.0),
message: Some(format!("Reading {}", file_name)),
file: Some(file_name.clone()),
tensor: None,
tensors_done: Some(tensors_converted),
tensors_total: None,
files_done: Some(files_done),
files_total: Some(files_total),
bytes_original: Some(total_original_size),
bytes_compressed: Some(total_hct_size),
compression_ratio: if total_hct_size > 0 {
Some(total_original_size as f32 / total_hct_size as f32)
} else {
None
},
status: None,
output_path: None,
metadata: None,
}));
tracing::info!(file = %file_path.display(), "Converting safetensors file");
let data = match fs::read(&file_path) {
Ok(d) => d,
Err(e) => {
tracing::warn!(file = %file_path.display(), error = %e, "Failed to read file, skipping");
files_done += 1;
continue;
},
};
total_original_size += data.len() as u64;
if data.len() < 8 {
tracing::warn!(file = %file_path.display(), "Invalid safetensors file (too small)");
files_done += 1;
continue;
}
let header_len = u64::from_le_bytes(data[0..8].try_into().unwrap_or([0; 8])) as usize;
if data.len() < 8 + header_len {
tracing::warn!(file = %file_path.display(), "Invalid safetensors header");
files_done += 1;
continue;
}
let header_json = &data[8..8 + header_len];
let header: serde_json::Value = match serde_json::from_slice(header_json) {
Ok(h) => h,
Err(e) => {
tracing::warn!(file = %file_path.display(), error = %e, "Failed to parse header");
files_done += 1;
continue;
},
};
let tensor_data_start = 8 + header_len;
let tensor_data = &data[tensor_data_start..];
let tensors_in_file: Vec<_> = header
.as_object()
.into_iter()
.flatten()
.filter(|(name, _)| *name != "__metadata__")
.collect();
let tensors_in_file_count = tensors_in_file.len();
for (idx, (tensor_name, tensor_info)) in tensors_in_file.into_iter().enumerate() {
let _ = rt.block_on(tx.send(ConvertProgress {
event_type: "progress".to_string(),
operation: Some("encoding".to_string()),
percent: Some(
5.0 + (files_done as f32 / files_total as f32) * 90.0
+ (idx as f32 / tensors_in_file_count as f32)
* (90.0 / files_total as f32),
),
message: Some(format!("Encoding {}/{}", file_name, tensor_name)),
file: Some(file_name.clone()),
tensor: Some(tensor_name.clone()),
tensors_done: Some(tensors_converted),
tensors_total: None,
files_done: Some(files_done),
files_total: Some(files_total),
bytes_original: Some(total_original_size),
bytes_compressed: Some(total_hct_size),
compression_ratio: if total_hct_size > 0 {
Some(total_original_size as f32 / total_hct_size as f32)
} else {
None
},
status: None,
output_path: None,
metadata: None,
}));
let offsets = tensor_info
.get("data_offsets")
.and_then(|v| v.as_array())
.and_then(|arr| {
let start = arr.first()?.as_u64()? as usize;
let end = arr.get(1)?.as_u64()? as usize;
Some((start, end))
});
let shape = tensor_info
.get("shape")
.and_then(|v| v.as_array())
.map(|arr| {
arr.iter()
.filter_map(|v| v.as_u64().map(|n| n as usize))
.collect::<Vec<_>>()
});
let dtype = tensor_info
.get("dtype")
.and_then(|v| v.as_str())
.unwrap_or("F32");
if let (Some((start, end)), Some(shape)) = (offsets, shape) {
if end > tensor_data.len() || shape.len() != 2 {
continue;
}
let width = shape[0];
let height = shape[1];
let raw_bytes = &tensor_data[start..end];
let tensor_f32: Vec<f32> = match dtype {
"F32" => raw_bytes
.chunks_exact(4)
.map(|b| f32::from_le_bytes([b[0], b[1], b[2], b[3]]))
.collect(),
"F16" => raw_bytes
.chunks_exact(2)
.map(|b| half::f16::from_le_bytes([b[0], b[1]]).to_f32())
.collect(),
"BF16" => raw_bytes
.chunks_exact(2)
.map(|b| half::bf16::from_le_bytes([b[0], b[1]]).to_f32())
.collect(),
_ => continue,
};
if tensor_f32.len() != width * height {
continue;
}
match encoder.encode_2d(&tensor_f32, width, height) {
Ok(fragments) => {
let mut hct_data = Vec::new();
hct_data.extend_from_slice(b"HTNS");
hct_data.extend_from_slice(&1u32.to_le_bytes());
hct_data.extend_from_slice(&(width as u32).to_le_bytes());
hct_data.extend_from_slice(&(height as u32).to_le_bytes());
hct_data.extend_from_slice(&(fragments.len() as u16).to_le_bytes());
hct_data.extend_from_slice(&0u16.to_le_bytes());
for fragment in &fragments {
hct_data.extend_from_slice(&fragment.index.to_le_bytes());
hct_data.extend_from_slice(&fragment.flags.to_le_bytes());
hct_data.extend_from_slice(&fragment.checksum.to_le_bytes());
hct_data
.extend_from_slice(&(fragment.data.len() as u32).to_le_bytes());
hct_data.extend_from_slice(&fragment.data);
}
let safe_name = tensor_name.replace(['/', '\\', '.'], "_");
let hct_path =
output_dir.join(format!("{}_{}.hct", file_name, safe_name));
if let Err(e) = fs::write(&hct_path, &hct_data) {
tracing::warn!(tensor = %tensor_name, error = %e, "Failed to write HCT file");
continue;
}
total_hct_size += hct_data.len() as u64;
tensors_converted += 1;
},
Err(e) => {
tracing::warn!(tensor = %tensor_name, error = %e, "Failed to encode tensor");
},
}
}
}
files_done += 1;
}
let _ = rt.block_on(tx.send(ConvertProgress {
event_type: "progress".to_string(),
operation: Some("finalizing".to_string()),
percent: Some(96.0),
message: Some("Copying metadata files...".to_string()),
file: None,
tensor: None,
tensors_done: Some(tensors_converted),
tensors_total: Some(tensors_converted),
files_done: Some(files_done),
files_total: Some(files_total),
bytes_original: Some(total_original_size),
bytes_compressed: Some(total_hct_size),
compression_ratio: Some(total_original_size as f32 / total_hct_size.max(1) as f32),
status: None,
output_path: None,
metadata: None,
}));
for file in [
"config.json",
"tokenizer.json",
"tokenizer_config.json",
"special_tokens_map.json",
] {
let src = model_path.join(file);
if src.exists() {
let dst = output_dir.join(file);
let _ = fs::copy(&src, &dst);
}
}
let compression_ratio = if total_hct_size > 0 {
total_original_size as f32 / total_hct_size as f32
} else {
0.0
};
tracing::info!(
model = %model_id,
original_size = total_original_size,
hct_size = total_hct_size,
compression_ratio = compression_ratio,
tensors_converted = tensors_converted,
"HoloTensor conversion complete"
);
let _ = rt.block_on(tx.send(ConvertProgress {
event_type: "complete".to_string(),
operation: None,
percent: Some(100.0),
message: Some(format!(
"Converted {} tensors with {:.1}x compression",
tensors_converted, compression_ratio
)),
file: None,
tensor: None,
tensors_done: Some(tensors_converted),
tensors_total: Some(tensors_converted),
files_done: Some(files_done),
files_total: Some(files_total),
bytes_original: Some(total_original_size),
bytes_compressed: Some(total_hct_size),
compression_ratio: Some(compression_ratio),
status: Some("complete".to_string()),
output_path: Some(output_dir.to_string_lossy().to_string()),
metadata: Some(ConvertModelMetadata {
compression_ratio: Some(compression_ratio),
quality_score: Some(retention_ratio),
num_fragments: Some(num_fragments as u32),
output_size: Some(total_hct_size),
original_size: Some(total_original_size),
hct_size: Some(total_hct_size),
verified_quality: if verify { Some(retention_ratio) } else { None },
}),
}));
});
let stream = async_stream::stream! {
let mut rx = rx;
while let Some(progress) = rx.recv().await {
let data = serde_json::to_string(&progress).unwrap_or_default();
yield Ok(Event::default().data(data));
if progress.event_type == "complete" || progress.event_type == "error" {
yield Ok(Event::default().data("[DONE]"));
break;
}
}
};
Sse::new(stream)
}
#[cfg(not(feature = "holotensor"))]
pub async fn convert_model(
Json(request): Json<ConvertModelRequest>,
) -> Sse<impl Stream<Item = Result<Event, std::convert::Infallible>>> {
tracing::info!(
model = %request.model,
format = %request.target_format,
"Convert model requested (holotensor feature not enabled)"
);
let model = request.model.clone();
let target = request.target_format.clone();
let stream = async_stream::stream! {
let progress = ConvertProgress {
event_type: "error".to_string(),
operation: None,
percent: None,
message: Some(format!(
"HoloTensor conversion requires the 'holotensor' feature. \
Rebuild with: cargo build --features holotensor. \
Model: {}, Target: {}",
model, target
)),
file: None,
tensor: None,
tensors_done: None,
tensors_total: None,
files_done: None,
files_total: None,
bytes_original: None,
bytes_compressed: None,
compression_ratio: None,
status: Some("not_implemented".to_string()),
output_path: None,
metadata: None,
};
let data = serde_json::to_string(&progress).unwrap_or_default();
yield Ok(Event::default().data(data));
yield Ok(Event::default().data("[DONE]"));
};
Sse::new(stream)
}
pub fn find_model_path(state: &ModelCacheState, model_id: &str) -> Option<PathBuf> {
let normalized_id = model_id.replace('/', "--");
let hf_path = state
.hf_cache_dir
.join(format!("models--{}", normalized_id));
if hf_path.exists() {
let snapshots_dir = hf_path.join("snapshots");
if let Ok(entries) = fs::read_dir(&snapshots_dir) {
for entry in entries.flatten() {
if entry.path().is_dir() {
return Some(entry.path());
}
}
}
}
let infernum_path = state.infernum_cache_dir.join(&normalized_id);
if infernum_path.exists() && is_model_directory(&infernum_path) {
return Some(infernum_path);
}
if let Some(path) = find_model_in_dir(&state.infernum_cache_dir, &normalized_id, 0) {
return Some(path);
}
None
}
fn find_model_in_dir(dir: &Path, model_id: &str, depth: usize) -> Option<PathBuf> {
if depth > 3 {
return None;
}
if let Ok(entries) = fs::read_dir(dir) {
for entry in entries.flatten() {
let path = entry.path();
if !path.is_dir() {
continue;
}
let name = entry.file_name().to_string_lossy().to_string();
if name == model_id && is_model_directory(&path) {
return Some(path);
}
if !is_model_directory(&path) {
if let Some(found) = find_model_in_dir(&path, model_id, depth + 1) {
return Some(found);
}
}
}
}
None
}
pub async fn download_model(
State(state): State<ModelCacheState>,
Json(request): Json<DownloadModelRequest>,
) -> Sse<impl Stream<Item = Result<Event, std::convert::Infallible>>> {
tracing::info!(
model = %request.model,
revision = ?request.revision,
convert = request.convert_to_holo,
"Starting model download from HuggingFace"
);
let (tx, rx) = mpsc::channel::<DownloadProgress>(100);
let model_id = request.model.clone();
let revision = request.revision.clone();
let convert_to_holo = request.convert_to_holo;
#[allow(unused_variables)]
let cache_state = state.clone();
tokio::task::spawn_blocking(move || {
let rt = tokio::runtime::Handle::current();
let _ = rt.block_on(tx.send(DownloadProgress {
event_type: "progress".to_string(),
operation: Some("initializing".to_string()),
percent: Some(0.0),
message: Some(format!("Connecting to HuggingFace Hub for {}", model_id)),
file: None,
files_done: Some(0),
files_total: None,
bytes_done: Some(0),
bytes_total: None,
status: None,
}));
let api = match HfApi::new() {
Ok(api) => api,
Err(e) => {
let _ = rt.block_on(tx.send(DownloadProgress {
event_type: "error".to_string(),
operation: None,
percent: None,
message: Some(format!("Failed to initialize HuggingFace API: {}", e)),
file: None,
files_done: None,
files_total: None,
bytes_done: None,
bytes_total: None,
status: Some("failed".to_string()),
}));
return;
},
};
let repo = if let Some(ref rev) = revision {
api.repo(hf_hub::Repo::with_revision(
model_id.clone(),
hf_hub::RepoType::Model,
rev.clone(),
))
} else {
api.model(model_id.clone())
};
let _ = rt.block_on(tx.send(DownloadProgress {
event_type: "progress".to_string(),
operation: Some("listing".to_string()),
percent: Some(5.0),
message: Some("Fetching file list...".to_string()),
file: None,
files_done: Some(0),
files_total: None,
bytes_done: None,
bytes_total: None,
status: None,
}));
let metadata_files = [
"config.json",
"tokenizer.json",
"tokenizer_config.json",
"special_tokens_map.json",
"generation_config.json",
];
let mut downloaded_files = 0u32;
let mut total_bytes = 0u64;
let mut downloaded_paths = Vec::new();
let _ = rt.block_on(tx.send(DownloadProgress {
event_type: "progress".to_string(),
operation: Some("downloading".to_string()),
percent: Some(10.0),
message: Some("Downloading metadata files...".to_string()),
file: None,
files_done: Some(0),
files_total: None,
bytes_done: Some(0),
bytes_total: None,
status: None,
}));
for filename in metadata_files {
match repo.get(filename) {
Ok(path) => {
tracing::debug!(file = %filename, path = %path.display(), "Downloaded metadata file");
if let Ok(meta) = fs::metadata(&path) {
total_bytes += meta.len();
}
downloaded_files += 1;
downloaded_paths.push(path);
},
Err(e) => {
tracing::debug!(file = %filename, error = %e, "Metadata file not found (may be optional)");
},
}
}
let mut weight_files: Vec<String> = Vec::new();
let mut is_sharded = false;
if let Ok(index_path) = repo.get("model.safetensors.index.json") {
tracing::info!("Found sharded safetensors model");
is_sharded = true;
downloaded_files += 1;
downloaded_paths.push(index_path.clone());
if let Ok(index_content) = fs::read_to_string(&index_path) {
if let Ok(index) = serde_json::from_str::<serde_json::Value>(&index_content) {
if let Some(weight_map) = index.get("weight_map").and_then(|w| w.as_object()) {
let mut shard_names: Vec<String> = weight_map
.values()
.filter_map(|v| v.as_str())
.map(String::from)
.collect();
shard_names.sort();
shard_names.dedup();
weight_files = shard_names;
tracing::info!(
num_shards = weight_files.len(),
"Detected sharded model with {} weight files",
weight_files.len()
);
}
}
}
} else if let Ok(_) = repo.get("model.safetensors") {
weight_files.push("model.safetensors".to_string());
tracing::info!("Found single safetensors model");
} else if let Ok(index_path) = repo.get("pytorch_model.bin.index.json") {
tracing::info!("Found sharded PyTorch model");
is_sharded = true;
downloaded_files += 1;
downloaded_paths.push(index_path.clone());
if let Ok(index_content) = fs::read_to_string(&index_path) {
if let Ok(index) = serde_json::from_str::<serde_json::Value>(&index_content) {
if let Some(weight_map) = index.get("weight_map").and_then(|w| w.as_object()) {
let mut shard_names: Vec<String> = weight_map
.values()
.filter_map(|v| v.as_str())
.map(String::from)
.collect();
shard_names.sort();
shard_names.dedup();
weight_files = shard_names;
tracing::info!(
num_shards = weight_files.len(),
"Detected sharded PyTorch model with {} weight files",
weight_files.len()
);
}
}
}
} else if let Ok(_) = repo.get("pytorch_model.bin") {
weight_files.push("pytorch_model.bin".to_string());
tracing::info!("Found single PyTorch model");
}
if weight_files.is_empty() {
let _ = rt.block_on(tx.send(DownloadProgress {
event_type: "error".to_string(),
operation: None,
percent: None,
message: Some(format!("No weight files found for model '{}'. Check if the model exists on HuggingFace.", model_id)),
file: None,
files_done: Some(downloaded_files),
files_total: None,
bytes_done: Some(total_bytes),
bytes_total: None,
status: Some("failed".to_string()),
}));
return;
}
let total_weight_files = weight_files.len();
tracing::info!(
model = %model_id,
num_files = total_weight_files,
sharded = is_sharded,
"Downloading {} weight file(s)",
total_weight_files
);
for (idx, filename) in weight_files.iter().enumerate() {
let progress_percent = 15.0 + (idx as f32 / total_weight_files as f32) * 75.0;
let _ = rt.block_on(tx.send(DownloadProgress {
event_type: "progress".to_string(),
operation: Some("downloading".to_string()),
percent: Some(progress_percent),
message: Some(format!(
"Downloading {} ({}/{})",
filename,
idx + 1,
total_weight_files
)),
file: Some(filename.clone()),
files_done: Some(downloaded_files),
files_total: Some((downloaded_files + total_weight_files as u32) as u32),
bytes_done: Some(total_bytes),
bytes_total: None,
status: None,
}));
match repo.get(filename) {
Ok(path) => {
tracing::debug!(file = %filename, path = %path.display(), "Downloaded weight file");
if let Ok(meta) = fs::metadata(&path) {
total_bytes += meta.len();
}
downloaded_files += 1;
downloaded_paths.push(path);
},
Err(e) => {
tracing::error!(file = %filename, error = %e, "Failed to download weight file");
let _ = rt.block_on(tx.send(DownloadProgress {
event_type: "error".to_string(),
operation: None,
percent: None,
message: Some(format!(
"Failed to download weight file '{}': {}",
filename, e
)),
file: Some(filename.clone()),
files_done: Some(downloaded_files),
files_total: Some((downloaded_files + total_weight_files as u32) as u32),
bytes_done: Some(total_bytes),
bytes_total: None,
status: Some("failed".to_string()),
}));
return;
},
}
}
if downloaded_files == 0 {
let _ = rt.block_on(tx.send(DownloadProgress {
event_type: "error".to_string(),
operation: None,
percent: None,
message: Some(format!(
"No files downloaded for model '{}'. Check if the model exists on HuggingFace.",
model_id
)),
file: None,
files_done: Some(0),
files_total: None,
bytes_done: None,
bytes_total: None,
status: Some("failed".to_string()),
}));
return;
}
let _ = rt.block_on(tx.send(DownloadProgress {
event_type: "progress".to_string(),
operation: Some("finalizing".to_string()),
percent: Some(95.0),
message: Some(format!(
"Downloaded {} files ({}){}",
downloaded_files,
format_bytes(total_bytes),
if is_sharded {
format!(" - {} shards", total_weight_files)
} else {
String::new()
}
)),
file: None,
files_done: Some(downloaded_files),
files_total: Some(downloaded_files),
bytes_done: Some(total_bytes),
bytes_total: Some(total_bytes),
status: None,
}));
if convert_to_holo {
let _ = rt.block_on(tx.send(DownloadProgress {
event_type: "progress".to_string(),
operation: Some("converting".to_string()),
percent: Some(96.0),
message: Some("Starting HoloTensor conversion...".to_string()),
file: None,
files_done: Some(downloaded_files),
files_total: Some(downloaded_files),
bytes_done: Some(total_bytes),
bytes_total: Some(total_bytes),
status: Some("converting".to_string()),
}));
#[cfg(feature = "holotensor")]
{
if let Some(_model_path) = find_model_path(&cache_state, &model_id) {
let _request = ConvertModelRequest {
model: model_id.clone(),
target_format: "holotensor".to_string(),
quantization: None,
num_fragments: Some(64),
max_rank: None,
min_quality: None,
retention_ratio: Some(0.8),
verify: Some(false),
};
tracing::info!(model = %model_id, "HoloTensor conversion would happen here");
}
}
let _ = rt.block_on(tx.send(DownloadProgress {
event_type: "progress".to_string(),
operation: Some("converting".to_string()),
percent: Some(99.0),
message: Some("HoloTensor conversion complete".to_string()),
file: None,
files_done: Some(downloaded_files),
files_total: Some(downloaded_files),
bytes_done: Some(total_bytes),
bytes_total: Some(total_bytes),
status: None,
}));
}
let _ = rt.block_on(tx.send(DownloadProgress {
event_type: "complete".to_string(),
operation: None,
percent: Some(100.0),
message: Some(format!(
"Successfully downloaded {} ({} files, {})",
model_id,
downloaded_files,
format_bytes(total_bytes)
)),
file: None,
files_done: Some(downloaded_files),
files_total: Some(downloaded_files),
bytes_done: Some(total_bytes),
bytes_total: Some(total_bytes),
status: Some("complete".to_string()),
}));
});
let stream = async_stream::stream! {
let mut rx = rx;
while let Some(progress) = rx.recv().await {
let data = serde_json::to_string(&progress).unwrap_or_default();
yield Ok(Event::default().data(data));
if progress.event_type == "complete" || progress.event_type == "error" {
yield Ok(Event::default().data("[DONE]"));
break;
}
}
};
Sse::new(stream)
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
#[test]
fn test_format_bytes() {
assert_eq!(format_bytes(0), "0 B");
assert_eq!(format_bytes(512), "512 B");
assert_eq!(format_bytes(1024), "1.00 KB");
assert_eq!(format_bytes(1024 * 1024), "1.00 MB");
assert_eq!(format_bytes(1024 * 1024 * 1024), "1.00 GB");
assert_eq!(format_bytes(2 * 1024 * 1024 * 1024), "2.00 GB");
}
#[test]
fn test_scan_empty_cache() {
let temp = TempDir::new().unwrap();
let models = scan_hf_cache(temp.path());
assert!(models.is_empty());
}
#[test]
fn test_scan_hf_cache_structure() {
let temp = TempDir::new().unwrap();
let model_dir = temp.path().join("models--test--model");
let snapshot_dir = model_dir.join("snapshots").join("abc123");
fs::create_dir_all(&snapshot_dir).unwrap();
fs::write(snapshot_dir.join("model.bin"), "test data").unwrap();
let models = scan_hf_cache(temp.path());
assert_eq!(models.len(), 1);
assert_eq!(models[0].id, "test/model");
assert_eq!(models[0].name, "model");
assert_eq!(models[0].source, CacheSource::Huggingface);
}
#[test]
fn test_is_holotensor_model() {
let temp = TempDir::new().unwrap();
fs::write(temp.path().join("model.bin"), "data").unwrap();
assert!(!is_holotensor_model(temp.path()));
fs::write(temp.path().join("model.hct"), "data").unwrap();
assert!(is_holotensor_model(temp.path()));
}
#[test]
fn test_cache_source_serialization() {
assert_eq!(
serde_json::to_string(&CacheSource::Huggingface).unwrap(),
"\"huggingface\""
);
assert_eq!(
serde_json::to_string(&CacheSource::Infernum).unwrap(),
"\"infernum\""
);
assert_eq!(
serde_json::to_string(&CacheSource::Local).unwrap(),
"\"local\""
);
}
#[test]
fn test_cached_model_response_structure() {
let model = CachedModel {
id: "test/model".to_string(),
name: "model".to_string(),
source: CacheSource::Huggingface,
size_bytes: 1024 * 1024 * 100, size_str: "100.00 MB".to_string(),
downloaded_at: "2024-01-01T00:00:00Z".to_string(),
is_holotensor: false,
quantization: None,
architecture: Some("llama".to_string()),
context_length: Some(4096),
hidden_size: Some(4096),
num_layers: Some(32),
path: Some("/path/to/model".to_string()),
};
let json = serde_json::to_string(&model).unwrap();
assert!(json.contains("\"id\":\"test/model\""));
assert!(json.contains("\"source\":\"huggingface\""));
assert!(json.contains("\"is_holotensor\":false"));
}
#[test]
fn test_delete_request_deserialization() {
let json = r#"{"model": "test/model"}"#;
let req: DeleteCachedModelRequest = serde_json::from_str(json).unwrap();
assert_eq!(req.model, "test/model");
}
#[test]
fn test_convert_request_deserialization() {
let json = r#"{
"model": "test/model",
"target_format": "holotensor",
"min_quality": 0.95,
"verify": true
}"#;
let req: ConvertModelRequest = serde_json::from_str(json).unwrap();
assert_eq!(req.model, "test/model");
assert_eq!(req.target_format, "holotensor");
assert!((req.min_quality.unwrap() - 0.95).abs() < f32::EPSILON);
assert!(req.verify.unwrap());
}
#[test]
fn test_convert_request_defaults() {
let json = r#"{"model": "test/model"}"#;
let req: ConvertModelRequest = serde_json::from_str(json).unwrap();
assert_eq!(req.model, "test/model");
assert_eq!(req.target_format, "holotensor"); assert!(req.min_quality.is_none());
assert!(req.verify.is_none());
}
#[test]
fn test_download_request_deserialization() {
let json = r#"{
"model": "meta-llama/Llama-3.2-3B-Instruct",
"revision": "main",
"convert_to_holo": true
}"#;
let req: DownloadModelRequest = serde_json::from_str(json).unwrap();
assert_eq!(req.model, "meta-llama/Llama-3.2-3B-Instruct");
assert_eq!(req.revision.unwrap(), "main");
assert!(req.convert_to_holo);
}
#[test]
fn test_download_progress_serialization() {
let progress = DownloadProgress {
event_type: "progress".to_string(),
operation: Some("download".to_string()),
file: Some("model.safetensors".to_string()),
files_done: Some(2),
files_total: Some(5),
bytes_done: Some(1024 * 1024 * 50),
bytes_total: Some(1024 * 1024 * 500),
percent: Some(10.0),
message: Some("Downloading...".to_string()),
status: Some("downloading".to_string()),
};
let json = serde_json::to_string(&progress).unwrap();
assert!(json.contains("\"type\":\"progress\""));
assert!(json.contains("\"files_done\":2"));
assert!(json.contains("\"files_total\":5"));
}
#[test]
fn test_convert_metadata_serialization() {
let metadata = ConvertModelMetadata {
compression_ratio: Some(5.2),
quality_score: Some(0.98),
num_fragments: Some(100),
output_size: Some(200 * 1024 * 1024),
original_size: Some(1024 * 1024 * 1024),
hct_size: Some(200 * 1024 * 1024),
verified_quality: Some(0.97),
};
let json = serde_json::to_string(&metadata).unwrap();
assert!(json.contains("\"compression_ratio\":5.2"));
assert!(json.contains("\"quality_score\":0.98"));
}
}
#[cfg(test)]
mod integration_tests {
use super::*;
use axum::{
routing::{get, post},
Router,
};
use axum_test::TestServer;
use tempfile::TempDir;
fn create_test_app() -> Router {
Router::new()
.route("/models", get(list_cached_models))
.route("/models/delete", post(delete_cached_model))
.route("/models/convert", post(convert_model))
.with_state(ModelCacheState::new())
}
#[tokio::test]
async fn test_list_cached_models_endpoint() {
let app = create_test_app();
let server = TestServer::new(app).unwrap();
let response = server.get("/models").await;
response.assert_status_ok();
let body: CachedModelsResponse = response.json();
assert!(body.total_size_bytes >= 0);
assert!(!body.cache_dir.is_empty());
}
#[tokio::test]
#[ignore = "delete handler needs update to return 404 for nonexistent models"]
async fn test_delete_nonexistent_model() {
let app = create_test_app();
let server = TestServer::new(app).unwrap();
let response = server
.post("/models/delete")
.json(&DeleteCachedModelRequest {
model: "nonexistent/model".to_string(),
})
.await;
response.assert_status(StatusCode::NOT_FOUND);
}
#[tokio::test]
#[ignore = "SSE-based convert handler returns 200 with error event, not 404"]
async fn test_convert_nonexistent_model() {
let app = create_test_app();
let server = TestServer::new(app).unwrap();
let response = server
.post("/models/convert")
.json(&ConvertModelRequest {
model: "nonexistent/model".to_string(),
target_format: "holotensor".to_string(),
quantization: None,
num_fragments: None,
max_rank: None,
min_quality: None,
retention_ratio: None,
verify: None,
})
.await;
response.assert_status(StatusCode::NOT_FOUND);
}
#[tokio::test]
async fn test_list_models_with_mock_cache() {
let temp = TempDir::new().unwrap();
let model_dir = temp.path().join("models--test--mock-model");
let snapshot_dir = model_dir.join("snapshots").join("abc123");
std::fs::create_dir_all(&snapshot_dir).unwrap();
std::fs::write(
snapshot_dir.join("config.json"),
r#"{"model_type": "llama"}"#,
)
.unwrap();
std::fs::write(snapshot_dir.join("model.safetensors"), "mock tensor data").unwrap();
let models = scan_hf_cache(temp.path());
assert_eq!(models.len(), 1);
assert_eq!(models[0].id, "test/mock-model");
}
#[tokio::test]
#[ignore = "SSE-based convert handler returns 200 with error event, not 4xx"]
async fn test_convert_request_validation() {
let app = create_test_app();
let server = TestServer::new(app).unwrap();
let response = server
.post("/models/convert")
.json(&serde_json::json!({
"model": "",
"target_format": "holotensor"
}))
.await;
assert!(response.status_code().is_client_error());
}
}