use anyhow::Result;
use futures_util::StreamExt;
use tokio::io::AsyncWriteExt;
/// Value of the shared download-state atomic that `download_file` interprets
/// as "paused".
pub const DOWNLOAD_STATE_PAUSED: u8 = 2;
/// Value of the shared download-state atomic that makes `download_file` abort,
/// delete the partially written file and return an error.
pub const DOWNLOAD_STATE_CANCELLED: u8 = 3;
/// Returns the free space, in bytes, of the filesystem that contains `path`.
///
/// The value is `f_bavail * f_frsize` as reported by `statvfs(3)`, i.e. the
/// blocks available to unprivileged users multiplied by the fragment size.
///
/// Returns `0` when the path contains an interior NUL byte or when the
/// `statvfs` call fails (for example because the path does not exist).
pub fn get_free_space_bytes(path: &std::path::Path) -> u64 {
    use std::os::unix::ffi::OsStrExt;

    // Hand the raw OS bytes to the C API: a lossy UTF-8 conversion would
    // silently rewrite non-UTF-8 paths into a different (non-existent) path.
    let c_path = match std::ffi::CString::new(path.as_os_str().as_bytes()) {
        Ok(c) => c,
        Err(_) => return 0,
    };
    // SAFETY: `libc::statvfs` is a plain C struct made only of integer fields,
    // for which the all-zero bit pattern is a valid value.
    let mut stat: libc::statvfs = unsafe { std::mem::zeroed() };
    // SAFETY: `c_path` is a valid NUL-terminated string that outlives the call
    // and `stat` is a valid, exclusively borrowed `statvfs` buffer.
    let result = unsafe { libc::statvfs(c_path.as_ptr(), &mut stat) };
    if result != 0 {
        return 0;
    }
    // The widths of these fields differ between platforms, so widen both to
    // `u64` explicitly and saturate instead of overflowing on huge volumes.
    #[allow(clippy::unnecessary_cast)]
    let (available_blocks, fragment_size) = (stat.f_bavail as u64, stat.f_frsize as u64);
    available_blocks.saturating_mul(fragment_size)
}
/// Picks the hard-coded fallback release tag for a GitHub repository name.
///
/// Names containing `"lemonade"` map to `"b1273"`; otherwise names containing
/// `"cuda"` map to `"b9279"`; every other name maps to `"b4100"`.
fn default_tag(repo: &str) -> String {
    let tag = match repo {
        name if name.contains("lemonade") => "b1273",
        name if name.contains("cuda") => "b9279",
        _ => "b4100",
    };
    tag.to_owned()
}
/// Searches the Hugging Face Hub for GGUF models matching `query`.
///
/// `limit` and `offset` are forwarded to the Hub API as query parameters. The
/// response is additionally filtered locally: a model is kept only when its id
/// contains every whitespace-separated word of `query` (case-insensitive).
///
/// Returns a tuple of:
/// 1. the filtered search results,
/// 2. a count that is currently always `1`,
/// 3. the ids of *all* models in the API response, before local filtering.
///
/// # Errors
///
/// Fails when the HTTP request fails, the server answers with an error status,
/// or the body is not a JSON array.
pub async fn search_models(
    query: &str,
    limit: u32,
    offset: u32,
) -> Result<(Vec<crate::models::SearchResult>, usize, Vec<String>)> {
    let url = format!(
        "https://huggingface.co/api/models?search={}&limit={}&offset={}&filter=gguf&expand=config&expand=gguf&expand=downloads&expand=likes&expand=tags&expand=pipeline_tag&expand=trendingScore&expand=createdAt",
        urlencoding::encode(query),
        limit,
        offset
    );
    let resp = reqwest::get(&url).await?.error_for_status()?;
    let models: Vec<serde_json::Value> = resp.json().await?;
    let query_words: Vec<String> = query
        .split_whitespace()
        .map(|w| w.to_lowercase())
        .collect();
    // Collected before filtering so callers see every id the API returned.
    let raw_ids: Vec<String> = models
        .iter()
        .filter_map(|m| m.get("id").and_then(|v| v.as_str()))
        .map(|s| s.to_string())
        .collect();
    let results: Vec<crate::models::SearchResult> = models
        .into_iter()
        .filter_map(|m| {
            let model_id = m.get("id")?.as_str()?.to_string();
            let model_lower = model_id.to_lowercase();
            if !query_words.is_empty() && !query_words.iter().all(|w| model_lower.contains(w)) {
                return None;
            }
            let model_name = model_id.clone();
            let tags: Vec<String> = m
                .get("tags")
                .and_then(|t| t.as_array())
                .map(|t| {
                    t.iter()
                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
                        .collect()
                })
                .unwrap_or_default();
            let downloads = m.get("downloads").and_then(|v| v.as_u64()).unwrap_or(0);
            let likes = m.get("likes").and_then(|v| v.as_u64()).unwrap_or(0);
            let pipeline_tag = m
                .get("pipeline_tag")
                .and_then(|v| v.as_str())
                .map(|s| s.to_string());
            let trending_score = m.get("trendingScore").and_then(|v| v.as_i64()).unwrap_or(0);
            let created_at = m
                .get("createdAt")
                .and_then(|v| v.as_str())
                .map(|s| s.to_string());
            // First tag carrying the given prefix, with the prefix removed.
            let quantization = tags
                .iter()
                .find_map(|t| t.strip_prefix("gguf:"))
                .map(|s| s.to_string());
            let license = tags
                .iter()
                .find_map(|t| t.strip_prefix("license:"))
                .map(|s| s.to_string());
            let gguf = m.get("gguf");
            let architecture = gguf
                .and_then(|g| g.get("architecture"))
                .and_then(|v| v.as_str())
                .map(|s| s.to_string());
            // NOTE(review): `parameters` is populated from the GGUF
            // architecture string, exactly as before — confirm this is intended.
            let parameters = architecture.clone();
            let capabilities: Vec<String> = architecture.into_iter().collect();
            let size = gguf
                .and_then(|g| g.get("total"))
                .and_then(|v| v.as_u64())
                .or_else(|| {
                    gguf.and_then(|g| g.get("totalFileSize"))
                        .and_then(|v| v.as_u64())
                });
            // A value that does not fit in `u32` is reported as unknown rather
            // than being silently truncated.
            let context_length = gguf
                .and_then(|g| g.get("context_length"))
                .and_then(|v| v.as_u64())
                .and_then(|v| u32::try_from(v).ok());
            Some(crate::models::SearchResult {
                model_id,
                model_name,
                tags,
                downloads,
                likes,
                pipeline_tag,
                size,
                parameters,
                capabilities,
                context_length,
                readme: None,
                quantization,
                license,
                trending_score,
                created_at,
                downloaded: false,
            })
        })
        .collect();
    // NOTE(review): the second tuple element is a constant `1`, not a real
    // total — confirm what callers expect before changing it.
    Ok((results, 1, raw_ids))
}
/// Lists the `.gguf` files found in the `main` tree of a Hugging Face model
/// repository.
///
/// Each returned tuple is `(path, size, url)`:
/// - `size` is the `lfs.size` field of the tree entry, or `0` when absent;
/// - `url` is the `lfs.url` field when present, otherwise a
///   `https://huggingface.co/<model_id>/resolve/main/<path>` URL.
///
/// # Errors
///
/// Fails when the HTTP request fails, the response is not a JSON array, or no
/// entry has a path ending in `.gguf`.
pub async fn list_gguf_files(model_id: &str) -> Result<Vec<(String, u64, String)>> {
    let tree_url = format!("https://huggingface.co/api/models/{}/tree/main", model_id);
    let response = reqwest::get(&tree_url).await?.error_for_status()?;
    let entries: Vec<serde_json::Value> = response.json().await?;

    let found: Vec<(String, u64, String)> = entries
        .iter()
        .filter_map(|entry| {
            // Entries without a string `path` are treated as an empty path and
            // therefore skipped by the extension check.
            let path = entry.get("path").and_then(|p| p.as_str()).unwrap_or("");
            if !path.ends_with(".gguf") {
                return None;
            }
            let lfs = entry.get("lfs");
            let size = lfs
                .and_then(|l| l.get("size"))
                .and_then(|s| s.as_u64())
                .unwrap_or(0);
            let download_url = match lfs.and_then(|l| l.get("url")).and_then(|u| u.as_str()) {
                Some(explicit) => explicit.to_string(),
                None => format!("https://huggingface.co/{model_id}/resolve/main/{path}"),
            };
            Some((path.to_string(), size, download_url))
        })
        .collect();

    if found.is_empty() {
        anyhow::bail!("No .gguf files found in {}", model_id);
    }
    Ok(found)
}
/// Downloads the raw `README.md` from the `main` branch of a Hugging Face
/// model repository and returns its text.
///
/// # Errors
///
/// Fails when the request cannot be sent, the server answers with an error
/// status, or the body cannot be read as text.
pub async fn fetch_readme(model_id: &str) -> Result<String> {
    let readme_url = format!("https://huggingface.co/{}/raw/main/README.md", model_id);
    let request = reqwest::Client::new()
        .get(&readme_url)
        .header("User-Agent", "llm-manager/1.1.0");
    let response = request.send().await?.error_for_status()?;
    Ok(response.text().await?)
}
/// Streams `url` into the file at `dest`, reporting progress along the way.
///
/// * `progress` is updated in place (`total_bytes`, `downloaded_bytes`,
///   `bytes_per_second`, and finally `status`) and a clone of it is broadcast
///   on `tx` at most every 100 ms while bytes are arriving, plus once on
///   completion. Send errors (no receivers) are ignored.
/// * `download_state` is polled after every chunk:
///   [`DOWNLOAD_STATE_CANCELLED`] aborts the download and deletes the partial
///   file; [`DOWNLOAD_STATE_PAUSED`] suspends reading from the network until
///   the state changes.
/// * `_model_id` and `_filename` are currently unused.
///
/// # Errors
///
/// Fails when the request fails, the server answers with an error status, the
/// stream or a write fails, or the download is cancelled. In the streaming,
/// write and cancel cases the partially written `dest` is removed (best effort).
pub async fn download_file(
    _model_id: &str,
    _filename: &str,
    url: &str,
    dest: &std::path::Path,
    progress: &mut crate::models::DownloadState,
    download_state: std::sync::Arc<std::sync::atomic::AtomicU8>,
    tx: tokio::sync::broadcast::Sender<crate::models::DownloadState>,
) -> Result<()> {
    use std::sync::atomic::Ordering;
    use std::time::{Duration, Instant};

    /// Closes the partially written file and removes it (best effort).
    async fn discard_partial(file: tokio::fs::File, dest: &std::path::Path) {
        drop(file);
        let _ = tokio::fs::remove_file(dest).await;
    }

    let client = reqwest::Client::new();
    let resp = client.get(url).send().await?.error_for_status()?;
    if let Some(len) = resp.content_length() {
        progress.total_bytes = len;
    }
    let mut stream = resp.bytes_stream();
    let mut file = tokio::fs::File::create(dest).await?;
    let mut last_update = Instant::now();
    let mut last_bytes = 0u64;
    while let Some(chunk) = stream.next().await {
        let chunk = match chunk {
            Ok(c) => c,
            Err(e) => {
                discard_partial(file, dest).await;
                return Err(anyhow::anyhow!("Stream error: {}", e));
            }
        };
        if let Err(e) = file.write_all(&chunk).await {
            discard_partial(file, dest).await;
            return Err(anyhow::anyhow!("Write error: {}", e));
        }
        progress.downloaded_bytes += chunk.len() as u64;
        let elapsed = progress.start_time.elapsed().as_secs_f64();
        if elapsed > 0.0 {
            progress.bytes_per_second = progress.downloaded_bytes as f64 / elapsed;
        }

        // Wait here for as long as the download is paused. Previously a pause
        // only slept for 100 ms and then pulled the next chunk anyway, so the
        // transfer merely slowed down instead of stopping.
        loop {
            let state = download_state.load(Ordering::Relaxed);
            if state == DOWNLOAD_STATE_CANCELLED {
                discard_partial(file, dest).await;
                return Err(anyhow::anyhow!("Download cancelled"));
            }
            if state != DOWNLOAD_STATE_PAUSED {
                break;
            }
            // When the progress record carries its own state handle, it must
            // agree that the download is paused.
            let should_pause = if let Some(arc) = &progress.download_state_arc {
                arc.load(Ordering::Relaxed) == DOWNLOAD_STATE_PAUSED
            } else {
                true
            };
            if !should_pause {
                break;
            }
            tokio::time::sleep(Duration::from_millis(100)).await;
        }

        if last_update.elapsed() >= Duration::from_millis(100)
            && progress.downloaded_bytes != last_bytes
        {
            let _ = tx.send(progress.clone());
            last_update = Instant::now();
            last_bytes = progress.downloaded_bytes;
        }
    }
    // `tokio::fs::File` may still have writes in flight after `write_all`
    // returns; flush so the data is on disk before success is reported and the
    // caller starts reading the file.
    if let Err(e) = file.flush().await {
        discard_partial(file, dest).await;
        return Err(anyhow::anyhow!("Write error: {}", e));
    }
    progress.status = crate::models::DownloadStatus::Complete;
    let _ = tx.send(progress.clone());
    Ok(())
}
/// Returns the directory under which backend binaries are installed:
/// `<local data dir>/llm-manager/bin`.
///
/// When `dirs::data_local_dir()` yields nothing, the result is the relative
/// path `llm-manager/bin`.
pub fn get_bin_base() -> std::path::PathBuf {
    let mut base = dirs::data_local_dir().unwrap_or_default();
    base.push("llm-manager");
    base.push("bin");
    base
}
/// File name of the server executable for the OS this program was built for:
/// `llama-server.exe` on Windows, `llama-server` everywhere else.
pub fn binary_name() -> &'static str {
    if std::env::consts::OS == "windows" {
        "llama-server.exe"
    } else {
        "llama-server"
    }
}
/// File name of the shared library whose presence, next to the server binary,
/// marks a backend directory as fully installed.
///
/// `libllama.dll` on Windows, `libllama.dylib` on macOS, `libllama.so`
/// everywhere else.
pub fn lib_sentinel_name() -> &'static str {
    let os = std::env::consts::OS;
    if os == "windows" {
        "libllama.dll"
    } else if os == "macos" {
        "libllama.dylib"
    } else {
        "libllama.so"
    }
}
/// Shared-library file extension (including the leading dot) for the OS this
/// program was built for: `.dll` on Windows, `.dylib` on macOS, `.so`
/// everywhere else.
pub fn lib_extension() -> &'static str {
    let os = std::env::consts::OS;
    if os == "windows" {
        ".dll"
    } else if os == "macos" {
        ".dylib"
    } else {
        ".so"
    }
}
/// Returns the install directory for one backend release:
/// `<bin base>/llama-server-<backend slug>-<tag>`.
pub fn get_backend_dir(backend: crate::models::Backend, tag: &str) -> std::path::PathBuf {
    let base = get_bin_base();
    let dir_name = format!("llama-server-{}-{}", backend.slug(), tag);
    base.join(dir_name)
}
/// Reports whether at least one release of `backend` is installed.
///
/// A release counts as installed when a directory in the bin base whose name
/// starts with `llama-server-<backend slug>-` contains both the server binary
/// and the sentinel shared library. A missing or unreadable bin base yields
/// `false`.
pub fn is_backend_any_version_installed(backend: crate::models::Backend) -> bool {
    let root = get_bin_base();
    if !root.exists() {
        return false;
    }
    let dir_prefix = format!("llama-server-{}-", backend.slug());
    let server = binary_name();
    let sentinel = lib_sentinel_name();
    match std::fs::read_dir(root) {
        Ok(entries) => entries.filter_map(|e| e.ok()).any(|entry| {
            let belongs_to_backend = entry
                .file_name()
                .to_string_lossy()
                .starts_with(dir_prefix.as_str());
            if !belongs_to_backend {
                return false;
            }
            let dir = entry.path();
            dir.join(server).exists() && dir.join(sentinel).exists()
        }),
        Err(_) => false,
    }
}
/// Reports whether the release `tag` of `backend` is installed, i.e. whether
/// its backend directory contains both the server binary and the sentinel
/// shared library. Returns `false` when `tag` is `None`.
pub fn is_backend_version_installed(backend: crate::models::Backend, tag: Option<&str>) -> bool {
    tag.map_or(false, |release| {
        let dir = get_backend_dir(backend, release);
        let server = dir.join(binary_name());
        let sentinel = dir.join(lib_sentinel_name());
        server.exists() && sentinel.exists()
    })
}
/// Lists every installed backend release as `(backend, tag)` pairs.
///
/// Scans the bin base for directories named `llama-server-<slug>-<tag>`, maps
/// the slug back to a backend variant, and keeps the entry when the directory
/// contains the server binary. Directories with an unrecognised slug are
/// skipped. A missing or unreadable bin base yields an empty list.
///
/// The result is grouped by backend (ordered by the backend's `Debug` name)
/// and, within a backend, sorted newest tag first. Tags are compared by their
/// numeric part when they have one (so `b10000` ranks above `b9999`), falling
/// back to a plain string comparison.
pub fn list_installed_backends() -> Vec<(crate::models::Backend, String)> {
    /// Numeric part of a release tag such as `b4100`, if the tag consists of
    /// an optional non-digit prefix followed only by digits.
    fn tag_number(tag: &str) -> Option<u64> {
        tag.trim_start_matches(|c: char| !c.is_ascii_digit())
            .parse()
            .ok()
    }

    let bin_base = get_bin_base();
    let mut installed = Vec::new();
    if !bin_base.exists() {
        return installed;
    }
    let bin_name = binary_name();
    if let Ok(entries) = std::fs::read_dir(bin_base) {
        for entry in entries.flatten() {
            let dir = entry.path();
            if !dir.is_dir() {
                continue;
            }
            let name = entry.file_name();
            let name_str = name.to_string_lossy();
            let suffix = match name_str.strip_prefix("llama-server-") {
                Some(rest) => rest,
                None => continue,
            };
            // `<slug>-<tag>`: the tag is the last dash-separated component and
            // the slug occupies everything before it.
            let parts: Vec<&str> = suffix.split('-').collect();
            if parts.len() < 2 {
                continue;
            }
            let tag = parts[parts.len() - 1].to_string();
            // Because the name is split on '-', a multi-word slug (presumably
            // e.g. `win-cpu`) shows up as two components and has to be matched
            // as such; a single pattern like "win-cpu" can never match here.
            let backend = match (parts[0], parts.get(1).copied()) {
                ("rocm", Some("lemonade")) => crate::models::Backend::RocmLemonade,
                ("win", Some("cuda")) if parts.len() >= 4 && parts[2] == "12.4" => {
                    crate::models::Backend::CudaWindows12_4
                }
                ("win", Some("cuda")) if parts.len() >= 4 && parts[2] == "13.1" => {
                    crate::models::Backend::CudaWindows13_1
                }
                ("win", Some("cpu")) => crate::models::Backend::CpuWindows,
                ("win", Some("vulkan")) => crate::models::Backend::VulkanWindows,
                ("win", Some("hip")) => crate::models::Backend::HipWindows,
                ("cpu", Some("arm64")) => crate::models::Backend::CpuArm64,
                ("macos", Some("arm64")) => crate::models::Backend::CpuMacosArm64,
                ("macos", Some("x64")) => crate::models::Backend::CpuMacosX64,
                ("cpu", _) => crate::models::Backend::Cpu,
                ("vulkan", _) => crate::models::Backend::Vulkan,
                ("rocm", _) => crate::models::Backend::Rocm,
                ("cuda", _) => crate::models::Backend::Cuda,
                _ => continue,
            };
            if dir.join(bin_name).exists() {
                installed.push((backend, tag));
            }
        }
    }
    installed.sort_by(|a, b| {
        format!("{:?}", a.0)
            .cmp(&format!("{:?}", b.0))
            // Newest first: numeric comparison so that five-digit build
            // numbers outrank four-digit ones, then the raw string as a
            // tie-breaker for tags without a comparable number.
            .then_with(|| tag_number(&b.1).cmp(&tag_number(&a.1)))
            .then_with(|| b.1.cmp(&a.1))
    });
    installed
}
/// Resolves the path of the server binary for `backend`, downloading and
/// installing the release when it is not present yet.
///
/// Release tag selection:
/// 1. a non-empty `version` is used as is;
/// 2. otherwise the first installed release of `backend` reported by
///    [`list_installed_backends`];
/// 3. otherwise the latest GitHub release tag of the backend's repository,
///    with a hard-coded fallback tag.
///
/// When both the server binary and the sentinel library already exist in the
/// backend directory, the binary path is returned without any download.
/// Otherwise the release archive is downloaded into the backend directory,
/// extracted, and the server binary, `llama-bench` (if present) and all shared
/// libraries are moved/copied next to each other. Temporary files are removed
/// at the end (best effort).
///
/// * `log_tx` — optional channel receiving human-readable status lines.
/// * `progress_tx` — when set, the download goes through [`download_file`] and
///   progress updates are broadcast on this channel.
///
/// # Errors
///
/// Fails when directories cannot be created, the download or extraction fails,
/// or the archive does not contain the server binary.
pub async fn resolve_backend_binary(
    backend: crate::models::Backend,
    version: Option<&str>,
    log_tx: Option<tokio::sync::mpsc::Sender<String>>,
    progress_tx: Option<tokio::sync::broadcast::Sender<crate::models::DownloadState>>,
) -> Result<std::path::PathBuf> {
    tracing::info!(
        "resolve_backend_binary: backend={}, version={:?}",
        backend,
        version
    );
    let tag = match version {
        Some(v) if !v.is_empty() => {
            tracing::info!(" -> using explicit version: {}", v);
            v.to_string()
        }
        _ => {
            let installed = list_installed_backends();
            let backend_versions: Vec<_> = installed
                .iter()
                .filter(|(b, _)| *b == backend)
                .map(|(_, t)| t.clone())
                .collect();
            tracing::info!(
                " -> no explicit version, found {} installed versions for backend: {:?}",
                backend_versions.len(),
                backend
            );
            for v in &backend_versions {
                tracing::info!(" installed version: {}", v);
            }
            // The installed list is already ordered, so the first entry for
            // this backend is the one to prefer.
            if let Some(t) = backend_versions.first() {
                tracing::info!(" -> using latest installed version: {}", t);
                t.clone()
            } else {
                let repo = match backend {
                    crate::models::Backend::RocmLemonade => "lemonade-sdk/llamacpp-rocm",
                    crate::models::Backend::Cuda => "ai-dock/llama.cpp-cuda",
                    _ => "ggml-org/llama.cpp",
                };
                tracing::info!(
                    " -> no local version, fetching latest from GitHub repo: {}",
                    repo
                );
                fetch_latest_release_tag(repo, &default_tag(repo)).await
            }
        }
    };
    let bin_dir = get_backend_dir(backend, &tag);
    let bin_name = binary_name();
    let bin_path = bin_dir.join(bin_name);
    tracing::info!(
        " -> resolved tag={}, bin_dir={}, bin_path={}",
        tag,
        bin_dir.display(),
        bin_path.display()
    );
    let lib_name = lib_sentinel_name();
    let lib_sentinel = bin_dir.join(lib_name);
    tracing::info!(
        " -> checking binary existence: bin_path={} lib_sentinel={}",
        bin_path.exists(),
        lib_sentinel.exists()
    );
    // Both files must exist: the binary alone could be left over from an
    // interrupted installation.
    if bin_path.exists() && lib_sentinel.exists() {
        tracing::info!(" -> binary already exists, returning cached path");
        return Ok(bin_path);
    }
    tracing::info!(" -> binary not found, will download");
    std::fs::create_dir_all(&bin_dir)?;
    // Release asset URL and archive kind (`true` = zip, `false` = tar.gz).
    let (download_url, is_zip) = match backend {
        crate::models::Backend::Cpu => (
            format!(
                "https://github.com/ggml-org/llama.cpp/releases/download/{tag}/llama-{tag}-bin-ubuntu-x64.tar.gz"
            ),
            false,
        ),
        crate::models::Backend::Vulkan => (
            format!(
                "https://github.com/ggml-org/llama.cpp/releases/download/{tag}/llama-{tag}-bin-ubuntu-vulkan-x64.tar.gz"
            ),
            false,
        ),
        crate::models::Backend::Rocm => (
            format!(
                "https://github.com/ggml-org/llama.cpp/releases/download/{tag}/llama-{tag}-bin-ubuntu-rocm-7.2-x64.tar.gz"
            ),
            false,
        ),
        crate::models::Backend::RocmLemonade => {
            use crate::backend::hardware::{detect_amd_gfx_target, get_lemonade_gfx_suffix};
            // Falls back to "gfx1100" when no AMD GPU target is detected.
            let gfx = detect_amd_gfx_target().unwrap_or_else(|| "gfx1100".to_string());
            let suffix = get_lemonade_gfx_suffix(&gfx);
            (
                format!(
                    "https://github.com/lemonade-sdk/llamacpp-rocm/releases/download/{tag}/llama-{tag}-ubuntu-rocm-{suffix}-x64.zip"
                ),
                true,
            )
        }
        crate::models::Backend::Cuda => (
            format!(
                "https://github.com/ai-dock/llama.cpp-cuda/releases/download/{tag}/llama.cpp-{tag}-cuda-12.8-amd64.tar.gz"
            ),
            false,
        ),
        crate::models::Backend::CpuArm64 => (
            format!(
                "https://github.com/ggml-org/llama.cpp/releases/download/{tag}/llama-{tag}-bin-ubuntu-arm64.tar.gz"
            ),
            false,
        ),
        crate::models::Backend::CpuWindows => (
            format!(
                "https://github.com/ggml-org/llama.cpp/releases/download/{tag}/llama-{tag}-bin-win-cpu-x64.zip"
            ),
            true,
        ),
        crate::models::Backend::VulkanWindows => (
            format!(
                "https://github.com/ggml-org/llama.cpp/releases/download/{tag}/llama-{tag}-bin-win-vulkan-x64.zip"
            ),
            true,
        ),
        crate::models::Backend::CudaWindows12_4 => (
            format!(
                "https://github.com/ggml-org/llama.cpp/releases/download/{tag}/llama-{tag}-bin-win-cuda-12.4-x64.zip"
            ),
            true,
        ),
        crate::models::Backend::CudaWindows13_1 => (
            format!(
                "https://github.com/ggml-org/llama.cpp/releases/download/{tag}/llama-{tag}-bin-win-cuda-13.1-x64.zip"
            ),
            true,
        ),
        crate::models::Backend::HipWindows => (
            format!(
                "https://github.com/ggml-org/llama.cpp/releases/download/{tag}/llama-{tag}-bin-win-hip-radeon-x64.zip"
            ),
            true,
        ),
        crate::models::Backend::CpuMacosArm64 => (
            format!(
                "https://github.com/ggml-org/llama.cpp/releases/download/{tag}/llama-{tag}-bin-macos-arm64.tar.gz"
            ),
            false,
        ),
        crate::models::Backend::CpuMacosX64 => (
            format!(
                "https://github.com/ggml-org/llama.cpp/releases/download/{tag}/llama-{tag}-bin-macos-x64.tar.gz"
            ),
            false,
        ),
    };
    if let Some(tx) = &log_tx {
        let _ = tx.send(format!("Download URL: {}", download_url)).await;
        let _ = tx
            .send(format!("Install path: {}", bin_dir.display()))
            .await;
    }
    // The temporary file keeps the archive extension because
    // `extract_archive` picks the format from the file name.
    let tmp_ext = if is_zip { "zip" } else { "tar.gz" };
    let tmp_filename = format!("llama-server-{}-{}.tmp.{}", backend.slug(), tag, tmp_ext);
    let tmp_path = bin_dir.join(&tmp_filename);
    tracing::info!(" -> downloading to: {}", tmp_path.display());
    if let Some(ref tx) = progress_tx {
        let mut progress =
            crate::models::DownloadState::new("llama-server".to_string(), tmp_filename.clone(), 0);
        // NOTE(review): state value 1 is neither the paused (2) nor the
        // cancelled (3) state; presumably it means "active" — confirm.
        let download_state = std::sync::Arc::new(std::sync::atomic::AtomicU8::new(1));
        download_file(
            "llama-server",
            &tmp_filename,
            &download_url,
            &tmp_path,
            &mut progress,
            download_state,
            tx.clone(),
        )
        .await?;
    } else {
        let resp = reqwest::Client::new()
            .get(&download_url)
            .header("User-Agent", "llm-manager/0.9.9")
            .send()
            .await?
            .error_for_status()?;
        let mut stream = resp.bytes_stream();
        let mut file = tokio::fs::File::create(&tmp_path).await?;
        while let Some(chunk) = stream.next().await {
            let chunk = chunk?;
            file.write_all(&chunk).await?;
        }
        // `tokio::fs::File` may still have writes in flight; flush so the
        // archive is complete on disk before it is extracted below.
        file.flush().await?;
    }
    tracing::info!(" -> download complete, extracting...");
    let extract_dir = bin_dir.join(format!("llama-server-{}-{}.extract", backend.slug(), tag));
    if let Some(tx) = &log_tx {
        let _ = tx.send("Extracting backend...".to_string()).await;
    }
    extract_archive(&tmp_path, &extract_dir)?;
    if let Some(tx) = &log_tx {
        let _ = tx.send("Finalizing installation...".to_string()).await;
    }
    let extracted_bin = extract_dir.join(bin_name);
    tracing::info!(
        " -> looking for binary in extracted archive at: {}",
        extracted_bin.display()
    );
    if extracted_bin.exists() {
        tracing::info!(
            " -> found binary at expected location, moving to {}",
            bin_path.display()
        );
        std::fs::rename(&extracted_bin, &bin_path)?;
    } else {
        // Archives may nest the binary inside sub-directories; search for it.
        tracing::info!(" -> binary not at expected location, searching recursively...");
        let mut found = None;
        walk_dir_recursive(&extract_dir, 0, 10, &mut |entry| {
            if entry.file_name().to_str() == Some(bin_name) {
                tracing::info!(" -> found binary at: {}", entry.path().display());
                found = Some(entry.path().to_path_buf());
            }
        });
        if let Some(path) = found {
            std::fs::rename(path, &bin_path)?;
        } else {
            anyhow::bail!(
                "Could not find {} binary in archive at {}",
                bin_name,
                extract_dir.display()
            );
        }
    }
    // `llama-bench` is optional: install it when the archive ships it and
    // ignore any failure to do so.
    let bench_bin_path = bin_dir.join("llama-bench");
    let mut bench_found = None;
    walk_dir_recursive(&extract_dir, 0, 10, &mut |entry| {
        if entry
            .file_name()
            .to_str()
            .map(|n| n == "llama-bench")
            .unwrap_or(false)
        {
            bench_found = Some(entry.path().to_path_buf());
        }
    });
    if let Some(path) = bench_found {
        let _ = std::fs::rename(path, &bench_bin_path);
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let _ =
                std::fs::set_permissions(&bench_bin_path, std::fs::Permissions::from_mode(0o755));
        }
    }
    // Copy every shared library next to the binary. Matching on "contains"
    // also picks up versioned names such as `libfoo.so.1`.
    let lib_ext = lib_extension();
    let dotted_ext = format!(".{}", lib_ext.trim_start_matches('.'));
    walk_dir_recursive(&extract_dir, 0, 10, &mut |entry| {
        let name = entry.file_name();
        let name_str = name.to_string_lossy();
        if name_str.ends_with(lib_ext) || name_str.contains(&dotted_ext) {
            let dest = bin_dir.join(name);
            let _ = std::fs::copy(entry.path(), dest);
        }
    });
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        std::fs::set_permissions(&bin_path, std::fs::Permissions::from_mode(0o755))?;
    }
    // Best-effort cleanup of the archive and the extraction directory.
    let _ = tokio::fs::remove_file(&tmp_path).await;
    let _ = tokio::fs::remove_dir_all(&extract_dir).await;
    Ok(bin_path)
}
pub fn extract_archive(archive_path: &std::path::Path, dest_dir: &std::path::Path) -> Result<()> {
let filename = archive_path
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("");
if filename.ends_with(".zip") {
let file = std::fs::File::open(archive_path)?;
let mut archive = zip::ZipArchive::new(file)?;
archive.extract(dest_dir)?;
} else if filename.ends_with(".tar.gz") || filename.contains(".tar.gz") {
use flate2::read::GzDecoder;
use tar::Archive;
let file = std::fs::File::open(archive_path)?;
let decoder = GzDecoder::new(file);
let mut archive = Archive::new(decoder);
archive.unpack(dest_dir)?;
} else {
anyhow::bail!("Unsupported archive format: {}", filename);
}
Ok(())
}
/// Calls `f` for every entry below `dir`, descending into sub-directories.
///
/// `depth` is the level of `dir` itself; nothing is visited once `depth`
/// reaches `max_depth`, so a top-level call with `depth = 0` visits at most
/// `max_depth` directory levels. Each entry is passed to `f` before its
/// children are visited. Directories and entries that cannot be read are
/// skipped silently.
pub fn walk_dir_recursive<F>(dir: &std::path::Path, depth: usize, max_depth: usize, f: &mut F)
where
    F: FnMut(&std::fs::DirEntry),
{
    if depth >= max_depth {
        return;
    }
    let listing = match std::fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(_) => return,
    };
    for entry in listing.filter_map(|item| item.ok()) {
        let child = entry.path();
        f(&entry);
        if child.is_dir() {
            walk_dir_recursive(&child, depth + 1, max_depth, f);
        }
    }
}
/// Asks the GitHub API for the `tag_name` of the latest release of `repo`.
///
/// Never fails: when the request cannot be sent, the server answers with an
/// error status, the body is not JSON, or `tag_name` is missing or not a
/// string, `fallback` is returned instead.
async fn fetch_latest_release_tag(repo: &str, fallback: &str) -> String {
    /// Performs the lookup, collapsing every failure mode into `None`.
    async fn query_tag(endpoint: &str) -> Option<String> {
        let response = reqwest::Client::new()
            .get(endpoint)
            .header("Accept", "application/vnd.github.v3+json")
            .header("User-Agent", "llm-manager/1.1.0")
            .send()
            .await
            .ok()?
            .error_for_status()
            .ok()?;
        let body = response.json::<serde_json::Value>().await.ok()?;
        body.get("tag_name")
            .and_then(|v| v.as_str())
            .map(|s| s.to_string())
    }

    let endpoint = format!("https://api.github.com/repos/{}/releases/latest", repo);
    match query_tag(&endpoint).await {
        Some(tag) => tag,
        None => fallback.to_string(),
    }
}