use anyhow::{Result, anyhow, Context};
use std::path::{Path, PathBuf};
use std::fs;
use std::process::Command;
/// Configuration and tokenizer files that every check in this file requires
/// to be present; a single missing entry marks the model as incomplete.
const REQUIRED_FILES: &[&str] = &[
"config.json",
"tokenizer.json",
"tokenizer_config.json",
];
/// Accepted weight file names, in lookup order. The checks and copies in this
/// file stop at the first entry that exists, so one of them is sufficient.
const WEIGHT_FILES: &[&str] = &[
"pytorch_model.bin",
"model.safetensors",
];
/// Checks for, downloads and installs the embedding model files.
pub struct ModelSetup {
/// Directory under which the local copy of the model is stored.
models_path: PathBuf,
/// HuggingFace cache root: searched for snapshots and passed to the CLI as `--cache-dir`.
hf_cache_path: PathBuf,
/// HuggingFace repository id of the model.
model_name: String,
}
/// Name and size of one model file found on disk.
#[derive(Debug, Clone)]
struct FileInfo {
/// File name relative to the directory that was inspected.
name: String,
/// Size in bytes as reported by the file's metadata.
size: u64,
}
/// Result of checking a directory for the required and weight files.
#[derive(Debug, Clone)]
struct ValidationReport {
/// `true` only when every required file and at least one weight file exist.
all_files_present: bool,
/// Names of the files that were not found.
missing_files: Vec<String>,
/// Files that were found, with their sizes.
present_files: Vec<FileInfo>,
}
impl ModelSetup {
/// Creates a setup helper that stores the model under `models_path`.
///
/// The HuggingFace cache is assumed to live at `$HOME/.cache/huggingface`;
/// when `HOME` cannot be read the current directory (`.`) is used as the base.
pub fn new(models_path: PathBuf) -> Self {
    let home_dir = match std::env::var("HOME") {
        Ok(value) => value,
        Err(_) => String::from("."),
    };

    let mut hf_cache_path = PathBuf::from(home_dir);
    hf_cache_path.push(".cache");
    hf_cache_path.push("huggingface");

    Self {
        model_name: String::from("embaas/sentence-transformers-e5-large-v2"),
        hf_cache_path,
        models_path,
    }
}
/// Reports whether the local model directory holds a complete model.
///
/// A model is complete when every entry of `REQUIRED_FILES` exists and at
/// least one entry of `WEIGHT_FILES` exists with readable metadata. When the
/// directory exists but is incomplete, a status listing of present and
/// missing files is printed before returning `Ok(false)`.
pub fn check_model_exists(&self) -> Result<bool> {
    let model_dir = self.get_model_directory();
    if !model_dir.exists() {
        return Ok(false);
    }

    let mut found: Vec<(String, u64)> = Vec::new();
    let mut absent: Vec<String> = Vec::new();

    for name in REQUIRED_FILES.iter() {
        let candidate = model_dir.join(name);
        if !candidate.exists() {
            absent.push(name.to_string());
            continue;
        }
        // A file that exists but whose metadata cannot be read is neither
        // listed as present nor counted as missing.
        if let Ok(meta) = fs::metadata(&candidate) {
            found.push((name.to_string(), meta.len()));
        }
    }

    // Only the first usable weight file is recorded.
    let weight = WEIGHT_FILES.iter().find_map(|name| {
        let candidate = model_dir.join(name);
        if !candidate.exists() {
            return None;
        }
        fs::metadata(&candidate)
            .ok()
            .map(|meta| (name.to_string(), meta.len()))
    });
    match weight {
        Some(entry) => found.push(entry),
        None => absent.extend(WEIGHT_FILES.iter().map(|s| s.to_string())),
    }

    // Nothing missing means the model is complete.
    if absent.is_empty() {
        return Ok(true);
    }

    println!("📋 Model status in {:?}:", model_dir);
    if !found.is_empty() {
        println!("\n Files present:");
        for (name, size) in found.iter() {
            let megabytes = *size as f64 / 1_048_576.0;
            println!(" ✓ {} ({:.2} MB)", name, megabytes);
        }
    }
    println!("\n Missing files:");
    for name in absent.iter() {
        println!(" ✗ {}", name);
    }
    Ok(false)
}
/// Returns the directory that holds the local copy of the model.
///
/// The directory name follows the HuggingFace cache convention
/// (`models--<org>--<name>`) and is derived from `model_name`, so it cannot
/// drift from the repository id stored on the struct.
fn get_model_directory(&self) -> PathBuf {
    let dir_name = format!("models--{}", self.model_name.replace('/', "--"));
    self.models_path.join(dir_name)
}
/// Locates the model's snapshot directory inside the HuggingFace cache.
///
/// Returns `Ok(None)` when the `snapshots` directory does not exist or holds
/// no sub-directory. When several snapshots exist, the most recently modified
/// one is returned: `read_dir` yields entries in a platform-dependent order,
/// so taking "the first" entry would make the choice arbitrary.
///
/// # Errors
/// Fails when the `snapshots` directory or one of its entries cannot be read.
fn find_hf_snapshot_dir(&self) -> Result<Option<PathBuf>> {
    let snapshots_root = self
        .hf_cache_path
        .join("models--embaas--sentence-transformers-e5-large-v2")
        .join("snapshots");
    if !snapshots_root.exists() {
        return Ok(None);
    }

    let entries = fs::read_dir(&snapshots_root)
        .context("Failed to read HuggingFace cache directory")?;

    let mut newest: Option<(std::time::SystemTime, PathBuf)> = None;
    for entry in entries {
        let path = entry?.path();
        if !path.is_dir() {
            continue;
        }
        // An unreadable timestamp ranks the snapshot last instead of failing
        // the whole lookup.
        let modified = fs::metadata(&path)
            .and_then(|meta| meta.modified())
            .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
        let is_newer = match &newest {
            Some((best, _)) => modified > *best,
            None => true,
        };
        if is_newer {
            newest = Some((modified, path));
        }
    }
    Ok(newest.map(|(_, path)| path))
}
/// Prints the three-step manual setup guide (install the CLI, download the
/// model, restart for the automatic copy) to standard output.
pub fn print_download_instructions(&self) {
    const RULE: &str = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━";

    // Every step heading is framed by a rule above and below it.
    let section = |title: &str| {
        println!("\n{}", RULE);
        println!("{}", title);
        println!("{}", RULE);
    };
    let emit = |lines: &[&str]| {
        for line in lines {
            println!("{}", line);
        }
    };

    emit(&[
        "\n📦 BGE-M3 Model Setup Required\n",
        "To use the RAG system, you need to download the BGE-M3 embedding model.",
    ]);

    section("\nStep 1: Install HuggingFace CLI");
    emit(&[
        "\nRun these commands in your terminal:\n",
        " brew install pipx",
        " pipx install 'huggingface_hub[cli]'",
        " pipx ensurepath",
        "\n # Restart your terminal or run:",
        " source ~/.zshrc # or source ~/.bashrc",
        "\n # Verify installation (pipx installs it as 'hf'):",
        " hf --version",
        "\n # Optional: Create symlink for compatibility:",
        " ln -s ~/.local/bin/hf ~/.local/bin/huggingface-cli",
    ]);

    section("\nStep 2: Download the model");
    emit(&[
        "\nRun this command (use 'hf' or 'huggingface-cli'):\n",
        " hf download embaas/sentence-transformers-e5-large-v2 \\",
        " config.json \\",
        " tokenizer.json \\",
        " tokenizer_config.json \\",
        " pytorch_model.bin \\",
        " --cache-dir ~/.cache/huggingface",
        "\n Note: This will download ~1.2GB. It may take a few minutes.",
    ]);

    section("\nStep 3: Automatic copy");
    emit(&[
        "\nAfter download completes, restart your application.",
        "The system will automatically detect and copy the files to the local directory.",
    ]);
    println!("\n{}\n", RULE);
}
/// Returns `true` when `pipx --version` can be started and exits successfully.
///
/// The exit status is checked as well as the spawn result, matching the other
/// CLI probes in this file: a `pipx` shim that starts but fails is not usable.
fn check_pipx_installed(&self) -> bool {
    Command::new("pipx")
        .arg("--version")
        .output()
        .map(|out| out.status.success())
        .unwrap_or(false)
}
/// Installs pipx, preferring Homebrew and falling back to pip3.
///
/// Returns `Ok(true)` when pipx is already present or Homebrew installs it.
/// Any Homebrew failure (including Homebrew being absent) delegates to
/// `install_pipx_via_pip`, whose result is returned unchanged.
fn install_pipx(&self) -> Result<bool> {
    println!("📦 Installing pipx...");
    if self.check_pipx_installed() {
        println!("✅ pipx is already installed");
        return Ok(true);
    }

    let brew = match Command::new("brew").args(&["install", "pipx"]).output() {
        Ok(finished) => finished,
        Err(_) => {
            println!("⚠️ Homebrew not available, trying pip3...");
            return self.install_pipx_via_pip();
        }
    };
    if brew.status.success() {
        println!("✅ pipx installed successfully via Homebrew");
        return Ok(true);
    }

    let err_text = String::from_utf8_lossy(&brew.stderr);
    let out_text = String::from_utf8_lossy(&brew.stdout);
    if err_text.contains("already installed") || out_text.contains("already installed") {
        println!("✅ pipx is already installed");
        return Ok(true);
    }

    // Every remaining failure falls back to pip3; only the explanation differs.
    let brew_busy = err_text.contains("already locked") || err_text.contains("already running");
    let rosetta_issue = err_text.contains("Rosetta 2") || err_text.contains("ARM default prefix");
    if brew_busy {
        println!("⚠️ Another brew process is running");
        println!("💡 Skipping brew, trying pip3 instead...");
    } else if rosetta_issue {
        println!("⚠️ Homebrew ARM/Rosetta issue detected");
        println!("💡 Skipping ARM brew method, trying pip3 instead...");
    } else {
        println!("⚠️ Homebrew installation failed, trying pip3...");
    }
    self.install_pipx_via_pip()
}
/// Installs pipx with `pip3 install --user pipx`.
///
/// When pip refuses because the environment is externally managed (PEP 668),
/// the install is retried once with `--break-system-packages`.
///
/// # Errors
/// Fails when `pip3` cannot be started, or when no attempt succeeds; in the
/// latter case the error carries the stderr of the first attempt.
fn install_pipx_via_pip(&self) -> Result<bool> {
    println!("📦 Installing pipx via pip3...");

    // Both attempts share one invocation; only the extra flags differ.
    let run_pip = |extra_flags: &[&str]| {
        Command::new("pip3")
            .arg("install")
            .arg("--user")
            .args(extra_flags)
            .arg("pipx")
            .output()
    };
    let print_path_hint = || {
        println!("💡 You may need to add to PATH:");
        println!(" export PATH=\"$HOME/.local/bin:$PATH\"");
    };

    let first = run_pip(&[])
        .map_err(|e| anyhow!("Failed to run pip3: {}. Please install Python3 first.", e))?;
    if first.status.success() {
        println!("✅ pipx installed successfully via pip3");
        print_path_hint();
        return Ok(true);
    }

    let stderr = String::from_utf8_lossy(&first.stderr);
    let stdout = String::from_utf8_lossy(&first.stdout);
    if stdout.contains("already satisfied") || stderr.contains("already satisfied") {
        println!("✅ pipx is already installed");
        return Ok(true);
    }

    if stderr.contains("externally-managed-environment") || stderr.contains("PEP 668") {
        println!("⚠️ Python environment is externally managed (PEP 668)");
        println!("💡 Trying with --break-system-packages flag...");
        // A retry that cannot even be spawned falls through to the error below.
        if let Ok(retry) = run_pip(&["--break-system-packages"]) {
            if retry.status.success() {
                println!("✅ pipx installed successfully via pip3 (with --break-system-packages)");
                print_path_hint();
                return Ok(true);
            }
            if String::from_utf8_lossy(&retry.stderr).contains("already satisfied") {
                println!("✅ pipx is already installed");
                return Ok(true);
            }
        }
    }
    Err(anyhow!("Failed to install pipx via pip3: {}", stderr))
}
/// Installs the HuggingFace CLI with `pip3 install --user huggingface_hub[cli]`.
///
/// After a successful install the expected `$HOME/.local/bin/hf` location is
/// checked and the outcome reported; a missing file only produces a warning.
/// A PEP 668 refusal triggers one retry with `--break-system-packages`.
///
/// # Errors
/// Fails when `pip3` cannot be started or when no attempt succeeds.
fn install_hf_cli_direct(&self) -> Result<bool> {
    println!("📥 Installing HuggingFace CLI directly via pip3...");

    let run_pip = |extra_flags: &[&str]| {
        Command::new("pip3")
            .arg("install")
            .arg("--user")
            .args(extra_flags)
            .arg("huggingface_hub[cli]")
            .output()
    };
    // Shared post-install check: short pause, then look for the `hf` script.
    let verify_cli = || {
        let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string());
        let hf_path = format!("{}/.local/bin/hf", home);
        std::thread::sleep(std::time::Duration::from_millis(500));
        if std::path::Path::new(&hf_path).exists() {
            println!("✅ CLI verified at: {}", hf_path);
        } else {
            println!("⚠️ CLI installed but not found at expected location");
            println!("💡 Make sure ~/.local/bin is in your PATH");
        }
    };

    let first = match run_pip(&[]) {
        Ok(finished) => finished,
        Err(e) => return Err(anyhow!("Failed to run pip3: {}", e)),
    };
    if first.status.success() {
        println!("✅ HuggingFace CLI installed successfully via pip3");
        verify_cli();
        return Ok(true);
    }

    let stderr = String::from_utf8_lossy(&first.stderr);
    let stdout = String::from_utf8_lossy(&first.stdout);
    if stdout.contains("already satisfied") || stderr.contains("already satisfied") {
        println!("✅ HuggingFace CLI is already installed");
        return Ok(true);
    }

    if stderr.contains("externally-managed-environment") || stderr.contains("PEP 668") {
        println!("⚠️ Python environment is externally managed (PEP 668)");
        println!("💡 Trying with --break-system-packages flag...");
        if let Ok(retry) = run_pip(&["--break-system-packages"]) {
            if retry.status.success() {
                println!("✅ HuggingFace CLI installed successfully via pip3 (with --break-system-packages)");
                verify_cli();
                return Ok(true);
            }
            if String::from_utf8_lossy(&retry.stderr).contains("already satisfied") {
                println!("✅ HuggingFace CLI is already installed");
                return Ok(true);
            }
        }
    }
    Err(anyhow!("Failed to install HuggingFace CLI via pip3: {}", stderr))
}
/// Installs the HuggingFace CLI through pipx and reports where it landed.
///
/// When pipx cannot be run or reports a failure that is not "already
/// installed", the call delegates to `install_hf_cli_direct`. Otherwise the
/// pipx output is echoed, the expected `$HOME/.local/bin/hf` script is
/// checked, and (on Unix) a `huggingface-cli` symlink is created next to it.
/// A missing script only triggers a diagnostic search; the result is still
/// `Ok(true)`.
fn install_hf_cli(&self) -> Result<bool> {
    let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string());
    let hf_path = format!("{}/.local/bin/hf", home);
    let symlink_path = format!("{}/.local/bin/huggingface-cli", home);
    println!("📥 Installing HuggingFace CLI...");
    println!(" Expected installation path: {}", hf_path);

    let pipx_run = match Command::new("pipx")
        .arg("install")
        .arg("huggingface_hub[cli]")
        .output()
    {
        Ok(finished) => finished,
        Err(e) => {
            println!("⚠️ pipx command failed: {}", e);
            println!("💡 Trying direct pip3 installation...");
            return self.install_hf_cli_direct();
        }
    };

    let out_text = String::from_utf8_lossy(&pipx_run.stdout);
    let err_text = String::from_utf8_lossy(&pipx_run.stderr);
    let already_there = err_text.contains("already seems to be installed")
        || err_text.contains("already installed");

    if !pipx_run.status.success() && !already_there {
        println!("\n⚠️ pipx installation failed");
        println!("💡 Trying direct pip3 installation as fallback...");
        return self.install_hf_cli_direct();
    }

    println!("\n📋 Installation output:");
    for line in out_text.lines() {
        println!(" {}", line);
    }
    if !err_text.is_empty() {
        println!("\n stderr:");
        for line in err_text.lines() {
            println!(" {}", line);
        }
    }
    if already_there {
        println!("\n✅ HuggingFace CLI was already installed");
    } else {
        println!("\n✅ HuggingFace CLI installed successfully");
    }

    println!("\n🔍 Checking pipx installation list...");
    if let Ok(listing) = Command::new("pipx").arg("list").output() {
        let listing_text = String::from_utf8_lossy(&listing.stdout);
        for line in listing_text.lines().filter(|l| l.contains("huggingface")) {
            println!(" {}", line);
        }
    }

    println!("\n🔍 Verifying installation...");
    if !std::path::Path::new(&hf_path).exists() {
        // Diagnostics only: list anything that looks like the CLI in the
        // usual install locations.
        println!("⚠️ File NOT found at expected path: {}", hf_path);
        println!("\n🔍 Searching all possible locations:");
        let search_roots = [
            format!("{}/.local/bin", home),
            format!("{}/.local/pipx/venvs", home),
            "/usr/local/bin".to_string(),
            "/opt/homebrew/bin".to_string(),
        ];
        for root in search_roots.iter() {
            println!("\n Searching in: {}", root);
            match std::fs::read_dir(root) {
                Ok(entries) => {
                    for entry in entries.flatten() {
                        let looks_like_cli = entry
                            .file_name()
                            .to_str()
                            .map_or(false, |n| n.contains("huggingface") || n.contains("hf"));
                        if looks_like_cli {
                            println!(" Found: {}", entry.path().display());
                        }
                    }
                }
                Err(_) => println!(" Directory doesn't exist or can't be read"),
            }
        }
        return Ok(true);
    }

    println!("✅ CLI found at: {}", hf_path);
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        if let Ok(meta) = std::fs::metadata(&hf_path) {
            let mode = meta.permissions().mode();
            println!(" File permissions: {:o}", mode);
            println!(" Executable: {}", mode & 0o111 != 0);
        }
    }

    if std::path::Path::new(&symlink_path).exists() {
        println!("✅ Symlink already exists at: {}", symlink_path);
    } else {
        println!("\n🔗 Creating compatibility symlink...");
        #[cfg(unix)]
        {
            use std::os::unix::fs::symlink;
            if let Err(e) = symlink(&hf_path, &symlink_path) {
                println!("⚠️ Failed to create symlink: {}", e);
                println!(" You can create it manually with:");
                println!(" ln -s {} {}", hf_path, symlink_path);
            } else {
                println!("✅ Created symlink: {} -> {}", symlink_path, hf_path);
            }
        }
    }
    Ok(true)
}
/// Ensures the HuggingFace CLI is available, installing pipx first if needed.
///
/// Returns `Ok(true)` immediately when a working CLI is already found.
///
/// # Errors
/// Propagates any failure from installing pipx or the CLI.
pub fn auto_install_cli(&self) -> Result<bool> {
    println!("🔧 Setting up HuggingFace CLI...\n");

    if self.check_hf_cli_installed() {
        println!("✅ HuggingFace CLI is already installed");
        return Ok(true);
    }

    let pipx_ready = self.check_pipx_installed();
    if pipx_ready {
        println!("✅ pipx is installed");
    } else {
        println!("📦 pipx not found, installing...");
        self.install_pipx()?;
    }

    self.install_hf_cli().map(|_| {
        println!("✅ HuggingFace CLI installation complete!\n");
        true
    })
}
/// Returns `true` when a HuggingFace CLI answers `--help` successfully.
///
/// Probes `huggingface-cli` and `hf` on `PATH` first, then a fixed list of
/// common install locations (`~` is expanded from `HOME`).
pub fn check_hf_cli_installed(&self) -> bool {
    const FALLBACK_LOCATIONS: [&str; 6] = [
        "~/.local/bin/huggingface-cli",
        "~/.local/bin/hf",
        "/usr/local/bin/huggingface-cli",
        "/usr/local/bin/hf",
        "/opt/homebrew/bin/huggingface-cli",
        "/opt/homebrew/bin/hf",
    ];

    // A candidate counts only if it both starts and exits with success.
    let responds = |program: &str| -> bool {
        match Command::new(program).arg("--help").output() {
            Ok(finished) => finished.status.success(),
            Err(_) => false,
        }
    };

    if responds("huggingface-cli") || responds("hf") {
        return true;
    }

    FALLBACK_LOCATIONS.iter().any(|raw| {
        let expanded = if raw.starts_with("~/") {
            let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string());
            raw.replace("~", &home)
        } else {
            raw.to_string()
        };
        responds(&expanded)
    })
}
/// Searches for a working HuggingFace CLI and returns how to invoke it.
///
/// Search order: per-version `~/Library/Python/3.*/bin` directories (macOS
/// only), a fixed list of common install paths, `huggingface-cli` and `hf`
/// on `PATH`, and finally the Python module. The module case is reported as
/// the marker string `"python3-module"` rather than a path. Progress is
/// printed for every location tried.
fn get_hf_cli_path(&self) -> Option<String> {
    println!("🔍 Searching for HuggingFace CLI...");

    let probe = |program: &str| Command::new(program).arg("--help").output();
    let works = |program: &str| probe(program).map(|o| o.status.success()).unwrap_or(false);

    if cfg!(target_os = "macos") {
        let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string());
        let library_python = format!("{}/Library/Python", home);
        if let Ok(entries) = std::fs::read_dir(&library_python) {
            println!(" Checking macOS Python user directories...");
            for entry in entries.flatten() {
                let os_name = entry.file_name();
                let dir_name = match os_name.to_str() {
                    Some(n) if n.starts_with("3.") => n,
                    _ => continue,
                };
                let bin_dir = format!("{}/{}/bin", library_python, dir_name);
                for cli_name in ["hf", "huggingface-cli"].iter() {
                    let cli_path = format!("{}/{}", bin_dir, cli_name);
                    println!(" Checking: {}", cli_path);
                    if !std::path::Path::new(&cli_path).exists() {
                        continue;
                    }
                    println!(" ✓ File exists");
                    if works(&cli_path) {
                        println!("✅ Found working HuggingFace CLI at: {}", cli_path);
                        return Some(cli_path);
                    }
                }
            }
        }
    }

    let known_locations = [
        "~/.local/bin/huggingface-cli",
        "~/.local/bin/hf",
        "/usr/local/bin/huggingface-cli",
        "/usr/local/bin/hf",
        "/opt/homebrew/bin/huggingface-cli",
        "/opt/homebrew/bin/hf",
    ];
    for raw in known_locations.iter() {
        let expanded = if raw.starts_with("~/") {
            let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string());
            raw.replace("~", &home)
        } else {
            raw.to_string()
        };
        println!(" Checking: {}", expanded);
        if !std::path::Path::new(&expanded).exists() {
            println!(" ✗ File not found");
            continue;
        }
        println!(" ✓ File exists");
        match probe(&expanded) {
            Err(e) => println!(" ✗ File exists but error executing: {}", e),
            Ok(finished) => {
                if finished.status.success() {
                    println!("✅ Found working HuggingFace CLI at: {}", expanded);
                    return Some(expanded);
                }
                println!(
                    " ✗ File exists but command failed with exit code: {:?}",
                    finished.status.code()
                );
            }
        }
    }

    let on_path = [
        ("huggingface-cli", "✅ Found HuggingFace CLI in PATH"),
        ("hf", "✅ Found HuggingFace CLI (hf) in PATH"),
    ];
    for (program, found_message) in on_path.iter() {
        println!(" Checking: {} (in PATH)", program);
        if works(program) {
            println!("{}", found_message);
            return Some(program.to_string());
        }
    }

    println!(" Checking: python3 -m huggingface_hub.commands.huggingface_cli");
    let module_works = Command::new("python3")
        .arg("-m")
        .arg("huggingface_hub.commands.huggingface_cli")
        .arg("--help")
        .output()
        .map(|o| o.status.success())
        .unwrap_or(false);
    if module_works {
        println!("✅ Found HuggingFace CLI via Python module");
        return Some("python3-module".to_string());
    }

    println!("❌ Could not find huggingface-cli or hf in any location");
    None
}
pub fn download_model(&self) -> Result<bool> {
println!("📥 Downloading BGE-M3 model files using HuggingFace CLI...");
println!(" This may take several minutes depending on your internet connection.\n");
let arch = std::env::consts::ARCH;
println!("🔍 Detected architecture: {}", arch);
let cli_path = if cfg!(target_os = "macos") && arch == "aarch64" {
println!("📱 Running on Apple Silicon (ARM64) - checking for native Python...");
self.get_arm64_compatible_cli().or_else(|| self.get_hf_cli_path())
} else {
self.get_hf_cli_path()
};
let cli_path = match cli_path {
Some(path) => path,
None => {
println!("⏳ CLI not found immediately, waiting 1 second...");
std::thread::sleep(std::time::Duration::from_secs(1));
self.get_hf_cli_path()
.ok_or_else(|| anyhow!("HuggingFace CLI not found. Please install it first."))?
}
};
println!("Using HuggingFace CLI: {}", cli_path);
if cfg!(target_os = "macos") && arch == "aarch64" {
println!("🔧 Checking for incompatible hf-xet package on ARM64...");
let python_to_check = if cli_path.starts_with("python3:") {
cli_path.strip_prefix("python3:").unwrap().to_string()
} else {
self.detect_hf_cli_python(&cli_path).unwrap_or_else(|| "python3".to_string())
};
if let Ok(check_output) = Command::new(&python_to_check)
.arg("-c")
.arg("import hf_xet; print('installed')")
.output() {
if check_output.status.success() {
println!("⚠️ Found incompatible hf-xet package, removing it...");
if let Ok(uninstall_output) = Command::new(&python_to_check)
.arg("-m")
.arg("pip")
.arg("uninstall")
.arg("hf-xet")
.arg("huggingface_hub")
.arg("-y")
.output() {
if uninstall_output.status.success() {
println!("✅ Successfully removed incompatible packages");
} else {
let err = String::from_utf8_lossy(&uninstall_output.stderr);
println!("⚠️ Cleanup warning: {}", err);
}
}
} else {
println!("✅ No incompatible hf-xet package found");
}
}
}
let mut cmd = if cli_path == "python3-module" {
let mut c = Command::new("python3");
c.arg("-m")
.arg("huggingface_hub.commands.huggingface_cli");
c
} else if cli_path.starts_with("python3:") {
let python_path = cli_path.strip_prefix("python3:").unwrap();
let possible_cli_paths = vec![
std::path::Path::new(python_path).parent().unwrap().join("huggingface-cli"),
std::path::PathBuf::from(std::env::var("HOME").unwrap_or_default()).join("Library/Python/3.14/bin/huggingface-cli"),
std::path::PathBuf::from(std::env::var("HOME").unwrap_or_default()).join("Library/Python/3.13/bin/huggingface-cli"),
std::path::PathBuf::from(std::env::var("HOME").unwrap_or_default()).join("Library/Python/3.12/bin/huggingface-cli"),
std::path::PathBuf::from(std::env::var("HOME").unwrap_or_default()).join("Library/Python/3.11/bin/huggingface-cli"),
std::path::PathBuf::from(std::env::var("HOME").unwrap_or_default()).join(".local/bin/huggingface-cli"),
std::path::Path::new(python_path).parent().unwrap().join("hf"),
std::path::PathBuf::from(std::env::var("HOME").unwrap_or_default()).join("Library/Python/3.14/bin/hf"),
std::path::PathBuf::from(std::env::var("HOME").unwrap_or_default()).join("Library/Python/3.13/bin/hf"),
std::path::PathBuf::from(std::env::var("HOME").unwrap_or_default()).join("Library/Python/3.12/bin/hf"),
std::path::PathBuf::from(std::env::var("HOME").unwrap_or_default()).join("Library/Python/3.11/bin/hf"),
std::path::PathBuf::from(std::env::var("HOME").unwrap_or_default()).join(".local/bin/hf"),
];
let mut found_cli = None;
for cli_script in &possible_cli_paths {
if cli_script.exists() {
println!("✅ Found CLI script at: {}", cli_script.display());
found_cli = Some(cli_script.clone());
break;
}
}
if let Some(cli_script) = found_cli {
Command::new(cli_script)
} else {
println!("⚠️ No CLI script found, trying module invocation...");
let mut c = Command::new(python_path);
c.arg("-m")
.arg("huggingface_hub.cli"); c
}
} else {
Command::new(&cli_path)
};
if cfg!(target_os = "macos") && arch == "aarch64" {
println!("⚙️ Setting HF_HUB_ENABLE_HF_TRANSFER=0 to avoid ARM64/x86_64 conflicts");
cmd.env("HF_HUB_ENABLE_HF_TRANSFER", "0");
}
cmd.arg("download")
.arg("embaas/sentence-transformers-e5-large-v2");
for file in REQUIRED_FILES {
cmd.arg(*file);
}
cmd.arg("pytorch_model.bin");
cmd.arg("--cache-dir")
.arg(self.hf_cache_path.to_str().unwrap());
println!("\nRunning command:");
if cli_path == "python3-module" {
println!(" HF_HUB_ENABLE_HF_TRANSFER=0 python3 -m huggingface_hub.commands.huggingface_cli download embaas/sentence-transformers-e5-large-v2 \\");
} else if cli_path.starts_with("python3:") {
println!(" HF_HUB_ENABLE_HF_TRANSFER=0 [huggingface-cli] download embaas/sentence-transformers-e5-large-v2 \\");
} else {
println!(" HF_HUB_ENABLE_HF_TRANSFER=0 {} download embaas/sentence-transformers-e5-large-v2 \\", cli_path);
}
for file in REQUIRED_FILES {
println!(" {} \\", file);
}
println!(" pytorch_model.bin \\");
println!(" --cache-dir {}\n", self.hf_cache_path.display());
println!("⏳ Downloading... (this will take a few minutes for ~1.2GB)\n");
let output = cmd.output()
.context("Failed to execute huggingface-cli")?;
if output.status.success() {
println!("\n✅ Download completed successfully!");
if !output.stdout.is_empty() {
let stdout = String::from_utf8_lossy(&output.stdout);
println!("\nDownload output:");
println!("{}", stdout);
}
Ok(true)
} else {
let stderr = String::from_utf8_lossy(&output.stderr);
println!("\n❌ Download failed!");
println!("Error: {}", stderr);
if stderr.contains("incompatible architecture") || stderr.contains("arm64") || stderr.contains("x86_64") {
println!("\n⚠️ Architecture mismatch detected!");
println!("🔧 Attempting automatic fix: Removing incompatible hf-xet package...");
let python_to_use = if cli_path.starts_with("python3:") {
cli_path.strip_prefix("python3:").unwrap().to_string()
} else {
self.detect_hf_cli_python(&cli_path).unwrap_or_else(|| "python3".to_string())
};
println!(" Using Python: {}", python_to_use);
let uninstall_result = Command::new(&python_to_use)
.arg("-m")
.arg("pip")
.arg("uninstall")
.arg("hf-xet")
.arg("huggingface_hub")
.arg("-y")
.output();
if let Ok(uninstall_output) = uninstall_result {
if uninstall_output.status.success() {
println!("✅ Removed incompatible packages from x86_64 Python");
println!("💡 Install ARM64 Python for better compatibility:");
println!(" brew install python@3.11");
println!(" /opt/homebrew/bin/python3.11 -m pip install --user 'huggingface_hub[cli]'");
println!("🔄 Then retry initialization");
return Err(anyhow!("Architecture mismatch fixed. Please retry after installing ARM64 Python."));
}
}
println!("\n💡 Manual fix:");
println!(" Step 1: Clean up incompatible packages:");
println!(" python3 -m pip uninstall hf_xet huggingface_hub -y");
println!("\n Step 2: Install ARM64 Python:");
println!(" brew install python@3.11");
println!("\n Step 3: Install huggingface_hub on ARM64 Python:");
println!(" /opt/homebrew/bin/python3.11 -m pip install --user 'huggingface_hub[cli]'");
println!("\n Step 4: Retry initialization");
}
Err(anyhow!("HuggingFace CLI download failed: {}", stderr))
}
}
/// Looks for a Homebrew Python under `/opt/homebrew/bin` that can import
/// `huggingface_hub`, installing the package when it is missing.
///
/// Returns `Some("python3:<interpreter>")` for the first interpreter that
/// works (or was made to work), and `None` after printing install hints.
fn get_arm64_compatible_cli(&self) -> Option<String> {
    println!("🔍 Looking for ARM64-compatible Python installation...");

    let homebrew_pythons = [
        "/opt/homebrew/bin/python3.11",
        "/opt/homebrew/bin/python3.12",
        "/opt/homebrew/bin/python3.10",
        "/opt/homebrew/bin/python3",
    ];
    // pip flag sets tried in order until one install succeeds.
    let install_strategies = [
        ["--user", "huggingface_hub[cli]"],
        ["--break-system-packages", "huggingface_hub[cli]"],
    ];

    for python_path in homebrew_pythons.iter() {
        if !std::path::Path::new(python_path).exists() {
            continue;
        }
        let import_check = match Command::new(python_path)
            .arg("-c")
            .arg("import huggingface_hub; print('ok')")
            .output()
        {
            Ok(finished) => finished,
            Err(_) => continue,
        };
        if import_check.status.success() {
            println!("✅ Found ARM64 Python with huggingface_hub at: {}", python_path);
            return Some(format!("python3:{}", python_path));
        }

        println!("⚠️ Found ARM64 Python at {} but huggingface_hub not installed", python_path);
        println!("🔧 Attempting to auto-install huggingface_hub...");
        let installed = install_strategies.iter().any(|flags| {
            Command::new(python_path)
                .arg("-m")
                .arg("pip")
                .arg("install")
                .arg("--quiet")
                .args(flags)
                .output()
                .map(|finished| finished.status.success())
                .unwrap_or(false)
        });
        if installed {
            println!("✅ Successfully installed huggingface_hub on ARM64 Python!");
            println!("🧹 Cleaning up conflicting packages from x86_64 Python...");
            // NOTE(review): this runs whatever `python3` is first on PATH;
            // confirm it can never be the interpreter just installed into.
            let _ = Command::new("python3")
                .arg("-m")
                .arg("pip")
                .arg("uninstall")
                .arg("hf_xet")
                .arg("huggingface_hub")
                .arg("-y")
                .output();
            return Some(format!("python3:{}", python_path));
        }
        println!("⚠️ Auto-install failed");
        println!("💡 Manual install: {} -m pip install --user 'huggingface_hub[cli]'", python_path);
    }

    println!("⚠️ No ARM64-compatible Python found");
    println!("💡 Install with:");
    println!(" brew install python@3.11");
    println!(" /opt/homebrew/bin/python3.11 -m pip install --user 'huggingface_hub[cli]'");
    None
}
/// Determines which Python interpreter a CLI script runs under by reading
/// the script's shebang line.
///
/// Returns a value that can be used directly as a program name:
/// - `"python3"` for the `"python3-module"` marker,
/// - the interpreter path for `#!/path/to/python [args]`,
/// - the interpreter name for `#!/usr/bin/env [opts] python3`.
///
/// Returns `None` when the file cannot be read as text, has no shebang, or
/// the shebang does not name a Python interpreter.
fn detect_hf_cli_python(&self, cli_path: &str) -> Option<String> {
    if cli_path == "python3-module" {
        return Some("python3".to_string());
    }

    let content = std::fs::read_to_string(cli_path).ok()?;
    let shebang = content.lines().next()?.strip_prefix("#!")?.trim();

    let mut tokens = shebang.split_whitespace();
    let launcher = tokens.next()?;
    let launched_via_env = std::path::Path::new(launcher)
        .file_name()
        .map_or(false, |name| name == "env");
    // With `env`, the interpreter is the first argument that is neither an
    // option (`-S`, `-u`, ...) nor a `VAR=value` assignment.
    let interpreter = if launched_via_env {
        tokens.find(|token| !token.starts_with('-') && !token.contains('='))?
    } else {
        launcher
    };

    if !interpreter.contains("python") {
        return None;
    }
    println!(" Detected Python from shebang: {}", interpreter);
    Some(interpreter.to_string())
}
fn validate_cache_files(&self, snapshot_dir: &Path) -> Result<ValidationReport> {
let mut report = ValidationReport {
all_files_present: true,
missing_files: Vec::new(),
present_files: Vec::new(),
};
for file in REQUIRED_FILES {
let file_path = snapshot_dir.join(file);
if file_path.exists() {
let metadata = fs::metadata(&file_path)?;
report.present_files.push(FileInfo {
name: file.to_string(),
size: metadata.len(),
});
} else {
report.all_files_present = false;
report.missing_files.push(file.to_string());
}
}
let mut found_weight = false;
for file in WEIGHT_FILES {
let file_path = snapshot_dir.join(file);
if file_path.exists() {
let metadata = fs::metadata(&file_path)?;
report.present_files.push(FileInfo {
name: file.to_string(),
size: metadata.len(),
});
found_weight = true;
break; }
}
if !found_weight {
report.all_files_present = false;
report.missing_files.extend(WEIGHT_FILES.iter().map(|s| s.to_string()));
}
Ok(report)
}
/// Copies the model files from the HuggingFace cache into the local model
/// directory.
///
/// Returns `Ok(false)` when no snapshot is cached or the snapshot is
/// incomplete, and `Ok(true)` after the required files and the first
/// available weight file have been copied.
///
/// # Errors
/// Fails when the cache cannot be inspected, the destination directory
/// cannot be created, or a copy fails.
pub fn copy_from_cache(&self) -> Result<bool> {
    println!("🔍 Checking HuggingFace cache for BGE-M3 model...");
    let snapshot_dir = if let Some(dir) = self.find_hf_snapshot_dir()? {
        dir
    } else {
        println!("⚠️ Model not found in HuggingFace cache.");
        println!("Cache directory checked: {:?}", self.hf_cache_path);
        return Ok(false);
    };
    println!("✅ Found model in cache: {:?}", snapshot_dir);

    println!("\n📋 Validating cache files...");
    let report = self.validate_cache_files(&snapshot_dir)?;
    println!("\nFiles found in cache:");
    for info in report.present_files.iter() {
        let megabytes = info.size as f64 / 1_048_576.0;
        println!(" ✓ {} ({:.2} MB)", info.name, megabytes);
    }
    if !report.missing_files.is_empty() {
        println!("\n❌ Missing required files:");
        for name in report.missing_files.iter() {
            println!(" ✗ {}", name);
        }
        println!("\n⚠️ Cache is incomplete. Please download the missing files.");
        return Ok(false);
    }
    println!("\n✅ All required files present in cache!");

    let dest_dir = self.get_model_directory();
    fs::create_dir_all(&dest_dir)
        .context("Failed to create model directory")?;
    println!("\n📁 Copying model files to: {:?}\n", dest_dir);

    let copy_one = |file: &str| -> Result<()> {
        fs::copy(snapshot_dir.join(file), dest_dir.join(file))
            .with_context(|| format!("Failed to copy {}", file))?;
        Ok(())
    };

    let mut copied_count = 0usize;
    for file in REQUIRED_FILES.iter().copied() {
        if !snapshot_dir.join(file).exists() {
            continue;
        }
        copy_one(file)?;
        copied_count += 1;
        println!(" ✓ Copied {}", file);
    }

    // Only the first weight file present in the snapshot is copied.
    let weight = WEIGHT_FILES
        .iter()
        .copied()
        .find(|file| snapshot_dir.join(file).exists());
    if let Some(file) = weight {
        println!(" 📦 Copying {} (this may take a moment)...", file);
        copy_one(file)?;
        copied_count += 1;
        println!(" ✓ Copied {}", file);
    }

    println!("\n✅ Model setup complete! Copied {} files.", copied_count);
    Ok(true)
}
/// Runs the whole setup: local check, copy from cache, CLI installation,
/// download, then copy again.
///
/// CLI installation and download failures are reported on stdout and mapped
/// to `SetupResult::NeedsManualSetup` instead of an error.
///
/// # Errors
/// Propagates failures from the local check and from copying out of the cache.
pub fn auto_setup(&self) -> Result<SetupResult> {
    println!("🚀 Starting automatic model setup...\n");
    if self.check_model_exists()? {
        println!("✅ Model is already set up!");
        return Ok(SetupResult::AlreadySetup);
    }
    println!("Model not found in local directory: {:?}\n", self.get_model_directory());

    println!("Step 1: Checking HuggingFace cache...");
    if self.copy_from_cache()? {
        return Ok(SetupResult::CopiedFromCache);
    }

    println!("\nStep 2: Setting up HuggingFace CLI...");
    if self.check_hf_cli_installed() {
        println!("✅ HuggingFace CLI is already installed");
    } else {
        println!("HuggingFace CLI not found. Installing automatically...\n");
        if let Err(e) = self.auto_install_cli() {
            println!("❌ Failed to install HuggingFace CLI: {}", e);
            println!("\n💡 Please install manually:");
            self.print_download_instructions();
            return Ok(SetupResult::NeedsManualSetup);
        }
        println!("✅ HuggingFace CLI setup complete");
    }

    println!("\nStep 3: Downloading model files...");
    println!("This will download ~1.2GB. Please wait...\n");
    let outcome = match self.download_model() {
        Ok(true) => {
            println!("\n✅ Download successful!");
            println!("\nStep 4: Copying files to local directory...");
            if self.copy_from_cache()? {
                SetupResult::Downloaded
            } else {
                println!("⚠️ Download succeeded but copy failed. Please check the cache directory.");
                SetupResult::NeedsManualSetup
            }
        }
        Ok(false) => {
            println!("⚠️ Download returned false");
            self.print_download_instructions();
            SetupResult::NeedsManualSetup
        }
        Err(e) => {
            println!("❌ Download failed: {}", e);
            println!("\n💡 You can try downloading manually:");
            self.print_download_instructions();
            SetupResult::NeedsManualSetup
        }
    };
    Ok(outcome)
}
/// Builds the shell commands that copy the cached model into the local
/// model directory by hand.
///
/// The list starts with `mkdir -p <dest>`, followed by one `cp` per required
/// file and one `cp` for the first weight file present in the snapshot.
/// Paths containing characters outside a conservative safe set (spaces,
/// quotes, shell metacharacters) are single-quoted so the commands can be
/// pasted into a POSIX shell; simple paths are emitted unchanged.
///
/// # Errors
/// Fails when the model is not in the HuggingFace cache or the cache cannot
/// be read.
pub fn get_copy_commands(&self) -> Result<Vec<String>> {
    /// Renders `path` as a single shell word.
    fn shell_quote(path: &Path) -> String {
        let raw = path.display().to_string();
        let is_safe = !raw.is_empty()
            && raw
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || "/._-+=:@%,".contains(c));
        if is_safe {
            raw
        } else {
            // Close the quote, emit an escaped quote, reopen: ' -> '\''
            format!("'{}'", raw.replace('\'', "'\\''"))
        }
    }

    let snapshot_dir = self
        .find_hf_snapshot_dir()?
        .ok_or_else(|| anyhow!("Model not found in HuggingFace cache"))?;
    let dest_dir = self.get_model_directory();

    let copy_command = |file: &str| {
        format!(
            "cp {} {}",
            shell_quote(&snapshot_dir.join(file)),
            shell_quote(&dest_dir.join(file))
        )
    };

    let mut commands = vec![format!("mkdir -p {}", shell_quote(&dest_dir))];
    // Required files are listed unconditionally, as before.
    commands.extend(REQUIRED_FILES.iter().map(|file| copy_command(file)));
    let weight = WEIGHT_FILES
        .iter()
        .find(|file| snapshot_dir.join(file).exists());
    if let Some(file) = weight {
        commands.push(copy_command(file));
    }
    Ok(commands)
}
}
/// Outcome of `ModelSetup::auto_setup`.
#[derive(Debug, Clone, PartialEq)]
pub enum SetupResult {
/// The local model directory already held a complete model.
AlreadySetup,
/// The files were copied from an existing HuggingFace cache snapshot.
CopiedFromCache,
/// The files were downloaded and then copied from the cache.
Downloaded,
/// An automatic step failed; manual instructions were printed.
NeedsManualSetup,
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::env;

    /// Builds the instance shared by every test in this module.
    fn sample_setup() -> ModelSetup {
        ModelSetup::new(PathBuf::from("./test-models"))
    }

    #[test]
    fn test_model_setup_creation() {
        let expected = "embaas/sentence-transformers-e5-large-v2";
        assert_eq!(sample_setup().model_name, expected);
    }

    #[test]
    fn test_get_model_directory() {
        let directory = sample_setup().get_model_directory();
        let rendered = directory.to_str().unwrap();
        assert!(rendered.contains("models--embaas--sentence-transformers-e5-large-v2"));
    }

    #[test]
    fn test_check_hf_cli() {
        // The result depends on the machine; the test only checks that the
        // probe completes without panicking.
        let _installed: bool = sample_setup().check_hf_cli_installed();
    }
}