Skip to main content

offline_intelligence/engine_management/
registry.rs

1//! Engine Registry
2//!
3//! Manages the collection of available and installed llama.cpp engines,
4//! tracks compatibility with hardware capabilities, and maintains
5//! metadata about each engine.
6
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock};

use anyhow::Result;
use serde::{Deserialize, Serialize};
use tracing::{debug, info, warn};

use crate::model_runtime::platform_detector::{HardwareArchitecture, HardwareCapabilities, Platform};
15
/// Types of hardware acceleration supported by llama.cpp engines.
///
/// Only `CPU`, `CUDA`, `Metal`, and `Vulkan` are positively scored by
/// `EngineInfo::calculate_compatibility`; the remaining variants fall
/// through to the "unsupported acceleration" penalty there.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum AccelerationType {
    /// Pure CPU inference; works on any machine of the right platform/arch.
    CPU,
    /// NVIDIA CUDA GPU acceleration.
    CUDA,
    /// Apple Metal GPU acceleration (macOS).
    Metal,
    /// Cross-vendor Vulkan GPU acceleration.
    Vulkan,
    /// OpenCL acceleration (declared but not scored by compatibility checks).
    OpenCL,
    /// DirectML acceleration (declared but not scored by compatibility checks).
    DirectML,
}
26
27impl std::fmt::Display for AccelerationType {
28    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29        match self {
30            AccelerationType::CPU => write!(f, "CPU"),
31            AccelerationType::CUDA => write!(f, "CUDA"),
32            AccelerationType::Metal => write!(f, "Metal"),
33            AccelerationType::Vulkan => write!(f, "Vulkan"),
34            AccelerationType::OpenCL => write!(f, "OpenCL"),
35            AccelerationType::DirectML => write!(f, "DirectML"),
36        }
37    }
38}
39
/// Status of an engine installation
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum EngineStatus {
    /// Engine is known but neither installed nor offered for download.
    NotInstalled,
    /// Engine can be downloaded (set on generated recommendations).
    Available,
    /// Download currently in progress.
    Downloading,
    /// Engine is on disk with valid metadata and binary
    /// (set by `load_engine_metadata` / `add_installed_engine`).
    Installed,
    /// Presumably the engine currently in use — not set anywhere in this
    /// file; verify against the engine manager.
    Active,
    /// Installation failed validation — not set anywhere in this file;
    /// verify against the download/verification path.
    Corrupted,
}
50
/// Information about a specific llama.cpp engine
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EngineInfo {
    /// Unique identifier, e.g. "llama-cuda-windows-x64-b8037".
    pub id: String,
    /// Human-readable display name.
    pub name: String,
    /// llama.cpp release tag, e.g. "b8037".
    pub version: String,
    /// Target operating system.
    pub platform: Platform,
    /// Target CPU architecture.
    pub architecture: HardwareArchitecture,
    /// Hardware acceleration backend this build uses.
    pub acceleration: AccelerationType,
    /// URL of the GitHub release archive for this build.
    pub download_url: String,
    /// Approximate archive size in bytes.
    pub file_size: u64,
    /// Archive checksum; currently always empty on generated entries, so
    /// integrity checking must happen elsewhere — TODO confirm.
    pub checksum: String,
    /// Static score assigned when the entry is generated; the dynamic,
    /// hardware-specific score comes from `calculate_compatibility`.
    pub compatibility_score: f32,
    /// Current installation/download state.
    pub status: EngineStatus,
    /// Directory the engine is installed in, when installed.
    pub install_path: Option<PathBuf>,
    /// Server binary filename inside the install dir
    /// (e.g. "llama-server" / "llama-server.exe").
    pub binary_name: String,
    /// Human-readable runtime requirements (e.g. "CUDA 12.4+ Runtime").
    pub required_dependencies: Vec<String>,
}
69
70impl EngineInfo {
71    /// Calculate compatibility score for given hardware capabilities
72    pub fn calculate_compatibility(&self, hardware: &HardwareCapabilities) -> f32 {
73        let mut score: f32 = 0.0;
74        
75        // Platform match (highest priority)
76        if self.platform == hardware.platform {
77            score += 50.0;
78        } else {
79            return 0.0; // Incompatible platform
80        }
81        
82        // Architecture match
83        if self.architecture == hardware.architecture {
84            score += 20.0;
85        }
86        
87        // Acceleration support
88        match (&self.acceleration, hardware) {
89            (AccelerationType::CPU, _) => score += 15.0,
90            (AccelerationType::CUDA, hw) if hw.has_cuda => score += 25.0,
91            (AccelerationType::Metal, hw) if hw.has_metal => score += 25.0,
92            (AccelerationType::Vulkan, hw) if hw.has_vulkan => score += 20.0,
93            _ => {
94                // Unsupported acceleration type
95                if self.acceleration != AccelerationType::CPU {
96                    score -= 10.0;
97                }
98            }
99        }
100        
101        // Version recency bonus
102        if self.is_recent_version() {
103            score += 5.0;
104        }
105        
106        score.clamp(0.0, 100.0)
107    }
108    
109    /// Check if this is a recent version (within last 6 months)
110    fn is_recent_version(&self) -> bool {
111        // Simplified version check - in practice this would parse version dates
112        self.version.contains("b") || self.version.contains("latest")
113    }
114}
115
116/// Manages the registry of available and installed engines
117pub struct EngineRegistry {
118    pub installed_engines: HashMap<String, EngineInfo>,
119    pub available_engines: Vec<EngineInfo>,
120    pub default_engine: Option<String>,
121    pub storage_path: PathBuf,
122    /// Track if a download is currently in progress to prevent concurrent downloads
123    download_in_progress: Arc<RwLock<bool>>,
124}
125
126impl EngineRegistry {
127    pub fn new() -> Result<Self> {
128        let storage_path = Self::get_engine_storage_path()?;
129        std::fs::create_dir_all(&storage_path)?;
130        
131        Ok(Self {
132            installed_engines: HashMap::new(),
133            available_engines: Vec::new(),
134            default_engine: None,
135            download_in_progress: Arc::new(RwLock::new(false)),
136            storage_path,
137        })
138    }
139    
140    /// Get platform-appropriate storage path for engines
141    fn get_engine_storage_path() -> Result<PathBuf> {
142        let base_dir = if cfg!(target_os = "windows") {
143            dirs::data_dir()
144                .ok_or_else(|| anyhow::anyhow!("Failed to get APPDATA directory"))?
145                .join("Aud.io")
146                .join("engines")
147        } else if cfg!(target_os = "macos") {
148            dirs::data_dir()
149                .ok_or_else(|| anyhow::anyhow!("Failed to get Library directory"))?
150                .join("Aud.io")
151                .join("engines")
152        } else {
153            dirs::data_dir()
154                .ok_or_else(|| anyhow::anyhow!("Failed to get .local/share directory"))?
155                .join("aud.io")
156                .join("engines")
157        };
158        
159        Ok(base_dir)
160    }
161    
162    /// Scan for already installed engines in the storage directory
163    pub async fn scan_installed_engines(&mut self, hardware_capabilities: &HardwareCapabilities) -> Result<()> {
164        self.installed_engines.clear();
165
166        if self.storage_path.exists() {
167            for entry in std::fs::read_dir(&self.storage_path)? {
168                let entry = entry?;
169                if entry.file_type()?.is_dir() {
170                    let engine_dir = entry.path();
171                    match self.load_engine_metadata(engine_dir.clone()).await {
172                        Some(engine_info) => {
173                            self.installed_engines.insert(engine_info.id.clone(), engine_info);
174                        }
175                        None => {
176                            // Check if directory contains a binary (orphaned engine)
177                            if self.has_binary(&engine_dir) {
178                                warn!("Found orphaned engine at {} (missing or invalid metadata.json). Consider re-downloading this engine.", engine_dir.display());
179                            }
180                        }
181                    }
182                }
183            }
184        }
185
186        // Always refresh available engines to ensure there are recommendations
187        self.refresh_available_engines(hardware_capabilities).await?;
188
189        debug!("Found {} installed engines", self.installed_engines.len());
190        Ok(())
191    }
192
193    /// Check if directory contains engine binary files
194    fn has_binary(&self, engine_dir: &PathBuf) -> bool {
195        let binary_names = ["llama-server.exe", "llama-server", "llama-cli.exe", "llama-cli"];
196        let platform_dirs = ["Windows", "windows", "Linux", "MacOS", "macos"];
197
198        for platform_dir in platform_dirs.iter() {
199            let platform_path = engine_dir.join(platform_dir);
200            if platform_path.exists() {
201                for binary_name in binary_names.iter() {
202                    if platform_path.join(binary_name).exists() {
203                        return true;
204                    }
205                }
206            }
207        }
208
209        false
210    }
211    
212    /// Load engine metadata from installation directory
213    async fn load_engine_metadata(&self, engine_dir: PathBuf) -> Option<EngineInfo> {
214        let metadata_path = engine_dir.join("metadata.json");
215        if !metadata_path.exists() {
216            return None;
217        }
218        
219        match std::fs::read_to_string(&metadata_path) {
220            Ok(content) => {
221                match serde_json::from_str::<EngineInfo>(&content) {
222                    Ok(mut engine_info) => {
223                        // Verify the engine binary actually exists
224                        let binary_path = engine_dir.join(&engine_info.binary_name);
225                        if binary_path.exists() {
226                            engine_info.status = EngineStatus::Installed;
227                            engine_info.install_path = Some(engine_dir);
228                            Some(engine_info)
229                        } else {
230                            warn!("Engine binary not found: {:?}", binary_path);
231                            None
232                        }
233                    }
234                    Err(e) => {
235                        warn!("Failed to parse engine metadata: {}", e);
236                        None
237                    }
238                }
239            }
240            Err(e) => {
241                warn!("Failed to read engine metadata: {}", e);
242                None
243            }
244        }
245    }
246    
247    /// Get engines compatible with given hardware capabilities
248    pub fn get_compatible_engines(&self, hardware: &HardwareCapabilities) -> Vec<&EngineInfo> {
249        self.installed_engines
250            .values()
251            .filter(|engine| {
252                let compatibility = engine.calculate_compatibility(hardware);
253                compatibility > 30.0 && engine.status == EngineStatus::Installed
254            })
255            .collect()
256    }
257    
258    /// Select the best compatible engine for given hardware
259    pub fn select_best_compatible_engine(&self, hardware: &HardwareCapabilities) -> Option<EngineInfo> {
260        let mut compatible_engines: Vec<_> = self.get_compatible_engines(hardware)
261            .into_iter()
262            .map(|engine| {
263                let score = engine.calculate_compatibility(hardware);
264                (engine, score)
265            })
266            .collect();
267            
268        compatible_engines.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
269        
270        compatible_engines.first().map(|(engine, _)| (*engine).clone())
271    }
272    
273    /// Get recommended engine for current hardware (downloads new if needed)
274    pub fn get_recommended_engine(&self, hardware: &HardwareCapabilities) -> Option<EngineInfo> {
275        // First check if we have a good installed engine
276        if let Some(best_installed) = self.select_best_compatible_engine(hardware) {
277            if best_installed.calculate_compatibility(hardware) > 70.0 {
278                return Some(best_installed);
279            }
280        }
281        
282        // Otherwise, recommend downloading a suitable engine
283        // First try to find the highest compatibility engine from available engines
284        if !self.available_engines.is_empty() {
285            let mut compatible_engines: Vec<_> = self.available_engines
286                .iter()
287                .map(|engine| {
288                    let score = engine.calculate_compatibility(hardware);
289                    (engine, score)
290                })
291                .filter(|(_, score)| *score > 0.0) // Only engines compatible with hardware
292                .collect();
293            
294            compatible_engines.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
295            
296            if let Some((engine, _)) = compatible_engines.first() {
297                let mut engine_clone = (*engine).clone();
298                engine_clone.status = EngineStatus::Available; // Ensure status is Available for download
299                return Some(engine_clone);
300            }
301        }
302        
303        // Fallback to official recommendation if no available engines
304        self.get_official_engine_recommendation(hardware)
305    }
306    
    /// Get the latest available llama.cpp release tag.
    ///
    /// Currently a hardcoded fallback so a working version is always
    /// returned; callers embed this tag into engine ids, names, and
    /// download URLs.
    /// TODO: fetch the latest release dynamically from the GitHub API.
    fn get_latest_version(&self) -> String {
        "b8037".to_string()
    }
314    
    /// Build a hardcoded "official" engine recommendation for the given
    /// hardware, pointing at a ggml-org/llama.cpp GitHub release asset.
    ///
    /// Dispatches on (platform, architecture, has_cuda, has_metal) and
    /// returns `None` for combinations with no mapping here (e.g. Linux
    /// ARM64). File sizes are approximations and checksums are left empty
    /// — NOTE(review): integrity verification must happen elsewhere;
    /// confirm downloader behavior.
    fn get_official_engine_recommendation(&self, hardware: &HardwareCapabilities) -> Option<EngineInfo> {
        let latest_version = self.get_latest_version();
        
        match (&hardware.platform, &hardware.architecture, hardware.has_cuda, hardware.has_metal) {
            (Platform::Windows, HardwareArchitecture::X86_64, true, _) => {
                // Windows with CUDA - recommend CUDA version with latest version
                Some(EngineInfo {
                    id: format!("llama-cuda-windows-x64-{}", latest_version),
                    name: format!("llama.cpp CUDA Windows x64 ({})", latest_version),
                    version: latest_version.to_string(),
                    platform: Platform::Windows,
                    architecture: HardwareArchitecture::X86_64,
                    acceleration: AccelerationType::CUDA,
                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-win-cuda-12.4-x64.zip", latest_version, latest_version),
                    file_size: 373 * 1024 * 1024, // Approximate size based on release
                    checksum: "".to_string(), // Would be populated with actual checksum
                    compatibility_score: 95.0,
                    status: EngineStatus::Available,
                    install_path: None,
                    binary_name: "llama-server.exe".to_string(),
                    required_dependencies: vec!["CUDA 12.4+ Runtime".to_string()],
                })
            }
            (Platform::Windows, HardwareArchitecture::X86_64, false, _) => {
                // Windows CPU-only with latest version
                Some(EngineInfo {
                    id: format!("llama-cpu-windows-x64-{}", latest_version),
                    name: format!("llama.cpp CPU Windows x64 ({})", latest_version),
                    version: latest_version.to_string(),
                    platform: Platform::Windows,
                    architecture: HardwareArchitecture::X86_64,
                    acceleration: AccelerationType::CPU,
                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-win-x64.zip", latest_version, latest_version),
                    file_size: 50 * 1024 * 1024,
                    checksum: "".to_string(),
                    compatibility_score: 85.0,
                    status: EngineStatus::Available,
                    install_path: None,
                    binary_name: "llama-server.exe".to_string(),
                    required_dependencies: vec![],
                })
            }
            (Platform::MacOS, HardwareArchitecture::Aarch64, _, true) => {
                // macOS Apple Silicon with Metal - using latest version
                Some(EngineInfo {
                    id: format!("llama-metal-macos-arm64-{}", latest_version),
                    name: format!("llama.cpp Metal macOS ARM64 ({})", latest_version),
                    version: latest_version.to_string(),
                    platform: Platform::MacOS,
                    architecture: HardwareArchitecture::Aarch64,
                    acceleration: AccelerationType::Metal,
                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-macos-arm64.tar.gz", latest_version, latest_version),
                    file_size: 29 * 1024 * 1024,
                    checksum: "".to_string(),
                    compatibility_score: 95.0,
                    status: EngineStatus::Available,
                    install_path: None,
                    binary_name: "llama-server".to_string(),
                    required_dependencies: vec![],
                })
            }
            (Platform::MacOS, HardwareArchitecture::X86_64, _, _) => {
                // macOS Intel CPU-only - using latest version
                Some(EngineInfo {
                    id: format!("llama-cpu-macos-x64-{}", latest_version),
                    name: format!("llama.cpp CPU macOS x64 ({})", latest_version),
                    version: latest_version.to_string(),
                    platform: Platform::MacOS,
                    architecture: HardwareArchitecture::X86_64,
                    acceleration: AccelerationType::CPU,
                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-macos-x64.tar.gz", latest_version, latest_version),
                    file_size: 82 * 1024 * 1024,
                    checksum: "".to_string(),
                    compatibility_score: 85.0,
                    status: EngineStatus::Available,
                    install_path: None,
                    binary_name: "llama-server".to_string(),
                    required_dependencies: vec![],
                })
            }
            (Platform::Linux, HardwareArchitecture::X86_64, true, _) => {
                // Linux with CUDA - using latest version
                Some(EngineInfo {
                    id: format!("llama-cuda-linux-x64-{}", latest_version),
                    name: format!("llama.cpp CUDA Linux x64 ({})", latest_version),
                    version: latest_version.to_string(),
                    platform: Platform::Linux,
                    architecture: HardwareArchitecture::X86_64,
                    acceleration: AccelerationType::CUDA,
                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-ubuntu-x64-cuda-12.4.tar.gz", latest_version, latest_version), // Using ubuntu-x64-cuda as per release assets
                    file_size: 180 * 1024 * 1024,
                    checksum: "".to_string(),
                    compatibility_score: 90.0,
                    status: EngineStatus::Available,
                    install_path: None,
                    binary_name: "llama-server".to_string(),
                    required_dependencies: vec!["CUDA 12.4+ Runtime".to_string()],
                })
            }
            (Platform::Linux, HardwareArchitecture::X86_64, false, _) => {
                // Linux CPU-only - using latest version
                Some(EngineInfo {
                    id: format!("llama-cpu-linux-x64-{}", latest_version),
                    name: format!("llama.cpp CPU Linux x64 ({})", latest_version),
                    version: latest_version.to_string(),
                    platform: Platform::Linux,
                    architecture: HardwareArchitecture::X86_64,
                    acceleration: AccelerationType::CPU,
                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-ubuntu-x64.tar.gz", latest_version, latest_version),
                    file_size: 45 * 1024 * 1024,
                    checksum: "".to_string(),
                    compatibility_score: 80.0,
                    status: EngineStatus::Available,
                    install_path: None,
                    binary_name: "llama-server".to_string(),
                    required_dependencies: vec![],
                })
            }
            // No mapping for other combinations (e.g. Windows/Linux ARM64).
            _ => None,
        }
    }
437    
438    /// Set the default engine for single-engine mode
439    pub fn set_default_engine(&mut self, engine_id: &str) -> Result<()> {
440        if self.installed_engines.contains_key(engine_id) {
441            self.default_engine = Some(engine_id.to_string());
442            info!("Set default engine: {}", engine_id);
443            Ok(())
444        } else {
445            Err(anyhow::anyhow!("Engine not found: {}", engine_id))
446        }
447    }
448
    /// Check if we have any installed engine
    /// (i.e. at least one entry in `installed_engines`).
    pub fn has_installed_engine(&self) -> bool {
        !self.installed_engines.is_empty()
    }
453
454    /// Get the default engine (single-engine model)
455    pub fn get_default_engine(&self) -> Option<&EngineInfo> {
456        if let Some(ref engine_id) = self.default_engine {
457            self.installed_engines.get(engine_id)
458        } else {
459            // Fallback: return first installed engine
460            self.installed_engines.values().next()
461        }
462    }
463    
    /// Add a newly installed engine to the registry.
    ///
    /// Forces the status to `Installed` before inserting, replacing any
    /// existing entry with the same id. (Declared async but currently
    /// performs no awaits — presumably for interface symmetry; confirm
    /// before simplifying.)
    pub async fn add_installed_engine(&mut self, mut engine: EngineInfo) -> Result<()> {
        engine.status = EngineStatus::Installed;
        self.installed_engines.insert(engine.id.clone(), engine);
        Ok(())
    }
470    
471    /// Refresh available engines from official sources
472    pub async fn refresh_available_engines(&mut self, hardware_capabilities: &HardwareCapabilities) -> Result<()> {
473        // Clear existing available engines and populate with ALL compatible options
474        self.available_engines.clear();
475        
476        // Get all engines for the current platform (not just the recommended one)
477        let all_engines = self.get_all_compatible_engines(hardware_capabilities);
478        
479        for engine in all_engines {
480            if !self.available_engines.iter().any(|e| e.id == engine.id) {
481                self.available_engines.push(engine);
482            }
483        }
484        
485        // Also add additional fallback engines to ensure there are always options
486        let fallback_engines = self.get_additional_engine_recommendations(hardware_capabilities);
487        for engine in fallback_engines {
488            if !self.available_engines.iter().any(|e| e.id == engine.id) {
489                self.available_engines.push(engine);
490            }
491        }
492        
493        info!("Refreshed available engines: {} found", self.available_engines.len());
494        Ok(())
495    }
496    
497    /// Get ALL compatible engines for the platform (like LM Studio)
498    fn get_all_compatible_engines(&self, hardware: &HardwareCapabilities) -> Vec<EngineInfo> {
499        let mut engines = Vec::new();
500        let latest_version = self.get_latest_version();
501        
502        match (&hardware.platform, &hardware.architecture) {
503            (Platform::Windows, HardwareArchitecture::X86_64) => {
504                // Windows x64: Always add CPU, CUDA, and Vulkan options
505                
506                // 1. CPU Engine (works on all Windows x64)
507                engines.push(EngineInfo {
508                    id: format!("llama-cpu-windows-x64-{}", latest_version),
509                    name: format!("llama.cpp CPU (Windows x64) ({})", latest_version),
510                    version: latest_version.to_string(),
511                    platform: Platform::Windows,
512                    architecture: HardwareArchitecture::X86_64,
513                    acceleration: AccelerationType::CPU,
514                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-win-x64.zip", latest_version, latest_version),
515                    file_size: 50 * 1024 * 1024,
516                    checksum: "".to_string(),
517                    compatibility_score: if !hardware.has_cuda { 95.0 } else { 80.0 },
518                    status: EngineStatus::Available,
519                    install_path: None,
520                    binary_name: "llama-server.exe".to_string(),
521                    required_dependencies: vec![],
522                });
523                
524                // 2. CUDA Engine (if NVIDIA GPU detected)
525                if hardware.has_cuda {
526                    engines.push(EngineInfo {
527                        id: format!("llama-cuda-windows-x64-{}", latest_version),
528                        name: format!("llama.cpp CUDA (Windows x64) ({})", latest_version),
529                        version: latest_version.to_string(),
530                        platform: Platform::Windows,
531                        architecture: HardwareArchitecture::X86_64,
532                        acceleration: AccelerationType::CUDA,
533                        download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-win-cuda-12.4-x64.zip", latest_version, latest_version),
534                        file_size: 373 * 1024 * 1024, // Based on actual release size
535                        checksum: "".to_string(),
536                        compatibility_score: 100.0,
537                        status: EngineStatus::Available,
538                        install_path: None,
539                        binary_name: "llama-server.exe".to_string(),
540                        required_dependencies: vec!["NVIDIA GPU with CUDA support".to_string()],
541                    });
542                    
543                    // CUDA 13 variant
544                    engines.push(EngineInfo {
545                        id: format!("llama-cuda13-windows-x64-{}", latest_version),
546                        name: format!("llama.cpp CUDA 13 (Windows x64) ({})", latest_version),
547                        version: latest_version.to_string(),
548                        platform: Platform::Windows,
549                        architecture: HardwareArchitecture::X86_64,
550                        acceleration: AccelerationType::CUDA,
551                        download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-win-cuda-13.1-x64.zip", latest_version, latest_version),
552                        file_size: 384 * 1024 * 1024, // Based on actual release size
553                        checksum: "".to_string(),
554                        compatibility_score: 95.0,
555                        status: EngineStatus::Available,
556                        install_path: None,
557                        binary_name: "llama-server.exe".to_string(),
558                        required_dependencies: vec!["CUDA 13.1+ Runtime".to_string()],
559                    });
560                }
561                
562                // 3. Vulkan Engine (alternative GPU acceleration)
563                if hardware.has_vulkan || hardware.has_cuda {
564                    engines.push(EngineInfo {
565                        id: format!("llama-vulkan-windows-x64-{}", latest_version),
566                        name: format!("llama.cpp Vulkan (Windows x64) ({})", latest_version),
567                        version: latest_version.to_string(),
568                        platform: Platform::Windows,
569                        architecture: HardwareArchitecture::X86_64,
570                        acceleration: AccelerationType::Vulkan,
571                        download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-win-vulkan-x64.zip", latest_version, latest_version),
572                        file_size: 80 * 1024 * 1024,
573                        checksum: "".to_string(),
574                        compatibility_score: 85.0,
575                        status: EngineStatus::Available,
576                        install_path: None,
577                        binary_name: "llama-server.exe".to_string(),
578                        required_dependencies: vec!["Vulkan-compatible GPU".to_string()],
579                    });
580                }
581            }
582            
583            (Platform::MacOS, HardwareArchitecture::Aarch64) => {
584                // macOS Apple Silicon: Metal and CPU
585                engines.push(EngineInfo {
586                    id: format!("llama-metal-macos-arm64-{}", latest_version),
587                    name: format!("llama.cpp Metal (macOS Apple Silicon) ({})", latest_version),
588                    version: latest_version.to_string(),
589                    platform: Platform::MacOS,
590                    architecture: HardwareArchitecture::Aarch64,
591                    acceleration: AccelerationType::Metal,
592                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-macos-arm64.tar.gz", latest_version, latest_version),
593                    file_size: 29 * 1024 * 1024,
594                    checksum: "".to_string(),
595                    compatibility_score: 100.0,
596                    status: EngineStatus::Available,
597                    install_path: None,
598                    binary_name: "llama-server".to_string(),
599                    required_dependencies: vec![],
600                });
601                
602                // CPU fallback
603                engines.push(EngineInfo {
604                    id: format!("llama-cpu-macos-arm64-{}", latest_version),
605                    name: format!("llama.cpp CPU (macOS Apple Silicon) ({})", latest_version),
606                    version: latest_version.to_string(),
607                    platform: Platform::MacOS,
608                    architecture: HardwareArchitecture::Aarch64,
609                    acceleration: AccelerationType::CPU,
610                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-macos-arm64.tar.gz", latest_version, latest_version),
611                    file_size: 29 * 1024 * 1024,
612                    checksum: "".to_string(),
613                    compatibility_score: 90.0,
614                    status: EngineStatus::Available,
615                    install_path: None,
616                    binary_name: "llama-server".to_string(),
617                    required_dependencies: vec![],
618                });
619            }
620            
621            (Platform::MacOS, HardwareArchitecture::X86_64) => {
622                // macOS Intel: CPU only
623                engines.push(EngineInfo {
624                    id: format!("llama-cpu-macos-x64-{}", latest_version),
625                    name: format!("llama.cpp CPU (macOS Intel) ({})", latest_version),
626                    version: latest_version.to_string(),
627                    platform: Platform::MacOS,
628                    architecture: HardwareArchitecture::X86_64,
629                    acceleration: AccelerationType::CPU,
630                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-macos-x64.tar.gz", latest_version, latest_version),
631                    file_size: 82 * 1024 * 1024,
632                    checksum: "".to_string(),
633                    compatibility_score: 95.0,
634                    status: EngineStatus::Available,
635                    install_path: None,
636                    binary_name: "llama-server".to_string(),
637                    required_dependencies: vec![],
638                });
639            }
640            
641            (Platform::Linux, HardwareArchitecture::X86_64) => {
642                // Linux x64: CPU, CUDA
643                engines.push(EngineInfo {
644                    id: format!("llama-cpu-linux-x64-{}", latest_version),
645                    name: format!("llama.cpp CPU (Linux x64) ({})", latest_version),
646                    version: latest_version.to_string(),
647                    platform: Platform::Linux,
648                    architecture: HardwareArchitecture::X86_64,
649                    acceleration: AccelerationType::CPU,
650                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-ubuntu-x64.tar.gz", latest_version, latest_version),
651                    file_size: 45 * 1024 * 1024,
652                    checksum: "".to_string(),
653                    compatibility_score: if !hardware.has_cuda { 95.0 } else { 80.0 },
654                    status: EngineStatus::Available,
655                    install_path: None,
656                    binary_name: "llama-server".to_string(),
657                    required_dependencies: vec![],
658                });
659                
660                if hardware.has_cuda {
661                    engines.push(EngineInfo {
662                        id: format!("llama-cuda-linux-x64-{}", latest_version),
663                        name: format!("llama.cpp CUDA (Linux x64) ({})", latest_version),
664                        version: latest_version.to_string(),
665                        platform: Platform::Linux,
666                        architecture: HardwareArchitecture::X86_64,
667                        acceleration: AccelerationType::CUDA,
668                        download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-ubuntu-x64-cuda-12.4.tar.gz", latest_version, latest_version),
669                        file_size: 180 * 1024 * 1024,
670                        checksum: "".to_string(),
671                        compatibility_score: 100.0,
672                        status: EngineStatus::Available,
673                        install_path: None,
674                        binary_name: "llama-server".to_string(),
675                        required_dependencies: vec!["NVIDIA GPU with CUDA support".to_string()],
676                    });
677                }
678            }
679            
680            _ => {
681                // Unknown platform/architecture - add generic CPU engine
682                info!("Unknown platform/architecture: {:?}/{:?}", hardware.platform, hardware.architecture);
683            }
684        }
685        
686        // Sort by compatibility score (highest first)
687        engines.sort_by(|a, b| b.compatibility_score.partial_cmp(&a.compatibility_score).unwrap());
688        
689        engines
690    }
691    
692    /// Get additional engine recommendations to ensure there are always options
693    fn get_additional_engine_recommendations(&self, hardware: &HardwareCapabilities) -> Vec<EngineInfo> {
694        let mut engines = Vec::new();
695        let latest_version = self.get_latest_version();
696        
697        // Add a basic CPU engine for each platform as a fallback
698        match &hardware.platform {
699            Platform::Windows => {
700                engines.push(EngineInfo {
701                    id: format!("llama-cpu-windows-x64-fallback-{}", latest_version),
702                    name: format!("llama.cpp CPU Engine (Fallback {}) ", latest_version),
703                    version: latest_version.to_string(),
704                    platform: Platform::Windows,
705                    architecture: HardwareArchitecture::X86_64,
706                    acceleration: AccelerationType::CPU,
707                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-win-x64.zip", latest_version, latest_version),
708                    file_size: 50 * 1024 * 1024,
709                    checksum: "".to_string(),
710                    compatibility_score: 70.0,
711                    status: EngineStatus::Available,
712                    install_path: None,
713                    binary_name: "llama-server.exe".to_string(),
714                    required_dependencies: vec![],
715                });
716            }
717            Platform::Linux => {
718                engines.push(EngineInfo {
719                    id: format!("llama-cpu-linux-x64-fallback-{}", latest_version),
720                    name: format!("llama.cpp CPU Engine (Fallback {})", latest_version),
721                    version: latest_version.to_string(),
722                    platform: Platform::Linux,
723                    architecture: HardwareArchitecture::X86_64,
724                    acceleration: AccelerationType::CPU,
725                    download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-ubuntu-x64.tar.gz", latest_version, latest_version),
726                    file_size: 45 * 1024 * 1024,
727                    checksum: "".to_string(),
728                    compatibility_score: 70.0,
729                    status: EngineStatus::Available,
730                    install_path: None,
731                    binary_name: "llama-server".to_string(),
732                    required_dependencies: vec![],
733                });
734            }
735            Platform::MacOS => {
736                // Provide architecture-correct fallbacks.
737                // Apple Silicon (Aarch64): arm64 Metal is the primary choice; arm64 CPU
738                //   is the fallback. Never offer the x64 binary — it can only run via
739                //   Rosetta 2 and performs poorly for LLM workloads.
740                // Intel (X86_64): x64 CPU is the only option (no Metal GPU acceleration).
741                match &hardware.architecture {
742                    HardwareArchitecture::Aarch64 => {
743                        engines.push(EngineInfo {
744                            id: format!("llama-metal-macos-arm64-fallback-{}", latest_version),
745                            name: format!("llama.cpp Metal (Apple Silicon) fallback ({})", latest_version),
746                            version: latest_version.to_string(),
747                            platform: Platform::MacOS,
748                            architecture: HardwareArchitecture::Aarch64,
749                            acceleration: AccelerationType::Metal,
750                            download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-macos-arm64.tar.gz", latest_version, latest_version),
751                            file_size: 29 * 1024 * 1024,
752                            checksum: "".to_string(),
753                            compatibility_score: 85.0,
754                            status: EngineStatus::Available,
755                            install_path: None,
756                            binary_name: "llama-server".to_string(),
757                            required_dependencies: vec![],
758                        });
759                        // CPU-only arm64 as secondary fallback
760                        engines.push(EngineInfo {
761                            id: format!("llama-cpu-macos-arm64-fallback-{}", latest_version),
762                            name: format!("llama.cpp CPU (Apple Silicon) fallback ({})", latest_version),
763                            version: latest_version.to_string(),
764                            platform: Platform::MacOS,
765                            architecture: HardwareArchitecture::Aarch64,
766                            acceleration: AccelerationType::CPU,
767                            download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-macos-arm64.tar.gz", latest_version, latest_version),
768                            file_size: 29 * 1024 * 1024,
769                            checksum: "".to_string(),
770                            compatibility_score: 70.0,
771                            status: EngineStatus::Available,
772                            install_path: None,
773                            binary_name: "llama-server".to_string(),
774                            required_dependencies: vec![],
775                        });
776                    }
777                    _ => {
778                        // Intel Mac (X86_64) or other — use x64 CPU binary
779                        engines.push(EngineInfo {
780                            id: format!("llama-cpu-macos-x64-fallback-{}", latest_version),
781                            name: format!("llama.cpp CPU (macOS Intel) fallback ({})", latest_version),
782                            version: latest_version.to_string(),
783                            platform: Platform::MacOS,
784                            architecture: HardwareArchitecture::X86_64,
785                            acceleration: AccelerationType::CPU,
786                            download_url: format!("https://github.com/ggml-org/llama.cpp/releases/download/{}/llama-{}-bin-macos-x64.tar.gz", latest_version, latest_version),
787                            file_size: 82 * 1024 * 1024,
788                            checksum: "".to_string(),
789                            compatibility_score: 70.0,
790                            status: EngineStatus::Available,
791                            install_path: None,
792                            binary_name: "llama-server".to_string(),
793                            required_dependencies: vec![],
794                        });
795                    }
796                }
797            }
798        }
799        
800        engines
801    }
802    
803    /// Get the path to the default engine binary
804    pub fn get_default_engine_binary_path(&self) -> Option<PathBuf> {
805        if let Some(engine) = self.get_default_engine() {
806            engine.install_path.as_ref().map(|path| path.join(&engine.binary_name))
807        } else {
808            None
809        }
810    }
811
812    /// Mark that a download has started. Returns true if the download was started,
813    /// false if another download is already in progress.
814    pub fn mark_download_started(&self) -> bool {
815        let mut flag = self.download_in_progress.write().unwrap();
816        if *flag {
817            false // Download already in progress
818        } else {
819            *flag = true;
820            true // Download started
821        }
822    }
823
824    /// Mark that a download has finished (either successfully or with error)
825    pub fn mark_download_finished(&self) {
826        *self.download_in_progress.write().unwrap() = false;
827    }
828
829    /// Check if a download is currently in progress
830    pub fn is_download_in_progress(&self) -> bool {
831        *self.download_in_progress.read().unwrap()
832    }
833}