offline_intelligence/model_runtime/platform_detector.rs

1//! Platform and Hardware Detection
2//!
3//! Detects the appropriate runtime binary based on the platform (Windows, Linux, macOS)
4//! and hardware capabilities (Intel, Apple Silicon, NVIDIA CUDA).
5
6use std::path::PathBuf;
7use std::sync::OnceLock;
8use tracing::info;
9
10#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
11pub enum Platform {
12    Windows,
13    Linux,
14    MacOS,
15}
16
17#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
18pub enum HardwareArchitecture {
19    X86_64,
20    Aarch64, // Apple Silicon, ARM
21    Other(String),
22}
23
24#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
25pub struct HardwareCapabilities {
26    pub platform: Platform,
27    pub architecture: HardwareArchitecture,
28    pub has_cuda: bool,
29    pub has_metal: bool, // For Apple GPUs
30    pub has_vulkan: bool,
31}
32
33// Static cache for hardware capabilities to avoid repeated detection
34static HARDWARE_CACHE: OnceLock<HardwareCapabilities> = OnceLock::new();
35
36impl Default for HardwareCapabilities {
37    fn default() -> Self {
38        Self::detect()
39    }
40}
41
42impl HardwareCapabilities {
43    /// Detect hardware capabilities automatically (cached)
44    pub fn detect() -> Self {
45        // Return cached result if available
46        if let Some(cached) = HARDWARE_CACHE.get() {
47            return cached.clone();
48        }
49
50        // Perform detection
51        let platform = Self::detect_platform();
52        let architecture = Self::detect_architecture();
53        let has_cuda = Self::detect_cuda_support();
54        let has_metal = Self::detect_metal_support(&architecture);
55        let has_vulkan = Self::detect_vulkan_support();
56
57        info!(
58            "Detected platform: {:?}, architecture: {:?}, CUDA: {}, Metal: {}, Vulkan: {}",
59            platform, architecture, has_cuda, has_metal, has_vulkan
60        );
61
62        let capabilities = Self {
63            platform,
64            architecture,
65            has_cuda,
66            has_metal,
67            has_vulkan,
68        };
69
70        // Cache the result (ignore if already set by another thread)
71        let _ = HARDWARE_CACHE.set(capabilities.clone());
72
73        capabilities
74    }
75
76    fn detect_platform() -> Platform {
77        if cfg!(target_os = "windows") {
78            Platform::Windows
79        } else if cfg!(target_os = "linux") {
80            Platform::Linux
81        } else if cfg!(target_os = "macos") {
82            Platform::MacOS
83        } else {
84            // Default to current platform if unknown
85            #[cfg(target_os = "windows")]
86            return Platform::Windows;
87            #[cfg(target_os = "linux")]
88            return Platform::Linux;
89            #[cfg(target_os = "macos")]
90            return Platform::MacOS;
91            #[cfg(not(any(target_os = "windows", target_os = "linux", target_os = "macos")))]
92            return Platform::Linux; // Default fallback
93        }
94    }
95
96    fn detect_architecture() -> HardwareArchitecture {
97        if cfg!(target_arch = "x86_64") {
98            HardwareArchitecture::X86_64
99        } else if cfg!(target_arch = "aarch64") {
100            HardwareArchitecture::Aarch64
101        } else {
102            HardwareArchitecture::Other(std::env::consts::ARCH.to_string())
103        }
104    }
105
106    fn detect_cuda_support() -> bool {
107        // Check for NVIDIA GPU via nvidia-smi with a timeout to prevent hangs
108        // on systems with broken driver installations
109        use std::process::{Command, Stdio};
110
111        // Create command with hidden window on Windows
112        #[cfg(target_os = "windows")]
113        let child = {
114            use std::os::windows::process::CommandExt;
115            Command::new("nvidia-smi")
116                .arg("--query-gpu=name")
117                .arg("--format=csv,noheader,nounits")
118                .stdout(Stdio::piped())
119                .stderr(Stdio::null())
120                .creation_flags(0x08000000) // CREATE_NO_WINDOW
121                .spawn()
122        };
123
124        #[cfg(not(target_os = "windows"))]
125        let child = Command::new("nvidia-smi")
126            .arg("--query-gpu=name")
127            .arg("--format=csv,noheader,nounits")
128            .stdout(Stdio::piped())
129            .stderr(Stdio::null())
130            .spawn();
131
132        match child {
133            Ok(mut process) => {
134                // Wait up to 5 seconds for nvidia-smi to respond
135                let start = std::time::Instant::now();
136                loop {
137                    match process.try_wait() {
138                        Ok(Some(status)) => return status.success(),
139                        Ok(None) => {
140                            if start.elapsed() > std::time::Duration::from_secs(5) {
141                                let _ = process.kill();
142                                let _ = process.wait();
143                                return false;
144                            }
145                            std::thread::sleep(std::time::Duration::from_millis(50));
146                        }
147                        Err(_) => return false,
148                    }
149                }
150            }
151            Err(_) => false,
152        }
153    }
154
155    /// Metal is available on all Apple Silicon Macs and on Intel Macs with a
156    /// compatible GPU (all Macs from ~2012 onward support Metal).
157    /// However, llama.cpp Metal acceleration is only effective on Apple Silicon
158    /// where unified memory is shared between CPU and GPU.  We return `true`
159    /// for all macOS targets so the engine registry correctly recommends the
160    /// Metal-enabled binary; gpu_layers is set to 0 for Intel in config.rs.
161    fn detect_metal_support(architecture: &HardwareArchitecture) -> bool {
162        if cfg!(target_os = "macos") {
163            // Both Apple Silicon and Intel Mac support Metal API
164            // (config.rs sets gpu_layers=0 for Intel to keep CPU-only inference)
165            let _ = architecture; // suppress unused-variable warning
166            true
167        } else {
168            false
169        }
170    }
171
172    fn detect_vulkan_support() -> bool {
173        false
174    }
175
176    /// Get the appropriate runtime binary path based on platform and hardware.
177    ///
178    /// Directory naming uses the **exact capitalisation** as stored on disk
179    /// (`Windows`, `MacOS`, `Linux`) to match what `config.rs` expects and
180    /// what is present in the repo's `Resources/bin/` tree.
181    pub fn get_runtime_binary_path(&self) -> Option<PathBuf> {
182        let resources_dir = self.get_resources_dir()?;
183
184        // Use the canonical, mixed-case folder name that matches the on-disk
185        // directory — do NOT call .to_lowercase() here, because on case-sensitive
186        // APFS that would silently break the lookup.
187        let os_folder = match &self.platform {
188            Platform::Windows => "Windows",
189            Platform::MacOS   => "MacOS",
190            Platform::Linux   => "Linux",
191        };
192        let platform_dir = resources_dir.join(os_folder);
193
194        match &self.platform {
195            Platform::Windows => {
196                // On Windows, prefer CUDA if available, otherwise use CPU
197                if self.has_cuda {
198                    Some(
199                        platform_dir
200                            .join("llama-b6970-bin-win-cuda-12.4-x64")
201                            .join("llama-server.exe"),
202                    )
203                } else {
204                    Some(platform_dir.join("llama-cpu").join("llama-server.exe"))
205                }
206            }
207            Platform::Linux => {
208                // On Linux, return None to use the config.rs / engine-registry path
209                None
210            }
211            Platform::MacOS => {
212                // On macOS, use Metal-compiled binary for both Apple Silicon and Intel.
213                // llama.cpp's macOS release builds always include Metal support;
214                // gpu_layers is controlled at runtime (0 for Intel, >0 for AS).
215                if self.has_metal {
216                    Some(platform_dir.join("llama-metal").join("llama-server"))
217                } else {
218                    Some(platform_dir.join("llama-cpu").join("llama-server"))
219                }
220            }
221        }
222    }
223
224    /// Locate the `Resources/bin` directory.
225    ///
226    /// Search order (most to least specific):
227    ///  1. macOS .app bundle standard: `<exe>/../Resources/bin`
228    ///     i.e. `App.app/Contents/Resources/bin`
229    ///  2. Sibling `Resources/bin` next to the executable
230    ///  3. Current working directory `Resources/bin`
231    ///  4. Development path relative to crate root
232    fn get_resources_dir(&self) -> Option<PathBuf> {
233        if let Ok(current_exe) = std::env::current_exe() {
234            if let Some(exe_dir) = current_exe.parent() {
235                // --- (1) macOS .app bundle ---
236                // exe_dir = App.app/Contents/MacOS/
237                // parent  = App.app/Contents/
238                // Resources live at App.app/Contents/Resources/
239                if let Some(contents_dir) = exe_dir.parent() {
240                    let candidate = contents_dir.join("Resources").join("bin");
241                    if candidate.exists() {
242                        return Some(candidate);
243                    }
244                }
245
246                // --- (2) Resources/ sibling to executable ---
247                // (Linux AppImage, dev `cargo run` from workspace root, etc.)
248                for resource_folder in &["Resources", "resources"] {
249                    let candidate = exe_dir.join(resource_folder).join("bin");
250                    if candidate.exists() {
251                        return Some(candidate);
252                    }
253                }
254            }
255        }
256
257        // --- (3) CWD-relative ---
258        for resource_folder in &["Resources", "resources"] {
259            let candidate = std::path::PathBuf::from(resource_folder).join("bin");
260            if candidate.exists() {
261                return Some(candidate);
262            }
263        }
264
265        // --- (4) Development: relative to crate manifest ---
266        #[cfg(debug_assertions)]
267        {
268            let dev_candidate = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
269                .join("Resources")
270                .join("bin");
271            if dev_candidate.exists() {
272                return Some(dev_candidate);
273            }
274        }
275
276        None
277    }
278}
279
280impl std::fmt::Display for Platform {
281    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
282        match self {
283            Platform::Windows => write!(f, "Windows"),
284            Platform::Linux => write!(f, "Linux"),
285            Platform::MacOS => write!(f, "MacOS"),
286        }
287    }
288}
289
290impl std::fmt::Display for HardwareArchitecture {
291    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
292        match self {
293            HardwareArchitecture::X86_64 => write!(f, "x86_64"),
294            HardwareArchitecture::Aarch64 => write!(f, "aarch64"),
295            HardwareArchitecture::Other(s) => write!(f, "{}", s),
296        }
297    }
298}