hanzo_engine/
diagnostics.rs

1use std::path::{Path, PathBuf};
2use std::time::Instant;
3
4use hf_hub::{api::sync::ApiBuilder, Cache};
5use serde::{Deserialize, Serialize};
6use sysinfo::{Disks, System};
7
8#[cfg(any(feature = "cuda", feature = "metal"))]
9use crate::MemoryUsage;
10#[cfg(any(feature = "cuda", feature = "metal"))]
11use hanzo_ml::Device;
12
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct CpuInfo {
15    pub brand: Option<String>,
16    pub logical_cores: usize,
17    pub physical_cores: Option<usize>,
18    pub avx: bool,
19    pub avx2: bool,
20    pub avx512: bool,
21    pub fma: bool,
22}
23
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct MemoryInfo {
26    pub total_bytes: u64,
27    pub available_bytes: u64,
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct DeviceInfo {
32    pub kind: String,
33    pub ordinal: Option<usize>,
34    pub name: Option<String>,
35    pub total_memory_bytes: Option<u64>,
36    pub available_memory_bytes: Option<u64>,
37    /// CUDA compute capability (major, minor) - None for non-CUDA devices
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub compute_capability: Option<(u32, u32)>,
40    /// Whether this GPU supports Flash Attention v2 (compute capability >= 8.0)
41    #[serde(skip_serializing_if = "Option::is_none")]
42    pub flash_attn_compatible: Option<bool>,
43    /// Whether this GPU supports Flash Attention v3 (compute capability == 9.0, Hopper only)
44    #[serde(skip_serializing_if = "Option::is_none")]
45    pub flash_attn_v3_compatible: Option<bool>,
46    /// Whether this device uses unified memory (GPU and CPU share the same physical RAM)
47    #[serde(skip_serializing_if = "Option::is_none")]
48    pub unified_memory: Option<bool>,
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct BuildInfo {
53    pub cuda: bool,
54    pub metal: bool,
55    pub cudnn: bool,
56    pub flash_attn: bool,
57    pub flash_attn_v3: bool,
58    pub accelerate: bool,
59    pub mkl: bool,
60    pub git_revision: String,
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct HfConnectivityInfo {
65    /// Whether HuggingFace is reachable
66    pub reachable: bool,
67    /// Latency in milliseconds (if reachable)
68    #[serde(skip_serializing_if = "Option::is_none")]
69    pub latency_ms: Option<u64>,
70    /// Whether the token is valid for gated models
71    #[serde(skip_serializing_if = "Option::is_none")]
72    pub token_valid_for_gated: Option<bool>,
73    /// Error message if not reachable
74    #[serde(skip_serializing_if = "Option::is_none")]
75    pub error: Option<String>,
76}
77
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct SystemInfo {
80    pub os: Option<String>,
81    pub kernel: Option<String>,
82    pub cpu: CpuInfo,
83    pub memory: MemoryInfo,
84    pub devices: Vec<DeviceInfo>,
85    pub build: BuildInfo,
86    pub hf_cache_path: Option<String>,
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize)]
90#[serde(rename_all = "lowercase")]
91pub enum DoctorStatus {
92    Ok,
93    Warn,
94    Error,
95}
96
97#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct DoctorCheck {
99    pub name: String,
100    pub status: DoctorStatus,
101    pub message: String,
102    #[serde(skip_serializing_if = "Option::is_none")]
103    pub suggestion: Option<String>,
104}
105
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct DoctorReport {
108    pub system: SystemInfo,
109    pub checks: Vec<DoctorCheck>,
110}
111
112fn build_info() -> BuildInfo {
113    BuildInfo {
114        cuda: cfg!(feature = "cuda"),
115        metal: cfg!(feature = "metal"),
116        cudnn: cfg!(feature = "cudnn"),
117        flash_attn: cfg!(feature = "flash-attn"),
118        flash_attn_v3: cfg!(feature = "flash-attn-v3"),
119        accelerate: cfg!(feature = "accelerate"),
120        mkl: cfg!(feature = "mkl"),
121        git_revision: crate::HANZO_GIT_REVISION.to_string(),
122    }
123}
124
125fn collect_devices(sys: &System) -> Vec<DeviceInfo> {
126    let mut devices = Vec::new();
127
128    // CPU device
129    let cpu_brand = sys.cpus().first().map(|c| c.brand().to_string());
130    devices.push(DeviceInfo {
131        kind: "cpu".to_string(),
132        ordinal: None,
133        name: cpu_brand,
134        total_memory_bytes: Some(sys.total_memory()),
135        available_memory_bytes: Some(sys.available_memory()),
136        compute_capability: None,
137        flash_attn_compatible: None,
138        flash_attn_v3_compatible: None,
139        unified_memory: None,
140    });
141
142    #[cfg(feature = "cuda")]
143    {
144        let mut ord = 0;
145        while let Ok(dev) = Device::new_cuda(ord) {
146            let mem = MemoryUsage.query(&dev).ok();
147            let total = mem.map(|m| m.total() as u64);
148            let avail = mem.map(|m| m.available() as u64);
149
150            // Get compute capability
151            let compute_cap = get_cuda_compute_capability(ord);
152            let flash_attn_v2_ok = compute_cap.map(|(major, _minor)| {
153                // Flash Attention v2 requires compute capability >= 8.0 (Ampere+)
154                major >= 8
155            });
156            let flash_attn_v3_ok = compute_cap.map(|(major, minor)| {
157                // Flash Attention v3 requires compute capability == 9.0 (Hopper only)
158                major == 9 && minor == 0
159            });
160
161            devices.push(DeviceInfo {
162                kind: "cuda".to_string(),
163                ordinal: Some(ord),
164                name: None,
165                total_memory_bytes: total,
166                available_memory_bytes: avail,
167                compute_capability: compute_cap,
168                flash_attn_compatible: flash_attn_v2_ok,
169                flash_attn_v3_compatible: flash_attn_v3_ok,
170                unified_memory: Some(crate::utils::normal::is_integrated_gpu(&dev)),
171            });
172            ord += 1;
173        }
174    }
175
176    #[cfg(feature = "metal")]
177    {
178        let total = hanzo_metal_kernels::metal::Device::all().len();
179        for ord in 0..total {
180            if let Ok(dev) = Device::new_metal(ord) {
181                let mem = MemoryUsage.query(&dev).ok();
182                let total = mem.map(|m| m.total() as u64);
183                let avail = mem.map(|m| m.available() as u64);
184                devices.push(DeviceInfo {
185                    kind: "metal".to_string(),
186                    ordinal: Some(ord),
187                    name: None,
188                    total_memory_bytes: total,
189                    available_memory_bytes: avail,
190                    compute_capability: None,
191                    flash_attn_compatible: Some(true), // Metal always supports flash attention
192                    flash_attn_v3_compatible: None,    // Flash Attn v3 is CUDA Hopper only
193                    unified_memory: Some(true),        // Apple Silicon always uses unified memory
194                });
195            }
196        }
197    }
198
199    devices
200}
201
/// Look up the CUDA compute capability of the GPU at `ordinal`.
///
/// Runs `nvidia-smi --query-gpu=compute_cap --format=csv,noheader -i=<ordinal>`
/// and parses its trimmed output as `major.minor` (e.g. `8.9`).
///
/// Returns `None` when the command cannot be launched, exits unsuccessfully,
/// prints non-UTF-8 output, or prints anything that is not exactly two
/// dot-separated unsigned integers.
#[cfg(feature = "cuda")]
fn get_cuda_compute_capability(ordinal: usize) -> Option<(u32, u32)> {
    let device_selector = format!("-i={ordinal}");

    let report = std::process::Command::new("nvidia-smi")
        .arg("--query-gpu=compute_cap")
        .arg("--format=csv,noheader")
        .arg(&device_selector)
        .output()
        .ok()
        .filter(|out| out.status.success())?;

    let text = String::from_utf8(report.stdout).ok()?;

    // A second '.' ends up in `minor` and makes its parse fail, so exactly two
    // numeric components are required.
    let (major, minor) = text.trim().split_once('.')?;
    Some((major.parse().ok()?, minor.parse().ok()?))
}
232
/// Stand-in used when the `cuda` feature is disabled: there is nothing to
/// query, so the compute capability is always `None`.
#[allow(dead_code)] // the only caller in this file is compiled out without `cuda`
#[cfg(not(feature = "cuda"))]
fn get_cuda_compute_capability(_ordinal: usize) -> Option<(u32, u32)> {
    None
}
238
/// Probe the running CPU for SIMD extensions.
///
/// Returns `(avx, avx2, avx512, fma)`, where `avx512` reflects the `avx512f`
/// feature. On targets other than x86 / x86_64 every flag is `false`.
fn detect_cpu_extensions() -> (bool, bool, bool, bool) {
    // The detection macro only exists on x86 targets, so the branch has to be
    // selected at compile time rather than with a runtime `if`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        (
            std::arch::is_x86_feature_detected!("avx"),
            std::arch::is_x86_feature_detected!("avx2"),
            std::arch::is_x86_feature_detected!("avx512f"),
            std::arch::is_x86_feature_detected!("fma"),
        )
    }
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        (false, false, false, false)
    }
}
254
255pub fn collect_system_info() -> SystemInfo {
256    let mut sys = System::new_all();
257    sys.refresh_all();
258
259    let (avx, avx2, avx512, fma) = detect_cpu_extensions();
260
261    let cpu = CpuInfo {
262        brand: sys.cpus().first().map(|c| c.brand().to_string()),
263        logical_cores: sys.cpus().len(),
264        physical_cores: System::physical_core_count(),
265        avx,
266        avx2,
267        avx512,
268        fma,
269    };
270
271    let memory = MemoryInfo {
272        total_bytes: sys.total_memory(),
273        available_bytes: sys.available_memory(),
274    };
275
276    let hf_cache = Cache::from_env();
277    let hf_cache_path = hf_cache.path().to_string_lossy().to_string();
278
279    SystemInfo {
280        os: System::name(),
281        kernel: System::kernel_version(),
282        cpu,
283        memory,
284        devices: collect_devices(&sys),
285        build: build_info(),
286        hf_cache_path: Some(hf_cache_path),
287    }
288}
289
290/// Check HuggingFace connectivity and token validity by accessing a gated model
291#[allow(clippy::cast_possible_truncation)]
292pub fn check_hf_gated_access() -> HfConnectivityInfo {
293    let start = Instant::now();
294
295    if crate::pipeline::hf::is_hf_hub_offline() {
296        return HfConnectivityInfo {
297            reachable: false,
298            latency_ms: None,
299            token_valid_for_gated: None,
300            error: Some(format!(
301                "Skipped: `{}` is set; no network calls were made.",
302                crate::pipeline::hf::HF_HUB_OFFLINE_ENV
303            )),
304        };
305    }
306
307    // Try to access a gated model (google/gemma-3-4b-it)
308    let api_result = ApiBuilder::from_env()
309        .with_progress(false)
310        .build()
311        .and_then(|api| api.model("google/gemma-3-4b-it".to_string()).info());
312
313    let latency_ms = start.elapsed().as_millis() as u64;
314
315    match api_result {
316        Ok(_) => HfConnectivityInfo {
317            reachable: true,
318            latency_ms: Some(latency_ms),
319            token_valid_for_gated: Some(true),
320            error: None,
321        },
322        Err(e) => {
323            let error_str = e.to_string();
324            // Check if it's an auth error vs network error
325            let is_auth_error = error_str.contains("401")
326                || error_str.contains("403")
327                || error_str.contains("unauthorized")
328                || error_str.contains("Unauthorized")
329                || error_str.contains("Access denied")
330                || error_str.contains("gated");
331
332            if is_auth_error {
333                // Network works, but token is invalid/missing
334                HfConnectivityInfo {
335                    reachable: true,
336                    latency_ms: Some(latency_ms),
337                    token_valid_for_gated: Some(false),
338                    error: Some("Token invalid or missing for gated models".to_string()),
339                }
340            } else {
341                // Network/other error
342                HfConnectivityInfo {
343                    reachable: false,
344                    latency_ms: None,
345                    token_valid_for_gated: None,
346                    error: Some(error_str),
347                }
348            }
349        }
350    }
351}
352
353fn disk_usage_for(path: &Path) -> Option<(u64, u64)> {
354    let disks = Disks::new_with_refreshed_list();
355    let mut best: Option<(usize, u64, u64)> = None;
356    for disk in disks.list() {
357        let mount = disk.mount_point();
358        if path.starts_with(mount) {
359            let len = mount.as_os_str().len();
360            let avail = disk.available_space();
361            let total = disk.total_space();
362            if best.map(|b| len > b.0).unwrap_or(true) {
363                best = Some((len, avail, total));
364            }
365        }
366    }
367    best.map(|(_, avail, total)| (avail, total))
368}
369
370pub fn run_doctor() -> DoctorReport {
371    let system = collect_system_info();
372    let mut checks = Vec::new();
373
374    // CPU extensions check (ARM-aware)
375    {
376        let is_arm = cfg!(any(target_arch = "aarch64", target_arch = "arm"));
377
378        if is_arm {
379            // ARM CPUs use NEON, not AVX - no warning needed
380            checks.push(DoctorCheck {
381                name: "cpu_extensions".to_string(),
382                status: DoctorStatus::Ok,
383                message: "CPU: ARM architecture (uses NEON)".to_string(),
384                suggestion: None,
385            });
386        } else {
387            // x86/x86_64 - check for AVX extensions
388            let mut extensions = Vec::new();
389            if system.cpu.avx {
390                extensions.push("AVX");
391            }
392            if system.cpu.avx2 {
393                extensions.push("AVX2");
394            }
395            if system.cpu.fma {
396                extensions.push("FMA");
397            }
398            if system.cpu.avx512 {
399                extensions.push("AVX-512");
400            }
401
402            let has_avx2 = system.cpu.avx2;
403            let ext_str = if extensions.is_empty() {
404                "none detected".to_string()
405            } else {
406                extensions.join(", ")
407            };
408
409            checks.push(DoctorCheck {
410                name: "cpu_extensions".to_string(),
411                status: if has_avx2 {
412                    DoctorStatus::Ok
413                } else {
414                    DoctorStatus::Warn
415                },
416                message: format!("CPU extensions: {ext_str}"),
417                suggestion: if !has_avx2 {
418                    Some("AVX2 is recommended for optimal GGML performance on x86.".to_string())
419                } else {
420                    None
421                },
422            });
423        }
424    }
425
426    // Binary vs hardware mismatch check
427    {
428        let has_cuda_device = system.devices.iter().any(|d| d.kind == "cuda");
429        let has_metal_device = system.devices.iter().any(|d| d.kind == "metal");
430
431        if has_cuda_device && !system.build.cuda {
432            checks.push(DoctorCheck {
433                name: "binary_hardware_match".to_string(),
434                status: DoctorStatus::Error,
435                message: "NVIDIA GPU detected but binary compiled without CUDA support."
436                    .to_string(),
437                suggestion: Some("Reinstall with CUDA: cargo install --features cuda".to_string()),
438            });
439        } else if has_metal_device && !system.build.metal {
440            checks.push(DoctorCheck {
441                name: "binary_hardware_match".to_string(),
442                status: DoctorStatus::Error,
443                message: "Apple GPU detected but binary compiled without Metal support."
444                    .to_string(),
445                suggestion: Some(
446                    "Reinstall with Metal: cargo install --features metal".to_string(),
447                ),
448            });
449        } else {
450            checks.push(DoctorCheck {
451                name: "binary_hardware_match".to_string(),
452                status: DoctorStatus::Ok,
453                message: "Binary features match detected hardware.".to_string(),
454                suggestion: None,
455            });
456        }
457    }
458
459    // Unified memory detection
460    for dev in system
461        .devices
462        .iter()
463        .filter(|d| d.unified_memory == Some(true))
464    {
465        let kind = &dev.kind;
466        let ord = dev.ordinal.map(|o| format!(" {o}")).unwrap_or_default();
467        checks.push(DoctorCheck {
468            name: format!("{}_{}_unified_memory", kind, dev.ordinal.unwrap_or(0)),
469            status: DoctorStatus::Ok,
470            message: format!(
471                "{}{}: unified memory detected. GPU and CPU share the same physical RAM.",
472                kind.to_uppercase(),
473                ord,
474            ),
475            suggestion: None,
476        });
477    }
478
479    // CUDA compute capability + Flash Attention v2/v3 check
480    #[cfg(feature = "cuda")]
481    {
482        for dev in system.devices.iter().filter(|d| d.kind == "cuda") {
483            if let (Some(ord), Some((major, minor))) = (dev.ordinal, dev.compute_capability) {
484                let fa_v2_ok = dev.flash_attn_compatible.unwrap_or(false);
485                let fa_v3_ok = dev.flash_attn_v3_compatible.unwrap_or(false);
486
487                // Build status strings with emojis
488                let fa_v2_str = if fa_v2_ok { "✅" } else { "❌" };
489                let fa_v3_str = if fa_v3_ok {
490                    "✅"
491                } else {
492                    "❌ (requires Hopper/Compute 9.0)"
493                };
494
495                checks.push(DoctorCheck {
496                    name: format!("cuda_{}_compute", ord),
497                    status: DoctorStatus::Ok,
498                    message: format!(
499                        "GPU {}: compute {}.{} - Flash Attn v2 {}, v3 {}",
500                        ord, major, minor, fa_v2_str, fa_v3_str
501                    ),
502                    suggestion: None,
503                });
504
505                // Warn if hardware supports flash attn v2 but binary doesn't have it
506                if fa_v2_ok && !system.build.flash_attn {
507                    checks.push(DoctorCheck {
508                        name: format!("cuda_{}_flash_attn_v2_missing", ord),
509                        status: DoctorStatus::Warn,
510                        message: format!(
511                            "GPU {} supports Flash Attention v2 but binary compiled without it.",
512                            ord
513                        ),
514                        suggestion: Some(
515                            "Reinstall with: cargo install --features flash-attn".to_string(),
516                        ),
517                    });
518                }
519
520                // Warn if hardware supports flash attn v3 but binary doesn't have it
521                if fa_v3_ok && !system.build.flash_attn_v3 {
522                    checks.push(DoctorCheck {
523                        name: format!("cuda_{}_flash_attn_v3_missing", ord),
524                        status: DoctorStatus::Warn,
525                        message: format!(
526                            "GPU {} supports Flash Attention v3 but binary compiled without it.",
527                            ord
528                        ),
529                        suggestion: Some(
530                            "Reinstall with: cargo install --features flash-attn-v3".to_string(),
531                        ),
532                    });
533                }
534            }
535        }
536    }
537
538    let hf_cache_path = system
539        .hf_cache_path
540        .as_ref()
541        .map(PathBuf::from)
542        .unwrap_or_else(|| Cache::from_env().path().clone());
543
544    if std::fs::create_dir_all(&hf_cache_path).is_err() {
545        checks.push(DoctorCheck {
546            name: "hf_cache_writable".to_string(),
547            status: DoctorStatus::Error,
548            message: format!(
549                "Cannot create or access Hugging Face cache dir at {}",
550                hf_cache_path.display()
551            ),
552            suggestion: Some("Set HF_HOME or fix permissions.".to_string()),
553        });
554    } else {
555        checks.push(DoctorCheck {
556            name: "hf_cache_writable".to_string(),
557            status: DoctorStatus::Ok,
558            message: format!(
559                "Hugging Face cache dir is writable: {}",
560                hf_cache_path.display()
561            ),
562            suggestion: None,
563        });
564    }
565
566    // HuggingFace connectivity + gated model access check
567    {
568        let hf_info = check_hf_gated_access();
569        if hf_info.reachable {
570            if hf_info.token_valid_for_gated == Some(true) {
571                checks.push(DoctorCheck {
572                    name: "hf_connectivity".to_string(),
573                    status: DoctorStatus::Ok,
574                    message: format!(
575                        "Hugging Face: connected ({}ms), token valid for allowed gated models.",
576                        hf_info.latency_ms.unwrap_or(0)
577                    ),
578                    suggestion: None,
579                });
580            } else {
581                checks.push(DoctorCheck {
582                    name: "hf_connectivity".to_string(),
583                    status: DoctorStatus::Warn,
584                    message: format!(
585                        "Hugging Face: connected ({}ms), but token invalid/missing.",
586                        hf_info.latency_ms.unwrap_or(0)
587                    ),
588                    suggestion: Some(
589                        "Run `huggingface-cli login` or set HF_TOKEN to access gated models."
590                            .to_string(),
591                    ),
592                });
593            }
594        } else {
595            checks.push(DoctorCheck {
596                name: "hf_connectivity".to_string(),
597                status: DoctorStatus::Error,
598                message: format!(
599                    "Hugging Face: unreachable - {}",
600                    hf_info.error.unwrap_or_else(|| "unknown error".to_string())
601                ),
602                suggestion: Some(
603                    "Check your internet connection and firewall settings.".to_string(),
604                ),
605            });
606        }
607    }
608
609    if let Some((avail, total)) = disk_usage_for(&hf_cache_path) {
610        let min_free = 10_u64 * 1024 * 1024 * 1024;
611        let status = if avail < min_free {
612            DoctorStatus::Warn
613        } else {
614            DoctorStatus::Ok
615        };
616        checks.push(DoctorCheck {
617            name: "disk_space".to_string(),
618            status,
619            #[allow(clippy::cast_precision_loss)]
620            message: format!(
621                "Disk free: {:.1} GB / {:.1} GB on the volume containing the HF cache at {}.",
622                avail as f64 / 1e9,
623                total as f64 / 1e9,
624                hf_cache_path.display()
625            ),
626            suggestion: if avail < min_free {
627                Some("Free up disk space or move HF cache.".to_string())
628            } else {
629                None
630            },
631        });
632    }
633
634    let has_cuda = system.devices.iter().any(|d| d.kind == "cuda");
635
636    if system.build.cuda && !has_cuda {
637        checks.push(DoctorCheck {
638            name: "cuda_devices".to_string(),
639            status: DoctorStatus::Warn,
640            message: "CUDA support is enabled but no CUDA devices were found.".to_string(),
641            suggestion: Some("Check NVIDIA driver installation.".to_string()),
642        });
643    }
644
645    DoctorReport { system, checks }
646}
hanzo_engine/diagnostics.rs

hanzo_engine/
diagnostics.rs