1use std::path::{Path, PathBuf};
2use std::time::Instant;
3
4use hf_hub::{api::sync::ApiBuilder, Cache};
5use serde::{Deserialize, Serialize};
6use sysinfo::{Disks, System};
7
8#[cfg(any(feature = "cuda", feature = "metal"))]
9use crate::MemoryUsage;
10#[cfg(any(feature = "cuda", feature = "metal"))]
11use hanzo_ml::Device;
12
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct CpuInfo {
15 pub brand: Option<String>,
16 pub logical_cores: usize,
17 pub physical_cores: Option<usize>,
18 pub avx: bool,
19 pub avx2: bool,
20 pub avx512: bool,
21 pub fma: bool,
22}
23
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct MemoryInfo {
26 pub total_bytes: u64,
27 pub available_bytes: u64,
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct DeviceInfo {
32 pub kind: String,
33 pub ordinal: Option<usize>,
34 pub name: Option<String>,
35 pub total_memory_bytes: Option<u64>,
36 pub available_memory_bytes: Option<u64>,
37 #[serde(skip_serializing_if = "Option::is_none")]
39 pub compute_capability: Option<(u32, u32)>,
40 #[serde(skip_serializing_if = "Option::is_none")]
42 pub flash_attn_compatible: Option<bool>,
43 #[serde(skip_serializing_if = "Option::is_none")]
45 pub flash_attn_v3_compatible: Option<bool>,
46 #[serde(skip_serializing_if = "Option::is_none")]
48 pub unified_memory: Option<bool>,
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct BuildInfo {
53 pub cuda: bool,
54 pub metal: bool,
55 pub cudnn: bool,
56 pub flash_attn: bool,
57 pub flash_attn_v3: bool,
58 pub accelerate: bool,
59 pub mkl: bool,
60 pub git_revision: String,
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct HfConnectivityInfo {
65 pub reachable: bool,
67 #[serde(skip_serializing_if = "Option::is_none")]
69 pub latency_ms: Option<u64>,
70 #[serde(skip_serializing_if = "Option::is_none")]
72 pub token_valid_for_gated: Option<bool>,
73 #[serde(skip_serializing_if = "Option::is_none")]
75 pub error: Option<String>,
76}
77
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct SystemInfo {
80 pub os: Option<String>,
81 pub kernel: Option<String>,
82 pub cpu: CpuInfo,
83 pub memory: MemoryInfo,
84 pub devices: Vec<DeviceInfo>,
85 pub build: BuildInfo,
86 pub hf_cache_path: Option<String>,
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize)]
90#[serde(rename_all = "lowercase")]
91pub enum DoctorStatus {
92 Ok,
93 Warn,
94 Error,
95}
96
97#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct DoctorCheck {
99 pub name: String,
100 pub status: DoctorStatus,
101 pub message: String,
102 #[serde(skip_serializing_if = "Option::is_none")]
103 pub suggestion: Option<String>,
104}
105
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct DoctorReport {
108 pub system: SystemInfo,
109 pub checks: Vec<DoctorCheck>,
110}
111
112fn build_info() -> BuildInfo {
113 BuildInfo {
114 cuda: cfg!(feature = "cuda"),
115 metal: cfg!(feature = "metal"),
116 cudnn: cfg!(feature = "cudnn"),
117 flash_attn: cfg!(feature = "flash-attn"),
118 flash_attn_v3: cfg!(feature = "flash-attn-v3"),
119 accelerate: cfg!(feature = "accelerate"),
120 mkl: cfg!(feature = "mkl"),
121 git_revision: crate::HANZO_GIT_REVISION.to_string(),
122 }
123}
124
125fn collect_devices(sys: &System) -> Vec<DeviceInfo> {
126 let mut devices = Vec::new();
127
128 let cpu_brand = sys.cpus().first().map(|c| c.brand().to_string());
130 devices.push(DeviceInfo {
131 kind: "cpu".to_string(),
132 ordinal: None,
133 name: cpu_brand,
134 total_memory_bytes: Some(sys.total_memory()),
135 available_memory_bytes: Some(sys.available_memory()),
136 compute_capability: None,
137 flash_attn_compatible: None,
138 flash_attn_v3_compatible: None,
139 unified_memory: None,
140 });
141
142 #[cfg(feature = "cuda")]
143 {
144 let mut ord = 0;
145 while let Ok(dev) = Device::new_cuda(ord) {
146 let mem = MemoryUsage.query(&dev).ok();
147 let total = mem.map(|m| m.total() as u64);
148 let avail = mem.map(|m| m.available() as u64);
149
150 let compute_cap = get_cuda_compute_capability(ord);
152 let flash_attn_v2_ok = compute_cap.map(|(major, _minor)| {
153 major >= 8
155 });
156 let flash_attn_v3_ok = compute_cap.map(|(major, minor)| {
157 major == 9 && minor == 0
159 });
160
161 devices.push(DeviceInfo {
162 kind: "cuda".to_string(),
163 ordinal: Some(ord),
164 name: None,
165 total_memory_bytes: total,
166 available_memory_bytes: avail,
167 compute_capability: compute_cap,
168 flash_attn_compatible: flash_attn_v2_ok,
169 flash_attn_v3_compatible: flash_attn_v3_ok,
170 unified_memory: Some(crate::utils::normal::is_integrated_gpu(&dev)),
171 });
172 ord += 1;
173 }
174 }
175
176 #[cfg(feature = "metal")]
177 {
178 let total = hanzo_metal_kernels::metal::Device::all().len();
179 for ord in 0..total {
180 if let Ok(dev) = Device::new_metal(ord) {
181 let mem = MemoryUsage.query(&dev).ok();
182 let total = mem.map(|m| m.total() as u64);
183 let avail = mem.map(|m| m.available() as u64);
184 devices.push(DeviceInfo {
185 kind: "metal".to_string(),
186 ordinal: Some(ord),
187 name: None,
188 total_memory_bytes: total,
189 available_memory_bytes: avail,
190 compute_capability: None,
191 flash_attn_compatible: Some(true), flash_attn_v3_compatible: None, unified_memory: Some(true), });
195 }
196 }
197 }
198
199 devices
200}
201
/// Looks up the CUDA compute capability of GPU `ordinal` by running
/// `nvidia-smi`.
///
/// Returns `Some((major, minor))` when the tool exits successfully and prints
/// exactly `MAJOR.MINOR`. Returns `None` if the command cannot be started,
/// exits with a failure status, emits non-UTF-8 output, or prints anything
/// that does not parse as two dot-separated unsigned integers.
///
/// NOTE(review): this assumes `nvidia-smi` indices line up with the ordinals
/// used by `Device::new_cuda` — confirm for setups where
/// `CUDA_VISIBLE_DEVICES` hides or reorders GPUs.
#[cfg(feature = "cuda")]
fn get_cuda_compute_capability(ordinal: usize) -> Option<(u32, u32)> {
    let gpu_selector = format!("-i={ordinal}");
    let result = std::process::Command::new("nvidia-smi")
        .arg("--query-gpu=compute_cap")
        .arg("--format=csv,noheader")
        .arg(&gpu_selector)
        .output()
        .ok()?;

    if !result.status.success() {
        return None;
    }

    let text = String::from_utf8(result.stdout).ok()?;
    // A second dot ends up in `minor` and makes its parse fail, so only the
    // exact `MAJOR.MINOR` shape is accepted.
    let (major, minor) = text.trim().split_once('.')?;
    Some((major.parse().ok()?, minor.parse().ok()?))
}
232
/// Fallback used when the crate is built without the `cuda` feature: no
/// compute capability is ever reported, whatever the ordinal.
#[cfg(not(feature = "cuda"))]
#[allow(dead_code)]
fn get_cuda_compute_capability(_ordinal: usize) -> Option<(u32, u32)> {
    // Nothing to query without CUDA support.
    Option::None
}
238
/// Reports which x86 SIMD extensions the running CPU supports.
///
/// Returns `(avx, avx2, avx512f, fma)`. Detection happens at runtime on
/// `x86`/`x86_64`; on every other architecture all four flags are `false`.
fn detect_cpu_extensions() -> (bool, bool, bool, bool) {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    let flags = (
        std::arch::is_x86_feature_detected!("avx"),
        std::arch::is_x86_feature_detected!("avx2"),
        std::arch::is_x86_feature_detected!("avx512f"),
        std::arch::is_x86_feature_detected!("fma"),
    );

    // The detection macro only exists on x86 targets.
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    let flags = (false, false, false, false);

    flags
}
254
255pub fn collect_system_info() -> SystemInfo {
256 let mut sys = System::new_all();
257 sys.refresh_all();
258
259 let (avx, avx2, avx512, fma) = detect_cpu_extensions();
260
261 let cpu = CpuInfo {
262 brand: sys.cpus().first().map(|c| c.brand().to_string()),
263 logical_cores: sys.cpus().len(),
264 physical_cores: System::physical_core_count(),
265 avx,
266 avx2,
267 avx512,
268 fma,
269 };
270
271 let memory = MemoryInfo {
272 total_bytes: sys.total_memory(),
273 available_bytes: sys.available_memory(),
274 };
275
276 let hf_cache = Cache::from_env();
277 let hf_cache_path = hf_cache.path().to_string_lossy().to_string();
278
279 SystemInfo {
280 os: System::name(),
281 kernel: System::kernel_version(),
282 cpu,
283 memory,
284 devices: collect_devices(&sys),
285 build: build_info(),
286 hf_cache_path: Some(hf_cache_path),
287 }
288}
289
290#[allow(clippy::cast_possible_truncation)]
292pub fn check_hf_gated_access() -> HfConnectivityInfo {
293 let start = Instant::now();
294
295 if crate::pipeline::hf::is_hf_hub_offline() {
296 return HfConnectivityInfo {
297 reachable: false,
298 latency_ms: None,
299 token_valid_for_gated: None,
300 error: Some(format!(
301 "Skipped: `{}` is set; no network calls were made.",
302 crate::pipeline::hf::HF_HUB_OFFLINE_ENV
303 )),
304 };
305 }
306
307 let api_result = ApiBuilder::from_env()
309 .with_progress(false)
310 .build()
311 .and_then(|api| api.model("google/gemma-3-4b-it".to_string()).info());
312
313 let latency_ms = start.elapsed().as_millis() as u64;
314
315 match api_result {
316 Ok(_) => HfConnectivityInfo {
317 reachable: true,
318 latency_ms: Some(latency_ms),
319 token_valid_for_gated: Some(true),
320 error: None,
321 },
322 Err(e) => {
323 let error_str = e.to_string();
324 let is_auth_error = error_str.contains("401")
326 || error_str.contains("403")
327 || error_str.contains("unauthorized")
328 || error_str.contains("Unauthorized")
329 || error_str.contains("Access denied")
330 || error_str.contains("gated");
331
332 if is_auth_error {
333 HfConnectivityInfo {
335 reachable: true,
336 latency_ms: Some(latency_ms),
337 token_valid_for_gated: Some(false),
338 error: Some("Token invalid or missing for gated models".to_string()),
339 }
340 } else {
341 HfConnectivityInfo {
343 reachable: false,
344 latency_ms: None,
345 token_valid_for_gated: None,
346 error: Some(error_str),
347 }
348 }
349 }
350 }
351}
352
353fn disk_usage_for(path: &Path) -> Option<(u64, u64)> {
354 let disks = Disks::new_with_refreshed_list();
355 let mut best: Option<(usize, u64, u64)> = None;
356 for disk in disks.list() {
357 let mount = disk.mount_point();
358 if path.starts_with(mount) {
359 let len = mount.as_os_str().len();
360 let avail = disk.available_space();
361 let total = disk.total_space();
362 if best.map(|b| len > b.0).unwrap_or(true) {
363 best = Some((len, avail, total));
364 }
365 }
366 }
367 best.map(|(_, avail, total)| (avail, total))
368}
369
370pub fn run_doctor() -> DoctorReport {
371 let system = collect_system_info();
372 let mut checks = Vec::new();
373
374 {
376 let is_arm = cfg!(any(target_arch = "aarch64", target_arch = "arm"));
377
378 if is_arm {
379 checks.push(DoctorCheck {
381 name: "cpu_extensions".to_string(),
382 status: DoctorStatus::Ok,
383 message: "CPU: ARM architecture (uses NEON)".to_string(),
384 suggestion: None,
385 });
386 } else {
387 let mut extensions = Vec::new();
389 if system.cpu.avx {
390 extensions.push("AVX");
391 }
392 if system.cpu.avx2 {
393 extensions.push("AVX2");
394 }
395 if system.cpu.fma {
396 extensions.push("FMA");
397 }
398 if system.cpu.avx512 {
399 extensions.push("AVX-512");
400 }
401
402 let has_avx2 = system.cpu.avx2;
403 let ext_str = if extensions.is_empty() {
404 "none detected".to_string()
405 } else {
406 extensions.join(", ")
407 };
408
409 checks.push(DoctorCheck {
410 name: "cpu_extensions".to_string(),
411 status: if has_avx2 {
412 DoctorStatus::Ok
413 } else {
414 DoctorStatus::Warn
415 },
416 message: format!("CPU extensions: {ext_str}"),
417 suggestion: if !has_avx2 {
418 Some("AVX2 is recommended for optimal GGML performance on x86.".to_string())
419 } else {
420 None
421 },
422 });
423 }
424 }
425
426 {
428 let has_cuda_device = system.devices.iter().any(|d| d.kind == "cuda");
429 let has_metal_device = system.devices.iter().any(|d| d.kind == "metal");
430
431 if has_cuda_device && !system.build.cuda {
432 checks.push(DoctorCheck {
433 name: "binary_hardware_match".to_string(),
434 status: DoctorStatus::Error,
435 message: "NVIDIA GPU detected but binary compiled without CUDA support."
436 .to_string(),
437 suggestion: Some("Reinstall with CUDA: cargo install --features cuda".to_string()),
438 });
439 } else if has_metal_device && !system.build.metal {
440 checks.push(DoctorCheck {
441 name: "binary_hardware_match".to_string(),
442 status: DoctorStatus::Error,
443 message: "Apple GPU detected but binary compiled without Metal support."
444 .to_string(),
445 suggestion: Some(
446 "Reinstall with Metal: cargo install --features metal".to_string(),
447 ),
448 });
449 } else {
450 checks.push(DoctorCheck {
451 name: "binary_hardware_match".to_string(),
452 status: DoctorStatus::Ok,
453 message: "Binary features match detected hardware.".to_string(),
454 suggestion: None,
455 });
456 }
457 }
458
459 for dev in system
461 .devices
462 .iter()
463 .filter(|d| d.unified_memory == Some(true))
464 {
465 let kind = &dev.kind;
466 let ord = dev.ordinal.map(|o| format!(" {o}")).unwrap_or_default();
467 checks.push(DoctorCheck {
468 name: format!("{}_{}_unified_memory", kind, dev.ordinal.unwrap_or(0)),
469 status: DoctorStatus::Ok,
470 message: format!(
471 "{}{}: unified memory detected. GPU and CPU share the same physical RAM.",
472 kind.to_uppercase(),
473 ord,
474 ),
475 suggestion: None,
476 });
477 }
478
479 #[cfg(feature = "cuda")]
481 {
482 for dev in system.devices.iter().filter(|d| d.kind == "cuda") {
483 if let (Some(ord), Some((major, minor))) = (dev.ordinal, dev.compute_capability) {
484 let fa_v2_ok = dev.flash_attn_compatible.unwrap_or(false);
485 let fa_v3_ok = dev.flash_attn_v3_compatible.unwrap_or(false);
486
487 let fa_v2_str = if fa_v2_ok { "✅" } else { "❌" };
489 let fa_v3_str = if fa_v3_ok {
490 "✅"
491 } else {
492 "❌ (requires Hopper/Compute 9.0)"
493 };
494
495 checks.push(DoctorCheck {
496 name: format!("cuda_{}_compute", ord),
497 status: DoctorStatus::Ok,
498 message: format!(
499 "GPU {}: compute {}.{} - Flash Attn v2 {}, v3 {}",
500 ord, major, minor, fa_v2_str, fa_v3_str
501 ),
502 suggestion: None,
503 });
504
505 if fa_v2_ok && !system.build.flash_attn {
507 checks.push(DoctorCheck {
508 name: format!("cuda_{}_flash_attn_v2_missing", ord),
509 status: DoctorStatus::Warn,
510 message: format!(
511 "GPU {} supports Flash Attention v2 but binary compiled without it.",
512 ord
513 ),
514 suggestion: Some(
515 "Reinstall with: cargo install --features flash-attn".to_string(),
516 ),
517 });
518 }
519
520 if fa_v3_ok && !system.build.flash_attn_v3 {
522 checks.push(DoctorCheck {
523 name: format!("cuda_{}_flash_attn_v3_missing", ord),
524 status: DoctorStatus::Warn,
525 message: format!(
526 "GPU {} supports Flash Attention v3 but binary compiled without it.",
527 ord
528 ),
529 suggestion: Some(
530 "Reinstall with: cargo install --features flash-attn-v3".to_string(),
531 ),
532 });
533 }
534 }
535 }
536 }
537
538 let hf_cache_path = system
539 .hf_cache_path
540 .as_ref()
541 .map(PathBuf::from)
542 .unwrap_or_else(|| Cache::from_env().path().clone());
543
544 if std::fs::create_dir_all(&hf_cache_path).is_err() {
545 checks.push(DoctorCheck {
546 name: "hf_cache_writable".to_string(),
547 status: DoctorStatus::Error,
548 message: format!(
549 "Cannot create or access Hugging Face cache dir at {}",
550 hf_cache_path.display()
551 ),
552 suggestion: Some("Set HF_HOME or fix permissions.".to_string()),
553 });
554 } else {
555 checks.push(DoctorCheck {
556 name: "hf_cache_writable".to_string(),
557 status: DoctorStatus::Ok,
558 message: format!(
559 "Hugging Face cache dir is writable: {}",
560 hf_cache_path.display()
561 ),
562 suggestion: None,
563 });
564 }
565
566 {
568 let hf_info = check_hf_gated_access();
569 if hf_info.reachable {
570 if hf_info.token_valid_for_gated == Some(true) {
571 checks.push(DoctorCheck {
572 name: "hf_connectivity".to_string(),
573 status: DoctorStatus::Ok,
574 message: format!(
575 "Hugging Face: connected ({}ms), token valid for allowed gated models.",
576 hf_info.latency_ms.unwrap_or(0)
577 ),
578 suggestion: None,
579 });
580 } else {
581 checks.push(DoctorCheck {
582 name: "hf_connectivity".to_string(),
583 status: DoctorStatus::Warn,
584 message: format!(
585 "Hugging Face: connected ({}ms), but token invalid/missing.",
586 hf_info.latency_ms.unwrap_or(0)
587 ),
588 suggestion: Some(
589 "Run `huggingface-cli login` or set HF_TOKEN to access gated models."
590 .to_string(),
591 ),
592 });
593 }
594 } else {
595 checks.push(DoctorCheck {
596 name: "hf_connectivity".to_string(),
597 status: DoctorStatus::Error,
598 message: format!(
599 "Hugging Face: unreachable - {}",
600 hf_info.error.unwrap_or_else(|| "unknown error".to_string())
601 ),
602 suggestion: Some(
603 "Check your internet connection and firewall settings.".to_string(),
604 ),
605 });
606 }
607 }
608
609 if let Some((avail, total)) = disk_usage_for(&hf_cache_path) {
610 let min_free = 10_u64 * 1024 * 1024 * 1024;
611 let status = if avail < min_free {
612 DoctorStatus::Warn
613 } else {
614 DoctorStatus::Ok
615 };
616 checks.push(DoctorCheck {
617 name: "disk_space".to_string(),
618 status,
619 #[allow(clippy::cast_precision_loss)]
620 message: format!(
621 "Disk free: {:.1} GB / {:.1} GB on the volume containing the HF cache at {}.",
622 avail as f64 / 1e9,
623 total as f64 / 1e9,
624 hf_cache_path.display()
625 ),
626 suggestion: if avail < min_free {
627 Some("Free up disk space or move HF cache.".to_string())
628 } else {
629 None
630 },
631 });
632 }
633
634 let has_cuda = system.devices.iter().any(|d| d.kind == "cuda");
635
636 if system.build.cuda && !has_cuda {
637 checks.push(DoctorCheck {
638 name: "cuda_devices".to_string(),
639 status: DoctorStatus::Warn,
640 message: "CUDA support is enabled but no CUDA devices were found.".to_string(),
641 suggestion: Some("Check NVIDIA driver installation.".to_string()),
642 });
643 }
644
645 DoctorReport { system, checks }
646}