1#[cfg(feature = "amd-xdna")]
4pub(crate) mod amd_xdna;
5#[cfg(feature = "apple")]
6pub(crate) mod apple;
7pub mod bandwidth;
8#[cfg(feature = "cerebras")]
9pub(crate) mod cerebras;
10pub(crate) mod command;
11#[cfg(feature = "cuda")]
12pub mod cuda;
13pub(crate) mod disk;
14pub(crate) mod environment;
15#[cfg(feature = "gaudi")]
16pub mod gaudi;
17#[cfg(feature = "graphcore")]
18pub(crate) mod graphcore;
19#[cfg(feature = "groq")]
20pub(crate) mod groq;
21#[cfg(feature = "intel-npu")]
22pub(crate) mod intel_npu;
23#[cfg(feature = "intel-oneapi")]
24pub(crate) mod intel_oneapi;
25pub mod interconnect;
26#[cfg(feature = "mediatek-apu")]
27pub(crate) mod mediatek_apu;
28#[cfg(feature = "aws-neuron")]
29pub(crate) mod neuron;
30pub(crate) mod numa;
31pub mod pcie;
32#[cfg(feature = "qualcomm")]
33pub(crate) mod qualcomm;
34#[cfg(feature = "rocm")]
35pub(crate) mod rocm;
36#[cfg(feature = "samsung-npu")]
37pub(crate) mod samsung_npu;
38#[cfg(feature = "tpu")]
39pub(crate) mod tpu;
40#[cfg(feature = "vulkan")]
41pub mod vulkan;
42#[cfg(feature = "windows-wmi")]
43pub(crate) mod windows;
44
45pub mod platform;
46
47use std::collections::HashMap;
48use std::path::Path;
49use std::time::{Duration, Instant};
50
51use tracing::debug;
52
53use crate::error::DetectionError;
54use crate::hardware::AcceleratorType;
55use crate::profile::AcceleratorProfile;
56use crate::registry::{AcceleratorRegistry, Backend, DetectBuilder};
57use crate::system_io::SystemIo;
58
/// What a single backend detection pass produces: the accelerator profiles it
/// found and the warnings it recorded while probing.
type DetectResult = (Vec<AcceleratorProfile>, Vec<DetectionError>);
61
/// Profiles and warnings from a single backend detection pass, plus the time the
/// backend's detection call took.
type TimedDetectResult = (Vec<AcceleratorProfile>, Vec<DetectionError>, Duration);
64
/// Outcome of a timed detection run: the populated registry together with
/// per-step and overall timing information.
#[derive(Debug, Clone)]
pub struct TimedDetection {
    /// Registry assembled by the detection run.
    pub registry: AcceleratorRegistry,
    /// Elapsed time per detection step, keyed by step name. Backend entries use
    /// the backend's name (for example `"cuda"` or `"intel_npu"`); the keys
    /// `"vulkan_sysfs"`, `"_enrich"` and `"_system_io"` cover the sysfs Vulkan
    /// fallback, profile enrichment and system I/O probing respectively.
    pub timings: HashMap<String, Duration>,
    /// Wall-clock duration of the whole detection run.
    pub total: Duration,
}
75
76impl AcceleratorRegistry {
77 pub fn detect() -> Self {
89 detect_with_builder(DetectBuilder::new())
90 }
91
92 pub fn detect_with_timing() -> TimedDetection {
108 detect_with_builder_timed(DetectBuilder::new())
109 }
110}
111
112pub(crate) fn detect_with_builder(builder: DetectBuilder) -> AcceleratorRegistry {
117 let mut all_profiles = Vec::with_capacity(8);
119 all_profiles.push(cpu_profile());
120 let mut all_warnings: Vec<DetectionError> = Vec::new();
121
122 let use_threads = builder.enabled_count() >= 2;
123
124 macro_rules! run_backend {
125 ($feature:literal, $backend:expr, $detect_fn:expr) => {
126 #[cfg(feature = $feature)]
127 if builder.backend_enabled($backend) {
128 $detect_fn(&mut all_profiles, &mut all_warnings);
129 }
130 };
131 }
132
133 macro_rules! spawn_backend {
134 ($feature:literal, $backend:expr, $detect_fn:expr, $handles:expr, $s:expr) => {
135 #[cfg(feature = $feature)]
136 if builder.backend_enabled($backend) {
137 $handles.push($s.spawn(|| {
138 let mut p = Vec::new();
139 let mut w = Vec::new();
140 $detect_fn(&mut p, &mut w);
141 (p, w)
142 }));
143 }
144 };
145 }
146
147 if use_threads {
148 std::thread::scope(|s| {
149 let mut handles: Vec<std::thread::ScopedJoinHandle<'_, DetectResult>> = Vec::new();
150
151 spawn_backend!("cuda", Backend::Cuda, cuda::detect_cuda, handles, s);
152 spawn_backend!("rocm", Backend::Rocm, rocm::detect_rocm, handles, s);
153 spawn_backend!(
154 "apple",
155 Backend::Apple,
156 apple::detect_metal_and_ane,
157 handles,
158 s
159 );
160 spawn_backend!("vulkan", Backend::Vulkan, vulkan::detect_vulkan, handles, s);
161 spawn_backend!(
162 "intel-npu",
163 Backend::IntelNpu,
164 intel_npu::detect_intel_npu,
165 handles,
166 s
167 );
168 spawn_backend!(
169 "amd-xdna",
170 Backend::AmdXdna,
171 amd_xdna::detect_amd_xdna,
172 handles,
173 s
174 );
175 spawn_backend!("tpu", Backend::Tpu, tpu::detect_tpu, handles, s);
176 spawn_backend!("gaudi", Backend::Gaudi, gaudi::detect_gaudi, handles, s);
177 spawn_backend!(
178 "aws-neuron",
179 Backend::AwsNeuron,
180 neuron::detect_aws_neuron,
181 handles,
182 s
183 );
184 spawn_backend!(
185 "intel-oneapi",
186 Backend::IntelOneApi,
187 intel_oneapi::detect_intel_oneapi,
188 handles,
189 s
190 );
191 spawn_backend!(
192 "qualcomm",
193 Backend::Qualcomm,
194 qualcomm::detect_qualcomm_ai100,
195 handles,
196 s
197 );
198 spawn_backend!(
199 "cerebras",
200 Backend::Cerebras,
201 cerebras::detect_cerebras_wse,
202 handles,
203 s
204 );
205 spawn_backend!(
206 "graphcore",
207 Backend::Graphcore,
208 graphcore::detect_graphcore_ipu,
209 handles,
210 s
211 );
212 spawn_backend!("groq", Backend::Groq, groq::detect_groq_lpu, handles, s);
213 spawn_backend!(
214 "samsung-npu",
215 Backend::SamsungNpu,
216 samsung_npu::detect_samsung_npu,
217 handles,
218 s
219 );
220 spawn_backend!(
221 "mediatek-apu",
222 Backend::MediaTekApu,
223 mediatek_apu::detect_mediatek_apu,
224 handles,
225 s
226 );
227 spawn_backend!(
228 "windows-wmi",
229 Backend::WindowsWmi,
230 windows::detect_windows_gpu,
231 handles,
232 s
233 );
234
235 for handle in handles {
236 if let Ok((profiles, warnings)) = handle.join() {
237 all_profiles.extend(profiles);
238 all_warnings.extend(warnings);
239 }
240 }
241 });
242 } else {
243 run_backend!("cuda", Backend::Cuda, cuda::detect_cuda);
244 run_backend!("rocm", Backend::Rocm, rocm::detect_rocm);
245 run_backend!("apple", Backend::Apple, apple::detect_metal_and_ane);
246 run_backend!("vulkan", Backend::Vulkan, vulkan::detect_vulkan);
247 run_backend!("intel-npu", Backend::IntelNpu, intel_npu::detect_intel_npu);
248 run_backend!("amd-xdna", Backend::AmdXdna, amd_xdna::detect_amd_xdna);
249 run_backend!("tpu", Backend::Tpu, tpu::detect_tpu);
250 run_backend!("gaudi", Backend::Gaudi, gaudi::detect_gaudi);
251 run_backend!("aws-neuron", Backend::AwsNeuron, neuron::detect_aws_neuron);
252 run_backend!(
253 "intel-oneapi",
254 Backend::IntelOneApi,
255 intel_oneapi::detect_intel_oneapi
256 );
257 run_backend!(
258 "qualcomm",
259 Backend::Qualcomm,
260 qualcomm::detect_qualcomm_ai100
261 );
262 run_backend!("cerebras", Backend::Cerebras, cerebras::detect_cerebras_wse);
263 run_backend!(
264 "graphcore",
265 Backend::Graphcore,
266 graphcore::detect_graphcore_ipu
267 );
268 run_backend!("groq", Backend::Groq, groq::detect_groq_lpu);
269 run_backend!(
270 "samsung-npu",
271 Backend::SamsungNpu,
272 samsung_npu::detect_samsung_npu
273 );
274 run_backend!(
275 "mediatek-apu",
276 Backend::MediaTekApu,
277 mediatek_apu::detect_mediatek_apu
278 );
279 run_backend!(
280 "windows-wmi",
281 Backend::WindowsWmi,
282 windows::detect_windows_gpu
283 );
284 }
285
286 #[cfg(feature = "vulkan")]
288 {
289 let has_vulkan = all_profiles
290 .iter()
291 .any(|p| matches!(p.accelerator, AcceleratorType::VulkanGpu { .. }));
292 let has_dedicated = all_profiles.iter().any(|p| {
293 matches!(
294 p.accelerator,
295 AcceleratorType::CudaGpu { .. } | AcceleratorType::RocmGpu { .. }
296 )
297 });
298 if !has_vulkan && !has_dedicated && builder.backend_enabled(Backend::Vulkan) {
299 vulkan::detect_vulkan_sysfs(&mut all_profiles, &mut all_warnings);
300 }
301 }
302
303 let has_dedicated = all_profiles.iter().any(|p| {
305 matches!(
306 p.accelerator,
307 AcceleratorType::CudaGpu { .. } | AcceleratorType::RocmGpu { .. }
308 )
309 });
310 if has_dedicated {
311 all_profiles.retain(|p| !matches!(p.accelerator, AcceleratorType::VulkanGpu { .. }));
312 }
313
314 bandwidth::enrich_bandwidth(&mut all_profiles, &mut all_warnings);
317 let nvidia_pci = list_driver_pci_addrs("nvidia");
318 let amdgpu_pci = list_driver_pci_addrs("amdgpu");
319 pcie::enrich_pcie(&mut all_profiles, &nvidia_pci, &amdgpu_pci);
320 numa::enrich_numa(&mut all_profiles, &nvidia_pci, &amdgpu_pci);
321
322 let system_interconnects = interconnect::detect_interconnects(&mut all_warnings);
324 let system_storage = disk::detect_storage();
325 let system_environment = environment::detect_environment();
326 let system_io = SystemIo {
327 interconnects: system_interconnects,
328 storage: system_storage,
329 environment: Some(system_environment),
330 };
331
332 debug!(
333 count = all_profiles.len(),
334 warnings = all_warnings.len(),
335 interconnects = system_io.interconnects.len(),
336 storage_devices = system_io.storage.len(),
337 "accelerator detection complete"
338 );
339 AcceleratorRegistry {
340 schema_version: crate::registry::SCHEMA_VERSION,
341 profiles: all_profiles,
342 warnings: all_warnings,
343 system_io,
344 }
345}
346
347pub(crate) fn detect_with_builder_timed(builder: DetectBuilder) -> TimedDetection {
349 let wall_start = Instant::now();
350 let mut all_profiles = Vec::with_capacity(8);
351 all_profiles.push(cpu_profile());
352 let mut all_warnings: Vec<DetectionError> = Vec::new();
353 let mut timings: HashMap<String, Duration> = HashMap::new();
354
355 macro_rules! run_backend_timed {
356 ($feature:literal, $backend:expr, $name:literal, $detect_fn:expr) => {
357 #[cfg(feature = $feature)]
358 if builder.backend_enabled($backend) {
359 let start = Instant::now();
360 $detect_fn(&mut all_profiles, &mut all_warnings);
361 timings.insert($name.into(), start.elapsed());
362 }
363 };
364 }
365
366 macro_rules! spawn_backend_timed {
367 ($feature:literal, $backend:expr, $name:literal, $detect_fn:expr, $handles:expr, $s:expr) => {
368 #[cfg(feature = $feature)]
369 if builder.backend_enabled($backend) {
370 $handles.push((
371 $name,
372 $s.spawn(|| {
373 let start = Instant::now();
374 let mut p = Vec::new();
375 let mut w = Vec::new();
376 $detect_fn(&mut p, &mut w);
377 (p, w, start.elapsed())
378 }),
379 ));
380 }
381 };
382 }
383
384 let use_threads = builder.enabled_count() >= 2;
385
386 if use_threads {
387 std::thread::scope(|s| {
388 let mut handles: Vec<(&str, std::thread::ScopedJoinHandle<'_, TimedDetectResult>)> =
389 Vec::new();
390
391 spawn_backend_timed!("cuda", Backend::Cuda, "cuda", cuda::detect_cuda, handles, s);
392 spawn_backend_timed!("rocm", Backend::Rocm, "rocm", rocm::detect_rocm, handles, s);
393 spawn_backend_timed!(
394 "apple",
395 Backend::Apple,
396 "apple",
397 apple::detect_metal_and_ane,
398 handles,
399 s
400 );
401 spawn_backend_timed!(
402 "vulkan",
403 Backend::Vulkan,
404 "vulkan",
405 vulkan::detect_vulkan,
406 handles,
407 s
408 );
409 spawn_backend_timed!(
410 "intel-npu",
411 Backend::IntelNpu,
412 "intel_npu",
413 intel_npu::detect_intel_npu,
414 handles,
415 s
416 );
417 spawn_backend_timed!(
418 "amd-xdna",
419 Backend::AmdXdna,
420 "amd_xdna",
421 amd_xdna::detect_amd_xdna,
422 handles,
423 s
424 );
425 spawn_backend_timed!("tpu", Backend::Tpu, "tpu", tpu::detect_tpu, handles, s);
426 spawn_backend_timed!(
427 "gaudi",
428 Backend::Gaudi,
429 "gaudi",
430 gaudi::detect_gaudi,
431 handles,
432 s
433 );
434 spawn_backend_timed!(
435 "aws-neuron",
436 Backend::AwsNeuron,
437 "aws_neuron",
438 neuron::detect_aws_neuron,
439 handles,
440 s
441 );
442 spawn_backend_timed!(
443 "intel-oneapi",
444 Backend::IntelOneApi,
445 "intel_oneapi",
446 intel_oneapi::detect_intel_oneapi,
447 handles,
448 s
449 );
450 spawn_backend_timed!(
451 "qualcomm",
452 Backend::Qualcomm,
453 "qualcomm",
454 qualcomm::detect_qualcomm_ai100,
455 handles,
456 s
457 );
458 spawn_backend_timed!(
459 "cerebras",
460 Backend::Cerebras,
461 "cerebras",
462 cerebras::detect_cerebras_wse,
463 handles,
464 s
465 );
466 spawn_backend_timed!(
467 "graphcore",
468 Backend::Graphcore,
469 "graphcore",
470 graphcore::detect_graphcore_ipu,
471 handles,
472 s
473 );
474 spawn_backend_timed!(
475 "groq",
476 Backend::Groq,
477 "groq",
478 groq::detect_groq_lpu,
479 handles,
480 s
481 );
482 spawn_backend_timed!(
483 "samsung-npu",
484 Backend::SamsungNpu,
485 "samsung_npu",
486 samsung_npu::detect_samsung_npu,
487 handles,
488 s
489 );
490 spawn_backend_timed!(
491 "mediatek-apu",
492 Backend::MediaTekApu,
493 "mediatek_apu",
494 mediatek_apu::detect_mediatek_apu,
495 handles,
496 s
497 );
498 spawn_backend_timed!(
499 "windows-wmi",
500 Backend::WindowsWmi,
501 "windows_wmi",
502 windows::detect_windows_gpu,
503 handles,
504 s
505 );
506
507 for (name, handle) in handles {
508 if let Ok((profiles, warnings, duration)) = handle.join() {
509 all_profiles.extend(profiles);
510 all_warnings.extend(warnings);
511 timings.insert(name.into(), duration);
512 }
513 }
514 });
515 } else {
516 run_backend_timed!("cuda", Backend::Cuda, "cuda", cuda::detect_cuda);
517 run_backend_timed!("rocm", Backend::Rocm, "rocm", rocm::detect_rocm);
518 run_backend_timed!(
519 "apple",
520 Backend::Apple,
521 "apple",
522 apple::detect_metal_and_ane
523 );
524 run_backend_timed!("vulkan", Backend::Vulkan, "vulkan", vulkan::detect_vulkan);
525 run_backend_timed!(
526 "intel-npu",
527 Backend::IntelNpu,
528 "intel_npu",
529 intel_npu::detect_intel_npu
530 );
531 run_backend_timed!(
532 "amd-xdna",
533 Backend::AmdXdna,
534 "amd_xdna",
535 amd_xdna::detect_amd_xdna
536 );
537 run_backend_timed!("tpu", Backend::Tpu, "tpu", tpu::detect_tpu);
538 run_backend_timed!("gaudi", Backend::Gaudi, "gaudi", gaudi::detect_gaudi);
539 run_backend_timed!(
540 "aws-neuron",
541 Backend::AwsNeuron,
542 "aws_neuron",
543 neuron::detect_aws_neuron
544 );
545 run_backend_timed!(
546 "intel-oneapi",
547 Backend::IntelOneApi,
548 "intel_oneapi",
549 intel_oneapi::detect_intel_oneapi
550 );
551 run_backend_timed!(
552 "qualcomm",
553 Backend::Qualcomm,
554 "qualcomm",
555 qualcomm::detect_qualcomm_ai100
556 );
557 run_backend_timed!(
558 "cerebras",
559 Backend::Cerebras,
560 "cerebras",
561 cerebras::detect_cerebras_wse
562 );
563 run_backend_timed!(
564 "graphcore",
565 Backend::Graphcore,
566 "graphcore",
567 graphcore::detect_graphcore_ipu
568 );
569 run_backend_timed!("groq", Backend::Groq, "groq", groq::detect_groq_lpu);
570 run_backend_timed!(
571 "samsung-npu",
572 Backend::SamsungNpu,
573 "samsung_npu",
574 samsung_npu::detect_samsung_npu
575 );
576 run_backend_timed!(
577 "mediatek-apu",
578 Backend::MediaTekApu,
579 "mediatek_apu",
580 mediatek_apu::detect_mediatek_apu
581 );
582 run_backend_timed!(
583 "windows-wmi",
584 Backend::WindowsWmi,
585 "windows_wmi",
586 windows::detect_windows_gpu
587 );
588 }
589
590 #[cfg(feature = "vulkan")]
592 {
593 let has_vulkan = all_profiles
594 .iter()
595 .any(|p| matches!(p.accelerator, AcceleratorType::VulkanGpu { .. }));
596 let has_dedicated = all_profiles.iter().any(|p| {
597 matches!(
598 p.accelerator,
599 AcceleratorType::CudaGpu { .. } | AcceleratorType::RocmGpu { .. }
600 )
601 });
602 if !has_vulkan && !has_dedicated && builder.backend_enabled(Backend::Vulkan) {
603 let start = Instant::now();
604 vulkan::detect_vulkan_sysfs(&mut all_profiles, &mut all_warnings);
605 timings.insert("vulkan_sysfs".into(), start.elapsed());
606 }
607 }
608
609 let has_dedicated = all_profiles.iter().any(|p| {
611 matches!(
612 p.accelerator,
613 AcceleratorType::CudaGpu { .. } | AcceleratorType::RocmGpu { .. }
614 )
615 });
616 if has_dedicated {
617 all_profiles.retain(|p| !matches!(p.accelerator, AcceleratorType::VulkanGpu { .. }));
618 }
619
620 let enrich_start = Instant::now();
621 bandwidth::enrich_bandwidth(&mut all_profiles, &mut all_warnings);
622 let nvidia_pci = list_driver_pci_addrs("nvidia");
623 let amdgpu_pci = list_driver_pci_addrs("amdgpu");
624 pcie::enrich_pcie(&mut all_profiles, &nvidia_pci, &amdgpu_pci);
625 numa::enrich_numa(&mut all_profiles, &nvidia_pci, &amdgpu_pci);
626 timings.insert("_enrich".into(), enrich_start.elapsed());
627
628 let sysio_start = Instant::now();
629 let system_interconnects = interconnect::detect_interconnects(&mut all_warnings);
630 let system_storage = disk::detect_storage();
631 let system_environment = environment::detect_environment();
632 let system_io = SystemIo {
633 interconnects: system_interconnects,
634 storage: system_storage,
635 environment: Some(system_environment),
636 };
637 timings.insert("_system_io".into(), sysio_start.elapsed());
638
639 let registry = AcceleratorRegistry {
640 schema_version: crate::registry::SCHEMA_VERSION,
641 profiles: all_profiles,
642 warnings: all_warnings,
643 system_io,
644 };
645
646 TimedDetection {
647 registry,
648 timings,
649 total: wall_start.elapsed(),
650 }
651}
652
653pub(super) fn list_driver_pci_addrs(driver: &str) -> Vec<String> {
659 let dir = Path::new("/sys/bus/pci/drivers").join(driver);
660 if !dir.exists() {
661 return Vec::new();
662 }
663 let mut addrs: Vec<String> = std::fs::read_dir(&dir)
664 .into_iter()
665 .flatten()
666 .flatten()
667 .filter_map(|e| {
668 let name = e.file_name();
669 let name_bytes = name.as_encoded_bytes();
670 if name_bytes.contains(&b':')
672 && name_bytes.contains(&b'.')
673 && name_bytes
674 .iter()
675 .all(|&b| b.is_ascii_hexdigit() || b == b':' || b == b'.')
676 {
677 Some(name.to_string_lossy().into_owned())
678 } else {
679 None
680 }
681 })
682 .collect();
683 addrs.sort();
684 addrs
685}
686
687pub(super) fn iter_dev_devices(prefix: &str) -> impl Iterator<Item = u32> + '_ {
692 std::fs::read_dir("/dev")
693 .into_iter()
694 .flatten()
695 .flatten()
696 .filter_map(move |entry| {
697 let name = entry.file_name();
698 let name_str = name.to_string_lossy();
699 let suffix = name_str.strip_prefix(prefix)?;
700 if suffix.is_empty() || !suffix.chars().all(|c| c.is_ascii_digit()) {
701 return None;
702 }
703 suffix.parse::<u32>().ok()
704 })
705}
706
707pub(super) fn has_dev_device(prefix: &str) -> bool {
711 std::fs::read_dir("/dev")
712 .into_iter()
713 .flatten()
714 .flatten()
715 .any(|entry| entry.file_name().to_string_lossy().starts_with(prefix))
716}
717
718pub(crate) fn cpu_profile() -> AcceleratorProfile {
720 AcceleratorProfile {
721 accelerator: AcceleratorType::Cpu,
722 available: true,
723 memory_bytes: detect_cpu_memory(),
724 ..Default::default()
725 }
726}
727
728pub(crate) fn detect_cpu_memory() -> u64 {
730 if let Some(info) = read_sysfs_string(std::path::Path::new("/proc/meminfo"), 64 * 1024) {
731 for line in info.lines() {
732 if line.starts_with("MemTotal:")
733 && let Some(kb_str) = line.split_whitespace().nth(1)
734 && let Ok(kb) = kb_str.parse::<u64>()
735 {
736 return kb.saturating_mul(1024);
737 }
738 }
739 }
740 if let Ok(output) = command::run_tool("sysctl", &["-n", "hw.memsize"], command::DEFAULT_TIMEOUT)
742 && let Ok(bytes) = output.stdout.trim().parse::<u64>()
743 {
744 return bytes;
745 }
746 debug!("could not read system memory, defaulting to 16 GiB");
747 16 * 1024 * 1024 * 1024
748}
749
750pub(super) fn read_sysfs_u64(path: &Path) -> Option<u64> {
752 read_sysfs_string(path, 64).and_then(|s| s.trim().parse().ok())
753}
754
755pub(super) fn read_sysfs_string(path: &Path, max_bytes: usize) -> Option<String> {
764 use std::io::Read;
765 let mut file = std::fs::File::open(path).ok()?;
766
767 const STACK_SIZE: usize = 512;
769 if max_bytes < STACK_SIZE {
770 let mut buf = [0u8; STACK_SIZE];
771 let n = file.read(&mut buf[..max_bytes + 1]).ok()?;
772 if n > max_bytes {
773 return None;
774 }
775 return String::from_utf8(buf[..n].to_vec()).ok();
776 }
777
778 let mut buf = vec![0u8; max_bytes + 1];
779 let n = file.read(&mut buf).ok()?;
780 if n > max_bytes {
781 return None;
782 }
783 buf.truncate(n);
784 String::from_utf8(buf).ok()
785}
786
/// Async counterpart of [`detect_with_builder`].
///
/// Backends that provide an async entry point are spawned as individual tokio
/// tasks; the remaining enabled backends run sequentially inside a single
/// `spawn_blocking` task. Results are merged in that order (async tasks in spawn
/// order, then the blocking batch), after the always-present CPU profile. The
/// same post-processing as the synchronous path follows.
///
/// A task that fails to join (panic or cancellation) contributes no results; the
/// failure is logged instead of being dropped silently.
///
/// NOTE(review): the sysfs Vulkan fallback, the PCIe/NUMA enrichment and
/// `environment::detect_environment` are called directly from this async
/// function. If they block on filesystem or process I/O they will stall the
/// executor thread; confirm whether they should move to `spawn_blocking`.
#[cfg(feature = "async-detect")]
pub(crate) async fn detect_with_builder_async(builder: DetectBuilder) -> AcceleratorRegistry {
    let mut all_profiles = vec![cpu_profile()];
    let mut all_warnings: Vec<DetectionError> = Vec::new();

    debug!(
        backends = builder.enabled_count(),
        "starting async detection"
    );

    let mut handles: Vec<tokio::task::JoinHandle<DetectResult>> = Vec::new();

    // Spawns the backend's async detection function as its own task.
    macro_rules! spawn_async_backend {
        ($feature:literal, $backend:expr, $detect_fn:path) => {
            #[cfg(feature = $feature)]
            if builder.backend_enabled($backend) {
                handles.push(tokio::spawn($detect_fn()));
            }
        };
    }

    spawn_async_backend!("cuda", Backend::Cuda, cuda::detect_cuda_async);
    spawn_async_backend!("vulkan", Backend::Vulkan, vulkan::detect_vulkan_async);
    spawn_async_backend!("gaudi", Backend::Gaudi, gaudi::detect_gaudi_async);
    spawn_async_backend!(
        "aws-neuron",
        Backend::AwsNeuron,
        neuron::detect_aws_neuron_async
    );
    spawn_async_backend!("apple", Backend::Apple, apple::detect_metal_and_ane_async);
    spawn_async_backend!(
        "intel-oneapi",
        Backend::IntelOneApi,
        intel_oneapi::detect_intel_oneapi_async
    );

    // The blocking closure must own its builder, hence the clone.
    let sysfs_builder = builder.clone();
    let sysfs_handle = tokio::task::spawn_blocking(move || {
        let mut profiles = Vec::new();
        let mut warnings: Vec<DetectionError> = Vec::new();

        macro_rules! run_sysfs {
            ($feature:literal, $backend:expr, $detect_fn:expr) => {
                #[cfg(feature = $feature)]
                if sysfs_builder.backend_enabled($backend) {
                    $detect_fn(&mut profiles, &mut warnings);
                }
            };
        }

        run_sysfs!("rocm", Backend::Rocm, rocm::detect_rocm);
        run_sysfs!("intel-npu", Backend::IntelNpu, intel_npu::detect_intel_npu);
        run_sysfs!("amd-xdna", Backend::AmdXdna, amd_xdna::detect_amd_xdna);
        run_sysfs!("tpu", Backend::Tpu, tpu::detect_tpu);
        run_sysfs!(
            "qualcomm",
            Backend::Qualcomm,
            qualcomm::detect_qualcomm_ai100
        );
        run_sysfs!("cerebras", Backend::Cerebras, cerebras::detect_cerebras_wse);
        run_sysfs!(
            "graphcore",
            Backend::Graphcore,
            graphcore::detect_graphcore_ipu
        );
        run_sysfs!("groq", Backend::Groq, groq::detect_groq_lpu);
        run_sysfs!(
            "samsung-npu",
            Backend::SamsungNpu,
            samsung_npu::detect_samsung_npu
        );
        run_sysfs!(
            "mediatek-apu",
            Backend::MediaTekApu,
            mediatek_apu::detect_mediatek_apu
        );
        run_sysfs!(
            "windows-wmi",
            Backend::WindowsWmi,
            windows::detect_windows_gpu
        );

        (profiles, warnings)
    });

    // Await in spawn order so the merged profile order is deterministic.
    for handle in handles {
        match handle.await {
            Ok((profiles, warnings)) => {
                all_profiles.extend(profiles);
                all_warnings.extend(warnings);
            }
            Err(err) => {
                // Best-effort: keep going, but leave a trace of the failure.
                tracing::warn!(
                    error = %err,
                    "async backend detection task failed; its results were discarded"
                );
            }
        }
    }

    match sysfs_handle.await {
        Ok((profiles, warnings)) => {
            all_profiles.extend(profiles);
            all_warnings.extend(warnings);
        }
        Err(err) => {
            tracing::warn!(
                error = %err,
                "blocking backend detection task failed; its results were discarded"
            );
        }
    }

    // Sysfs Vulkan fallback: only attempted when the Vulkan backend is enabled and
    // neither a Vulkan profile nor a CUDA/ROCm profile was found so far.
    #[cfg(feature = "vulkan")]
    {
        let has_vulkan = all_profiles
            .iter()
            .any(|p| matches!(p.accelerator, AcceleratorType::VulkanGpu { .. }));
        let has_dedicated = all_profiles.iter().any(|p| {
            matches!(
                p.accelerator,
                AcceleratorType::CudaGpu { .. } | AcceleratorType::RocmGpu { .. }
            )
        });
        if !has_vulkan && !has_dedicated && builder.backend_enabled(Backend::Vulkan) {
            vulkan::detect_vulkan_sysfs(&mut all_profiles, &mut all_warnings);
        }
    }

    // Recomputed outside the cfg block because this step also runs when the
    // `vulkan` feature is compiled out. Vulkan profiles are dropped whenever a
    // CUDA or ROCm profile exists.
    let has_dedicated = all_profiles.iter().any(|p| {
        matches!(
            p.accelerator,
            AcceleratorType::CudaGpu { .. } | AcceleratorType::RocmGpu { .. }
        )
    });
    if has_dedicated {
        all_profiles.retain(|p| !matches!(p.accelerator, AcceleratorType::VulkanGpu { .. }));
    }

    bandwidth::enrich_bandwidth_async(&mut all_profiles, &mut all_warnings).await;
    let nvidia_pci = list_driver_pci_addrs("nvidia");
    let amdgpu_pci = list_driver_pci_addrs("amdgpu");
    pcie::enrich_pcie(&mut all_profiles, &nvidia_pci, &amdgpu_pci);
    numa::enrich_numa(&mut all_profiles, &nvidia_pci, &amdgpu_pci);

    let (system_interconnects, ic_warnings) = interconnect::detect_interconnects_async().await;
    all_warnings.extend(ic_warnings);

    // Storage probing runs on the blocking pool; a failed join falls back to the
    // default (empty) storage value.
    let system_storage = tokio::task::spawn_blocking(disk::detect_storage)
        .await
        .unwrap_or_default();

    let system_environment = environment::detect_environment();
    let system_io = SystemIo {
        interconnects: system_interconnects,
        storage: system_storage,
        environment: Some(system_environment),
    };

    debug!(
        count = all_profiles.len(),
        warnings = all_warnings.len(),
        interconnects = system_io.interconnects.len(),
        storage_devices = system_io.storage.len(),
        "async accelerator detection complete"
    );
    AcceleratorRegistry {
        schema_version: crate::registry::SCHEMA_VERSION,
        profiles: all_profiles,
        warnings: all_warnings,
        system_io,
    }
}