ai-hwaccel 1.1.0

Universal AI hardware accelerator detection, capability querying, and workload planning for Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
//! Vulkan compute device detection via `vulkaninfo`.
//!
//! Uses `vulkaninfo --summary` for basic device info, then optionally
//! parses full `vulkaninfo` output for compute queue families and subgroup
//! sizes.
//!
//! Performance: `vulkaninfo` can take 3–5s on some systems (e.g. AMD Cezanne
//! iGPU). To mitigate this, results are cached in
//! `$XDG_CACHE_HOME/ai-hwaccel/vulkan.json` (or
//! `$HOME/.cache/ai-hwaccel/vulkan.json` when `XDG_CACHE_HOME` is unset or
//! empty) with a 60s TTL, and each `vulkaninfo` subprocess runs with a 3s
//! timeout (shorter than the default 5s).

use std::path::PathBuf;
use std::time::{Duration, SystemTime};

use tracing::{debug, trace};

use crate::error::DetectionError;
use crate::hardware::AcceleratorType;
use crate::profile::AcceleratorProfile;

use super::command::run_tool;

/// Arguments passed to `vulkaninfo` for the short per-device summary.
const VULKANINFO_SUMMARY_ARGS: &[&str] = &["--summary"];
/// Arguments passed to `vulkaninfo` for the full report (none: the bare
/// command is run).
const VULKANINFO_FULL_ARGS: &[&str] = &[];

/// Vulkaninfo-specific timeout (shorter than the 5s default since vulkaninfo
/// is known to hang on some systems).
const VULKANINFO_TIMEOUT: Duration = Duration::from_secs(3);

/// TTL for the vulkaninfo file cache.
const VULKANINFO_CACHE_TTL: Duration = Duration::from_secs(60);

/// Return the cache file path for vulkaninfo results.
///
/// The cache directory is `$XDG_CACHE_HOME` when that variable is set and
/// non-empty, otherwise `$HOME/.cache`. Returns `None` when neither variable
/// provides a usable (non-empty) directory, in which case caching is skipped.
///
/// Values are read with `var_os`, so directories whose names are not valid
/// UTF-8 are still honoured.
fn vulkan_cache_path() -> Option<PathBuf> {
    // An empty value is treated exactly like an unset variable; otherwise an
    // empty `HOME` would yield a cache path relative to the working directory.
    let dir_from_env = |key: &str| {
        std::env::var_os(key)
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
    };

    let cache_dir = dir_from_env("XDG_CACHE_HOME")
        .or_else(|| dir_from_env("HOME").map(|home| home.join(".cache")))?;
    Some(cache_dir.join("ai-hwaccel").join("vulkan.json"))
}

/// Load cached vulkaninfo output, if a fresh cache file exists.
///
/// Returns `(summary, full)` where `full` is `None` when the cache holds no
/// string under the `"full"` key. Returns `None` when the cache path cannot be
/// determined, the file is missing or unreadable, its modification time is
/// older than `VULKANINFO_CACHE_TTL`, or the JSON lacks a string `"summary"`.
fn read_vulkan_cache() -> Option<(String, Option<String>)> {
    let cache_file = vulkan_cache_path()?;

    // Freshness is judged by the file's mtime. If the age cannot be computed
    // (mtime lies in the future) the entry is treated as infinitely old.
    let modified = std::fs::metadata(&cache_file)
        .and_then(|meta| meta.modified())
        .ok()?;
    let age = match modified.elapsed() {
        Ok(elapsed) => elapsed,
        Err(_) => Duration::MAX,
    };
    if age > VULKANINFO_CACHE_TTL {
        return None;
    }

    let raw = std::fs::read_to_string(&cache_file).ok()?;
    let document: serde_json::Value = serde_json::from_str(&raw).ok()?;

    let summary = document
        .get("summary")
        .and_then(|value| value.as_str())?
        .to_string();
    let full = document
        .get("full")
        .and_then(|value| value.as_str())
        .map(|text| text.to_string());

    debug!(
        "using cached vulkaninfo results (age: {:.1}s)",
        age.as_secs_f64()
    );
    Some((summary, full))
}

/// Write vulkaninfo output to the cache file (atomic via temp+rename).
fn write_vulkan_cache(summary: &str, full: Option<&str>) {
    let Some(path) = vulkan_cache_path() else {
        return;
    };
    if let Some(parent) = path.parent() {
        let _ = std::fs::create_dir_all(parent);
    }
    let cache = serde_json::json!({
        "summary": summary,
        "full": full,
        "cached_at": SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)
            .map(|d| d.as_secs())
            .unwrap_or(0),
    });
    if let Ok(json) = serde_json::to_string(&cache) {
        let tmp = path.with_extension("tmp");
        if std::fs::write(&tmp, &json).is_ok() {
            if std::fs::rename(&tmp, &path).is_ok() {
                return;
            }
            let _ = std::fs::remove_file(&tmp);
        }
        let _ = std::fs::write(&path, json);
    }
}

/// Detect Vulkan devices by running `vulkaninfo`, preferring cached output.
///
/// Detected devices are appended to `profiles`. A timeout or any other tool
/// failure is recorded in `warnings`; a missing `vulkaninfo` binary is only
/// logged.
///
/// Note: deduplication against CUDA/ROCm GPUs is handled by the orchestrator
/// in `detect/mod.rs` after all backends complete (since detection is parallel).
pub(crate) fn detect_vulkan(
    profiles: &mut Vec<AcceleratorProfile>,
    warnings: &mut Vec<DetectionError>,
) {
    // A fresh cache entry avoids launching `vulkaninfo` at all.
    if let Some((cached_summary, cached_full)) = read_vulkan_cache() {
        parse_vulkan_output(&cached_summary, cached_full.as_deref(), profiles, warnings);
        return;
    }

    let summary = match run_tool("vulkaninfo", VULKANINFO_SUMMARY_ARGS, VULKANINFO_TIMEOUT) {
        Ok(result) => result,
        Err(failure) => {
            match failure {
                DetectionError::ToolNotFound { .. } => {
                    debug!("vulkaninfo not found on $PATH, skipping Vulkan detection");
                }
                DetectionError::Timeout { .. } => {
                    // NOTE(review): the message mentions a sysfs fallback, but this
                    // function only records the warning — presumably the caller
                    // invokes `detect_vulkan_sysfs`; confirm.
                    debug!("vulkaninfo timed out, falling back to sysfs-only Vulkan detection");
                    warnings.push(DetectionError::Timeout {
                        tool: "vulkaninfo".into(),
                        timeout_secs: VULKANINFO_TIMEOUT.as_secs_f64(),
                    });
                }
                other => warnings.push(other),
            }
            return;
        }
    };

    // Full output for compute queue details (separate format from --summary).
    // A failure here is tolerated: the summary alone still yields profiles.
    let full = run_tool("vulkaninfo", VULKANINFO_FULL_ARGS, VULKANINFO_TIMEOUT).ok();
    let full_stdout = full.as_ref().map(|result| result.stdout.as_str());

    // Cache the results for subsequent calls.
    write_vulkan_cache(&summary.stdout, full_stdout);

    parse_vulkan_output(&summary.stdout, full_stdout, profiles, warnings);
}

/// Async counterpart of `detect_vulkan`.
///
/// Returns the detected profiles together with any warnings. A timeout or any
/// other tool failure becomes a warning; a missing `vulkaninfo` binary is only
/// logged.
#[cfg(feature = "async-detect")]
pub(crate) async fn detect_vulkan_async() -> super::DetectResult {
    let mut profiles = Vec::new();
    let mut warnings = Vec::new();

    // A fresh cache entry avoids launching `vulkaninfo` at all.
    if let Some((cached_summary, cached_full)) = read_vulkan_cache() {
        parse_vulkan_output(
            &cached_summary,
            cached_full.as_deref(),
            &mut profiles,
            &mut warnings,
        );
        return (profiles, warnings);
    }

    let summary_run = super::command::run_tool_async(
        "vulkaninfo",
        VULKANINFO_SUMMARY_ARGS,
        VULKANINFO_TIMEOUT,
    )
    .await;

    let summary = match summary_run {
        Ok(result) => result,
        Err(failure) => {
            match failure {
                DetectionError::ToolNotFound { .. } => {
                    debug!("vulkaninfo not found on $PATH, skipping Vulkan detection");
                }
                DetectionError::Timeout { .. } => {
                    // NOTE(review): the message mentions a sysfs fallback, but this
                    // function only records the warning — presumably the caller
                    // performs the fallback; confirm.
                    debug!("vulkaninfo timed out, falling back to sysfs-only Vulkan detection");
                    warnings.push(DetectionError::Timeout {
                        tool: "vulkaninfo".into(),
                        timeout_secs: VULKANINFO_TIMEOUT.as_secs_f64(),
                    });
                }
                other => warnings.push(other),
            }
            return (profiles, warnings);
        }
    };

    // The full report is optional; a failure just means no compute details.
    let full =
        super::command::run_tool_async("vulkaninfo", VULKANINFO_FULL_ARGS, VULKANINFO_TIMEOUT)
            .await
            .ok();
    let full_stdout = full.as_ref().map(|result| result.stdout.as_str());

    write_vulkan_cache(&summary.stdout, full_stdout);

    parse_vulkan_output(&summary.stdout, full_stdout, &mut profiles, &mut warnings);
    (profiles, warnings)
}

pub fn parse_vulkan_output(
    summary_stdout: &str,
    full_stdout: Option<&str>,
    profiles: &mut Vec<AcceleratorProfile>,
    _warnings: &mut Vec<DetectionError>,
) {
    let devices = parse_vulkan_summary(summary_stdout);
    // Cap full output parse at 256 KiB to bound memory usage on verbose drivers.
    let compute_info = full_stdout
        .filter(|s| s.len() <= 256 * 1024)
        .map(parse_vulkan_full)
        .unwrap_or_default();

    if devices.is_empty() {
        debug!("vulkaninfo found but no devices parsed, registering generic Vulkan GPU");
        profiles.push(AcceleratorProfile {
            accelerator: AcceleratorType::VulkanGpu { device_id: 0 },
            available: true,
            memory_bytes: 4 * 1024 * 1024 * 1024,
            device_name: Some("Unknown Vulkan Device".into()),
            ..Default::default()
        });
    } else {
        for (i, dev) in devices.into_iter().take(1024).enumerate() {
            let extra = compute_info.get(i);
            let mut cap_parts = Vec::new();
            if let Some(api) = &dev.api_version {
                cap_parts.push(format!("Vulkan {api}"));
            }
            if let Some(info) = extra {
                cap_parts.push(format!(
                    "compute queues: {}x{}, subgroup: {}",
                    info.compute_queue_count, info.compute_queue_family_count, info.subgroup_size,
                ));
            }
            let compute_cap = if cap_parts.is_empty() {
                dev.api_version.clone()
            } else {
                Some(cap_parts.join(", "))
            };

            debug!(
                device_id = i,
                name = %dev.name,
                mem_mb = dev.memory_mb,
                ?extra,
                "Vulkan GPU detected"
            );
            profiles.push(AcceleratorProfile {
                accelerator: AcceleratorType::VulkanGpu {
                    device_id: i as u32,
                },
                available: true,
                memory_bytes: dev.memory_mb.saturating_mul(1024 * 1024),
                compute_capability: compute_cap,
                driver_version: dev.driver_version,
                device_name: Some(dev.name),
                ..Default::default()
            });
        }
    }
}

// ---------------------------------------------------------------------------
// Sysfs-only Vulkan fallback
// ---------------------------------------------------------------------------

/// Detect Vulkan-capable GPUs via sysfs without spawning `vulkaninfo`.
///
/// Scans `/sys/class/drm/card*/device/{vendor,device}` and uses a PCI ID
/// lookup table to identify GPU names and estimate VRAM. This is a fast
/// fallback for when `vulkaninfo` is absent, slow, or times out.
///
/// Cards are visited in ascending `cardN` order so that the `device_id`
/// assigned to each GPU is stable across runs; the raw directory iteration
/// order is not guaranteed by the operating system.
///
/// * `profiles` — receives one `VulkanGpu` profile per recognised GPU.
/// * `_warnings` — currently unused; unreadable entries are silently skipped.
pub(crate) fn detect_vulkan_sysfs(
    profiles: &mut Vec<AcceleratorProfile>,
    _warnings: &mut Vec<DetectionError>,
) {
    // A missing or unreadable DRM directory simply means nothing to report.
    let Ok(entries) = std::fs::read_dir("/sys/class/drm") else {
        return;
    };

    // Collect `(numeric suffix, name, path)` for every card node.
    // Only look at cardN entries, not cardN-DP-1 etc.
    let mut cards: Vec<(u64, String, std::path::PathBuf)> = entries
        .flatten()
        .filter_map(|entry| {
            let name = entry.file_name().to_string_lossy().into_owned();
            let suffix = name.strip_prefix("card")?;
            if name.contains('-') {
                return None;
            }
            // Names without a numeric suffix sort last (then by name).
            let index = suffix.parse::<u64>().unwrap_or(u64::MAX);
            Some((index, name, entry.path()))
        })
        .collect();
    cards.sort_unstable();

    // Unparsable IDs become 0, which no GPU vendor uses, so they are skipped.
    let parse_pci_id =
        |raw: &str| u16::from_str_radix(raw.trim_start_matches("0x"), 16).unwrap_or(0);

    let mut device_id_counter: u32 = 0;

    for (_, _, card_path) in cards {
        let dev_dir = card_path.join("device");

        let Some(vendor_str) = super::read_sysfs_string(&dev_dir.join("vendor"), 64) else {
            continue;
        };
        let Some(device_str) = super::read_sysfs_string(&dev_dir.join("device"), 64) else {
            continue;
        };

        let vendor_id = parse_pci_id(vendor_str.trim());
        let device_id_pci = parse_pci_id(device_str.trim());

        // Skip non-GPU vendors.
        if !is_gpu_vendor(vendor_id) {
            continue;
        }

        let (device_name, estimated_vram_mb) = lookup_pci_gpu(vendor_id, device_id_pci);

        // Try to read VRAM from sysfs (some drivers expose this); otherwise
        // fall back to the lookup table's estimate.
        let vram_bytes = read_drm_vram(&dev_dir)
            .unwrap_or_else(|| estimated_vram_mb.saturating_mul(1024 * 1024));

        debug!(
            vendor_id,
            device_id_pci,
            name = %device_name,
            vram_mb = vram_bytes / (1024 * 1024),
            "sysfs Vulkan GPU detected"
        );

        profiles.push(AcceleratorProfile {
            accelerator: AcceleratorType::VulkanGpu {
                device_id: device_id_counter,
            },
            available: true,
            memory_bytes: vram_bytes,
            device_name: Some(device_name),
            ..Default::default()
        });
        device_id_counter += 1;
    }
}

/// Report whether `vendor_id` is one of the PCI vendors treated as GPU
/// makers here: NVIDIA, AMD/ATI, or Intel.
fn is_gpu_vendor(vendor_id: u16) -> bool {
    const NVIDIA: u16 = 0x10de;
    const AMD_ATI: u16 = 0x1002;
    const INTEL: u16 = 0x8086;

    [NVIDIA, AMD_ATI, INTEL].contains(&vendor_id)
}

/// Try to read VRAM total from DRM sysfs.
fn read_drm_vram(dev_dir: &std::path::Path) -> Option<u64> {
    // AMD: mem_info_vram_total
    if let Some(vram) = super::read_sysfs_u64(&dev_dir.join("mem_info_vram_total"))
        && vram > 0
    {
        return Some(vram);
    }
    None
}

/// Lookup GPU name and estimated VRAM from PCI vendor/device IDs.
///
/// Returns `(name, estimated_vram_mb)`. This covers common consumer and
/// datacenter GPUs. Unknown NVIDIA/AMD devices and unknown vendors get a
/// generic name and a 4 GB estimate; unknown Intel devices get 2 GB.
///
/// A single PCI device ID can be shared by several retail models, so both the
/// name and the VRAM figure are best-effort estimates.
///
/// NOTE(review): the RTX 4070-family IDs below were corrected from memory of
/// the public PCI ID database and should be confirmed against it. Several
/// other entries (A100/H100 variants, AMD Navi/Instinct IDs) also look
/// inconsistent with that database and should be audited before being relied
/// upon.
fn lookup_pci_gpu(vendor_id: u16, device_id: u16) -> (String, u64) {
    match vendor_id {
        // NVIDIA
        0x10de => {
            let (name, vram) = match device_id {
                // Ada Lovelace
                0x2684 => ("NVIDIA GeForce RTX 4090", 24 * 1024),
                0x2702 => ("NVIDIA GeForce RTX 4080 SUPER", 16 * 1024),
                0x2704 => ("NVIDIA GeForce RTX 4080", 16 * 1024),
                0x2705 => ("NVIDIA GeForce RTX 4070 Ti SUPER", 16 * 1024),
                0x2782 => ("NVIDIA GeForce RTX 4070 Ti", 12 * 1024),
                0x2783 => ("NVIDIA GeForce RTX 4070 SUPER", 12 * 1024),
                0x2786..=0x27a0 => ("NVIDIA GeForce RTX 4070", 12 * 1024),
                0x2803..=0x2820 => ("NVIDIA GeForce RTX 4060 Ti", 8 * 1024),
                0x2882..=0x2900 => ("NVIDIA GeForce RTX 4060", 8 * 1024),
                // Ampere
                0x2204 => ("NVIDIA GeForce RTX 3090", 24 * 1024),
                0x2206 => ("NVIDIA GeForce RTX 3080", 10 * 1024),
                0x2484 => ("NVIDIA GeForce RTX 3070", 8 * 1024),
                0x2504 => ("NVIDIA GeForce RTX 3060", 12 * 1024),
                // Datacenter
                0x2330 => ("NVIDIA H100", 80 * 1024),
                0x2324 => ("NVIDIA H100 PCIe", 80 * 1024),
                0x20b0 => ("NVIDIA A100 SXM", 80 * 1024),
                0x20b2 => ("NVIDIA A100 PCIe 80GB", 80 * 1024),
                0x20b5 => ("NVIDIA A100 PCIe 40GB", 40 * 1024),
                0x20b7 => ("NVIDIA A30", 24 * 1024),
                0x25b6 => ("NVIDIA A16", 16 * 1024),
                0x27b8 => ("NVIDIA L4", 24 * 1024),
                0x26b5 => ("NVIDIA L40", 48 * 1024),
                0x26b9 => ("NVIDIA L40S", 48 * 1024),
                0x1eb8 => ("NVIDIA T4", 16 * 1024),
                _ => ("NVIDIA GPU (unknown model)", 4 * 1024),
            };
            (name.into(), vram)
        }
        // AMD/ATI
        0x1002 => {
            let (name, vram) = match device_id {
                0x744c => ("AMD Radeon RX 7900 XTX", 24 * 1024),
                0x7448 => ("AMD Radeon RX 7900 XT", 20 * 1024),
                0x7480 => ("AMD Radeon RX 7800 XT", 16 * 1024),
                0x7470 => ("AMD Radeon RX 7700 XT", 12 * 1024),
                0x7460 => ("AMD Radeon RX 7600", 8 * 1024),
                0x73bf => ("AMD Radeon RX 6900 XT", 16 * 1024),
                0x73af => ("AMD Radeon RX 6800 XT", 16 * 1024),
                0x73df => ("AMD Radeon RX 6700 XT", 12 * 1024),
                0x73ff => ("AMD Radeon RX 6600 XT", 8 * 1024),
                0x7408 => ("AMD Instinct MI300X", 192 * 1024),
                0x740c => ("AMD Instinct MI250X", 128 * 1024),
                0x740f => ("AMD Instinct MI210", 64 * 1024),
                // Integrated GPUs (common Renoir/Cezanne/Phoenix/Hawk Point)
                0x1636 | 0x1638 => ("AMD Radeon Graphics (Renoir)", 512),
                0x164c | 0x164e => ("AMD Radeon Graphics (Cezanne)", 512),
                0x15bf | 0x15c8 => ("AMD Radeon Graphics (Phoenix)", 512),
                _ => ("AMD GPU (unknown model)", 4 * 1024),
            };
            (name.into(), vram)
        }
        // Intel
        0x8086 => {
            let (name, vram) = match device_id {
                0x56a0 | 0x56a1 => ("Intel Arc A770", 16 * 1024),
                0x56a5 | 0x56a6 => ("Intel Arc A750", 8 * 1024),
                0x5690..=0x5692 => ("Intel Arc A580", 8 * 1024),
                0x56c0 | 0x56c1 => ("Intel Arc A380", 6 * 1024),
                0x4f80..=0x4f8f => ("Intel Data Center GPU Flex", 16 * 1024),
                0x0bd0..=0x0bdf => ("Intel Data Center GPU Max", 48 * 1024),
                _ => ("Intel GPU (unknown model)", 2 * 1024),
            };
            (name.into(), vram)
        }
        _ => ("Unknown GPU".into(), 4 * 1024),
    }
}

/// One device section parsed from `vulkaninfo --summary` output.
struct VulkanDevice {
    /// Value of the `deviceName` key.
    name: String,
    /// Device memory estimate; `parse_vulkan_output` multiplies it by
    /// 1024 * 1024 to obtain bytes.
    memory_mb: u64,
    /// Value of the `apiVersion` key, if the section had one.
    api_version: Option<String>,
    /// Value of the `driverVersion` key, if the section had one.
    driver_version: Option<String>,
}

/// Compute-relevant info parsed from full `vulkaninfo` output.
///
/// Every field defaults to 0, so a zero also means "not found in the output".
#[derive(Debug, Default)]
struct VulkanComputeInfo {
    /// Number of queue families that support QUEUE_COMPUTE_BIT.
    compute_queue_family_count: u32,
    /// Total compute queues across all compute-capable families.
    compute_queue_count: u32,
    /// Subgroup size (wavefront/warp size).
    subgroup_size: u32,
}

// ---------------------------------------------------------------------------
// Summary parser (--summary)
// ---------------------------------------------------------------------------

/// Parse `vulkaninfo --summary` output for device details.
///
/// A device section starts at a line that begins with `GPU` and ends with
/// `:`. Within a section the `deviceName`, `apiVersion` and `driverVersion`
/// keys are recorded, and the largest value of any key starting with `size`
/// is used as the memory figure. Sections without a `deviceName` are dropped,
/// and every new section starts from a clean slate so values from a dropped
/// section cannot leak into the next device.
///
/// Returns the devices in the order they appear in `output`.
fn parse_vulkan_summary(output: &str) -> Vec<VulkanDevice> {
    /// Memory figure used when a section reports no usable `size` entry.
    const DEFAULT_MEMORY_MB: u64 = 4 * 1024;
    /// Maximum number of characters kept from a `deviceName` value.
    const MAX_NAME_CHARS: usize = 256;

    /// Fields gathered so far for the device section being read.
    #[derive(Default)]
    struct Pending {
        name: String,
        memory_mb: u64,
        api_version: Option<String>,
        driver_version: Option<String>,
    }

    impl Pending {
        /// Finish the section; sections that never named a device yield `None`.
        fn finish(self) -> Option<VulkanDevice> {
            if self.name.is_empty() {
                return None;
            }
            Some(VulkanDevice {
                name: self.name,
                memory_mb: if self.memory_mb > 0 {
                    self.memory_mb
                } else {
                    DEFAULT_MEMORY_MB
                },
                api_version: self.api_version,
                driver_version: self.driver_version,
            })
        }
    }

    trace!(
        line_count = output.lines().count(),
        "parsing vulkaninfo summary"
    );

    let mut devices = Vec::new();
    // `None` until the first device header has been seen.
    let mut pending: Option<Pending> = None;

    for line in output.lines() {
        let trimmed = line.trim();

        if trimmed.starts_with("GPU") && trimmed.ends_with(':') {
            // Flush the previous section (if any) and start a fresh one.
            devices.extend(pending.replace(Pending::default()).and_then(Pending::finish));
            continue;
        }

        let Some(device) = pending.as_mut() else {
            continue;
        };
        let Some((key, value)) = trimmed.split_once('=') else {
            continue;
        };
        let (key, value) = (key.trim(), value.trim());

        match key {
            "deviceName" => device.name = value.chars().take(MAX_NAME_CHARS).collect(),
            "apiVersion" => device.api_version = Some(value.to_string()),
            "driverVersion" => device.driver_version = Some(value.to_string()),
            _ if key.starts_with("size") => {
                // NOTE(review): the first numeric token is used as-is as the
                // memory figure — confirm the unit `vulkaninfo` prints here.
                let reported = value
                    .split_whitespace()
                    .next()
                    .and_then(|token| token.parse::<u64>().ok());
                if let Some(mb) = reported {
                    device.memory_mb = device.memory_mb.max(mb);
                }
            }
            _ => {}
        }
    }

    devices.extend(pending.and_then(Pending::finish));
    devices
}

// ---------------------------------------------------------------------------
// Full output parser (no args)
// ---------------------------------------------------------------------------

/// Parse full `vulkaninfo` output for compute queue families and subgroup size.
///
/// Returns one `VulkanComputeInfo` per GPU section that reported a subgroup
/// size or at least one compute queue.
///
/// Inside a `VkQueueFamilyProperties` section each family contributes one
/// `queueCount` line and one `queueFlags` line. The two are paired per family,
/// in whichever order they appear, and the count is added to
/// `compute_queue_count` only when that same family's flags contain
/// `QUEUE_COMPUTE_BIT`.
fn parse_vulkan_full(output: &str) -> Vec<VulkanComputeInfo> {
    /// Parse the numeric value of `line` when the line starts with `key`.
    fn keyed_u32(line: &str, key: &str) -> Option<u32> {
        if !line.starts_with(key) {
            return None;
        }
        extract_value(line)?.parse().ok()
    }

    let mut infos = Vec::new();
    let mut current = VulkanComputeInfo::default();
    let mut in_queue_section = false;
    // Halves of the queue family currently being read: whichever of the two
    // lines arrives first is parked here until its partner shows up.
    let mut pending_count: Option<u32> = None;
    let mut pending_compute: Option<bool> = None;

    for line in output.lines() {
        let trimmed = line.trim();

        // New GPU section resets state.
        if trimmed.starts_with("VkPhysicalDeviceProperties:") || trimmed.starts_with("GPU id") {
            if current.subgroup_size > 0 || current.compute_queue_count > 0 {
                infos.push(std::mem::take(&mut current));
            }
            in_queue_section = false;
            pending_count = None;
            pending_compute = None;
        }

        // Subgroup size (but not the `subgroupSizeControl…` keys).
        let subgroup =
            keyed_u32(trimmed, "subgroupSize").filter(|_| !trimmed.contains("Control"));
        if let Some(size) = subgroup {
            current.subgroup_size = size;
        }

        // Queue family section.
        if trimmed.starts_with("VkQueueFamilyProperties") {
            in_queue_section = true;
            pending_count = None;
            pending_compute = None;
            continue;
        }

        if !in_queue_section {
            continue;
        }

        if trimmed.starts_with("queueFlags") {
            let is_compute = trimmed.contains("QUEUE_COMPUTE_BIT");
            if is_compute {
                current.compute_queue_family_count += 1;
            }
            match pending_count.take() {
                // `queueCount` came first: the family is now complete.
                Some(count) => {
                    if is_compute {
                        current.compute_queue_count =
                            current.compute_queue_count.saturating_add(count);
                    }
                }
                // Flags came first: remember them for the upcoming count.
                None => pending_compute = Some(is_compute),
            }
        } else if let Some(count) = keyed_u32(trimmed, "queueCount") {
            match pending_compute.take() {
                // `queueFlags` came first: the family is now complete.
                Some(is_compute) => {
                    if is_compute {
                        current.compute_queue_count =
                            current.compute_queue_count.saturating_add(count);
                    }
                }
                // Count came first: park it until the flags are known.
                None => pending_count = Some(count),
            }
        }

        // End of queue section.
        if trimmed.is_empty() || (trimmed.starts_with("Vk") && !trimmed.starts_with("VkQueue")) {
            in_queue_section = false;
            pending_count = None;
            pending_compute = None;
        }
    }

    // Flush last device.
    if current.subgroup_size > 0 || current.compute_queue_count > 0 {
        infos.push(current);
    }

    infos
}

/// Return the trimmed text following the first `=` in `line`, or — when the
/// line has no `=` — following the first `:`. Yields `None` if neither
/// separator is present.
fn extract_value(line: &str) -> Option<&str> {
    let (_, value) = match line.split_once('=') {
        Some(parts) => parts,
        None => line.split_once(':')?,
    };
    Some(value.trim())
}

// Unit tests for the full-output parser and the key/value helper.
#[cfg(test)]
mod tests {
    use super::*;

    // One GPU section with three queue families; in this fixture each
    // family's `queueCount` line precedes its `queueFlags` line.
    #[test]
    fn parse_vulkan_full_compute_info() {
        let output = r#"
VkPhysicalDeviceProperties:
	deviceType        = PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU
	deviceName        = AMD Radeon Graphics (RADV RENOIR)
	subgroupSize                      = 64
VkQueueFamilyProperties:
		queueCount                  = 1
		queueFlags                  = QUEUE_GRAPHICS_BIT | QUEUE_COMPUTE_BIT | QUEUE_TRANSFER_BIT
		queueCount                  = 4
		queueFlags                  = QUEUE_COMPUTE_BIT | QUEUE_TRANSFER_BIT
		queueCount                  = 1
		queueFlags                  = QUEUE_VIDEO_DECODE_BIT_KHR
"#;
        let infos = parse_vulkan_full(output);
        assert_eq!(infos.len(), 1);
        assert_eq!(infos[0].subgroup_size, 64);
        // Two of the three families advertise QUEUE_COMPUTE_BIT.
        assert_eq!(infos[0].compute_queue_family_count, 2);
        assert!(infos[0].compute_queue_count >= 5); // 1 + 4
    }

    // Empty input must not produce a phantom device entry.
    #[test]
    fn parse_vulkan_full_empty() {
        let infos = parse_vulkan_full("");
        assert!(infos.is_empty());
    }

    // `=` separator: the value is returned with surrounding whitespace removed.
    #[test]
    fn extract_value_equals() {
        assert_eq!(extract_value("subgroupSize = 64"), Some("64"));
    }

    // `:` separator is used when the line contains no `=`.
    #[test]
    fn extract_value_colon() {
        assert_eq!(extract_value("queueCount: 4"), Some("4"));
    }
}