use std::path::PathBuf;
use std::time::{Duration, SystemTime};
use tracing::{debug, trace};
use crate::error::DetectionError;
use crate::hardware::AcceleratorType;
use crate::profile::AcceleratorProfile;
use super::command::run_tool;
/// Arguments for the first `vulkaninfo` invocation, which requests the summary output.
const VULKANINFO_SUMMARY_ARGS: &[&str] = &["--summary"];
/// Arguments for the second `vulkaninfo` invocation (none), which requests the full output.
const VULKANINFO_FULL_ARGS: &[&str] = &[];
/// Timeout handed to the tool runner for each `vulkaninfo` invocation.
const VULKANINFO_TIMEOUT: Duration = Duration::from_secs(3);
/// Maximum age of the on-disk cache file before it is ignored and `vulkaninfo` is re-run.
const VULKANINFO_CACHE_TTL: Duration = Duration::from_secs(60);
/// Resolves the location of the on-disk vulkaninfo cache file.
///
/// The cache directory is `$XDG_CACHE_HOME` when that variable holds an
/// absolute path, otherwise `$HOME/.cache` when `$HOME` holds an absolute
/// path. The file itself is `<cache dir>/ai-hwaccel/vulkan.json`.
///
/// Returns `None` when neither variable yields a usable directory, in which
/// case callers simply skip caching.
fn vulkan_cache_path() -> Option<PathBuf> {
    // Unset, empty and relative values are all rejected: joining onto them
    // would silently place the cache relative to the current working
    // directory. `var_os` keeps non-UTF-8 paths usable.
    let absolute_dir = |var: &str| {
        std::env::var_os(var)
            .map(PathBuf::from)
            .filter(|dir| dir.is_absolute())
    };

    let cache_dir = absolute_dir("XDG_CACHE_HOME")
        .or_else(|| absolute_dir("HOME").map(|home| home.join(".cache")))?;

    Some(cache_dir.join("ai-hwaccel").join("vulkan.json"))
}
/// Loads a previously cached vulkaninfo result, provided a fresh one exists.
///
/// Returns `(summary, full)` where `full` is `None` if the cache has no string
/// stored under `"full"`. Returns `None` when the cache path cannot be
/// resolved, the file or its modification time cannot be read, the file is
/// older than `VULKANINFO_CACHE_TTL`, or the content is not JSON with a string
/// `"summary"` entry.
fn read_vulkan_cache() -> Option<(String, Option<String>)> {
    let cache_file = vulkan_cache_path()?;
    let modified = std::fs::metadata(&cache_file).ok()?.modified().ok()?;

    // `elapsed` fails when the modification time lies in the future; such a
    // file is treated as infinitely old and therefore stale.
    let age = match modified.elapsed() {
        Ok(elapsed) => elapsed,
        Err(_) => Duration::MAX,
    };
    if age > VULKANINFO_CACHE_TTL {
        return None;
    }

    let raw = std::fs::read_to_string(&cache_file).ok()?;
    let document: serde_json::Value = serde_json::from_str(&raw).ok()?;
    let summary = document.get("summary")?.as_str()?.to_owned();
    let full = match document.get("full") {
        Some(value) => value.as_str().map(str::to_owned),
        None => None,
    };

    debug!(
        "using cached vulkaninfo results (age: {:.1}s)",
        age.as_secs_f64()
    );
    Some((summary, full))
}
fn write_vulkan_cache(summary: &str, full: Option<&str>) {
let Some(path) = vulkan_cache_path() else {
return;
};
if let Some(parent) = path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let cache = serde_json::json!({
"summary": summary,
"full": full,
"cached_at": SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0),
});
if let Ok(json) = serde_json::to_string(&cache) {
let tmp = path.with_extension("tmp");
if std::fs::write(&tmp, &json).is_ok() {
if std::fs::rename(&tmp, &path).is_ok() {
return;
}
let _ = std::fs::remove_file(&tmp);
}
let _ = std::fs::write(&path, json);
}
}
/// Detects Vulkan devices by running `vulkaninfo` (or reusing a fresh cache).
///
/// Parsed devices are appended to `profiles`. A missing tool is only logged;
/// a timeout or any other tool failure is appended to `warnings`. In all
/// failure cases the function returns without adding profiles. A failure of
/// the second (full) run is tolerated: parsing then proceeds with the summary
/// alone.
pub(crate) fn detect_vulkan(
    profiles: &mut Vec<AcceleratorProfile>,
    warnings: &mut Vec<DetectionError>,
) {
    if let Some((cached_summary, cached_full)) = read_vulkan_cache() {
        parse_vulkan_output(&cached_summary, cached_full.as_deref(), profiles, warnings);
        return;
    }

    let summary = match run_tool("vulkaninfo", VULKANINFO_SUMMARY_ARGS, VULKANINFO_TIMEOUT) {
        Ok(out) => out,
        Err(failure) => {
            match failure {
                DetectionError::ToolNotFound { .. } => {
                    debug!("vulkaninfo not found on $PATH, skipping Vulkan detection");
                }
                DetectionError::Timeout { .. } => {
                    debug!("vulkaninfo timed out, falling back to sysfs-only Vulkan detection");
                    warnings.push(DetectionError::Timeout {
                        tool: "vulkaninfo".into(),
                        timeout_secs: VULKANINFO_TIMEOUT.as_secs_f64(),
                    });
                }
                other => warnings.push(other),
            }
            return;
        }
    };

    // The full dump only enriches the summary, so its failure is not an error.
    let full = run_tool("vulkaninfo", VULKANINFO_FULL_ARGS, VULKANINFO_TIMEOUT).ok();
    let full_text = full.as_ref().map(|out| out.stdout.as_str());

    write_vulkan_cache(&summary.stdout, full_text);
    parse_vulkan_output(&summary.stdout, full_text, profiles, warnings);
}
/// Async counterpart of `detect_vulkan`, returning `(profiles, warnings)`.
///
/// A fresh cache short-circuits the tool invocation. A missing tool is only
/// logged; a timeout or other failure of the summary run is reported through
/// the returned warnings. A failure of the full run is tolerated.
#[cfg(feature = "async-detect")]
pub(crate) async fn detect_vulkan_async() -> super::DetectResult {
    let mut profiles = Vec::new();
    let mut warnings = Vec::new();

    if let Some((cached_summary, cached_full)) = read_vulkan_cache() {
        parse_vulkan_output(
            &cached_summary,
            cached_full.as_deref(),
            &mut profiles,
            &mut warnings,
        );
        return (profiles, warnings);
    }

    let summary_run = super::command::run_tool_async(
        "vulkaninfo",
        VULKANINFO_SUMMARY_ARGS,
        VULKANINFO_TIMEOUT,
    )
    .await;
    let summary = match summary_run {
        Ok(out) => out,
        Err(failure) => {
            match failure {
                DetectionError::ToolNotFound { .. } => {
                    debug!("vulkaninfo not found on $PATH, skipping Vulkan detection");
                }
                DetectionError::Timeout { .. } => {
                    debug!("vulkaninfo timed out, falling back to sysfs-only Vulkan detection");
                    warnings.push(DetectionError::Timeout {
                        tool: "vulkaninfo".into(),
                        timeout_secs: VULKANINFO_TIMEOUT.as_secs_f64(),
                    });
                }
                other => warnings.push(other),
            }
            return (profiles, warnings);
        }
    };

    // The full dump only enriches the summary, so its failure is not an error.
    let full =
        super::command::run_tool_async("vulkaninfo", VULKANINFO_FULL_ARGS, VULKANINFO_TIMEOUT)
            .await
            .ok();
    let full_text = full.as_ref().map(|out| out.stdout.as_str());

    write_vulkan_cache(&summary.stdout, full_text);
    parse_vulkan_output(&summary.stdout, full_text, &mut profiles, &mut warnings);
    (profiles, warnings)
}
pub fn parse_vulkan_output(
summary_stdout: &str,
full_stdout: Option<&str>,
profiles: &mut Vec<AcceleratorProfile>,
_warnings: &mut Vec<DetectionError>,
) {
let devices = parse_vulkan_summary(summary_stdout);
let compute_info = full_stdout
.filter(|s| s.len() <= 256 * 1024)
.map(parse_vulkan_full)
.unwrap_or_default();
if devices.is_empty() {
debug!("vulkaninfo found but no devices parsed, registering generic Vulkan GPU");
profiles.push(AcceleratorProfile {
accelerator: AcceleratorType::VulkanGpu { device_id: 0 },
available: true,
memory_bytes: 4 * 1024 * 1024 * 1024,
device_name: Some("Unknown Vulkan Device".into()),
..Default::default()
});
} else {
for (i, dev) in devices.into_iter().take(1024).enumerate() {
let extra = compute_info.get(i);
let mut cap_parts = Vec::new();
if let Some(api) = &dev.api_version {
cap_parts.push(format!("Vulkan {api}"));
}
if let Some(info) = extra {
cap_parts.push(format!(
"compute queues: {}x{}, subgroup: {}",
info.compute_queue_count, info.compute_queue_family_count, info.subgroup_size,
));
}
let compute_cap = if cap_parts.is_empty() {
dev.api_version.clone()
} else {
Some(cap_parts.join(", "))
};
debug!(
device_id = i,
name = %dev.name,
mem_mb = dev.memory_mb,
?extra,
"Vulkan GPU detected"
);
profiles.push(AcceleratorProfile {
accelerator: AcceleratorType::VulkanGpu {
device_id: i as u32,
},
available: true,
memory_bytes: dev.memory_mb.saturating_mul(1024 * 1024),
compute_capability: compute_cap,
driver_version: dev.driver_version,
device_name: Some(dev.name),
..Default::default()
});
}
}
}
/// Detects GPUs from `/sys/class/drm` without invoking any external tool.
///
/// Every `cardN` entry (connector entries such as `card0-HDMI-A-1` are
/// skipped) whose PCI vendor is recognised by `is_gpu_vendor` is appended to
/// `profiles`. Device ids are assigned sequentially in ascending card order so
/// that they are stable across runs. Memory size comes from
/// `read_drm_vram` when available, otherwise from the estimate in
/// `lookup_pci_gpu`.
///
/// `_warnings` is accepted for signature symmetry and currently unused.
pub(crate) fn detect_vulkan_sysfs(
    profiles: &mut Vec<AcceleratorProfile>,
    _warnings: &mut Vec<DetectionError>,
) {
    // A missing or unreadable directory simply means there is nothing to detect.
    let Ok(entries) = std::fs::read_dir("/sys/class/drm") else {
        return;
    };

    // `read_dir` yields entries in a platform/filesystem dependent order, so
    // collect the candidates and sort them by card index (then by name) before
    // handing out device ids. Names without a numeric suffix sort last.
    let mut cards: Vec<(u64, String, std::path::PathBuf)> = entries
        .flatten()
        .filter_map(|entry| {
            let name = entry.file_name().to_string_lossy().into_owned();
            if name.contains('-') {
                return None;
            }
            let index = name
                .strip_prefix("card")?
                .parse::<u64>()
                .unwrap_or(u64::MAX);
            Some((index, name, entry.path()))
        })
        .collect();
    cards.sort_unstable();

    let mut device_id_counter: u32 = 0;
    for (_, _, card_path) in cards {
        let dev_dir = card_path.join("device");
        let vendor_path = dev_dir.join("vendor");
        let device_path = dev_dir.join("device");
        let Some(vendor_str) = super::read_sysfs_string(&vendor_path, 64) else {
            continue;
        };
        let Some(device_str) = super::read_sysfs_string(&device_path, 64) else {
            continue;
        };

        // Ids are hex with a `0x` prefix; an unparsable vendor becomes 0 and is
        // then rejected by the vendor check, an unparsable device id becomes 0
        // and maps to the vendor's "unknown model" entry.
        let vendor_id =
            u16::from_str_radix(vendor_str.trim().trim_start_matches("0x"), 16).unwrap_or(0);
        let device_id_pci =
            u16::from_str_radix(device_str.trim().trim_start_matches("0x"), 16).unwrap_or(0);
        if !is_gpu_vendor(vendor_id) {
            continue;
        }

        let (device_name, estimated_vram_mb) = lookup_pci_gpu(vendor_id, device_id_pci);
        let vram_bytes = read_drm_vram(&dev_dir)
            .unwrap_or_else(|| estimated_vram_mb.saturating_mul(1024 * 1024));

        debug!(
            vendor_id,
            device_id_pci,
            name = %device_name,
            vram_mb = vram_bytes / (1024 * 1024),
            "sysfs Vulkan GPU detected"
        );

        profiles.push(AcceleratorProfile {
            accelerator: AcceleratorType::VulkanGpu {
                device_id: device_id_counter,
            },
            available: true,
            memory_bytes: vram_bytes,
            device_name: Some(device_name),
            ..Default::default()
        });
        device_id_counter += 1;
    }
}
/// Reports whether a PCI vendor id is one of the GPU vendors handled here.
fn is_gpu_vendor(vendor_id: u16) -> bool {
    // The same three vendor ids that `lookup_pci_gpu` has tables for
    // (NVIDIA, AMD, Intel).
    const GPU_VENDOR_IDS: [u16; 3] = [0x10de, 0x1002, 0x8086];
    GPU_VENDOR_IDS.contains(&vendor_id)
}
fn read_drm_vram(dev_dir: &std::path::Path) -> Option<u64> {
if let Some(vram) = super::read_sysfs_u64(&dev_dir.join("mem_info_vram_total"))
&& vram > 0
{
return Some(vram);
}
None
}
/// Maps a PCI vendor/device id pair to a display name and an estimated VRAM
/// size in MiB.
///
/// Unknown device ids of a known vendor map to that vendor's generic
/// "unknown model" entry; unknown vendors map to `("Unknown GPU", 4096)`.
///
/// NOTE(review): the id-to-model assignments are carried over unchanged and
/// have not been cross-checked against the PCI id database — TODO confirm.
fn lookup_pci_gpu(vendor_id: u16, device_id: u16) -> (String, u64) {
    // (first id, last id, model name, VRAM in MiB); the first matching row wins.
    type Row = (u16, u16, &'static str, u64);

    const NVIDIA: &[Row] = &[
        (0x2684, 0x2684, "NVIDIA GeForce RTX 4090", 24 * 1024),
        (0x2702, 0x2702, "NVIDIA GeForce RTX 4080 SUPER", 16 * 1024),
        (0x2704, 0x2704, "NVIDIA GeForce RTX 4080", 16 * 1024),
        (0x2782, 0x2782, "NVIDIA GeForce RTX 4070 Ti SUPER", 16 * 1024),
        (0x2786, 0x2786, "NVIDIA GeForce RTX 4070 Ti", 12 * 1024),
        (0x2783, 0x2783, "NVIDIA GeForce RTX 4070 SUPER", 12 * 1024),
        (0x2787, 0x27a0, "NVIDIA GeForce RTX 4070", 12 * 1024),
        (0x2803, 0x2820, "NVIDIA GeForce RTX 4060 Ti", 8 * 1024),
        (0x2882, 0x2900, "NVIDIA GeForce RTX 4060", 8 * 1024),
        (0x2204, 0x2204, "NVIDIA GeForce RTX 3090", 24 * 1024),
        (0x2206, 0x2206, "NVIDIA GeForce RTX 3080", 10 * 1024),
        (0x2484, 0x2484, "NVIDIA GeForce RTX 3070", 8 * 1024),
        (0x2504, 0x2504, "NVIDIA GeForce RTX 3060", 12 * 1024),
        (0x2330, 0x2330, "NVIDIA H100", 80 * 1024),
        (0x2324, 0x2324, "NVIDIA H100 PCIe", 80 * 1024),
        (0x20b0, 0x20b0, "NVIDIA A100 SXM", 80 * 1024),
        (0x20b2, 0x20b2, "NVIDIA A100 PCIe 80GB", 80 * 1024),
        (0x20b5, 0x20b5, "NVIDIA A100 PCIe 40GB", 40 * 1024),
        (0x20b7, 0x20b7, "NVIDIA A30", 24 * 1024),
        (0x25b6, 0x25b6, "NVIDIA A16", 16 * 1024),
        (0x27b8, 0x27b8, "NVIDIA L4", 24 * 1024),
        (0x26b5, 0x26b5, "NVIDIA L40", 48 * 1024),
        (0x26b9, 0x26b9, "NVIDIA L40S", 48 * 1024),
        (0x1eb8, 0x1eb8, "NVIDIA T4", 16 * 1024),
    ];

    const AMD: &[Row] = &[
        (0x744c, 0x744c, "AMD Radeon RX 7900 XTX", 24 * 1024),
        (0x7448, 0x7448, "AMD Radeon RX 7900 XT", 20 * 1024),
        (0x7480, 0x7480, "AMD Radeon RX 7800 XT", 16 * 1024),
        (0x7470, 0x7470, "AMD Radeon RX 7700 XT", 12 * 1024),
        (0x7460, 0x7460, "AMD Radeon RX 7600", 8 * 1024),
        (0x73bf, 0x73bf, "AMD Radeon RX 6900 XT", 16 * 1024),
        (0x73af, 0x73af, "AMD Radeon RX 6800 XT", 16 * 1024),
        (0x73df, 0x73df, "AMD Radeon RX 6700 XT", 12 * 1024),
        (0x73ff, 0x73ff, "AMD Radeon RX 6600 XT", 8 * 1024),
        (0x7408, 0x7408, "AMD Instinct MI300X", 192 * 1024),
        (0x740c, 0x740c, "AMD Instinct MI250X", 128 * 1024),
        (0x740f, 0x740f, "AMD Instinct MI210", 64 * 1024),
        (0x1636, 0x1636, "AMD Radeon Graphics (Renoir)", 512),
        (0x1638, 0x1638, "AMD Radeon Graphics (Renoir)", 512),
        (0x164c, 0x164c, "AMD Radeon Graphics (Cezanne)", 512),
        (0x164e, 0x164e, "AMD Radeon Graphics (Cezanne)", 512),
        (0x15bf, 0x15bf, "AMD Radeon Graphics (Phoenix)", 512),
        (0x15c8, 0x15c8, "AMD Radeon Graphics (Phoenix)", 512),
    ];

    const INTEL: &[Row] = &[
        (0x56a0, 0x56a1, "Intel Arc A770", 16 * 1024),
        (0x56a5, 0x56a6, "Intel Arc A750", 8 * 1024),
        (0x5690, 0x5692, "Intel Arc A580", 8 * 1024),
        (0x56c0, 0x56c1, "Intel Arc A380", 6 * 1024),
        (0x4f80, 0x4f8f, "Intel Data Center GPU Flex", 16 * 1024),
        (0x0bd0, 0x0bdf, "Intel Data Center GPU Max", 48 * 1024),
    ];

    // Pick the vendor's table together with its "unknown model" fallback.
    let (table, fallback): (&[Row], (&str, u64)) = match vendor_id {
        0x10de => (NVIDIA, ("NVIDIA GPU (unknown model)", 4 * 1024)),
        0x1002 => (AMD, ("AMD GPU (unknown model)", 4 * 1024)),
        0x8086 => (INTEL, ("Intel GPU (unknown model)", 2 * 1024)),
        _ => return ("Unknown GPU".into(), 4 * 1024),
    };

    let (name, vram_mb) = table
        .iter()
        .find(|&&(first, last, _, _)| (first..=last).contains(&device_id))
        .map(|&(_, _, name, vram_mb)| (name, vram_mb))
        .unwrap_or(fallback);
    (name.into(), vram_mb)
}
/// One device section parsed from `vulkaninfo --summary` output by
/// `parse_vulkan_summary`.
struct VulkanDevice {
/// Value of the `deviceName` key, truncated by the parser to at most 256 characters.
name: String,
/// Largest `size` value parsed for the device, or 4096 when none was parsed.
/// `parse_vulkan_output` multiplies it by 1024 * 1024 to obtain bytes.
memory_mb: u64,
/// Raw value of the `apiVersion` key, if the section contained one.
api_version: Option<String>,
/// Raw value of the `driverVersion` key, if the section contained one.
driver_version: Option<String>,
}
/// Compute-related details for one device, collected by `parse_vulkan_full`
/// from the full `vulkaninfo` output. All fields default to 0.
#[derive(Debug, Default)]
struct VulkanComputeInfo {
/// Number of `queueFlags` lines containing `QUEUE_COMPUTE_BIT` seen in the
/// device's queue-family section.
compute_queue_family_count: u32,
/// Sum of the `queueCount` values that `parse_vulkan_full` attributes to
/// compute-capable queue families.
compute_queue_count: u32,
/// Last parsed `subgroupSize` value; 0 when none was found.
subgroup_size: u32,
}
/// Parses `vulkaninfo --summary` output into a list of devices.
///
/// A device section starts at a line that begins with `GPU` and ends with
/// `:`. Within a section, `key = value` lines supply `deviceName` (capped at
/// 256 characters), `apiVersion` and `driverVersion`; lines starting with
/// `size` contribute the largest leading integer as the memory size. Sections
/// without a `deviceName` are dropped, and a device with no parsed size gets
/// 4096.
///
/// All per-device state is reset at every section header, so values from a
/// section that was dropped never leak into the following device.
fn parse_vulkan_summary(output: &str) -> Vec<VulkanDevice> {
    /// Memory size assumed when no `size` line was parsed for a device.
    const DEFAULT_MEMORY_MB: u64 = 4 * 1024;

    /// Fields collected for the device section currently being read.
    #[derive(Default)]
    struct Pending {
        name: String,
        memory_mb: u64,
        api_version: Option<String>,
        driver_version: Option<String>,
    }

    impl Pending {
        /// Finalises the section; nameless sections yield no device.
        fn into_device(self) -> Option<VulkanDevice> {
            if self.name.is_empty() {
                return None;
            }
            Some(VulkanDevice {
                name: self.name,
                memory_mb: if self.memory_mb > 0 {
                    self.memory_mb
                } else {
                    DEFAULT_MEMORY_MB
                },
                api_version: self.api_version,
                driver_version: self.driver_version,
            })
        }
    }

    trace!(
        line_count = output.lines().count(),
        "parsing vulkaninfo summary"
    );

    let mut devices = Vec::new();
    // `None` until the first section header has been seen.
    let mut pending: Option<Pending> = None;

    for line in output.lines() {
        let trimmed = line.trim();

        if trimmed.starts_with("GPU") && trimmed.ends_with(':') {
            // Flush the previous section and start the new one from a clean
            // slate, whether or not the previous one produced a device.
            let finished = pending.replace(Pending::default());
            devices.extend(finished.and_then(Pending::into_device));
            continue;
        }

        let Some(current) = pending.as_mut() else {
            continue;
        };
        let Some((key, value)) = trimmed.split_once('=') else {
            continue;
        };
        let value = value.trim();

        match key.trim() {
            "deviceName" => current.name = value.chars().take(256).collect(),
            "apiVersion" => current.api_version = Some(value.to_string()),
            "driverVersion" => current.driver_version = Some(value.to_string()),
            _ => {}
        }

        // Keep the largest size reported for the device.
        if trimmed.starts_with("size") {
            let parsed = value
                .split_whitespace()
                .next()
                .and_then(|token| token.parse::<u64>().ok());
            if let Some(mb) = parsed {
                current.memory_mb = current.memory_mb.max(mb);
            }
        }
    }

    devices.extend(pending.and_then(Pending::into_device));
    devices
}
/// Extracts per-device compute details from full `vulkaninfo` output.
///
/// A new device starts at a line beginning with `VkPhysicalDeviceProperties:`
/// or `GPU id`. For each device the parser records the last `subgroupSize`
/// value (lines mentioning `Control` are skipped) and, inside the
/// `VkQueueFamilyProperties` section, the number of queue families whose
/// `queueFlags` contain `QUEUE_COMPUTE_BIT` plus the sum of those families'
/// `queueCount` values. The section ends at a blank line or at a `Vk…` line
/// that does not start with `VkQueue`.
///
/// Devices for which neither a subgroup size nor any compute queue was found
/// are omitted from the result.
fn parse_vulkan_full(output: &str) -> Vec<VulkanComputeInfo> {
    /// Whether a device accumulated anything worth reporting.
    fn has_data(info: &VulkanComputeInfo) -> bool {
        info.subgroup_size > 0 || info.compute_queue_count > 0
    }

    let mut infos = Vec::new();
    let mut current = VulkanComputeInfo::default();
    let mut in_queue_section = false;

    // A queue family reports `queueCount` and `queueFlags` on separate lines.
    // Whichever arrives first is parked here until its partner shows up, so a
    // count is only ever attributed to the flags of the same family.
    let mut pending_count: Option<u32> = None;
    let mut pending_is_compute: Option<bool> = None;

    for line in output.lines() {
        let trimmed = line.trim();

        if trimmed.starts_with("VkPhysicalDeviceProperties:") || trimmed.starts_with("GPU id") {
            // Always start the next device from a clean slate, even when the
            // previous one is not reported.
            let finished = std::mem::take(&mut current);
            if has_data(&finished) {
                infos.push(finished);
            }
            in_queue_section = false;
            pending_count = None;
            pending_is_compute = None;
        }

        if trimmed.starts_with("subgroupSize") && !trimmed.contains("Control") {
            if let Some(size) = extract_value(trimmed).and_then(|v| v.parse::<u32>().ok()) {
                current.subgroup_size = size;
            }
        }

        if trimmed.starts_with("VkQueueFamilyProperties") {
            in_queue_section = true;
            pending_count = None;
            pending_is_compute = None;
            continue;
        }
        if !in_queue_section {
            continue;
        }

        if trimmed.starts_with("queueFlags") {
            let is_compute = trimmed.contains("QUEUE_COMPUTE_BIT");
            if is_compute {
                current.compute_queue_family_count += 1;
            }
            match pending_count.take() {
                Some(count) if is_compute => {
                    current.compute_queue_count = current.compute_queue_count.saturating_add(count);
                }
                // Count belonged to a non-compute family: discard it.
                Some(_) => {}
                None => pending_is_compute = Some(is_compute),
            }
        }

        if trimmed.starts_with("queueCount") {
            if let Some(count) = extract_value(trimmed).and_then(|v| v.parse::<u32>().ok()) {
                match pending_is_compute.take() {
                    Some(true) => {
                        current.compute_queue_count =
                            current.compute_queue_count.saturating_add(count);
                    }
                    Some(false) => {}
                    None => pending_count = Some(count),
                }
            }
        }

        if trimmed.is_empty() || (trimmed.starts_with("Vk") && !trimmed.starts_with("VkQueue")) {
            in_queue_section = false;
        }
    }

    if has_data(&current) {
        infos.push(current);
    }
    infos
}
/// Returns the trimmed text after the first `=` in `line`, or — when the line
/// contains no `=` — after the first `:`. Returns `None` if neither separator
/// is present.
fn extract_value(line: &str) -> Option<&str> {
    let (_, value) = match line.split_once('=') {
        Some(pair) => pair,
        None => line.split_once(':')?,
    };
    Some(value.trim())
}
// Unit tests for the full-output parser and its value-extraction helper.
#[cfg(test)]
mod tests {
use super::*;
// One device with a subgroup size and three queue families, two of which
// advertise QUEUE_COMPUTE_BIT.
#[test]
fn parse_vulkan_full_compute_info() {
let output = r#"
VkPhysicalDeviceProperties:
deviceType = PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU
deviceName = AMD Radeon Graphics (RADV RENOIR)
subgroupSize = 64
VkQueueFamilyProperties:
queueCount = 1
queueFlags = QUEUE_GRAPHICS_BIT | QUEUE_COMPUTE_BIT | QUEUE_TRANSFER_BIT
queueCount = 4
queueFlags = QUEUE_COMPUTE_BIT | QUEUE_TRANSFER_BIT
queueCount = 1
queueFlags = QUEUE_VIDEO_DECODE_BIT_KHR
"#;
let infos = parse_vulkan_full(output);
assert_eq!(infos.len(), 1);
assert_eq!(infos[0].subgroup_size, 64);
assert_eq!(infos[0].compute_queue_family_count, 2);
// Only a lower bound is checked for the summed queue count.
assert!(infos[0].compute_queue_count >= 5); }
// Empty input must not produce a placeholder device entry.
#[test]
fn parse_vulkan_full_empty() {
let infos = parse_vulkan_full("");
assert!(infos.is_empty());
}
// `key = value` form: the text after `=` is returned trimmed.
#[test]
fn extract_value_equals() {
assert_eq!(extract_value("subgroupSize = 64"), Some("64"));
}
// `key: value` form is used when the line contains no `=`.
#[test]
fn extract_value_colon() {
assert_eq!(extract_value("queueCount: 4"), Some("4"));
}
}