/// Process-wide slot for the shared GPU device. Starts as `None`; the only write visible in
/// this file happens inside `INIT.call_once` in `GpuDevice::get_or_init`.
static mut GPU_DEVICE: Option<GpuDevice> = None;
/// One-time guard used by `GpuDevice::get_or_init` so device creation is attempted at most once.
static INIT: Once = Once::new();
/// A wgpu device/queue pair together with the transfer bandwidth measured when it was created.
pub struct GpuDevice {
/// Logical device obtained from `Adapter::request_device`. Not read by the methods in this
/// file after construction, hence the `dead_code` allowance.
#[allow(dead_code)] device: wgpu::Device,
/// Command queue paired with `device`. Not read by the methods in this file after
/// construction, hence the `dead_code` allowance.
#[allow(dead_code)] queue: wgpu::Queue,
/// Bandwidth measured by `calibrate_pcie_bandwidth`, in gigabytes (10^9 bytes) per second.
pcie_bandwidth_gbps: f64,
}
impl GpuDevice {
    /// Returns the process-wide GPU device, creating it on the first call.
    ///
    /// Initialization is attempted exactly once. If that attempt fails, the failure is
    /// remembered and reported by this and every later call; it is never retried.
    ///
    /// # Errors
    ///
    /// Returns an error carrying the initialization failure message if [`GpuDevice::new`]
    /// failed (no adapter, device creation failed, or calibration was out of range).
    // `GPU_DEVICE` is a `static mut`, so borrowing it trips `static_mut_refs`; the SAFETY
    // notes below explain why the accesses are sound.
    #[allow(static_mut_refs)]
    pub fn get_or_init() -> Result<&'static GpuDevice> {
        // Message of a failed initialization. Kept separately so the failure can be returned
        // as an error instead of panicking inside `call_once` (which would poison `INIT` and
        // make every subsequent call panic as well).
        static INIT_ERROR: std::sync::OnceLock<String> = std::sync::OnceLock::new();

        INIT.call_once(|| match Self::new() {
            // SAFETY: `call_once` runs this closure at most once and blocks concurrent
            // callers until it returns, so nothing reads `GPU_DEVICE` during this write.
            Ok(device) => unsafe { GPU_DEVICE = Some(device) },
            Err(e) => {
                // `{:#}` renders the full context chain on a single line.
                let _ = INIT_ERROR.set(format!("{e:#}"));
            }
        });

        // SAFETY: the only write to `GPU_DEVICE` visible in this file is the one inside
        // `call_once` above, which has completed by the time `call_once` returns. From here
        // on the static is only read, so handing out a shared `'static` borrow is sound.
        let device = unsafe { GPU_DEVICE.as_ref() };

        device.ok_or_else(|| match INIT_ERROR.get() {
            Some(reason) => anyhow::anyhow!("Failed to initialize GPU: {}", reason),
            None => anyhow::anyhow!("GPU device not initialized"),
        })
    }

    /// Creates a device on the high-performance adapter and calibrates its bandwidth.
    ///
    /// Blocks the calling thread (via `pollster::block_on`) while the adapter and device
    /// are requested, and logs the detected adapter to stderr.
    ///
    /// # Errors
    ///
    /// Fails if no adapter is found, if the device cannot be created, or if
    /// [`GpuDevice::calibrate_pcie_bandwidth`] fails.
    fn new() -> Result<Self> {
        let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor {
            backends: wgpu::Backends::all(),
            ..Default::default()
        });

        // No surface is needed: the device is used headless.
        let adapter =
            pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                force_fallback_adapter: false,
                compatible_surface: None,
            }))
            .context("Failed to find GPU adapter. Ensure GPU drivers are installed.")?;

        let adapter_info = adapter.get_info();
        eprintln!(
            "🔍 GPU Detected: {} ({:?})",
            adapter_info.name, adapter_info.backend
        );

        let (device, queue) = pollster::block_on(adapter.request_device(
            &wgpu::DeviceDescriptor {
                label: Some("PMAT Analytics GPU"),
                required_features: wgpu::Features::empty(),
                required_limits: wgpu::Limits::default(),
                memory_hints: Default::default(),
            },
            None,
        ))
        .context("Failed to create GPU device")?;

        let pcie_bandwidth_gbps = Self::calibrate_pcie_bandwidth(&device, &queue)?;

        Ok(GpuDevice {
            device,
            queue,
            pcie_bandwidth_gbps,
        })
    }

    /// Measures transfer bandwidth by pushing a fixed-size buffer through the GPU and
    /// reading it back, returning the result in GB/s (10^9 bytes per second).
    ///
    /// The test uploads 30,000,000 `f64` values (240,000,000 bytes) into a GPU buffer,
    /// copies them into a mappable staging buffer, and waits for the mapping to complete.
    /// Diagnostics are written to stderr.
    ///
    /// # Errors
    ///
    /// Fails if the staging buffer cannot be mapped, or if the computed bandwidth falls
    /// outside 0.1–35 GB/s (or is not a number).
    fn calibrate_pcie_bandwidth(device: &wgpu::Device, queue: &wgpu::Queue) -> Result<f64> {
        const CALIBRATION_SIZE: usize = 30_000_000;

        // NOTE(review): the timer starts before the host-side test data is generated, so the
        // reported figure also includes building the vector and the upload performed by
        // `create_buffer_init`, not just the GPU -> staging copy. Left as-is because code
        // consuming `pcie_bandwidth()` may be tuned to the current values — confirm.
        let start = std::time::Instant::now();

        let test_data: Vec<f64> = (0..CALIBRATION_SIZE).map(|i| i as f64).collect();
        let test_bytes: &[u8] = bytemuck::cast_slice(&test_data);
        let byte_len = test_bytes.len() as u64;

        let gpu_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("PCIe Calibration Buffer (GPU)"),
            contents: test_bytes,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
        });
        let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("PCIe Calibration Buffer (Staging)"),
            size: byte_len,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
            label: Some("PCIe Calibration Encoder"),
        });
        encoder.copy_buffer_to_buffer(&gpu_buffer, 0, &staging_buffer, 0, byte_len);
        queue.submit(std::iter::once(encoder.finish()));

        // The map callback fires during `poll`; the channel carries its result back here.
        let buffer_slice = staging_buffer.slice(..);
        let (tx, rx) = std::sync::mpsc::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            tx.send(result).ok();
        });
        device.poll(wgpu::Maintain::Wait);
        rx.recv()
            .context("Failed to map buffer")?
            .context("Buffer mapping failed")?;

        let elapsed = start.elapsed();
        staging_buffer.unmap();

        let bytes_transferred = test_bytes.len() as f64;
        let seconds = elapsed.as_secs_f64();
        let bandwidth_gbps = (bytes_transferred / seconds) / 1_000_000_000.0;

        // Negated `contains` also rejects NaN, which a pair of `<` / `>` tests lets through.
        if !(0.1..=35.0).contains(&bandwidth_gbps) {
            bail!(
                "PCIe calibration out of range: {:.2} GB/s (expected 0.1-35 GB/s). \
                 This may indicate severe driver issues or GPU unavailability.",
                bandwidth_gbps
            );
        }

        if bandwidth_gbps < 2.0 {
            eprintln!(
                "⚠️ Low measured bandwidth ({:.2} GB/s). This is normal for wgpu's command \
                 submission overhead. Actual PCIe bandwidth may be higher.",
                bandwidth_gbps
            );
        }

        if elapsed.as_millis() > 100 {
            eprintln!(
                "⚠️ PCIe calibration took {:?} (target: <100ms). \
                 Consider reducing CALIBRATION_SIZE.",
                elapsed
            );
        }

        eprintln!(
            "📊 PCIe Bandwidth: {:.2} GB/s (calibrated in {:?})",
            bandwidth_gbps, elapsed
        );

        // Both calibration buffers are released when they go out of scope here.
        Ok(bandwidth_gbps)
    }

    /// Returns the bandwidth measured during construction, in GB/s (10^9 bytes per second).
    pub fn pcie_bandwidth(&self) -> f64 {
        self.pcie_bandwidth_gbps
    }

    /// Returns the sum of `data` (`0.0` for an empty slice).
    ///
    /// No GPU work is dispatched: every input, regardless of length, is summed on the CPU
    /// in slice order. The `Result` return type is kept for callers; this implementation
    /// never returns an error.
    pub fn compute_sum(&self, data: &[f64]) -> Result<f64> {
        Ok(data.iter().sum())
    }
}