use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use xlog_core::{Result, XlogError};
use crate::CudaDevice;
/// A fixed collection of CUDA device handles together with an atomic cursor.
///
/// The device list is filled once by `GpuDevicePool::new`; the cursor is
/// advanced by `next_device_idx` to hand out device indices in rotation.
pub struct GpuDevicePool {
/// Reference-counted device handles, stored in the order they were created
/// (position `i` holds the device created with ordinal `i`).
devices: Vec<Arc<CudaDevice>>,
/// Monotonically incremented counter; the index handed out is this value
/// taken modulo `devices.len()`.
current: AtomicUsize,
}
impl GpuDevicePool {
    /// Builds a pool holding the devices with ordinals `0..device_count`.
    ///
    /// # Errors
    ///
    /// * `XlogError::Kernel` when `device_count` is zero.
    /// * Whatever `CudaDevice::count()` returns on failure (propagated as-is).
    /// * `XlogError::Kernel` when `device_count` exceeds the reported count.
    /// * `XlogError::Kernel` when creating any individual device fails; the
    ///   message names the ordinal and includes the underlying error. Creation
    ///   stops at the first failure.
    pub fn new(device_count: usize) -> Result<Self> {
        if device_count == 0 {
            let message = "Device pool requires at least one device".to_string();
            return Err(XlogError::Kernel(message));
        }

        let available = CudaDevice::count()?;
        // NOTE(review): `as usize` would wrap a negative count into a huge
        // value; confirm the return type of `CudaDevice::count()`.
        let available_count = available as usize;
        if device_count > available_count {
            let message = format!(
                "Requested {} devices but only {} available",
                device_count, available
            );
            return Err(XlogError::Kernel(message));
        }

        // Collecting into `Result` short-circuits on the first failed ordinal.
        let devices = (0..device_count)
            .map(|ordinal| {
                CudaDevice::new(ordinal).map(Arc::new).map_err(|e| {
                    XlogError::Kernel(format!("Failed to create device {}: {}", ordinal, e))
                })
            })
            .collect::<Result<Vec<_>>>()?;

        Ok(Self {
            devices,
            current: AtomicUsize::new(0),
        })
    }

    /// Number of devices held by the pool.
    pub fn device_count(&self) -> usize {
        self.devices.len()
    }

    /// Looks up the device stored at position `idx`, or `None` when `idx` is
    /// out of range.
    pub fn get_device(&self, idx: usize) -> Option<&Arc<CudaDevice>> {
        self.devices.as_slice().get(idx)
    }

    /// Atomically advances the internal counter and returns the index it
    /// selects: the counter's previous value modulo the number of devices.
    ///
    /// Successive calls therefore cycle through `0, 1, ..., len - 1, 0, ...`.
    pub fn next_device_idx(&self) -> usize {
        let ticket = self.current.fetch_add(1, Ordering::SeqCst);
        ticket % self.device_count()
    }

    /// Returns the device selected by the next call to `next_device_idx`.
    pub fn next_device(&self) -> &Arc<CudaDevice> {
        let slot = self.next_device_idx();
        // `slot` is a remainder of the pool length, so it is always in range.
        &self.devices[slot]
    }

    /// Calls `synchronize` on every device in pool order.
    ///
    /// # Errors
    ///
    /// Stops at the first device whose `synchronize` fails and returns
    /// `XlogError::Kernel` naming that device's position and the underlying
    /// error; later devices are not synchronized.
    pub fn synchronize_all(&self) -> Result<()> {
        for (position, device) in self.devices.iter().enumerate() {
            if let Err(e) = device.synchronize() {
                return Err(XlogError::Kernel(format!(
                    "Failed to sync device {}: {}",
                    position, e
                )));
            }
        }
        Ok(())
    }

    /// Borrows all devices as a slice, in pool order.
    pub fn devices(&self) -> &[Arc<CudaDevice>] {
        self.devices.as_slice()
    }
}