use super::common::*;
pub struct CpuFallbackBackend {
device_info: GpuDeviceInfo,
}
impl Default for CpuFallbackBackend {
fn default() -> Self {
Self::new()
}
}
impl CpuFallbackBackend {
pub fn new() -> Self {
#[cfg(target_pointer_width = "32")]
let total_memory = 512 * 1024 * 1024; #[cfg(target_pointer_width = "64")]
let total_memory = 8usize * 1024 * 1024 * 1024;
Self {
device_info: GpuDeviceInfo {
device_type: GpuDeviceType::OpenCl, name: "CPU Fallback".to_string(),
total_memory,
compute_units: num_cpus::get() as u32,
clock_frequency: 3000, supports_fp64: true,
supports_fp16: false,
max_work_groupsize: 1024,
memory_bandwidth: 100.0, l2_cachesize: 32 * 1024 * 1024, shared_memory_per_block: 0, registers_per_block: 0,
warpsize: 1, max_threads_per_mp: 1,
multiprocessor_count: num_cpus::get() as u32,
supports_tensor_cores: false,
supports_mixed_precision: false,
vendor: "CPU".to_string(),
},
}
}
}
impl GpuBackend for CpuFallbackBackend {
fn name(&self) -> &str {
"CPU Fallback"
}
fn is_available(&self) -> bool {
true }
fn list_devices(&self) -> LinalgResult<Vec<GpuDeviceInfo>> {
Ok(vec![self.device_info.clone()])
}
fn create_context(&self, device_id: usize) -> LinalgResult<Box<dyn GpuContext>> {
if device_id != 0 {
return Err(LinalgError::ComputationError(
"CPU fallback only has one device".to_string(),
));
}
Ok(Box::new(CpuFallbackContext {
device_info: self.device_info.clone(),
}))
}
}
#[derive(Debug)]
struct CpuFallbackContext {
device_info: GpuDeviceInfo,
}
impl GpuContext for CpuFallbackContext {
fn device_info(&self) -> &GpuDeviceInfo {
&self.device_info
}
fn synchronize(&self) -> LinalgResult<()> {
Ok(())
}
fn available_memory(&self) -> LinalgResult<usize> {
Ok(self.device_info.total_memory / 2)
}
}
impl GpuContextAlloc for CpuFallbackContext {
fn allocate_buffer<T: Clone + Send + Sync + Copy + 'static + std::fmt::Debug>(
&self,
size: usize,
) -> LinalgResult<Box<dyn GpuBuffer<T>>> {
Ok(Box::new(CpuBuffer::new(size)))
}
}
#[derive(Debug)]
struct CpuBuffer<T> {
data: Vec<T>,
}
impl<T: Clone + Send + Sync> CpuBuffer<T> {
fn new(size: usize) -> Self {
Self {
data: Vec::with_capacity(size),
}
}
}
impl<T: Clone + Send + Sync + Copy + std::fmt::Debug> GpuBuffer<T> for CpuBuffer<T> {
fn len(&self) -> usize {
self.data.len()
}
fn copy_from_host(&mut self, data: &[T]) -> LinalgResult<()> {
self.data.clear();
self.data.extend_from_slice(data);
Ok(())
}
fn copy_to_host(&self, data: &mut [T]) -> LinalgResult<()> {
if data.len() != self.data.len() {
return Err(LinalgError::ShapeError("Buffer size mismatch".to_string()));
}
data.copy_from_slice(&self.data);
Ok(())
}
fn device_ptr(&self) -> *mut std::ffi::c_void {
self.data.as_ptr() as *mut std::ffi::c_void
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_cpu_fallback_backend() {
let backend = CpuFallbackBackend::new();
assert_eq!(backend.name(), "CPU Fallback");
assert!(backend.is_available());
let devices = backend.list_devices().expect("Operation failed");
assert_eq!(devices.len(), 1);
assert_eq!(devices[0].name, "CPU Fallback");
}
#[test]
fn test_cpu_fallback_context() {
let backend = CpuFallbackBackend::new();
let context = backend.create_context(0).expect("Operation failed");
assert_eq!(context.device_info().name, "CPU Fallback");
assert!(context.available_memory().expect("Operation failed") > 0);
assert!(context.synchronize().is_ok());
}
#[test]
fn test_cpu_buffer() {
let backend = CpuFallbackBackend::new();
let device_info = backend.device_info.clone();
let cpu_context = CpuFallbackContext { device_info };
let mut buffer = cpu_context
.allocate_buffer::<f32>(10)
.expect("Operation failed");
assert_eq!(buffer.len(), 0);
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
buffer.copy_from_host(&data).expect("Operation failed");
assert_eq!(buffer.len(), 5);
let mut output = vec![0.0; 5];
buffer.copy_to_host(&mut output).expect("Operation failed");
assert_eq!(output, data);
}
}