pub mod compute_backend;
pub mod cpu_unified;
pub use compute_backend::{
global_device_manager, initialize_backends, ComputeBackend as UnifiedComputeBackend,
DeviceManager, DeviceType as UnifiedDeviceType, Operation, PerformanceMetrics,
SelectionStrategy, TransferDirection,
};
pub use cpu_unified::CpuBackend as UnifiedCpuBackend;
use crate::tensor::Tensor;
use std::any::Any;
use std::fmt;
/// Result type used by the backend APIs in this module.
///
/// This is a plain alias for the crate-wide `crate::error::RusTorchResult<T>`,
/// so backend errors are whatever error type that alias carries.
pub type BackendResult<T> = crate::error::RusTorchResult<T>;
/// A backend-owned memory buffer that exchanges raw bytes with host slices.
///
/// Implementors must be `Send + Sync`. Boxed instances of this trait are what
/// `ComputeBackend::allocate_memory` hands back to callers.
pub trait DeviceBuffer: Send + Sync {
/// Returns the size of the buffer.
///
/// NOTE(review): presumably measured in bytes, matching the `u8`-based copy
/// methods below; confirm against implementors.
fn size(&self) -> usize;
/// Copies the bytes in the host slice `data` into this buffer.
///
/// NOTE(review): how a length mismatch between `data` and the buffer is
/// handled is up to the implementor; it is not visible from this trait.
fn copy_from_host(&mut self, data: &[u8]) -> BackendResult<()>;
/// Copies this buffer's contents out into the host slice `data`.
fn copy_to_host(&self, data: &mut [u8]) -> BackendResult<()>;
/// Returns a raw mutable byte pointer for the buffer.
///
/// NOTE(review): the trait does not state whether this is a host or device
/// address, nor how long it stays valid; treat dereferencing it as
/// backend-specific until confirmed.
fn as_ptr(&self) -> *mut u8;
}
/// Descriptive information about a compute device, as reported by a backend
/// through `ComputeBackend::device_info`.
#[derive(Debug, Clone)]
pub struct DeviceInfo {
/// Name of the device.
pub name: String,
/// Which kind of device this is (see `DeviceType`).
pub device_type: DeviceType,
/// Total device memory. NOTE(review): presumably in bytes; confirm.
pub total_memory: usize,
/// Memory currently available. NOTE(review): presumably in bytes; confirm.
pub available_memory: usize,
/// Maximum thread count reported for the device.
/// NOTE(review): the exact meaning (per device, per block, ...) is backend-specific.
pub max_threads: usize,
/// Flag indicating `f64` support, as reported by the backend.
pub supports_f64: bool,
/// Flag indicating `f16` support, as reported by the backend.
pub supports_f16: bool,
}
/// Kind of compute device a backend targets.
///
/// The type is a small `Copy` enum that can be compared, hashed and used as a
/// map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DeviceType {
    /// Host CPU; displayed as `CPU`.
    Cpu,
    /// CUDA device; displayed as `CUDA`.
    Cuda,
    /// Metal device; displayed as `Metal`.
    Metal,
    /// OpenCL device; displayed as `OpenCL`.
    OpenCL,
}

impl fmt::Display for DeviceType {
    /// Writes the human-readable label of the device type.
    ///
    /// The label is emitted verbatim: width, fill and alignment flags on the
    /// formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the static label first, then emit it with a single write.
        let label = match *self {
            Self::Cpu => "CPU",
            Self::Cuda => "CUDA",
            Self::Metal => "Metal",
            Self::OpenCL => "OpenCL",
        };
        f.write_str(label)
    }
}
/// Parameters passed to `ComputeBackend::convolution`.
///
/// The per-axis settings are stored as vectors; the `Default` value uses two
/// entries per vector.
#[derive(Debug, Clone)]
pub struct ConvolutionParams {
    /// Kernel size entries (default `[3, 3]`).
    pub kernel_size: Vec<usize>,
    /// Stride entries (default `[1, 1]`).
    pub stride: Vec<usize>,
    /// Padding entries (default `[0, 0]`).
    pub padding: Vec<usize>,
    /// Dilation entries (default `[1, 1]`).
    pub dilation: Vec<usize>,
    /// Number of groups (default `1`).
    pub groups: usize,
}

impl Default for ConvolutionParams {
    /// Builds the default parameter set: kernel `[3, 3]`, stride `[1, 1]`,
    /// padding `[0, 0]`, dilation `[1, 1]`, and a single group.
    fn default() -> Self {
        // Every default vector repeats one value across two entries.
        let per_axis = |value: usize| vec![value; 2];

        ConvolutionParams {
            kernel_size: per_axis(3),
            stride: per_axis(1),
            padding: per_axis(0),
            dilation: per_axis(1),
            groups: 1,
        }
    }
}
/// Common interface implemented by every compute backend.
///
/// A backend exposes device information, memory allocation and a set of
/// tensor operations. Every tensor operation is generic over
/// `T: num_traits::Float + 'static`, borrows its inputs and returns a new
/// `Tensor<T>` wrapped in [`BackendResult`].
///
/// Because the tensor methods are generic and not restricted to
/// `Self: Sized`, this trait cannot be used as `dyn ComputeBackend`; it is
/// meant to be used through concrete types or generic bounds.
///
/// NOTE(review): the per-method descriptions below follow the method names
/// and signatures. Shape, broadcasting and axis rules are defined by the
/// implementors and are not visible from this trait.
pub trait ComputeBackend: Send + Sync {
/// Returns a reference to this backend's [`DeviceInfo`].
fn device_info(&self) -> &DeviceInfo;
/// Requests a buffer of `size` from the backend and returns it boxed as a
/// [`DeviceBuffer`] trait object.
///
/// NOTE(review): `size` is presumably a byte count; confirm against implementors.
fn allocate_memory(&self, size: usize) -> BackendResult<Box<dyn DeviceBuffer>>;
/// Synchronization hook for the backend.
///
/// NOTE(review): presumably waits for outstanding device work to finish;
/// the exact guarantee is implementor-defined.
fn synchronize(&self) -> BackendResult<()>;
/// Reports whether this backend can be used.
///
/// This function has no receiver and requires `Self: Sized`, so it is
/// called on the concrete backend type rather than on an instance.
fn is_available() -> bool
where
Self: Sized;
/// Addition of `a` and `b` (presumably element-wise).
fn add<T>(&self, a: &Tensor<T>, b: &Tensor<T>) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Subtraction of `b` from `a` (presumably element-wise).
fn sub<T>(&self, a: &Tensor<T>, b: &Tensor<T>) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Multiplication of `a` and `b` (presumably element-wise; see `matmul`
/// for the matrix product).
fn mul<T>(&self, a: &Tensor<T>, b: &Tensor<T>) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Division of `a` by `b` (presumably element-wise).
fn div<T>(&self, a: &Tensor<T>, b: &Tensor<T>) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Matrix multiplication of `a` and `b`.
fn matmul<T>(&self, a: &Tensor<T>, b: &Tensor<T>) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Sum reduction over `tensor`; the result is returned as a tensor.
///
/// NOTE(review): no axis argument is taken, so this is presumably a
/// reduction over all elements; confirm.
fn sum<T>(&self, tensor: &Tensor<T>) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Mean reduction over `tensor`; the result is returned as a tensor.
fn mean<T>(&self, tensor: &Tensor<T>) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Maximum reduction over `tensor`; the result is returned as a tensor.
fn max<T>(&self, tensor: &Tensor<T>) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Minimum reduction over `tensor`; the result is returned as a tensor.
fn min<T>(&self, tensor: &Tensor<T>) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Returns `tensor` reshaped to `new_shape`.
fn reshape<T>(&self, tensor: &Tensor<T>, new_shape: &[usize]) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Returns `tensor` transposed according to `axes`.
///
/// NOTE(review): `axes` is presumably a permutation of the dimensions; confirm.
fn transpose<T>(&self, tensor: &Tensor<T>, axes: &[usize]) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Convolution of `input` with `kernel`, configured by `params`
/// (see [`ConvolutionParams`]).
fn convolution<T>(
&self,
input: &Tensor<T>,
kernel: &Tensor<T>,
params: &ConvolutionParams,
) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Batch normalization of `input` using `weight`, `bias` and the supplied
/// running statistics, with `training`, `momentum` and `eps` controlling
/// the computation.
///
/// NOTE(review): `running_mean` and `running_var` are taken by shared
/// reference, so this signature alone does not show whether or how
/// implementors update them in training mode.
// The signature takes more arguments than clippy's default limit allows.
#[allow(clippy::too_many_arguments)]
fn batch_norm<T>(
&self,
input: &Tensor<T>,
weight: &Tensor<T>,
bias: &Tensor<T>,
running_mean: &Tensor<T>,
running_var: &Tensor<T>,
training: bool,
momentum: T,
eps: T,
) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// ReLU activation applied to `tensor`.
fn relu<T>(&self, tensor: &Tensor<T>) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Sigmoid activation applied to `tensor`.
fn sigmoid<T>(&self, tensor: &Tensor<T>) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Hyperbolic-tangent activation applied to `tensor`.
fn tanh<T>(&self, tensor: &Tensor<T>) -> BackendResult<Tensor<T>>
where
T: num_traits::Float + 'static;
/// Returns backend-specific context as a type-erased `&dyn Any`, which can
/// be inspected with `Any::downcast_ref`.
fn backend_context(&self) -> &dyn Any;
/// Runs a backend-specific operation identified by `op_name`.
///
/// Inputs, parameters and the returned value are all type-erased (`dyn Any`);
/// the concrete types expected for a given `op_name` are defined by the
/// implementor.
fn execute_custom_op(
&self,
op_name: &str,
inputs: &[&dyn Any],
params: &dyn Any,
) -> BackendResult<Box<dyn Any>>;
}
/// Namespace type for creating and enumerating compute backends.
///
/// It carries no state; all functionality is exposed as associated functions.
pub struct BackendFactory;

impl BackendFactory {
    /// Lists the device types this factory reports as available.
    ///
    /// As written, the list always contains exactly `DeviceType::Cpu`.
    pub fn available_backends() -> Vec<DeviceType> {
        vec![DeviceType::Cpu]
    }

    /// Creates the CPU backend.
    ///
    /// This delegates to `cpu_unified::CpuBackend::new()` and returns its
    /// result unchanged, including any error it reports.
    pub fn create_cpu_backend() -> BackendResult<cpu_unified::CpuBackend> {
        cpu_unified::CpuBackend::new()
    }
}