pub mod concrete_gpu_backends;
pub mod device_detection;
pub mod gpu_acceleration_framework;
pub mod kernels;
#[cfg(feature = "cuda")]
pub mod cuda;
pub use device_detection::{DeviceCapability, DeviceManager, MemoryManager, SystemCapabilities};
pub use gpu_acceleration_framework::{
CompiledKernel, GpuAccelerationManager, GpuKernelCache, GpuMemoryPool, GpuPerformanceReport,
KernelPerformanceStats, MemoryPoolConfig, MemoryPoolStatistics,
};
pub use kernels::{GpuBuffer, GpuKernelExecutor, KernelInfo};
#[cfg(feature = "cuda")]
pub use concrete_gpu_backends::CudaContext;
#[cfg(feature = "opencl")]
pub use concrete_gpu_backends::OpenCLContext;
use crate::error::{NdimageError, NdimageResult};
use scirs2_core::ndarray::{Array, ArrayView, Dimension};
use scirs2_core::numeric::{Float, FromPrimitive};
use std::fmt::Debug;
use std::sync::Arc;
/// Compute backends an operation can be dispatched to.
///
/// The GPU variants only exist when the matching Cargo feature (and, for
/// Metal, the macOS target) is enabled.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum Backend {
    /// Run on the CPU. This is the default backend.
    #[default]
    Cpu,
    /// CUDA backend (requires the `cuda` feature).
    #[cfg(feature = "cuda")]
    Cuda,
    /// OpenCL backend (requires the `opencl` feature).
    #[cfg(feature = "opencl")]
    OpenCL,
    /// Metal backend (requires the `metal` feature on macOS).
    #[cfg(all(target_os = "macos", feature = "metal"))]
    Metal,
    /// Let the executor choose a concrete backend for each call.
    Auto,
}
/// Settings that control how work is dispatched between CPU and GPU.
#[derive(Debug, Clone)]
pub struct BackendConfig {
    /// Requested backend; `Backend::Auto` defers the choice to each call.
    pub backend: Backend,
    /// In `Auto` mode, inputs with fewer elements than this stay on the CPU.
    pub gpu_threshold: usize,
    /// Optional cap on GPU memory use.
    ///
    /// NOTE(review): presumably expressed in bytes; nothing in this module
    /// reads the field — confirm against the GPU backends.
    pub gpu_memory_limit: Option<usize>,
    /// When `true`, a failed GPU execution is retried on the CPU.
    pub allow_fallback: bool,
    /// Device index forwarded to the GPU context constructor.
    pub device_id: Option<usize>,
}

impl Default for BackendConfig {
    /// Returns the default backend, a 100 000-element GPU threshold, no
    /// memory limit, CPU fallback enabled and no explicit device.
    fn default() -> Self {
        const DEFAULT_GPU_THRESHOLD: usize = 100_000;

        let backend = Backend::default();
        Self {
            backend,
            gpu_threshold: DEFAULT_GPU_THRESHOLD,
            gpu_memory_limit: None,
            allow_fallback: true,
            device_id: None,
        }
    }
}
/// An operation that can run on the CPU and, when the `gpu` feature is
/// enabled, on a GPU backend.
pub trait BackendOp<T, D>: Send + Sync
where
    T: Float + FromPrimitive + Debug + Clone,
    D: Dimension,
{
    /// Runs the operation on the CPU and returns the result array.
    fn execute_cpu(&self, input: &ArrayView<T, D>) -> NdimageResult<Array<T, D>>;

    /// Runs the operation on the GPU backend named by `backend`.
    #[cfg(feature = "gpu")]
    fn execute_gpu(
        &self,
        input: &ArrayView<T, D>,
        backend: Backend,
    ) -> NdimageResult<Array<T, D>>;

    /// Estimates the memory the operation needs for an input of the given
    /// shape.
    fn memory_requirement(&self, input_shape: &[usize]) -> usize;

    /// Reports whether an input of `array_size` elements is worth sending to
    /// the GPU. The default says yes above 50 000 elements.
    fn benefits_from_gpu(&self, array_size: usize) -> bool {
        const DEFAULT_MIN_ELEMENTS: usize = 50_000;
        array_size > DEFAULT_MIN_ELEMENTS
    }
}
/// Dispatches [`BackendOp`]s to the backend selected by its [`BackendConfig`].
pub struct BackendExecutor {
/// Dispatch settings supplied at construction time.
config: BackendConfig,
/// GPU context created by `new` for an explicit CUDA/OpenCL backend, `None`
/// otherwise. Only present when the `gpu` feature is enabled.
#[cfg(feature = "gpu")]
gpu_context: Option<Arc<dyn GpuContext>>,
}
impl BackendExecutor {
    /// Builds an executor from `config`.
    ///
    /// With the `gpu` feature enabled, an explicit `Cuda` or `OpenCL` backend
    /// eagerly creates the matching context for `config.device_id`; every
    /// other backend stores no context.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the GPU context constructor.
    pub fn new(config: BackendConfig) -> NdimageResult<Self> {
        #[cfg(feature = "gpu")]
        let gpu_context: Option<Arc<dyn GpuContext>> = match config.backend {
            #[cfg(feature = "cuda")]
            Backend::Cuda => Some(Arc::new(CudaContext::new(config.device_id)?)),
            #[cfg(feature = "opencl")]
            Backend::OpenCL => Some(Arc::new(OpenCLContext::new(config.device_id)?)),
            _ => None,
        };
        Ok(Self {
            config,
            #[cfg(feature = "gpu")]
            gpu_context,
        })
    }

    /// Runs `op` on `input` using the backend selected for this call.
    ///
    /// When a GPU backend is selected and `execute_gpu` fails, the operation
    /// is retried on the CPU if `allow_fallback` is set; otherwise the GPU
    /// error is returned. Without the `gpu` feature every backend runs the
    /// CPU implementation.
    ///
    /// # Errors
    ///
    /// Returns the error of the implementation that ran last.
    pub fn execute<T, D, Op>(&self, input: &ArrayView<T, D>, op: Op) -> NdimageResult<Array<T, D>>
    where
        T: Float + FromPrimitive + Debug + Clone + Send + Sync + 'static,
        D: Dimension,
        Op: BackendOp<T, D>,
    {
        let array_size = input.len();
        let backend = self.select_backend(&op, array_size)?;
        match backend {
            Backend::Cpu => op.execute_cpu(input),
            #[cfg(feature = "gpu")]
            _ => match op.execute_gpu(input, backend) {
                Ok(result) => Ok(result),
                Err(e) if self.config.allow_fallback => {
                    eprintln!("GPU execution failed, falling back to CPU: {}", e);
                    op.execute_cpu(input)
                }
                Err(e) => Err(e),
            },
            #[cfg(not(feature = "gpu"))]
            _ => op.execute_cpu(input),
        }
    }

    /// Resolves the configured backend to a concrete one for this call.
    ///
    /// An explicitly configured backend is returned unchanged. In `Auto`
    /// mode the CPU is chosen when the input is smaller than `gpu_threshold`
    /// or the operation reports no GPU benefit; otherwise the first available
    /// backend among CUDA, OpenCL and Metal (in that order, subject to the
    /// enabled features) is chosen, with the CPU as the final fallback.
    fn select_backend<T, D, Op>(&self, op: &Op, array_size: usize) -> NdimageResult<Backend>
    where
        T: Float + FromPrimitive + Debug + Clone,
        D: Dimension,
        Op: BackendOp<T, D>,
    {
        if self.config.backend != Backend::Auto {
            return Ok(self.config.backend);
        }
        // The size threshold is checked first so small inputs never consult the op.
        if array_size < self.config.gpu_threshold || !op.benefits_from_gpu(array_size) {
            return Ok(Backend::Cpu);
        }
        #[cfg(feature = "cuda")]
        if self.is_cuda_available() {
            return Ok(Backend::Cuda);
        }
        #[cfg(feature = "opencl")]
        if self.is_opencl_available() {
            return Ok(Backend::OpenCL);
        }
        #[cfg(all(target_os = "macos", feature = "metal"))]
        if self.is_metal_available() {
            return Ok(Backend::Metal);
        }
        Ok(Backend::Cpu)
    }

    /// Asks the shared device manager whether `backend` is usable.
    ///
    /// Any failure to obtain or lock the manager is reported as "not
    /// available" rather than panicking, so `Auto` selection degrades to the
    /// CPU instead of aborting the caller.
    #[cfg(any(
        feature = "cuda",
        feature = "opencl",
        all(target_os = "macos", feature = "metal")
    ))]
    fn backend_is_available(backend: Backend) -> bool {
        device_detection::get_device_manager()
            .map(|manager| {
                manager
                    .lock()
                    .map(|devices| devices.is_backend_available(backend))
                    .unwrap_or(false)
            })
            .unwrap_or(false)
    }

    /// Returns `true` when the device manager reports a usable CUDA backend.
    #[cfg(feature = "cuda")]
    fn is_cuda_available(&self) -> bool {
        Self::backend_is_available(Backend::Cuda)
    }

    /// Returns `true` when the device manager reports a usable OpenCL backend.
    #[cfg(feature = "opencl")]
    fn is_opencl_available(&self) -> bool {
        Self::backend_is_available(Backend::OpenCL)
    }

    /// Returns `true` when the device manager reports a usable Metal backend.
    #[cfg(all(target_os = "macos", feature = "metal"))]
    fn is_metal_available(&self) -> bool {
        Self::backend_is_available(Backend::Metal)
    }
}
/// Handle to an initialised GPU runtime; only compiled with the `gpu` feature.
///
/// Implementations must be `Send + Sync` because the executor stores them
/// behind an `Arc<dyn GpuContext>`.
#[cfg(feature = "gpu")]
pub trait GpuContext: Send + Sync {
/// Name of the context.
fn name(&self) -> &str;
/// Number of devices reported by this context.
fn device_count(&self) -> usize;
/// Index of the device currently in use.
fn current_device(&self) -> usize;
/// Memory figures for the context as a pair of sizes.
/// NOTE(review): the order and units of the pair (e.g. used/total or
/// free/total, in bytes) are not visible here — confirm against the
/// implementations.
fn memory_info(&self) -> (usize, usize); }
/// Gaussian filter packaged as a backend-dispatchable operation.
pub struct GaussianFilterOp<T> {
    /// Standard deviations forwarded to the filter implementation
    /// (presumably one per axis — confirm against the filter).
    sigma: Vec<T>,
    /// Optional truncation parameter forwarded to the CPU filter.
    truncate: Option<T>,
}

impl<T> GaussianFilterOp<T>
where
    T: Float + FromPrimitive + Debug + Clone,
{
    /// Creates the operation from its sigma values and optional truncation.
    pub fn new(sigma: Vec<T>, truncate: Option<T>) -> Self {
        GaussianFilterOp { sigma, truncate }
    }
}
impl<T, D> BackendOp<T, D> for GaussianFilterOp<T>
where
    T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
    D: Dimension + 'static,
{
    /// Runs the chunked CPU Gaussian filter with reflecting borders.
    fn execute_cpu(&self, input: &ArrayView<T, D>) -> NdimageResult<Array<T, D>> {
        crate::filters::gaussian_filter_chunked(
            &input.to_owned(),
            &self.sigma,
            self.truncate,
            crate::filters::BorderMode::Reflect,
            None,
        )
    }

    /// Runs the filter on `backend`.
    ///
    /// Only CUDA has a GPU implementation here; any other backend runs the
    /// CPU implementation instead.
    #[cfg(feature = "gpu")]
    fn execute_gpu(
        &self,
        input: &ArrayView<T, D>,
        backend: Backend,
    ) -> NdimageResult<Array<T, D>> {
        match backend {
            #[cfg(feature = "cuda")]
            Backend::Cuda => cuda_gaussian_filter(input, &self.sigma, self.truncate),
            _ => self.execute_cpu(input),
        }
    }

    /// Estimates the bytes needed for an input of `input_shape`: three
    /// buffers of the input's size.
    ///
    /// NOTE(review): the factor of three presumably covers input, output and
    /// one temporary — confirm.
    ///
    /// The arithmetic saturates at `usize::MAX`, so an oversized shape reads
    /// as "too large" instead of wrapping around to a small number.
    fn memory_requirement(&self, input_shape: &[usize]) -> usize {
        let elements = input_shape
            .iter()
            .fold(1usize, |count, &extent| count.saturating_mul(extent));
        elements
            .saturating_mul(std::mem::size_of::<T>())
            .saturating_mul(3)
    }

    /// The Gaussian filter is considered worth offloading above 100 000
    /// elements.
    fn benefits_from_gpu(&self, array_size: usize) -> bool {
        array_size > 100_000
    }
}
/// Runs a Gaussian filter through the CUDA backend.
///
/// Only 2-D inputs are handled. The first two entries of `sigma` are passed
/// to `gaussian_filter_2d`; any further entries and `_truncate` are ignored.
///
/// # Errors
///
/// * `NotImplementedError` when the input is not 2-D, or when fewer than two
///   sigma values are supplied for a 2-D input.
/// * `DimensionError` when converting to or from the 2-D array type fails.
/// * Any error returned while creating the CUDA operations or running the
///   kernel.
#[cfg(feature = "cuda")]
// Only referenced from `execute_gpu`, which additionally needs the `gpu` feature.
#[allow(dead_code)]
fn cuda_gaussian_filter<T, D>(
    input: &ArrayView<T, D>,
    sigma: &[T],
    _truncate: Option<T>,
) -> NdimageResult<Array<T, D>>
where
    T: Float + FromPrimitive + Debug + Clone + Default + Send + Sync + 'static,
    D: Dimension,
{
    if input.ndim() != 2 {
        return Err(NdimageError::NotImplementedError(
            "CUDA Gaussian filter currently only supports 2D arrays".into(),
        ));
    }

    // Report a short sigma list for what it is instead of blaming the input rank.
    let sigma_2d = match sigma {
        [first, second, ..] => [*first, *second],
        _ => {
            return Err(NdimageError::NotImplementedError(
                "CUDA Gaussian filter requires at least two sigma values for 2D arrays".into(),
            ))
        }
    };

    let input_2d = input
        .view()
        .into_dimensionality::<scirs2_core::ndarray::Ix2>()
        .map_err(|_| NdimageError::DimensionError("Failed to convert to 2D array".into()))?;

    let cuda_ops = cuda::CudaOperations::new(None)?;
    let result_2d = cuda_ops.gaussian_filter_2d(&input_2d, sigma_2d)?;

    // Convert back to the caller's dimension type.
    result_2d
        .into_dimensionality::<D>()
        .map_err(|_| NdimageError::DimensionError("Failed to convert result dimension".into()))
}
/// Fluent builder for a [`BackendExecutor`].
///
/// Starts from `BackendConfig::default()`; each setter overrides one field.
#[derive(Debug, Clone, Default)]
pub struct BackendBuilder {
    /// Configuration accumulated so far.
    config: BackendConfig,
}

impl BackendBuilder {
    /// Creates a builder holding the default configuration.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the requested backend.
    pub fn backend(mut self, backend: Backend) -> Self {
        self.config.backend = backend;
        self
    }

    /// Sets the element count below which `Auto` mode stays on the CPU.
    pub fn gpu_threshold(mut self, threshold: usize) -> Self {
        self.config.gpu_threshold = threshold;
        self
    }

    /// Sets the GPU memory limit.
    pub fn gpu_memory_limit(mut self, limit: usize) -> Self {
        self.config.gpu_memory_limit = Some(limit);
        self
    }

    /// Enables or disables the CPU retry after a failed GPU execution.
    pub fn allow_fallback(mut self, allow: bool) -> Self {
        self.config.allow_fallback = allow;
        self
    }

    /// Selects the device index forwarded to the GPU context constructor.
    pub fn device_id(mut self, id: usize) -> Self {
        self.config.device_id = Some(id);
        self
    }

    /// Consumes the builder and constructs the executor.
    ///
    /// # Errors
    ///
    /// Propagates any error from `BackendExecutor::new`.
    pub fn build(self) -> NdimageResult<BackendExecutor> {
        BackendExecutor::new(self.config)
    }
}
#[allow(dead_code)]
pub fn auto_backend() -> NdimageResult<BackendExecutor> {
BackendBuilder::new().backend(Backend::Auto).build()
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::arr2;

    /// An `Auto` executor must run a Gaussian filter on a tiny 2x2 input
    /// without returning an error.
    #[test]
    fn test_backend_selection() {
        let executor = BackendExecutor::new(BackendConfig {
            backend: Backend::Auto,
            gpu_threshold: 1000,
            ..Default::default()
        })
        .expect("Operation failed");

        let input = arr2(&[[1.0, 2.0], [3.0, 4.0]]);
        let filter = GaussianFilterOp::new(vec![1.0, 1.0], None);

        let outcome = executor.execute(&input.view(), filter);
        let _result = outcome.expect("Operation failed");
    }

    /// Values passed to the builder setters must end up in the executor's
    /// configuration.
    #[test]
    fn test_backend_builder() {
        let builder = BackendBuilder::new()
            .backend(Backend::Cpu)
            .gpu_threshold(50_000)
            .allow_fallback(true);
        let executor = builder.build().expect("Operation failed");

        let config = &executor.config;
        assert_eq!(config.backend, Backend::Cpu);
        assert_eq!(config.gpu_threshold, 50_000);
        assert!(config.allow_fallback);
    }
}