use std::collections::HashMap;
use std::mem;
use std::sync::Arc;
use scirs2_core::ndarray::{Array, ArrayView, ArrayView2, Dimension, Ix2};
use scirs2_core::numeric::{Float, FromPrimitive};
use crate::backend::gpu_acceleration_framework::{
GpuAccelerationManager, GpuPerformanceReport, MemoryPoolConfig,
};
use crate::backend::{Backend, DeviceManager};
use crate::error::{NdimageError, NdimageResult};
use crate::interpolation::BoundaryMode;
use std::f64::consts::PI;
/// Front end for 2-D image operations that run on a GPU when worthwhile and
/// otherwise (or on failure, if configured) on the CPU.
pub struct GpuOperations {
/// Shared manager that executes operations on the selected backend and
/// produces the performance report.
acceleration_manager: Arc<GpuAccelerationManager>,
/// Source of the device capability flags (GPU availability, memory size, ...).
device_manager: DeviceManager,
/// Thresholds and fallback policy supplied at construction time.
config: GpuOperationsConfig,
/// Per-operation metadata keyed by operation name; filled by
/// `register_builtin_operations`.
operation_registry: HashMap<String, OperationInfo>,
}
/// Tunable settings for [`GpuOperations`].
#[derive(Debug, Clone)]
pub struct GpuOperationsConfig {
/// Minimum number of input elements for the GPU path to be considered;
/// smaller inputs always use the CPU implementation.
pub min_gpu_size: usize,
/// Configuration handed to the GPU acceleration manager on construction.
pub memory_pool_config: MemoryPoolConfig,
/// When `true`, a failed GPU execution falls back to the CPU implementation
/// instead of returning the error.
pub auto_fallback: bool,
/// NOTE(review): not read anywhere in this file — presumably toggles
/// performance monitoring elsewhere; confirm.
pub enable_monitoring: bool,
/// NOTE(review): not read anywhere in this file and the unit is not stated —
/// presumably milliseconds; confirm.
pub operation_timeout: u64,
}
impl Default for GpuOperationsConfig {
    /// Builds the default configuration: a 1024 * 1024 element GPU threshold,
    /// the default memory pool, automatic CPU fallback and monitoring enabled,
    /// and an operation timeout of 10000.
    fn default() -> Self {
        const DEFAULT_MIN_GPU_SIZE: usize = 1024 * 1024;
        const DEFAULT_OPERATION_TIMEOUT: u64 = 10000;

        let memory_pool_config = MemoryPoolConfig::default();
        Self {
            min_gpu_size: DEFAULT_MIN_GPU_SIZE,
            memory_pool_config,
            auto_fallback: true,
            enable_monitoring: true,
            operation_timeout: DEFAULT_OPERATION_TIMEOUT,
        }
    }
}
/// Metadata recorded for each operation in the registry.
#[derive(Debug, Clone)]
struct OperationInfo {
/// Operation name; the registry stores the same string as the map key.
name: String,
/// Kernel source text associated with the operation.
kernel_source: String,
/// Backend tried first when selecting where to run the operation.
preferred_backend: Backend,
/// Multiplier applied to the input size in bytes when estimating the GPU
/// memory an operation needs.
memory_complexity: f64,
/// NOTE(review): not read anywhere in this file — presumably a relative
/// compute-cost estimate; confirm.
compute_complexity: f64,
}
impl GpuOperations {
/// Creates a `GpuOperations` instance from `config`.
///
/// Builds the acceleration manager from `config.memory_pool_config`, creates
/// the device manager, and registers the built-in operations.
///
/// # Errors
/// Propagates any error from constructing the acceleration manager or the
/// device manager, or from registering the built-in operations.
pub fn new(config: GpuOperationsConfig) -> NdimageResult<Self> {
    let pool_config = config.memory_pool_config.clone();
    let manager = GpuAccelerationManager::new(pool_config)?;
    let devices = DeviceManager::new()?;

    let mut instance = Self {
        acceleration_manager: Arc::new(manager),
        device_manager: devices,
        config,
        operation_registry: HashMap::new(),
    };
    instance.register_builtin_operations()?;
    Ok(instance)
}
/// Convolves `input` with `kernel`, using the GPU when `should_use_gpu`
/// approves and the CPU implementation otherwise.
///
/// If backend selection or GPU execution fails and `config.auto_fallback` is
/// set, the failure is logged to stderr and the CPU implementation is used
/// instead.
///
/// # Errors
/// Returns the GPU error when `auto_fallback` is disabled, or any error from
/// the CPU implementation when that path is taken.
pub fn gpu_convolution_2d<T>(
    &self,
    input: ArrayView2<T>,
    kernel: ArrayView2<T>,
    mode: BoundaryMode,
) -> NdimageResult<Array<T, Ix2>>
where
    T: Float
        + FromPrimitive
        + Clone
        + Send
        + Sync
        + std::ops::DivAssign
        + std::ops::AddAssign
        + std::fmt::Debug
        + 'static,
{
    let operation_name = "convolution_2d";
    if !self.should_use_gpu(&input, operation_name) {
        return self.fallback_convolution_2d(input, kernel, mode);
    }
    let kernel_source = self.get_convolution_kernel_source();
    // Selection and execution share one Result so that `auto_fallback` covers
    // a failed backend selection as well as a failed kernel run.
    let gpu_result = self
        .select_backend_for_operation(operation_name)
        .and_then(|backend| {
            self.execute_gpu_convolution(input, kernel, &kernel_source, backend, mode)
        });
    match gpu_result {
        Err(e) if self.config.auto_fallback => {
            eprintln!("GPU convolution failed: {:?}, falling back to CPU", e);
            self.fallback_convolution_2d(input, kernel, mode)
        }
        other => other,
    }
}
/// Applies grey-scale erosion to `input` with `structuring_element`, using
/// the GPU when `should_use_gpu` approves and the CPU implementation
/// otherwise.
///
/// If backend selection or GPU execution fails and `config.auto_fallback` is
/// set, the failure is logged to stderr and the CPU implementation is used
/// instead.
///
/// # Errors
/// Returns the GPU error when `auto_fallback` is disabled, or any error from
/// the CPU implementation when that path is taken.
pub fn gpu_morphological_erosion<T>(
    &self,
    input: ArrayView2<T>,
    structuring_element: ArrayView2<bool>,
    mode: BoundaryMode,
) -> NdimageResult<Array<T, Ix2>>
where
    T: Float
        + FromPrimitive
        + Clone
        + Send
        + Sync
        + PartialOrd
        + std::ops::AddAssign
        + std::ops::DivAssign
        + std::fmt::Debug
        + 'static,
{
    let operation_name = "morphological_erosion";
    if !self.should_use_gpu(&input, operation_name) {
        return self.fallback_morphological_erosion(input, structuring_element, mode);
    }
    let kernel_source = self.get_morphology_kernel_source();
    // Selection and execution share one Result so that `auto_fallback` covers
    // a failed backend selection as well as a failed kernel run.
    let gpu_result = self
        .select_backend_for_operation(operation_name)
        .and_then(|backend| {
            self.execute_gpu_morphology(
                input,
                structuring_element,
                &kernel_source,
                backend,
                mode,
                MorphologyOperation::Erosion,
            )
        });
    match gpu_result {
        Err(e) if self.config.auto_fallback => {
            eprintln!("GPU erosion failed: {:?}, falling back to CPU", e);
            self.fallback_morphological_erosion(input, structuring_element, mode)
        }
        other => other,
    }
}
/// Applies grey-scale dilation to `input` with `structuring_element`, using
/// the GPU when `should_use_gpu` approves and the CPU implementation
/// otherwise.
///
/// If backend selection or GPU execution fails and `config.auto_fallback` is
/// set, the failure is logged to stderr and the CPU implementation is used
/// instead.
///
/// # Errors
/// Returns the GPU error when `auto_fallback` is disabled, or any error from
/// the CPU implementation when that path is taken.
pub fn gpu_morphological_dilation<T>(
    &self,
    input: ArrayView2<T>,
    structuring_element: ArrayView2<bool>,
    mode: BoundaryMode,
) -> NdimageResult<Array<T, Ix2>>
where
    T: Float
        + FromPrimitive
        + Clone
        + Send
        + Sync
        + PartialOrd
        + std::ops::AddAssign
        + std::ops::DivAssign
        + std::fmt::Debug
        + 'static,
{
    let operation_name = "morphological_dilation";
    if !self.should_use_gpu(&input, operation_name) {
        return self.fallback_morphological_dilation(input, structuring_element, mode);
    }
    let kernel_source = self.get_morphology_kernel_source();
    // Selection and execution share one Result so that `auto_fallback` covers
    // a failed backend selection as well as a failed kernel run.
    let gpu_result = self
        .select_backend_for_operation(operation_name)
        .and_then(|backend| {
            self.execute_gpu_morphology(
                input,
                structuring_element,
                &kernel_source,
                backend,
                mode,
                MorphologyOperation::Dilation,
            )
        });
    match gpu_result {
        Err(e) if self.config.auto_fallback => {
            eprintln!("GPU dilation failed: {:?}, falling back to CPU", e);
            self.fallback_morphological_dilation(input, structuring_element, mode)
        }
        other => other,
    }
}
/// Applies a Gaussian filter with per-axis standard deviations `sigma` to
/// `input`, using the GPU when `should_use_gpu` approves and the CPU
/// implementation otherwise.
///
/// If backend selection or GPU execution fails and `config.auto_fallback` is
/// set, the failure is logged to stderr and the CPU implementation is used
/// instead.
///
/// # Errors
/// Returns the GPU error when `auto_fallback` is disabled, or any error from
/// the CPU implementation when that path is taken.
pub fn gpu_gaussian_filter<T>(
    &self,
    input: ArrayView2<T>,
    sigma: (f64, f64),
    mode: BoundaryMode,
) -> NdimageResult<Array<T, Ix2>>
where
    T: Float + FromPrimitive + Clone + Send + Sync + std::ops::DivAssign + std::ops::AddAssign,
{
    let operation_name = "gaussian_filter";
    if !self.should_use_gpu(&input, operation_name) {
        return self.fallback_gaussian_filter(input, sigma, mode);
    }
    let kernel_source = self.get_gaussian_kernel_source();
    // Selection and execution share one Result so that `auto_fallback` covers
    // a failed backend selection as well as a failed kernel run.
    let gpu_result = self
        .select_backend_for_operation(operation_name)
        .and_then(|backend| {
            self.execute_gpu_gaussian(input, sigma, &kernel_source, backend, mode)
        });
    match gpu_result {
        Err(e) if self.config.auto_fallback => {
            eprintln!("GPU Gaussian filter failed: {:?}, falling back to CPU", e);
            self.fallback_gaussian_filter(input, sigma, mode)
        }
        other => other,
    }
}
/// Computes a distance transform of `input`, using the GPU when
/// `should_use_gpu` approves and the CPU implementation otherwise.
///
/// If backend selection or GPU execution fails and `config.auto_fallback` is
/// set, the failure is logged to stderr and the CPU implementation is used
/// instead.
///
/// # Errors
/// Returns the GPU error when `auto_fallback` is disabled, or any error from
/// the CPU implementation when that path is taken.
pub fn gpu_distance_transform<T>(
    &self,
    input: ArrayView2<T>,
    metric: DistanceMetric,
) -> NdimageResult<Array<T, Ix2>>
where
    T: Float + FromPrimitive + Clone + Send + Sync + PartialOrd,
{
    let operation_name = "distance_transform";
    if !self.should_use_gpu(&input, operation_name) {
        return self.fallback_distance_transform(input, metric);
    }
    let kernel_source = self.get_distance_transform_kernel_source();
    // Selection and execution share one Result so that `auto_fallback` covers
    // a failed backend selection as well as a failed kernel run.
    let gpu_result = self
        .select_backend_for_operation(operation_name)
        .and_then(|backend| {
            self.execute_gpu_distance_transform(input, metric, &kernel_source, backend)
        });
    match gpu_result {
        Err(e) if self.config.auto_fallback => {
            eprintln!(
                "GPU distance transform failed: {:?}, falling back to CPU",
                e
            );
            self.fallback_distance_transform(input, metric)
        }
        other => other,
    }
}
pub fn get_performance_report(&self) -> GpuPerformanceReport {
self.acceleration_manager.get_performance_report()
}
/// Collects the device capability flags and the preferred backend into a
/// [`GpuInfo`] snapshot.
pub fn get_gpu_info(&self) -> GpuInfo {
    let caps = self.device_manager.get_capabilities();

    let cuda_available = caps.cuda_available;
    let opencl_available = caps.opencl_available;
    let metal_available = caps.metal_available;
    let gpu_memory = caps.gpu_memory_mb;
    let compute_units = caps.compute_units;
    let preferred_backend = self.select_preferred_backend();

    GpuInfo {
        cuda_available,
        opencl_available,
        metal_available,
        gpu_memory,
        compute_units,
        preferred_backend,
    }
}
/// Populates the operation registry with the built-in operations.
///
/// Each entry records the kernel source, the preferred backend (OpenCL when
/// the `opencl` feature is compiled in, otherwise CPU) and the memory and
/// compute complexity factors.
///
/// `"morphological_dilation"` is registered alongside erosion because
/// `gpu_morphological_dilation` looks the operation up under that name;
/// without an entry its memory check and preferred backend are skipped.
fn register_builtin_operations(&mut self) -> NdimageResult<()> {
    // (name, kernel source, memory complexity, compute complexity)
    let builtins = [
        (
            "convolution_2d",
            self.get_convolution_kernel_source(),
            2.0,
            9.0,
        ),
        (
            "morphological_erosion",
            self.get_morphology_kernel_source(),
            2.0,
            9.0,
        ),
        (
            "morphological_dilation",
            self.get_morphology_kernel_source(),
            2.0,
            9.0,
        ),
        (
            "gaussian_filter",
            self.get_gaussian_kernel_source(),
            3.0,
            6.0,
        ),
        (
            "distance_transform",
            self.get_distance_transform_kernel_source(),
            2.0,
            10.0,
        ),
    ];

    for (name, kernel_source, memory_complexity, compute_complexity) in builtins {
        let info = OperationInfo {
            name: name.to_string(),
            kernel_source,
            preferred_backend: {
                #[cfg(feature = "opencl")]
                {
                    Backend::OpenCL
                }
                #[cfg(not(feature = "opencl"))]
                {
                    Backend::Cpu
                }
            },
            memory_complexity,
            compute_complexity,
        };
        self.operation_registry.insert(name.to_string(), info);
    }
    Ok(())
}
/// Decides whether `input` should be processed on the GPU.
///
/// Returns `false` when the input has fewer than `config.min_gpu_size`
/// elements, when no GPU is available, or when the estimated memory need of
/// a registered operation exceeds the reported GPU memory. Operations that
/// are not in the registry skip the memory check.
fn should_use_gpu<T, D>(&self, input: &ArrayView<T, D>, operation_name: &str) -> bool
where
    T: Float + FromPrimitive,
    D: Dimension,
{
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

    if input.len() < self.config.min_gpu_size {
        return false;
    }
    let capabilities = self.device_manager.get_capabilities();
    if !capabilities.gpu_available {
        return false;
    }
    match self.operation_registry.get(operation_name) {
        Some(op_info) => {
            // Estimate in f64: a fractional complexity factor is not
            // truncated, and large sizes cannot overflow `usize`.
            let input_bytes = input.len().saturating_mul(std::mem::size_of::<T>());
            let required_bytes = input_bytes as f64 * op_info.memory_complexity;
            let available_bytes = capabilities.gpu_memory_mb as f64 * BYTES_PER_MB;
            required_bytes <= available_bytes
        }
        None => true,
    }
}
/// Chooses the backend on which `operation_name` should run.
///
/// The operation's registered preferred backend wins when it is compiled in
/// and available. Otherwise the first backend that is both compiled in and
/// available is chosen, in the order CUDA, OpenCL, Metal. A backend that is
/// reported available but not compiled in no longer hides a later usable one.
/// If some backend is reported available but none is compiled in, the result
/// is `Backend::Cpu`.
///
/// # Errors
/// Returns `NdimageError::GpuNotAvailable` when no backend is reported
/// available at all.
fn select_backend_for_operation(&self, operation_name: &str) -> NdimageResult<Backend> {
    let capabilities = self.device_manager.get_capabilities();
    if let Some(op_info) = self.operation_registry.get(operation_name) {
        match op_info.preferred_backend {
            #[cfg(feature = "cuda")]
            Backend::Cuda if capabilities.cuda_available => return Ok(Backend::Cuda),
            #[cfg(feature = "opencl")]
            Backend::OpenCL if capabilities.opencl_available => return Ok(Backend::OpenCL),
            #[cfg(all(target_os = "macos", feature = "metal"))]
            Backend::Metal if capabilities.metal_available => return Ok(Backend::Metal),
            _ => {}
        }
    }

    #[cfg(feature = "cuda")]
    {
        if capabilities.cuda_available {
            return Ok(Backend::Cuda);
        }
    }
    #[cfg(feature = "opencl")]
    {
        if capabilities.opencl_available {
            return Ok(Backend::OpenCL);
        }
    }
    #[cfg(all(target_os = "macos", feature = "metal"))]
    {
        if capabilities.metal_available {
            return Ok(Backend::Metal);
        }
    }

    // Some device was detected but its backend is not part of this build.
    if capabilities.cuda_available
        || capabilities.opencl_available
        || capabilities.metal_available
    {
        Ok(Backend::Cpu)
    } else {
        Err(NdimageError::GpuNotAvailable(
            "No GPU backend available".to_string(),
        ))
    }
}
/// Returns the first backend that is both compiled in and reported available,
/// in the order CUDA, OpenCL, Metal, or `Backend::Cpu` when there is none.
///
/// A backend that is reported available but not compiled in no longer hides
/// a later usable one.
fn select_preferred_backend(&self) -> Backend {
    let capabilities = self.device_manager.get_capabilities();

    #[cfg(feature = "cuda")]
    {
        if capabilities.cuda_available {
            return Backend::Cuda;
        }
    }
    #[cfg(feature = "opencl")]
    {
        if capabilities.opencl_available {
            return Backend::OpenCL;
        }
    }
    #[cfg(all(target_os = "macos", feature = "metal"))]
    {
        if capabilities.metal_available {
            return Backend::Metal;
        }
    }

    // Keeps `capabilities` used in builds without any GPU feature.
    let _ = &capabilities;
    Backend::Cpu
}
fn execute_gpu_convolution<T>(
&self,
input: ArrayView2<T>,
_kernel: ArrayView2<T>,
kernel_source: &str,
backend: Backend,
mode: BoundaryMode,
) -> NdimageResult<Array<T, Ix2>>
where
T: Float + FromPrimitive + Clone + Send + Sync + std::ops::DivAssign + std::ops::AddAssign,
{
self.acceleration_manager
.execute_operation("convolution_2d", input.into_dyn(), kernel_source, backend)
.map(|result| {
result
.into_dimensionality::<Ix2>()
.expect("Operation failed")
})
}
fn execute_gpu_morphology<T>(
&self,
input: ArrayView2<T>,
_structuring_element: ArrayView2<bool>,
kernel_source: &str,
backend: Backend,
mode: BoundaryMode,
_operation: MorphologyOperation,
) -> NdimageResult<Array<T, Ix2>>
where
T: Float + FromPrimitive + Clone + Send + Sync + PartialOrd,
{
self.acceleration_manager
.execute_operation(
"morphological_operation",
input.into_dyn(),
kernel_source,
backend,
)
.map(|result| {
result
.into_dimensionality::<Ix2>()
.expect("Operation failed")
})
}
fn execute_gpu_gaussian<T>(
&self,
input: ArrayView2<T>,
_sigma: (f64, f64),
kernel_source: &str,
backend: Backend,
mode: BoundaryMode,
) -> NdimageResult<Array<T, Ix2>>
where
T: Float + FromPrimitive + Clone + Send + Sync + std::ops::DivAssign + std::ops::AddAssign,
{
self.acceleration_manager
.execute_operation("gaussian_filter", input.into_dyn(), kernel_source, backend)
.map(|result| {
result
.into_dimensionality::<Ix2>()
.expect("Operation failed")
})
}
fn execute_gpu_distance_transform<T>(
&self,
input: ArrayView2<T>,
_metric: DistanceMetric,
kernel_source: &str,
backend: Backend,
) -> NdimageResult<Array<T, Ix2>>
where
T: Float + FromPrimitive + Clone + Send + Sync + PartialOrd,
{
self.acceleration_manager
.execute_operation(
"distance_transform",
input.into_dyn(),
kernel_source,
backend,
)
.map(|result| {
result
.into_dimensionality::<Ix2>()
.expect("Operation failed")
})
}
fn fallback_convolution_2d<T>(
&self,
input: ArrayView2<T>,
kernel: ArrayView2<T>,
mode: BoundaryMode,
) -> NdimageResult<Array<T, Ix2>>
where
T: Float
+ FromPrimitive
+ Clone
+ Send
+ Sync
+ std::ops::DivAssign
+ std::ops::AddAssign
+ std::fmt::Debug
+ 'static,
{
let border_mode = match mode {
BoundaryMode::Constant => Some(crate::filters::BorderMode::Constant),
BoundaryMode::Reflect => Some(crate::filters::BorderMode::Reflect),
BoundaryMode::Mirror => Some(crate::filters::BorderMode::Mirror),
BoundaryMode::Wrap => Some(crate::filters::BorderMode::Wrap),
BoundaryMode::Nearest => Some(crate::filters::BorderMode::Constant), };
crate::convolve(&input.to_owned(), &kernel.to_owned(), border_mode)
}
/// CPU implementation of grey-scale erosion: translates `mode` into the
/// morphology module's border mode and delegates to `crate::grey_erosion`
/// on owned copies of `input` and `structuring_element`.
fn fallback_morphological_erosion<T>(
    &self,
    input: ArrayView2<T>,
    structuring_element: ArrayView2<bool>,
    mode: BoundaryMode,
) -> NdimageResult<Array<T, Ix2>>
where
    T: Float
        + FromPrimitive
        + Clone
        + Send
        + Sync
        + PartialOrd
        + std::ops::DivAssign
        + std::ops::AddAssign
        + std::fmt::Debug
        + 'static,
{
    use crate::morphology::MorphBorderMode as Border;

    let border = match mode {
        BoundaryMode::Constant => Border::Constant,
        BoundaryMode::Reflect => Border::Reflect,
        BoundaryMode::Mirror => Border::Mirror,
        BoundaryMode::Wrap => Border::Wrap,
        BoundaryMode::Nearest => Border::Nearest,
    };

    let image = input.to_owned();
    let footprint = structuring_element.to_owned();
    crate::grey_erosion(&image, None, Some(&footprint), Some(border), None, None)
}
/// CPU implementation of grey-scale dilation: translates `mode` into the
/// morphology module's border mode and delegates to `crate::grey_dilation`
/// on owned copies of `input` and `structuring_element`.
fn fallback_morphological_dilation<T>(
    &self,
    input: ArrayView2<T>,
    structuring_element: ArrayView2<bool>,
    mode: BoundaryMode,
) -> NdimageResult<Array<T, Ix2>>
where
    T: Float
        + FromPrimitive
        + Clone
        + Send
        + Sync
        + PartialOrd
        + std::ops::DivAssign
        + std::ops::AddAssign
        + std::fmt::Debug
        + 'static,
{
    use crate::morphology::MorphBorderMode as Border;

    let border = match mode {
        BoundaryMode::Constant => Border::Constant,
        BoundaryMode::Reflect => Border::Reflect,
        BoundaryMode::Mirror => Border::Mirror,
        BoundaryMode::Wrap => Border::Wrap,
        BoundaryMode::Nearest => Border::Nearest,
    };

    let image = input.to_owned();
    let footprint = structuring_element.to_owned();
    crate::grey_dilation(&image, None, Some(&footprint), Some(border), None, None)
}
fn fallback_gaussian_filter<T>(
&self,
input: ArrayView2<T>,
sigma: (f64, f64),
mode: BoundaryMode,
) -> NdimageResult<Array<T, Ix2>>
where
T: Float + FromPrimitive + Clone + Send + Sync + std::ops::DivAssign + std::ops::AddAssign,
{
let (rows, cols) = (input.nrows(), input.ncols());
let (sigma_x, sigma_y) = sigma;
let kernel_size_x = (6.0 * sigma_x).ceil() as usize | 1; let kernel_size_y = (6.0 * sigma_y).ceil() as usize | 1;
let kernel_x = gaussian_kernel_1d(sigma_x, kernel_size_x);
let kernel_y = gaussian_kernel_1d(sigma_y, kernel_size_y);
let mut temp = Array::zeros((rows, cols));
for i in 0..rows {
for j in 0..cols {
let mut sum = T::zero();
let mut weight_sum = T::zero();
let half_k = (kernel_size_x / 2) as isize;
for k_idx in 0..kernel_size_x {
let j_offset = j as isize + k_idx as isize - half_k;
let weight = safe_f64_to_float::<T>(kernel_x[k_idx])?;
if j_offset >= 0 && j_offset < cols as isize {
sum = sum + input[[i, j_offset as usize]] * weight;
weight_sum = weight_sum + weight;
}
}
if weight_sum > T::zero() {
temp[[i, j]] = sum / weight_sum;
} else {
temp[[i, j]] = input[[i, j]];
}
}
}
let mut output = Array::zeros((rows, cols));
for j in 0..cols {
for i in 0..rows {
let mut sum = T::zero();
let mut weight_sum = T::zero();
let half_k = (kernel_size_y / 2) as isize;
for k_idx in 0..kernel_size_y {
let i_offset = i as isize + k_idx as isize - half_k;
let weight = safe_f64_to_float::<T>(kernel_y[k_idx])?;
if i_offset >= 0 && i_offset < rows as isize {
sum = sum + temp[[i_offset as usize, j]] * weight;
weight_sum = weight_sum + weight;
}
}
if weight_sum > T::zero() {
output[[i, j]] = sum / weight_sum;
} else {
output[[i, j]] = temp[[i, j]];
}
}
}
Ok(output)
}
/// CPU implementation of the distance transform.
///
/// Thresholds `input` at zero (strictly positive becomes `true`), runs
/// `crate::distance_transform_edt` on the boolean image, and converts the
/// returned distances to `T`. Values that cannot be converted become zero.
///
/// NOTE(review): `metric` is not consulted — the EDT routine is called for
/// every metric. Confirm whether other metrics should dispatch elsewhere.
///
/// # Errors
/// Propagates errors from the EDT routine. Returns `ComputationError` if no
/// distances are produced, and `DimensionError` if they are not 2-D.
fn fallback_distance_transform<T>(
    &self,
    input: ArrayView2<T>,
    metric: DistanceMetric,
) -> NdimageResult<Array<T, Ix2>>
where
    T: Float + FromPrimitive + Clone + Send + Sync + PartialOrd,
{
    let foreground = input.mapv(|value| value > T::zero()).into_dyn();
    let edt = crate::distance_transform_edt(&foreground, None, true, false)?;

    match edt.0 {
        None => Err(NdimageError::ComputationError(
            "Distance transform did not return distances".into(),
        )),
        Some(distances) => distances
            .mapv(|d| T::from_f64(d).unwrap_or(T::zero()))
            .into_dimensionality::<Ix2>()
            .map_err(|_| {
                NdimageError::DimensionError(
                    "Failed to convert distance transform result to 2D".into(),
                )
            }),
    }
}
/// Returns the convolution kernel source, embedded at compile time from
/// `backend/kernels/convolution.kernel`.
fn get_convolution_kernel_source(&self) -> String {
    let source: &'static str = include_str!("backend/kernels/convolution.kernel");
    source.to_owned()
}
/// Returns the morphology kernel source, embedded at compile time from
/// `backend/kernels/morphology.kernel`.
fn get_morphology_kernel_source(&self) -> String {
    let source: &'static str = include_str!("backend/kernels/morphology.kernel");
    source.to_owned()
}
/// Returns the Gaussian blur kernel source, embedded at compile time from
/// `backend/kernels/gaussian_blur.kernel`.
fn get_gaussian_kernel_source(&self) -> String {
    let source: &'static str = include_str!("backend/kernels/gaussian_blur.kernel");
    source.to_owned()
}
/// Returns the kernel source used for the distance transform, embedded at
/// compile time.
///
/// NOTE(review): this embeds `backend/kernels/morphology.kernel`, the same
/// file as the morphology getter — confirm this is intentional and not a
/// copy-paste of the path.
fn get_distance_transform_kernel_source(&self) -> String {
    let source: &'static str = include_str!("backend/kernels/morphology.kernel");
    source.to_owned()
}
}
/// Selects which morphological operation a GPU morphology request performs.
///
/// Derives `PartialEq`, `Eq` and `Hash` so values can be compared and used
/// as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MorphologyOperation {
    /// Requested by `gpu_morphological_erosion`.
    Erosion,
    /// Requested by `gpu_morphological_dilation`.
    Dilation,
}
/// Distance metric requested for a distance transform.
///
/// Derives `PartialEq`, `Eq` and `Hash` so values can be compared and used
/// as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DistanceMetric {
    /// Euclidean metric.
    Euclidean,
    /// Manhattan metric.
    Manhattan,
    /// Chessboard metric.
    Chessboard,
}
/// Snapshot of the GPU capabilities as reported by the device manager.
#[derive(Debug, Clone)]
pub struct GpuInfo {
/// Whether CUDA is reported available.
pub cuda_available: bool,
/// Whether OpenCL is reported available.
pub opencl_available: bool,
/// Whether Metal is reported available.
pub metal_available: bool,
/// GPU memory in megabytes.
pub gpu_memory: usize,
/// Number of compute units reported for the device.
pub compute_units: u32,
/// Backend chosen by `GpuOperations::select_preferred_backend`.
pub preferred_backend: Backend,
}
impl GpuInfo {
    /// Prints a human-readable summary of every field to standard output,
    /// one line per field under a header line.
    pub fn display(&self) {
        let Self {
            cuda_available,
            opencl_available,
            metal_available,
            gpu_memory,
            compute_units,
            preferred_backend,
        } = self;

        println!("=== GPU Information ===");
        println!("CUDA Available: {}", cuda_available);
        println!("OpenCL Available: {}", opencl_available);
        println!("Metal Available: {}", metal_available);
        println!("GPU Memory: {} MB", gpu_memory);
        println!("Compute Units: {}", compute_units);
        println!("Preferred Backend: {:?}", preferred_backend);
    }
}
/// Creates a [`GpuOperations`] instance using the default configuration.
///
/// # Errors
/// Propagates any error from `GpuOperations::new`.
#[allow(dead_code)]
pub fn create_gpu_operations() -> NdimageResult<GpuOperations> {
    let default_config = GpuOperationsConfig::default();
    GpuOperations::new(default_config)
}
/// Creates a [`GpuOperations`] instance using the supplied `config`.
///
/// # Errors
/// Propagates any error from `GpuOperations::new`.
#[allow(dead_code)]
pub fn create_gpu_operations_with_config(
    config: GpuOperationsConfig,
) -> NdimageResult<GpuOperations> {
    let operations = GpuOperations::new(config)?;
    Ok(operations)
}
/// Builds a normalised 1-D Gaussian kernel with `size` taps centred on index
/// `size / 2`.
///
/// For a positive, finite `sigma` the weights sum to 1. When the samples do
/// not sum to a positive finite value (for example `sigma` of zero, negative
/// or NaN), the kernel degenerates to a unit impulse at the centre tap, so
/// that filtering with it leaves the signal unchanged instead of spreading
/// NaN or negative weights.
///
/// Returns an empty vector when `size` is 0.
fn gaussian_kernel_1d(sigma: f64, size: usize) -> Vec<f64> {
    if size == 0 {
        return Vec::new();
    }
    let center_index = size / 2;
    let center = center_index as f64;
    let coeff = 1.0 / (sigma * (2.0 * PI).sqrt());
    let denom = 2.0 * sigma * sigma;
    let mut kernel: Vec<f64> = (0..size)
        .map(|i| {
            let x = i as f64 - center;
            coeff * (-x * x / denom).exp()
        })
        .collect();
    let sum: f64 = kernel.iter().sum();
    if sum.is_finite() && sum > 0.0 {
        for weight in &mut kernel {
            *weight /= sum;
        }
    } else {
        // Degenerate sigma: fall back to an identity (delta) kernel.
        for weight in &mut kernel {
            *weight = 0.0;
        }
        kernel[center_index] = 1.0;
    }
    kernel
}
/// Converts `value` to the float type `T`.
///
/// # Errors
/// Returns `NdimageError::InvalidInput` when the conversion yields no value.
fn safe_f64_to_float<T: Float>(value: f64) -> NdimageResult<T> {
    match T::from(value) {
        Some(converted) => Ok(converted),
        None => Err(NdimageError::InvalidInput(format!(
            "Cannot convert {} to target float type",
            value
        ))),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// Smoke test: construction may succeed or fail depending on the host's
    /// hardware, but it must not panic.
    #[test]
    fn test_gpu_operations_creation() {
        let _ = create_gpu_operations();
    }

    /// Smoke test: printing a populated `GpuInfo` must not panic.
    #[test]
    fn test_gpu_info_display() {
        let gpu_info = GpuInfo {
            cuda_available: false,
            opencl_available: true,
            metal_available: false,
            gpu_memory: 8192,
            compute_units: 16,
            preferred_backend: {
                #[cfg(feature = "opencl")]
                {
                    Backend::OpenCL
                }
                #[cfg(not(feature = "opencl"))]
                {
                    Backend::Cpu
                }
            },
        };
        gpu_info.display();
    }

    /// Smoke test: a convolution on a tiny input must not panic, whatever
    /// path is taken.
    #[test]
    fn test_gpu_convolution_fallback() {
        let input = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]];
        let kernel = array![[1.0, 0.0, -1.0], [2.0, 0.0, -2.0], [1.0, 0.0, -1.0]];
        if let Ok(gpu_ops) = create_gpu_operations() {
            let _ =
                gpu_ops.gpu_convolution_2d(input.view(), kernel.view(), BoundaryMode::Constant);
        }
    }

    /// The Gaussian kernel is normalised, symmetric and peaked at its centre.
    #[test]
    fn test_gaussian_kernel_1d_is_normalized_and_symmetric() {
        let kernel = gaussian_kernel_1d(1.0, 7);
        assert_eq!(kernel.len(), 7);
        let sum: f64 = kernel.iter().sum();
        assert!((sum - 1.0).abs() < 1e-12);
        for i in 0..3 {
            assert!((kernel[i] - kernel[6 - i]).abs() < 1e-15);
        }
        assert!(kernel[3] > kernel[2]);
    }

    /// An exactly representable value survives the f64 -> f32 conversion.
    #[test]
    fn test_safe_f64_to_float_converts_representable_values() {
        let converted: f32 =
            safe_f64_to_float(0.5).expect("0.5 is exactly representable as f32");
        assert_eq!(converted, 0.5_f32);
    }
}