use crate::tensor::TensorStorage;
#[cfg(feature = "gpu")]
use crate::Shape;
use crate::{Result, Tensor, TensorError};
use scirs2_core::numeric::{Float, FromPrimitive, Zero};
use std::cmp::min;
/// Applies 2D max pooling to `input`, dispatching on where the tensor is stored.
///
/// CPU-backed tensors are handled by `max_pool2d_cpu`; when the `gpu` feature is
/// enabled, GPU-backed tensors are handled by `max_pool2d_gpu`.
///
/// # Arguments
/// * `input` - rank-4 tensor to pool.
/// * `kernel_size` - pooling window extent along the two spatial axes.
/// * `stride` - step between consecutive windows along the two spatial axes.
/// * `padding` - `"valid"` selects the no-padding output size; any other value
///   selects the `ceil(input / stride)` output size.
///
/// # Errors
/// Propagates whatever error the selected back end reports (for example when
/// the input is not rank 4).
///
/// NOTE(review): the CPU back end indexes the input as
/// `[batch, height, width, channels]`, whereas the GPU back end reads the
/// dimensions as `[batch, channels, height, width]` — confirm which layout
/// callers are expected to supply.
pub fn max_pool2d<T>(
    input: &Tensor<T>,
    kernel_size: (usize, usize),
    stride: (usize, usize),
    padding: &str,
) -> Result<Tensor<T>>
where
    T: Clone
        + Default
        + Zero
        + PartialOrd
        + Send
        + Sync
        + 'static
        + bytemuck::Pod
        + bytemuck::Zeroable,
{
    // Only the storage variant matters here; the back ends re-borrow the payload
    // themselves, so nothing is bound in the patterns.
    match &input.storage {
        TensorStorage::Cpu(_) => max_pool2d_cpu(input, kernel_size, stride, padding),
        #[cfg(feature = "gpu")]
        TensorStorage::Gpu(_) => max_pool2d_gpu(input, kernel_size, stride, padding),
    }
}
/// Applies 2D average pooling to `input`, dispatching on where the tensor is stored.
///
/// CPU-backed tensors are handled by `avg_pool2d_cpu`; when the `gpu` feature is
/// enabled, GPU-backed tensors are handled by `avg_pool2d_gpu`.
///
/// # Arguments
/// * `input` - rank-4 tensor to pool.
/// * `kernel_size` - pooling window extent along the two spatial axes.
/// * `stride` - step between consecutive windows along the two spatial axes.
/// * `padding` - `"valid"` selects the no-padding output size; any other value
///   selects the `ceil(input / stride)` output size.
///
/// # Errors
/// Propagates whatever error the selected back end reports (for example when
/// the input is not rank 4).
///
/// NOTE(review): the CPU back end indexes the input as
/// `[batch, height, width, channels]`, whereas the GPU back end reads the
/// dimensions as `[batch, channels, height, width]` — confirm which layout
/// callers are expected to supply.
pub fn avg_pool2d<T>(
    input: &Tensor<T>,
    kernel_size: (usize, usize),
    stride: (usize, usize),
    padding: &str,
) -> Result<Tensor<T>>
where
    T: Clone
        + Default
        + Zero
        + Float
        + FromPrimitive
        + Send
        + Sync
        + 'static
        + bytemuck::Pod
        + bytemuck::Zeroable,
{
    // Only the storage variant matters here; the back ends re-borrow the payload
    // themselves, so nothing is bound in the patterns.
    match &input.storage {
        TensorStorage::Cpu(_) => avg_pool2d_cpu(input, kernel_size, stride, padding),
        #[cfg(feature = "gpu")]
        TensorStorage::Gpu(_) => avg_pool2d_gpu(input, kernel_size, stride, padding),
    }
}
/// Applies 3D max pooling to `input`, choosing the back end from the tensor's storage.
///
/// A CPU-backed tensor is forwarded to `max_pool3d_cpu`; with the `gpu` feature
/// enabled, a GPU-backed tensor is forwarded to `max_pool3d_gpu`. All arguments
/// are passed through unchanged.
///
/// # Arguments
/// * `input` - rank-5 tensor to pool.
/// * `kernel_size` - pooling window extent along the three pooled axes.
/// * `stride` - step between consecutive windows along the three pooled axes.
/// * `padding` - `"valid"` selects the no-padding output size; any other value
///   selects the `ceil(input / stride)` output size.
///
/// # Errors
/// Propagates whatever error the selected back end reports (for example when
/// the input is not rank 5).
pub fn max_pool3d<T>(
    input: &Tensor<T>,
    kernel_size: (usize, usize, usize),
    stride: (usize, usize, usize),
    padding: &str,
) -> Result<Tensor<T>>
where
    T: Clone + Default + Zero + PartialOrd,
    T: Send + Sync + 'static,
    T: bytemuck::Pod + bytemuck::Zeroable,
{
    match &input.storage {
        TensorStorage::Cpu(_) => max_pool3d_cpu(input, kernel_size, stride, padding),
        #[cfg(feature = "gpu")]
        TensorStorage::Gpu(_) => max_pool3d_gpu(input, kernel_size, stride, padding),
    }
}
/// Applies 3D average pooling to `input`, choosing the back end from the tensor's storage.
///
/// A CPU-backed tensor is forwarded to `avg_pool3d_cpu`; with the `gpu` feature
/// enabled, a GPU-backed tensor is forwarded to `avg_pool3d_gpu`. All arguments
/// are passed through unchanged.
///
/// # Arguments
/// * `input` - rank-5 tensor to pool.
/// * `kernel_size` - pooling window extent along the three pooled axes.
/// * `stride` - step between consecutive windows along the three pooled axes.
/// * `padding` - `"valid"` selects the no-padding output size; any other value
///   selects the `ceil(input / stride)` output size.
///
/// # Errors
/// Propagates whatever error the selected back end reports (for example when
/// the input is not rank 5).
pub fn avg_pool3d<T>(
    input: &Tensor<T>,
    kernel_size: (usize, usize, usize),
    stride: (usize, usize, usize),
    padding: &str,
) -> Result<Tensor<T>>
where
    T: Clone + Default + Zero + Float + FromPrimitive,
    T: Send + Sync + 'static,
    T: bytemuck::Pod + bytemuck::Zeroable,
{
    match &input.storage {
        TensorStorage::Cpu(_) => avg_pool3d_cpu(input, kernel_size, stride, padding),
        #[cfg(feature = "gpu")]
        TensorStorage::Gpu(_) => avg_pool3d_gpu(input, kernel_size, stride, padding),
    }
}
/// CPU implementation of 2D max pooling.
///
/// The input is indexed as `[batch, height, width, channels]` and the result
/// has shape `[batch, output_height, output_width, channels]`.
///
/// Output size per spatial axis:
/// * `padding == "valid"`: `(input - kernel) / stride + 1`
/// * any other value: `ceil(input / stride)`
///
/// No padding values are materialised: window `o` always starts at
/// `o * stride` and is clipped at the input edge, so trailing windows may cover
/// fewer elements than the kernel. A window from which no element could be read
/// yields `T::zero()`.
///
/// # Errors
/// * the input is not rank 4;
/// * a stride component is zero (previously a division-by-zero panic);
/// * `padding == "valid"` and the kernel is larger than the input along a
///   spatial axis (previously an unsigned underflow);
/// * any error from `Tensor::from_vec`.
fn max_pool2d_cpu<T>(
    input: &Tensor<T>,
    kernel_size: (usize, usize),
    stride: (usize, usize),
    padding: &str,
) -> Result<Tensor<T>>
where
    T: Clone + Default + Zero + PartialOrd + Send + Sync + 'static,
{
    let shape = input.shape();
    if shape.rank() != 4 {
        return Err(TensorError::invalid_shape_simple(format!(
            "MaxPool2D expects 4D input, got {}D",
            shape.rank()
        )));
    }
    if stride.0 == 0 || stride.1 == 0 {
        return Err(TensorError::invalid_shape_simple(format!(
            "MaxPool2D stride must be non-zero, got {:?}",
            stride
        )));
    }

    let dims = shape.dims();
    let (batch_size, input_height, input_width, channels) = (dims[0], dims[1], dims[2], dims[3]);

    let (output_height, output_width) = if padding == "valid" {
        // Guard the unsigned subtraction below.
        if kernel_size.0 > input_height || kernel_size.1 > input_width {
            return Err(TensorError::invalid_shape_simple(format!(
                "MaxPool2D kernel size {:?} exceeds input spatial size ({}, {}) with 'valid' padding",
                kernel_size, input_height, input_width
            )));
        }
        (
            (input_height - kernel_size.0) / stride.0 + 1,
            (input_width - kernel_size.1) / stride.1 + 1,
        )
    } else {
        (
            (input_height + stride.0 - 1) / stride.0,
            (input_width + stride.1 - 1) / stride.1,
        )
    };

    // The loop nest visits outputs in row-major [b, oh, ow, c] order, which is
    // exactly the flat layout of the result, so values can simply be appended.
    let mut output_data =
        Vec::with_capacity(batch_size * output_height * output_width * channels);
    for b in 0..batch_size {
        for oh in 0..output_height {
            let h_start = oh * stride.0;
            let h_end = min(h_start + kernel_size.0, input_height);
            for ow in 0..output_width {
                let w_start = ow * stride.1;
                let w_end = min(w_start + kernel_size.1, input_width);
                for c in 0..channels {
                    let mut max_val: Option<T> = None;
                    for h in h_start..h_end {
                        for w in w_start..w_end {
                            if let Some(val) = input.get(&[b, h, w, c]) {
                                // Strict `>` keeps the earlier element on ties and
                                // on incomparable values (e.g. NaN).
                                max_val = Some(match max_val {
                                    None => val,
                                    Some(current_max) => {
                                        if val > current_max {
                                            val
                                        } else {
                                            current_max
                                        }
                                    }
                                });
                            }
                        }
                    }
                    output_data.push(max_val.unwrap_or_else(T::zero));
                }
            }
        }
    }

    Tensor::from_vec(
        output_data,
        &[batch_size, output_height, output_width, channels],
    )
}
/// CPU implementation of 2D average pooling.
///
/// The input is indexed as `[batch, height, width, channels]` and the result
/// has shape `[batch, output_height, output_width, channels]`.
///
/// Output size per spatial axis:
/// * `padding == "valid"`: `(input - kernel) / stride + 1`
/// * any other value: `ceil(input / stride)`
///
/// No padding values are materialised: window `o` always starts at
/// `o * stride` and is clipped at the input edge. The mean is taken over the
/// elements actually read, so clipped windows divide by a smaller count. A
/// window from which no element could be read yields `T::zero()`.
///
/// # Errors
/// * the input is not rank 4;
/// * a stride component is zero (previously a division-by-zero panic);
/// * `padding == "valid"` and the kernel is larger than the input along a
///   spatial axis (previously an unsigned underflow);
/// * any error from `Tensor::from_vec`.
///
/// # Panics
/// Panics if the element count of a window cannot be converted to `T`.
fn avg_pool2d_cpu<T>(
    input: &Tensor<T>,
    kernel_size: (usize, usize),
    stride: (usize, usize),
    padding: &str,
) -> Result<Tensor<T>>
where
    T: Clone + Default + Zero + Float + FromPrimitive + Send + Sync + 'static,
{
    let shape = input.shape();
    if shape.rank() != 4 {
        return Err(TensorError::invalid_shape_simple(format!(
            "AvgPool2D expects 4D input, got {}D",
            shape.rank()
        )));
    }
    if stride.0 == 0 || stride.1 == 0 {
        return Err(TensorError::invalid_shape_simple(format!(
            "AvgPool2D stride must be non-zero, got {:?}",
            stride
        )));
    }

    let dims = shape.dims();
    let (batch_size, input_height, input_width, channels) = (dims[0], dims[1], dims[2], dims[3]);

    let (output_height, output_width) = if padding == "valid" {
        // Guard the unsigned subtraction below.
        if kernel_size.0 > input_height || kernel_size.1 > input_width {
            return Err(TensorError::invalid_shape_simple(format!(
                "AvgPool2D kernel size {:?} exceeds input spatial size ({}, {}) with 'valid' padding",
                kernel_size, input_height, input_width
            )));
        }
        (
            (input_height - kernel_size.0) / stride.0 + 1,
            (input_width - kernel_size.1) / stride.1 + 1,
        )
    } else {
        (
            (input_height + stride.0 - 1) / stride.0,
            (input_width + stride.1 - 1) / stride.1,
        )
    };

    // The loop nest visits outputs in row-major [b, oh, ow, c] order, which is
    // exactly the flat layout of the result, so values can simply be appended.
    let mut output_data =
        Vec::with_capacity(batch_size * output_height * output_width * channels);
    for b in 0..batch_size {
        for oh in 0..output_height {
            let h_start = oh * stride.0;
            let h_end = min(h_start + kernel_size.0, input_height);
            for ow in 0..output_width {
                let w_start = ow * stride.1;
                let w_end = min(w_start + kernel_size.1, input_width);
                for c in 0..channels {
                    let mut sum = T::zero();
                    let mut count: usize = 0;
                    for h in h_start..h_end {
                        for w in w_start..w_end {
                            if let Some(val) = input.get(&[b, h, w, c]) {
                                sum = sum + val;
                                count += 1;
                            }
                        }
                    }
                    let mean = if count > 0 {
                        sum / T::from(count).expect("count must be convertible to tensor dtype")
                    } else {
                        T::zero()
                    };
                    output_data.push(mean);
                }
            }
        }
    }

    Tensor::from_vec(
        output_data,
        &[batch_size, output_height, output_width, channels],
    )
}
/// CPU implementation of 3D max pooling.
///
/// The input is indexed as `[batch, channels, depth, height, width]` and the
/// result has shape `[batch, channels, output_depth, output_height, output_width]`.
/// `kernel_size` and `stride` are `(depth, height, width)` tuples.
///
/// Output size per pooled axis:
/// * `padding == "valid"`: `(input - kernel) / stride + 1`
/// * any other value: `ceil(input / stride)`
///
/// No padding values are materialised: window `o` always starts at
/// `o * stride` and is clipped at the input edge. A window from which no
/// element could be read yields `T::zero()`.
///
/// # Errors
/// * the input is not rank 5;
/// * a stride component is zero (previously a division-by-zero panic);
/// * `padding == "valid"` and the kernel is larger than the input along a
///   pooled axis (previously an unsigned underflow);
/// * any error from `Tensor::from_vec`.
fn max_pool3d_cpu<T>(
    input: &Tensor<T>,
    kernel_size: (usize, usize, usize),
    stride: (usize, usize, usize),
    padding: &str,
) -> Result<Tensor<T>>
where
    T: Clone + Default + Zero + PartialOrd + Send + Sync + 'static,
{
    let shape = input.shape();
    if shape.rank() != 5 {
        return Err(TensorError::InvalidShape {
            operation: "max_pool3d".to_string(),
            reason: format!("MaxPool3D expects 5D input, got {}D", shape.rank()),
            shape: Some(shape.dims().to_vec()),
            context: None,
        });
    }
    if stride.0 == 0 || stride.1 == 0 || stride.2 == 0 {
        return Err(TensorError::invalid_shape_simple(format!(
            "MaxPool3D stride must be non-zero, got {:?}",
            stride
        )));
    }

    let dims = shape.dims();
    let (batch_size, channels) = (dims[0], dims[1]);
    let (input_depth, input_height, input_width) = (dims[2], dims[3], dims[4]);

    let (output_depth, output_height, output_width) = if padding == "valid" {
        // Guard the unsigned subtractions below.
        if kernel_size.0 > input_depth
            || kernel_size.1 > input_height
            || kernel_size.2 > input_width
        {
            return Err(TensorError::invalid_shape_simple(format!(
                "MaxPool3D kernel size {:?} exceeds input spatial size ({}, {}, {}) with 'valid' padding",
                kernel_size, input_depth, input_height, input_width
            )));
        }
        (
            (input_depth - kernel_size.0) / stride.0 + 1,
            (input_height - kernel_size.1) / stride.1 + 1,
            (input_width - kernel_size.2) / stride.2 + 1,
        )
    } else {
        (
            (input_depth + stride.0 - 1) / stride.0,
            (input_height + stride.1 - 1) / stride.1,
            (input_width + stride.2 - 1) / stride.2,
        )
    };

    // The loop nest visits outputs in row-major [b, c, od, oh, ow] order, which
    // is exactly the flat layout of the result, so values can simply be appended.
    let mut output_data = Vec::with_capacity(
        batch_size * channels * output_depth * output_height * output_width,
    );
    for b in 0..batch_size {
        for c in 0..channels {
            for od in 0..output_depth {
                let d_start = od * stride.0;
                let d_end = min(d_start + kernel_size.0, input_depth);
                for oh in 0..output_height {
                    let h_start = oh * stride.1;
                    let h_end = min(h_start + kernel_size.1, input_height);
                    for ow in 0..output_width {
                        let w_start = ow * stride.2;
                        let w_end = min(w_start + kernel_size.2, input_width);
                        let mut max_val: Option<T> = None;
                        for d in d_start..d_end {
                            for h in h_start..h_end {
                                for w in w_start..w_end {
                                    if let Some(val) = input.get(&[b, c, d, h, w]) {
                                        // Strict `>` keeps the earlier element on
                                        // ties and on incomparable values.
                                        max_val = Some(match max_val {
                                            None => val,
                                            Some(current_max) => {
                                                if val > current_max {
                                                    val
                                                } else {
                                                    current_max
                                                }
                                            }
                                        });
                                    }
                                }
                            }
                        }
                        output_data.push(max_val.unwrap_or_else(T::zero));
                    }
                }
            }
        }
    }

    Tensor::from_vec(
        output_data,
        &[
            batch_size,
            channels,
            output_depth,
            output_height,
            output_width,
        ],
    )
}
/// CPU implementation of 3D average pooling.
///
/// The input is indexed as `[batch, channels, depth, height, width]` and the
/// result has shape `[batch, channels, output_depth, output_height, output_width]`.
/// `kernel_size` and `stride` are `(depth, height, width)` tuples.
///
/// Output size per pooled axis:
/// * `padding == "valid"`: `(input - kernel) / stride + 1`
/// * any other value: `ceil(input / stride)`
///
/// No padding values are materialised: window `o` always starts at
/// `o * stride` and is clipped at the input edge. The mean is taken over the
/// elements actually read, so clipped windows divide by a smaller count. A
/// window from which no element could be read yields `T::zero()`.
///
/// # Errors
/// * the input is not rank 5;
/// * a stride component is zero (previously a division-by-zero panic);
/// * `padding == "valid"` and the kernel is larger than the input along a
///   pooled axis (previously an unsigned underflow);
/// * any error from `Tensor::from_vec`.
///
/// # Panics
/// Panics if the element count of a window cannot be converted to `T`.
fn avg_pool3d_cpu<T>(
    input: &Tensor<T>,
    kernel_size: (usize, usize, usize),
    stride: (usize, usize, usize),
    padding: &str,
) -> Result<Tensor<T>>
where
    T: Clone + Default + Zero + Float + FromPrimitive + Send + Sync + 'static,
{
    let shape = input.shape();
    if shape.rank() != 5 {
        return Err(TensorError::invalid_shape_simple(format!(
            "AvgPool3D expects 5D input, got {}D",
            shape.rank()
        )));
    }
    if stride.0 == 0 || stride.1 == 0 || stride.2 == 0 {
        return Err(TensorError::invalid_shape_simple(format!(
            "AvgPool3D stride must be non-zero, got {:?}",
            stride
        )));
    }

    let dims = shape.dims();
    let (batch_size, channels) = (dims[0], dims[1]);
    let (input_depth, input_height, input_width) = (dims[2], dims[3], dims[4]);

    let (output_depth, output_height, output_width) = if padding == "valid" {
        // Guard the unsigned subtractions below.
        if kernel_size.0 > input_depth
            || kernel_size.1 > input_height
            || kernel_size.2 > input_width
        {
            return Err(TensorError::invalid_shape_simple(format!(
                "AvgPool3D kernel size {:?} exceeds input spatial size ({}, {}, {}) with 'valid' padding",
                kernel_size, input_depth, input_height, input_width
            )));
        }
        (
            (input_depth - kernel_size.0) / stride.0 + 1,
            (input_height - kernel_size.1) / stride.1 + 1,
            (input_width - kernel_size.2) / stride.2 + 1,
        )
    } else {
        (
            (input_depth + stride.0 - 1) / stride.0,
            (input_height + stride.1 - 1) / stride.1,
            (input_width + stride.2 - 1) / stride.2,
        )
    };

    // The loop nest visits outputs in row-major [b, c, od, oh, ow] order, which
    // is exactly the flat layout of the result, so values can simply be appended.
    let mut output_data = Vec::with_capacity(
        batch_size * channels * output_depth * output_height * output_width,
    );
    for b in 0..batch_size {
        for c in 0..channels {
            for od in 0..output_depth {
                let d_start = od * stride.0;
                let d_end = min(d_start + kernel_size.0, input_depth);
                for oh in 0..output_height {
                    let h_start = oh * stride.1;
                    let h_end = min(h_start + kernel_size.1, input_height);
                    for ow in 0..output_width {
                        let w_start = ow * stride.2;
                        let w_end = min(w_start + kernel_size.2, input_width);
                        let mut sum = T::zero();
                        let mut count: usize = 0;
                        for d in d_start..d_end {
                            for h in h_start..h_end {
                                for w in w_start..w_end {
                                    if let Some(val) = input.get(&[b, c, d, h, w]) {
                                        sum = sum + val;
                                        count += 1;
                                    }
                                }
                            }
                        }
                        let mean = if count > 0 {
                            sum / T::from(count)
                                .expect("count must be convertible to tensor dtype")
                        } else {
                            T::zero()
                        };
                        output_data.push(mean);
                    }
                }
            }
        }
    }

    Tensor::from_vec(
        output_data,
        &[
            batch_size,
            channels,
            output_depth,
            output_height,
            output_width,
        ],
    )
}
/// GPU implementation of 2D max pooling.
///
/// Reads the input dimensions as `[batch, channels, height, width]`, computes
/// the output shape and padding on the host, and hands the actual pooling to
/// `crate::gpu::ops::execute_pooling_op` with `PoolingOp::MaxPool2D`. The
/// result inherits `requires_grad` from `input`.
///
/// Output size per spatial axis:
/// * `padding == "valid"`: `(input - kernel) / stride + 1`
/// * any other value: `ceil(input / stride)`
///
/// Non-zero padding is only computed when `padding` is exactly `"same"`
/// (half of the total padding needed per axis, rounded down); every other value
/// passes `(0, 0)`.
///
/// NOTE(review): the CPU back end indexes 2D inputs as
/// `[batch, height, width, channels]`; confirm which layout is intended.
///
/// # Errors
/// * the input is not rank 4;
/// * a stride component is zero (previously a division-by-zero panic);
/// * `padding == "valid"` and the kernel is larger than the input along a
///   spatial axis (previously an unsigned underflow);
/// * the tensor is not GPU-backed;
/// * any error from `execute_pooling_op`.
#[cfg(feature = "gpu")]
fn max_pool2d_gpu<T>(
    input: &Tensor<T>,
    kernel_size: (usize, usize),
    stride: (usize, usize),
    padding: &str,
) -> Result<Tensor<T>>
where
    T: Clone
        + Default
        + Zero
        + PartialOrd
        + Send
        + Sync
        + 'static
        + bytemuck::Pod
        + bytemuck::Zeroable,
{
    let shape = input.shape();
    if shape.rank() != 4 {
        return Err(TensorError::invalid_shape_simple(format!(
            "MaxPool2D expects 4D input, got {}D",
            shape.rank()
        )));
    }
    if stride.0 == 0 || stride.1 == 0 {
        return Err(TensorError::invalid_shape_simple(format!(
            "MaxPool2D stride must be non-zero, got {:?}",
            stride
        )));
    }

    let dims = shape.dims();
    let (batch_size, channels, input_height, input_width) = (dims[0], dims[1], dims[2], dims[3]);

    let (output_height, output_width) = if padding == "valid" {
        // Guard the unsigned subtraction below.
        if kernel_size.0 > input_height || kernel_size.1 > input_width {
            return Err(TensorError::invalid_shape_simple(format!(
                "MaxPool2D kernel size {:?} exceeds input spatial size ({}, {}) with 'valid' padding",
                kernel_size, input_height, input_width
            )));
        }
        (
            (input_height - kernel_size.0) / stride.0 + 1,
            (input_width - kernel_size.1) / stride.1 + 1,
        )
    } else {
        (
            (input_height + stride.0 - 1) / stride.0,
            (input_width + stride.1 - 1) / stride.1,
        )
    };
    let input_shape = &[batch_size, channels, input_height, input_width];
    let output_shape = &[batch_size, channels, output_height, output_width];

    let padding_tuple = if padding == "same" {
        // All operands are `usize`, so "clamp at zero" must be done with
        // saturating subtraction; `max(0, a - b)` would underflow first.
        let pad_h = (output_height.saturating_sub(1) * stride.0 + kernel_size.0)
            .saturating_sub(input_height)
            / 2;
        let pad_w = (output_width.saturating_sub(1) * stride.1 + kernel_size.1)
            .saturating_sub(input_width)
            / 2;
        (pad_h, pad_w)
    } else {
        (0, 0)
    };

    let TensorStorage::Gpu(gpu_buffer) = &input.storage else {
        return Err(TensorError::unsupported_operation_simple(
            "Internal error: max_pool2d_gpu called with non-GPU tensor".to_string(),
        ));
    };

    let kernel_size_slice = &[kernel_size.0, kernel_size.1];
    let stride_slice = &[stride.0, stride.1];
    let padding_slice = &[padding_tuple.0, padding_tuple.1];
    let output_len = output_shape.iter().product();
    let result_gpu = crate::gpu::ops::execute_pooling_op(
        gpu_buffer,
        crate::gpu::ops::PoolingOp::MaxPool2D,
        kernel_size_slice,
        stride_slice,
        padding_slice,
        input_shape,
        output_len,
    )?;

    let mut result = Tensor::from_gpu_buffer(result_gpu, crate::Shape::new(output_shape.to_vec()));
    result.set_requires_grad(input.requires_grad());
    Ok(result)
}
/// GPU implementation of 2D average pooling.
///
/// Reads the input dimensions as `[batch, channels, height, width]`, computes
/// the output shape and padding on the host, and hands the actual pooling to
/// `crate::gpu::ops::execute_pooling_op` with `PoolingOp::AvgPool2D`. The
/// result inherits `requires_grad` from `input`.
///
/// Output size per spatial axis:
/// * `padding == "valid"`: `(input - kernel) / stride + 1`
/// * any other value: `ceil(input / stride)`
///
/// Non-zero padding is only computed when `padding` is exactly `"same"`
/// (half of the total padding needed per axis, rounded down); every other value
/// passes `(0, 0)`.
///
/// NOTE(review): the CPU back end indexes 2D inputs as
/// `[batch, height, width, channels]`; confirm which layout is intended.
///
/// # Errors
/// * the input is not rank 4;
/// * a stride component is zero (previously a division-by-zero panic);
/// * `padding == "valid"` and the kernel is larger than the input along a
///   spatial axis (previously an unsigned underflow);
/// * the tensor is not GPU-backed;
/// * any error from `execute_pooling_op`.
#[cfg(feature = "gpu")]
fn avg_pool2d_gpu<T>(
    input: &Tensor<T>,
    kernel_size: (usize, usize),
    stride: (usize, usize),
    padding: &str,
) -> Result<Tensor<T>>
where
    T: Clone
        + Default
        + Zero
        + Float
        + FromPrimitive
        + Send
        + Sync
        + 'static
        + bytemuck::Pod
        + bytemuck::Zeroable,
{
    let shape = input.shape();
    if shape.rank() != 4 {
        return Err(TensorError::invalid_shape_simple(format!(
            "AvgPool2D expects 4D input, got {}D",
            shape.rank()
        )));
    }
    if stride.0 == 0 || stride.1 == 0 {
        return Err(TensorError::invalid_shape_simple(format!(
            "AvgPool2D stride must be non-zero, got {:?}",
            stride
        )));
    }

    let dims = shape.dims();
    let (batch_size, channels, input_height, input_width) = (dims[0], dims[1], dims[2], dims[3]);

    let (output_height, output_width) = if padding == "valid" {
        // Guard the unsigned subtraction below.
        if kernel_size.0 > input_height || kernel_size.1 > input_width {
            return Err(TensorError::invalid_shape_simple(format!(
                "AvgPool2D kernel size {:?} exceeds input spatial size ({}, {}) with 'valid' padding",
                kernel_size, input_height, input_width
            )));
        }
        (
            (input_height - kernel_size.0) / stride.0 + 1,
            (input_width - kernel_size.1) / stride.1 + 1,
        )
    } else {
        (
            (input_height + stride.0 - 1) / stride.0,
            (input_width + stride.1 - 1) / stride.1,
        )
    };
    let input_shape = &[batch_size, channels, input_height, input_width];
    let output_shape = &[batch_size, channels, output_height, output_width];

    let padding_tuple = if padding == "same" {
        // All operands are `usize`, so "clamp at zero" must be done with
        // saturating subtraction; `max(0, a - b)` would underflow first.
        let pad_h = (output_height.saturating_sub(1) * stride.0 + kernel_size.0)
            .saturating_sub(input_height)
            / 2;
        let pad_w = (output_width.saturating_sub(1) * stride.1 + kernel_size.1)
            .saturating_sub(input_width)
            / 2;
        (pad_h, pad_w)
    } else {
        (0, 0)
    };

    let TensorStorage::Gpu(gpu_buffer) = &input.storage else {
        return Err(TensorError::unsupported_operation_simple(
            "Internal error: avg_pool2d_gpu called with non-GPU tensor".to_string(),
        ));
    };

    let kernel_size_slice = &[kernel_size.0, kernel_size.1];
    let stride_slice = &[stride.0, stride.1];
    let padding_slice = &[padding_tuple.0, padding_tuple.1];
    let output_len = output_shape.iter().product();
    let result_gpu = crate::gpu::ops::execute_pooling_op(
        gpu_buffer,
        crate::gpu::ops::PoolingOp::AvgPool2D,
        kernel_size_slice,
        stride_slice,
        padding_slice,
        input_shape,
        output_len,
    )?;

    let mut result = Tensor::from_gpu_buffer(result_gpu, crate::Shape::new(output_shape.to_vec()));
    result.set_requires_grad(input.requires_grad());
    Ok(result)
}
/// GPU implementation of 3D max pooling.
///
/// Reads the input dimensions as `[batch, channels, depth, height, width]`,
/// computes the output shape on the host, and hands the actual pooling to
/// `crate::gpu::ops::execute_pooling_op` with `PoolingOp::MaxPool3D`. The
/// result inherits `requires_grad` from `input`. `kernel_size` and `stride` are
/// `(depth, height, width)` tuples.
///
/// Output size per pooled axis:
/// * `padding == "valid"`: `(input - kernel) / stride + 1`
/// * any other value: `ceil(input / stride)`
///
/// Zero padding is always passed to the kernel, regardless of `padding`.
/// NOTE(review): confirm the kernel clips windows at the input edge for the
/// non-"valid" output size.
///
/// # Errors
/// * the input is not rank 5;
/// * a stride component is zero (previously a division-by-zero panic);
/// * `padding == "valid"` and the kernel is larger than the input along a
///   pooled axis (previously an unsigned underflow);
/// * the tensor is not GPU-backed;
/// * any error from `execute_pooling_op`.
#[cfg(feature = "gpu")]
fn max_pool3d_gpu<T>(
    input: &Tensor<T>,
    kernel_size: (usize, usize, usize),
    stride: (usize, usize, usize),
    padding: &str,
) -> Result<Tensor<T>>
where
    T: Clone
        + Default
        + Zero
        + PartialOrd
        + Send
        + Sync
        + 'static
        + bytemuck::Pod
        + bytemuck::Zeroable,
{
    let shape = input.shape();
    if shape.rank() != 5 {
        return Err(TensorError::InvalidShape {
            operation: "max_pool3d".to_string(),
            reason: format!("MaxPool3D expects 5D input, got {}D", shape.rank()),
            shape: Some(shape.dims().to_vec()),
            context: None,
        });
    }
    if stride.0 == 0 || stride.1 == 0 || stride.2 == 0 {
        return Err(TensorError::invalid_shape_simple(format!(
            "MaxPool3D stride must be non-zero, got {:?}",
            stride
        )));
    }

    let dims = shape.dims();
    let (batch_size, channels) = (dims[0], dims[1]);
    let (input_depth, input_height, input_width) = (dims[2], dims[3], dims[4]);

    let (output_depth, output_height, output_width) = if padding == "valid" {
        // Guard the unsigned subtractions below.
        if kernel_size.0 > input_depth
            || kernel_size.1 > input_height
            || kernel_size.2 > input_width
        {
            return Err(TensorError::invalid_shape_simple(format!(
                "MaxPool3D kernel size {:?} exceeds input spatial size ({}, {}, {}) with 'valid' padding",
                kernel_size, input_depth, input_height, input_width
            )));
        }
        (
            (input_depth - kernel_size.0) / stride.0 + 1,
            (input_height - kernel_size.1) / stride.1 + 1,
            (input_width - kernel_size.2) / stride.2 + 1,
        )
    } else {
        (
            (input_depth + stride.0 - 1) / stride.0,
            (input_height + stride.1 - 1) / stride.1,
            (input_width + stride.2 - 1) / stride.2,
        )
    };
    let input_shape = &[batch_size, channels, input_depth, input_height, input_width];
    let output_shape = &[
        batch_size,
        channels,
        output_depth,
        output_height,
        output_width,
    ];

    let TensorStorage::Gpu(gpu_buffer) = &input.storage else {
        return Err(TensorError::unsupported_operation_simple(
            "Internal error: max_pool3d_gpu called with non-GPU tensor".to_string(),
        ));
    };

    // Pass every pooled axis (depth, height, width). The previous code forwarded
    // only the first two components, silently dropping the width kernel/stride.
    // NOTE(review): assumes `execute_pooling_op` accepts slices of any length
    // for kernel/stride/padding — confirm against its signature.
    let kernel_size_slice = &[kernel_size.0, kernel_size.1, kernel_size.2];
    let stride_slice = &[stride.0, stride.1, stride.2];
    let padding_slice = &[0, 0, 0];
    let output_len = output_shape.iter().product();
    let result_gpu = crate::gpu::ops::execute_pooling_op(
        gpu_buffer,
        crate::gpu::ops::PoolingOp::MaxPool3D,
        kernel_size_slice,
        stride_slice,
        padding_slice,
        input_shape,
        output_len,
    )?;

    let mut result = Tensor::from_gpu_buffer(result_gpu, crate::Shape::new(output_shape.to_vec()));
    result.set_requires_grad(input.requires_grad());
    Ok(result)
}
/// GPU implementation of 3D average pooling.
///
/// Reads the input dimensions as `[batch, channels, depth, height, width]`,
/// computes the output shape on the host, and hands the actual pooling to
/// `crate::gpu::ops::execute_pooling_op` with `PoolingOp::AvgPool3D`. The
/// result inherits `requires_grad` from `input`. `kernel_size` and `stride` are
/// `(depth, height, width)` tuples.
///
/// Output size per pooled axis:
/// * `padding == "valid"`: `(input - kernel) / stride + 1`
/// * any other value: `ceil(input / stride)`
///
/// Zero padding is always passed to the kernel, regardless of `padding`.
/// NOTE(review): confirm the kernel clips windows at the input edge for the
/// non-"valid" output size.
///
/// # Errors
/// * the input is not rank 5;
/// * a stride component is zero (previously a division-by-zero panic);
/// * `padding == "valid"` and the kernel is larger than the input along a
///   pooled axis (previously an unsigned underflow);
/// * the tensor is not GPU-backed;
/// * any error from `execute_pooling_op`.
#[cfg(feature = "gpu")]
fn avg_pool3d_gpu<T>(
    input: &Tensor<T>,
    kernel_size: (usize, usize, usize),
    stride: (usize, usize, usize),
    padding: &str,
) -> Result<Tensor<T>>
where
    T: Clone
        + Default
        + Zero
        + Float
        + FromPrimitive
        + Send
        + Sync
        + 'static
        + bytemuck::Pod
        + bytemuck::Zeroable,
{
    let shape = input.shape();
    if shape.rank() != 5 {
        return Err(TensorError::invalid_shape_simple(format!(
            "AvgPool3D expects 5D input, got {}D",
            shape.rank()
        )));
    }
    if stride.0 == 0 || stride.1 == 0 || stride.2 == 0 {
        return Err(TensorError::invalid_shape_simple(format!(
            "AvgPool3D stride must be non-zero, got {:?}",
            stride
        )));
    }

    let dims = shape.dims();
    let (batch_size, channels) = (dims[0], dims[1]);
    let (input_depth, input_height, input_width) = (dims[2], dims[3], dims[4]);

    let (output_depth, output_height, output_width) = if padding == "valid" {
        // Guard the unsigned subtractions below.
        if kernel_size.0 > input_depth
            || kernel_size.1 > input_height
            || kernel_size.2 > input_width
        {
            return Err(TensorError::invalid_shape_simple(format!(
                "AvgPool3D kernel size {:?} exceeds input spatial size ({}, {}, {}) with 'valid' padding",
                kernel_size, input_depth, input_height, input_width
            )));
        }
        (
            (input_depth - kernel_size.0) / stride.0 + 1,
            (input_height - kernel_size.1) / stride.1 + 1,
            (input_width - kernel_size.2) / stride.2 + 1,
        )
    } else {
        (
            (input_depth + stride.0 - 1) / stride.0,
            (input_height + stride.1 - 1) / stride.1,
            (input_width + stride.2 - 1) / stride.2,
        )
    };
    let input_shape = &[batch_size, channels, input_depth, input_height, input_width];
    let output_shape = &[
        batch_size,
        channels,
        output_depth,
        output_height,
        output_width,
    ];

    let TensorStorage::Gpu(gpu_buffer) = &input.storage else {
        return Err(TensorError::unsupported_operation_simple(
            "Internal error: avg_pool3d_gpu called with non-GPU tensor".to_string(),
        ));
    };

    // Pass every pooled axis (depth, height, width). The previous code forwarded
    // only the first two components, silently dropping the width kernel/stride.
    // NOTE(review): assumes `execute_pooling_op` accepts slices of any length
    // for kernel/stride/padding — confirm against its signature.
    let kernel_size_slice = &[kernel_size.0, kernel_size.1, kernel_size.2];
    let stride_slice = &[stride.0, stride.1, stride.2];
    let padding_slice = &[0, 0, 0];
    let output_len = output_shape.iter().product();
    let result_gpu = crate::gpu::ops::execute_pooling_op(
        gpu_buffer,
        crate::gpu::ops::PoolingOp::AvgPool3D,
        kernel_size_slice,
        stride_slice,
        padding_slice,
        input_shape,
        output_len,
    )?;

    let mut result = Tensor::from_gpu_buffer(result_gpu, crate::Shape::new(output_shape.to_vec()));
    result.set_requires_grad(input.requires_grad());
    Ok(result)
}