#[cfg(feature = "gpu")]
use crate::gpu::buffer::GpuBuffer;
use crate::tensor::TensorStorage;
use crate::{Result, Tensor, TensorError};
use scirs2_core::ndarray::{ArrayD, IxDyn};
use scirs2_core::numeric::{One, Zero};
/// Identity transform: returns a copy of `tensor` unchanged.
///
/// # Errors
/// Never fails; the `Result` wrapper exists only for signature consistency
/// with the other ops in this module.
pub fn identity<T>(tensor: &Tensor<T>) -> Result<Tensor<T>>
where
    T: Clone + Default + Zero + Send + Sync + 'static,
{
    let duplicate = tensor.clone();
    Ok(duplicate)
}
pub fn cast<T, U>(tensor: &Tensor<T>) -> Result<Tensor<U>>
where
T: Clone + Default + Zero + One + Send + Sync + 'static + Into<U>,
U: Clone + Default + Zero + Send + Sync + 'static,
{
match &tensor.storage {
TensorStorage::Cpu(array) => {
let new_array = array.mapv(|x| x.into());
Ok(Tensor::from_array(new_array))
}
#[cfg(feature = "gpu")]
TensorStorage::Gpu(_) => {
Err(TensorError::unsupported_operation_simple(
"GPU cast not implemented for these types. Only f32 is currently supported."
.to_string(),
))
}
}
}
/// Pads `tensor` with `constant_value` around every axis.
///
/// `padding[i] = (before, after)` is the number of constant elements inserted
/// before and after axis `i`, so output dim `i` is
/// `dims[i] + before + after`.
///
/// # Errors
/// * `invalid_argument` if `padding.len()` differs from the tensor rank.
/// * GPU tensors are forwarded to `gpu_pad_dispatch`, which supports f32 only.
pub fn pad<T>(
    tensor: &Tensor<T>,
    padding: &[(usize, usize)],
    constant_value: T,
) -> Result<Tensor<T>>
where
    T: Clone + Default + Zero + Send + Sync + 'static,
{
    if padding.len() != tensor.shape().rank() {
        return Err(TensorError::invalid_argument(format!(
            "Padding length {} does not match tensor rank {}",
            padding.len(),
            tensor.shape().rank()
        )));
    }
    // Output dim per axis: input dim + leading pad + trailing pad.
    let out_shape: Vec<usize> = tensor
        .shape()
        .dims()
        .iter()
        .zip(padding)
        .map(|(&dim, &(before, after))| dim + before + after)
        .collect();
    match &tensor.storage {
        TensorStorage::Cpu(array) => {
            // Start from an all-constant output, then copy each source element
            // into its offset position. `indexed_iter` yields nothing for a
            // zero-sized tensor, so such inputs pad to a purely constant
            // result instead of panicking — the previous odometer loop read
            // `array[indices]` unconditionally and went out of bounds when
            // any dim was 0.
            let mut result = ArrayD::from_elem(IxDyn(&out_shape), constant_value);
            for (index, value) in array.indexed_iter() {
                let shifted: Vec<usize> = index
                    .slice()
                    .iter()
                    .zip(padding)
                    .map(|(&i, &(before, _))| i + before)
                    .collect();
                result[IxDyn(&shifted)] = value.clone();
            }
            Ok(Tensor::from_array(result))
        }
        #[cfg(feature = "gpu")]
        TensorStorage::Gpu(_) => gpu_pad_dispatch(tensor, padding, constant_value),
    }
}
/// Expands integer `indices` into a one-hot tensor with a new trailing axis
/// of size `depth`: `output[pos, j]` is `on_value` iff `indices[pos] == j`,
/// otherwise `off_value`.
///
/// # Errors
/// * `invalid_argument` if any index is negative or `>= depth`.
/// * GPU tensors are forwarded to `gpu_one_hot_dispatch` (f32 only).
pub fn one_hot<T>(
    indices: &Tensor<i32>,
    depth: usize,
    on_value: T,
    off_value: T,
) -> Result<Tensor<T>>
where
    T: Clone + Default + Zero + Send + Sync + 'static,
{
    let indices_shape = indices.shape();
    let mut out_shape = indices_shape.dims().to_vec();
    out_shape.push(depth);
    match &indices.storage {
        TensorStorage::Cpu(indices_arr) => {
            let mut result = ArrayD::from_elem(IxDyn(&out_shape), off_value.clone());
            // Walk the source with its multi-index directly. This replaces the
            // previous flatten-and-unravel approach, which (a) failed for
            // non-contiguous index arrays (`into_shape_with_order` errors) and
            // (b) re-derived the row-major position by hand.
            for (position, &idx) in indices_arr.indexed_iter() {
                if idx < 0 || idx as usize >= depth {
                    return Err(TensorError::invalid_argument(format!(
                        "Index {idx} out of range for depth {depth}"
                    )));
                }
                // Output position = source position with the class id appended.
                let mut out_position = position.slice().to_vec();
                out_position.push(idx as usize);
                result[IxDyn(&out_position)] = on_value.clone();
            }
            Ok(Tensor::from_array(result))
        }
        #[cfg(feature = "gpu")]
        TensorStorage::Gpu(_) => gpu_one_hot_dispatch(indices, depth, on_value, off_value),
    }
}
/// GPU fallback for [`pad`]: implemented only for `T == f32`.
///
/// The generic `T` is bridged to the concrete f32 kernel via a runtime
/// type-name check followed by transmutes of the buffer handle and fill
/// value.
///
/// # Errors
/// * `unsupported_operation_simple` when `T` is not f32.
/// * `device_error_simple` if the tensor is unexpectedly not GPU-resident.
#[cfg(feature = "gpu")]
fn gpu_pad_dispatch<T>(
    tensor: &Tensor<T>,
    padding: &[(usize, usize)],
    constant_value: T,
) -> Result<Tensor<T>>
where
    T: Clone + Default + Zero + Send + Sync + 'static,
{
    // Runtime type gate: the only available GPU pad kernel operates on f32.
    // NOTE(review): `type_name` string equality is not a guaranteed
    // type-identity check; `TypeId::of::<T>() == TypeId::of::<f32>()` would
    // be the sound form — confirm and consider switching.
    let type_name = std::any::type_name::<T>();
    if type_name == "f32" {
        let gpu_buffer = match &tensor.storage {
            // SAFETY: the gate above established T == f32 (modulo the
            // type_name caveat), so GpuBuffer<T> and GpuBuffer<f32> are the
            // same type; this reference transmute only renames the parameter.
            TensorStorage::Gpu(buf) => unsafe {
                std::mem::transmute::<
                    &crate::gpu::buffer::GpuBuffer<T>,
                    &crate::gpu::buffer::GpuBuffer<f32>,
                >(buf)
            },
            _ => {
                return Err(TensorError::device_error_simple(
                    "Expected GPU tensor ".to_string(),
                ))
            }
        };
        // SAFETY: T == f32 per the gate above, so copying the bits of
        // `constant_value` into an f32 is an identity conversion.
        let constant_f32 = unsafe { std::mem::transmute_copy::<T, f32>(&constant_value) };
        // Output dim per axis: input dim + leading pad + trailing pad.
        // (Rank vs. padding length was already validated by the caller `pad`.)
        let mut out_shape = Vec::new();
        for (i, &dim) in tensor.shape().dims().iter().enumerate() {
            out_shape.push(dim + padding[i].0 + padding[i].1);
        }
        let output_len: usize = out_shape.iter().product();
        let result_buffer = crate::gpu::ops::execute_pad(
            gpu_buffer,
            padding,
            constant_f32,
            tensor.shape().dims(),
            output_len,
        )?;
        // SAFETY: same T == f32 identity as above, in the owning direction;
        // ownership of the buffer transfers unchanged.
        let result_buffer_t = unsafe {
            std::mem::transmute::<
                crate::gpu::buffer::GpuBuffer<f32>,
                crate::gpu::buffer::GpuBuffer<T>,
            >(result_buffer)
        };
        Ok(Tensor::from_gpu_buffer(
            result_buffer_t,
            crate::Shape::from_slice(&out_shape),
        ))
    } else {
        Err(TensorError::unsupported_operation_simple(format!(
            "GPU pad only supports f32, got {}",
            std::any::type_name::<T>()
        )))
    }
}
/// GPU fallback for [`one_hot`]: implemented only for `T == f32`.
///
/// Bridges the generic `T` to the f32 kernel via a runtime type-name check
/// plus transmutes, and reinterprets the i32 index buffer as u32 for the
/// kernel's expected input type.
///
/// # Errors
/// * `unsupported_operation_simple` when `T` is not f32.
/// * `device_error_simple` if the indices tensor is not GPU-resident.
#[cfg(feature = "gpu")]
fn gpu_one_hot_dispatch<T>(
    indices: &Tensor<i32>,
    depth: usize,
    on_value: T,
    off_value: T,
) -> Result<Tensor<T>>
where
    T: Clone + Default + Zero + Send + Sync + 'static,
{
    // Runtime type gate: the only available GPU one-hot kernel produces f32.
    // NOTE(review): `type_name` equality is not a guaranteed type-identity
    // check; `TypeId` comparison would be the sound form — confirm.
    let type_name = std::any::type_name::<T>();
    if type_name == "f32" {
        let indices_gpu_buffer = match &indices.storage {
            TensorStorage::Gpu(buf) => buf,
            _ => {
                return Err(TensorError::device_error_simple(
                    "Expected GPU tensor ".to_string(),
                ))
            }
        };
        // SAFETY: T == f32 per the gate above, so these bit copies are
        // identity conversions.
        let on_value_f32 = unsafe { std::mem::transmute_copy::<T, f32>(&on_value) };
        let off_value_f32 = unsafe { std::mem::transmute_copy::<T, f32>(&off_value) };
        // Output shape: indices shape with `depth` appended as a new last axis.
        let mut out_shape = indices.shape().dims().to_vec();
        out_shape.push(depth);
        let output_len: usize = out_shape.iter().product();
        // SAFETY: i32 and u32 have identical size/alignment, so the buffer
        // handle transmute is layout-sound. NOTE(review): this reinterprets
        // index *bits* — a negative i32 becomes a huge u32; presumably the
        // kernel treats out-of-range values as "no hot element", unlike the
        // CPU path which returns an error. Verify against `execute_one_hot`.
        let indices_gpu_buffer_u32 = unsafe {
            std::mem::transmute::<
                &crate::gpu::buffer::GpuBuffer<i32>,
                &crate::gpu::buffer::GpuBuffer<u32>,
            >(indices_gpu_buffer)
        };
        let result_buffer = crate::gpu::ops::execute_one_hot(
            indices_gpu_buffer_u32,
            depth,
            on_value_f32,
            off_value_f32,
            // NOTE(review): -1 is presumably an axis selector meaning "last
            // axis", matching the shape built above — confirm the
            // `execute_one_hot` signature.
            -1i32, indices.shape().dims(),
            output_len,
        )?;
        // SAFETY: same T == f32 identity as above, in the owning direction.
        let result_buffer_t = unsafe {
            std::mem::transmute::<
                crate::gpu::buffer::GpuBuffer<f32>,
                crate::gpu::buffer::GpuBuffer<T>,
            >(result_buffer)
        };
        Ok(Tensor::from_gpu_buffer(
            result_buffer_t,
            crate::Shape::from_slice(&out_shape),
        ))
    } else {
        Err(TensorError::unsupported_operation_simple(format!(
            "GPU one_hot only supports f32, got {}",
            std::any::type_name::<T>()
        )))
    }
}