use crate::error::{RusTorchError, RusTorchResult};
#[cfg(any(
feature = "coreml",
feature = "coreml-hybrid",
feature = "coreml-fallback"
))]
use crate::gpu::hybrid_executor::HybridExecution;
#[cfg(any(
feature = "coreml",
feature = "coreml-hybrid",
feature = "coreml-fallback"
))]
use crate::gpu::{DeviceType, OpType};
use crate::tensor::Tensor;
use ndarray::ScalarOperand;
use num_traits::{Float, FromPrimitive};
/// Activation functions that may be executed on an accelerator backend.
///
/// Every method takes the tensor by reference and returns a freshly built
/// tensor wrapped in [`RusTorchResult`]; the input is never modified.
pub trait GpuActivation<T>
where
    T: Float + FromPrimitive + ScalarOperand + 'static,
{
    /// Applies the ReLU activation.
    fn gpu_relu(&self) -> RusTorchResult<Tensor<T>>;

    /// Applies the sigmoid activation.
    fn gpu_sigmoid(&self) -> RusTorchResult<Tensor<T>>;

    /// Applies the hyperbolic tangent activation.
    fn gpu_tanh(&self) -> RusTorchResult<Tensor<T>>;

    /// Applies softmax; `dim` selects the axis and is forwarded to the backend.
    fn gpu_softmax(&self, dim: isize) -> RusTorchResult<Tensor<T>>;

    /// Applies the GELU activation.
    fn gpu_gelu(&self) -> RusTorchResult<Tensor<T>>;

    /// Applies leaky ReLU; `negative_slope` is the multiplier used for
    /// non-positive inputs.
    fn gpu_leaky_relu(&self, negative_slope: f64) -> RusTorchResult<Tensor<T>>;

    /// Applies ELU; `alpha` scales the exponential branch used for
    /// non-positive inputs.
    fn gpu_elu(&self, alpha: f64) -> RusTorchResult<Tensor<T>>;

    /// Applies the Swish activation.
    fn gpu_swish(&self) -> RusTorchResult<Tensor<T>>;
}
#[cfg(any(
    feature = "coreml",
    feature = "coreml-hybrid",
    feature = "coreml-fallback"
))]
/// Hybrid-dispatch implementation, compiled only when one of the CoreML
/// features is enabled.
///
/// Each method hands `hybrid_operation` a closure that maps the device it is
/// given to a concrete kernel: CoreML and CPU are always wired up, Metal is
/// wired up for everything except softmax, and CUDA / OpenCL report
/// `UnsupportedOperation`.
// NOTE(review): how `hybrid_operation` picks devices and whether it retries on
// error is defined in `crate::gpu::hybrid_executor` — confirm there.
impl<T: Float + FromPrimitive + ScalarOperand + Send + Sync + 'static> GpuActivation<T>
    for Tensor<T>
{
    /// ReLU via the hybrid executor.
    fn gpu_relu(&self) -> RusTorchResult<Tensor<T>> {
        use crate::gpu::coreml::CoreMLActivation;

        self.hybrid_operation(OpType::Activation, |device| match device {
            DeviceType::CoreML(_) => self.coreml_relu(),
            DeviceType::Cuda(_) => Err(activation_not_implemented("CUDA", "ReLU")),
            DeviceType::Metal(_) => self.relu_metal(),
            DeviceType::OpenCL(_) => Err(activation_not_implemented("OpenCL", "ReLU")),
            DeviceType::Cpu => self.relu_fallback(),
            _ => Err(unsupported_activation_device()),
        })
    }

    /// Sigmoid via the hybrid executor.
    fn gpu_sigmoid(&self) -> RusTorchResult<Tensor<T>> {
        use crate::gpu::coreml::CoreMLActivation;

        self.hybrid_operation(OpType::Activation, |device| match device {
            DeviceType::CoreML(_) => self.coreml_sigmoid(),
            DeviceType::Cuda(_) => Err(activation_not_implemented("CUDA", "Sigmoid")),
            DeviceType::Metal(_) => self.sigmoid_metal(),
            DeviceType::OpenCL(_) => Err(activation_not_implemented("OpenCL", "Sigmoid")),
            DeviceType::Cpu => self.sigmoid_fallback(),
            _ => Err(unsupported_activation_device()),
        })
    }

    /// Tanh via the hybrid executor; the CPU arm uses the tensor's own `tanh`.
    fn gpu_tanh(&self) -> RusTorchResult<Tensor<T>> {
        use crate::gpu::coreml::CoreMLActivation;

        self.hybrid_operation(OpType::Activation, |device| match device {
            DeviceType::CoreML(_) => self.coreml_tanh(),
            DeviceType::Cuda(_) => Err(activation_not_implemented("CUDA", "Tanh")),
            DeviceType::Metal(_) => self.tanh_metal(),
            DeviceType::OpenCL(_) => Err(activation_not_implemented("OpenCL", "Tanh")),
            DeviceType::Cpu => Ok(self.tanh()),
            _ => Err(unsupported_activation_device()),
        })
    }

    /// Softmax along `dim` via the hybrid executor.
    ///
    /// Unlike the other activations there is no Metal kernel for softmax, so
    /// the Metal arm reports `UnsupportedOperation`.
    fn gpu_softmax(&self, dim: isize) -> RusTorchResult<Tensor<T>> {
        use crate::gpu::coreml::CoreMLActivation;

        self.hybrid_operation(OpType::Activation, |device| match device {
            DeviceType::CoreML(_) => self.coreml_softmax(dim),
            DeviceType::Cuda(_) => Err(activation_not_implemented("CUDA", "Softmax")),
            DeviceType::Metal(_) => Err(activation_not_implemented("Metal", "Softmax")),
            DeviceType::OpenCL(_) => Err(activation_not_implemented("OpenCL", "Softmax")),
            DeviceType::Cpu => self.softmax_fallback(dim),
            _ => Err(unsupported_activation_device()),
        })
    }

    /// GELU via the hybrid executor.
    fn gpu_gelu(&self) -> RusTorchResult<Tensor<T>> {
        use crate::gpu::coreml::CoreMLActivation;

        self.hybrid_operation(OpType::Activation, |device| match device {
            DeviceType::CoreML(_) => self.coreml_gelu(),
            DeviceType::Cuda(_) => Err(activation_not_implemented("CUDA", "GELU")),
            DeviceType::Metal(_) => self.gelu_metal(),
            DeviceType::OpenCL(_) => Err(activation_not_implemented("OpenCL", "GELU")),
            DeviceType::Cpu => self.gelu_fallback(),
            _ => Err(unsupported_activation_device()),
        })
    }

    /// Leaky ReLU with the given `negative_slope` via the hybrid executor.
    fn gpu_leaky_relu(&self, negative_slope: f64) -> RusTorchResult<Tensor<T>> {
        use crate::gpu::coreml::CoreMLActivation;

        self.hybrid_operation(OpType::Activation, |device| match device {
            DeviceType::CoreML(_) => self.coreml_leaky_relu(negative_slope),
            DeviceType::Cuda(_) => Err(activation_not_implemented("CUDA", "Leaky ReLU")),
            DeviceType::Metal(_) => self.leaky_relu_metal(negative_slope),
            DeviceType::OpenCL(_) => Err(activation_not_implemented("OpenCL", "Leaky ReLU")),
            DeviceType::Cpu => self.leaky_relu_fallback(negative_slope),
            _ => Err(unsupported_activation_device()),
        })
    }

    /// ELU with the given `alpha` via the hybrid executor.
    fn gpu_elu(&self, alpha: f64) -> RusTorchResult<Tensor<T>> {
        use crate::gpu::coreml::CoreMLActivation;

        self.hybrid_operation(OpType::Activation, |device| match device {
            DeviceType::CoreML(_) => self.coreml_elu(alpha),
            DeviceType::Cuda(_) => Err(activation_not_implemented("CUDA", "ELU")),
            DeviceType::Metal(_) => self.elu_metal(alpha),
            DeviceType::OpenCL(_) => Err(activation_not_implemented("OpenCL", "ELU")),
            DeviceType::Cpu => self.elu_fallback(alpha),
            _ => Err(unsupported_activation_device()),
        })
    }

    /// Swish via the hybrid executor.
    fn gpu_swish(&self) -> RusTorchResult<Tensor<T>> {
        use crate::gpu::coreml::CoreMLActivation;

        self.hybrid_operation(OpType::Activation, |device| match device {
            DeviceType::CoreML(_) => self.coreml_swish(),
            DeviceType::Cuda(_) => Err(activation_not_implemented("CUDA", "Swish")),
            DeviceType::Metal(_) => self.swish_metal(),
            DeviceType::OpenCL(_) => Err(activation_not_implemented("OpenCL", "Swish")),
            DeviceType::Cpu => self.swish_fallback(),
            _ => Err(unsupported_activation_device()),
        })
    }
}

/// Builds the `UnsupportedOperation` error reported when `backend` has no
/// kernel for the activation `op`, e.g. `"CUDA ReLU not yet implemented"`.
#[cfg(any(
    feature = "coreml",
    feature = "coreml-hybrid",
    feature = "coreml-fallback"
))]
fn activation_not_implemented(backend: &str, op: &str) -> RusTorchError {
    RusTorchError::UnsupportedOperation(format!("{} {} not yet implemented", backend, op))
}

/// Builds the `UnsupportedDevice` error used for any device variant that the
/// activation dispatch does not handle explicitly.
#[cfg(any(
    feature = "coreml",
    feature = "coreml-hybrid",
    feature = "coreml-fallback"
))]
fn unsupported_activation_device() -> RusTorchError {
    RusTorchError::UnsupportedDevice("Unsupported device for activation".to_string())
}
#[cfg(not(any(
    feature = "coreml",
    feature = "coreml-hybrid",
    feature = "coreml-fallback"
)))]
/// CPU-only implementation, compiled when none of the CoreML features is
/// enabled: every method forwards straight to the matching `*_fallback`
/// routine (or to the tensor's own `tanh`).
impl<T> GpuActivation<T> for Tensor<T>
where
    T: Float + FromPrimitive + ScalarOperand + Send + Sync + 'static,
{
    /// ReLU computed by `relu_fallback`.
    fn gpu_relu(&self) -> RusTorchResult<Tensor<T>> {
        self.relu_fallback()
    }

    /// Sigmoid computed by `sigmoid_fallback`.
    fn gpu_sigmoid(&self) -> RusTorchResult<Tensor<T>> {
        self.sigmoid_fallback()
    }

    /// Tanh computed by the tensor's own `tanh`, wrapped in `Ok`.
    fn gpu_tanh(&self) -> RusTorchResult<Tensor<T>> {
        Ok(self.tanh())
    }

    /// Softmax computed by `softmax_fallback`, with `dim` passed through.
    fn gpu_softmax(&self, dim: isize) -> RusTorchResult<Tensor<T>> {
        self.softmax_fallback(dim)
    }

    /// GELU computed by `gelu_fallback`.
    fn gpu_gelu(&self) -> RusTorchResult<Tensor<T>> {
        self.gelu_fallback()
    }

    /// Leaky ReLU computed by `leaky_relu_fallback`.
    fn gpu_leaky_relu(&self, negative_slope: f64) -> RusTorchResult<Tensor<T>> {
        self.leaky_relu_fallback(negative_slope)
    }

    /// ELU computed by `elu_fallback`.
    fn gpu_elu(&self, alpha: f64) -> RusTorchResult<Tensor<T>> {
        self.elu_fallback(alpha)
    }

    /// Swish computed by `swish_fallback`.
    fn gpu_swish(&self) -> RusTorchResult<Tensor<T>> {
        self.swish_fallback()
    }
}
impl<T: Float + FromPrimitive + ScalarOperand + Send + Sync + 'static> Tensor<T> {
pub fn relu_fallback(&self) -> RusTorchResult<Tensor<T>> {
let result_data = self.data.mapv(|x| x.max(T::zero()));
Ok(Tensor::from_ndarray(result_data))
}
pub fn sigmoid_fallback(&self) -> RusTorchResult<Tensor<T>> {
let result_data = self.data.mapv(|x| T::one() / (T::one() + (-x).exp()));
Ok(Tensor::from_ndarray(result_data))
}
pub fn softmax_fallback(&self, dim: isize) -> RusTorchResult<Tensor<T>> {
let max_val = self.data.fold(T::neg_infinity(), |acc, &x| acc.max(x));
let exp_data = self.data.mapv(|x| (x - max_val).exp());
let sum = exp_data.sum();
let result_data = exp_data.mapv(|x| x / sum);
Ok(Tensor::from_ndarray(result_data))
}
pub fn gelu_fallback(&self) -> RusTorchResult<Tensor<T>> {
let sqrt_2_pi = T::from(0.7978845608).unwrap(); let result_data = self.data.mapv(|x| {
let tanh_arg = sqrt_2_pi * (x + T::from(0.044715).unwrap() * x.powi(3));
T::from(0.5).unwrap() * x * (T::one() + tanh_arg.tanh())
});
Ok(Tensor::from_ndarray(result_data))
}
pub fn leaky_relu_fallback(&self, negative_slope: f64) -> RusTorchResult<Tensor<T>> {
let slope = T::from(negative_slope).unwrap();
let result_data = self
.data
.mapv(|x| if x > T::zero() { x } else { slope * x });
Ok(Tensor::from_ndarray(result_data))
}
pub fn elu_fallback(&self, alpha: f64) -> RusTorchResult<Tensor<T>> {
let alpha_val = T::from(alpha).unwrap();
let result_data = self.data.mapv(|x| {
if x > T::zero() {
x
} else {
alpha_val * (x.exp() - T::one())
}
});
Ok(Tensor::from_ndarray(result_data))
}
pub fn swish_fallback(&self) -> RusTorchResult<Tensor<T>> {
let result_data = self.data.mapv(|x| x / (T::one() + (-x).exp()));
Ok(Tensor::from_ndarray(result_data))
}
#[cfg(feature = "metal")]
pub fn relu_metal(&self) -> RusTorchResult<Tensor<T>> {
use crate::gpu::metal_kernels::metal_relu_f32;
let input_data: Vec<f32> = self
.data
.iter()
.map(|&x| x.to_f32().unwrap_or(0.0))
.collect();
let mut output_data = vec![0.0f32; input_data.len()];
metal_relu_f32(&input_data, &mut output_data)?;
let output_tensor_data: Vec<T> = output_data.iter().map(|&x| T::from(x).unwrap()).collect();
Ok(Tensor::from_vec(output_tensor_data, self.shape().to_vec()))
}
#[cfg(feature = "metal")]
pub fn sigmoid_metal(&self) -> RusTorchResult<Tensor<T>> {
use crate::gpu::metal_kernels::metal_sigmoid_f32;
let input_data: Vec<f32> = self
.data
.iter()
.map(|&x| x.to_f32().unwrap_or(0.0))
.collect();
let mut output_data = vec![0.0f32; input_data.len()];
metal_sigmoid_f32(&input_data, &mut output_data)?;
let output_tensor_data: Vec<T> = output_data.iter().map(|&x| T::from(x).unwrap()).collect();
Ok(Tensor::from_vec(output_tensor_data, self.shape().to_vec()))
}
#[cfg(feature = "metal")]
pub fn tanh_metal(&self) -> RusTorchResult<Tensor<T>> {
use crate::gpu::metal_kernels::metal_tanh_f32;
let input_data: Vec<f32> = self
.data
.iter()
.map(|&x| x.to_f32().unwrap_or(0.0))
.collect();
let mut output_data = vec![0.0f32; input_data.len()];
metal_tanh_f32(&input_data, &mut output_data)?;
let output_tensor_data: Vec<T> = output_data.iter().map(|&x| T::from(x).unwrap()).collect();
Ok(Tensor::from_vec(output_tensor_data, self.shape().to_vec()))
}
#[cfg(feature = "metal")]
pub fn gelu_metal(&self) -> RusTorchResult<Tensor<T>> {
use crate::gpu::metal_kernels::metal_gelu_f32;
let input_data: Vec<f32> = self
.data
.iter()
.map(|&x| x.to_f32().unwrap_or(0.0))
.collect();
let mut output_data = vec![0.0f32; input_data.len()];
metal_gelu_f32(&input_data, &mut output_data)?;
let output_tensor_data: Vec<T> = output_data.iter().map(|&x| T::from(x).unwrap()).collect();
Ok(Tensor::from_vec(output_tensor_data, self.shape().to_vec()))
}
#[cfg(feature = "metal")]
pub fn leaky_relu_metal(&self, negative_slope: f64) -> RusTorchResult<Tensor<T>> {
use crate::gpu::metal_kernels::metal_leaky_relu_f32;
let input_data: Vec<f32> = self
.data
.iter()
.map(|&x| x.to_f32().unwrap_or(0.0))
.collect();
let mut output_data = vec![0.0f32; input_data.len()];
metal_leaky_relu_f32(&input_data, &mut output_data, negative_slope as f32)?;
let output_tensor_data: Vec<T> = output_data.iter().map(|&x| T::from(x).unwrap()).collect();
Ok(Tensor::from_vec(output_tensor_data, self.shape().to_vec()))
}
#[cfg(feature = "metal")]
pub fn elu_metal(&self, alpha: f64) -> RusTorchResult<Tensor<T>> {
use crate::gpu::metal_kernels::metal_elu_f32;
let input_data: Vec<f32> = self
.data
.iter()
.map(|&x| x.to_f32().unwrap_or(0.0))
.collect();
let mut output_data = vec![0.0f32; input_data.len()];
metal_elu_f32(&input_data, &mut output_data, alpha as f32)?;
let output_tensor_data: Vec<T> = output_data.iter().map(|&x| T::from(x).unwrap()).collect();
Ok(Tensor::from_vec(output_tensor_data, self.shape().to_vec()))
}
#[cfg(feature = "metal")]
pub fn swish_metal(&self) -> RusTorchResult<Tensor<T>> {
use crate::gpu::metal_kernels::metal_swish_f32;
let input_data: Vec<f32> = self
.data
.iter()
.map(|&x| x.to_f32().unwrap_or(0.0))
.collect();
let mut output_data = vec![0.0f32; input_data.len()];
metal_swish_f32(&input_data, &mut output_data)?;
let output_tensor_data: Vec<T> = output_data.iter().map(|&x| T::from(x).unwrap()).collect();
Ok(Tensor::from_vec(output_tensor_data, self.shape().to_vec()))
}
#[cfg(not(feature = "metal"))]
pub fn relu_metal(&self) -> RusTorchResult<Tensor<T>> {
Err(RusTorchError::UnsupportedDevice(
"Metal not available".to_string(),
))
}
#[cfg(not(feature = "metal"))]
pub fn sigmoid_metal(&self) -> RusTorchResult<Tensor<T>> {
Err(RusTorchError::UnsupportedDevice(
"Metal not available".to_string(),
))
}
#[cfg(not(feature = "metal"))]
pub fn tanh_metal(&self) -> RusTorchResult<Tensor<T>> {
Err(RusTorchError::UnsupportedDevice(
"Metal not available".to_string(),
))
}
#[cfg(not(feature = "metal"))]
pub fn gelu_metal(&self) -> RusTorchResult<Tensor<T>> {
Err(RusTorchError::UnsupportedDevice(
"Metal not available".to_string(),
))
}
#[cfg(not(feature = "metal"))]
pub fn leaky_relu_metal(&self, _negative_slope: f64) -> RusTorchResult<Tensor<T>> {
Err(RusTorchError::UnsupportedDevice(
"Metal not available".to_string(),
))
}
#[cfg(not(feature = "metal"))]
pub fn elu_metal(&self, _alpha: f64) -> RusTorchResult<Tensor<T>> {
Err(RusTorchError::UnsupportedDevice(
"Metal not available".to_string(),
))
}
#[cfg(not(feature = "metal"))]
pub fn swish_metal(&self) -> RusTorchResult<Tensor<T>> {
Err(RusTorchError::UnsupportedDevice(
"Metal not available".to_string(),
))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tensor::Tensor;

    /// Asserts that `actual` lies strictly within `tolerance` of `expected`.
    fn assert_close(actual: f32, expected: f32, tolerance: f32) {
        assert!((actual - expected).abs() < tolerance);
    }

    /// ReLU clamps the negative entry to zero and leaves the rest untouched.
    #[test]
    fn test_gpu_relu_fallback() {
        let input = Tensor::<f32>::from_vec(vec![-1.0, 0.0, 1.0, 2.0], vec![2, 2]);

        let activated = input.gpu_relu().unwrap();

        let values = activated.as_slice().unwrap().to_vec();
        assert_eq!(values, vec![0.0, 0.0, 1.0, 2.0]);
    }

    /// Sigmoid maps 0 to 0.5 and 1 to roughly 0.731.
    #[test]
    fn test_gpu_sigmoid_fallback() {
        let input = Tensor::<f32>::from_vec(vec![0.0, 1.0], vec![2]);

        let activated = input.gpu_sigmoid().unwrap();

        let values = activated.as_slice().unwrap().to_vec();
        assert_close(values[0], 0.5, 0.001);
        assert_close(values[1], 0.731, 0.01);
    }

    /// Tanh maps 0 to 0 and 1 to roughly 0.762.
    #[test]
    fn test_gpu_tanh_fallback() {
        let input = Tensor::<f32>::from_vec(vec![0.0, 1.0], vec![2]);

        let activated = input.gpu_tanh().unwrap();

        let values = activated.as_slice().unwrap().to_vec();
        assert_close(values[0], 0.0, 0.001);
        assert_close(values[1], 0.762, 0.01);
    }
}