use super::*;
use crate::gpu::coreml::common::coreml_feature;
use crate::tensor::Tensor;
use ndarray::ScalarOperand;
use num_traits::{Float, FromPrimitive};
/// Activation functions that are dispatched through the CoreML backend.
///
/// Every method borrows the receiver and returns a `CoreMLResult<Tensor<T>>`,
/// so failures are reported through the `Err` variant rather than by
/// panicking in this trait's visible contract.
pub trait CoreMLActivation<T>
where
T: Float + FromPrimitive + ScalarOperand + 'static,
{
/// Requests the ReLU activation.
fn coreml_relu(&self) -> CoreMLResult<Tensor<T>>;
/// Requests the sigmoid activation.
fn coreml_sigmoid(&self) -> CoreMLResult<Tensor<T>>;
/// Requests the tanh activation.
fn coreml_tanh(&self) -> CoreMLResult<Tensor<T>>;
/// Requests the softmax activation.
///
/// `dim` is presumably the axis softmax is computed over (negative values
/// likely count from the end) -- TODO confirm against the backend.
fn coreml_softmax(&self, dim: isize) -> CoreMLResult<Tensor<T>>;
/// Requests the GELU activation.
fn coreml_gelu(&self) -> CoreMLResult<Tensor<T>>;
/// Requests the leaky-ReLU activation with the given `negative_slope`.
///
/// NOTE(review): `ActivationOperation::execute_coreml` in this file returns
/// an unsupported-operation error for this activation; whether the executor
/// provides a fallback is not visible here.
fn coreml_leaky_relu(&self, negative_slope: f64) -> CoreMLResult<Tensor<T>>;
/// Requests the ELU activation with the given `alpha`.
///
/// NOTE(review): same caveat as `coreml_leaky_relu` -- the CoreML path in
/// this file reports this activation as not yet implemented.
fn coreml_elu(&self, alpha: f64) -> CoreMLResult<Tensor<T>>;
/// Requests the Swish activation.
///
/// NOTE(review): same caveat as `coreml_leaky_relu` -- the CoreML path in
/// this file reports this activation as not yet implemented.
fn coreml_swish(&self) -> CoreMLResult<Tensor<T>>;
}
/// A single activation request: the input tensor, the activation to apply,
/// and the optional parameters that accompany it.
pub struct ActivationOperation<T: Float> {
/// Tensor the activation is applied to.
input: Tensor<T>,
/// Which activation function was requested.
activation_type: CoreMLActivationType,
/// Optional per-activation parameters (slope, alpha, dim).
parameters: ActivationParameters,
}
/// Optional parameters that accompany an activation request.
///
/// Every field is `None` by default; callers set only the field relevant to
/// the activation they construct, typically via struct-update syntax:
/// `ActivationParameters { dim: Some(-1), ..Default::default() }`.
///
/// `Default` is derived: the derived implementation yields `None` for every
/// `Option` field, which is exactly what the previous hand-written
/// implementation produced.
#[derive(Debug, Clone, Default)]
pub struct ActivationParameters {
    /// Slope supplied for leaky-ReLU requests.
    pub negative_slope: Option<f64>,
    /// Alpha supplied for ELU requests.
    pub alpha: Option<f64>,
    /// Dimension supplied for softmax requests.
    pub dim: Option<isize>,
}
/// Constructors and the size heuristic for [`ActivationOperation`].
impl<T> ActivationOperation<T>
where
    T: Float + FromPrimitive + ScalarOperand + 'static,
{
    /// Creates an operation for `activation_type` over `input` with all
    /// optional parameters left unset.
    pub fn new(input: Tensor<T>, activation_type: CoreMLActivationType) -> Self {
        Self::with_parameters(input, activation_type, ActivationParameters::default())
    }

    /// Creates an operation for `activation_type` over `input`, carrying the
    /// caller-supplied `parameters`.
    pub fn with_parameters(
        input: Tensor<T>,
        activation_type: CoreMLActivationType,
        parameters: ActivationParameters,
    ) -> Self {
        Self {
            input,
            activation_type,
            parameters,
        }
    }

    /// Returns `true` when the input has strictly more elements than the
    /// per-activation threshold below.
    ///
    /// The element count is the product of the input's shape entries.
    fn is_efficient_on_coreml(&self) -> bool {
        // Minimum element count that must be exceeded for each activation.
        let threshold: usize = match self.activation_type {
            CoreMLActivationType::ReLU
            | CoreMLActivationType::Sigmoid
            | CoreMLActivationType::Tanh => 256,
            CoreMLActivationType::Softmax | CoreMLActivationType::GELU => 1024,
            CoreMLActivationType::LeakyReLU
            | CoreMLActivationType::ELU
            | CoreMLActivationType::Swish => 512,
        };
        let element_count: usize = self.input.shape().iter().product();
        element_count > threshold
    }
}
impl<T> CoreMLOperation<T> for ActivationOperation<T>
where
    T: Float + FromPrimitive + ScalarOperand + 'static,
{
    /// Runs the activation on a `CoreMLGraph` created for `device_id`.
    ///
    /// # Errors
    ///
    /// * When none of the `coreml`, `coreml-hybrid`, or `coreml-fallback`
    ///   features is enabled, returns `error_helpers::feature_disabled()`.
    /// * When the activation is not one of ReLU, Sigmoid, Tanh, Softmax, or
    ///   GELU, returns an unsupported-operation error.
    /// * Any error from `CoreMLGraph::new` or `graph.activation` is
    ///   propagated.
    ///
    /// NOTE(review): `self.parameters` is not forwarded to the backend here;
    /// only the input tensor and the activation type are passed.
    fn execute_coreml(&self, device_id: usize) -> CoreMLResult<Tensor<T>> {
        #[cfg(any(
            feature = "coreml",
            feature = "coreml-hybrid",
            feature = "coreml-fallback"
        ))]
        {
            use crate::gpu::coreml::backend::CoreMLGraph;
            use crate::gpu::coreml::common::CoreMLActivationType as ActivationType;

            let graph = CoreMLGraph::new(device_id)?;
            let backend_activation = match self.activation_type {
                CoreMLActivationType::ReLU => ActivationType::ReLU,
                CoreMLActivationType::Sigmoid => ActivationType::Sigmoid,
                CoreMLActivationType::Tanh => ActivationType::Tanh,
                CoreMLActivationType::Softmax => ActivationType::Softmax,
                CoreMLActivationType::GELU => ActivationType::GELU,
                _ => {
                    return Err(error_helpers::unsupported_operation(&format!(
                        "Activation {:?} not yet implemented in CoreML backend",
                        self.activation_type
                    )));
                }
            };
            // Explicit `return`: this block is a cfg-gated statement, not the
            // function's tail expression.
            return graph.activation(&self.input, backend_activation);
        }
        #[cfg(not(any(
            feature = "coreml",
            feature = "coreml-hybrid",
            feature = "coreml-fallback"
        )))]
        {
            Err(error_helpers::feature_disabled())
        }
    }

    /// Returns `true` only when the activation is one of ReLU, Sigmoid, Tanh,
    /// Softmax, or GELU *and* the input passes `is_efficient_on_coreml`.
    fn is_supported_by_coreml(&self) -> bool {
        let is_supported = matches!(
            self.activation_type,
            CoreMLActivationType::ReLU
                | CoreMLActivationType::Sigmoid
                | CoreMLActivationType::Tanh
                | CoreMLActivationType::Softmax
                | CoreMLActivationType::GELU
        );
        is_supported && self.is_efficient_on_coreml()
    }

    /// Estimates the execution time as `element count * per-element cost`.
    ///
    /// Returns `None` when `is_supported_by_coreml` is `false`. The
    /// per-element costs are the fixed nanosecond constants in the table
    /// below. The multiplication is carried out in `u64` and saturates, so a
    /// very large element count cannot overflow (the previous code multiplied
    /// in `usize` before widening, which could panic in debug builds or wrap
    /// in release builds on targets with a narrow `usize`).
    fn estimated_execution_time(&self) -> Option<std::time::Duration> {
        if !self.is_supported_by_coreml() {
            return None;
        }
        let elements: usize = self.input.shape().iter().product();
        let nanos_per_element: u64 = match self.activation_type {
            CoreMLActivationType::ReLU => 1,
            CoreMLActivationType::Sigmoid => 10,
            CoreMLActivationType::Tanh => 8,
            CoreMLActivationType::Softmax => 20,
            CoreMLActivationType::GELU => 15,
            // Not reached today: other activations fail the support check above.
            _ => 5,
        };
        // `usize` is at most 64 bits on supported targets, so this widening
        // cast is lossless.
        let total_nanos = (elements as u64).saturating_mul(nanos_per_element);
        Some(std::time::Duration::from_nanos(total_nanos))
    }
}
impl<T> CoreMLActivation<T> for Tensor<T>
where
T: Float + FromPrimitive + ScalarOperand + 'static,
{
fn coreml_relu(&self) -> CoreMLResult<Tensor<T>> {
let operation = ActivationOperation::new(self.clone(), CoreMLActivationType::ReLU);
let executor = CoreMLExecutor::new(0)?;
executor.execute(&operation)
}
fn coreml_sigmoid(&self) -> CoreMLResult<Tensor<T>> {
let operation = ActivationOperation::new(self.clone(), CoreMLActivationType::Sigmoid);
let executor = CoreMLExecutor::new(0)?;
executor.execute(&operation)
}
fn coreml_tanh(&self) -> CoreMLResult<Tensor<T>> {
let operation = ActivationOperation::new(self.clone(), CoreMLActivationType::Tanh);
let executor = CoreMLExecutor::new(0)?;
executor.execute(&operation)
}
fn coreml_softmax(&self, dim: isize) -> CoreMLResult<Tensor<T>> {
let parameters = ActivationParameters {
dim: Some(dim),
..Default::default()
};
let operation = ActivationOperation::with_parameters(
self.clone(),
CoreMLActivationType::Softmax,
parameters,
);
let executor = CoreMLExecutor::new(0)?;
executor.execute(&operation)
}
fn coreml_gelu(&self) -> CoreMLResult<Tensor<T>> {
let operation = ActivationOperation::new(self.clone(), CoreMLActivationType::GELU);
let executor = CoreMLExecutor::new(0)?;
executor.execute(&operation)
}
fn coreml_leaky_relu(&self, negative_slope: f64) -> CoreMLResult<Tensor<T>> {
let parameters = ActivationParameters {
negative_slope: Some(negative_slope),
..Default::default()
};
let operation = ActivationOperation::with_parameters(
self.clone(),
CoreMLActivationType::LeakyReLU,
parameters,
);
let executor = CoreMLExecutor::new(0)?;
executor.execute(&operation)
}
fn coreml_elu(&self, alpha: f64) -> CoreMLResult<Tensor<T>> {
let parameters = ActivationParameters {
alpha: Some(alpha),
..Default::default()
};
let operation = ActivationOperation::with_parameters(
self.clone(),
CoreMLActivationType::ELU,
parameters,
);
let executor = CoreMLExecutor::new(0)?;
executor.execute(&operation)
}
fn coreml_swish(&self) -> CoreMLResult<Tensor<T>> {
let operation = ActivationOperation::new(self.clone(), CoreMLActivationType::Swish);
let executor = CoreMLExecutor::new(0)?;
executor.execute(&operation)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A 20x20 input (400 elements) exceeds the 256-element ReLU threshold.
    #[test]
    fn test_activation_operation_creation() {
        let relu_op =
            ActivationOperation::new(Tensor::<f32>::zeros(&[20, 20]), CoreMLActivationType::ReLU);

        assert!(relu_op.is_efficient_on_coreml());
    }

    /// A 5x5 input (25 elements) is below the ReLU threshold, so it is
    /// neither efficient nor reported as supported.
    #[test]
    fn test_small_tensor_not_efficient() {
        let tiny_op =
            ActivationOperation::new(Tensor::<f32>::zeros(&[5, 5]), CoreMLActivationType::ReLU);

        assert!(!tiny_op.is_efficient_on_coreml());
        assert!(!tiny_op.is_supported_by_coreml());
    }

    /// With 40x40 = 1600 elements every backend-implemented activation clears
    /// its size threshold and must be reported as supported.
    #[test]
    fn test_supported_activations() {
        let tensor = Tensor::<f32>::zeros(&[40, 40]);

        for kind in [
            CoreMLActivationType::ReLU,
            CoreMLActivationType::Sigmoid,
            CoreMLActivationType::Tanh,
            CoreMLActivationType::Softmax,
            CoreMLActivationType::GELU,
        ] {
            let op = ActivationOperation::new(tensor.clone(), kind);
            assert!(op.is_supported_by_coreml());
        }
    }

    /// A supported operation yields a non-zero time estimate.
    #[test]
    fn test_execution_time_estimation() {
        let op =
            ActivationOperation::new(Tensor::<f32>::zeros(&[64, 64]), CoreMLActivationType::ReLU);

        let estimate = op.estimated_execution_time();
        assert!(estimate.is_some());
        assert!(estimate.unwrap().as_nanos() > 0);
    }

    /// Parameters passed to `with_parameters` are stored unchanged.
    #[test]
    fn test_parameters() {
        let custom = ActivationParameters {
            negative_slope: Some(0.01),
            alpha: Some(1.0),
            dim: Some(-1),
        };
        let op = ActivationOperation::with_parameters(
            Tensor::<f32>::zeros(&[32, 32]),
            CoreMLActivationType::LeakyReLU,
            custom,
        );

        let stored = &op.parameters;
        assert_eq!(stored.negative_slope, Some(0.01));
        assert_eq!(stored.alpha, Some(1.0));
        assert_eq!(stored.dim, Some(-1));
    }
}