use super::common::error_helpers;
use super::common::*;
use super::device::{CoreMLDevice, CoreMLDeviceManager};
use crate::gpu::coreml::common::coreml_feature;
use crate::tensor::Tensor;
use ndarray::ScalarOperand;
use num_traits::{Float, FromPrimitive};
use std::collections::HashMap;
use std::sync::{Arc, Mutex, OnceLock};
/// Snapshot of backend counters, as returned by `CoreMLBackend::get_stats`.
///
/// `Default` yields all-zero counters and a zero duration.
#[derive(Clone, Debug, Default)]
pub struct CoreMLBackendStats {
    /// Cache hit counter.
    pub cache_hits: u64,
    /// Cache miss counter.
    pub cache_misses: u64,
    /// Total number of recorded executions (filled from the operation cache by `get_stats`).
    pub total_operations: u64,
    /// Counter for operations that fell back to another implementation.
    pub fallback_operations: u64,
    /// Average execution time across operations.
    pub average_execution_time: std::time::Duration,
}
/// CoreML execution backend: bundles a device manager, a model manager, a
/// per-operation timing cache and the backend configuration.
///
/// The managers and the cache sit behind `Arc`, so cloning a backend yields a
/// handle that shares them with the original; only `config` is copied.
#[derive(Clone)]
pub struct CoreMLBackend {
/// Shared device manager (warmed up in `new`, queried by `is_available`).
device_manager: Arc<CoreMLDeviceManager>,
/// Shared model manager used to build and execute CoreML models.
model_manager: Arc<super::model_manager::CoreMLModelManager>,
/// Timing statistics keyed by operation id; guarded by a mutex.
operation_cache: Arc<Mutex<HashMap<String, CachedOperation>>>,
/// Settings this backend was created with.
config: CoreMLBackendConfig,
}
/// Tunable settings for a `CoreMLBackend`.
#[derive(Clone, Debug)]
pub struct CoreMLBackendConfig {
    /// When set, `execute_operation` records timings of successful operations
    /// in the operation cache.
    pub enable_caching: bool,
    /// Entry count above which the operation cache starts evicting entries.
    pub max_cache_size: usize,
    /// Fallback switch. NOTE(review): not read anywhere in this file; presumably
    /// consumed by callers — confirm.
    pub auto_fallback: bool,
    /// When set, execution timings are printed to stdout.
    pub enable_profiling: bool,
}
impl Default for CoreMLBackendConfig {
fn default() -> Self {
Self {
enable_caching: true,
max_cache_size: 1000,
auto_fallback: true,
enable_profiling: false,
}
}
}
/// Timing statistics kept per operation id in the backend's operation cache.
#[derive(Clone, Debug)]
struct CachedOperation {
    /// Operation id; duplicates the key under which the entry is stored.
    id: String,
    /// Duration of the most recent recorded execution.
    last_execution_time: std::time::Duration,
    /// Number of recorded executions.
    execution_count: usize,
    /// Running mean of the recorded execution durations.
    average_time: std::time::Duration,
}
/// Description of the Neural Engine capabilities reported by this module.
#[derive(Clone, Debug)]
pub struct NeuralEngineInfo {
    /// Whether a Neural Engine is considered usable.
    pub available: bool,
    /// Whether the host was detected as Apple silicon.
    pub apple_silicon: bool,
    /// Suggested batch size.
    pub optimal_batch_size: usize,
    /// Suggested matrix dimension.
    pub optimal_matrix_size: usize,
    /// Whether float16 is reported as supported.
    pub supports_float16: bool,
    /// Whether int8 is reported as supported.
    pub supports_int8: bool,
    /// Reported throughput figure (presumably peak operations per second — confirm).
    pub max_ops_per_second: u64,
}
impl CoreMLBackend {
/// Returns the process-wide backend, creating it on first use.
///
/// Initialisation tries `CoreMLBackend::new` with the default configuration;
/// if that fails, the instance built by `CoreMLBackend::dummy` is stored
/// instead, so this accessor itself never fails.
pub fn global() -> &'static CoreMLBackend {
    static INSTANCE: OnceLock<CoreMLBackend> = OnceLock::new();
    INSTANCE.get_or_init(|| match CoreMLBackend::new(CoreMLBackendConfig::default()) {
        Ok(backend) => backend,
        Err(_) => CoreMLBackend::dummy(),
    })
}
/// Probes the host and reports Neural Engine capabilities.
///
/// With the `coreml` feature on macOS, the CPU brand string is read via
/// `sysctl -n machdep.cpu.brand_string`; the host counts as Apple silicon when
/// that string contains `"Apple"` (a failed `sysctl` call counts as "not Apple
/// silicon"). On every other configuration a fixed "unavailable" description
/// is returned.
pub fn detect_neural_engine() -> NeuralEngineInfo {
    #[cfg(all(feature = "coreml", target_os = "macos"))]
    {
        use std::process::Command;
        let apple_silicon = match Command::new("sysctl")
            .args(&["-n", "machdep.cpu.brand_string"])
            .output()
        {
            Ok(output) => String::from_utf8_lossy(&output.stdout).contains("Apple"),
            Err(_) => false,
        };
        if apple_silicon {
            NeuralEngineInfo {
                available: true,
                apple_silicon: true,
                optimal_batch_size: 16,
                optimal_matrix_size: 1024,
                supports_float16: true,
                supports_int8: true,
                max_ops_per_second: 15_000_000_000_000,
            }
        } else {
            NeuralEngineInfo {
                available: false,
                apple_silicon: false,
                optimal_batch_size: 1,
                optimal_matrix_size: 256,
                supports_float16: false,
                supports_int8: false,
                max_ops_per_second: 0,
            }
        }
    }
    #[cfg(not(all(feature = "coreml", target_os = "macos")))]
    {
        NeuralEngineInfo {
            available: false,
            apple_silicon: false,
            optimal_batch_size: 1,
            optimal_matrix_size: 256,
            supports_float16: false,
            supports_int8: false,
            max_ops_per_second: 0,
        }
    }
}
pub fn new(config: CoreMLBackendConfig) -> CoreMLResult<Self> {
let device_manager = Arc::new(CoreMLDeviceManager::new());
let model_manager = Arc::new(super::model_manager::CoreMLModelManager::new());
if !is_coreml_available() {
return Err(error_helpers::not_available());
}
if let Err(e) = device_manager.warmup() {
eprintln!("Warning: CoreML warmup failed: {}", e);
}
Ok(Self {
device_manager,
model_manager,
operation_cache: Arc::new(Mutex::new(HashMap::new())),
config,
})
}
fn dummy() -> Self {
Self {
device_manager: Arc::new(CoreMLDeviceManager::new()),
model_manager: Arc::new(super::model_manager::CoreMLModelManager::new()),
operation_cache: Arc::new(Mutex::new(HashMap::new())),
config: CoreMLBackendConfig {
enable_caching: false,
auto_fallback: true,
..Default::default()
},
}
}
/// True when `is_coreml_available()` holds and the device manager also
/// reports availability (the device manager is only asked in that case).
pub fn is_available(&self) -> bool {
    if !is_coreml_available() {
        return false;
    }
    self.device_manager.is_available()
}
/// Borrows the device manager owned by this backend.
pub fn device_manager(&self) -> &CoreMLDeviceManager {
    &*self.device_manager
}
/// Borrows the model manager owned by this backend.
pub fn model_manager(&self) -> &super::model_manager::CoreMLModelManager {
    &*self.model_manager
}
/// Runs `operation`, timing it, and returns its result unchanged.
///
/// When caching is enabled and the operation succeeded, the elapsed time is
/// recorded under `operation_id`. When profiling is enabled, the elapsed time
/// is printed to stdout regardless of the outcome.
pub fn execute_operation<T, F>(&self, operation_id: &str, operation: F) -> CoreMLResult<T>
where
    F: FnOnce() -> CoreMLResult<T>,
{
    let timer = std::time::Instant::now();
    let outcome = operation();
    let elapsed = timer.elapsed();

    let should_record = self.config.enable_caching && outcome.is_ok();
    if should_record {
        self.update_operation_cache(operation_id, elapsed);
    }

    if self.config.enable_profiling {
        println!(
            "CoreML Operation '{}' executed in {:?}",
            operation_id, elapsed
        );
    }

    outcome
}
/// Records one execution of `operation_id` that took `execution_time`.
///
/// Updates the entry's last duration, execution count and running mean, then
/// enforces `config.max_cache_size`: while the cache holds more entries than
/// allowed, the least-executed entry is evicted (ties broken by key so the
/// choice is deterministic). The entry that was just updated is evicted only
/// as a last resort, i.e. when it is the sole remaining entry and the limit
/// is zero.
///
/// A poisoned cache mutex is ignored; statistics are best-effort.
fn update_operation_cache(&self, operation_id: &str, execution_time: std::time::Duration) {
    let Ok(mut cache) = self.operation_cache.lock() else {
        return;
    };

    let entry = cache
        .entry(operation_id.to_string())
        .or_insert_with(|| CachedOperation {
            id: operation_id.to_string(),
            last_execution_time: execution_time,
            // Starts at zero; incremented below together with existing entries.
            execution_count: 0,
            average_time: execution_time,
        });
    entry.last_execution_time = execution_time;
    entry.execution_count += 1;

    // Running mean: rebuild the previous total from the old mean, add the new sample.
    let previous_total =
        entry.average_time.as_nanos() as f64 * (entry.execution_count - 1) as f64;
    let new_total = previous_total + execution_time.as_nanos() as f64;
    entry.average_time =
        std::time::Duration::from_nanos((new_total / entry.execution_count as f64) as u64);

    // Evict until the size limit holds. Previously only entries executed
    // exactly once were eligible, so the cache could grow without bound and
    // the freshly recorded entry could be the one thrown away.
    let limit = self.config.max_cache_size;
    while cache.len() > limit {
        let victim = cache
            .iter()
            .min_by(|(key_a, op_a), (key_b, op_b)| {
                // `false < true`: the entry just updated sorts last.
                (key_a.as_str() == operation_id)
                    .cmp(&(key_b.as_str() == operation_id))
                    .then_with(|| op_a.execution_count.cmp(&op_b.execution_count))
                    .then_with(|| key_a.cmp(key_b))
            })
            .map(|(key, _)| key.clone());
        match victim {
            Some(key) => {
                cache.remove(&key);
            }
            None => break,
        }
    }
}
/// Looks up the recorded statistics for `operation_id`.
///
/// Returns `(execution_count, average_time)`, or `None` when the operation
/// has no cache entry or the cache mutex is poisoned.
pub fn get_operation_stats(&self, operation_id: &str) -> Option<(usize, std::time::Duration)> {
    let cache = self.operation_cache.lock().ok()?;
    let entry = cache.get(operation_id)?;
    Some((entry.execution_count, entry.average_time))
}
/// Summarises the operation cache as a `CoreMLBackendStats` value.
///
/// `total_operations` is the sum of all recorded execution counts and
/// `average_execution_time` is the execution-count-weighted mean of the
/// per-operation averages (zero when nothing has been recorded).
/// Hits, misses and fallbacks are not tracked by this backend and are
/// reported as zero. A poisoned cache mutex yields all-zero statistics.
pub fn get_stats(&self) -> CoreMLBackendStats {
    // Single pass under one lock: (total executions, total nanoseconds).
    let (total_executions, total_nanos) = match self.operation_cache.lock() {
        Ok(cache) => cache.values().fold((0u64, 0u128), |(count, nanos), op| {
            let runs = op.execution_count as u64;
            (
                count + runs,
                nanos + op.average_time.as_nanos() * u128::from(runs),
            )
        }),
        Err(_) => (0, 0),
    };

    let average_execution_time = if total_executions == 0 {
        std::time::Duration::from_nanos(0)
    } else {
        let mean_nanos = total_nanos / u128::from(total_executions);
        // Clamp instead of truncating if the mean ever exceeds u64 nanoseconds.
        std::time::Duration::from_nanos(mean_nanos.min(u128::from(u64::MAX)) as u64)
    };

    CoreMLBackendStats {
        cache_hits: 0,
        cache_misses: 0,
        total_operations: total_executions,
        fallback_operations: 0,
        average_execution_time,
    }
}
/// Empties the operation cache by delegating to `clear_cache`.
///
/// Always returns `Ok(())`.
pub fn cleanup_cache(&self) -> CoreMLResult<()> {
    self.clear_cache();
    Ok(())
}
/// Removes every entry from the operation cache.
///
/// Does nothing when the cache mutex is poisoned.
pub fn clear_cache(&self) {
    let Ok(mut entries) = self.operation_cache.lock() else {
        return;
    };
    entries.clear();
}
/// Returns `(distinct operations, total executions)` for the operation cache.
///
/// Yields `(0, 0)` when the cache mutex is poisoned.
pub fn cache_stats(&self) -> (usize, usize) {
    let Ok(cache) = self.operation_cache.lock() else {
        return (0, 0);
    };
    let distinct_operations = cache.len();
    let executions = cache.values().map(|op| op.execution_count).sum::<usize>();
    (distinct_operations, executions)
}
/// Reports whether the Neural Engine is considered usable.
///
/// With the `coreml` feature on macOS this is exactly the result of
/// `is_apple_silicon`; in every other configuration it is `false`.
pub fn is_neural_engine_available(&self) -> bool {
    #[cfg(all(feature = "coreml", target_os = "macos"))]
    {
        self.is_apple_silicon()
    }
    #[cfg(not(all(feature = "coreml", target_os = "macos")))]
    {
        false
    }
}
/// Reports whether the host is an arm64 Mac.
///
/// On macOS the answer comes from `uname -m` (`"arm64"` means yes). The probe
/// runs at most once per process: the machine architecture cannot change at
/// runtime, and this method sits on the matmul hot path, so spawning a
/// subprocess on every call is avoided by caching the result. If `uname`
/// cannot be run, `false` is cached. On other targets this is always `false`.
fn is_apple_silicon(&self) -> bool {
    #[cfg(target_os = "macos")]
    {
        use std::process::Command;
        static IS_ARM64: OnceLock<bool> = OnceLock::new();
        *IS_ARM64.get_or_init(|| {
            Command::new("uname")
                .arg("-m")
                .output()
                .map(|output| String::from_utf8_lossy(&output.stdout).trim() == "arm64")
                .unwrap_or(false)
        })
    }
    #[cfg(not(target_os = "macos"))]
    {
        false
    }
}
/// Multiplies `a` by `b` through a CoreML matmul model.
///
/// With the `coreml` feature on macOS: obtains (or creates) a matmul model
/// for the two shapes, passes both tensors through `optimize_tensor_for_ane`,
/// executes the model and returns a clone of its first output. When profiling
/// is enabled the elapsed time is printed; that message indexes
/// `a.shape()[0]` and `b.shape()[1]`.
///
/// # Errors
/// Propagates model-manager errors, and returns an unsupported-operation
/// error when the model yields no outputs. In every other build
/// configuration it returns `error_helpers::not_available()`.
pub fn execute_on_neural_engine<T>(
    &self,
    a: &Tensor<T>,
    b: &Tensor<T>,
) -> CoreMLResult<Tensor<T>>
where
    T: Float + FromPrimitive + ScalarOperand + 'static,
{
    #[cfg(all(feature = "coreml", target_os = "macos"))]
    {
        let manager = self.model_manager();
        let handle = manager.get_or_create_matmul_model::<T>(a.shape(), b.shape())?;

        // Timing starts after model lookup/creation.
        let timer = std::time::Instant::now();
        let lhs = self.optimize_tensor_for_ane(a)?;
        let rhs = self.optimize_tensor_for_ane(b)?;
        let inputs = vec![&lhs, &rhs];
        let outputs = manager.execute_model(&handle, &inputs)?;
        let elapsed = timer.elapsed();

        if self.config.enable_profiling {
            println!(
                "Neural Engine execution completed in {:?} for {}x{} matrix multiplication",
                elapsed,
                a.shape()[0],
                b.shape()[1]
            );
        }

        if !outputs.is_empty() {
            Ok(outputs[0].clone())
        } else {
            Err(error_helpers::unsupported_operation(
                "Neural Engine model returned no outputs",
            ))
        }
    }
    #[cfg(not(all(feature = "coreml", target_os = "macos")))]
    {
        Err(error_helpers::not_available())
    }
}
/// Prepares a tensor for Neural Engine execution.
///
/// Currently a pass-through: the tensor is cloned and returned unchanged,
/// and the call never fails.
fn optimize_tensor_for_ane<T>(&self, tensor: &Tensor<T>) -> CoreMLResult<Tensor<T>>
where
    T: Float + FromPrimitive + ScalarOperand + 'static,
{
    let passthrough = tensor.clone();
    Ok(passthrough)
}
/// Describes the Neural Engine when it is available.
///
/// With the `coreml` feature on macOS, returns `Some` with a fixed capability
/// description when `is_neural_engine_available` is true and `None`
/// otherwise. In every other build configuration it returns `None`.
pub fn get_neural_engine_info(&self) -> Option<NeuralEngineInfo> {
    #[cfg(all(feature = "coreml", target_os = "macos"))]
    {
        if !self.is_neural_engine_available() {
            return None;
        }
        Some(NeuralEngineInfo {
            available: true,
            apple_silicon: self.is_apple_silicon(),
            optimal_batch_size: 1,
            optimal_matrix_size: 1024,
            supports_float16: true,
            supports_int8: true,
            max_ops_per_second: 15_800_000_000_000,
        })
    }
    #[cfg(not(all(feature = "coreml", target_os = "macos")))]
    {
        None
    }
}
}
/// Entry point for running tensor operations (matmul, conv2d, activation)
/// against a specific CoreML device.
pub struct CoreMLGraph {
/// Device obtained from the backend's device manager; used to validate tensors.
device: CoreMLDevice,
/// Backend owned by this graph; wraps each operation for timing/caching.
backend: Arc<CoreMLBackend>,
}
impl CoreMLGraph {
pub fn new(device_id: usize) -> CoreMLResult<Self> {
let backend = Arc::new(CoreMLBackend::new(CoreMLBackendConfig::default())?);
let device = backend.device_manager().initialize(device_id)?;
Ok(Self { device, backend })
}
/// Matrix-multiplies `a` by `b`.
///
/// Both tensors are first validated against the device using their byte
/// sizes. The work then runs inside `execute_operation` under an id built
/// from the two element counts and the element type name.
///
/// With any of the `coreml`, `coreml-hybrid` or `coreml-fallback` features:
/// non-2-D inputs are delegated to `Tensor::matmul`; 2-D inputs with
/// mismatched inner dimensions yield an unsupported-operation error;
/// otherwise the call goes to `coreml_implementation::coreml_matmul`.
/// Without those features the result is `error_helpers::feature_disabled()`.
pub fn matmul<T>(&self, a: &Tensor<T>, b: &Tensor<T>) -> CoreMLResult<Tensor<T>>
where
    T: Float + FromPrimitive + ScalarOperand + 'static,
{
    let elem_bytes = std::mem::size_of::<T>();
    let a_elems: usize = a.shape().iter().product();
    let b_elems: usize = b.shape().iter().product();
    let a_bytes = a_elems * elem_bytes;
    let b_bytes = b_elems * elem_bytes;

    self.device
        .validate_tensor::<T>(a_bytes, &crate::dtype::DType::Float32)?;
    self.device
        .validate_tensor::<T>(b_bytes, &crate::dtype::DType::Float32)?;

    let operation_id = format!(
        "matmul_{}x{}_{}",
        a_elems,
        b_elems,
        std::any::type_name::<T>()
    );

    self.backend.execute_operation(&operation_id, || {
        #[cfg(any(
            feature = "coreml",
            feature = "coreml-hybrid",
            feature = "coreml-fallback"
        ))]
        {
            let (lhs_shape, rhs_shape) = (a.shape(), b.shape());
            // Anything that is not a plain 2-D product goes to the tensor's own matmul.
            if lhs_shape.len() != 2 || rhs_shape.len() != 2 {
                return a.matmul(b);
            }
            if lhs_shape[1] != rhs_shape[0] {
                return Err(error_helpers::unsupported_operation(
                    "Matrix dimensions don't match for multiplication",
                ));
            }
            coreml_implementation::coreml_matmul(a, b)
        }
        #[cfg(not(any(
            feature = "coreml",
            feature = "coreml-hybrid",
            feature = "coreml-fallback"
        )))]
        {
            Err(error_helpers::feature_disabled())
        }
    })
}
/// Runs a 2-D convolution of `input` with `kernel`.
///
/// Both tensors are validated against the device using their byte sizes.
/// The work runs inside `execute_operation` under an id built from the two
/// element counts plus the `stride` and `padding` slices.
///
/// With any of the `coreml`, `coreml-hybrid` or `coreml-fallback` features
/// the call is forwarded to `coreml_implementation::coreml_conv2d`; without
/// them the result is `error_helpers::feature_disabled()`.
pub fn conv2d<T>(
    &self,
    input: &Tensor<T>,
    kernel: &Tensor<T>,
    stride: &[usize],
    padding: &[usize],
) -> CoreMLResult<Tensor<T>>
where
    T: Float + FromPrimitive + ScalarOperand + 'static,
{
    let elem_bytes = std::mem::size_of::<T>();
    let input_elems: usize = input.shape().iter().product();
    let kernel_elems: usize = kernel.shape().iter().product();
    let input_bytes = input_elems * elem_bytes;
    let kernel_bytes = kernel_elems * elem_bytes;

    self.device
        .validate_tensor::<T>(input_bytes, &crate::dtype::DType::Float32)?;
    self.device
        .validate_tensor::<T>(kernel_bytes, &crate::dtype::DType::Float32)?;

    let operation_id = format!(
        "conv2d_{}_{}_{:?}_{:?}",
        input_elems, kernel_elems, stride, padding
    );

    self.backend.execute_operation(&operation_id, || {
        #[cfg(any(
            feature = "coreml",
            feature = "coreml-hybrid",
            feature = "coreml-fallback"
        ))]
        {
            coreml_implementation::coreml_conv2d(input, kernel, stride, padding)
        }
        #[cfg(not(any(
            feature = "coreml",
            feature = "coreml-hybrid",
            feature = "coreml-fallback"
        )))]
        {
            Err(error_helpers::feature_disabled())
        }
    })
}
/// Applies the activation `activation_type` to `input`.
///
/// The tensor is validated against the device using its byte size. The work
/// runs inside `execute_operation` under an id built from the activation
/// type and the element count.
///
/// With any of the `coreml`, `coreml-hybrid` or `coreml-fallback` features
/// the call is forwarded to `coreml_implementation::coreml_activation`;
/// without them the result is `error_helpers::feature_disabled()`.
pub fn activation<T>(
    &self,
    input: &Tensor<T>,
    activation_type: CoreMLActivationType,
) -> CoreMLResult<Tensor<T>>
where
    T: Float + FromPrimitive + ScalarOperand + 'static,
{
    let input_elems: usize = input.shape().iter().product();
    let input_bytes = input_elems * std::mem::size_of::<T>();

    self.device
        .validate_tensor::<T>(input_bytes, &crate::dtype::DType::Float32)?;

    let operation_id = format!("activation_{:?}_{}", activation_type, input_elems);

    self.backend.execute_operation(&operation_id, || {
        #[cfg(any(
            feature = "coreml",
            feature = "coreml-hybrid",
            feature = "coreml-fallback"
        ))]
        {
            coreml_implementation::coreml_activation(input, activation_type)
        }
        #[cfg(not(any(
            feature = "coreml",
            feature = "coreml-hybrid",
            feature = "coreml-fallback"
        )))]
        {
            Err(error_helpers::feature_disabled())
        }
    })
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A successfully created backend carries the default configuration;
    /// failure to create one is tolerated (CoreML may be unavailable).
    #[test]
    fn test_backend_creation() {
        if let Ok(backend) = CoreMLBackend::new(CoreMLBackendConfig::default()) {
            assert!(backend.config.enable_caching);
            assert_eq!(backend.config.max_cache_size, 1000);
        } else {
            println!("CoreML backend creation failed (expected on some platforms)");
        }
    }

    /// Both the real and the dummy global backend have `auto_fallback` set.
    #[test]
    fn test_global_backend() {
        let global_backend = CoreMLBackend::global();
        assert!(global_backend.config.auto_fallback);
    }

    /// Clearing the cache leaves no operations and no executions behind.
    #[test]
    fn test_cache_operations() {
        let global_backend = CoreMLBackend::global();
        global_backend.clear_cache();
        let (operation_count, execution_count) = global_backend.cache_stats();
        assert_eq!(operation_count, 0);
        assert_eq!(execution_count, 0);
    }

    /// Graph creation may succeed or fail depending on the platform; either
    /// outcome is accepted and merely reported.
    #[test]
    fn test_graph_creation() {
        if CoreMLGraph::new(0).is_ok() {
            println!("CoreML graph created successfully");
        } else {
            println!("CoreML graph creation failed (expected on some platforms)");
        }
    }
}
#[cfg(any(
feature = "coreml",
feature = "coreml-hybrid",
feature = "coreml-fallback"
))]
mod coreml_implementation {
use super::*;
/// Matrix multiplication with CoreML acceleration where possible.
///
/// With the `coreml` feature on macOS, using the global backend:
/// 1. if the Neural Engine is available, delegate to
///    `execute_on_neural_engine`;
/// 2. otherwise, if the backend is available, run a CoreML matmul model and
///    return a clone of its first output (an empty output list is an
///    unsupported-operation error).
///
/// In all remaining cases the tensor's own `matmul` is used.
pub fn coreml_matmul<T>(a: &Tensor<T>, b: &Tensor<T>) -> CoreMLResult<Tensor<T>>
where
    T: Float + FromPrimitive + ScalarOperand + 'static,
{
    #[cfg(all(feature = "coreml", target_os = "macos"))]
    {
        let backend = CoreMLBackend::global();
        if backend.is_neural_engine_available() {
            return backend.execute_on_neural_engine(a, b);
        }
        if backend.is_available() {
            let manager = backend.model_manager();
            let handle = manager.get_or_create_matmul_model::<T>(a.shape(), b.shape())?;
            let inputs = vec![a, b];
            let outputs = manager.execute_model(&handle, &inputs)?;
            return if outputs.is_empty() {
                Err(error_helpers::unsupported_operation(
                    "CoreML model returned no outputs",
                ))
            } else {
                Ok(outputs[0].clone())
            };
        }
    }
    // No CoreML path applied: use the tensor's own implementation.
    a.matmul(b)
}
/// 2-D convolution through a CoreML model obtained from the global backend.
///
/// `stride` and `padding` must each hold at least two values. The model
/// builder accepts a single stride and a single padding value, so the first
/// two entries of each slice must be equal; asymmetric values are rejected
/// instead of being silently replaced by the first entry (which would compute
/// a different convolution than the one requested).
///
/// Returns a clone of the model's first output.
///
/// # Errors
/// * unsupported-operation when `stride`/`padding` are too short or
///   asymmetric, or when the model yields no outputs;
/// * any error propagated from the model manager.
pub fn coreml_conv2d<T>(
    input: &Tensor<T>,
    kernel: &Tensor<T>,
    stride: &[usize],
    padding: &[usize],
) -> CoreMLResult<Tensor<T>>
where
    T: Float + FromPrimitive + ScalarOperand + 'static,
{
    let backend = CoreMLBackend::global();
    let model_manager = backend.model_manager();

    if stride.len() < 2 || padding.len() < 2 {
        return Err(error_helpers::unsupported_operation(
            "Conv2D requires stride and padding arrays with at least 2 elements",
        ));
    }
    // Only one stride and one padding value reach the model below.
    if stride[0] != stride[1] || padding[0] != padding[1] {
        return Err(error_helpers::unsupported_operation(
            "Conv2D requires equal stride and equal padding for both spatial dimensions",
        ));
    }

    let model_handle = model_manager.get_or_create_conv2d_model::<T>(
        input.shape(),
        kernel.shape(),
        stride[0],
        padding[0],
    )?;
    let inputs = vec![input, kernel];
    let outputs = model_manager.execute_model(&model_handle, &inputs)?;
    if outputs.is_empty() {
        return Err(error_helpers::unsupported_operation(
            "CoreML model returned no outputs",
        ));
    }
    Ok(outputs[0].clone())
}
/// Applies `activation_type` to `input` through a CoreML model obtained from
/// the global backend, returning a clone of the model's first output.
///
/// # Errors
/// Propagates model-manager errors; an empty output list is reported as an
/// unsupported-operation error.
pub fn coreml_activation<T>(
    input: &Tensor<T>,
    activation_type: CoreMLActivationType,
) -> CoreMLResult<Tensor<T>>
where
    T: Float + FromPrimitive + ScalarOperand + 'static,
{
    let manager = CoreMLBackend::global().model_manager();
    let handle = manager.get_or_create_activation_model::<T>(activation_type, input.shape())?;
    let inputs = vec![input];
    let outputs = manager.execute_model(&handle, &inputs)?;
    if !outputs.is_empty() {
        Ok(outputs[0].clone())
    } else {
        Err(error_helpers::unsupported_operation(
            "CoreML model returned no outputs",
        ))
    }
}
}