torsh-backend 0.1.2

Backend abstraction layer for ToRSh
//! Apple Neural Engine integration for Apple Silicon devices
//!
//! This module provides access to the Apple Neural Engine (ANE) through Core ML
//! for accelerated neural network operations on Apple Silicon Macs and iOS devices.
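//!
//! A minimal usage sketch (marked `ignore`: it only runs on Apple Silicon with
//! Core ML present, and the shapes and buffers here are illustrative):
//!
//! ```ignore
//! use torsh_core::shape::Shape;
//!
//! let builder = NeuralEngineOpsBuilder::new()?;
//! if builder.is_available() {
//!     // Compile once; the returned key identifies the cached model.
//!     let key = builder.create_transformer_matmul(
//!         &Shape::from(vec![1, 512]),    // input
//!         &Shape::from(vec![2048, 512]), // weight, stored transposed
//!         &Shape::from(vec![1, 2048]),   // output
//!         true,
//!     )?;
//!     // `inputs` / `outputs` are `NeuralEngineBuffer`s over caller-owned memory.
//!     builder.execute(&key, &inputs, &mut outputs)?;
//! }
//! ```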

use crate::error::{BackendError, BackendResult};
use metal::NSUInteger;
use objc2::msg_send;
use objc2::runtime::AnyObject;
use std::collections::HashMap;
use std::ptr;
use std::sync::{Arc, Mutex, OnceLock};
use torsh_core::{dtype::DType, shape::Shape};

/// Neural Engine device capabilities
#[derive(Debug, Clone)]
pub struct NeuralEngineCapabilities {
    /// Whether Neural Engine is available
    pub available: bool,
    /// Supported model formats
    pub supported_formats: Vec<ModelFormat>,
    /// Maximum input dimensions
    pub max_input_dims: Vec<usize>,
    /// Maximum batch size
    pub max_batch_size: usize,
    /// Supported data types
    pub supported_dtypes: Vec<DType>,
    /// Memory bandwidth to ANE
    pub memory_bandwidth_gbps: Option<f32>,
    /// Number of neural processing units
    pub processing_units: usize,
}

/// Supported model formats for Neural Engine
#[derive(Debug, Clone, PartialEq)]
pub enum ModelFormat {
    /// Core ML model format
    CoreML,
    /// ONNX models (converted to Core ML)
    ONNX,
    /// MIL (Model Intermediate Language)
    MIL,
}

/// Neural Engine operation types
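///
/// For example, layer normalization over the last axis of a
/// `[batch, seq, 768]` activation would be described as (illustrative values):
///
/// ```ignore
/// let op = NeuralEngineOperation::LayerNorm {
///     normalized_shape: vec![768],
///     eps: 1e-5,
/// };
/// ```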
#[derive(Debug, Clone)]
pub enum NeuralEngineOperation {
    /// Matrix multiplication optimized for transformers
    MatMul {
        transpose_a: bool,
        transpose_b: bool,
    },
    /// Convolution operations
    Conv2D {
        kernel_size: (usize, usize),
        stride: (usize, usize),
        padding: (usize, usize),
        groups: usize,
    },
    /// Attention mechanisms
    MultiHeadAttention {
        num_heads: usize,
        head_dim: usize,
        dropout: f32,
    },
    /// Layer normalization
    LayerNorm {
        normalized_shape: Vec<usize>,
        eps: f32,
    },
    /// GELU activation
    GELU,
    /// SiLU/Swish activation  
    SiLU,
    /// Softmax
    Softmax { dim: i32 },
}

/// Neural Engine context for managing ANE operations
pub struct NeuralEngineContext {
    /// Core ML compute unit configuration
    compute_config: *mut AnyObject,
    /// Model cache for compiled operations
    model_cache: Arc<Mutex<HashMap<String, CompiledModel>>>,
    /// Device capabilities
    capabilities: NeuralEngineCapabilities,
    /// Performance statistics
    performance_stats: Arc<Mutex<NeuralEngineStats>>,
}

impl std::fmt::Debug for NeuralEngineContext {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("NeuralEngineContext")
            .field("compute_config", &format!("{:p}", self.compute_config))
            .field("model_cache", &self.model_cache)
            .field("capabilities", &self.capabilities)
            .field("performance_stats", &self.performance_stats)
            .finish()
    }
}

/// Compiled model for Neural Engine execution
struct CompiledModel {
    /// Core ML model handle
    model: *mut AnyObject,
    /// Input specifications
    input_specs: Vec<TensorSpec>,
    /// Output specifications
    output_specs: Vec<TensorSpec>,
    /// Compilation timestamp
    compiled_at: std::time::Instant,
    /// Usage count for cache management
    usage_count: usize,
}

impl std::fmt::Debug for CompiledModel {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("CompiledModel")
            .field("model", &format!("{:p}", self.model))
            .field("input_specs", &self.input_specs)
            .field("output_specs", &self.output_specs)
            .field("compiled_at", &self.compiled_at)
            .field("usage_count", &self.usage_count)
            .finish()
    }
}

/// Tensor specification for Neural Engine operations
#[derive(Debug, Clone)]
pub struct TensorSpec {
    /// Tensor name
    name: String,
    /// Data type
    dtype: DType,
    /// Shape
    shape: Shape,
    /// Memory layout
    layout: MemoryLayout,
}

/// Memory layout for tensors
#[derive(Debug, Clone, PartialEq)]
pub enum MemoryLayout {
    /// Contiguous row-major layout
    RowMajor,
    /// Contiguous column-major layout
    ColumnMajor,
    /// Channels-last layout for images
    ChannelsLast,
    /// Planar layout for images
    Planar,
}

/// Neural Engine performance statistics
#[derive(Debug, Default, Clone)]
pub struct NeuralEngineStats {
    /// Total operations executed
    total_operations: u64,
    /// Total execution time in microseconds
    total_execution_time_us: u64,
    /// Average throughput in operations per second
    avg_throughput: f64,
    /// Memory usage statistics
    memory_usage: NeuralEngineMemoryStats,
    /// Model compilation statistics
    compilation_stats: CompilationStats,
}

/// Memory usage statistics for Neural Engine
#[derive(Debug, Default, Clone)]
struct NeuralEngineMemoryStats {
    /// Peak memory usage in bytes
    peak_memory_usage: usize,
    /// Current memory usage in bytes
    current_memory_usage: usize,
    /// Number of memory transfers to ANE
    transfers_to_ane: u64,
    /// Number of memory transfers from ANE
    transfers_from_ane: u64,
    /// Total bytes transferred to ANE
    total_bytes_to_ane: u64,
    /// Total bytes transferred from ANE
    total_bytes_from_ane: u64,
}

/// Model compilation statistics
#[derive(Debug, Default, Clone)]
struct CompilationStats {
    /// Number of models compiled
    models_compiled: u64,
    /// Total compilation time in milliseconds
    total_compilation_time_ms: u64,
    /// Average compilation time per model
    avg_compilation_time_ms: f64,
    /// Cache hit rate
    cache_hit_rate: f64,
}

/// Global Neural Engine context
static NEURAL_ENGINE_CONTEXT: OnceLock<Arc<Mutex<NeuralEngineContext>>> = OnceLock::new();

impl NeuralEngineContext {
    /// Create a new Neural Engine context
    pub fn new() -> BackendResult<Self> {
        let capabilities = Self::detect_capabilities()?;

        if !capabilities.available {
            return Err(BackendError::UnsupportedOperation {
                op: "neural_engine_access".to_string(),
                dtype: "neural_engine".to_string(),
            });
        }

        let compute_config = Self::create_compute_config()?;

        Ok(Self {
            compute_config,
            model_cache: Arc::new(Mutex::new(HashMap::new())),
            capabilities,
            performance_stats: Arc::new(Mutex::new(NeuralEngineStats::default())),
        })
    }

    /// Get the global Neural Engine context
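    ///
    /// This never fails: if the Neural Engine cannot be initialized, a
    /// disabled placeholder context (`available == false`) is installed
    /// instead, so callers can probe availability uniformly.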
    pub fn global() -> BackendResult<Arc<Mutex<NeuralEngineContext>>> {
        let context = NEURAL_ENGINE_CONTEXT
            .get_or_init(|| {
                match Self::new() {
                    Ok(context) => Arc::new(Mutex::new(context)),
                    Err(_) => {
                        // Create a dummy context if ANE is not available
                        let dummy_capabilities = NeuralEngineCapabilities {
                            available: false,
                            supported_formats: vec![],
                            max_input_dims: vec![],
                            max_batch_size: 0,
                            supported_dtypes: vec![],
                            memory_bandwidth_gbps: None,
                            processing_units: 0,
                        };

                        let dummy_context = NeuralEngineContext {
                            compute_config: ptr::null_mut(),
                            model_cache: Arc::new(Mutex::new(HashMap::new())),
                            capabilities: dummy_capabilities,
                            performance_stats: Arc::new(Mutex::new(NeuralEngineStats::default())),
                        };

                        Arc::new(Mutex::new(dummy_context))
                    }
                }
            })
            .clone();

        Ok(context)
    }

    /// Detect Neural Engine capabilities
    fn detect_capabilities() -> BackendResult<NeuralEngineCapabilities> {
        #[allow(unused_unsafe)]
        unsafe {
            // Check if we're running on Apple Silicon
            #[cfg(all(target_arch = "aarch64", target_os = "macos"))]
            {
                // Check for Neural Engine availability through Core ML
                // Try to check if CoreML is available - if not, return early
                // Note: This may fail if CoreML framework is not available (e.g., in CI/test environments)
                // We catch the panic to handle this gracefully
                use std::panic::AssertUnwindSafe;
                let coreml_available = std::panic::catch_unwind(AssertUnwindSafe(|| {
                    // This will panic if MLModel class is not found
                    let _ml_model_class = objc2::class!(MLModel);
                    true
                }))
                .unwrap_or(false);

                if !coreml_available {
                    // CoreML is not available, Neural Engine cannot be used
                    return Ok(NeuralEngineCapabilities {
                        available: false,
                        supported_formats: vec![],
                        max_input_dims: vec![],
                        max_batch_size: 0,
                        supported_dtypes: vec![],
                        memory_bandwidth_gbps: None,
                        processing_units: 0,
                    });
                }
            }

            // If not on Apple Silicon macOS, Neural Engine is not available
            #[cfg(not(all(target_arch = "aarch64", target_os = "macos")))]
            {
                return Ok(NeuralEngineCapabilities {
                    available: false,
                    supported_formats: vec![],
                    max_input_dims: vec![],
                    max_batch_size: 0,
                    supported_dtypes: vec![],
                    memory_bandwidth_gbps: None,
                    processing_units: 0,
                });
            }

            #[cfg(all(target_arch = "aarch64", target_os = "macos"))]
            {
                // Check for Neural Engine compute units
                use std::panic::AssertUnwindSafe;
                let coreml_compute_units_available =
                    std::panic::catch_unwind(AssertUnwindSafe(|| {
                        let _compute_units_class = objc2::class!(MLComputeUnits);
                        true
                    }))
                    .unwrap_or(false);

                if !coreml_compute_units_available {
                    return Ok(NeuralEngineCapabilities {
                        available: false,
                        supported_formats: vec![],
                        max_input_dims: vec![],
                        max_batch_size: 0,
                        supported_dtypes: vec![],
                        memory_bandwidth_gbps: None,
                        processing_units: 0,
                    });
                }

                // CoreML is available, assume Neural Engine is available
                Ok(NeuralEngineCapabilities {
                    available: true,
                    supported_formats: vec![
                        ModelFormat::CoreML,
                        ModelFormat::ONNX,
                        ModelFormat::MIL,
                    ],
                    max_input_dims: vec![8192, 8192, 8192, 8192], // Typical ANE limits
                    max_batch_size: 16,                           // Conservative batch size
                    supported_dtypes: vec![DType::F16, DType::F32, DType::I8, DType::U8],
                    memory_bandwidth_gbps: Some(800.0), // Estimated for M1/M2 ANE
                    processing_units: 16,               // Typical ANE configuration
                })
            }
        }
    }

    /// Create Core ML compute configuration for Neural Engine
    fn create_compute_config() -> BackendResult<*mut AnyObject> {
        unsafe {
            #[cfg(target_arch = "aarch64")]
            {
                let config_class = objc2::class!(MLModelConfiguration);
                #[cfg(not(target_os = "macos"))]
                if false {
                    return Err(BackendError::InitializationError(
                        "Core ML not available".to_string(),
                    ));
                }

                let config: *mut AnyObject = msg_send![config_class, alloc];
                let config: *mut AnyObject = msg_send![config, init];

                // Set compute units to use Neural Engine when available
                let _compute_units_class = objc2::class!(MLComputeUnits);
                #[cfg(target_os = "macos")]
                {
                    let compute_units_all: NSUInteger = 2; // MLComputeUnitsAll (CPUOnly = 0, CPUAndGPU = 1, All = 2)
                    let _: () = msg_send![config, setComputeUnits: compute_units_all];
                }

                // Enable low precision if available (better for ANE)
                let _: () = msg_send![config, setAllowLowPrecisionAccumulationOnGPU: true];

                Ok(config)
            }

            #[cfg(not(target_arch = "aarch64"))]
            {
                Err(BackendError::UnsupportedOperation(
                    "Neural Engine only available on Apple Silicon".to_string(),
                ))
            }
        }
    }

    /// Check if Neural Engine is available
    pub fn is_available(&self) -> bool {
        self.capabilities.available
    }

    /// Get Neural Engine capabilities
    pub fn capabilities(&self) -> &NeuralEngineCapabilities {
        &self.capabilities
    }

    /// Compile a neural network operation for Neural Engine execution
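    ///
    /// Returns a cache key; a later call with an identical operation and
    /// specs reuses the cached compiled model instead of recompiling.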
    pub fn compile_operation(
        &mut self,
        operation: &NeuralEngineOperation,
        input_specs: &[TensorSpec],
        output_specs: &[TensorSpec],
    ) -> BackendResult<String> {
        if !self.capabilities.available {
            return Err(
                crate::metal::error::metal_errors::unsupported_operation_error(
                    "Neural Engine not available",
                    None,
                ),
            );
        }

        // Generate a unique key for this operation
        let operation_key = self.generate_operation_key(operation, input_specs, output_specs);

        // Check cache first
        {
            let cache = self
                .model_cache
                .lock()
                .expect("lock should not be poisoned");
            if cache.contains_key(&operation_key) {
                // Update cache statistics
                let mut stats = self
                    .performance_stats
                    .lock()
                    .expect("lock should not be poisoned");
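                // Exponential moving average (decay 0.9): a hit nudges the
                // rate toward 1.0; the miss path below nudges it toward 0.0.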
                stats.compilation_stats.cache_hit_rate =
                    stats.compilation_stats.cache_hit_rate * 0.9 + 0.1;
                return Ok(operation_key);
            }
        }

        // Compile the operation
        let start_time = std::time::Instant::now();
        let compiled_model = self.compile_to_coreml(operation, input_specs, output_specs)?;
        let compilation_time = start_time.elapsed();

        // Cache the compiled model
        {
            let mut cache = self
                .model_cache
                .lock()
                .expect("lock should not be poisoned");
            cache.insert(operation_key.clone(), compiled_model);
        }

        // Update compilation statistics
        {
            let mut stats = self
                .performance_stats
                .lock()
                .expect("lock should not be poisoned");
            stats.compilation_stats.models_compiled += 1;
            stats.compilation_stats.total_compilation_time_ms +=
                compilation_time.as_millis() as u64;
            stats.compilation_stats.avg_compilation_time_ms =
                stats.compilation_stats.total_compilation_time_ms as f64
                    / stats.compilation_stats.models_compiled as f64;
            // Cache miss: decay the hit-rate moving average toward 0.0.
            stats.compilation_stats.cache_hit_rate *= 0.9;
        }

        Ok(operation_key)
    }

    /// Execute a compiled operation on Neural Engine
    pub fn execute_operation(
        &mut self,
        operation_key: &str,
        inputs: &[NeuralEngineBuffer],
        outputs: &mut [NeuralEngineBuffer],
    ) -> BackendResult<()> {
        if !self.capabilities.available {
            return Err(
                crate::metal::error::metal_errors::unsupported_operation_error(
                    "Neural Engine not available",
                    None,
                ),
            );
        }

        let start_time = std::time::Instant::now();

        // Get compiled model from cache
        let model = {
            let mut cache = self
                .model_cache
                .lock()
                .expect("lock should not be poisoned");
            let model_entry = cache.get_mut(operation_key).ok_or_else(|| {
                BackendError::InvalidArgument(format!("Operation not found: {}", operation_key))
            })?;

            model_entry.usage_count += 1;
            model_entry.model
        };

        // Execute the model
        self.execute_coreml_model(model, inputs, outputs)?;

        let execution_time = start_time.elapsed();

        // Update performance statistics
        {
            let mut stats = self
                .performance_stats
                .lock()
                .expect("lock should not be poisoned");
            stats.total_operations += 1;
            stats.total_execution_time_us += execution_time.as_micros() as u64;
            stats.avg_throughput = stats.total_operations as f64
                / (stats.total_execution_time_us as f64 / 1_000_000.0);
        }

        Ok(())
    }

    /// Generate a unique key for an operation
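    ///
    /// Keys come from `std::collections::hash_map::DefaultHasher`, whose
    /// algorithm is not guaranteed stable across Rust releases, so they are
    /// only meaningful for in-process caching (their sole use here).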
    fn generate_operation_key(
        &self,
        operation: &NeuralEngineOperation,
        input_specs: &[TensorSpec],
        output_specs: &[TensorSpec],
    ) -> String {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();

        // Hash operation type and parameters
        std::mem::discriminant(operation).hash(&mut hasher);
        format!("{:?}", operation).hash(&mut hasher);

        // Hash input and output specifications
        for spec in input_specs {
            spec.name.hash(&mut hasher);
            format!("{:?}", spec.dtype).hash(&mut hasher);
            spec.shape.as_slice().hash(&mut hasher);
            std::mem::discriminant(&spec.layout).hash(&mut hasher);
        }

        for spec in output_specs {
            spec.name.hash(&mut hasher);
            format!("{:?}", spec.dtype).hash(&mut hasher);
            spec.shape.as_slice().hash(&mut hasher);
            std::mem::discriminant(&spec.layout).hash(&mut hasher);
        }

        format!("ane_op_{:016x}", hasher.finish())
    }

    /// Compile operation to Core ML model
    fn compile_to_coreml(
        &self,
        _operation: &NeuralEngineOperation,
        input_specs: &[TensorSpec],
        output_specs: &[TensorSpec],
    ) -> BackendResult<CompiledModel> {
        #[cfg(target_arch = "aarch64")]
        {
            // This is a simplified implementation
            // In a real implementation, you would use Core ML model builder APIs
            // or compile from MIL (Model Intermediate Language)

            unsafe {
                let model_class = objc2::class!(MLModel);
                #[cfg(not(target_os = "macos"))]
                if false {
                    return Err(BackendError::InitializationError(
                        "Core ML model class not available".to_string(),
                    ));
                }

                // For now, return a placeholder model
                // Real implementation would create an actual Core ML model
                let model: *mut AnyObject = msg_send![model_class, alloc];

                Ok(CompiledModel {
                    model,
                    input_specs: input_specs.to_vec(),
                    output_specs: output_specs.to_vec(),
                    compiled_at: std::time::Instant::now(),
                    usage_count: 0,
                })
            }
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            Err(BackendError::UnsupportedOperation(
                "Neural Engine compilation only available on Apple Silicon".to_string(),
            ))
        }
    }

    /// Execute Core ML model
    fn execute_coreml_model(
        &self,
        _model: *mut AnyObject,
        inputs: &[NeuralEngineBuffer],
        outputs: &mut [NeuralEngineBuffer],
    ) -> BackendResult<()> {
        #[cfg(target_arch = "aarch64")]
        {
            // This is a simplified implementation
            // Real implementation would:
            // 1. Convert inputs to MLMultiArray or MLPixelBuffer
            // 2. Create MLFeatureProvider for inputs
            // 3. Execute model prediction
            // 4. Extract outputs and copy to output buffers

            // Placeholder implementation that just copies data
            if inputs.len() != outputs.len() {
                return Err(BackendError::InvalidArgument(
                    "Input and output count mismatch".to_string(),
                ));
            }

            for (input, output) in inputs.iter().zip(outputs.iter_mut()) {
                if input.size != output.size {
                    return Err(BackendError::InvalidArgument(
                        "Input and output size mismatch".to_string(),
                    ));
                }

                // Simple copy for placeholder
                unsafe {
                    std::ptr::copy_nonoverlapping(input.data, output.data, input.size);
                }
            }

            Ok(())
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            Err(BackendError::UnsupportedOperation(
                "Neural Engine execution only available on Apple Silicon".to_string(),
            ))
        }
    }

    /// Get performance statistics
    pub fn performance_stats(&self) -> NeuralEngineStats {
        (*self
            .performance_stats
            .lock()
            .expect("lock should not be poisoned"))
        .clone()
    }

    /// Clear model cache
    pub fn clear_cache(&mut self) {
        let mut cache = self
            .model_cache
            .lock()
            .expect("lock should not be poisoned");
        cache.clear();

        // Reset compilation stats
        let mut stats = self
            .performance_stats
            .lock()
            .expect("lock should not be poisoned");
        stats.compilation_stats = CompilationStats::default();
    }
}

/// Neural Engine buffer for data transfer
pub struct NeuralEngineBuffer {
    /// Pointer to data
    pub data: *mut u8,
    /// Size in bytes
    pub size: usize,
    /// Data type
    pub dtype: DType,
    /// Shape
    pub shape: Shape,
    /// Memory layout
    pub layout: MemoryLayout,
}

impl NeuralEngineBuffer {
    /// Create a new Neural Engine buffer
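    ///
    /// The buffer does not own `data`; the caller must keep the backing
    /// allocation alive and unaliased for as long as the buffer is in use.
    ///
    /// A minimal sketch, assuming a host-side `Vec<f32>` as backing storage:
    ///
    /// ```ignore
    /// let mut host = vec![0f32; 4];
    /// let buf = NeuralEngineBuffer::new(
    ///     host.as_mut_ptr() as *mut u8,
    ///     host.len() * std::mem::size_of::<f32>(),
    ///     DType::F32,
    ///     Shape::from(vec![2, 2]),
    ///     MemoryLayout::RowMajor,
    /// );
    /// ```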
    pub fn new(
        data: *mut u8,
        size: usize,
        dtype: DType,
        shape: Shape,
        layout: MemoryLayout,
    ) -> Self {
        Self {
            data,
            size,
            dtype,
            shape,
            layout,
        }
    }
}

/// Neural Engine operations builder for high-level operations
#[derive(Debug)]
pub struct NeuralEngineOpsBuilder {
    context: Arc<Mutex<NeuralEngineContext>>,
}

impl NeuralEngineOpsBuilder {
    /// Create a new operations builder
    pub fn new() -> BackendResult<Self> {
        let context = NeuralEngineContext::global()?;
        Ok(Self { context })
    }

    /// Create optimized matrix multiplication for transformers
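    ///
    /// All specs use `F16` (which the ANE favors); when `transpose_weight` is
    /// true, the weight spec is laid out column-major and the compiled
    /// `MatMul` sets `transpose_b`.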
    pub fn create_transformer_matmul(
        &self,
        input_shape: &Shape,
        weight_shape: &Shape,
        output_shape: &Shape,
        transpose_weight: bool,
    ) -> BackendResult<String> {
        let mut context = self.context.lock().expect("lock should not be poisoned");

        let input_spec = TensorSpec {
            name: "input".to_string(),
            dtype: DType::F16, // Use F16 for better ANE performance
            shape: input_shape.clone(),
            layout: MemoryLayout::RowMajor,
        };

        let weight_spec = TensorSpec {
            name: "weight".to_string(),
            dtype: DType::F16,
            shape: weight_shape.clone(),
            layout: if transpose_weight {
                MemoryLayout::ColumnMajor
            } else {
                MemoryLayout::RowMajor
            },
        };

        let output_spec = TensorSpec {
            name: "output".to_string(),
            dtype: DType::F16,
            shape: output_shape.clone(),
            layout: MemoryLayout::RowMajor,
        };

        let operation = NeuralEngineOperation::MatMul {
            transpose_a: false,
            transpose_b: transpose_weight,
        };

        context.compile_operation(&operation, &[input_spec, weight_spec], &[output_spec])
    }

    /// Create optimized multi-head attention
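    ///
    /// A sketch for a 12-head, 64-dim-per-head layer over 128 tokens
    /// (illustrative values; requires Apple Silicon):
    ///
    /// ```ignore
    /// // `builder` is a NeuralEngineOpsBuilder
    /// let key = builder.create_multi_head_attention(128, 12, 64, 0.0)?;
    /// ```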
    pub fn create_multi_head_attention(
        &self,
        sequence_length: usize,
        num_heads: usize,
        head_dim: usize,
        dropout: f32,
    ) -> BackendResult<String> {
        let mut context = self.context.lock().expect("lock should not be poisoned");

        let batch_size = 1; // Will be dynamically handled
        let embed_dim = num_heads * head_dim;

        let input_spec = TensorSpec {
            name: "input".to_string(),
            dtype: DType::F16,
            shape: Shape::from(vec![batch_size, sequence_length, embed_dim]),
            layout: MemoryLayout::RowMajor,
        };

        let output_spec = TensorSpec {
            name: "output".to_string(),
            dtype: DType::F16,
            shape: Shape::from(vec![batch_size, sequence_length, embed_dim]),
            layout: MemoryLayout::RowMajor,
        };

        let operation = NeuralEngineOperation::MultiHeadAttention {
            num_heads,
            head_dim,
            dropout,
        };

        context.compile_operation(&operation, &[input_spec], &[output_spec])
    }

    /// Execute compiled operation
    pub fn execute(
        &self,
        operation_key: &str,
        inputs: &[NeuralEngineBuffer],
        outputs: &mut [NeuralEngineBuffer],
    ) -> BackendResult<()> {
        let mut context = self.context.lock().expect("lock should not be poisoned");
        context.execute_operation(operation_key, inputs, outputs)
    }

    /// Check if Neural Engine is available
    pub fn is_available(&self) -> bool {
        let context = self.context.lock().expect("lock should not be poisoned");
        context.is_available()
    }
}

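// SAFETY: `NeuralEngineContext` holds raw Objective-C pointers, which are not
// `Send`/`Sync` by default. As used in this module the context is only shared
// through `Arc<Mutex<_>>` (see `global`), so the pointers are never touched by
// two threads at once; these impls assert that invariant.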
unsafe impl Send for NeuralEngineContext {}
unsafe impl Sync for NeuralEngineContext {}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_neural_engine_detection() {
        // Detection should succeed on any target; `available` can only be true on Apple Silicon
        #[cfg(target_arch = "aarch64")]
        {
            let capabilities = NeuralEngineContext::detect_capabilities();
            assert!(capabilities.is_ok());

            let caps = capabilities.expect("operation should succeed");
            if caps.available {
                assert!(!caps.supported_formats.is_empty());
                assert!(caps.processing_units > 0);
            }
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            let capabilities = NeuralEngineContext::detect_capabilities();
            assert!(capabilities.is_ok());

            let caps = capabilities.expect("operation should succeed");
            assert!(!caps.available);
        }
    }

    #[test]
    fn test_neural_engine_ops_builder() {
        let builder = NeuralEngineOpsBuilder::new();

        #[cfg(target_arch = "aarch64")]
        {
            // On Apple Silicon the builder should construct; whether the ANE is
            // actually usable depends on the host, so just exercise the query.
            if let Ok(ops_builder) = builder {
                let _ = ops_builder.is_available();
            }
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            // Should create a dummy context on non-Apple Silicon
            if let Ok(ops_builder) = builder {
                assert!(!ops_builder.is_available());
            }
        }
    }
}