// etensor-core 0.0.1

//! Standardized trait bounds for hardware execution routing.
//!
//! Because every hardware backend implements these traits, the `Dispatcher`
//! stays entirely decoupled from the underlying physical hardware
//! (CPU, CUDA, MPS, etc.).

use crate::tensor::Tensor;
use crate::errors::EtensorResult;

/// The foundational contract that all physical hardware backends must fulfill.
///
/// Every operation is an associated function (there is no `self` receiver), so a
/// backend is selected through its *type* — e.g. `B::add(&a, &b)` for some
/// `B: Backend` — rather than through an instance. As a consequence this trait
/// cannot be used as a trait object (`dyn Backend`).
///
/// All operations borrow their inputs immutably and return the result as an
/// owned [`Tensor`] inside an [`EtensorResult`]. The conditions under which an
/// operation fails are defined by each implementation and by `crate::errors`;
/// they are not visible from this trait.
pub trait Backend {
    /// Executes an element-wise addition (a + b) and returns a dynamically allocated output tensor.
    ///
    /// NOTE(review): shape/dtype compatibility requirements for `a` and `b`
    /// (e.g. whether broadcasting is supported) are not specified here —
    /// confirm against the backend implementations.
    ///
    /// # Errors
    ///
    /// Failure is reported through [`EtensorResult`]; the concrete conditions
    /// are implementation-defined.
    fn add(a: &Tensor, b: &Tensor) -> EtensorResult<Tensor>;

    /// Executes an element-wise multiplication (a * b) and returns a dynamically allocated output tensor.
    ///
    /// NOTE(review): shape/dtype compatibility requirements for `a` and `b`
    /// are not specified here — confirm against the backend implementations.
    ///
    /// # Errors
    ///
    /// Failure is reported through [`EtensorResult`]; the concrete conditions
    /// are implementation-defined.
    fn mul(a: &Tensor, b: &Tensor) -> EtensorResult<Tensor>;

    /// Executes matrix multiplication (a @ b) and returns a dynamically allocated output tensor.
    ///
    /// NOTE(review): the accepted ranks and whether batched operands are
    /// supported are not specified here — confirm against the backend
    /// implementations.
    ///
    /// # Errors
    ///
    /// Failure is reported through [`EtensorResult`]; the concrete conditions
    /// are implementation-defined.
    fn matmul(a: &Tensor, b: &Tensor) -> EtensorResult<Tensor>;

    /// Executes a global sum reduction, collapsing the tensor into a single scalar.
    ///
    /// The scalar is handed back as a [`Tensor`], not as a primitive number.
    ///
    /// # Errors
    ///
    /// Failure is reported through [`EtensorResult`]; the concrete conditions
    /// are implementation-defined.
    fn sum_all(a: &Tensor) -> EtensorResult<Tensor>;

    /// Executes a global mean reduction, collapsing the tensor into a single scalar.
    ///
    /// The scalar is handed back as a [`Tensor`], not as a primitive number.
    ///
    /// NOTE(review): behavior for an empty input tensor is not specified
    /// here — confirm against the backend implementations.
    ///
    /// # Errors
    ///
    /// Failure is reported through [`EtensorResult`]; the concrete conditions
    /// are implementation-defined.
    fn mean_all(a: &Tensor) -> EtensorResult<Tensor>;

    /// Executes a global max reduction, collapsing the tensor into a single scalar.
    ///
    /// The scalar is handed back as a [`Tensor`], not as a primitive number.
    ///
    /// NOTE(review): behavior for an empty input tensor is not specified
    /// here — confirm against the backend implementations.
    ///
    /// # Errors
    ///
    /// Failure is reported through [`EtensorResult`]; the concrete conditions
    /// are implementation-defined.
    fn max_all(a: &Tensor) -> EtensorResult<Tensor>;

    /// Executes the Rectified Linear Unit (ReLU) activation function: f(A) = max(0, A)
    ///
    /// # Errors
    ///
    /// Failure is reported through [`EtensorResult`]; the concrete conditions
    /// are implementation-defined.
    fn relu(a: &Tensor) -> EtensorResult<Tensor>;

    /// Executes the Sigmoid activation function.
    ///
    /// # Errors
    ///
    /// Failure is reported through [`EtensorResult`]; the concrete conditions
    /// are implementation-defined.
    fn sigmoid(a: &Tensor) -> EtensorResult<Tensor>;

    /// Executes Fused Addition and ReLU: f(A, B) = max(0, A + B)
    ///
    /// Declared as its own operation so a backend can supply a single fused
    /// implementation; whether a given backend actually fuses the two steps
    /// is up to that implementation.
    ///
    /// # Errors
    ///
    /// Failure is reported through [`EtensorResult`]; the concrete conditions
    /// are implementation-defined.
    fn add_relu(a: &Tensor, b: &Tensor) -> EtensorResult<Tensor>;

    /// Executes Fused Linear Layer: y = X @ W + b
    ///
    /// `x` is the input, `w` the weight and `b` the bias of the formula above.
    ///
    /// NOTE(review): the expected layout of `w` (e.g. whether it is stored
    /// transposed) and how `b` is broadcast over the product are not
    /// specified here — confirm against the backend implementations.
    ///
    /// # Errors
    ///
    /// Failure is reported through [`EtensorResult`]; the concrete conditions
    /// are implementation-defined.
    fn linear(x: &Tensor, w: &Tensor, b: &Tensor) -> EtensorResult<Tensor>;

}