pub struct ThreadedCpuBackend { /* private fields */ }
Expand description
CPU backend with a dedicated rayon thread pool for predictable kernel threading depth.
Implementations§
Source§impl ThreadedCpuBackend
impl ThreadedCpuBackend
Source§pub fn new(num_threads: NonZeroUsize) -> Result<Self, KernelError>
pub fn new(num_threads: NonZeroUsize) -> Result<Self, KernelError>
Build a threaded backend with default parallel matmul heuristics.
Source§pub fn with_config(
num_threads: NonZeroUsize,
matmul_config: ParallelMatmulConfig,
) -> Result<Self, KernelError>
pub fn with_config( num_threads: NonZeroUsize, matmul_config: ParallelMatmulConfig, ) -> Result<Self, KernelError>
Build a threaded backend with explicit parallel-matmul configuration.
Source§pub fn with_full_config(
num_threads: NonZeroUsize,
config: ThreadedCpuBackendConfig,
) -> Result<Self, KernelError>
pub fn with_full_config( num_threads: NonZeroUsize, config: ThreadedCpuBackendConfig, ) -> Result<Self, KernelError>
Build a threaded backend with explicit matmul and elementwise configuration.
Source§pub const fn matmul_config(&self) -> ParallelMatmulConfig
pub const fn matmul_config(&self) -> ParallelMatmulConfig
Matmul parallelism knobs used by this backend.
Source§pub const fn elementwise_config(&self) -> ParallelElementwiseConfig
pub const fn elementwise_config(&self) -> ParallelElementwiseConfig
Elementwise parallelism knobs used by this backend.
Trait Implementations§
Source§impl Backend for ThreadedCpuBackend
impl Backend for ThreadedCpuBackend
fn add(&self, lhs: &Tensor, rhs: &Tensor) -> Result<Tensor, KernelError>
fn sub(&self, lhs: &Tensor, rhs: &Tensor) -> Result<Tensor, KernelError>
fn mul(&self, lhs: &Tensor, rhs: &Tensor) -> Result<Tensor, KernelError>
fn relu(&self, input: &Tensor) -> Tensor
fn sigmoid(&self, input: &Tensor) -> Tensor
fn exp(&self, input: &Tensor) -> Tensor
fn tanh_act(&self, input: &Tensor) -> Tensor
fn softmax_last_dim(&self, input: &Tensor) -> Result<Tensor, KernelError>
fn log_softmax_last_dim(&self, input: &Tensor) -> Result<Tensor, KernelError>
fn logsumexp_last_dim(&self, input: &Tensor) -> Result<Tensor, KernelError>
fn layer_norm_last_dim( &self, input: &Tensor, params: LayerNormLastDimParams<'_>, ) -> Result<Tensor, KernelError>
fn max_pool2d_nhwc( &self, input: &Tensor, kernel_h: usize, kernel_w: usize, stride_h: usize, stride_w: usize, ) -> Result<Tensor, KernelError>
fn avg_pool2d_nhwc( &self, input: &Tensor, kernel_h: usize, kernel_w: usize, stride_h: usize, stride_w: usize, ) -> Result<Tensor, KernelError>
fn conv2d_nhwc( &self, input: &Tensor, kernel: &Tensor, bias: Option<&Tensor>, stride_h: usize, stride_w: usize, ) -> Result<Tensor, KernelError>
fn depthwise_conv2d_nhwc( &self, input: &Tensor, kernel: &Tensor, bias: Option<&Tensor>, stride_h: usize, stride_w: usize, ) -> Result<Tensor, KernelError>
fn separable_conv2d_nhwc( &self, input: &Tensor, params: SeparableConv2dParams<'_>, stride_h: usize, stride_w: usize, ) -> Result<Tensor, KernelError>
fn batch_norm2d_nhwc( &self, input: &Tensor, params: BatchNorm2dParams<'_>, ) -> Result<Tensor, KernelError>
fn group_norm_nhwc( &self, input: &Tensor, params: GroupNormNhwcParams<'_>, ) -> Result<Tensor, KernelError>
fn rms_norm_last_dim( &self, input: &Tensor, params: RmsNormLastDimParams<'_>, ) -> Result<Tensor, KernelError>
fn matmul_2d(&self, lhs: &Tensor, rhs: &Tensor) -> Result<Tensor, KernelError>
Source§fn div(&self, lhs: &Tensor, rhs: &Tensor) -> Result<Tensor, KernelError>
fn div(&self, lhs: &Tensor, rhs: &Tensor) -> Result<Tensor, KernelError>
Element-wise division with broadcast.
Source§fn transpose_2d(&self, input: &Tensor) -> Result<Tensor, KernelError>
fn transpose_2d(&self, input: &Tensor) -> Result<Tensor, KernelError>
Transpose a 2-D matrix.
Source§fn sum_all(&self, input: &Tensor) -> Tensor
fn sum_all(&self, input: &Tensor) -> Tensor
Scalar sum of all elements (returns a scalar tensor).
Source§fn mul_scalar(&self, input: &Tensor, scalar: f32) -> Tensor
fn mul_scalar(&self, input: &Tensor, scalar: f32) -> Tensor
Multiply every element by a scalar.
Source§fn reciprocal(&self, input: &Tensor) -> Tensor
fn reciprocal(&self, input: &Tensor) -> Tensor
Element-wise reciprocal (1/x).
Source§impl BackwardOps for ThreadedCpuBackend
impl BackwardOps for ThreadedCpuBackend
Source§fn relu_backward(
&self,
upstream: &Tensor,
forward_input: &Tensor,
) -> Result<Tensor, KernelError>
fn relu_backward( &self, upstream: &Tensor, forward_input: &Tensor, ) -> Result<Tensor, KernelError>
ReLU backward:
grad_input[i] = upstream[i] * (forward_input[i] > 0 ? 1 : 0).
Source§fn sigmoid_backward(
&self,
upstream: &Tensor,
forward_output: &Tensor,
) -> Result<Tensor, KernelError>
fn sigmoid_backward( &self, upstream: &Tensor, forward_output: &Tensor, ) -> Result<Tensor, KernelError>
Sigmoid backward:
grad_input[i] = upstream[i] * s[i] * (1 - s[i]) where s = forward output.
Source§fn tanh_backward(
&self,
upstream: &Tensor,
forward_output: &Tensor,
) -> Result<Tensor, KernelError>
fn tanh_backward( &self, upstream: &Tensor, forward_output: &Tensor, ) -> Result<Tensor, KernelError>
Tanh backward:
grad_input[i] = upstream[i] * (1 - t[i]^2) where t = forward output.
Source§fn exp_backward(
&self,
upstream: &Tensor,
forward_output: &Tensor,
) -> Result<Tensor, KernelError>
fn exp_backward( &self, upstream: &Tensor, forward_output: &Tensor, ) -> Result<Tensor, KernelError>
Exp backward:
grad_input[i] = upstream[i] * e[i] where e = forward output.
Source§fn reduce_sum_backward(
&self,
upstream: &Tensor,
original_shape: &[usize],
) -> Result<Tensor, KernelError>
fn reduce_sum_backward( &self, upstream: &Tensor, original_shape: &[usize], ) -> Result<Tensor, KernelError>
Reduce-sum backward: broadcast scalar gradient to all elements of
original_shape.
Source§fn matmul_backward(
&self,
upstream: &Tensor,
lhs: &Tensor,
rhs: &Tensor,
) -> Result<(Tensor, Tensor), KernelError>
fn matmul_backward( &self, upstream: &Tensor, lhs: &Tensor, rhs: &Tensor, ) -> Result<(Tensor, Tensor), KernelError>
MatMul backward:
grad_lhs = upstream @ rhs^T, grad_rhs = lhs^T @ upstream.
Source§fn add_backward(
&self,
upstream: &Tensor,
_lhs: &Tensor,
_rhs: &Tensor,
) -> Result<(Tensor, Tensor), KernelError>
fn add_backward( &self, upstream: &Tensor, _lhs: &Tensor, _rhs: &Tensor, ) -> Result<(Tensor, Tensor), KernelError>
Add backward: gradient passes through unchanged to both operands.
Source§fn sub_backward(
&self,
upstream: &Tensor,
_lhs: &Tensor,
_rhs: &Tensor,
) -> Result<(Tensor, Tensor), KernelError>
fn sub_backward( &self, upstream: &Tensor, _lhs: &Tensor, _rhs: &Tensor, ) -> Result<(Tensor, Tensor), KernelError>
Sub backward:
grad_lhs = upstream, grad_rhs = -upstream.
Source§fn mul_backward(
&self,
upstream: &Tensor,
lhs: &Tensor,
rhs: &Tensor,
) -> Result<(Tensor, Tensor), KernelError>
fn mul_backward( &self, upstream: &Tensor, lhs: &Tensor, rhs: &Tensor, ) -> Result<(Tensor, Tensor), KernelError>
Mul backward:
grad_lhs = upstream * rhs, grad_rhs = upstream * lhs.
Auto Trait Implementations§
impl Freeze for ThreadedCpuBackend
impl !RefUnwindSafe for ThreadedCpuBackend
impl Send for ThreadedCpuBackend
impl Sync for ThreadedCpuBackend
impl Unpin for ThreadedCpuBackend
impl UnsafeUnpin for ThreadedCpuBackend
impl !UnwindSafe for ThreadedCpuBackend
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more