pub struct ThreadedCpuBackend { /* private fields */ }
Expand description
CPU backend with a dedicated rayon thread pool for predictable kernel threading depth.
Implementations§
Source§impl ThreadedCpuBackend
impl ThreadedCpuBackend
Source§pub fn new(num_threads: NonZeroUsize) -> Result<Self, KernelError>
pub fn new(num_threads: NonZeroUsize) -> Result<Self, KernelError>
Build a threaded backend with default parallel matmul heuristics.
Source§pub fn with_config(
num_threads: NonZeroUsize,
matmul_config: ParallelMatmulConfig,
) -> Result<Self, KernelError>
pub fn with_config( num_threads: NonZeroUsize, matmul_config: ParallelMatmulConfig, ) -> Result<Self, KernelError>
Build a threaded backend with explicit parallel-matmul configuration.
Source§pub fn with_full_config(
num_threads: NonZeroUsize,
config: ThreadedCpuBackendConfig,
) -> Result<Self, KernelError>
pub fn with_full_config( num_threads: NonZeroUsize, config: ThreadedCpuBackendConfig, ) -> Result<Self, KernelError>
Build a threaded backend with explicit matmul and elementwise configuration.
Source§pub const fn matmul_config(&self) -> ParallelMatmulConfig
pub const fn matmul_config(&self) -> ParallelMatmulConfig
Matmul parallelism knobs used by this backend.
Source§pub const fn elementwise_config(&self) -> ParallelElementwiseConfig
pub const fn elementwise_config(&self) -> ParallelElementwiseConfig
Elementwise parallelism knobs used by this backend.
Trait Implementations§
Source§impl Backend for ThreadedCpuBackend
impl Backend for ThreadedCpuBackend
fn add(&self, lhs: &Tensor, rhs: &Tensor) -> Result<Tensor, KernelError>
fn sub(&self, lhs: &Tensor, rhs: &Tensor) -> Result<Tensor, KernelError>
fn mul(&self, lhs: &Tensor, rhs: &Tensor) -> Result<Tensor, KernelError>
fn relu(&self, input: &Tensor) -> Tensor
fn sigmoid(&self, input: &Tensor) -> Tensor
fn exp(&self, input: &Tensor) -> Tensor
fn tanh_act(&self, input: &Tensor) -> Tensor
fn softmax_last_dim(&self, input: &Tensor) -> Result<Tensor, KernelError>
fn log_softmax_last_dim(&self, input: &Tensor) -> Result<Tensor, KernelError>
fn logsumexp_last_dim(&self, input: &Tensor) -> Result<Tensor, KernelError>
fn layer_norm_last_dim( &self, input: &Tensor, params: LayerNormLastDimParams<'_>, ) -> Result<Tensor, KernelError>
fn max_pool2d_nhwc( &self, input: &Tensor, kernel_h: usize, kernel_w: usize, stride_h: usize, stride_w: usize, ) -> Result<Tensor, KernelError>
fn avg_pool2d_nhwc( &self, input: &Tensor, kernel_h: usize, kernel_w: usize, stride_h: usize, stride_w: usize, ) -> Result<Tensor, KernelError>
fn conv2d_nhwc( &self, input: &Tensor, kernel: &Tensor, bias: Option<&Tensor>, stride_h: usize, stride_w: usize, ) -> Result<Tensor, KernelError>
fn depthwise_conv2d_nhwc( &self, input: &Tensor, kernel: &Tensor, bias: Option<&Tensor>, stride_h: usize, stride_w: usize, ) -> Result<Tensor, KernelError>
fn separable_conv2d_nhwc( &self, input: &Tensor, params: SeparableConv2dParams<'_>, stride_h: usize, stride_w: usize, ) -> Result<Tensor, KernelError>
fn batch_norm2d_nhwc( &self, input: &Tensor, params: BatchNorm2dParams<'_>, ) -> Result<Tensor, KernelError>
fn group_norm_nhwc( &self, input: &Tensor, params: GroupNormNhwcParams<'_>, ) -> Result<Tensor, KernelError>
fn rms_norm_last_dim( &self, input: &Tensor, params: RmsNormLastDimParams<'_>, ) -> Result<Tensor, KernelError>
fn matmul_2d(&self, lhs: &Tensor, rhs: &Tensor) -> Result<Tensor, KernelError>
Source§fn div(&self, lhs: &Tensor, rhs: &Tensor) -> Result<Tensor, KernelError>
fn div(&self, lhs: &Tensor, rhs: &Tensor) -> Result<Tensor, KernelError>
Element-wise division with broadcast.
Source§fn transpose_2d(&self, input: &Tensor) -> Result<Tensor, KernelError>
fn transpose_2d(&self, input: &Tensor) -> Result<Tensor, KernelError>
Transpose a 2-D matrix.
Source§fn sum_all(&self, input: &Tensor) -> Tensor
fn sum_all(&self, input: &Tensor) -> Tensor
Scalar sum of all elements (returns a scalar tensor).
Source§fn mul_scalar(&self, input: &Tensor, scalar: f32) -> Tensor
fn mul_scalar(&self, input: &Tensor, scalar: f32) -> Tensor
Multiply every element by a scalar.
Source§fn reciprocal(&self, input: &Tensor) -> Tensor
fn reciprocal(&self, input: &Tensor) -> Tensor
Element-wise reciprocal (1/x).
Source§impl BackwardOps for ThreadedCpuBackend
impl BackwardOps for ThreadedCpuBackend
Source§fn relu_backward(
&self,
upstream: &Tensor,
forward_input: &Tensor,
) -> Result<Tensor, KernelError>
fn relu_backward( &self, upstream: &Tensor, forward_input: &Tensor, ) -> Result<Tensor, KernelError>
ReLU backward:
grad_input[i] = upstream[i] * (forward_input[i] > 0 ? 1 : 0).
Source§fn sigmoid_backward(
&self,
upstream: &Tensor,
forward_output: &Tensor,
) -> Result<Tensor, KernelError>
fn sigmoid_backward( &self, upstream: &Tensor, forward_output: &Tensor, ) -> Result<Tensor, KernelError>
Sigmoid backward:
grad_input[i] = upstream[i] * s[i] * (1 - s[i]) where s = forward output.
Source§fn tanh_backward(
&self,
upstream: &Tensor,
forward_output: &Tensor,
) -> Result<Tensor, KernelError>
fn tanh_backward( &self, upstream: &Tensor, forward_output: &Tensor, ) -> Result<Tensor, KernelError>
Tanh backward:
grad_input[i] = upstream[i] * (1 - t[i]^2) where t = forward output.
Source§fn exp_backward(
&self,
upstream: &Tensor,
forward_output: &Tensor,
) -> Result<Tensor, KernelError>
fn exp_backward( &self, upstream: &Tensor, forward_output: &Tensor, ) -> Result<Tensor, KernelError>
Exp backward:
grad_input[i] = upstream[i] * e[i] where e = forward output.
Source§fn reduce_sum_backward(
&self,
upstream: &Tensor,
original_shape: &[usize],
) -> Result<Tensor, KernelError>
fn reduce_sum_backward( &self, upstream: &Tensor, original_shape: &[usize], ) -> Result<Tensor, KernelError>
Reduce-sum backward: broadcast scalar gradient to all elements of
original_shape.
Source§fn matmul_backward(
&self,
upstream: &Tensor,
lhs: &Tensor,
rhs: &Tensor,
) -> Result<(Tensor, Tensor), KernelError>
fn matmul_backward( &self, upstream: &Tensor, lhs: &Tensor, rhs: &Tensor, ) -> Result<(Tensor, Tensor), KernelError>
MatMul backward:
grad_lhs = upstream @ rhs^T, grad_rhs = lhs^T @ upstream.
Source§fn add_backward(
&self,
upstream: &Tensor,
_lhs: &Tensor,
_rhs: &Tensor,
) -> Result<(Tensor, Tensor), KernelError>
fn add_backward( &self, upstream: &Tensor, _lhs: &Tensor, _rhs: &Tensor, ) -> Result<(Tensor, Tensor), KernelError>
Add backward: gradient passes through unchanged to both operands.
Source§fn sub_backward(
&self,
upstream: &Tensor,
_lhs: &Tensor,
_rhs: &Tensor,
) -> Result<(Tensor, Tensor), KernelError>
fn sub_backward( &self, upstream: &Tensor, _lhs: &Tensor, _rhs: &Tensor, ) -> Result<(Tensor, Tensor), KernelError>
Sub backward:
grad_lhs = upstream, grad_rhs = -upstream.
Source§fn mul_backward(
&self,
upstream: &Tensor,
lhs: &Tensor,
rhs: &Tensor,
) -> Result<(Tensor, Tensor), KernelError>
fn mul_backward( &self, upstream: &Tensor, lhs: &Tensor, rhs: &Tensor, ) -> Result<(Tensor, Tensor), KernelError>
Mul backward:
grad_lhs = upstream * rhs, grad_rhs = upstream * lhs.
Auto Trait Implementations§
impl Freeze for ThreadedCpuBackend
impl !RefUnwindSafe for ThreadedCpuBackend
impl Send for ThreadedCpuBackend
impl Sync for ThreadedCpuBackend
impl Unpin for ThreadedCpuBackend
impl UnsafeUnpin for ThreadedCpuBackend
impl !UnwindSafe for ThreadedCpuBackend
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more