Struct dfdx::devices::Cpu

source · [−]

pub struct Cpu;

Expand description

The CPU device

Implementations

source

impl Cpu

source

pub fn vm<const K: usize, const N: usize>(
 a: &[f32; K],
 b: &[[f32; N]; K],
 c: &mut [f32; N]
)

Vector-matrix multiply with accumulation: c += a * b, where a has length K, b is a K×N matrix, and c has length N.

source

pub fn vm_bt<const K: usize, const N: usize>(
 a: &[f32; K],
 b_t: &[[f32; K]; N],
 c: &mut [f32; N]
)

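Vector-matrix multiply with accumulation against a transposed matrix: c += a * trans(b_t), where a has length K, b_t is an N×K matrix, and c has length N.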

source

pub fn vv<const M: usize, const N: usize>(
 a: &[f32; M],
 b: &[f32; N],
 c: &mut [[f32; N]; M]
)

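Vector-vector (outer) product: combines a (length M) and b (length N) into the M×N matrix c. Presumably accumulates (c += ...) like the other multiply helpers here; confirm against the implementation.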

Trait Implementations

source

impl AllocateZeros for Cpu

source

fn zeros<T: CountElements>() -> Box<T>

Allocates using alloc_zeroed.

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> Device<[[[[f32; P]; O]; N]; M]> for Cpu

source

fn map<F: FnMut(&T::Dtype) -> T::Dtype>(t: &T, f: F) -> Box<T>

Allocate a new T and then store f applied to t in the new T. Uses ForEachElement::foreach_mr.

source

impl<const M: usize, const N: usize, const O: usize> Device<[[[f32; O]; N]; M]> for Cpu

source

fn map<F: FnMut(&T::Dtype) -> T::Dtype>(t: &T, f: F) -> Box<T>

Allocate a new T and then store f applied to t in the new T. Uses ForEachElement::foreach_mr.

source

impl<const M: usize, const N: usize> Device<[[f32; N]; M]> for Cpu

source

fn map<F: FnMut(&T::Dtype) -> T::Dtype>(t: &T, f: F) -> Box<T>

Allocate a new T and then store f applied to t in the new T. Uses ForEachElement::foreach_mr.

source

impl<const M: usize> Device<[f32; M]> for Cpu

source

fn map<F: FnMut(&T::Dtype) -> T::Dtype>(t: &T, f: F) -> Box<T>

Allocate a new T and then store f applied to t in the new T. Uses ForEachElement::foreach_mr.

source

impl Device<f32> for Cpu

source

fn map<F: FnMut(&T::Dtype) -> T::Dtype>(t: &T, f: F) -> Box<T>

Allocate a new T and then store f applied to t in the new T. Uses ForEachElement::foreach_mr.

source

impl<const S: usize, const P: usize> DeviceConv2D<S, P> for Cpu where
Self: AllocateZeros,

source

fn conv_forward<const C: usize, const O: usize, const K: usize, const H: usize, const W: usize>(
 img: &[[[f32; W]; H]; C],
 weight: &[[[[f32; K]; K]; C]; O],
 bias: &[f32; O],
 out: &mut [[[f32; { _ }]; { _ }]; O]
)

Forward operation that modifies the out image.

source

fn conv_backward<const C: usize, const O: usize, const K: usize, const H: usize, const W: usize>(
 img: &[[[f32; W]; H]; C],
 weight: &[[[[f32; K]; K]; C]; O],
 out_g: &[[[f32; { _ }]; { _ }]; O],
 img_g: &mut [[[f32; W]; H]; C],
 weight_g: &mut [[[[f32; K]; K]; C]; O],
 bias_g: &mut [f32; O]
)

Backward operation that modifies the gradients of img, weight, and bias.

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; N]; O]; P], (Axis<3>, Axis<2>, Axis<1>, Axis<0>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; M]; N]; O]; P])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; M]; N]; O]; P])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; N]; P]; O], (Axis<2>, Axis<3>, Axis<1>, Axis<0>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; M]; N]; P]; O])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; M]; N]; P]; O])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; O]; N]; P], (Axis<3>, Axis<1>, Axis<2>, Axis<0>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; M]; O]; N]; P])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; M]; O]; N]; P])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; O]; P]; N], (Axis<1>, Axis<3>, Axis<2>, Axis<0>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; M]; O]; P]; N])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; M]; O]; P]; N])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; P]; N]; O], (Axis<2>, Axis<1>, Axis<3>, Axis<0>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; M]; P]; N]; O])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; M]; P]; N]; O])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; P]; O]; N], (Axis<1>, Axis<2>, Axis<3>, Axis<0>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; M]; P]; O]; N])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; M]; P]; O]; N])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; M]; O]; P], (Axis<3>, Axis<2>, Axis<0>, Axis<1>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; N]; M]; O]; P])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; N]; M]; O]; P])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; M]; P]; O], (Axis<2>, Axis<3>, Axis<0>, Axis<1>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; N]; M]; P]; O])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; N]; M]; P]; O])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; O]; M]; P], (Axis<3>, Axis<0>, Axis<2>, Axis<1>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; N]; O]; M]; P])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; N]; O]; M]; P])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; O]; P]; M], (Axis<0>, Axis<3>, Axis<2>, Axis<1>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; N]; O]; P]; M])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; N]; O]; P]; M])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; P]; M]; O], (Axis<2>, Axis<0>, Axis<3>, Axis<1>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; N]; P]; M]; O])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; N]; P]; M]; O])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; P]; O]; M], (Axis<0>, Axis<2>, Axis<3>, Axis<1>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; N]; P]; O]; M])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; N]; P]; O]; M])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; M]; N]; P], (Axis<3>, Axis<1>, Axis<0>, Axis<2>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; O]; M]; N]; P])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; O]; M]; N]; P])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; M]; P]; N], (Axis<1>, Axis<3>, Axis<0>, Axis<2>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; O]; M]; P]; N])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; O]; M]; P]; N])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; N]; M]; P], (Axis<3>, Axis<0>, Axis<1>, Axis<2>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; O]; N]; M]; P])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; O]; N]; M]; P])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; N]; P]; M], (Axis<0>, Axis<3>, Axis<1>, Axis<2>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; O]; N]; P]; M])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; O]; N]; P]; M])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; P]; M]; N], (Axis<1>, Axis<0>, Axis<3>, Axis<2>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; O]; P]; M]; N])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; O]; P]; M]; N])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; P]; N]; M], (Axis<0>, Axis<1>, Axis<3>, Axis<2>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; O]; P]; N]; M])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; O]; P]; N]; M])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; M]; N]; O], (Axis<2>, Axis<1>, Axis<0>, Axis<3>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; P]; M]; N]; O])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; P]; M]; N]; O])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; M]; O]; N], (Axis<1>, Axis<2>, Axis<0>, Axis<3>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; P]; M]; O]; N])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; P]; M]; O]; N])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; N]; M]; O], (Axis<2>, Axis<0>, Axis<1>, Axis<3>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; P]; N]; M]; O])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; P]; N]; M]; O])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; N]; O]; M], (Axis<0>, Axis<2>, Axis<1>, Axis<3>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; P]; N]; O]; M])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; P]; N]; O]; M])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; O]; M]; N], (Axis<1>, Axis<0>, Axis<2>, Axis<3>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; P]; O]; M]; N])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; P]; O]; M]; N])

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>, Axis<3>)> for Cpu

source

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; P]; O]; N]; M])

source

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; P]; O]; N]; M])

source

impl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; M]; N]; O], (Axis<2>, Axis<1>, Axis<0>)> for Cpu

source

fn permute(a: &[[[f32; O]; N]; M], b: &mut [[[f32; M]; N]; O])

source

fn inverse_permute(a: &mut [[[f32; O]; N]; M], b: &[[[f32; M]; N]; O])

source

impl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; M]; O]; N], (Axis<1>, Axis<2>, Axis<0>)> for Cpu

source

fn permute(a: &[[[f32; O]; N]; M], b: &mut [[[f32; M]; O]; N])

source

fn inverse_permute(a: &mut [[[f32; O]; N]; M], b: &[[[f32; M]; O]; N])

source

impl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; N]; M]; O], (Axis<2>, Axis<0>, Axis<1>)> for Cpu

source

fn permute(a: &[[[f32; O]; N]; M], b: &mut [[[f32; N]; M]; O])

source

fn inverse_permute(a: &mut [[[f32; O]; N]; M], b: &[[[f32; N]; M]; O])

source

impl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; N]; O]; M], (Axis<0>, Axis<2>, Axis<1>)> for Cpu

source

fn permute(a: &[[[f32; O]; N]; M], b: &mut [[[f32; N]; O]; M])

source

fn inverse_permute(a: &mut [[[f32; O]; N]; M], b: &[[[f32; N]; O]; M])

source

impl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; O]; M]; N], (Axis<1>, Axis<0>, Axis<2>)> for Cpu

source

fn permute(a: &[[[f32; O]; N]; M], b: &mut [[[f32; O]; M]; N])

source

fn inverse_permute(a: &mut [[[f32; O]; N]; M], b: &[[[f32; O]; M]; N])

source

impl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>)> for Cpu

source

fn permute(a: &[[[f32; O]; N]; M], b: &mut [[[f32; O]; N]; M])

source

fn inverse_permute(a: &mut [[[f32; O]; N]; M], b: &[[[f32; O]; N]; M])

source

impl<const M: usize, const N: usize> DevicePermute<[[f32; N]; M], [[f32; M]; N], (Axis<1>, Axis<0>)> for Cpu

source

fn permute(a: &[[f32; N]; M], b: &mut [[f32; M]; N])

source

fn inverse_permute(a: &mut [[f32; N]; M], b: &[[f32; M]; N])

source

impl<const M: usize, const N: usize> DevicePermute<[[f32; N]; M], [[f32; N]; M], (Axis<0>, Axis<1>)> for Cpu

source

fn permute(a: &[[f32; N]; M], b: &mut [[f32; N]; M])

source

fn inverse_permute(a: &mut [[f32; N]; M], b: &[[f32; N]; M])

source

impl<const K: usize, const S: usize, const P: usize> DevicePool2D<K, S, P, PoolAvg> for Cpu

source

fn pool_forward<const C: usize, const H: usize, const W: usize>(
inp: &[[[f32; W]; H]; C],
out: &mut [[[f32; { _ }]; { _ }]; C]
)

Forward operation that modifies the out image.

source

fn pool_backward<const C: usize, const H: usize, const W: usize>(
 _inp: &[[[f32; W]; H]; C],
 out_g: &[[[f32; { _ }]; { _ }]; C],
 inp_g: &mut [[[f32; W]; H]; C]
)

Backward operation that modifies inp_g, the gradient of the input image.

source

impl<const K: usize, const S: usize, const P: usize> DevicePool2D<K, S, P, PoolMax> for Cpu

source

fn pool_forward<const C: usize, const H: usize, const W: usize>(
inp: &[[[f32; W]; H]; C],
out: &mut [[[f32; { _ }]; { _ }]; C]
)

Forward operation that modifies the out image.

source

fn pool_backward<const C: usize, const H: usize, const W: usize>(
 inp: &[[[f32; W]; H]; C],
 out_g: &[[[f32; { _ }]; { _ }]; C],
 inp_g: &mut [[[f32; W]; H]; C]
)

Backward operation that modifies inp_g, the gradient of the input image.

source

impl<const K: usize, const S: usize, const P: usize> DevicePool2D<K, S, P, PoolMin> for Cpu

source

fn pool_forward<const C: usize, const H: usize, const W: usize>(
inp: &[[[f32; W]; H]; C],
out: &mut [[[f32; { _ }]; { _ }]; C]
)

Forward operation that modifies the out image.

source

fn pool_backward<const C: usize, const H: usize, const W: usize>(
 inp: &[[[f32; W]; H]; C],
 out_g: &[[[f32; { _ }]; { _ }]; C],
 inp_g: &mut [[[f32; W]; H]; C]
)

Backward operation that modifies inp_g, the gradient of the input image.

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>, Axis<3>)> for Cpu

type Reduced = f32

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>)> for Cpu

type Reduced = [f32; P]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>, Axis<3>)> for Cpu

type Reduced = [f32; O]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>)> for Cpu

type Reduced = [[f32; P]; O]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<2>, Axis<3>)> for Cpu

type Reduced = [f32; N]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<2>)> for Cpu

type Reduced = [[f32; P]; N]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<3>)> for Cpu

type Reduced = [[f32; O]; N]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<1>, Axis<2>, Axis<3>)> for Cpu

type Reduced = [f32; M]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<1>, Axis<2>)> for Cpu

type Reduced = [[f32; P]; M]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<1>, Axis<3>)> for Cpu

type Reduced = [[f32; O]; M]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<2>, Axis<3>)> for Cpu

type Reduced = [[f32; N]; M]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], Axis<0>> for Cpu

type Reduced = [[[f32; P]; O]; N]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], Axis<1>> for Cpu

type Reduced = [[[f32; P]; O]; M]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], Axis<2>> for Cpu

type Reduced = [[[f32; P]; N]; M]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], Axis<3>> for Cpu

type Reduced = [[[f32; O]; N]; M]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>)> for Cpu

type Reduced = f32

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], (Axis<0>, Axis<1>)> for Cpu

type Reduced = [f32; O]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], (Axis<0>, Axis<2>)> for Cpu

type Reduced = [f32; N]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], (Axis<1>, Axis<2>)> for Cpu

type Reduced = [f32; M]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], Axis<0>> for Cpu

type Reduced = [[f32; O]; N]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], Axis<1>> for Cpu

type Reduced = [[f32; O]; M]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], Axis<2>> for Cpu

type Reduced = [[f32; N]; M]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize> DeviceReduce<[[f32; N]; M], (Axis<0>, Axis<1>)> for Cpu

type Reduced = f32

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[f32; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[f32; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize> DeviceReduce<[[f32; N]; M], Axis<0>> for Cpu

type Reduced = [f32; N]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[f32; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[f32; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize, const N: usize> DeviceReduce<[[f32; N]; M], Axis<1>> for Cpu

type Reduced = [f32; M]

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[f32; N]; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[f32; N]; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<T: CountElements, const M: usize> DeviceReduce<[T; M], AllAxes> for Cpu where
T::Dtype: CountElements<Dtype = T::Dtype>,
Self: DeviceReduce<T, AllAxes> + FillElements<[T; M]> + FillElements<T::Dtype>,

type Reduced = <Cpu as DeviceReduce<T, AllAxes>>::Reduced

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<T::Dtype>>(
r: &mut Self::Reduced,
t: &[T; M]
)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<T::Dtype>>(
t: &mut [T; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<const M: usize> DeviceReduce<[f32; M], Axis<0>> for Cpu

type Reduced = f32

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(r: &mut Self::Reduced, t: &[f32; M])

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [f32; M],
r: &Self::Reduced
)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl DeviceReduce<f32, AllAxes> for Cpu

type Reduced = f32

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(r: &mut Self::Reduced, t: &f32)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(t: &mut f32, r: &Self::Reduced)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl DeviceReduce<f32, Axis<0>> for Cpu

type Reduced = f32

The smaller type.

source

fn reduce_into_no_reset<A: Accumulator<f32>>(r: &mut Self::Reduced, t: &f32)

Reduces T into Self::Reduced with accumulator A without resetting the values in r.

source

fn broadcast_into_no_reset<A: Accumulator<f32>>(t: &mut f32, r: &Self::Reduced)

Broadcasts Self::Reduced into T with accumulator A without resetting the values in t. Read more

source

fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)

Fills r with Accumulator::INIT before reducing.

source

fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)

Fills t with Accumulator::INIT before broadcasting.

source

fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>

Allocates Self::Reduced then calls DeviceReduce::reduce_into()

source

impl<T: CountElements, const M: usize> FillElements<[T; M]> for Cpu where
Self: FillElements<T>,

source

impl<T: CountElements, const M: usize> ForEachElement<[T; M]> for Cpu where
Self: ForEachElement<T>,

source

fn foreach_m<F: FnMut(&mut <[T; M] as CountElements>::Dtype)>(
a: &mut [T; M],
f: &mut F
)

Mutate elements of a by applying f to all elements of a.

source

fn foreach_mm<F>(a: &mut [T; M], b: &mut [T; M], f: &mut F) where
F: FnMut(&mut T::Dtype, &mut T::Dtype),

Mutate elements of a and b by applying f to all elements of (a, b). mm stands for mut mut Read more

source

fn foreach_mr<F>(a: &mut [T; M], b: &[T; M], f: &mut F) where
F: FnMut(&mut T::Dtype, &T::Dtype),

Mutate elements of a by applying f to all elements of (a, b). mr stands for mut ref Read more

source

fn foreach_mmm<F>(a: &mut [T; M], b: &mut [T; M], c: &mut [T; M], f: &mut F) where
F: FnMut(&mut T::Dtype, &mut T::Dtype, &mut T::Dtype),

Mutate elements of a, b, and c by applying f to all elements of (a, b, c). mmm stands for mut mut mut Read more

source

fn foreach_mrr<F>(a: &mut [T; M], b: &[T; M], c: &[T; M], f: &mut F) where
F: FnMut(&mut T::Dtype, &T::Dtype, &T::Dtype),

Mutate elements of a by applying f to all elements of (a, b, c). mrr stands for mut ref ref Read more

source

impl ForEachElement<f32> for Cpu

source

fn foreach_m<F: FnMut(&mut <f32 as CountElements>::Dtype)>(
a: &mut f32,
f: &mut F
)

Mutate elements of a by applying f to all elements of a.

source

fn foreach_mm<F: FnMut(&mut f32, &mut f32)>(a: &mut f32, b: &mut f32, f: &mut F)

Mutate elements of a and b by applying f to all elements of (a, b). mm stands for mut mut Read more

source

fn foreach_mr<F: FnMut(&mut f32, &f32)>(a: &mut f32, b: &f32, f: &mut F)

Mutate elements of a by applying f to all elements of (a, b). mr stands for mut ref Read more

source

fn foreach_mmm<F>(a: &mut f32, b: &mut f32, c: &mut f32, f: &mut F) where
F: FnMut(&mut f32, &mut f32, &mut f32),

Mutate elements of a, b, and c by applying f to all elements of (a, b, c). mmm stands for mut mut mut Read more

source

fn foreach_mrr<F>(a: &mut f32, b: &f32, c: &f32, f: &mut F) where
F: FnMut(&mut f32, &f32, &f32),

Mutate elements of a by applying f to all elements of (a, b, c). mrr stands for mut ref ref Read more

source

impl<const BATCH: usize, const M: usize, const K: usize, const N: usize> MatMul<[[[f32; K]; M]; BATCH], [[[f32; N]; K]; BATCH], [[f32; N]; M]> for Cpu where
Self: MatMul<[[f32; K]; M], [[f32; N]; K], [[f32; N]; M]>,

source

fn mm(
    a: &[[[f32; K]; M]; BATCH],
    b: &[[[f32; N]; K]; BATCH],
    c: &mut [[f32; N]; M]
)

Broadcast c BATCH times.

source

fn mm_at(
    a: &[[[f32; M]; K]; BATCH],
    b: &[[[f32; N]; K]; BATCH],
    c: &mut [[f32; N]; M]
)

Broadcast c BATCH times.

source

fn mm_bt(
    a: &[[[f32; K]; M]; BATCH],
    b: &[[[f32; K]; N]; BATCH],
    c: &mut [[f32; N]; M]
)

Broadcast c BATCH times.

source

fn mm_atct(
    a: &[[[f32; M]; K]; BATCH],
    b: &[[[f32; N]; K]; BATCH],
    c: &mut [[f32; M]; N]
)

Broadcast c BATCH times.

source

impl<const BATCH: usize, const M: usize, const K: usize, const N: usize> MatMul<[[[f32; K]; M]; BATCH], [[f32; N]; K], [[[f32; N]; M]; BATCH]> for Cpu where
Self: MatMul<[[f32; K]; M], [[f32; N]; K], [[f32; N]; M]>,

source

fn mm(
    a: &[[[f32; K]; M]; BATCH],
    b: &[[f32; N]; K],
    c: &mut [[[f32; N]; M]; BATCH]
)

Broadcast b BATCH times.

source

fn mm_at(
    a: &[[[f32; M]; K]; BATCH],
    b: &[[f32; N]; K],
    c: &mut [[[f32; N]; M]; BATCH]
)

Broadcast b BATCH times.

source

fn mm_bt(
    a: &[[[f32; K]; M]; BATCH],
    b: &[[f32; K]; N],
    c: &mut [[[f32; N]; M]; BATCH]
)

Broadcast b BATCH times.

source

fn mm_atct(
    a: &[[[f32; M]; K]; BATCH],
    b: &[[f32; N]; K],
    c: &mut [[[f32; M]; N]; BATCH]
)

Broadcast b BATCH times.

source

impl<const M: usize, const K: usize, const N: usize> MatMul<[[f32; K]; M], [[f32; N]; K], [[f32; N]; M]> for Cpu

source

fn mm(a: &[[f32; K]; M], b: &[[f32; N]; K], c: &mut [[f32; N]; M])

Matmul

source

fn mm_at(a: &[[f32; M]; K], b: &[[f32; N]; K], c: &mut [[f32; N]; M])

Matmul, a is transposed.

source

fn mm_bt(a: &[[f32; K]; M], b: &[[f32; K]; N], c: &mut [[f32; N]; M])

Matmul, b is transposed

source

fn mm_atct(a: &[[f32; M]; K], b: &[[f32; N]; K], c: &mut [[f32; M]; N])

Matmul, a and c are transposed

source

impl<const BATCH: usize, A, B, C> MatMul<[A; BATCH], [B; BATCH], [C; BATCH]> for Cpu where
 Self: MatMul<A, B, C>,
 A: Transpose,
 B: Transpose,
 C: Transpose,
 [A; BATCH]: Transpose<T = [A::T; BATCH]>,
 [B; BATCH]: Transpose<T = [B::T; BATCH]>,
 [C; BATCH]: Transpose<T = [C::T; BATCH]>,

source

fn mm(a: &[A; BATCH], b: &[B; BATCH], c: &mut [C; BATCH])

Batched matmul

source

fn mm_at(a: &[A::T; BATCH], b: &[B; BATCH], c: &mut [C; BATCH])

Batched matmul

source

fn mm_bt(a: &[A; BATCH], b: &[B::T; BATCH], c: &mut [C; BATCH])

Batched matmul

source

fn mm_atct(a: &[A::T; BATCH], b: &[B; BATCH], c: &mut [C::T; BATCH])

Batched matmul

source

impl<A: Transpose, B: Transpose, C: Transpose> MatMulOp<A, B, C> for Cpu where
Self: MatMul<A, B, C> + MatMul<C, B::T, A> + MatMul<A::T, C, B>,

Auto Trait Implementations

impl RefUnwindSafe for Cpu

impl Send for Cpu

impl Sync for Cpu

impl Unpin for Cpu

impl UnwindSafe for Cpu

Blanket Implementations

source

impl<T> Any for T where
T: 'static + ?Sized,

source

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

source

impl<T> Borrow<T> for T where
T: ?Sized,

const: unstable · source

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

source

impl<T> BorrowMut<T> for T where
T: ?Sized,

const: unstable · source

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

source

impl<T> From<T> for T

const: unstable · source

fn from(t: T) -> T

Returns the argument unchanged.

source

impl<T, U> Into<U> for T where
U: From<T>,

const: unstable · source

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source

impl<T, U> TryFrom<U> for T where
U: Into<T>,

type Error = Infallible

The type returned in the event of a conversion error.

const: unstable · source

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

source

impl<T, U> TryInto<U> for T where
U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

const: unstable · source

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Struct dfdx::devices::Cpu

Implementations

impl Cpu

pub fn vm<const K: usize, const N: usize>( a: &[f32; K], b: &[[f32; N]; K], c: &mut [f32; N])

pub fn vm_bt<const K: usize, const N: usize>( a: &[f32; K], b_t: &[[f32; K]; N], c: &mut [f32; N])

pub fn vv<const M: usize, const N: usize>( a: &[f32; M], b: &[f32; N], c: &mut [[f32; N]; M])

Trait Implementations

impl AllocateZeros for Cpu

fn zeros<T: CountElements>() -> Box<T>

impl<const M: usize, const N: usize, const O: usize, const P: usize> Device<[[[[f32; P]; O]; N]; M]> for Cpu

fn map<F: FnMut(&T::Dtype) -> T::Dtype>(t: &T, f: F) -> Box<T>

impl<const M: usize, const N: usize, const O: usize> Device<[[[f32; O]; N]; M]> for Cpu

fn map<F: FnMut(&T::Dtype) -> T::Dtype>(t: &T, f: F) -> Box<T>

impl<const M: usize, const N: usize> Device<[[f32; N]; M]> for Cpu

fn map<F: FnMut(&T::Dtype) -> T::Dtype>(t: &T, f: F) -> Box<T>

impl<const M: usize> Device<[f32; M]> for Cpu

fn map<F: FnMut(&T::Dtype) -> T::Dtype>(t: &T, f: F) -> Box<T>

impl Device<f32> for Cpu

fn map<F: FnMut(&T::Dtype) -> T::Dtype>(t: &T, f: F) -> Box<T>

impl<const S: usize, const P: usize> DeviceConv2D<S, P> for Cpu where Self: AllocateZeros,

fn conv_forward<const C: usize, const O: usize, const K: usize, const H: usize, const W: usize>( img: &[[[f32; W]; H]; C], weight: &[[[[f32; K]; K]; C]; O], bias: &[f32; O], out: &mut [[[f32; { _ }]; { _ }]; O])

fn conv_backward<const C: usize, const O: usize, const K: usize, const H: usize, const W: usize>( img: &[[[f32; W]; H]; C], weight: &[[[[f32; K]; K]; C]; O], out_g: &[[[f32; { _ }]; { _ }]; O], img_g: &mut [[[f32; W]; H]; C], weight_g: &mut [[[[f32; K]; K]; C]; O], bias_g: &mut [f32; O])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; N]; O]; P], (Axis<3>, Axis<2>, Axis<1>, Axis<0>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; M]; N]; O]; P])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; M]; N]; O]; P])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; N]; P]; O], (Axis<2>, Axis<3>, Axis<1>, Axis<0>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; M]; N]; P]; O])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; M]; N]; P]; O])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; O]; N]; P], (Axis<3>, Axis<1>, Axis<2>, Axis<0>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; M]; O]; N]; P])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; M]; O]; N]; P])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; O]; P]; N], (Axis<1>, Axis<3>, Axis<2>, Axis<0>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; M]; O]; P]; N])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; M]; O]; P]; N])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; P]; N]; O], (Axis<2>, Axis<1>, Axis<3>, Axis<0>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; M]; P]; N]; O])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; M]; P]; N]; O])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; P]; O]; N], (Axis<1>, Axis<2>, Axis<3>, Axis<0>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; M]; P]; O]; N])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; M]; P]; O]; N])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; M]; O]; P], (Axis<3>, Axis<2>, Axis<0>, Axis<1>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; N]; M]; O]; P])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; N]; M]; O]; P])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; M]; P]; O], (Axis<2>, Axis<3>, Axis<0>, Axis<1>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; N]; M]; P]; O])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; N]; M]; P]; O])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; O]; M]; P], (Axis<3>, Axis<0>, Axis<2>, Axis<1>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; N]; O]; M]; P])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; N]; O]; M]; P])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; O]; P]; M], (Axis<0>, Axis<3>, Axis<2>, Axis<1>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; N]; O]; P]; M])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; N]; O]; P]; M])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; P]; M]; O], (Axis<2>, Axis<0>, Axis<3>, Axis<1>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; N]; P]; M]; O])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; N]; P]; M]; O])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; P]; O]; M], (Axis<0>, Axis<2>, Axis<3>, Axis<1>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; N]; P]; O]; M])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; N]; P]; O]; M])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; M]; N]; P], (Axis<3>, Axis<1>, Axis<0>, Axis<2>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; O]; M]; N]; P])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; O]; M]; N]; P])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; M]; P]; N], (Axis<1>, Axis<3>, Axis<0>, Axis<2>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; O]; M]; P]; N])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; O]; M]; P]; N])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; N]; M]; P], (Axis<3>, Axis<0>, Axis<1>, Axis<2>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; O]; N]; M]; P])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; O]; N]; M]; P])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; N]; P]; M], (Axis<0>, Axis<3>, Axis<1>, Axis<2>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; O]; N]; P]; M])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; O]; N]; P]; M])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; P]; M]; N], (Axis<1>, Axis<0>, Axis<3>, Axis<2>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; O]; P]; M]; N])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; O]; P]; M]; N])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; P]; N]; M], (Axis<0>, Axis<1>, Axis<3>, Axis<2>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; O]; P]; N]; M])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; O]; P]; N]; M])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; M]; N]; O], (Axis<2>, Axis<1>, Axis<0>, Axis<3>)> for Cpu

fn permute(a: &[[[[f32; P]; O]; N]; M], b: &mut [[[[f32; P]; M]; N]; O])

fn inverse_permute(a: &mut [[[[f32; P]; O]; N]; M], b: &[[[[f32; P]; M]; N]; O])

impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; M]; O]; N], (Axis<1>, Axis<2>, Axis<0>, Axis<3>)> for Cpu

pub fn vm<const K: usize, const N: usize>(
a: &[f32; K],
b: &[[f32; N]; K],
c: &mut [f32; N]
)

pub fn vm_bt<const K: usize, const N: usize>(
a: &[f32; K],
b_t: &[[f32; K]; N],
c: &mut [f32; N]
)

pub fn vv<const M: usize, const N: usize>(
a: &[f32; M],
b: &[f32; N],
c: &mut [[f32; N]; M]
)

impl<const S: usize, const P: usize> DeviceConv2D<S, P> for Cpu where
Self: AllocateZeros,

fn conv_forward<const C: usize, const O: usize, const K: usize, const H: usize, const W: usize>(
img: &[[[f32; W]; H]; C],
weight: &[[[[f32; K]; K]; C]; O],
bias: &[f32; O],
out: &mut [[[f32; { _ }]; { _ }]; O]
)

fn conv_backward<const C: usize, const O: usize, const K: usize, const H: usize, const W: usize>(
img: &[[[f32; W]; H]; C],
weight: &[[[[f32; K]; K]; C]; O],
out_g: &[[[f32; { _ }]; { _ }]; O],
img_g: &mut [[[f32; W]; H]; C],
weight_g: &mut [[[[f32; K]; K]; C]; O],
bias_g: &mut [f32; O]
)

fn pool_forward<const C: usize, const H: usize, const W: usize>(
inp: &[[[f32; W]; H]; C],
out: &mut [[[f32; { _ }]; { _ }]; C]
)

fn pool_backward<const C: usize, const H: usize, const W: usize>(
_inp: &[[[f32; W]; H]; C],
out_g: &[[[f32; { _ }]; { _ }]; C],
inp_g: &mut [[[f32; W]; H]; C]
)

fn pool_forward<const C: usize, const H: usize, const W: usize>(
inp: &[[[f32; W]; H]; C],
out: &mut [[[f32; { _ }]; { _ }]; C]
)

fn pool_backward<const C: usize, const H: usize, const W: usize>(
inp: &[[[f32; W]; H]; C],
out_g: &[[[f32; { _ }]; { _ }]; C],
inp_g: &mut [[[f32; W]; H]; C]
)

fn pool_forward<const C: usize, const H: usize, const W: usize>(
inp: &[[[f32; W]; H]; C],
out: &mut [[[f32; { _ }]; { _ }]; C]
)

fn pool_backward<const C: usize, const H: usize, const W: usize>(
inp: &[[[f32; W]; H]; C],
out_g: &[[[f32; { _ }]; { _ }]; C],
inp_g: &mut [[[f32; W]; H]; C]
)

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)

fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)

fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)