pub struct Cpu;
Expand description
The CPU device
Implementations
Trait Implementations
sourceimpl AllocateZeros for Cpu
impl AllocateZeros for Cpu
sourcefn zeros<T: CountElements>() -> Box<T>
fn zeros<T: CountElements>() -> Box<T>
Allocates using alloc_zeroed.
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> Device<[[[[f32; P]; O]; N]; M]> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> Device<[[[[f32; P]; O]; N]; M]> for Cpu
sourceimpl<const S: usize, const P: usize> DeviceConv2D<S, P> for Cpu where
Self: AllocateZeros,
impl<const S: usize, const P: usize> DeviceConv2D<S, P> for Cpu where
Self: AllocateZeros,
sourcefn conv_forward<const C: usize, const O: usize, const K: usize, const H: usize, const W: usize>(
img: &[[[f32; W]; H]; C],
weight: &[[[[f32; K]; K]; C]; O],
bias: &[f32; O],
out: &mut [[[f32; { _ }]; { _ }]; O]
)
fn conv_forward<const C: usize, const O: usize, const K: usize, const H: usize, const W: usize>(
img: &[[[f32; W]; H]; C],
weight: &[[[[f32; K]; K]; C]; O],
bias: &[f32; O],
out: &mut [[[f32; { _ }]; { _ }]; O]
)
Forward operation that modifies the `out` image.
sourcefn conv_backward<const C: usize, const O: usize, const K: usize, const H: usize, const W: usize>(
img: &[[[f32; W]; H]; C],
weight: &[[[[f32; K]; K]; C]; O],
out_g: &[[[f32; { _ }]; { _ }]; O],
img_g: &mut [[[f32; W]; H]; C],
weight_g: &mut [[[[f32; K]; K]; C]; O],
bias_g: &mut [f32; O]
)
fn conv_backward<const C: usize, const O: usize, const K: usize, const H: usize, const W: usize>(
img: &[[[f32; W]; H]; C],
weight: &[[[[f32; K]; K]; C]; O],
out_g: &[[[f32; { _ }]; { _ }]; O],
img_g: &mut [[[f32; W]; H]; C],
weight_g: &mut [[[[f32; K]; K]; C]; O],
bias_g: &mut [f32; O]
)
Backward operation that modifies the gradients of img, weight, and bias.
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; N]; O]; P], (Axis<3>, Axis<2>, Axis<1>, Axis<0>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; N]; O]; P], (Axis<3>, Axis<2>, Axis<1>, Axis<0>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; N]; P]; O], (Axis<2>, Axis<3>, Axis<1>, Axis<0>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; N]; P]; O], (Axis<2>, Axis<3>, Axis<1>, Axis<0>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; O]; N]; P], (Axis<3>, Axis<1>, Axis<2>, Axis<0>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; O]; N]; P], (Axis<3>, Axis<1>, Axis<2>, Axis<0>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; O]; P]; N], (Axis<1>, Axis<3>, Axis<2>, Axis<0>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; O]; P]; N], (Axis<1>, Axis<3>, Axis<2>, Axis<0>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; P]; N]; O], (Axis<2>, Axis<1>, Axis<3>, Axis<0>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; P]; N]; O], (Axis<2>, Axis<1>, Axis<3>, Axis<0>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; P]; O]; N], (Axis<1>, Axis<2>, Axis<3>, Axis<0>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; M]; P]; O]; N], (Axis<1>, Axis<2>, Axis<3>, Axis<0>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; M]; O]; P], (Axis<3>, Axis<2>, Axis<0>, Axis<1>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; M]; O]; P], (Axis<3>, Axis<2>, Axis<0>, Axis<1>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; M]; P]; O], (Axis<2>, Axis<3>, Axis<0>, Axis<1>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; M]; P]; O], (Axis<2>, Axis<3>, Axis<0>, Axis<1>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; O]; M]; P], (Axis<3>, Axis<0>, Axis<2>, Axis<1>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; O]; M]; P], (Axis<3>, Axis<0>, Axis<2>, Axis<1>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; O]; P]; M], (Axis<0>, Axis<3>, Axis<2>, Axis<1>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; O]; P]; M], (Axis<0>, Axis<3>, Axis<2>, Axis<1>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; P]; M]; O], (Axis<2>, Axis<0>, Axis<3>, Axis<1>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; P]; M]; O], (Axis<2>, Axis<0>, Axis<3>, Axis<1>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; P]; O]; M], (Axis<0>, Axis<2>, Axis<3>, Axis<1>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; N]; P]; O]; M], (Axis<0>, Axis<2>, Axis<3>, Axis<1>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; M]; N]; P], (Axis<3>, Axis<1>, Axis<0>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; M]; N]; P], (Axis<3>, Axis<1>, Axis<0>, Axis<2>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; M]; P]; N], (Axis<1>, Axis<3>, Axis<0>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; M]; P]; N], (Axis<1>, Axis<3>, Axis<0>, Axis<2>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; N]; M]; P], (Axis<3>, Axis<0>, Axis<1>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; N]; M]; P], (Axis<3>, Axis<0>, Axis<1>, Axis<2>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; N]; P]; M], (Axis<0>, Axis<3>, Axis<1>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; N]; P]; M], (Axis<0>, Axis<3>, Axis<1>, Axis<2>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; P]; M]; N], (Axis<1>, Axis<0>, Axis<3>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; P]; M]; N], (Axis<1>, Axis<0>, Axis<3>, Axis<2>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; P]; N]; M], (Axis<0>, Axis<1>, Axis<3>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; O]; P]; N]; M], (Axis<0>, Axis<1>, Axis<3>, Axis<2>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; M]; N]; O], (Axis<2>, Axis<1>, Axis<0>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; M]; N]; O], (Axis<2>, Axis<1>, Axis<0>, Axis<3>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; M]; O]; N], (Axis<1>, Axis<2>, Axis<0>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; M]; O]; N], (Axis<1>, Axis<2>, Axis<0>, Axis<3>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; N]; M]; O], (Axis<2>, Axis<0>, Axis<1>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; N]; M]; O], (Axis<2>, Axis<0>, Axis<1>, Axis<3>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; N]; O]; M], (Axis<0>, Axis<2>, Axis<1>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; N]; O]; M], (Axis<0>, Axis<2>, Axis<1>, Axis<3>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; O]; M]; N], (Axis<1>, Axis<0>, Axis<2>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; O]; M]; N], (Axis<1>, Axis<0>, Axis<2>, Axis<3>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DevicePermute<[[[[f32; P]; O]; N]; M], [[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>, Axis<3>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; M]; N]; O], (Axis<2>, Axis<1>, Axis<0>)> for Cpu
impl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; M]; N]; O], (Axis<2>, Axis<1>, Axis<0>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; M]; O]; N], (Axis<1>, Axis<2>, Axis<0>)> for Cpu
impl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; M]; O]; N], (Axis<1>, Axis<2>, Axis<0>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; N]; M]; O], (Axis<2>, Axis<0>, Axis<1>)> for Cpu
impl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; N]; M]; O], (Axis<2>, Axis<0>, Axis<1>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; N]; O]; M], (Axis<0>, Axis<2>, Axis<1>)> for Cpu
impl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; N]; O]; M], (Axis<0>, Axis<2>, Axis<1>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; O]; M]; N], (Axis<1>, Axis<0>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; O]; M]; N], (Axis<1>, Axis<0>, Axis<2>)> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize> DevicePermute<[[[f32; O]; N]; M], [[[f32; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>)> for Cpu
sourceimpl<const M: usize, const N: usize> DevicePermute<[[f32; N]; M], [[f32; M]; N], (Axis<1>, Axis<0>)> for Cpu
impl<const M: usize, const N: usize> DevicePermute<[[f32; N]; M], [[f32; M]; N], (Axis<1>, Axis<0>)> for Cpu
sourceimpl<const M: usize, const N: usize> DevicePermute<[[f32; N]; M], [[f32; N]; M], (Axis<0>, Axis<1>)> for Cpu
impl<const M: usize, const N: usize> DevicePermute<[[f32; N]; M], [[f32; N]; M], (Axis<0>, Axis<1>)> for Cpu
sourceimpl<const K: usize, const S: usize, const P: usize> DevicePool2D<K, S, P, PoolAvg> for Cpu
impl<const K: usize, const S: usize, const P: usize> DevicePool2D<K, S, P, PoolAvg> for Cpu
sourceimpl<const K: usize, const S: usize, const P: usize> DevicePool2D<K, S, P, PoolMax> for Cpu
impl<const K: usize, const S: usize, const P: usize> DevicePool2D<K, S, P, PoolMax> for Cpu
sourceimpl<const K: usize, const S: usize, const P: usize> DevicePool2D<K, S, P, PoolMin> for Cpu
impl<const K: usize, const S: usize, const P: usize> DevicePool2D<K, S, P, PoolMin> for Cpu
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>, Axis<3>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>, Axis<3>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<1>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<2>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<2>, Axis<3>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<2>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<0>, Axis<3>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<1>, Axis<2>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<1>, Axis<2>, Axis<3>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<1>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<1>, Axis<2>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<1>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<1>, Axis<3>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<2>, Axis<3>)> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], (Axis<2>, Axis<3>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], Axis<0>> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], Axis<0>> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], Axis<1>> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], Axis<1>> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills `r` with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills `t` with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates `Self::Reduced` then calls DeviceReduce::reduce_into().
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], Axis<2>> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], Axis<2>> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces `T` into Self::Reduced with accumulator A without resetting the values in `r`.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], Axis<3>> for Cpu
impl<const M: usize, const N: usize, const O: usize, const P: usize> DeviceReduce<[[[[f32; P]; O]; N]; M], Axis<3>> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[[f32; P]; O]; N]; M]
)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[[f32; P]; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], (Axis<0>, Axis<1>, Axis<2>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], (Axis<0>, Axis<1>)> for Cpu
impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], (Axis<0>, Axis<1>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], (Axis<0>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], (Axis<0>, Axis<2>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], (Axis<1>, Axis<2>)> for Cpu
impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], (Axis<1>, Axis<2>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], Axis<0>> for Cpu
impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], Axis<0>> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], Axis<1>> for Cpu
impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], Axis<1>> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], Axis<2>> for Cpu
impl<const M: usize, const N: usize, const O: usize> DeviceReduce<[[[f32; O]; N]; M], Axis<2>> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[[f32; O]; N]; M]
)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[[f32; O]; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<const M: usize, const N: usize> DeviceReduce<[[f32; N]; M], (Axis<0>, Axis<1>)> for Cpu
impl<const M: usize, const N: usize> DeviceReduce<[[f32; N]; M], (Axis<0>, Axis<1>)> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[f32; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[f32; N]; M]
)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[f32; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[f32; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<const M: usize, const N: usize> DeviceReduce<[[f32; N]; M], Axis<0>> for Cpu
impl<const M: usize, const N: usize> DeviceReduce<[[f32; N]; M], Axis<0>> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[f32; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[f32; N]; M]
)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[f32; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[f32; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<const M: usize, const N: usize> DeviceReduce<[[f32; N]; M], Axis<1>> for Cpu
impl<const M: usize, const N: usize> DeviceReduce<[[f32; N]; M], Axis<1>> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[f32; N]; M]
)
fn reduce_into_no_reset<A: Accumulator<f32>>(
r: &mut Self::Reduced,
t: &[[f32; N]; M]
)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[f32; N]; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [[f32; N]; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<T: CountElements, const M: usize> DeviceReduce<[T; M], AllAxes> for Cpu where
T::Dtype: CountElements<Dtype = T::Dtype>,
Self: DeviceReduce<T, AllAxes> + FillElements<[T; M]> + FillElements<T::Dtype>,
impl<T: CountElements, const M: usize> DeviceReduce<[T; M], AllAxes> for Cpu where
T::Dtype: CountElements<Dtype = T::Dtype>,
Self: DeviceReduce<T, AllAxes> + FillElements<[T; M]> + FillElements<T::Dtype>,
type Reduced = <Cpu as DeviceReduce<T, AllAxes>>::Reduced
type Reduced = <Cpu as DeviceReduce<T, AllAxes>>::Reduced
The smaller type.
sourcefn reduce_into_no_reset<A: Accumulator<T::Dtype>>(
r: &mut Self::Reduced,
t: &[T; M]
)
fn reduce_into_no_reset<A: Accumulator<T::Dtype>>(
r: &mut Self::Reduced,
t: &[T; M]
)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<T::Dtype>>(
t: &mut [T; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<T::Dtype>>(
t: &mut [T; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<const M: usize> DeviceReduce<[f32; M], Axis<0>> for Cpu
impl<const M: usize> DeviceReduce<[f32; M], Axis<0>> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(r: &mut Self::Reduced, t: &[f32; M])
fn reduce_into_no_reset<A: Accumulator<f32>>(r: &mut Self::Reduced, t: &[f32; M])
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [f32; M],
r: &Self::Reduced
)
fn broadcast_into_no_reset<A: Accumulator<f32>>(
t: &mut [f32; M],
r: &Self::Reduced
)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl DeviceReduce<f32, AllAxes> for Cpu
impl DeviceReduce<f32, AllAxes> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(r: &mut Self::Reduced, t: &f32)
fn reduce_into_no_reset<A: Accumulator<f32>>(r: &mut Self::Reduced, t: &f32)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(t: &mut f32, r: &Self::Reduced)
fn broadcast_into_no_reset<A: Accumulator<f32>>(t: &mut f32, r: &Self::Reduced)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl DeviceReduce<f32, Axis<0>> for Cpu
impl DeviceReduce<f32, Axis<0>> for Cpu
sourcefn reduce_into_no_reset<A: Accumulator<f32>>(r: &mut Self::Reduced, t: &f32)
fn reduce_into_no_reset<A: Accumulator<f32>>(r: &mut Self::Reduced, t: &f32)
Reduces T into Self::Reduced with accumulator A without resetting the values in r.
sourcefn broadcast_into_no_reset<A: Accumulator<f32>>(t: &mut f32, r: &Self::Reduced)
fn broadcast_into_no_reset<A: Accumulator<f32>>(t: &mut f32, r: &Self::Reduced)
sourcefn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
fn reduce_into<A: Accumulator<T::Dtype>>(r: &mut Self::Reduced, t: &T)
Fills r with Accumulator::INIT before reducing.
sourcefn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
fn broadcast_into<A: Accumulator<T::Dtype>>(t: &mut T, r: &Self::Reduced)
Fills t with Accumulator::INIT before broadcasting.
sourcefn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
fn reduce<A: Accumulator<T::Dtype>>(t: &T) -> Box<Self::Reduced>
Allocates Self::Reduced then calls DeviceReduce::reduce_into()
sourceimpl<T: CountElements, const M: usize> FillElements<[T; M]> for Cpu where
Self: FillElements<T>,
impl<T: CountElements, const M: usize> FillElements<[T; M]> for Cpu where
Self: FillElements<T>,
sourceimpl FillElements<f32> for Cpu
impl FillElements<f32> for Cpu
sourceimpl<T: CountElements, const M: usize> ForEachElement<[T; M]> for Cpu where
Self: ForEachElement<T>,
impl<T: CountElements, const M: usize> ForEachElement<[T; M]> for Cpu where
Self: ForEachElement<T>,
sourcefn foreach_m<F: FnMut(&mut <[T; M] as CountElements>::Dtype)>(
a: &mut [T; M],
f: &mut F
)
fn foreach_m<F: FnMut(&mut <[T; M] as CountElements>::Dtype)>(
a: &mut [T; M],
f: &mut F
)
Mutate elements of a by applying f to all elements of a.
sourcefn foreach_mm<F>(a: &mut [T; M], b: &mut [T; M], f: &mut F) where
F: FnMut(&mut T::Dtype, &mut T::Dtype),
fn foreach_mm<F>(a: &mut [T; M], b: &mut [T; M], f: &mut F) where
F: FnMut(&mut T::Dtype, &mut T::Dtype),
sourcefn foreach_mr<F>(a: &mut [T; M], b: &[T; M], f: &mut F) where
F: FnMut(&mut T::Dtype, &T::Dtype),
fn foreach_mr<F>(a: &mut [T; M], b: &[T; M], f: &mut F) where
F: FnMut(&mut T::Dtype, &T::Dtype),
sourceimpl ForEachElement<f32> for Cpu
impl ForEachElement<f32> for Cpu
sourcefn foreach_m<F: FnMut(&mut <f32 as CountElements>::Dtype)>(
a: &mut f32,
f: &mut F
)
fn foreach_m<F: FnMut(&mut <f32 as CountElements>::Dtype)>(
a: &mut f32,
f: &mut F
)
Mutate elements of a by applying f to all elements of a.
sourceimpl<const BATCH: usize, const M: usize, const K: usize, const N: usize> MatMul<[[[f32; K]; M]; BATCH], [[[f32; N]; K]; BATCH], [[f32; N]; M]> for Cpu where
Self: MatMul<[[f32; K]; M], [[f32; N]; K], [[f32; N]; M]>,
impl<const BATCH: usize, const M: usize, const K: usize, const N: usize> MatMul<[[[f32; K]; M]; BATCH], [[[f32; N]; K]; BATCH], [[f32; N]; M]> for Cpu where
Self: MatMul<[[f32; K]; M], [[f32; N]; K], [[f32; N]; M]>,
sourcefn mm(
a: &[[[f32; K]; M]; BATCH],
b: &[[[f32; N]; K]; BATCH],
c: &mut [[f32; N]; M]
)
fn mm(
a: &[[[f32; K]; M]; BATCH],
b: &[[[f32; N]; K]; BATCH],
c: &mut [[f32; N]; M]
)
Broadcast c BATCH times.
sourcefn mm_at(
a: &[[[f32; M]; K]; BATCH],
b: &[[[f32; N]; K]; BATCH],
c: &mut [[f32; N]; M]
)
fn mm_at(
a: &[[[f32; M]; K]; BATCH],
b: &[[[f32; N]; K]; BATCH],
c: &mut [[f32; N]; M]
)
Broadcast c BATCH times.
sourceimpl<const BATCH: usize, const M: usize, const K: usize, const N: usize> MatMul<[[[f32; K]; M]; BATCH], [[f32; N]; K], [[[f32; N]; M]; BATCH]> for Cpu where
Self: MatMul<[[f32; K]; M], [[f32; N]; K], [[f32; N]; M]>,
impl<const BATCH: usize, const M: usize, const K: usize, const N: usize> MatMul<[[[f32; K]; M]; BATCH], [[f32; N]; K], [[[f32; N]; M]; BATCH]> for Cpu where
Self: MatMul<[[f32; K]; M], [[f32; N]; K], [[f32; N]; M]>,
sourcefn mm(
a: &[[[f32; K]; M]; BATCH],
b: &[[f32; N]; K],
c: &mut [[[f32; N]; M]; BATCH]
)
fn mm(
a: &[[[f32; K]; M]; BATCH],
b: &[[f32; N]; K],
c: &mut [[[f32; N]; M]; BATCH]
)
Broadcast b BATCH times.
sourcefn mm_at(
a: &[[[f32; M]; K]; BATCH],
b: &[[f32; N]; K],
c: &mut [[[f32; N]; M]; BATCH]
)
fn mm_at(
a: &[[[f32; M]; K]; BATCH],
b: &[[f32; N]; K],
c: &mut [[[f32; N]; M]; BATCH]
)
Broadcast b BATCH times.
sourceimpl<const M: usize, const K: usize, const N: usize> MatMul<[[f32; K]; M], [[f32; N]; K], [[f32; N]; M]> for Cpu
impl<const M: usize, const K: usize, const N: usize> MatMul<[[f32; K]; M], [[f32; N]; K], [[f32; N]; M]> for Cpu
sourceimpl<const BATCH: usize, A, B, C> MatMul<[A; BATCH], [B; BATCH], [C; BATCH]> for Cpu where
Self: MatMul<A, B, C>,
A: Transpose,
B: Transpose,
C: Transpose,
[A; BATCH]: Transpose<T = [A::T; BATCH]>,
[B; BATCH]: Transpose<T = [B::T; BATCH]>,
[C; BATCH]: Transpose<T = [C::T; BATCH]>,
impl<const BATCH: usize, A, B, C> MatMul<[A; BATCH], [B; BATCH], [C; BATCH]> for Cpu where
Self: MatMul<A, B, C>,
A: Transpose,
B: Transpose,
C: Transpose,
[A; BATCH]: Transpose<T = [A::T; BATCH]>,
[B; BATCH]: Transpose<T = [B::T; BATCH]>,
[C; BATCH]: Transpose<T = [C::T; BATCH]>,
sourcefn mm(a: &[A; BATCH], b: &[B; BATCH], c: &mut [C; BATCH])
fn mm(a: &[A; BATCH], b: &[B; BATCH], c: &mut [C; BATCH])
Batched matmul
sourcefn mm_at(a: &[A::T; BATCH], b: &[B; BATCH], c: &mut [C; BATCH])
fn mm_at(a: &[A::T; BATCH], b: &[B; BATCH], c: &mut [C; BATCH])
Batched matmul
sourcefn mm_bt(a: &[A; BATCH], b: &[B::T; BATCH], c: &mut [C; BATCH])
fn mm_bt(a: &[A; BATCH], b: &[B::T; BATCH], c: &mut [C; BATCH])
Batched matmul
impl<A: Transpose, B: Transpose, C: Transpose> MatMulOp<A, B, C> for Cpu where
Self: MatMul<A, B, C> + MatMul<C, B::T, A> + MatMul<A::T, C, B>,
Auto Trait Implementations
impl RefUnwindSafe for Cpu
impl Send for Cpu
impl Sync for Cpu
impl Unpin for Cpu
impl UnwindSafe for Cpu
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more