use std::marker::PhantomData;
use furiosa_mapping::*;
use furiosa_opt_lower::{config_divide_exact, config_divide_relaxed};
use furiosa_opt_macro::primitive;
use super::VeTensorShape;
use super::VectorFinalTensor;
use crate::array_vec::ArrayVec;
use crate::context::*;
use crate::engine::vector::MAX_TAGS;
use crate::engine::vector::alu::RngdAlu;
use crate::engine::vector::branch::{TagMode, apply_branch_config};
use crate::engine::vector::layer::{FpToFxp, FxpToFp};
use crate::engine::vector::op::semantics::{HasBinaryOp, HasTernaryOp, HasUnaryOp};
use crate::engine::vector::op::{
BinaryArgMode, ClipBinaryOpF32, ClipBinaryOpI32, FpBinaryOp, FpDivBinaryOp, FpTernaryOp, FpUnaryOp, FxpBinaryOp,
HasAlu, InterSliceReduceOpF32, InterSliceReduceOpI32, IntraSliceReduceOpF32, IntraSliceReduceOpI32,
LogicBinaryOpF32, LogicBinaryOpI32, TernaryArgMode,
};
use crate::engine::vector::operand::OperandTag;
use crate::prelude::TagFilter;
use crate::scalar::Opt;
use crate::tensor::*;
use crate::engine::vector::operand::{
BinaryOperandTag, IntoOperands, IntoTernaryOperandTags, TernaryOperandTag, VeRhs,
};
use crate::engine::vector::scalar::VeScalar;
use crate::engine::vector::stage::markers as stage;
use crate::engine::vector::stage::markers::CanTransitionTo;
use crate::engine::vector::stage::markers::VeOrder;
use crate::engine::vector::stage::markers::Way::{self, Way4, Way8};
use crate::engine::vector::stage::state::VeState;
use crate::engine::vector::tensor::verify::{
verify_vector_narrow_split, verify_vector_narrow_trim, verify_vector_widen_concat, verify_vector_widen_pad,
};
use crate::tensor_state::{HasTensor, NoTensor, TensorState};
use super::vector_tensor_pair::VectorTensorPair;
/// Vector-engine tensor that has not yet entered a pipeline stage.
///
/// Unlike [`VectorTensor`], it carries no stage marker, tag tensor or `VeState`;
/// it only pairs the borrowed `TuContext` with the element data.
#[derive(Debug)]
pub struct VectorInitTensor<'l, const T: Tu, D: VeScalar, Chip: M, Cluster: M, Slice: M, Time: M, Packet: M> {
/// Exclusive borrow of the tensor-unit context for the lifetime `'l`.
pub(crate) ctx: &'l mut TuContext<{ T }>,
/// Element data laid out as `VeTensorShape<Chip, Cluster, Slice, Time, Packet>`.
pub(crate) inner: Tensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
}
impl<'l, const T: Tu, D: VeScalar, Chip: M, Cluster: M, Slice: M, Time: M, Packet: M>
VectorInitTensor<'l, T, D, Chip, Cluster, Slice, Time, Packet>
{
pub fn new(
ctx: &'l mut TuContext<{ T }>,
inner: Tensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
) -> Self {
Self { ctx, inner }
}
}
/// Context-free payload of a staged vector-engine tensor.
///
/// The stage `S`, the filter/context marker `FS`, the ordering `VE_ORDER` and the
/// way `W` exist only at the type level; `S` and `FS` are recorded through
/// `PhantomData` fields. `FS` defaults to `stage::Standalone` and `W` to `Way8`.
#[derive(Debug)]
pub struct VeTensorData<
S: stage::Stage,
D: VeScalar,
Chip: M,
Cluster: M,
Slice: M,
Time: M,
Packet: M,
StashD: VeScalar,
Stash: TensorState<StashD>,
const VE_ORDER: VeOrder,
FS: stage::VeTensorContext = stage::Standalone,
const W: Way = { Way8 },
> {
/// Element data in the vector-engine shape.
pub(crate) inner: Tensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
/// Per-element `u8` tag with the same shape as `inner`.
pub(crate) tag: Tensor<u8, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
/// Vector-engine state carried along the pipeline (ALU usage and stash).
pub(crate) ve_state: VeState<StashD, Stash>,
/// Type-level marker for the current stage.
pub(crate) _stage: PhantomData<S>,
/// Type-level marker for the filter/context state.
pub(crate) _filter_state: PhantomData<FS>,
}
/// Staged vector-engine tensor: a [`VeTensorData`] payload plus the borrowed `TuContext`.
///
/// The generic parameters are forwarded unchanged to [`VeTensorData`]; `FS`
/// defaults to `stage::Standalone` and `W` to `Way8`.
#[derive(Debug)]
pub struct VectorTensor<
'l,
const T: Tu,
S: stage::Stage,
D: VeScalar,
Chip: M,
Cluster: M,
Slice: M,
Time: M,
Packet: M,
StashD: VeScalar,
Stash: TensorState<StashD>,
const VE_ORDER: VeOrder,
FS: stage::VeTensorContext = stage::Standalone,
const W: Way = { Way8 },
> {
/// Exclusive borrow of the tensor-unit context for the lifetime `'l`.
pub(crate) ctx: &'l mut TuContext<{ T }>,
/// Data, tag and engine state for the current stage.
pub(crate) data: VeTensorData<S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>,
}
impl<
    S: stage::Stage,
    D: VeScalar,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const W: Way,
    const VE_ORDER: VeOrder,
> VeTensorData<S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>
{
    /// Mutable access to the carried vector-engine state.
    pub fn ve_state_mut(&mut self) -> &mut VeState<StashD, Stash> {
        let state = &mut self.ve_state;
        state
    }

    /// Shared access to the carried vector-engine state.
    pub fn ve_state(&self) -> &VeState<StashD, Stash> {
        let state = &self.ve_state;
        state
    }

    /// Shared access to the element data.
    pub fn inner(&self) -> &Tensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>> {
        let values = &self.inner;
        values
    }

    /// Shared access to the per-element tag tensor.
    pub fn tag(&self) -> &Tensor<u8, VeTensorShape<Chip, Cluster, Slice, Time, Packet>> {
        let tags = &self.tag;
        tags
    }

    /// Consumes the payload and returns `(inner, tag, ve_state)`; the phantom markers are dropped.
    pub fn into_parts(
        self,
    ) -> (
        Tensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
        Tensor<u8, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
        VeState<StashD, Stash>,
    ) {
        let Self {
            inner, tag, ve_state, ..
        } = self;
        (inner, tag, ve_state)
    }

    /// Applies a tagged binary operation and moves the payload to `NextStage` / `NextFS`.
    ///
    /// The stash is cloned only when at least one operand entry is `VeRhs::Stash`.
    /// `alu` is recorded on the engine state after that clone and before the
    /// operation is evaluated. The tag tensor and engine state are carried over.
    pub(crate) fn apply_binary<NextStage: stage::Stage, NextFS: stage::VeTensorContext>(
        mut self,
        alu: RngdAlu,
        op_fn: impl Fn(Opt<D>, Opt<D>) -> Opt<D>,
        operands: &ArrayVec<BinaryOperandTag<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>, MAX_TAGS>,
    ) -> VeTensorData<NextStage, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, NextFS, W> {
        let reads_stash = operands.iter().any(|entry| matches!(entry.operand0, VeRhs::Stash));
        let stash_copy: Option<Tensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>> = if !reads_stash {
            None
        } else {
            self.ve_state.force_clone_stash_as()
        };

        self.ve_state.use_alu(alu);

        let inner = apply_binary_op(
            &self.inner,
            &self.tag,
            op_fn,
            operands.as_slice(),
            stash_copy.as_ref(),
        );

        VeTensorData {
            inner,
            tag: self.tag,
            ve_state: self.ve_state,
            _stage: PhantomData,
            _filter_state: PhantomData,
        }
    }
}
impl<
    'l,
    const T: Tu,
    S: stage::Stage,
    D: VeScalar,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const W: Way,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>
{
    /// Consumes the tensor and returns `(ctx, inner, tag, ve_state)`.
    pub fn into_parts(
        self,
    ) -> (
        &'l mut TuContext<{ T }>,
        Tensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
        Tensor<u8, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
        VeState<StashD, Stash>,
    ) {
        let Self { ctx, data } = self;
        let (inner, tag, ve_state) = data.into_parts();
        (ctx, inner, tag, ve_state)
    }

    /// Consumes the tensor and returns the context borrow and the payload separately.
    pub fn into_ctx_and_data(
        self,
    ) -> (
        &'l mut TuContext<{ T }>,
        VeTensorData<S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>,
    ) {
        let Self { ctx, data } = self;
        (ctx, data)
    }

    /// Mutable access to the carried vector-engine state.
    pub fn ve_state_mut(&mut self) -> &mut VeState<StashD, Stash> {
        &mut self.data.ve_state
    }

    /// Shared access to the carried vector-engine state.
    pub fn ve_state(&self) -> &VeState<StashD, Stash> {
        &self.data.ve_state
    }

    /// Shared access to the element data.
    pub fn inner(&self) -> &Tensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>> {
        &self.data.inner
    }

    /// Shared access to the per-element tag tensor.
    pub fn tag(&self) -> &Tensor<u8, VeTensorShape<Chip, Cluster, Slice, Time, Packet>> {
        &self.data.tag
    }

    /// Shared access to the whole payload.
    pub fn data(&self) -> &VeTensorData<S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W> {
        let payload = &self.data;
        payload
    }

    /// Mutable access to the whole payload.
    pub fn data_mut(
        &mut self,
    ) -> &mut VeTensorData<S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W> {
        let payload = &mut self.data;
        payload
    }

    /// Builds a tensor from its four components; the stage markers are type-level only.
    pub fn from_parts(
        ctx: &'l mut TuContext<{ T }>,
        inner: Tensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
        tag: Tensor<u8, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
        ve_state: VeState<StashD, Stash>,
    ) -> Self {
        let data = VeTensorData {
            inner,
            tag,
            ve_state,
            _stage: PhantomData,
            _filter_state: PhantomData,
        };
        Self { ctx, data }
    }

    /// Re-attaches a context borrow to an existing payload.
    pub fn from_ctx_and_data(
        ctx: &'l mut TuContext<{ T }>,
        data: VeTensorData<S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>,
    ) -> Self {
        Self { ctx, data }
    }

    /// Runs a binary op through [`VeTensorData::apply_binary`], keeping the same context borrow.
    ///
    /// The ALU and the element function are both taken from `op`; `mode` is
    /// forwarded to `binary_op_fn` as given.
    pub(crate) fn do_binary<NextStage: stage::Stage, NextFS: stage::VeTensorContext>(
        self,
        op: impl HasAlu + HasBinaryOp<D>,
        mode: Option<BinaryArgMode>,
        operands: ArrayVec<BinaryOperandTag<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>, MAX_TAGS>,
    ) -> VectorTensor<'l, T, NextStage, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, NextFS, W> {
        let Self { ctx, data: current } = self;
        let data = current.apply_binary(op.alu(), op.binary_op_fn(mode), &operands);
        VectorTensor { ctx, data }
    }
}
impl<
    S: stage::Stashable,
    D: VeScalar,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    const W: Way,
    const VE_ORDER: VeOrder,
> VeTensorData<S, D, Chip, Cluster, Slice, Time, Packet, D, NoTensor, VE_ORDER, stage::Standalone, W>
{
    /// Stashes the current element data into the engine state.
    ///
    /// Only available while no stash is held (`NoTensor`) and the stage is
    /// `Stashable`. The data and tag are returned unchanged; the stash state
    /// type becomes `HasTensor`.
    pub fn stash(
        self,
    ) -> VeTensorData<
        S,
        D,
        Chip,
        Cluster,
        Slice,
        Time,
        Packet,
        D,
        HasTensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
        VE_ORDER,
        stage::Standalone,
        W,
    > {
        let Self {
            inner, tag, ve_state, ..
        } = self;
        let ve_state = ve_state.stash(&inner);
        VeTensorData {
            inner,
            tag,
            ve_state,
            _stage: PhantomData,
            _filter_state: PhantomData,
        }
    }
}
impl<
    'l,
    const T: Tu,
    S: stage::Stashable,
    D: VeScalar,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    const W: Way,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, D, Chip, Cluster, Slice, Time, Packet, D, NoTensor, VE_ORDER, stage::Standalone, W>
{
    /// Stashes the current element data into the engine state.
    ///
    /// Delegates to [`VeTensorData::stash`] and keeps the same context borrow;
    /// the data and tag are unchanged and the stash state becomes `HasTensor`.
    #[primitive(VectorTensor::vector_stash)]
    pub fn vector_stash(
        self,
    ) -> VectorTensor<
        'l,
        T,
        S,
        D,
        Chip,
        Cluster,
        Slice,
        Time,
        Packet,
        D,
        HasTensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
        VE_ORDER,
        stage::Standalone,
        W,
    > {
        let Self { ctx, data } = self;
        VectorTensor {
            ctx,
            data: data.stash(),
        }
    }
}
impl<
    'l,
    const T: Tu,
    S: stage::Stage,
    D: VeScalar,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::Commitable,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way8 }>
{
    /// Ends the pipeline, keeping only the context borrow and the element data.
    ///
    /// The tag tensor and engine state are dropped. Requires `Way8` and a
    /// `Commitable` context marker.
    #[primitive(VectorTensor::vector_final)]
    pub fn vector_final(self) -> VectorFinalTensor<'l, T, D, Chip, Cluster, Slice, Time, Packet> {
        let Self { ctx, data } = self;
        VectorFinalTensor::new(ctx, data.inner)
    }
}
impl<
    'l,
    const T: Tu,
    S: stage::IntraSliceStage + stage::CanTransitionTo<stage::InterSliceReduce>,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::Commitable,
>
    VectorTensor<
        'l,
        T,
        S,
        i32,
        Chip,
        Cluster,
        Slice,
        Time,
        Packet,
        StashD,
        Stash,
        { VeOrder::IntraFirst },
        FS,
        { Way8 },
    >
{
    /// Inter-slice reduction of `i32` data after the intra-slice stages (`IntraFirst`).
    ///
    /// The data is reduced and broadcast into the `OutSlice` / `OutTime` shape
    /// using `op`, starting from `Opt::Uninit`. The result starts with a fresh
    /// tag tensor and engine state.
    #[primitive(VectorTensor::vector_inter_slice_reduce)]
    pub fn vector_inter_slice_reduce<OutSlice: M, OutTime: M>(
        self,
        op: InterSliceReduceOpI32,
    ) -> VectorInterSliceReduceTensor<'l, T, i32, Chip, Cluster, OutSlice, OutTime, Packet, { VeOrder::IntraFirst }>
    {
        let Self { ctx, data } = self;
        let broadcast = data
            .inner
            .reduce_then_broadcast_with(op.lifted_reduce_fn(), Opt::Uninit);
        create_inter_slice_reduce_tensor(ctx, broadcast)
    }
}
impl<
    'l,
    const T: Tu,
    S: stage::IntraSliceStage + stage::CanTransitionTo<stage::InterSliceReduce>,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::Commitable,
>
    VectorTensor<
        'l,
        T,
        S,
        f32,
        Chip,
        Cluster,
        Slice,
        Time,
        Packet,
        StashD,
        Stash,
        { VeOrder::IntraFirst },
        FS,
        { Way8 },
    >
{
    /// Inter-slice reduction of `f32` data after the intra-slice stages (`IntraFirst`).
    ///
    /// The data is reduced and broadcast into the `OutSlice` / `OutTime` shape
    /// using `op`, starting from `Opt::Uninit`. The result starts with a fresh
    /// tag tensor and engine state.
    #[primitive(VectorTensor::vector_inter_slice_reduce)]
    pub fn vector_inter_slice_reduce<OutSlice: M, OutTime: M>(
        self,
        op: InterSliceReduceOpF32,
    ) -> VectorInterSliceReduceTensor<'l, T, f32, Chip, Cluster, OutSlice, OutTime, Packet, { VeOrder::IntraFirst }>
    {
        let Self { ctx, data } = self;
        let broadcast = data
            .inner
            .reduce_then_broadcast_with(op.lifted_reduce_fn(), Opt::Uninit);
        create_inter_slice_reduce_tensor(ctx, broadcast)
    }
}
/// Wraps already-reduced data as a tensor in the `InterSliceReduce` stage.
///
/// The tag tensor is created with `Tensor::uninit()` and the engine state with
/// `VeState::new()`, so nothing is carried over from an earlier stage.
pub(crate) fn create_inter_slice_reduce_tensor<
    'l,
    const T: Tu,
    D: VeScalar,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    const VE_ORDER: VeOrder,
>(
    ctx: &'l mut TuContext<{ T }>,
    inner: Tensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
) -> VectorInterSliceReduceTensor<'l, T, D, Chip, Cluster, Slice, Time, Packet, VE_ORDER> {
    let data = VeTensorData {
        inner,
        tag: Tensor::uninit(),
        ve_state: VeState::new(),
        _stage: PhantomData,
        _filter_state: PhantomData,
    };
    VectorTensor { ctx, data }
}
impl<'l, const T: Tu, D: VeScalar, Chip: M, Cluster: M, Slice: M, Time: M, Packet: M>
    VectorInitTensor<'l, T, D, Chip, Cluster, Slice, Time, Packet>
{
    /// Enters the `Tag` stage in `IntraFirst` order, computing tags from `branch`.
    ///
    /// See [`VectorBranchTensor::new`] for how the tag tensor is built.
    #[primitive(VectorInitTensor::vector_intra_slice_tag)]
    pub fn vector_intra_slice_tag(
        self,
        branch: TagMode,
    ) -> VectorBranchTensor<'l, T, D, Chip, Cluster, Slice, Time, Packet, D, NoTensor, { VeOrder::IntraFirst }> {
        let Self { ctx, inner } = self;
        VectorBranchTensor::new(ctx, inner, branch)
    }

    /// Builds a [`VectorTensorPair`] from this tensor.
    ///
    /// `I`, the current `Time` mapping and `TileTime` are forwarded to
    /// `VectorTensorPair::new`; the resulting pair uses `SplitTime` as its time mapping.
    #[primitive(VectorInitTensor::vector_intra_slice_unzip)]
    pub fn vector_intra_slice_unzip<I: AxisName, TileTime: M, SplitTime: M>(
        self,
    ) -> VectorTensorPair<'l, T, D, stage::Tag, Chip, Cluster, Slice, SplitTime, Packet> {
        let Self { ctx, inner } = self;
        VectorTensorPair::new::<I, Time, TileTime>(ctx, inner)
    }
}
impl<'l, const T: Tu, Chip: M, Cluster: M, Slice: M, Time: M, Packet: M>
    VectorInitTensor<'l, T, i32, Chip, Cluster, Slice, Time, Packet>
{
    /// Inter-slice reduction of `i32` data as the first vector-engine step (`InterFirst`).
    ///
    /// The data is reduced and broadcast into the `OutSlice` / `OutTime` shape
    /// using `op`, starting from `Opt::Uninit`.
    #[primitive(VectorInitTensor::vector_inter_slice_reduce)]
    pub fn vector_inter_slice_reduce<OutSlice: M, OutTime: M>(
        self,
        op: InterSliceReduceOpI32,
    ) -> VectorInterSliceReduceTensor<'l, T, i32, Chip, Cluster, OutSlice, OutTime, Packet, { VeOrder::InterFirst }>
    {
        let Self { ctx, inner } = self;
        let broadcast = inner.reduce_then_broadcast_with(op.lifted_reduce_fn(), Opt::Uninit);
        create_inter_slice_reduce_tensor(ctx, broadcast)
    }
}
impl<'l, const T: Tu, Chip: M, Cluster: M, Slice: M, Time: M, Packet: M>
    VectorInitTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet>
{
    /// Inter-slice reduction of `f32` data as the first vector-engine step (`InterFirst`).
    ///
    /// The data is reduced and broadcast into the `OutSlice` / `OutTime` shape
    /// using `op`, starting from `Opt::Uninit`.
    #[primitive(VectorInitTensor::vector_inter_slice_reduce)]
    pub fn vector_inter_slice_reduce<OutSlice: M, OutTime: M>(
        self,
        op: InterSliceReduceOpF32,
    ) -> VectorInterSliceReduceTensor<'l, T, f32, Chip, Cluster, OutSlice, OutTime, Packet, { VeOrder::InterFirst }>
    {
        let Self { ctx, inner } = self;
        let broadcast = inner.reduce_then_broadcast_with(op.lifted_reduce_fn(), Opt::Uninit);
        create_inter_slice_reduce_tensor(ctx, broadcast)
    }
}
/// [`VectorTensor`] pinned to the `stage::InterSliceReduce` stage.
///
/// The stash element type is fixed to `D` with no stash held (`NoTensor`), the
/// context marker is `stage::Standalone`, and the way is `Way8`.
pub type VectorInterSliceReduceTensor<'l, const T: Tu, D, Chip, Cluster, Slice, Time, Packet, const VE_ORDER: VeOrder> =
VectorTensor<
'l,
T,
stage::InterSliceReduce,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
D,
NoTensor,
VE_ORDER,
stage::Standalone,
{ Way8 },
>;
impl<
    'l,
    const T: Tu,
    S: stage::InterSliceStage + stage::CanTransitionTo<stage::Tag>,
    D: VeScalar,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::Commitable,
> VectorTensor<'l, T, S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, { VeOrder::InterFirst }, FS, { Way8 }>
{
    /// Enters the `Tag` stage after an inter-slice stage (`InterFirst` order).
    ///
    /// Only the context borrow and element data are carried over. The previous
    /// tag tensor and engine state are discarded and rebuilt by
    /// [`VectorBranchTensor::new`].
    #[primitive(VectorTensor::vector_intra_slice_tag)]
    pub fn vector_intra_slice_tag(
        self,
        branch: TagMode,
    ) -> VectorBranchTensor<'l, T, D, Chip, Cluster, Slice, Time, Packet, D, NoTensor, { VeOrder::InterFirst }> {
        let Self { ctx, data } = self;
        VectorBranchTensor::new(ctx, data.inner, branch)
    }
}
/// [`VectorTensor`] pinned to the `stage::Tag` stage.
///
/// `FS` defaults to `stage::Standalone` and `W` defaults to `Way8`.
pub type VectorBranchTensor<
'l,
const T: Tu,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
StashD,
Stash,
const VE_ORDER: VeOrder,
FS = stage::Standalone,
const W: Way = { Way8 },
> = VectorTensor<'l, T, stage::Tag, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>;
impl<'l, const T: Tu, D: VeScalar, Chip: M, Cluster: M, Slice: M, Time: M, Packet: M, const VE_ORDER: VeOrder>
    VectorBranchTensor<'l, T, D, Chip, Cluster, Slice, Time, Packet, D, NoTensor, VE_ORDER>
{
    /// Creates a `Tag`-stage tensor whose tags are derived from `branch_config`.
    ///
    /// The tag tensor comes from `apply_branch_config`, and the engine state
    /// starts fresh with no stash.
    ///
    /// # Panics
    ///
    /// Panics when `Packet::SIZE` is not 8.
    pub fn new(
        ctx: &'l mut TuContext<{ T }>,
        inner: Tensor<D, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
        branch_config: TagMode,
    ) -> Self {
        assert_eq!(
            Packet::SIZE,
            8,
            "VectorTensor requires Packet of 8 elements (one flit) in Way8 mode, got {}",
            Packet::SIZE,
        );

        let branch_tags = apply_branch_config(&inner, &branch_config);
        let fresh_state = VeState::new();
        Self::from_parts(ctx, inner, branch_tags, fresh_state)
    }
}
/// [`VectorTensor`] pinned to the `stage::Logic` stage.
///
/// `FS` defaults to `stage::Standalone` and `W` defaults to `Way8`.
pub type VectorLogicTensor<
'l,
const T: Tu,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
StashD,
Stash,
const VE_ORDER: VeOrder,
FS = stage::Standalone,
const W: Way = { Way8 },
> = VectorTensor<'l, T, stage::Logic, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>;
/// [`VectorTensor`] pinned to the `stage::Fxp` stage.
///
/// `FS` defaults to `stage::Standalone` and `W` defaults to `Way8`.
pub type VectorFxpTensor<
'l,
const T: Tu,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
StashD,
Stash,
const VE_ORDER: VeOrder,
FS = stage::Standalone,
const W: Way = { Way8 },
> = VectorTensor<'l, T, stage::Fxp, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>;
/// [`VectorTensor`] pinned to the `stage::FxpToFp` stage.
///
/// `FS` defaults to `stage::Standalone` and `W` defaults to `Way8`.
pub type VectorFxpToFpTensor<
'l,
const T: Tu,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
StashD,
Stash,
const VE_ORDER: VeOrder,
FS = stage::Standalone,
const W: Way = { Way8 },
> = VectorTensor<'l, T, stage::FxpToFp, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>;
/// [`VectorTensor`] pinned to the `stage::Narrow` stage.
///
/// `FS` defaults to `stage::Standalone` and `W` defaults to `Way4`.
pub type VectorNarrowTensor<
'l,
const T: Tu,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
StashD,
Stash,
const VE_ORDER: VeOrder,
FS = stage::Standalone,
const W: Way = { Way4 },
> = VectorTensor<'l, T, stage::Narrow, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>;
/// [`VectorTensor`] pinned to the `stage::Fp` stage.
///
/// `FS` defaults to `stage::Standalone` and `W` defaults to `Way4`.
pub type VectorFpTensor<
'l,
const T: Tu,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
StashD,
Stash,
const VE_ORDER: VeOrder,
FS = stage::Standalone,
const W: Way = { Way4 },
> = VectorTensor<'l, T, stage::Fp, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>;
/// [`VectorTensor`] pinned to the `stage::IntraSliceReduce` stage.
///
/// `FS` defaults to `stage::Standalone` and `W` defaults to `Way4`.
pub type VectorIntraSliceReduceTensor<
'l,
const T: Tu,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
StashD,
Stash,
const VE_ORDER: VeOrder,
FS = stage::Standalone,
const W: Way = { Way4 },
> = VectorTensor<'l, T, stage::IntraSliceReduce, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>;
/// [`VectorTensor`] pinned to the `stage::FpDiv` stage.
///
/// `FS` defaults to `stage::Standalone` and `W` defaults to `Way4`.
pub type VectorFpDivTensor<
'l,
const T: Tu,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
StashD,
Stash,
const VE_ORDER: VeOrder,
FS = stage::Standalone,
const W: Way = { Way4 },
> = VectorTensor<'l, T, stage::FpDiv, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>;
/// [`VectorTensor`] pinned to the `stage::Widen` stage.
///
/// `FS` defaults to `stage::Standalone` and `W` defaults to `Way8`.
pub type VectorWidenTensor<
'l,
const T: Tu,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
StashD,
Stash,
const VE_ORDER: VeOrder,
FS = stage::Standalone,
const W: Way = { Way8 },
> = VectorTensor<'l, T, stage::Widen, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>;
/// [`VectorTensor`] pinned to the `stage::FpToFxp` stage.
///
/// `FS` defaults to `stage::Standalone` and `W` defaults to `Way8`.
pub type VectorFpToFxpTensor<
'l,
const T: Tu,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
StashD,
Stash,
const VE_ORDER: VeOrder,
FS = stage::Standalone,
const W: Way = { Way8 },
> = VectorTensor<'l, T, stage::FpToFxp, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>;
/// [`VectorTensor`] pinned to the `stage::Clip` stage.
///
/// `FS` defaults to `stage::Standalone` and `W` defaults to `Way8`.
pub type VectorClipTensor<
'l,
const T: Tu,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
StashD,
Stash,
const VE_ORDER: VeOrder,
FS = stage::Standalone,
const W: Way = { Way8 },
> = VectorTensor<'l, T, stage::Clip, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>;
/// [`VectorTensor`] pinned to the `stage::Filter` stage.
///
/// `FS` defaults to `stage::Standalone` and `W` defaults to `Way8`.
pub type VectorFilterTensor<
'l,
const T: Tu,
D,
Chip,
Cluster,
Slice,
Time,
Packet,
StashD,
Stash,
const VE_ORDER: VeOrder,
FS = stage::Standalone,
const W: Way = { Way8 },
> = VectorTensor<'l, T, stage::Filter, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, W>;
/// Resolves the right-hand-side value of an operand at `index`.
///
/// `Const` yields the constant wrapped in `Opt::Init`, `Vrf` reads from the
/// operand's own tensor, and `Stash` reads from `stash_data`.
///
/// # Panics
///
/// Panics when `rhs` is `VeRhs::Stash` and `stash_data` is `None`.
fn rhs_at<D: VeScalar, Mapping: M>(
    rhs: &VeRhs<D, Mapping>,
    stash_data: Option<&Tensor<D, Mapping>>,
    index: &Index,
) -> Opt<D> {
    match rhs {
        VeRhs::Vrf { data } => data.read_index(index.clone()),
        VeRhs::Stash => {
            let stash = stash_data.expect("VeRhs::Stash operand requires stash_data; caller must supply it");
            stash.read_index(index.clone())
        }
        VeRhs::Const { v } => Opt::Init(*v),
    }
}
/// Applies `op(current, rhs)` at every position visited by `apply_branch_operands`.
///
/// The right-hand side is resolved with [`rhs_at`]. The left-hand side is the
/// value currently held in the output at that index, and the result is written
/// back to the same index.
pub(super) fn apply_binary_op<D: VeScalar, Mapping: M>(
    data: &Tensor<D, Mapping>,
    tag: &Tensor<u8, Mapping>,
    op: impl Fn(Opt<D>, Opt<D>) -> Opt<D>,
    operands: &[BinaryOperandTag<D, Mapping>],
    stash_data: Option<&Tensor<D, Mapping>>,
) -> Tensor<D, Mapping> {
    data.apply_branch_operands(tag, operands, |position, entry, out| {
        let right = rhs_at(entry.operand0(), stash_data, position);
        let left = out.read_index(position.clone());
        let updated = op(left, right);
        out.write_index(position.clone(), updated);
    })
}
/// Maps `op` over every element of `data` and returns the resulting tensor.
pub(super) fn apply_unary_op<D: VeScalar, Mapping: M>(
    data: &Tensor<D, Mapping>,
    op: impl Fn(Opt<D>) -> Opt<D>,
) -> Tensor<D, Mapping> {
    data.map(|element| op(*element))
}
/// Applies `op(current, rhs0, rhs1)` at every position visited by `apply_branch_operands`.
///
/// `rhs0` is resolved with [`rhs_at`], and `rhs1` is the operand's second value
/// wrapped in `Opt::Init`. The first argument is the value currently held in
/// the output at that index, and the result is written back to the same index.
pub(super) fn apply_ternary_op<Mapping: M>(
    data: &Tensor<f32, Mapping>,
    tag: &Tensor<u8, Mapping>,
    op: impl Fn(Opt<f32>, Opt<f32>, Opt<f32>) -> Opt<f32>,
    operands: &[TernaryOperandTag<Mapping>],
    stash_data: Option<&Tensor<f32, Mapping>>,
) -> Tensor<f32, Mapping> {
    data.apply_branch_operands(tag, operands, |position, entry, out| {
        let first = rhs_at(entry.operand0(), stash_data, position);
        let second = Opt::Init(entry.operand1());
        let current = out.read_index(position.clone());
        let updated = op(current, first, second);
        out.write_index(position.clone(), updated);
    })
}
impl<
    'l,
    const T: Tu,
    S,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, i32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way8 }>
where
    S: stage::Stage + CanTransitionTo<stage::Logic>,
{
    /// Applies an `i32` logic op with no explicit argument mode and moves to the `Logic` stage.
    #[primitive(VectorTensor::vector_logic)]
    pub fn vector_logic(
        self,
        op: LogicBinaryOpI32,
        operand: impl IntoOperands<i32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorLogicTensor<'l, T, i32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        let tagged_operands = operand.into_operands();
        self.do_binary(op, None, tagged_operands)
    }

    /// Applies an `i32` logic op with the given argument `mode` and moves to the `Logic` stage.
    #[primitive(VectorTensor::vector_logic_with_mode)]
    pub fn vector_logic_with_mode(
        self,
        op: LogicBinaryOpI32,
        mode: BinaryArgMode,
        operand: impl IntoOperands<i32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorLogicTensor<'l, T, i32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        let tagged_operands = operand.into_operands();
        self.do_binary(op, Some(mode), tagged_operands)
    }
}
impl<
    'l,
    const T: Tu,
    S,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way8 }>
where
    S: stage::Stage + CanTransitionTo<stage::Logic>,
{
    /// Applies an `f32` logic op with no explicit argument mode and moves to the `Logic` stage.
    #[primitive(VectorTensor::vector_logic)]
    pub fn vector_logic(
        self,
        op: LogicBinaryOpF32,
        operand: impl IntoOperands<f32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorLogicTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        let tagged_operands = operand.into_operands();
        self.do_binary(op, None, tagged_operands)
    }

    /// Applies an `f32` logic op with the given argument `mode` and moves to the `Logic` stage.
    #[primitive(VectorTensor::vector_logic_with_mode)]
    pub fn vector_logic_with_mode(
        self,
        op: LogicBinaryOpF32,
        mode: BinaryArgMode,
        operand: impl IntoOperands<f32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorLogicTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        let tagged_operands = operand.into_operands();
        self.do_binary(op, Some(mode), tagged_operands)
    }
}
impl<
    'l,
    const T: Tu,
    S,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, i32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way8 }>
where
    S: stage::Stage + CanTransitionTo<stage::Fxp>,
{
    /// Applies a fixed-point binary op with no explicit argument mode and moves to the `Fxp` stage.
    #[primitive(VectorTensor::vector_fxp)]
    pub fn vector_fxp(
        self,
        op: FxpBinaryOp,
        operand: impl IntoOperands<i32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorFxpTensor<'l, T, i32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        let tagged_operands = operand.into_operands();
        self.do_binary(op, None, tagged_operands)
    }

    /// Applies a fixed-point binary op with the given argument `mode` and moves to the `Fxp` stage.
    #[primitive(VectorTensor::vector_fxp_with_mode)]
    pub fn vector_fxp_with_mode(
        self,
        op: FxpBinaryOp,
        mode: BinaryArgMode,
        operand: impl IntoOperands<i32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorFxpTensor<'l, T, i32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        let tagged_operands = operand.into_operands();
        self.do_binary(op, Some(mode), tagged_operands)
    }
}
impl<
    'l,
    const T: Tu,
    S,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, i32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way8 }>
where
    S: stage::Stage + CanTransitionTo<stage::FxpToFp>,
{
    /// Converts the `i32` data to `f32` using an `FxpToFp` layer built from `int_width`.
    ///
    /// The conversion is mapped over each element through `Opt::map`. The tag
    /// tensor and engine state are carried over unchanged.
    #[primitive(VectorTensor::vector_fxp_to_fp)]
    pub fn vector_fxp_to_fp(
        self,
        int_width: u32,
    ) -> VectorFxpToFpTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        let converter = FxpToFp::new(int_width);
        let convert = converter.op_fn();
        let converted = self.inner().map(|element| element.map(&convert));

        // The old i32 data is dropped; only context, tag and state move on.
        let (ctx, _fixed_point, tag, ve_state) = self.into_parts();
        VectorFxpToFpTensor::from_parts(ctx, converted, tag, ve_state)
    }
}
impl<
    'l,
    const T: Tu,
    S,
    D: VeScalar,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way8 }>
where
    S: stage::Stage + CanTransitionTo<stage::Narrow>,
{
    /// Moves from `Way8` to the `Way4` `Narrow` stage by re-laying out time and packet.
    ///
    /// The shapes are checked by `verify_vector_narrow_split`. Data and tag are
    /// then both transposed into `Time2` / `Packet2`, and the engine state is
    /// carried over.
    #[primitive(VectorTensor::vector_narrow_split)]
    pub fn vector_narrow_split<Time2: M, Packet2: M>(
        self,
    ) -> VectorNarrowTensor<'l, T, D, Chip, Cluster, Slice, Time2, Packet2, StashD, Stash, VE_ORDER, FS, { Way4 }> {
        verify_vector_narrow_split::<Time, Packet, Time2, Packet2>();

        let (ctx, wide_data, wide_tag, ve_state) = self.into_parts();
        let narrow_data = wide_data.transpose::<VeTensorShape<Chip, Cluster, Slice, Time2, Packet2>>(true);
        let narrow_tag = wide_tag.transpose::<VeTensorShape<Chip, Cluster, Slice, Time2, Packet2>>(true);
        VectorNarrowTensor::from_parts(ctx, narrow_data, narrow_tag, ve_state)
    }
}
impl<
    'l,
    const T: Tu,
    S,
    D: VeScalar,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way8 }>
where
    S: stage::Stage + CanTransitionTo<stage::Narrow>,
{
    /// Moves from `Way8` to the `Way4` `Narrow` stage by changing only the packet mapping.
    ///
    /// The shapes are checked by `verify_vector_narrow_trim`. Data and tag are
    /// then both transposed into `Packet2` with `Time` kept, and the engine
    /// state is carried over.
    #[primitive(VectorTensor::vector_narrow_trim)]
    pub fn vector_narrow_trim<Packet2: M>(
        self,
    ) -> VectorNarrowTensor<'l, T, D, Chip, Cluster, Slice, Time, Packet2, StashD, Stash, VE_ORDER, FS, { Way4 }> {
        verify_vector_narrow_trim::<Packet, Packet2>();

        let (ctx, wide_data, wide_tag, ve_state) = self.into_parts();
        let trimmed_data = wide_data.transpose::<VeTensorShape<Chip, Cluster, Slice, Time, Packet2>>(true);
        let trimmed_tag = wide_tag.transpose::<VeTensorShape<Chip, Cluster, Slice, Time, Packet2>>(true);
        VectorNarrowTensor::from_parts(ctx, trimmed_data, trimmed_tag, ve_state)
    }
}
impl<
    'l,
    const T: Tu,
    S,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way4 }>
where
    S: stage::Stage + CanTransitionTo<stage::Fp>,
{
    /// Applies a unary floating-point op to every element and moves to the `Fp` stage.
    ///
    /// The op's ALU is recorded on the engine state before the data is mapped.
    /// The tag tensor and engine state are carried over.
    #[primitive(VectorTensor::vector_fp_unary)]
    pub fn vector_fp_unary(
        mut self,
        op: FpUnaryOp,
    ) -> VectorFpTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        self.ve_state_mut().use_alu(op.alu());
        let result = apply_unary_op(self.inner(), op.unary_op_fn());
        let (ctx, _inner, tag, ve_state) = self.into_parts();
        VectorFpTensor::from_parts(ctx, result, tag, ve_state)
    }

    /// Applies a binary floating-point op with no explicit argument mode and moves to the `Fp` stage.
    #[primitive(VectorTensor::vector_fp_binary)]
    pub fn vector_fp_binary(
        self,
        op: FpBinaryOp,
        operand: impl IntoOperands<f32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorFpTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        self.do_binary(op, None, operand.into_operands())
    }

    /// Applies a binary floating-point op with the given argument `mode` and moves to the `Fp` stage.
    #[primitive(VectorTensor::vector_fp_binary_with_mode)]
    pub fn vector_fp_binary_with_mode(
        self,
        op: FpBinaryOp,
        mode: BinaryArgMode,
        operand: impl IntoOperands<f32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorFpTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        self.do_binary(op, Some(mode), operand.into_operands())
    }

    /// Applies a ternary floating-point op using `TernaryArgMode::Mode012`.
    #[primitive(VectorTensor::vector_fp_ternary)]
    pub fn vector_fp_ternary(
        self,
        op: FpTernaryOp,
        operands: impl IntoTernaryOperandTags<VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorFpTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        self.vector_fp_ternary_with_mode(op, TernaryArgMode::Mode012, operands)
    }

    /// Applies a ternary floating-point op with the given argument `mode` and moves to the `Fp` stage.
    ///
    /// The stash is cloned only when at least one operand entry is
    /// `VeRhs::Stash`, matching `VeTensorData::apply_binary`. The op's ALU is
    /// recorded on the engine state after that clone and before evaluation. The
    /// tag tensor and engine state are carried over.
    ///
    /// # Panics
    ///
    /// Panics (inside `rhs_at`) when an operand is `VeRhs::Stash` but no stash
    /// data could be obtained from the engine state.
    #[primitive(VectorTensor::vector_fp_ternary_with_mode)]
    pub fn vector_fp_ternary_with_mode(
        mut self,
        op: FpTernaryOp,
        mode: TernaryArgMode,
        operands: impl IntoTernaryOperandTags<VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorFpTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        let operands = operands.into_ternary_operands();

        // Skip the forced stash clone when no operand will ever read it.
        let uses_stash = operands
            .as_slice()
            .iter()
            .any(|operand| matches!(operand.operand0(), VeRhs::Stash));
        let stash_data: Option<Tensor<f32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>> = if uses_stash {
            self.ve_state().force_clone_stash_as()
        } else {
            None
        };

        self.ve_state_mut().use_alu(op.alu());
        let op_fn = op.ternary_op_fn(Some(mode));
        let result = apply_ternary_op(
            self.inner(),
            self.tag(),
            op_fn,
            operands.as_slice(),
            stash_data.as_ref(),
        );
        let (ctx, _inner, tag, ve_state) = self.into_parts();
        VectorFpTensor::from_parts(ctx, result, tag, ve_state)
    }
}
/// Checks that an intra-slice reduction from `time × packet` to `out_time × out_packet` is well formed.
///
/// # Panics
///
/// Panics when any of the following holds:
/// - the output shape does not exactly divide the input shape;
/// - the relaxed-division residue contains an identifier other than `reduce_label`;
/// - `reduce_label` still appears in one of the exact division terms;
/// - the normalized output packet is neither equal to the normalized input
///   packet nor equal to the normalized `m![1 # 4]` mapping.
fn verify_reduce_label(time: Mapping, packet: Mapping, out_time: Mapping, out_packet: Mapping, reduce_label: &Ident) {
    let source_shape = time.pair(packet.clone());
    let target_shape = out_time.pair(out_packet.clone());

    let exact_terms = config_divide_exact(&source_shape, &target_shape)
        .expect("[Intra-slice reduce] divide failed: output shape must divide input shape");
    let residue = config_divide_relaxed(&source_shape, &target_shape).dividend_residue;

    let residue_is_only_label = residue.idents().iter().all(|ident| ident == reduce_label);
    assert!(
        residue_is_only_label,
        "IntraSliceReduce: all reduced axes must match the specified reduce_label {}, got quotient {} with idents {:?}",
        reduce_label,
        residue,
        residue.idents()
    );

    let label_survives = exact_terms
        .iter()
        .any(|term| term.idents.iter().any(|ident| ident == reduce_label));
    assert!(
        !label_survives,
        "IntraSliceReduce: all the reduce axes should be fully reduced (not present in the division terms), got reduce_label {} appearing in division {:?}",
        reduce_label,
        exact_terms,
    );

    // These shadowed names are captured by the format string below.
    let packet = packet.normalize();
    let out_packet = out_packet.normalize();
    assert!(
        packet == out_packet || out_packet == <m![1 # 4]>::to_value().normalize(),
        "IntraSliceReduce: Packet should be either preserved or reduced to 4 (for partial reduction), got Packet {packet} → OutPacket {out_packet}",
    );
}
/// Reduces a tag tensor into the `OutTime` / `OutPacket` shape.
///
/// The combiner always returns its second argument, and the reduction starts
/// from `Opt::Uninit`.
fn reduce_tag<Chip: M, Cluster: M, Slice: M, Time: M, Packet: M, OutTime: M, OutPacket: M>(
    tag: Tensor<u8, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
) -> Tensor<u8, VeTensorShape<Chip, Cluster, Slice, OutTime, OutPacket>> {
    tag.reduce::<VeTensorShape<Chip, Cluster, Slice, OutTime, OutPacket>>(
        |_first, second| second,
        Opt::Uninit,
    )
}
impl<
    'l,
    const T: Tu,
    S,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, i32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way4 }>
where
    S: stage::Stage + CanTransitionTo<stage::IntraSliceReduce>,
{
    /// Reduces `i32` data within a slice into the `OutTime` / `OutPacket` shape.
    ///
    /// The op's ALU is recorded first, then the shapes are checked with
    /// `verify_reduce_label` against the `Reduce` axis name. The data is reduced
    /// with `op` and the tag with `reduce_tag`, both starting from `Opt::Uninit`.
    /// The result uses the `stage::Standalone` context marker.
    ///
    /// # Panics
    ///
    /// Panics when `verify_reduce_label` rejects the shape combination.
    #[primitive(VectorTensor::vector_intra_slice_reduce)]
    pub fn vector_intra_slice_reduce<Reduce: AxisName, OutTime: M, OutPacket: M>(
        mut self,
        op: IntraSliceReduceOpI32,
    ) -> VectorIntraSliceReduceTensor<
        'l,
        T,
        i32,
        Chip,
        Cluster,
        Slice,
        OutTime,
        OutPacket,
        StashD,
        Stash,
        VE_ORDER,
        stage::Standalone,
        { Way4 },
    > {
        let alu = op.alu();
        self.ve_state_mut().use_alu(alu);
        let (ctx, values, tags, ve_state) = self.into_parts();

        verify_reduce_label(
            Time::to_value(),
            Packet::to_value(),
            OutTime::to_value(),
            OutPacket::to_value(),
            &Reduce::NAME,
        );

        let reduced_values = values
            .reduce::<VeTensorShape<Chip, Cluster, Slice, OutTime, OutPacket>>(op.lifted_reduce_fn(), Opt::Uninit);
        let reduced_tags = reduce_tag::<Chip, Cluster, Slice, Time, Packet, OutTime, OutPacket>(tags);
        VectorIntraSliceReduceTensor::from_parts(ctx, reduced_values, reduced_tags, ve_state)
    }
}
impl<
    'l,
    const T: Tu,
    S,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way4 }>
where
    S: stage::Stage + CanTransitionTo<stage::IntraSliceReduce>,
{
    /// Reduces `f32` data within a slice into the `OutTime` / `OutPacket` shape.
    ///
    /// The op's ALU is recorded first, then the shapes are checked with
    /// `verify_reduce_label` against the `Reduce` axis name. The data is reduced
    /// with `op` and the tag with `reduce_tag`, both starting from `Opt::Uninit`.
    /// The result uses the `stage::Standalone` context marker.
    ///
    /// # Panics
    ///
    /// Panics when `verify_reduce_label` rejects the shape combination.
    #[primitive(VectorTensor::vector_intra_slice_reduce)]
    pub fn vector_intra_slice_reduce<Reduce: AxisName, OutTime: M, OutPacket: M>(
        mut self,
        op: IntraSliceReduceOpF32,
    ) -> VectorIntraSliceReduceTensor<
        'l,
        T,
        f32,
        Chip,
        Cluster,
        Slice,
        OutTime,
        OutPacket,
        StashD,
        Stash,
        VE_ORDER,
        stage::Standalone,
        { Way4 },
    > {
        let alu = op.alu();
        self.ve_state_mut().use_alu(alu);
        let (ctx, values, tags, ve_state) = self.into_parts();

        verify_reduce_label(
            Time::to_value(),
            Packet::to_value(),
            OutTime::to_value(),
            OutPacket::to_value(),
            &Reduce::NAME,
        );

        let reduced_values = values
            .reduce::<VeTensorShape<Chip, Cluster, Slice, OutTime, OutPacket>>(op.lifted_reduce_fn(), Opt::Uninit);
        let reduced_tags = reduce_tag::<Chip, Cluster, Slice, Time, Packet, OutTime, OutPacket>(tags);
        VectorIntraSliceReduceTensor::from_parts(ctx, reduced_values, reduced_tags, ve_state)
    }
}
impl<
    'l,
    const T: Tu,
    S,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way4 }>
where
    S: stage::Stage + CanTransitionTo<stage::FpDiv>,
{
    /// Applies `FpDivBinaryOp::DivF` with no explicit argument mode and moves to the `FpDiv` stage.
    ///
    /// The context marker `FS` is preserved in the result.
    #[primitive(VectorTensor::vector_fp_div)]
    pub fn vector_fp_div(
        self,
        operand: impl IntoOperands<f32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorFpDivTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way4 }> {
        let divisors = operand.into_operands();
        self.do_binary(FpDivBinaryOp::DivF, None, divisors)
    }

    /// Applies `FpDivBinaryOp::DivF` with the given argument `mode` and moves to the `FpDiv` stage.
    ///
    /// The context marker `FS` is preserved in the result.
    #[primitive(VectorTensor::vector_fp_div_with_mode)]
    pub fn vector_fp_div_with_mode(
        self,
        mode: BinaryArgMode,
        operand: impl IntoOperands<f32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorFpDivTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way4 }> {
        let divisors = operand.into_operands();
        self.do_binary(FpDivBinaryOp::DivF, Some(mode), divisors)
    }
}
/// `Way4` to `Way8` widening by concatenation, for any element type `D`.
///
/// The method in this block is available from any stage `S` that can
/// transition to `stage::Widen`.
impl<
    'l,
    const T: Tu,
    S,
    D: VeScalar,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way4 }>
where
    S: stage::Stage + CanTransitionTo<stage::Widen>,
{
    /// Re-lays out this tensor from (`Time`, `Packet`) to (`Time2`, `Packet2`)
    /// and returns it as a `Way8` `VectorWidenTensor`.
    ///
    /// `verify_vector_widen_concat` is invoked on the four mappings before
    /// the tensor is taken apart. Data and tag are then both converted with
    /// `transpose::<..>(true)` into the new shape.
    ///
    /// NOTE(review): the meaning of the `true` flag passed to `transpose` is
    /// not visible here; confirm against `transpose`'s definition.
    #[primitive(VectorTensor::vector_widen_concat)]
    pub fn vector_widen_concat<Time2: M, Packet2: M>(
        self,
    ) -> VectorWidenTensor<'l, T, D, Chip, Cluster, Slice, Time2, Packet2, StashD, Stash, VE_ORDER, FS, { Way8 }> {
        verify_vector_widen_concat::<Time, Packet, Time2, Packet2>();

        let (ctx, data, tag, state) = self.into_parts();

        // Data and tag must end up in the same widened shape.
        let widened_data = data.transpose::<VeTensorShape<Chip, Cluster, Slice, Time2, Packet2>>(true);
        let widened_tag = tag.transpose::<VeTensorShape<Chip, Cluster, Slice, Time2, Packet2>>(true);

        VectorWidenTensor::from_parts(ctx, widened_data, widened_tag, state)
    }
}
/// `Way4` to `Way8` widening by padding, for any element type `D`.
///
/// The method in this block is available from any stage `S` that can
/// transition to `stage::Widen`.
impl<
    'l,
    const T: Tu,
    S,
    D: VeScalar,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way4 }>
where
    S: stage::Stage + CanTransitionTo<stage::Widen>,
{
    /// Changes the packet mapping from `Packet` to `Packet2`, keeping `Time`,
    /// and returns the result as a `Way8` `VectorWidenTensor`.
    ///
    /// `verify_vector_widen_pad` is invoked on `Packet` / `Packet2` before
    /// the tensor is taken apart. Data and tag are then both converted with
    /// `transpose::<..>(true)` into the new shape.
    ///
    /// NOTE(review): the padding itself is presumably performed by
    /// `transpose` when called with `true`; confirm against its definition.
    #[primitive(VectorTensor::vector_widen_pad)]
    pub fn vector_widen_pad<Packet2: M>(
        self,
    ) -> VectorWidenTensor<'l, T, D, Chip, Cluster, Slice, Time, Packet2, StashD, Stash, VE_ORDER, FS, { Way8 }> {
        verify_vector_widen_pad::<Packet, Packet2>();

        let (ctx, data, tag, state) = self.into_parts();

        // Data and tag must end up in the same widened shape.
        let padded_data = data.transpose::<VeTensorShape<Chip, Cluster, Slice, Time, Packet2>>(true);
        let padded_tag = tag.transpose::<VeTensorShape<Chip, Cluster, Slice, Time, Packet2>>(true);

        VectorWidenTensor::from_parts(ctx, padded_data, padded_tag, state)
    }
}
/// Float to fixed-point conversion for `f32` tensors in the `Way8` layout.
///
/// The method in this block is available from any stage `S` that can
/// transition to `stage::FpToFxp`.
impl<
    'l,
    const T: Tu,
    S,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way8 }>
where
    S: stage::Stage + CanTransitionTo<stage::FpToFxp>,
{
    /// Converts every element with the function produced by
    /// `FpToFxp::new(int_width).op_fn()` and returns an `i32` tensor with the
    /// same mappings.
    ///
    /// The tag, context and VE state are carried over unchanged; the
    /// original `f32` data returned by `into_parts` is discarded.
    ///
    /// # Parameters
    /// * `int_width` - forwarded to `FpToFxp::new`. Presumably the number of
    ///   integer bits of the fixed-point format; confirm against `FpToFxp`.
    #[primitive(VectorTensor::vector_fp_to_fxp)]
    pub fn vector_fp_to_fxp(
        self,
        int_width: u32,
    ) -> VectorFpToFxpTensor<'l, T, i32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        // Keep the op alive in its own binding in case the closure borrows it.
        let converter = FpToFxp::new(int_width);
        let convert = converter.op_fn();

        // Map while `self` is still borrowed; it is consumed right after.
        let converted = self.inner().map(|&elem| elem.map(&convert));

        let (ctx, _original, tag, state) = self.into_parts();
        VectorFpToFxpTensor::from_parts(ctx, converted, tag, state)
    }
}
/// Clip operations for `i32` tensors in the `Way8` layout.
///
/// The methods in this block are available from any stage `S` that can
/// transition to `stage::Clip`.
impl<
    'l,
    const T: Tu,
    S,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, i32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way8 }>
where
    S: stage::Stage + CanTransitionTo<stage::Clip>,
{
    /// Applies the `i32` clip operation `op` to this tensor and `operand`.
    ///
    /// No `BinaryArgMode` is supplied (`None` is forwarded to `do_binary`).
    #[primitive(VectorTensor::vector_clip)]
    pub fn vector_clip(
        self,
        op: ClipBinaryOpI32,
        operand: impl IntoOperands<i32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorClipTensor<'l, T, i32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        let operands = operand.into_operands();
        self.do_binary(op, None, operands)
    }

    /// Applies the `i32` clip operation `op` to this tensor and `operand`,
    /// forwarding an explicit argument `mode` to `do_binary`.
    #[primitive(VectorTensor::vector_clip_with_mode)]
    pub fn vector_clip_with_mode(
        self,
        op: ClipBinaryOpI32,
        mode: BinaryArgMode,
        operand: impl IntoOperands<i32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorClipTensor<'l, T, i32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        let operands = operand.into_operands();
        let arg_mode = Some(mode);
        self.do_binary(op, arg_mode, operands)
    }
}
/// Clip operations for `f32` tensors in the `Way8` layout.
///
/// The methods in this block are available from any stage `S` that can
/// transition to `stage::Clip`.
impl<
    'l,
    const T: Tu,
    S,
    Chip: M,
    Cluster: M,
    Slice: M,
    Time: M,
    Packet: M,
    StashD: VeScalar,
    Stash: TensorState<StashD>,
    FS: stage::VeTensorContext,
    const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, FS, { Way8 }>
where
    S: stage::Stage + CanTransitionTo<stage::Clip>,
{
    /// Applies the `f32` clip operation `op` to this tensor and `operand`.
    ///
    /// No `BinaryArgMode` is supplied (`None` is forwarded to `do_binary`).
    #[primitive(VectorTensor::vector_clip)]
    pub fn vector_clip(
        self,
        op: ClipBinaryOpF32,
        operand: impl IntoOperands<f32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorClipTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        let operands = operand.into_operands();
        self.do_binary(op, None, operands)
    }

    /// Applies the `f32` clip operation `op` to this tensor and `operand`,
    /// forwarding an explicit argument `mode` to `do_binary`.
    #[primitive(VectorTensor::vector_clip_with_mode)]
    pub fn vector_clip_with_mode(
        self,
        op: ClipBinaryOpF32,
        mode: BinaryArgMode,
        operand: impl IntoOperands<f32, VeTensorShape<Chip, Cluster, Slice, Time, Packet>>,
    ) -> VectorClipTensor<'l, T, f32, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER> {
        let operands = operand.into_operands();
        let arg_mode = Some(mode);
        self.do_binary(op, arg_mode, operands)
    }
}
/// Tag-based filtering for tensors of any element type `D` in the `Way8`
/// layout whose tensor context is `stage::Standalone`.
///
/// The method in this block is available from any stage `S` that can
/// transition to `stage::Filter`.
impl<
'l,
const T: Tu,
S,
D: VeScalar,
Chip: M,
Cluster: M,
Slice: M,
Time: M,
Packet: M,
StashD: VeScalar,
Stash: TensorState<StashD>,
const VE_ORDER: VeOrder,
> VectorTensor<'l, T, S, D, Chip, Cluster, Slice, Time, Packet, StashD, Stash, VE_ORDER, stage::Standalone, { Way8 }>
where
S: stage::Stage + CanTransitionTo<stage::Filter>,
{
/// Declared to produce a `VectorFilterTensor` whose time mapping is `Time2`
/// (packet mapping unchanged), but **not implemented yet**.
///
/// # Parameters
/// * `_config` - filter configuration; currently unused.
///
/// # Panics
/// Always panics: the body is a `todo!` placeholder.
#[primitive(VectorTensor::vector_filter)]
pub fn vector_filter<Time2: M>(
self,
_config: TagFilter,
) -> VectorFilterTensor<
'l,
T,
D,
Chip,
Cluster,
Slice,
Time2,
Packet,
StashD,
Stash,
VE_ORDER,
stage::Standalone,
{ Way8 },
> {
// Stub: signature is fixed so callers type-check; behavior is pending.
todo!("Implement vector_filter operation")
}
}