pub struct DequantizePerTokenPlan<TIn: Element, TOut: IntElement> { /* private fields */ }
Expand description
Plan for the dequantize_per_token operation.
Computes x[n, d] = scale[n] * (q[n, d] - zero_point[n]); this is the inverse of
QuantizePerTokenPlan.
When to use: recovering floating-point (FP) values from W8A8-style
per-row-quantized activations. Pair with
DequantizePerTokenBackwardPlan.
Dtypes: input int {s8, u8}; output FP {f32, f64, f16, bf16}.
Shape limits: rank-2 [N, D]; per-row scale and zero_point (zp), each of
length N.
Workspace: none.
Precision guarantee: deterministic, bit-stable.
Implementations§
Source §
impl<TIn: Element, TOut: IntElement> DequantizePerTokenPlan<TIn, TOut>
impl<TIn: Element, TOut: IntElement> DequantizePerTokenPlan<TIn, TOut>
Source
pub fn select(
_stream: &Stream,
desc: &DequantizePerTokenDescriptor,
_pref: PlanPreference,
) -> Result<Self>
pub fn select( _stream: &Stream, desc: &DequantizePerTokenDescriptor, _pref: PlanPreference, ) -> Result<Self>
Pick a kernel for desc.
Source
pub fn can_implement(
&self,
args: &DequantizePerTokenArgs<'_, TIn, TOut>,
) -> Result<()>
pub fn can_implement( &self, args: &DequantizePerTokenArgs<'_, TIn, TOut>, ) -> Result<()>
Validate args.
Source
pub fn workspace_size(&self) -> usize
pub fn workspace_size(&self) -> usize
Workspace bytes — none.
Source
pub fn precision_guarantee(&self) -> PrecisionGuarantee
pub fn precision_guarantee(&self) -> PrecisionGuarantee
Numerical guarantees.
Auto Trait Implementations§
impl<TIn, TOut> Freeze for DequantizePerTokenPlan<TIn, TOut>
impl<TIn, TOut> RefUnwindSafe for DequantizePerTokenPlan<TIn, TOut> where
TIn: RefUnwindSafe,
TOut: RefUnwindSafe,
impl<TIn, TOut> Send for DequantizePerTokenPlan<TIn, TOut>
impl<TIn, TOut> Sync for DequantizePerTokenPlan<TIn, TOut>
impl<TIn, TOut> Unpin for DequantizePerTokenPlan<TIn, TOut>
impl<TIn, TOut> UnsafeUnpin for DequantizePerTokenPlan<TIn, TOut>
impl<TIn, TOut> UnwindSafe for DequantizePerTokenPlan<TIn, TOut> where
TIn: UnwindSafe,
TOut: UnwindSafe,
Blanket Implementations§
Source §
impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source §
fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more