Struct SmoothQuantLinearArgs

Source

/// Args bundle for a SmoothQuant linear launch.
///
/// `act_scale_scratch` is a caller-owned `[M]` FP scratch buffer used to
/// broadcast the descriptor's per-tensor `act_scale` into the per-row form
/// the underlying `quantized_linear_w8a8` kernel consumes.
pub struct SmoothQuantLinearArgs<'a, TIn: Element, TWQ: IntElement> {
    /// Pre-quantized int8 activation `[M, K]`.
    pub act_q: TensorRef<'a, S8, 2>,
    /// Pre-smoothed-then-quantized int8 weight `[N, K]`.
    pub weight_q: TensorRef<'a, TWQ, 2>,
    /// Per-output-channel weight scale `[N]` in FP.
    pub weight_scale: TensorRef<'a, TIn, 1>,
    /// FP output `[M, N]`.
    pub output: TensorMut<'a, TIn, 2>,
    /// Scratch for the per-row broadcast of `act_scale`, `[M]` FP.
    /// Caller-owned and reused across launches; populated by the plan
    /// before the matmul launch.
    pub act_scale_scratch: TensorMut<'a, TIn, 1>,
}

Expand description

Args bundle for a SmoothQuant linear launch.

act_scale_scratch is a caller-owned [M] FP scratch buffer used to broadcast the descriptor’s per-tensor act_scale into the per-row form the underlying quantized_linear_w8a8 kernel consumes. It is caller-owned so that it can be reused across launches without re-allocation, which is why the Plan’s workspace_size() returns 0.

Fields§

§act_q: TensorRef<'a, S8, 2>

Pre-quantized int8 activation [M, K].

§weight_q: TensorRef<'a, TWQ, 2>

Pre-smoothed-then-quantized int8 weight [N, K].

§weight_scale: TensorRef<'a, TIn, 1>

Per-output-channel weight scale [N] in FP.

§output: TensorMut<'a, TIn, 2>

FP output [M, N].

§act_scale_scratch: TensorMut<'a, TIn, 1>

Scratch for the per-row broadcast of act_scale. [M] FP. Caller-owned; reused across launches. Populated by the plan before the matmul launch.

Auto Trait Implementations§

§

impl<'a, TIn, TWQ> !UnwindSafe for SmoothQuantLinearArgs<'a, TIn, TWQ>

§

impl<'a, TIn, TWQ> Freeze for SmoothQuantLinearArgs<'a, TIn, TWQ>

§

impl<'a, TIn, TWQ> RefUnwindSafe for SmoothQuantLinearArgs<'a, TIn, TWQ>
where TWQ: RefUnwindSafe, TIn: RefUnwindSafe,

§

impl<'a, TIn, TWQ> Send for SmoothQuantLinearArgs<'a, TIn, TWQ>
where TWQ: Sync, TIn: Sync + Send,

§

impl<'a, TIn, TWQ> Sync for SmoothQuantLinearArgs<'a, TIn, TWQ>
where TWQ: Sync, TIn: Sync,

§

SmoothQuantLinearArgs

Struct SmoothQuantLinearArgs Copy item path

Fields§

Auto Trait Implementations§

impl<'a, TIn, TWQ> !UnwindSafe for SmoothQuantLinearArgs<'a, TIn, TWQ>

impl<'a, TIn, TWQ> Freeze for SmoothQuantLinearArgs<'a, TIn, TWQ>

impl<'a, TIn, TWQ> RefUnwindSafe for SmoothQuantLinearArgs<'a, TIn, TWQ> where TWQ: RefUnwindSafe, TIn: RefUnwindSafe,

impl<'a, TIn, TWQ> Send for SmoothQuantLinearArgs<'a, TIn, TWQ> where TWQ: Sync, TIn: Sync + Send,

impl<'a, TIn, TWQ> Sync for SmoothQuantLinearArgs<'a, TIn, TWQ> where TWQ: Sync, TIn: Sync,

impl<'a, TIn, TWQ> Unpin for SmoothQuantLinearArgs<'a, TIn, TWQ>

impl<'a, TIn, TWQ> UnsafeUnpin for SmoothQuantLinearArgs<'a, TIn, TWQ>

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Struct SmoothQuantLinearArgs

impl<'a, TIn, TWQ> RefUnwindSafe for SmoothQuantLinearArgs<'a, TIn, TWQ>
where TWQ: RefUnwindSafe, TIn: RefUnwindSafe,

impl<'a, TIn, TWQ> Send for SmoothQuantLinearArgs<'a, TIn, TWQ>
where TWQ: Sync, TIn: Sync + Send,

impl<'a, TIn, TWQ> Sync for SmoothQuantLinearArgs<'a, TIn, TWQ>
where TWQ: Sync, TIn: Sync,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,