Struct SmoothQuantMigrator

Source

pub struct SmoothQuantMigrator {
    pub config: SmoothQuantConfig,
}

Expand description

Applies per-channel scaling to balance quantization difficulty.

The migrator operates on linear layers:

Activations X — shape [n_tokens, n_channels]
Weights W — shape [n_out, n_channels] (transposed: Y = X W^T)

Fields§

§config: SmoothQuantConfig

Migration configuration.

Implementations§

Source §

impl SmoothQuantMigrator

Source

pub fn new(alpha: f32) -> Self

Create a migrator with the given alpha.

Source

pub fn compute_migration_scales( &self, act_max: &[f32], weight_max: &[f32], ) -> QuantResult<Vec<f32>>

Compute per-channel migration scales from pre-aggregated statistics.

§Parameters

act_max — per-channel max absolute value of activations (length n_ch).
weight_max — per-channel (column) max absolute value of weights (length n_ch).

§Returns

Scale vector s of length n_ch where s[j] = act_max[j]^alpha / weight_max[j]^(1−alpha).

§Errors

QuantError::DimensionMismatch — act_max and weight_max differ in length.
QuantError::EmptyInput — either slice is empty.

Source

pub fn compute_act_stats( acts: &[f32], n_tokens: usize, n_channels: usize, ) -> QuantResult<Vec<f32>>

Compute per-channel max absolute values from an activation tensor.

§Parameters

acts — row-major activation matrix [n_tokens, n_channels].
n_tokens — number of tokens (rows).
n_channels — hidden dimension (columns).

§Errors

QuantError::DimensionMismatch — slice length ≠ n_tokens × n_channels.
QuantError::EmptyInput — either dimension is 0.

Source

pub fn compute_weight_stats( weights: &[f32], n_out: usize, n_channels: usize, ) -> QuantResult<Vec<f32>>

Compute per-column (input-channel) max absolute values from a weight matrix.

§Parameters

weights — row-major weight matrix [n_out, n_channels].
n_out — number of output features (rows).
n_channels — number of input features (columns).

§Errors

QuantError::DimensionMismatch — slice length ≠ n_out × n_channels.
QuantError::EmptyInput — either dimension is 0.

Source

pub fn smooth_activations( acts: &mut [f32], scales: &[f32], n_tokens: usize, n_channels: usize, ) -> QuantResult<()>

Divide each activation channel j by scales[j] in-place.

§Errors

QuantError::DimensionMismatch — inconsistent lengths.

Source

pub fn smooth_weights( weights: &mut [f32], scales: &[f32], n_out: usize, n_channels: usize, ) -> QuantResult<()>

Multiply each weight column j (input channel) by scales[j] in-place.

Weights are assumed to have shape [n_out, n_channels].

§Errors

QuantError::DimensionMismatch — inconsistent lengths.

Source

pub fn smooth_layer( &self, acts: &mut [f32], weights: &mut [f32], n_tokens: usize, n_channels: usize, n_out: usize, ) -> QuantResult<Vec<f32>>

Smooth a complete linear layer: compute scales, apply to activations and weights.

§Parameters

acts — mutable activation matrix [n_tokens, n_channels].
weights — mutable weight matrix [n_out, n_channels].
n_tokens — token (batch) dimension.
n_channels — input feature dimension.
n_out — output feature dimension.

§Returns

The per-channel migration scales used (length n_channels).

§Errors

Propagates all dimension and empty-input errors from sub-operations.

Trait Implementations§

Source §

impl Clone for SmoothQuantMigrator

Source §

fn clone(&self) -> SmoothQuantMigrator

Returns a duplicate of the value. Read more

1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Source §

impl Debug for SmoothQuantMigrator

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Source §

impl Copy for SmoothQuantMigrator

Auto Trait Implementations§

§

impl UnwindSafe for SmoothQuantMigrator

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> CloneToUninit for T
where T: Clone,

Source §

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)

Performs copy-assignment from self to dest. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> ToOwned for T
where T: Clone,

Source §

type Owned = T

The resulting type after obtaining ownership.

Source §

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

Source §

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Struct SmoothQuantMigrator Copy item path

Fields§

Implementations§

impl SmoothQuantMigrator

pub fn new(alpha: f32) -> Self

pub fn compute_migration_scales( &self, act_max: &[f32], weight_max: &[f32], ) -> QuantResult<Vec<f32>>

§Parameters

§Returns

§Errors

pub fn compute_act_stats( acts: &[f32], n_tokens: usize, n_channels: usize, ) -> QuantResult<Vec<f32>>

§Parameters

§Errors

pub fn compute_weight_stats( weights: &[f32], n_out: usize, n_channels: usize, ) -> QuantResult<Vec<f32>>

§Parameters

§Errors

pub fn smooth_activations( acts: &mut [f32], scales: &[f32], n_tokens: usize, n_channels: usize, ) -> QuantResult<()>

§Errors

pub fn smooth_weights( weights: &mut [f32], scales: &[f32], n_out: usize, n_channels: usize, ) -> QuantResult<()>

§Errors

pub fn smooth_layer( &self, acts: &mut [f32], weights: &mut [f32], n_tokens: usize, n_channels: usize, n_out: usize, ) -> QuantResult<Vec<f32>>

§Parameters

§Returns

§Errors

Trait Implementations§

impl Clone for SmoothQuantMigrator

fn clone(&self) -> SmoothQuantMigrator

fn clone_from(&mut self, source: &Self)

impl Debug for SmoothQuantMigrator

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Copy for SmoothQuantMigrator

Auto Trait Implementations§

impl Freeze for SmoothQuantMigrator

impl RefUnwindSafe for SmoothQuantMigrator

impl Send for SmoothQuantMigrator

impl Sync for SmoothQuantMigrator

impl Unpin for SmoothQuantMigrator

impl UnsafeUnpin for SmoothQuantMigrator

impl UnwindSafe for SmoothQuantMigrator

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Struct SmoothQuantMigrator

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,