Struct MinMaxQuantizer

Source

pub struct MinMaxQuantizer { /* private fields */ }

Expand description

Calibrates quantization parameters using tensor min/max statistics.

Implementations§

Source §

impl MinMaxQuantizer

Source

pub fn new( bits: u32, scheme: QuantScheme, granularity: QuantGranularity, ) -> Self

Create a new quantizer.

§Panics

Panics if bits is 0 or > 16.

Source

pub fn int8_symmetric() -> Self

Standard INT8 symmetric per-tensor quantizer.

Source

pub fn int4_per_group(group_size: usize) -> Self

Standard INT4 symmetric per-group quantizer with group_size elements per group (typically group = 128, as in GGML).

Source

pub fn calibrate(&self, tensor: &[f32]) -> QuantResult<QuantParams>

Calibrate parameters from a flat tensor.

For PerChannel, the tensor is assumed to be in row-major layout with n_channels rows of length tensor.len() / n_channels.

§Errors

QuantError::EmptyInput — if tensor is empty.
QuantError::GroupSizeMismatch — if the PerGroup group size does not evenly divide tensor.len().
QuantError::DimensionMismatch — if PerChannel axis is inconsistent.

Source

pub fn calibrate_2d( &self, tensor: &[f32], rows: usize, cols: usize, ) -> QuantResult<QuantParams>

Calibrate from a 2-D tensor (rows = channels).

Returns one (scale, zp) per row.

§Errors

QuantError::EmptyInput if rows == 0.
QuantError::DimensionMismatch if cols == 0.

Source

pub fn quantize( &self, tensor: &[f32], params: &QuantParams, ) -> QuantResult<Vec<i32>>

Quantize a flat tensor given pre-computed params (PerTensor mode).

Returns Vec<i32> of integer codes.

§Errors

QuantError::InvalidScale if params.scales[0] <= 0.

Source

pub fn quantize_grouped( &self, tensor: &[f32], params: &QuantParams, group_size: usize, ) -> QuantResult<Vec<i32>>

Quantize using per-group params.

§Errors

QuantError::GroupSizeMismatch if tensor size is not divisible by group_size.

Source

pub fn dequantize(&self, codes: &[i32], params: &QuantParams) -> Vec<f32>

Dequantize integer codes back to f32.

Source

pub fn dequantize_grouped( &self, codes: &[i32], params: &QuantParams, group_size: usize, ) -> Vec<f32>

Dequantize per-group codes.

Trait Implementations§

Source §

impl Clone for MinMaxQuantizer

Source §

fn clone(&self) -> MinMaxQuantizer

Returns a duplicate of the value. Read more

1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Source §

impl Debug for MinMaxQuantizer

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

§

impl UnwindSafe for MinMaxQuantizer

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> CloneToUninit for T
where T: Clone,

Source §

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)

Performs copy-assignment from self to dest. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> ToOwned for T
where T: Clone,

Source §

type Owned = T

The resulting type after obtaining ownership.

Source §

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

Source §

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Struct MinMaxQuantizer Copy item path

Implementations§

impl MinMaxQuantizer

pub fn new( bits: u32, scheme: QuantScheme, granularity: QuantGranularity, ) -> Self

§Panics

pub fn int8_symmetric() -> Self

pub fn int4_per_group(group_size: usize) -> Self

pub fn calibrate(&self, tensor: &[f32]) -> QuantResult<QuantParams>

§Errors

pub fn calibrate_2d( &self, tensor: &[f32], rows: usize, cols: usize, ) -> QuantResult<QuantParams>

§Errors

pub fn quantize( &self, tensor: &[f32], params: &QuantParams, ) -> QuantResult<Vec<i32>>

§Errors

pub fn quantize_grouped( &self, tensor: &[f32], params: &QuantParams, group_size: usize, ) -> QuantResult<Vec<i32>>

§Errors

pub fn dequantize(&self, codes: &[i32], params: &QuantParams) -> Vec<f32>

pub fn dequantize_grouped( &self, codes: &[i32], params: &QuantParams, group_size: usize, ) -> Vec<f32>

Trait Implementations§

impl Clone for MinMaxQuantizer

fn clone(&self) -> MinMaxQuantizer

fn clone_from(&mut self, source: &Self)

impl Debug for MinMaxQuantizer

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Auto Trait Implementations§

impl Freeze for MinMaxQuantizer

impl RefUnwindSafe for MinMaxQuantizer

impl Send for MinMaxQuantizer

impl Sync for MinMaxQuantizer

impl Unpin for MinMaxQuantizer

impl UnsafeUnpin for MinMaxQuantizer

impl UnwindSafe for MinMaxQuantizer

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Struct MinMaxQuantizer

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,