Enum QuantizationMethod

Source

/// Supported methods of quantization.
pub enum QuantizationMethod {
    /// Maps the input range to uniform discrete levels with equal spacing
    /// between consecutive levels.
    Uniform,
    /// Centered around zero with equal positive and negative ranges.
    Symmetric,
    /// Uses a scale and a zero point, which allows better representation of
    /// asymmetric distributions.
    Affine,
    /// Uses powers of 2 for the scale factor, enabling implementation with
    /// bitshifts.
    PowerOfTwo,
    /// 4-bit signed integers, two values packed into each byte.
    Int4,
    /// 4-bit unsigned integers, two values packed into each byte.
    UInt4,
    /// IEEE 754 16-bit half-precision floating point format.
    Float16,
    /// "Brain floating point" 16-bit format: same exponent size as `f32` but
    /// fewer mantissa bits.
    BFloat16,
    /// Symmetric quantization with separate parameters for each channel
    /// (column).
    PerChannelSymmetric,
    /// Affine quantization with separate parameters for each channel
    /// (column).
    PerChannelAffine,
}

Expand description

Supported methods of quantization

Variants§

§

Uniform

Uniform quantization maps the input range to uniform discrete levels with equal spacing between consecutive levels

§

Symmetric

Symmetric quantization is centered around zero and has equal positive and negative ranges, making it suitable for weight matrices

§

Affine

Affine quantization uses the formula q = round(x / scale) + zero_point (with the original value recovered as x ≈ scale * (q - zero_point)), allowing better representation of asymmetric distributions

§

PowerOfTwo

Power-of-two quantization uses powers of 2 for the scale factor, enabling efficient implementation with bitshifts

§

Int4

Int4 quantization uses 4-bit signed integers, packing two values into each byte for memory efficiency. This is useful for model compression in ML applications.

§

UInt4

UInt4 quantization uses 4-bit unsigned integers, packing two values into each byte. This provides a non-negative range (0 to 15) with maximum memory efficiency.

§

Float16

Float16 quantization uses IEEE 754 16-bit half-precision floating point format. It provides a good balance between precision and memory efficiency for ML models.

§

BFloat16

BFloat16 quantization uses the “brain floating point” 16-bit format, which has the same exponent size as f32 but fewer mantissa bits. This is especially well-suited for deep learning applications.

§

PerChannelSymmetric

Per-channel symmetric quantization applies different symmetric quantization parameters to each channel (column), improving accuracy for matrices with varying distributions across channels.

§

PerChannelAffine

Per-channel affine quantization applies different affine quantization parameters to each channel (column), allowing for better representation of asymmetric distributions that vary by channel.

QuantizationMethod

Enum QuantizationMethod Copy item path

Variants§

Uniform

Symmetric

Affine

PowerOfTwo

Int4

UInt4

Float16

BFloat16

PerChannelSymmetric

PerChannelAffine

Trait Implementations§

impl Clone for QuantizationMethod

fn clone(&self) -> QuantizationMethod

fn clone_from(&mut self, source: &Self)

impl Debug for QuantizationMethod

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl PartialEq for QuantizationMethod

fn eq(&self, other: &QuantizationMethod) -> bool

fn ne(&self, other: &Rhs) -> bool

impl Copy for QuantizationMethod

impl Eq for QuantizationMethod

impl StructuralPartialEq for QuantizationMethod

Auto Trait Implementations§

impl Freeze for QuantizationMethod

impl RefUnwindSafe for QuantizationMethod

impl Send for QuantizationMethod

impl Sync for QuantizationMethod

impl Unpin for QuantizationMethod

impl UnwindSafe for QuantizationMethod

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

Enum QuantizationMethod

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,