Enum QuantScheme

Source

pub enum QuantScheme {
    Int8Block {
        block_size: u32,
    },
    Int8BlockAsym {
        block_size: u32,
    },
    Int4Block {
        block_size: u32,
    },
    Fp8E4m3,
    Fp8E5m2,
    GgufQ4K,
    GgufQ5K,
    GgufQ6K,
    GgufQ8K,
    GgufQ2K,
    GgufQ3K,
    GgufQ4_0,
    GgufQ8_0,
    Nvfp4Block,
}

Expand description

How a tensor is quantized. Mirrors the schemes RLX needs for LLM inference on Apple Silicon: blockwise int8 (GPTQ-style), blockwise int4 (Q4_K), and per-tensor fp8 (e4m3 / e5m2).

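Each variant identifies the parameters the dequantizer needs to read at runtime — scale, zero-point, block size. Only the block size (for the configurable blockwise variants) is stored in the variant itself; where the scales and zero-points live in the actual weight tensor is up to the loader (#56).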

Variants§

§

Int8Block

Symmetric int8 with one scale per block_size elements.

Fields

§block_size: u32

§

Int8BlockAsym

Asymmetric int8 with scale + zero-point per block_size elements.

Fields

§block_size: u32

§

Int4Block

Int4 packed two-per-byte, scale per block_size elements (Q4_K-ish; matches GGUF block layout).

Fields

§block_size: u32

§

Fp8E4m3

FP8 e4m3 (no scale; same domain as half).

§

Fp8E5m2

FP8 e5m2 (no scale; wider range than e4m3).

§

GgufQ4K

GGUF / llama.cpp Q4_K super-block (256 elements / 144 bytes). Packs an f16 super-scale + f16 super-min + 8 sub-block 6-bit scales + 8 sub-block 6-bit mins (12 bytes of packed scales/mins) + 256 nibbles (128 bytes). Block layout is fixed by the format — there’s no block_size knob.

§

GgufQ5K

GGUF Q5_K (256 / 176 bytes). Adds a 32-byte high-bit plane on top of Q4_K.

§

GgufQ6K

GGUF Q6_K (256 / 210 bytes). Per-sub-block signed scales, no min term.

§

GgufQ8K

GGUF Q8_K (256 / 292 bytes). Per-super-block f32 scale (4 bytes) plus 256 i8 quants and a 32-byte sum-of-blocks table that’s only used by Q8_K × Q8_K matmul accumulation paths.

§

GgufQ2K

GGUF Q2_K (256 / 84 bytes). 2-bit quants with per-sub-block scale/min.

§

GgufQ3K

GGUF Q3_K (256 / 110 bytes). 3-bit quants with hmask high bit plane.

§

GgufQ4_0

GGUF Q4_0 (32 / 18 bytes). Legacy llama.cpp block: f16 scale + nibbles.

§

GgufQ8_0

GGUF Q8_0 (32 / 34 bytes). Legacy block: f16 scale + 32×i8 quants.

§

Nvfp4Block

NVIDIA FP4 (E2M1) block — fixed 16-element groups, FP8 E4M3 block scales, optional f32 global scale on input 3 (legacy zp slot). Used by FLUX.2 / MLX nvfp4 checkpoints.

Enum QuantScheme

Variants§

Int8Block

Fields

Int8BlockAsym

Fields

Int4Block

Fields

Fp8E4m3

Fp8E5m2

GgufQ4K

GgufQ5K

GgufQ6K

GgufQ8K

GgufQ2K

GgufQ3K

GgufQ4_0

GgufQ8_0

Nvfp4Block

Implementations§

impl QuantScheme

pub const fn bits_per_element_x10(self) -> u32

pub const fn bits_per_element(self) -> u32

pub const fn has_scale(self) -> bool

pub const fn scale_is_fp8(self) -> bool

pub const fn nvfp4_group_size(self) -> u32

pub const fn has_zero_point(self) -> bool

pub const fn gguf_block_size(self) -> u32

pub const fn gguf_block_bytes(self) -> u32

pub const fn is_gguf(self) -> bool

Trait Implementations§

impl Clone for QuantScheme

fn clone(&self) -> QuantScheme

fn clone_from(&mut self, source: &Self)

impl Copy for QuantScheme

impl Debug for QuantScheme

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

impl<'de> Deserialize<'de> for QuantScheme

fn deserialize<__D>( __deserializer: __D, ) -> Result<QuantScheme, <__D as Deserializer<'de>>::Error> where __D: Deserializer<'de>,

impl Display for QuantScheme

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

impl PartialEq for QuantScheme

fn eq(&self, other: &QuantScheme) -> bool

fn ne(&self, other: &Rhs) -> bool

impl Serialize for QuantScheme

fn serialize<__S>( &self, __serializer: __S, ) -> Result<<__S as Serializer>::Ok, <__S as Serializer>::Error> where __S: Serializer,

impl StructuralPartialEq for QuantScheme

Auto Trait Implementations§

impl Freeze for QuantScheme

impl RefUnwindSafe for QuantScheme

impl Send for QuantScheme

impl Sync for QuantScheme

impl Unpin for QuantScheme

impl UnsafeUnpin for QuantScheme

impl UnwindSafe for QuantScheme

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> ToOwned for T where T: Clone,

Enum QuantScheme

fn deserialize<D>( deserializer: D, ) -> Result<QuantScheme, <D as Deserializer<'de>>::Error>
where D: Deserializer<'de>,

fn serialize<S>( &self, serializer: S, ) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> ToOwned for T
where T: Clone,

impl<T> ToString for T
where T: Display + ?Sized,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<T> WasmNotSend for T
where T: Send,

impl<T> WasmNotSendSync for T
where T: WasmNotSend + WasmNotSync,

impl<T> WasmNotSync for T
where T: Sync,