Enum MatWeight

Source

/// Storage variant for matmul weight tensors.
///
/// The big projections (qkv / gate / ffn / lm_head) dominate the load
/// footprint; the `Packed` variant keeps GGUF K-quant bytes in-place so the
/// graph can emit `Op::DequantMatMul` instead of a full F32 dequant.
///
/// Norm weights, conv kernels, scalar params etc. are not stored here: they
/// stay as `Vec<f32>` in the layer structs, since their footprint is
/// negligible and the RmsNorm / Conv ops have no packed variant.
pub enum MatWeight {
    /// Already dequantized to f32, row-major `[out, in]`. The builder
    /// transposes to `[in, out]` before issuing MatMul.
    F32(Vec<f32>),
    /// GGUF-packed K-quant metadata only; the tensor bytes are not owned by
    /// this value. They are looked up in the loader at upload time via
    /// `rlx_core::weight_loader::GgufLoader::tensor_bytes_borrowed`, which
    /// avoids a per-tensor `Vec<u8>` allocation and its memcpy.
    Packed {
        /// Loader-resolvable tensor name (post-HF↔GGUF mapping).
        key: String,
        /// Quantization scheme of the packed tensor.
        /// NOTE(review): presumably selects the K-quant block layout used by
        /// the dequant op — confirm against `QuantScheme`.
        scheme: QuantScheme,
        /// Tensor shape as `[out, in]`, after the safetensors-style dim
        /// reversal.
        shape: Vec<usize>,
    },
}

Expand description

Storage variant for matmul weight tensors. The big projections (qkv / gate / ffn / lm_head) dominate the load footprint; the Packed variant keeps GGUF K-quant bytes in-place so the graph can emit Op::DequantMatMul instead of a full F32 dequant.

Norm weights, conv kernels, scalar params etc. stay as Vec<f32> in the layer structs (their footprint is negligible and the RmsNorm / Conv ops don’t have a packed variant).

Variants§

§

F32(Vec<f32>)

Already dequantized to f32, row-major [out, in]. The builder transposes to [in, out] before issuing MatMul.

§

Packed

GGUF-packed K-quant metadata only. The actual bytes are looked up in the loader at upload time via rlx_core::weight_loader::GgufLoader::tensor_bytes_borrowed — eliminates the per-tensor Vec<u8> allocation that otherwise costs ~16 GB of memcpy on Qwen3.6-27B Q4_K_M.

key is the loader-resolvable name (post-HF↔GGUF mapping); shape is [out, in] after the safetensors-style dim reversal.

Fields

§key: String

§scheme: QuantScheme

§shape: Vec<usize>

Enum MatWeight Copy item path

Variants§

F32(Vec<f32>)

Packed

Fields

Implementations§

impl MatWeight

pub fn len(&self) -> usize

pub fn is_empty(&self) -> bool

pub fn shape(&self) -> &[usize]

pub fn is_packed(&self) -> bool

pub fn packed_key(&self) -> Option<&str>

Trait Implementations§

impl Clone for MatWeight

fn clone(&self) -> MatWeight

fn clone_from(&mut self, source: &Self)

impl Debug for MatWeight

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Auto Trait Implementations§

impl Freeze for MatWeight

impl RefUnwindSafe for MatWeight

impl Send for MatWeight

impl Sync for MatWeight

impl Unpin for MatWeight

impl UnsafeUnpin for MatWeight

impl UnwindSafe for MatWeight

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT where ST: ?Sized, DT: ?Sized,

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> Read<Exclusive, BecauseExclusive> for T where T: ?Sized,

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

Enum MatWeight

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
where ST: ?Sized, DT: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> Read<Exclusive, BecauseExclusive> for T
where T: ?Sized,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,