Struct CudaNf4TransformerBlock

Source

pub struct CudaNf4TransformerBlock { /* private fields */ }

Expand description

CUDA-accelerated transformer block with NF4-quantized frozen weights.

Stores the 7 projection weights as packed NF4 (4-bit) + per-block scales instead of fp32, achieving ~8x compression. Norm weights remain fp32 (negligible size).

§VRAM Savings (Qwen3-4B example)

Component	fp32	NF4
Frozen weights (36L × 7 projections)	16.0 GB	2.1 GB

§Forward Only

NF4 blocks are frozen — no backward pass needed. LoRA adapters (fp32) handle the trainable parameters separately. The forward pass uses fused dequant+GEMM kernels that read NF4 directly without materializing fp32 weights.

Struct CudaNf4TransformerBlock Copy item path

§VRAM Savings (Qwen3-4B example)

§Forward Only

Implementations§

impl CudaNf4TransformerBlock

pub fn set_fp16_weights(&mut self, stream: &CudaStream) -> Result<()>

pub fn layer_idx(&self) -> usize

impl CudaNf4TransformerBlock

pub fn download_lora_weights( &self, ) -> Result<(Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>)>

pub fn upload_lora_weights( &mut self, a_q: &[f32], b_q: &[f32], a_v: &[f32], b_v: &[f32], ) -> Result<()>

Auto Trait Implementations§

impl Freeze for CudaNf4TransformerBlock

impl RefUnwindSafe for CudaNf4TransformerBlock

impl Send for CudaNf4TransformerBlock

impl Sync for CudaNf4TransformerBlock

impl Unpin for CudaNf4TransformerBlock

impl UnsafeUnpin for CudaNf4TransformerBlock

impl UnwindSafe for CudaNf4TransformerBlock

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> Conv for T

fn conv<T>(self) -> T where Self: Into<T>,

impl<T> Downcast<T> for T

fn downcast(&self) -> &T

impl<T> FmtForward for T

fn fmt_binary(self) -> FmtBinary<Self> where Self: Binary,

fn fmt_display(self) -> FmtDisplay<Self> where Self: Display,

fn fmt_lower_exp(self) -> FmtLowerExp<Self> where Self: LowerExp,

fn fmt_lower_hex(self) -> FmtLowerHex<Self> where Self: LowerHex,

fn fmt_octal(self) -> FmtOctal<Self> where Self: Octal,

fn fmt_pointer(self) -> FmtPointer<Self> where Self: Pointer,

fn fmt_upper_exp(self) -> FmtUpperExp<Self> where Self: UpperExp,

fn fmt_upper_hex(self) -> FmtUpperHex<Self> where Self: UpperHex,

fn fmt_list(self) -> FmtList<Self> where for<'a> &'a Self: IntoIterator,

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pipe for T where T: ?Sized,

fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> R where Self: Sized,

fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> R where R: 'a,

fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> R where R: 'a,

fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R where Self: Borrow<B>, B: 'a + ?Sized, R: 'a,

fn pipe_borrow_mut<'a, B, R>( &'a mut self, func: impl FnOnce(&'a mut B) -> R, ) -> R where Self: BorrowMut<B>, B: 'a + ?Sized, R: 'a,

fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R where Self: AsRef<U>, U: 'a + ?Sized, R: 'a,

fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R where Self: AsMut<U>, U: 'a + ?Sized, R: 'a,

fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R where Self: Deref<Target = T>, T: 'a + ?Sized, R: 'a,

fn pipe_deref_mut<'a, T, R>( &'a mut self, func: impl FnOnce(&'a mut T) -> R, ) -> R where Self: DerefMut<Target = T> + Deref, T: 'a + ?Sized, R: 'a,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Policy<B, E>, P: Policy<B, E>,

impl<T> Same for T

type Output = T

impl<T> Tap for T

fn tap(self, func: impl FnOnce(&Self)) -> Self

fn tap_mut(self, func: impl FnOnce(&mut Self)) -> Self

fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self where Self: Borrow<B>, B: ?Sized,

fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self where Self: BorrowMut<B>, B: ?Sized,

fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self where Self: AsRef<R>, R: ?Sized,

fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self where Self: AsMut<R>, R: ?Sized,

fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self where Self: Deref<Target = T>, T: ?Sized,

fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self where Self: DerefMut<Target = T> + Deref, T: ?Sized,

Struct CudaNf4TransformerBlock

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn conv<T>(self) -> T
where Self: Into<T>,

fn fmt_binary(self) -> FmtBinary<Self>
where Self: Binary,

fn fmt_display(self) -> FmtDisplay<Self>
where Self: Display,

fn fmt_lower_exp(self) -> FmtLowerExp<Self>
where Self: LowerExp,

fn fmt_lower_hex(self) -> FmtLowerHex<Self>
where Self: LowerHex,

fn fmt_octal(self) -> FmtOctal<Self>
where Self: Octal,

fn fmt_pointer(self) -> FmtPointer<Self>
where Self: Pointer,

fn fmt_upper_exp(self) -> FmtUpperExp<Self>
where Self: UpperExp,

fn fmt_upper_hex(self) -> FmtUpperHex<Self>
where Self: UpperHex,

fn fmt_list(self) -> FmtList<Self>
where for<'a> &'a Self: IntoIterator,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> Pipe for T
where T: ?Sized,

fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> R
where Self: Sized,

fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> R
where R: 'a,

fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> R
where R: 'a,

fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R
where Self: Borrow<B>, B: 'a + ?Sized, R: 'a,

fn pipe_borrow_mut<'a, B, R>( &'a mut self, func: impl FnOnce(&'a mut B) -> R, ) -> R
where Self: BorrowMut<B>, B: 'a + ?Sized, R: 'a,

fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R
where Self: AsRef<U>, U: 'a + ?Sized, R: 'a,

fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R
where Self: AsMut<U>, U: 'a + ?Sized, R: 'a,

fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R
where Self: Deref<Target = T>, T: 'a + ?Sized, R: 'a,

fn pipe_deref_mut<'a, T, R>( &'a mut self, func: impl FnOnce(&'a mut T) -> R, ) -> R
where Self: DerefMut<Target = T> + Deref, T: 'a + ?Sized, R: 'a,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self
where Self: Borrow<B>, B: ?Sized,

fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self
where Self: BorrowMut<B>, B: ?Sized,

fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self
where Self: AsRef<R>, R: ?Sized,

fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self
where Self: AsMut<R>, R: ?Sized,

fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self
where Self: Deref<Target = T>, T: ?Sized,

fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self
where Self: DerefMut<Target = T> + Deref, T: ?Sized,

fn tap_borrow_dbg<B>(self, func: impl FnOnce(&B)) -> Self
where Self: Borrow<B>, B: ?Sized,

fn tap_borrow_mut_dbg<B>(self, func: impl FnOnce(&mut B)) -> Self
where Self: BorrowMut<B>, B: ?Sized,

fn tap_ref_dbg<R>(self, func: impl FnOnce(&R)) -> Self
where Self: AsRef<R>, R: ?Sized,

fn tap_ref_mut_dbg<R>(self, func: impl FnOnce(&mut R)) -> Self
where Self: AsMut<R>, R: ?Sized,

fn tap_deref_dbg<T>(self, func: impl FnOnce(&T)) -> Self
where Self: Deref<Target = T>, T: ?Sized,

fn tap_deref_mut_dbg<T>(self, func: impl FnOnce(&mut T)) -> Self
where Self: DerefMut<Target = T> + Deref, T: ?Sized,

fn try_conv<T>(self) -> Result<T, Self::Error>
where Self: TryInto<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<S, T> Upcast<T> for S
where T: UpcastFrom<S> + ?Sized, S: ?Sized,

fn upcast(&self) -> &T
where Self: ErasableGeneric, T: ErasableGeneric<Repr = Self::Repr>,

fn upcast_into(self) -> T
where Self: Sized + ErasableGeneric, T: ErasableGeneric<Repr = Self::Repr>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> Allocation for T
where T: RefUnwindSafe + Send + Sync,

impl<T> Allocation for T
where T: RefUnwindSafe + Send + Sync,

impl<T> WasmNotSend for T
where T: Send,

impl<T> WasmNotSendSync for T
where T: WasmNotSend + WasmNotSync,

impl<T> WasmNotSync for T
where T: Sync,