Struct GptqLinear

Source

pub struct GptqLinear<B: Backend> { /* private fields */ }

Implementations§

Source §

impl<B: Backend> GptqLinear<B>

Source

pub fn from_raw( qweight: &[i32], scales: &[f32], qzeros: &[i32], g_idx: Option<&[i32]>, bits: u32, group_size: usize, in_features: usize, out_features: usize, ) -> Result<Self>

Build from raw host-side GPTQ tensors. The Backend repacks into its preferred format once; inference uses the repacked store.

- qweight: [k/8, n] i32 (packed int4)
- scales: [k/group_size, n] f32 (converted from f16 by caller)
- qzeros: [k/group_size, n/8] i32
- g_idx: [k] i32 — optional, only used for desc_act=true

Source

pub fn from_store( store: B::GptqStore, in_features: usize, out_features: usize, ) -> Self

Construct directly from a pre-built backend store (e.g. tests).

Source

pub fn with_bias(self, bias: &[f32]) -> Self

Attach a bias vector ([out_features] f32 on host, uploaded to backend). Qwen2.5 / Llama-with-bias variants require this.

Source

pub fn store(&self) -> &B::GptqStore

Trait Implementations§

Source §

impl<B: Backend> Linear<B> for GptqLinear<B>

fn forward( &self, ctx: &mut B::Context, input: &B::Buffer, out: &mut B::Buffer, m: usize, )

Append GEMM work onto ctx. Caller flushes the context when results must be materialised.

Auto Trait Implementations§

§

impl<B> Freeze for GptqLinear<B>
where <B as Backend>::GptqStore: Freeze, <B as Backend>::Buffer: Freeze,

§

impl<B> RefUnwindSafe for GptqLinear<B>
where <B as Backend>::GptqStore: RefUnwindSafe, <B as Backend>::Buffer: RefUnwindSafe,

§

impl<B> Send for GptqLinear<B>

§

impl<B> Sync for GptqLinear<B>

§

impl<B> Unpin for GptqLinear<B>
where <B as Backend>::GptqStore: Unpin, <B as Backend>::Buffer: Unpin,

§

impl<B> UnsafeUnpin for GptqLinear<B>
where <B as Backend>::GptqStore: UnsafeUnpin, <B as Backend>::Buffer: UnsafeUnpin,

§

impl<B> UnwindSafe for GptqLinear<B>
where <B as Backend>::GptqStore: UnwindSafe, <B as Backend>::Buffer: UnwindSafe,

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source §

GptqLinear

Struct GptqLinear

Implementations§

impl<B: Backend> GptqLinear<B>

pub fn from_raw( qweight: &[i32], scales: &[f32], qzeros: &[i32], g_idx: Option<&[i32]>, bits: u32, group_size: usize, in_features: usize, out_features: usize, ) -> Result<Self>

pub fn from_store( store: B::GptqStore, in_features: usize, out_features: usize, ) -> Self

pub fn with_bias(self, bias: &[f32]) -> Self

pub fn store(&self) -> &B::GptqStore

Trait Implementations§

impl<B: Backend> Linear<B> for GptqLinear<B>

fn in_features(&self) -> usize

fn out_features(&self) -> usize

fn forward( &self, ctx: &mut B::Context, input: &B::Buffer, out: &mut B::Buffer, m: usize, )

Auto Trait Implementations§

impl<B> Freeze for GptqLinear<B>
where <B as Backend>::GptqStore: Freeze, <B as Backend>::Buffer: Freeze,

impl<B> RefUnwindSafe for GptqLinear<B>
where <B as Backend>::GptqStore: RefUnwindSafe, <B as Backend>::Buffer: RefUnwindSafe,

impl<B> Send for GptqLinear<B>

impl<B> Sync for GptqLinear<B>

impl<B> Unpin for GptqLinear<B>
where <B as Backend>::GptqStore: Unpin, <B as Backend>::Buffer: Unpin,

impl<B> UnsafeUnpin for GptqLinear<B>
where <B as Backend>::GptqStore: UnsafeUnpin, <B as Backend>::Buffer: UnsafeUnpin,

impl<B> UnwindSafe for GptqLinear<B>
where <B as Backend>::GptqStore: UnwindSafe, <B as Backend>::Buffer: UnwindSafe,

Blanket Implementations§

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T
where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn vzip(self) -> V

Struct GptqLinear Copy item path

Implementations§

impl<B: Backend> GptqLinear<B>

pub fn from_raw( qweight: &[i32], scales: &[f32], qzeros: &[i32], g_idx: Option<&[i32]>, bits: u32, group_size: usize, in_features: usize, out_features: usize, ) -> Result<Self>

pub fn from_store( store: B::GptqStore, in_features: usize, out_features: usize, ) -> Self

pub fn with_bias(self, bias: &[f32]) -> Self

pub fn store(&self) -> &B::GptqStore

Trait Implementations§

impl<B: Backend> Linear<B> for GptqLinear<B>

fn in_features(&self) -> usize

fn out_features(&self) -> usize

fn forward( &self, ctx: &mut B::Context, input: &B::Buffer, out: &mut B::Buffer, m: usize, )

Auto Trait Implementations§

impl<B> Freeze for GptqLinear<B> where <B as Backend>::GptqStore: Freeze, <B as Backend>::Buffer: Freeze,

impl<B> RefUnwindSafe for GptqLinear<B> where <B as Backend>::GptqStore: RefUnwindSafe, <B as Backend>::Buffer: RefUnwindSafe,

impl<B> Send for GptqLinear<B>

impl<B> Sync for GptqLinear<B>

impl<B> Unpin for GptqLinear<B> where <B as Backend>::GptqStore: Unpin, <B as Backend>::Buffer: Unpin,

impl<B> UnsafeUnpin for GptqLinear<B> where <B as Backend>::GptqStore: UnsafeUnpin, <B as Backend>::Buffer: UnsafeUnpin,

impl<B> UnwindSafe for GptqLinear<B> where <B as Backend>::GptqStore: UnwindSafe, <B as Backend>::Buffer: UnwindSafe,

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

Struct GptqLinear

impl<B> Freeze for GptqLinear<B>
where <B as Backend>::GptqStore: Freeze, <B as Backend>::Buffer: Freeze,

impl<B> RefUnwindSafe for GptqLinear<B>
where <B as Backend>::GptqStore: RefUnwindSafe, <B as Backend>::Buffer: RefUnwindSafe,

impl<B> Unpin for GptqLinear<B>
where <B as Backend>::GptqStore: Unpin, <B as Backend>::Buffer: Unpin,

impl<B> UnsafeUnpin for GptqLinear<B>
where <B as Backend>::GptqStore: UnsafeUnpin, <B as Backend>::Buffer: UnsafeUnpin,

impl<B> UnwindSafe for GptqLinear<B>
where <B as Backend>::GptqStore: UnwindSafe, <B as Backend>::Buffer: UnwindSafe,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,