ferrum_kernels::quant_linear::cpu_dequant

Struct CpuGptqLinear

pub struct CpuGptqLinear {
    pub weight_f32: Vec<f32>,
    pub bias: Option<Vec<f32>>,
    pub in_features: usize,
    pub out_features: usize,
}

Expand description

CPU GPTQ Linear: holds dequantized fp32 weights [out_features, in_features] row-major, optional bias [out_features], dispatches via CpuBackend::gemm.

The dequantization happens once in BackendQuantMarlin::load_gptq — inference is just a regular f32 GEMM.

Fields§

§weight_f32: Vec<f32>

§bias: Option<Vec<f32>>

§in_features: usize

§out_features: usize

Trait Implementations§

Source §

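impl Linear<CpuBackend> for CpuGptqLinear

fn in_features(&self) -> usize

fn out_features(&self) -> usize

fn forward( &self, ctx: &mut <CpuBackend as Backend>::Context, input: &<CpuBackend as Backend>::Buffer, out: &mut <CpuBackend as Backend>::Buffer, m: usize, )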

Append GEMM work onto ctx. Caller flushes the context when results must be materialised.

Auto Trait Implementations§

§

impl Freeze for CpuGptqLinear

§

impl RefUnwindSafe for CpuGptqLinear

§

impl Send for CpuGptqLinear

§

impl Sync for CpuGptqLinear

§

impl Unpin for CpuGptqLinear

§

impl UnsafeUnpin for CpuGptqLinear

§

impl UnwindSafe for CpuGptqLinear

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> Same for T

Source §

type Output = T

Should always be Self

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source §

fn vzip(self) -> V

Source §

impl<T> WithSubscriber for T

Source §

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more

CpuGptqLinear

Struct CpuGptqLinear Copy item path

Fields§

Trait Implementations§

impl Linear<CpuBackend> for CpuGptqLinear

fn in_features(&self) -> usize

fn out_features(&self) -> usize

fn forward( &self, ctx: &mut <CpuBackend as Backend>::Context, input: &<CpuBackend as Backend>::Buffer, out: &mut <CpuBackend as Backend>::Buffer, m: usize, )

Auto Trait Implementations§

impl Freeze for CpuGptqLinear

impl RefUnwindSafe for CpuGptqLinear

impl Send for CpuGptqLinear

impl Sync for CpuGptqLinear

impl Unpin for CpuGptqLinear

impl UnsafeUnpin for CpuGptqLinear

impl UnwindSafe for CpuGptqLinear

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> Same for T

type Output = T

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct CpuGptqLinear

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,