ferrum_kernels/quant_linear/
cpu_gguf.rs

1//! `Linear<CpuBackend>` impl for GGUF k-quant weights.
2//!
3//! Phase 3e/3: replaces the old `BackendQuantGguf::gemm_quant` impl on
4//! CpuBackend. The kernel call (Q4_K dequant + `Self::gemm`) lives
5//! inside `CpuGgufLinear::forward` instead of the trait method body.
6
7use crate::backend::cpu::{CpuBackend, CpuQuantStore};
8use crate::Linear;
9
10/// CPU GGUF Linear: holds a `CpuQuantStore` (currently Q4_K-dequantised
11/// weights) plus shape, dispatches via `CpuBackend::gemm`.
12pub struct CpuGgufLinear {
13    pub store: CpuQuantStore,
14    pub in_features: usize,
15    pub out_features: usize,
16}
17
18impl Linear<CpuBackend> for CpuGgufLinear {
19    fn in_features(&self) -> usize {
20        self.in_features
21    }
22
23    fn out_features(&self) -> usize {
24        self.out_features
25    }
26
27    fn forward(
28        &self,
29        ctx: &mut <CpuBackend as crate::backend::Backend>::Context,
30        input: &<CpuBackend as crate::backend::Backend>::Buffer,
31        out: &mut <CpuBackend as crate::backend::Backend>::Buffer,
32        m: usize,
33    ) {
34        match &self.store {
35            CpuQuantStore::Q4K {
36                weights,
37                n_rows,
38                n_cols,
39            } => {
40                <CpuBackend as crate::backend::Backend>::gemm(
41                    ctx, input, weights, out, m, *n_rows, *n_cols,
42                );
43            }
44        }
45    }
46}
ferrum_kernels/quant_linear/cpu_gguf.rs

ferrum_kernels/quant_linear/
cpu_gguf.rs