// ferrum_quantization/gptq.rs
use ferrum_kernels::backend::{Backend, BackendQuantMarlin};
use ferrum_kernels::Linear;
use ferrum_types::Result;
use std::sync::Arc;
12
13pub struct GptqLinear<B: Backend + BackendQuantMarlin> {
18 inner: Box<dyn Linear<B> + Send + Sync>,
19}
20
21impl<B: Backend + BackendQuantMarlin> GptqLinear<B> {
22 #[allow(clippy::too_many_arguments)]
32 pub fn from_raw(
33 qweight: &[i32],
34 scales: &[f32],
35 qzeros: &[i32],
36 g_idx: Option<&[i32]>,
37 bias: Option<&[f32]>,
38 bits: u32,
39 group_size: usize,
40 in_features: usize,
41 out_features: usize,
42 ) -> Result<Self> {
43 let inner = B::load_gptq(
44 qweight,
45 scales,
46 qzeros,
47 g_idx,
48 bias,
49 bits,
50 group_size,
51 in_features,
52 out_features,
53 )?;
54 Ok(Self { inner })
55 }
56}
57
58impl<B: Backend + BackendQuantMarlin> Linear<B> for GptqLinear<B> {
59 fn in_features(&self) -> usize {
60 self.inner.in_features()
61 }
62
63 fn out_features(&self) -> usize {
64 self.inner.out_features()
65 }
66
67 fn forward(&self, ctx: &mut B::Context, input: &B::Buffer, out: &mut B::Buffer, m: usize) {
68 self.inner.forward(ctx, input, out, m);
69 }
70}
71
72pub struct StackedExpertLinear<B: Backend + BackendQuantMarlin> {
80 inner: Box<dyn Linear<B> + Send + Sync>,
81 k: usize,
83 expert_n: usize,
85}
86
87impl<B: Backend + BackendQuantMarlin> StackedExpertLinear<B> {
88 pub fn new(
91 stack: Arc<dyn ferrum_kernels::MarlinExpertStack<B>>,
92 expert_offset: usize,
93 expert_n: usize,
94 ) -> Result<Self> {
95 let k = stack.k();
96 let inner = stack.make_expert_linear(expert_offset, expert_n, None)?;
97 Ok(Self { inner, k, expert_n })
98 }
99
100 pub fn new_with_bias(
101 stack: Arc<dyn ferrum_kernels::MarlinExpertStack<B>>,
102 expert_offset: usize,
103 expert_n: usize,
104 bias: &[f32],
105 ) -> Result<Self> {
106 let k = stack.k();
107 let inner = stack.make_expert_linear(expert_offset, expert_n, Some(bias))?;
108 Ok(Self { inner, k, expert_n })
109 }
110}
111
112impl<B: Backend + BackendQuantMarlin> Linear<B> for StackedExpertLinear<B> {
113 fn in_features(&self) -> usize {
114 self.k
115 }
116
117 fn out_features(&self) -> usize {
118 self.expert_n
119 }
120
121 fn forward(&self, ctx: &mut B::Context, input: &B::Buffer, out: &mut B::Buffer, m: usize) {
122 self.inner.forward(ctx, input, out, m);
123 }
124}