baracuda_kernels/quantize/
mod.rs1pub mod dequantize_per_channel;
33pub mod dequantize_per_channel_backward;
34pub mod dequantize_per_tensor;
35pub mod dequantize_per_tensor_backward;
36pub mod fake_quantize;
37pub mod fake_quantize_backward;
38pub mod per_channel;
39pub mod per_channel_backward;
40pub mod per_tensor;
41pub mod per_tensor_backward;
42
43pub mod dequantize_per_group;
47pub mod dequantize_per_group_backward;
48pub mod dequantize_per_token;
49pub mod dequantize_per_token_backward;
50pub mod per_group;
51pub mod per_group_backward;
52pub mod per_token;
53pub mod per_token_backward;
54
55pub mod dynamic_range;
57pub mod quantized_linear;
58
59pub mod smoothquant;
62
63pub mod gguf;
68
69pub mod nf4;
76
77pub use dequantize_per_channel::{
78 DequantizePerChannelArgs, DequantizePerChannelDescriptor, DequantizePerChannelPlan,
79};
80pub use dequantize_per_channel_backward::{
81 DequantizePerChannelBackwardArgs, DequantizePerChannelBackwardDescriptor,
82 DequantizePerChannelBackwardPlan,
83};
84pub use dequantize_per_tensor::{
85 DequantizePerTensorArgs, DequantizePerTensorDescriptor, DequantizePerTensorPlan,
86};
87pub use dequantize_per_tensor_backward::{
88 DequantizePerTensorBackwardArgs, DequantizePerTensorBackwardDescriptor,
89 DequantizePerTensorBackwardPlan,
90};
91pub use fake_quantize::{FakeQuantizeArgs, FakeQuantizeDescriptor, FakeQuantizePlan};
92pub use fake_quantize_backward::{
93 FakeQuantizeBackwardArgs, FakeQuantizeBackwardDescriptor, FakeQuantizeBackwardPlan,
94};
95pub use per_channel::{QuantizePerChannelArgs, QuantizePerChannelDescriptor, QuantizePerChannelPlan};
96pub use per_channel_backward::{
97 QuantizePerChannelBackwardArgs, QuantizePerChannelBackwardDescriptor,
98 QuantizePerChannelBackwardPlan,
99};
100pub use per_tensor::{QuantizePerTensorArgs, QuantizePerTensorDescriptor, QuantizePerTensorPlan};
101pub use per_tensor_backward::{
102 QuantizePerTensorBackwardArgs, QuantizePerTensorBackwardDescriptor,
103 QuantizePerTensorBackwardPlan,
104};
105
106pub use per_token::{QuantizePerTokenArgs, QuantizePerTokenDescriptor, QuantizePerTokenPlan};
107pub use per_token_backward::{
108 QuantizePerTokenBackwardArgs, QuantizePerTokenBackwardDescriptor, QuantizePerTokenBackwardPlan,
109};
110pub use dequantize_per_token::{
111 DequantizePerTokenArgs, DequantizePerTokenDescriptor, DequantizePerTokenPlan,
112};
113pub use dequantize_per_token_backward::{
114 DequantizePerTokenBackwardArgs, DequantizePerTokenBackwardDescriptor,
115 DequantizePerTokenBackwardPlan,
116};
117pub use per_group::{QuantizePerGroupArgs, QuantizePerGroupDescriptor, QuantizePerGroupPlan};
118pub use per_group_backward::{
119 QuantizePerGroupBackwardArgs, QuantizePerGroupBackwardDescriptor, QuantizePerGroupBackwardPlan,
120};
121pub use dequantize_per_group::{
122 DequantizePerGroupArgs, DequantizePerGroupDescriptor, DequantizePerGroupPlan,
123};
124pub use dequantize_per_group_backward::{
125 DequantizePerGroupBackwardArgs, DequantizePerGroupBackwardDescriptor,
126 DequantizePerGroupBackwardPlan,
127};
128
129pub use dynamic_range::{
131 DynamicRangeMode, DynamicRangeQuantizeArgs, DynamicRangeQuantizeDescriptor,
132 DynamicRangeQuantizePlan, DynamicRangeScope,
133};
134pub use quantized_linear::{
135 QuantizedLinearArgs, QuantizedLinearDescriptor, QuantizedLinearPlan,
136};
137
138pub use smoothquant::{
140 SmoothQuantLinearArgs, SmoothQuantLinearDescriptor, SmoothQuantLinearPlan,
141};
142
143pub use gguf::{
145 BlockQ2K, BlockQ3K, BlockQ4_0, BlockQ4_1, BlockQ4K, BlockQ5_0, BlockQ5_1, BlockQ5K, BlockQ6K,
146 BlockQ8_0, BlockQ8K, GgufDequantizeArgs, GgufDequantizeDescriptor, GgufDequantizePlan,
147 GgufMmvqArgs, GgufMmvqDescriptor, GgufMmvqPlan,
148};
149
150pub use gguf::{
153 GgufMmvqBatchedActivation, GgufMmvqBatchedArgs, GgufMmvqBatchedDescriptor,
154 GgufMmvqBatchedFormat, GgufMmvqBatchedPlan,
155};
156
157pub use gguf::{GgufMmvqMultiMArgs, GgufMmvqMultiMDescriptor, GgufMmvqMultiMPlan};
160
161pub use nf4::{
166 Nf4Activation, Nf4DequantizeArgs, Nf4DequantizePlan, Nf4Descriptor, Nf4MmvqArgs,
167 Nf4MmvqMultiMArgs, Nf4MmvqMultiMDescriptor, Nf4MmvqMultiMPlan, Nf4MmvqPlan, NF4_CODEBOOK,
168};
169
170use baracuda_cutlass::{Error, Result};
171
172pub(crate) fn map_status(code: i32) -> Result<()> {
175 match code {
176 0 => Ok(()),
177 1 => Err(Error::MisalignedOperand),
178 2 => Err(Error::InvalidProblem(
179 "baracuda-kernels-sys reported invalid problem",
180 )),
181 3 => Err(Error::Unsupported(
182 "baracuda-kernels-sys reported unsupported configuration",
183 )),
184 4 => Err(Error::WorkspaceTooSmall { needed: 0, got: 0 }),
185 n => Err(Error::CutlassInternal(n)),
186 }
187}
188
189pub(crate) fn validate_input_element(
192 tin_kind: baracuda_kernels_types::ElementKind,
193 plan_name: &'static str,
194) -> Result<()> {
195 use baracuda_kernels_types::ElementKind;
196 if !matches!(
197 tin_kind,
198 ElementKind::F32 | ElementKind::F64 | ElementKind::F16 | ElementKind::Bf16
199 ) {
200 return Err(Error::Unsupported(plan_name));
201 }
202 Ok(())
203}
204
205pub(crate) fn validate_output_element(
208 tout_kind: baracuda_kernels_types::ElementKind,
209 plan_name: &'static str,
210) -> Result<()> {
211 use baracuda_kernels_types::ElementKind;
212 if !matches!(tout_kind, ElementKind::S8 | ElementKind::U8) {
213 return Err(Error::Unsupported(plan_name));
214 }
215 Ok(())
216}
217
218#[inline]
225pub fn default_q_range(out_kind: baracuda_kernels_types::ElementKind) -> Option<(i32, i32)> {
226 use baracuda_kernels_types::ElementKind;
227 match out_kind {
228 ElementKind::S8 => Some((-128, 127)),
229 ElementKind::U8 => Some((0, 255)),
230 _ => None,
231 }
232}