1//! # Quantization Schemes
2//!
3//! This module exposes a suite of post-training quantization (PTQ) strategies:
4//!
5//! | Module        | Scheme                                      | Primary use |
6//! |---------------|---------------------------------------------|-------------|
7//! | `minmax`      | Min-Max calibration (INT4/INT8)             | General PTQ |
8//! | `nf4`         | NormalFloat4 (QLoRA)                        | 4-bit weights |
9//! | `fp8`         | FP8 E4M3 / E5M2 (Hopper / Blackwell)        | Training & inference |
10//! | `gptq`        | GPTQ Hessian-guided quantization            | LLM weights |
11//! | `smooth_quant`| SmoothQuant activation–weight migration     | LLM activations |
12
13pub mod fp8;
14pub mod gptq;
15pub mod minmax;
16pub mod nf4;
17pub mod smooth_quant;
18
19pub use fp8::{Fp8Codec, Fp8Format};
20pub use gptq::{GptqConfig, GptqOutput, GptqQuantizer};
21pub use minmax::{MinMaxQuantizer, QuantGranularity, QuantParams, QuantScheme};
22pub use nf4::{NF4_LUT, Nf4Quantizer};
23pub use smooth_quant::{SmoothQuantConfig, SmoothQuantMigrator};