// oxicuda_quant/scheme/mod.rs
//! # Quantization Schemes
//!
//! This module exposes a suite of post-training quantization (PTQ) strategies:
//!
//! | Module | Scheme | Primary use |
//! |---------------|---------------------------------------------|-------------|
//! | `minmax` | Min-Max calibration (INT4/INT8) | General PTQ |
//! | `nf4` | NormalFloat4 (QLoRA) | 4-bit weights |
//! | `fp8` | FP8 E4M3 / E5M2 (Hopper / Blackwell) | Training & inference |
//! | `gptq` | GPTQ Hessian-guided quantization | LLM weights |
//! | `smooth_quant`| SmoothQuant activation–weight migration | LLM activations |

13pub mod fp8;
14pub mod gptq;
15pub mod minmax;
16pub mod nf4;
17pub mod smooth_quant;
18
19pub use fp8::{Fp8Codec, Fp8Format};
20pub use gptq::{GptqConfig, GptqOutput, GptqQuantizer};
21pub use minmax::{MinMaxQuantizer, QuantGranularity, QuantParams, QuantScheme};
22pub use nf4::{NF4_LUT, Nf4Quantizer};
23pub use smooth_quant::{SmoothQuantConfig, SmoothQuantMigrator};