Skip to main content

entrenar/quant/
mod.rs

1//! Quantization: quantization-aware training (QAT) and post-training quantization (PTQ)
2//!
3//! Provides quantization for QLoRA and Quantization-Aware Training:
4//! - 4-bit block-wise quantization for QLoRA
5//! - Fake quantization with STE for QAT
6//! - PTQ calibration (min-max, percentile, moving average)
7//! - GGUF-compatible Q4_0/Q8_0 formats
8//! - Per-channel, per-group, and per-tensor quantization granularity
9//! - Quantization error analysis and metrics
10//! - Accuracy degradation benchmarks
11
12mod benchmarks; // accuracy-degradation benchmarks and synthetic weight generators
13mod calibration; // PTQ calibration (min-max, percentile) and the `Calibrator` type
14mod double_quant; // 4-bit double quantization (`quantize_4bit_double` / `dequantize_4bit_double`)
15mod error_analysis; // quantization error analysis and metrics
16mod fake_quantize; // fake quantization with STE backward pass, for QAT
17mod gguf_quant; // GGUF-compatible Q4_0 / Q8_0 formats
18mod granularity; // per-channel, per-group and per-tensor quantization
19mod quant4bit; // 4-bit block-wise quantization for QLoRA
20
21pub use benchmarks::{
22    accuracy_retention, compare_bit_width_degradation, generate_gaussian_weights,
23    generate_multi_channel_weights, generate_uniform_weights, generate_weights_with_outliers,
24    run_benchmark, run_full_benchmark_suite, BenchmarkSuite, QuantBenchmarkResult,
25};
26pub use calibration::{
27    calibrate_min_max, calibrate_percentile, CalibrationMethod, CalibrationResult, Calibrator,
28};
29pub use double_quant::{
30    dequantize_4bit_double, quantize_4bit_double, DoubleQuantized4Bit, DOUBLE_QUANT_BLOCK_SIZE,
31};
32pub use error_analysis::{
33    analyze_error, analyze_outlier_impact, compare_bit_widths, error_within_bounds,
34    scale_sensitivity, theoretical_max_error, theoretical_sqnr, QuantErrorStats,
35};
36pub use fake_quantize::{fake_quantize, ste_backward, FakeQuantConfig, FakeQuantize};
37pub use gguf_quant::{GGUFQuantType, GGUF_BLOCK_SIZE, Q4_0, Q8_0};
38pub use granularity::{
39    calibrate_per_channel, calibrate_per_group, calibrate_per_tensor, compare_granularities,
40    dequantize_tensor, dequantize_with_params, quantization_mse, quantize_tensor,
41    quantize_with_params, QuantGranularity, QuantMode, QuantParams, QuantizedTensor,
42};
43pub use quant4bit::{dequantize_4bit, quantize_4bit, Quantized4Bit, BLOCK_SIZE};