Skip to main content

quantize_rs/
lib.rs

//! Neural network quantization toolkit for ONNX models.
//!
//! `quantize-rs` converts FP32 ONNX model weights to INT8 or INT4,
//! reducing model size by 4--8x with minimal accuracy loss. It supports
//! per-tensor and per-channel quantization, calibration-based range
//! optimization, and writes ONNX-Runtime-compatible QDQ models.
//!
//! # Modules
//!
//! - [`quantization`] -- core quantization logic (INT8/INT4, per-channel, packing)
//! - [`onnx_utils`] -- ONNX model loading, weight extraction, QDQ save, validation
//! - [`calibration`] -- (feature `calibration`) calibration datasets, activation-based inference, range methods
//! - [`config`] -- YAML/TOML configuration file support
//! - [`errors`] -- typed error enum ([`QuantizeError`]) for all public API functions
//!
//! # Feature flags
//!
//! - **`calibration`** *(default)* -- enables activation-based calibration (adds `tract-onnx`, `ndarray`)
//! - **`python`** -- enables PyO3 bindings (`quantize_rs` Python module)
20
21pub mod calibration;
22pub mod config;
23pub mod errors;
24/// Raw prost-generated ONNX protobuf types.  Use the higher-level wrappers
25/// in [`onnx_utils`] instead — this module is an implementation detail and
26/// may change without notice.
27#[doc(hidden)]
28pub mod onnx_proto;
29pub mod onnx_utils;
30pub mod quantization;
31
32// ---- Stable public re-exports (prefer these over reaching into submodules) ----
33
34#[cfg(feature = "calibration")]
35pub use calibration::inference::ActivationEstimator;
36pub use calibration::{stats::ActivationStats, CalibrationDataset};
37pub use config::Config;
38pub use errors::QuantizeError;
39pub use onnx_utils::graph_builder::QdqWeightInput;
40pub use onnx_utils::{ConnectivityReport, ModelInfo, OnnxModel, QuantizedWeightInfo, WeightTensor};
41pub use quantization::{
42    pack_int4, unpack_int4, QuantConfig, QuantParams, QuantizedWeightOutput, Quantizer,
43};
44
45/// Library version string, read from `Cargo.toml` at compile time.
46pub const VERSION: &str = env!("CARGO_PKG_VERSION");
47
#[cfg(test)]
mod tests {
    use super::*;

    /// Sanity check: the compile-time version constant must never be blank.
    #[test]
    fn test_version() {
        assert!(!VERSION.is_empty(), "VERSION must not be empty");
    }
}
57
// Compiled only when the `python` feature is enabled (described in the crate
// docs as the PyO3 bindings). The module is private, so it adds nothing to
// the public Rust API.
#[cfg(feature = "python")]
mod python;