Skip to main content

axonml_quant/
lib.rs

//! Axonml Quant - Model Quantization Library
//!
//! Provides quantization support for reducing model size and improving
//! inference performance. Supports multiple quantization formats:
//!
//! - **Q8_0**: 8-bit quantization (block size 32)
//! - **Q4_0**: 4-bit quantization (block size 32)
//! - **Q4_1**: 4-bit quantization with min/max (block size 32)
//! - **F16**: Half-precision floating point
//!
//! # Example
//! ```ignore
//! use axonml_quant::{dequantize_tensor, quantize_tensor, QuantType};
//!
//! let tensor = Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0], &[4])?;
//! let quantized = quantize_tensor(&tensor, QuantType::Q8_0)?;
//! let dequantized = dequantize_tensor(&quantized)?;
//! ```
//!
//! @version 0.1.0
//! @author AutomataNexus Development Team
22
23#![warn(missing_docs)]
24#![warn(clippy::all)]
25#![allow(clippy::module_name_repetitions)]
26#![allow(clippy::must_use_candidate)]
27#![allow(clippy::missing_errors_doc)]
28
29pub mod error;
30pub mod types;
31pub mod quantize;
32pub mod dequantize;
33pub mod calibration;
34
35pub use error::{QuantError, QuantResult};
36pub use types::{QuantType, QuantizedTensor, QuantizedBlock};
37pub use quantize::{quantize_tensor, quantize_model};
38pub use dequantize::{dequantize_tensor, dequantize_block};
39pub use calibration::{CalibrationData, calibrate};
40
41// =============================================================================
42// Constants
43// =============================================================================
44
/// Default block size for quantization.
///
/// Equal to the block size (32) listed for the Q8_0, Q4_0 and Q4_1 formats
/// in the crate-level documentation.
pub const DEFAULT_BLOCK_SIZE: usize = 32;

/// Maximum block size supported.
///
/// Always at least [`DEFAULT_BLOCK_SIZE`]; the crate's unit test checks this
/// relationship.
pub const MAX_BLOCK_SIZE: usize = 256;
50
51// =============================================================================
52// Tests
53// =============================================================================
54
#[cfg(test)]
mod tests {
    use super::*;

    /// Sanity-checks the relationship between the crate's block-size constants.
    #[test]
    fn test_constants() {
        // The default block size must be non-zero.
        assert!(DEFAULT_BLOCK_SIZE > 0);
        // The default must never exceed the largest supported block size.
        assert!(MAX_BLOCK_SIZE >= DEFAULT_BLOCK_SIZE);
    }
}
64}