// scirs2_linalg/quantization/mod.rs

//! Quantization-aware linear algebra operations
//!
//! This module provides functions and types for working with quantized matrices and vectors.
//! Quantization reduces the precision of floating-point numbers to save memory and
//! computational resources, which is particularly useful in machine learning applications.
//!
//! ## Overview
//!
//! * Quantization of matrices and vectors to lower bit-width representations
//! * Linear algebra operations on quantized data
//! * Support for different quantization methods (uniform, symmetric, affine)
//! * Efficient operations with mixed quantized and floating-point data
//!
//! ## Examples
//!
//! Basic quantization:
//!
//! ```
//! use scirs2_core::ndarray::{Array2, array};
//! use scirs2_linalg::quantization::{quantize_matrix, dequantize_matrix, QuantizationMethod};
//!
//! let a = array![[1.0_f32, 2.5, 3.7], [4.2, 5.0, 6.1]];
//!
//! // Quantize to 8-bit
//! let (quantized, params) = quantize_matrix(&a.view(), 8, QuantizationMethod::Affine);
//!
//! // Dequantize back to floating point
//! let a_dequantized = dequantize_matrix(&quantized, &params);
//!
//! // Check the error exists but is bounded
//! let max_error = (&a - &a_dequantized).mapv(|x| x.abs()).fold(0.0_f32, |acc, &b| acc.max(b));
//! assert!(max_error > 0.0); // There should be some quantization error
//! assert!(max_error < 10.0); // But it should be bounded
//! ```
//!
//! Quantized matrix multiplication:
//!
//! ```
//! use scirs2_core::ndarray::{Array2, array};
//! use scirs2_linalg::quantization::{quantize_matrix, QuantizationMethod, quantized_matmul};
//!
//! let a = array![[1.0_f32, 2.0], [3.0, 4.0]];
//! let b = array![[5.0_f32, 6.0], [7.0, 8.0]];
//!
//! // Quantize both matrices to 8-bit
//! let (a_q, a_params) = quantize_matrix(&a.view(), 8, QuantizationMethod::Symmetric);
//! let (b_q, b_params) = quantize_matrix(&b.view(), 8, QuantizationMethod::Symmetric);
//!
//! // Perform quantized matrix multiplication
//! let c_q = quantized_matmul(&a_q, &a_params, &b_q, &b_params).unwrap();
//!
//! // Regular matrix multiplication for comparison
//! let c = a.dot(&b);
//!
//! // Check the error is acceptable
//! let rel_error = (&c - &c_q).mapv(|x| x.abs()).sum() / c.sum();
//! assert!(rel_error < 0.1); // Relative error should be small
//! ```

60// Core type definitions
61pub mod types;
62
63// Data structure modules
64pub mod matrix;
65pub mod vector;
66
67// Function modules
68pub mod conversions;
69pub mod operations;
70
71// Test module
72#[cfg(test)]
73pub mod tests;
74
75// Existing submodules
76pub mod calibration;
77pub mod calibration_ema;
78pub mod fusion;
79pub mod out_of_core;
80pub mod quantized_matrixfree;
81pub mod simd;
82pub mod solvers;
83pub mod stability;
84
85// Re-export all public types and functions for backward compatibility
86
87// Types
88pub use self::types::{QuantizationMethod, QuantizationParams, QuantizedDataType};
89
90// Matrix types and functions
91pub use self::matrix::{get_quantizedmatrix_2d_i8, QuantizedData2D, QuantizedMatrix};
92
93// Vector types and functions
94pub use self::vector::{get_quantized_vector_1d_i8, QuantizedData1D, QuantizedVector};
95
96// Conversion functions
97pub use self::conversions::{
98    dequantize_matrix, dequantize_vector_public as dequantize_vector, fake_quantize,
99    fake_quantize_vector, quantize_matrix, quantize_matrix_per_channel, quantize_vector,
100};
101
102// Operation functions
103pub use self::operations::{quantized_dot, quantized_matmul, quantized_matvec};
104
105// Re-export submodule public APIs
106pub use calibration::*;
107pub use calibration_ema::*;
108pub use fusion::*;
109pub use out_of_core::*;
110pub use quantized_matrixfree::*;
111pub use simd::*;
112pub use solvers::*;
113pub use stability::*;