ruvector_cnn/quantize/mod.rs
1//! INT8 Quantization Module (ADR-091)
2//!
3//! This module provides comprehensive INT8 quantization support for CNN models:
4//! - **Phase 1** (params, tensor): Core quantization infrastructure (NEW)
5//! - **Phase 2** (calibration): Histogram-based range estimation
6//! - **Phase 3** (graph_rewrite): BatchNorm fusion, zero-point optimization, Q/DQ insertion
7//! - **Phase 4** (kernel dispatch; no submodule declared here): Runtime selection of optimized INT8 kernels
8//!
9//! ## ADR-091 Phase 1 Components (New)
10//!
11//! - `params`: Quantization parameters (scale, zero_point, qmin, qmax)
12//! - `tensor`: Quantized tensor types with metadata
13//! - Enhanced `calibration`: `CalibrationCollector` with MinMax, Percentile, MSE, and Entropy methods
14
15// ADR-091 Phase 1: Core infrastructure (NEW)
16pub mod params;
17pub mod tensor;
18
19// Existing implementation (Phase 2-3)
20pub mod calibration;
21pub mod graph_rewrite;
22
23// Phase 1 exports (`QuantParams` alias avoids clashing with `calibration::QuantizationParams` below)
24pub use params::{QuantizationParams as QuantParams, QuantizationScheme, QuantizationMode};
25pub use tensor::{QuantizedTensor, QuantizationMetadata};
26
27// Existing exports (kept for backward compatibility)
28pub use calibration::{CalibrationHistogram, QuantizationParams, Quantizer};
29pub use graph_rewrite::{
30 ComputationGraph, GraphNode, NodeParams, NodeType,
31 fuse_batchnorm_to_conv, fuse_relu, fuse_hardswish, fuse_zp_to_bias,
32 generate_hardswish_lut, insert_qdq_nodes,
33};