scirs2-neural 0.4.2

//! GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers
//!
//! Implements the GPTQ algorithm for optimal weight quantization using Hessian
//! information from calibration data. GPTQ processes weights column-by-column,
//! finding the optimal quantized value for each weight while compensating
//! remaining weights for the quantization error.
//!
//! Reference: Frantar et al., "GPTQ: Accurate Post-Training Quantization
//! for Generative Pre-trained Transformers", 2023.

use crate::error::{Error, Result};
use scirs2_core::ndarray::{Array1, Array2, ArrayD, IxDyn};

/// GPTQ configuration
#[derive(Debug, Clone)]
pub struct GptqConfig {
    /// Number of bits for quantization (typically 4 or 8)
    pub bits: u8,
    /// Block size for lazy batch updates (default 128)
    pub block_size: usize,
    /// Dampening factor for Hessian diagonal (prevents singularity)
    pub damp_percent: f64,
    /// Whether to use symmetric quantization
    pub symmetric: bool,
    /// Group size for group quantization (0 = per-tensor)
    pub group_size: usize,
}

impl Default for GptqConfig {
    fn default() -> Self {
        Self {
            bits: 4,
            block_size: 128,
            damp_percent: 0.01,
            symmetric: true,
            group_size: 128,
        }
    }
}

/// Result of GPTQ quantization
#[derive(Debug, Clone)]
pub struct GptqResult {
    /// Quantized weight matrix (stored as f64 for simulation)
    pub quantized_weights: Array2<f64>,
    /// Scale factors (one per group or one per column)
    pub scales: Vec<f64>,
    /// Zero points (one per group or one per column)
    pub zeros: Vec<f64>,
    /// Quantization error (Frobenius norm of W - Q)
    pub quantization_error: f64,
    /// Number of bits used
    pub bits: u8,
}

/// GPTQ quantizer
///
/// Performs Hessian-based optimal quantization of weight matrices.
#[derive(Debug)]
pub struct GptqQuantizer {
    config: GptqConfig,
}

impl GptqQuantizer {
    /// Create a new GPTQ quantizer with the given config
    pub fn new(config: GptqConfig) -> Self {
        Self { config }
    }

    /// Compute the Hessian approximation from calibration data
    ///
    /// H = 2 * X^T * X / n_samples
    ///
    /// where X is the matrix of calibration inputs (n_samples x n_features).
    pub fn compute_hessian(&self, calibration_data: &Array2<f64>) -> Result<Array2<f64>> {
        let n_samples = calibration_data.nrows();
        if n_samples == 0 {
            return Err(Error::InvalidArgument(
                "Calibration data must have at least one sample".to_string(),
            ));
        }

        let xt = calibration_data.t();
        let mut h = xt.dot(calibration_data);

        // Normalize
        let n = n_samples as f64;
        h.mapv_inplace(|v| 2.0 * v / n);

        Ok(h)
    }

    /// Add dampening to the Hessian diagonal to ensure positive definiteness
    fn dampen_hessian(&self, hessian: &mut Array2<f64>) {
        let n = hessian.nrows();
        // Find max diagonal element
        let max_diag = (0..n)
            .map(|i| hessian[[i, i]].abs())
            .fold(0.0_f64, f64::max);

        let damp = self.config.damp_percent * max_diag;
        for i in 0..n {
            hessian[[i, i]] += damp;
        }
    }

    /// Quantize a single value to the given bit-width
    fn quantize_value(&self, val: f64, scale: f64, zero: f64) -> f64 {
        let qmax = (1u64 << self.config.bits) as f64 - 1.0;
        if scale > 0.0 {
            let q = (val / scale + zero).round().clamp(0.0, qmax);
            (q - zero) * scale
        } else {
            0.0
        }
    }

    /// Compute scale and zero for a group of weights
    fn compute_group_params(&self, weights: &[f64]) -> (f64, f64) {
        let min_val = weights.iter().cloned().fold(f64::INFINITY, f64::min);
        let max_val = weights.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
        let qmax = (1u64 << self.config.bits) as f64 - 1.0;

        if self.config.symmetric {
            let abs_max = max_val.abs().max(min_val.abs());
            let scale = if qmax > 0.0 {
                (2.0 * abs_max) / qmax
            } else {
                1.0
            };
            let zero = qmax / 2.0; // midpoint
            (scale, zero)
        } else {
            let range = max_val - min_val;
            let scale = if qmax > 0.0 && range > 0.0 {
                range / qmax
            } else {
                1.0
            };
            let zero = if scale > 0.0 {
                (-min_val / scale).round().clamp(0.0, qmax)
            } else {
                0.0
            };
            (scale, zero)
        }
    }

    /// Perform GPTQ quantization on a weight matrix
    ///
    /// # Arguments
    /// * `weights` - Weight matrix (out_features x in_features)
    /// * `hessian` - Hessian matrix (in_features x in_features)
    ///
    /// # Returns
    /// * `GptqResult` containing quantized weights, scales, zeros, and error
    pub fn quantize(&self, weights: &Array2<f64>, hessian: &Array2<f64>) -> Result<GptqResult> {
        let (n_rows, n_cols) = (weights.nrows(), weights.ncols());

        if hessian.nrows() != n_cols || hessian.ncols() != n_cols {
            return Err(Error::InvalidArgument(format!(
                "Hessian shape ({}, {}) doesn't match weight columns {}",
                hessian.nrows(),
                hessian.ncols(),
                n_cols
            )));
        }

        let mut h = hessian.clone();
        self.dampen_hessian(&mut h);

        // Compute Cholesky-like decomposition diagonal (simplified)
        // We use the diagonal of H^{-1} as the error weighting
        let h_diag: Vec<f64> = (0..n_cols).map(|i| h[[i, i]]).collect();

        let mut w = weights.clone();
        let mut quantized = Array2::<f64>::zeros((n_rows, n_cols));
        let mut scales = Vec::new();
        let mut zeros = Vec::new();

        let block_size = self.config.block_size.min(n_cols);
        let group_size = if self.config.group_size > 0 {
            self.config.group_size
        } else {
            n_cols
        };

        // Process columns in blocks
        let mut col = 0;
        while col < n_cols {
            let block_end = (col + block_size).min(n_cols);

            // Collect error matrix for this block to do lazy batch update
            let mut err_block = Array2::<f64>::zeros((n_rows, block_end - col));

            for j in col..block_end {
                // Determine group for this column
                let group_idx = j / group_size;

                // Compute group parameters if at group boundary
                if j % group_size == 0 {
                    let group_end = ((group_idx + 1) * group_size).min(n_cols);
                    // Collect weights in this group across all rows
                    let mut group_weights = Vec::new();
                    for r in 0..n_rows {
                        for c in j..group_end {
                            group_weights.push(w[[r, c]]);
                        }
                    }
                    let (s, z) = self.compute_group_params(&group_weights);
                    scales.push(s);
                    zeros.push(z);
                }

                let scale = scales.last().copied().unwrap_or(1.0);
                let zero = zeros.last().copied().unwrap_or(0.0);

                // Quantize column j for all rows
                for r in 0..n_rows {
                    let w_val = w[[r, j]];
                    let q_val = self.quantize_value(w_val, scale, zero);
                    quantized[[r, j]] = q_val;

                    let err = w_val - q_val;
                    err_block[[r, j - col]] = err;
                }

                // Update remaining columns in this block
                // w[:, j+1:block_end] += err[:, j] * H[j, j+1:block_end] / H[j, j]
                let h_jj = h_diag[j];
                if h_jj.abs() > 1e-12 {
                    for k in (j + 1)..block_end {
                        let h_jk = h[[j, k]];
                        let update_factor = h_jk / h_jj;
                        for r in 0..n_rows {
                            w[[r, k]] -= err_block[[r, j - col]] * update_factor;
                        }
                    }
                }
            }

            // Lazy batch update: propagate error to remaining columns
            // w[:, block_end:] += err_block * H[col:block_end, block_end:] / diag(H[col:block_end])
            if block_end < n_cols {
                for j in col..block_end {
                    let h_jj = h_diag[j];
                    if h_jj.abs() > 1e-12 {
                        for k in block_end..n_cols {
                            let h_jk = h[[j, k]];
                            let update_factor = h_jk / h_jj;
                            for r in 0..n_rows {
                                w[[r, k]] -= err_block[[r, j - col]] * update_factor;
                            }
                        }
                    }
                }
            }

            col = block_end;
        }

        // Compute quantization error (Frobenius norm)
        let error = weights
            .iter()
            .zip(quantized.iter())
            .map(|(&w, &q)| (w - q).powi(2))
            .sum::<f64>()
            .sqrt();

        Ok(GptqResult {
            quantized_weights: quantized,
            scales,
            zeros,
            quantization_error: error,
            bits: self.config.bits,
        })
    }

    /// Convenience: quantize weights using calibration data directly
    ///
    /// Computes the Hessian from calibration data, then runs GPTQ.
    pub fn quantize_with_calibration(
        &self,
        weights: &Array2<f64>,
        calibration_data: &Array2<f64>,
    ) -> Result<GptqResult> {
        let hessian = self.compute_hessian(calibration_data)?;
        self.quantize(weights, &hessian)
    }
}

/// Naive (round-to-nearest) quantization for comparison
pub fn naive_quantize(
    weights: &Array2<f64>,
    bits: u8,
    symmetric: bool,
) -> Result<(Array2<f64>, f64)> {
    let qmax = (1u64 << bits) as f64 - 1.0;

    let min_val = weights.iter().cloned().fold(f64::INFINITY, f64::min);
    let max_val = weights.iter().cloned().fold(f64::NEG_INFINITY, f64::max);

    let (scale, zero) = if symmetric {
        let abs_max = max_val.abs().max(min_val.abs());
        let s = if qmax > 0.0 {
            2.0 * abs_max / qmax
        } else {
            1.0
        };
        (s, qmax / 2.0)
    } else {
        let range = max_val - min_val;
        let s = if qmax > 0.0 && range > 0.0 {
            range / qmax
        } else {
            1.0
        };
        let z = if s > 0.0 {
            (-min_val / s).round().clamp(0.0, qmax)
        } else {
            0.0
        };
        (s, z)
    };

    let quantized = weights.mapv(|w| {
        if scale > 0.0 {
            let q = (w / scale + zero).round().clamp(0.0, qmax);
            (q - zero) * scale
        } else {
            0.0
        }
    });

    let error = weights
        .iter()
        .zip(quantized.iter())
        .map(|(&w, &q)| (w - q).powi(2))
        .sum::<f64>()
        .sqrt();

    Ok((quantized, error))
}

#[cfg(test)]
mod tests {
    use super::*;

    fn make_test_weights(rows: usize, cols: usize) -> Array2<f64> {
        let mut rng = scirs2_core::random::rng();
        let data: Vec<f64> = (0..rows * cols)
            .map(|_| scirs2_core::random::RngExt::random_range(&mut rng, -1.0..1.0))
            .collect();
        Array2::from_shape_vec((rows, cols), data).expect("test: create weights")
    }

    fn make_calibration_data(n_samples: usize, n_features: usize) -> Array2<f64> {
        let mut rng = scirs2_core::random::rng();
        let data: Vec<f64> = (0..n_samples * n_features)
            .map(|_| scirs2_core::random::RngExt::random_range(&mut rng, -1.0..1.0))
            .collect();
        Array2::from_shape_vec((n_samples, n_features), data).expect("test: create calibration")
    }

    #[test]
    fn test_hessian_computation() {
        let config = GptqConfig::default();
        let gptq = GptqQuantizer::new(config);
        let calib = make_calibration_data(100, 16);
        let h = gptq.compute_hessian(&calib).expect("test: hessian");
        assert_eq!(h.nrows(), 16);
        assert_eq!(h.ncols(), 16);
        // Hessian should be symmetric
        for i in 0..16 {
            for j in 0..16 {
                assert!(
                    (h[[i, j]] - h[[j, i]]).abs() < 1e-10,
                    "Hessian not symmetric at ({}, {})",
                    i,
                    j
                );
            }
        }
    }

    #[test]
    fn test_gptq_basic_quantization() {
        let config = GptqConfig {
            bits: 8,
            block_size: 4,
            group_size: 8,
            ..Default::default()
        };
        let gptq = GptqQuantizer::new(config);
        let weights = make_test_weights(4, 8);
        let calib = make_calibration_data(50, 8);
        let result = gptq
            .quantize_with_calibration(&weights, &calib)
            .expect("test: gptq");
        assert_eq!(result.quantized_weights.nrows(), 4);
        assert_eq!(result.quantized_weights.ncols(), 8);
        assert_eq!(result.bits, 8);
        assert!(result.quantization_error >= 0.0);
    }

    #[test]
    fn test_gptq_reduces_error_vs_naive() {
        let config = GptqConfig {
            bits: 4,
            block_size: 8,
            group_size: 16,
            symmetric: true,
            ..Default::default()
        };
        let gptq = GptqQuantizer::new(config);

        // Use a structured weight matrix where Hessian-based optimization helps
        let weights = make_test_weights(8, 32);
        let calib = make_calibration_data(200, 32);

        let gptq_result = gptq
            .quantize_with_calibration(&weights, &calib)
            .expect("test: gptq");
        let (_, naive_error) = naive_quantize(&weights, 4, true).expect("test: naive");

        // GPTQ should generally produce lower or equal error than naive
        // (may not always be strictly lower for small random matrices,
        //  but the error compensation mechanism should help)
        // We check that GPTQ error is not catastrophically worse
        assert!(
            gptq_result.quantization_error < naive_error * 1.5,
            "GPTQ error {} is too much worse than naive error {}",
            gptq_result.quantization_error,
            naive_error
        );
    }

    #[test]
    fn test_gptq_4bit() {
        let config = GptqConfig {
            bits: 4,
            block_size: 4,
            group_size: 8,
            ..Default::default()
        };
        let gptq = GptqQuantizer::new(config);
        let weights = make_test_weights(4, 16);
        let calib = make_calibration_data(50, 16);
        let result = gptq
            .quantize_with_calibration(&weights, &calib)
            .expect("test: gptq 4bit");
        assert_eq!(result.bits, 4);
    }

    #[test]
    fn test_hessian_empty_calibration() {
        let config = GptqConfig::default();
        let gptq = GptqQuantizer::new(config);
        let calib = Array2::<f64>::zeros((0, 8));
        let result = gptq.compute_hessian(&calib);
        assert!(result.is_err());
    }

    #[test]
    fn test_hessian_shape_mismatch() {
        let config = GptqConfig::default();
        let gptq = GptqQuantizer::new(config);
        let weights = make_test_weights(4, 8);
        let hessian = Array2::<f64>::zeros((4, 4)); // wrong shape
        let result = gptq.quantize(&weights, &hessian);
        assert!(result.is_err());
    }

    #[test]
    fn test_naive_quantize() {
        let weights = make_test_weights(4, 8);
        let (quantized, error) = naive_quantize(&weights, 8, true).expect("test: naive");
        assert_eq!(quantized.shape(), weights.shape());
        assert!(error >= 0.0);
    }

    #[test]
    fn test_gptq_asymmetric() {
        let config = GptqConfig {
            bits: 8,
            symmetric: false,
            block_size: 4,
            group_size: 8,
            ..Default::default()
        };
        let gptq = GptqQuantizer::new(config);
        let weights = make_test_weights(4, 8);
        let calib = make_calibration_data(50, 8);
        let result = gptq
            .quantize_with_calibration(&weights, &calib)
            .expect("test: gptq asymmetric");
        assert!(result.quantization_error >= 0.0);
    }

    #[test]
    fn test_gptq_per_tensor() {
        let config = GptqConfig {
            bits: 8,
            block_size: 4,
            group_size: 0, // per-tensor
            ..Default::default()
        };
        let gptq = GptqQuantizer::new(config);
        let weights = make_test_weights(4, 8);
        let calib = make_calibration_data(50, 8);
        let result = gptq
            .quantize_with_calibration(&weights, &calib)
            .expect("test: gptq per-tensor");
        // With group_size=0 (per-tensor), we get 1 scale/zero
        assert_eq!(result.scales.len(), 1);
    }
}