// turboquant 0.1.1

use nalgebra::{DMatrix, DVector};
use rand::SeedableRng;
use rand_distr::{Distribution, Normal};
use serde::{Deserialize, Serialize};

use crate::codebook::Codebook;
use crate::error::{Result, TurboQuantError};
use crate::scalar_quant::ScalarQuantizer;
use crate::utils::validate_finite_vector;

/// Polar coordinate representation of a vector.
///
/// This is the output of [`PolarQuant::to_polar`] and the input of
/// [`PolarQuant::from_polar`]: one radius plus a pyramid of angles, where each
/// level holds half as many angles as the level before it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PolarCoords {
    /// The overall norm of the vector.
    ///
    /// `PolarQuant::from_polar` rejects values that are negative or non-finite.
    pub radius: f64,
    /// Polar angles at each level, (d-1) angles total across all levels.
    /// angles[level][i] for level ∈ 0..num_levels.
    ///
    /// Level `level` is expected to hold `dim >> (level + 1)` angles, so
    /// level 0 has `dim / 2` entries and the last level has exactly one.
    pub angles: Vec<Vec<f64>>,
    /// Original dimension.
    pub dim: usize,
}

/// PolarQuant quantized representation.
///
/// Produced by [`PolarQuant::quantize`]. Every field is checked against the
/// quantizer's own configuration by [`PolarQuant::dequantize`] before decoding.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[must_use]
pub struct PolarQuantized {
    /// Quantized radius (single value): an index into the radius codebook.
    pub radius_idx: u8,
    /// Quantized angle indices per level.
    ///
    /// `angle_indices[level]` holds one codebook index per angle of that level,
    /// i.e. `dim >> (level + 1)` entries.
    pub angle_indices: Vec<Vec<u8>>,
    /// Bit width used for quantization.
    pub bit_width: u8,
    /// Original dimension.
    pub dim: usize,
    /// Number of levels = log2(dim).
    pub num_levels: usize,
}

impl PolarQuantized {
    /// Storage footprint in bytes, assuming every index is packed at `bit_width` bits.
    ///
    /// Counts one index per stored angle plus one for the radius. The result is
    /// fractional when the total bit count is not a multiple of eight.
    pub fn bytes(&self) -> f64 {
        let angle_count: usize = self.angle_indices.iter().map(Vec::len).sum();
        // +1 accounts for the radius index.
        let index_count = angle_count + 1;
        let total_bits = index_count as f64 * f64::from(self.bit_width);
        total_bits / 8.0
    }

    /// Ratio between an uncompressed vector at 4 bytes per component and [`Self::bytes`].
    pub fn compression_ratio(&self) -> f64 {
        let uncompressed_bytes = self.dim as f64 * 4.0;
        uncompressed_bytes / self.bytes()
    }
}

/// PolarQuant: a hierarchical quantizer for KV cache vectors.
///
/// Uses a recursive polar transformation to convert Cartesian coordinates
/// into nested polar angles, then quantizes angles at each level.
///
/// The polar transform is norm-preserving and the angle distribution becomes
/// more uniform at each level, making it well-suited for scalar quantization.
///
/// Algorithm:
///   Level 1: Pair (x_{2j-1}, x_{2j}) → (radius_j, angle_j) = (‖pair‖, atan2)
///   Level ℓ: Pair adjacent radii from level ℓ-1 → new radii + angles
///   ...
///   Final: one overall radius + (d-1) angles across all levels
#[derive(Debug, Serialize, Deserialize)]
pub struct PolarQuant {
    /// Random preconditioner S ∈ ℝ^{d×d}.
    ///
    /// `new` fills this with the Q factor of a QR decomposition of a seeded
    /// Gaussian matrix, so its transpose is used as the inverse.
    preconditioner: DMatrix<f64>,
    /// Codebooks: one per level (angles at each level have different distributions).
    ///
    /// NOTE: `new` currently installs the same uniform [-π, π] codebook at every level.
    codebooks: Vec<Codebook>,
    /// Codebook for the radius.
    radius_codebook: Codebook,
    /// Vector dimension this quantizer was built for.
    pub dim: usize,
    /// Number of recursive pairing levels (`log2(dim)` when built by `new`).
    pub num_levels: usize,
    /// Bits per quantized index.
    pub bit_width: u8,
}

impl PolarQuant {
    /// Number of angles stored at `level` (0-based): the dimension halved `level + 1` times.
    fn expected_angle_count(&self, level: usize) -> usize {
        let halvings = level + 1;
        self.dim >> halvings
    }

    fn validate_polar_coords(&self, p: &PolarCoords) -> Result<()> {
        if p.dim != self.dim {
            return Err(TurboQuantError::DimensionMismatch {
                expected: self.dim,
                got: p.dim,
            });
        }
        if !p.radius.is_finite() || p.radius < 0.0 {
            return Err(TurboQuantError::InvalidValue {
                context: "polar radius".into(),
                value: p.radius,
            });
        }
        if p.angles.len() != self.num_levels {
            return Err(TurboQuantError::LengthMismatch {
                context: "polar angle levels".into(),
                expected: self.num_levels,
                got: p.angles.len(),
            });
        }

        for (level, angles) in p.angles.iter().enumerate() {
            let expected = self.expected_angle_count(level);
            if angles.len() != expected {
                return Err(TurboQuantError::LengthMismatch {
                    context: format!("polar angles at level {level}"),
                    expected,
                    got: angles.len(),
                });
            }
            validate_finite_vector(angles, "polar angle")?;
        }

        Ok(())
    }

    /// Check that `q` was produced for a quantizer configured like this one.
    ///
    /// Checks run in this order and the first failure is returned:
    /// dimension, bit width, declared level count, actual number of index
    /// levels, radius index validity, then per level the index count followed
    /// by index validity against that level's codebook.
    fn validate_quantized(&self, q: &PolarQuantized) -> Result<()> {
        if q.dim != self.dim {
            return Err(TurboQuantError::DimensionMismatch {
                expected: self.dim,
                got: q.dim,
            });
        }

        if q.bit_width != self.bit_width {
            return Err(TurboQuantError::BitWidthMismatch {
                expected: self.bit_width,
                got: q.bit_width,
            });
        }

        // Both the declared level count and the actual nesting depth must agree
        // with the quantizer; they are reported with distinct contexts.
        let declared_levels = q.num_levels;
        if declared_levels != self.num_levels {
            return Err(TurboQuantError::LengthMismatch {
                context: "polar quantization level count".into(),
                expected: self.num_levels,
                got: declared_levels,
            });
        }
        let stored_levels = q.angle_indices.len();
        if stored_levels != self.num_levels {
            return Err(TurboQuantError::LengthMismatch {
                context: "polar angle index levels".into(),
                expected: self.num_levels,
                got: stored_levels,
            });
        }

        // Index range checks are delegated to `ScalarQuantizer::validate_indices`.
        ScalarQuantizer::from_codebook(self.radius_codebook.clone())
            .validate_indices(&[q.radius_idx])?;

        for (level, level_indices) in q.angle_indices.iter().enumerate() {
            let expected = self.expected_angle_count(level);
            let got = level_indices.len();
            if got != expected {
                return Err(TurboQuantError::LengthMismatch {
                    context: format!("polar angle indices at level {level}"),
                    expected,
                    got,
                });
            }

            let level_quantizer = ScalarQuantizer::from_codebook(self.codebooks[level].clone());
            level_quantizer.validate_indices(level_indices)?;
        }

        Ok(())
    }

    /// Create a PolarQuant for dimension `dim`.
    ///
    /// # Arguments
    /// * `dim` - Must be a power of 2 for clean recursive decomposition.
    /// * `seed` - Random seed for the preconditioner.
    /// * `bit_width` - Bits per angle (1-8).
    pub fn new(dim: usize, seed: u64, bit_width: u8) -> Result<Self> {
        if dim == 0 || dim & (dim - 1) != 0 {
            // For simplicity, pad to next power of 2 if needed,
            // but we'll require power-of-2 for now.
            return Err(TurboQuantError::InvalidDimension(dim));
        }
        if !(1..=8).contains(&bit_width) {
            return Err(TurboQuantError::InvalidBitWidth(bit_width));
        }

        let num_levels = (dim as f64).log2() as usize;

        // Random preconditioner: diagonal random signs matrix
        // (simulates a random Hadamard transform to decorrelate)
        let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
        let normal = Normal::new(0.0, 1.0).unwrap();

        // We use a random orthogonal preconditioner for more thorough decorrelation.
        let data: Vec<f64> = (0..dim * dim).map(|_| normal.sample(&mut rng)).collect();
        let g = DMatrix::from_vec(dim, dim, data);
        let qr = g.qr();
        let preconditioner = qr.q();

        // Generate codebooks for each level.
        // At level ℓ, there are dim/2^ℓ angles, each following approximately
        // a uniform distribution on [0, π] (since atan2 of independent normals
        // is uniform on [-π, π], but we use [0, π] after folding).
        // We use a uniform distribution approximation for angles.
        let mut codebooks = Vec::with_capacity(num_levels);
        for _level in 0..num_levels {
            // Angle codebook: uniform on [-π, π] with equally spaced centroids.
            let k = 1usize << bit_width;
            let centroids: Vec<f64> = (0..k)
                .map(|i| {
                    let u = (i as f64 + 0.5) / k as f64;
                    // Map to [-π, π]
                    u * 2.0 * std::f64::consts::PI - std::f64::consts::PI
                })
                .collect();
            let boundaries: Vec<f64> = centroids.windows(2).map(|w| (w[0] + w[1]) / 2.0).collect();
            codebooks.push(Codebook {
                centroids,
                boundaries,
                bit_width,
            });
        }

        // Radius codebook: nonneg values, use Lloyd-Max on chi distribution
        // For simplicity, use a uniform codebook on [0, 2] (typical range).
        let k = 1usize << bit_width;
        let centroids: Vec<f64> = (0..k).map(|i| (i as f64 + 0.5) / k as f64 * 2.0).collect();
        let boundaries: Vec<f64> = centroids.windows(2).map(|w| (w[0] + w[1]) / 2.0).collect();
        let radius_codebook = Codebook {
            centroids,
            boundaries,
            bit_width,
        };

        Ok(Self {
            preconditioner,
            codebooks,
            radius_codebook,
            dim,
            num_levels,
            bit_width,
        })
    }

    /// Rotate `x` into the preconditioned basis: returns S · x.
    ///
    /// The caller (`to_polar`) checks that `x` has length `dim` before calling.
    fn precondition(&self, x: &[f64]) -> Vec<f64> {
        let input = DVector::from_vec(Vec::from(x));
        let rotated = &self.preconditioner * input;
        Vec::from(rotated.data)
    }

    /// Apply inverse preconditioner: x = Sᵀ · x'.
    ///
    /// The transpose acts as the inverse because `new` builds S as the Q factor
    /// of a QR decomposition.
    fn precondition_inverse(&self, x: &[f64]) -> Vec<f64> {
        let xv = DVector::from_vec(x.to_vec());
        // `tr_mul` computes Sᵀ · x directly, avoiding a d×d transposed copy per call.
        let y = self.preconditioner.tr_mul(&xv);
        y.data.into()
    }

    /// Convert a vector to its hierarchical polar representation.
    ///
    /// The input is first multiplied by the preconditioner, then reduced level
    /// by level:
    ///
    /// Level 1: For each pair (x_{2j}, x_{2j+1}):
    ///   r_j = sqrt(x_{2j}^2 + x_{2j+1}^2)
    ///   θ_j = atan2(x_{2j+1}, x_{2j})
    ///
    /// Subsequent levels: treat radii as the new coordinates and repeat.
    ///
    /// With zero levels (`dim == 1`) no pairing happens and `radius` is the
    /// single preconditioned coordinate itself, which may be negative.
    ///
    /// # Errors
    /// * `TurboQuantError::DimensionMismatch` if `x.len() != self.dim`.
    /// * Whatever `validate_finite_vector` reports for the input.
    pub fn to_polar(&self, x: &[f64]) -> Result<PolarCoords> {
        if x.len() != self.dim {
            return Err(TurboQuantError::DimensionMismatch {
                expected: self.dim,
                got: x.len(),
            });
        }
        validate_finite_vector(x, "PolarQuant input")?;

        // The preconditioned vector is owned, so it becomes the working buffer directly.
        let mut current = self.precondition(x);
        let mut all_angles: Vec<Vec<f64>> = Vec::with_capacity(self.num_levels);

        for _level in 0..self.num_levels {
            // Each adjacent pair (a, b) collapses to one radius and one angle.
            let (next_radii, level_angles): (Vec<f64>, Vec<f64>) = current
                .chunks_exact(2)
                .map(|pair| {
                    let (a, b) = (pair[0], pair[1]);
                    // atan2(y, x) ∈ (-π, π]
                    ((a * a + b * b).sqrt(), b.atan2(a))
                })
                .unzip();

            all_angles.push(level_angles);
            current = next_radii;
        }

        // After log2(dim) halvings a single value remains: the overall radius.
        let radius = current[0];

        Ok(PolarCoords {
            radius,
            angles: all_angles,
            dim: self.dim,
        })
    }

    /// Rebuild the Cartesian vector described by `p`.
    ///
    /// Walks the levels from last to first, expanding every radius `r` with its
    /// angle `θ` into the pair `(r·cos θ, r·sin θ)`, then applies the inverse
    /// preconditioner.
    ///
    /// # Errors
    /// Returns whatever `validate_polar_coords` reports when `p` does not match
    /// this quantizer.
    pub fn from_polar(&self, p: &PolarCoords) -> Result<Vec<f64>> {
        self.validate_polar_coords(p)?;

        // Begin with the lone top-level radius; each pass doubles the length.
        let mut radii = vec![p.radius];

        for level_angles in p.angles.iter().rev() {
            let expanded: Vec<f64> = level_angles
                .iter()
                .enumerate()
                .flat_map(|(j, &theta)| {
                    let r = radii[j];
                    [r * theta.cos(), r * theta.sin()]
                })
                .collect();
            radii = expanded;
        }

        // `radii` now holds the preconditioned coordinates; rotate back.
        Ok(self.precondition_inverse(&radii))
    }

    /// Quantize a vector using the hierarchical polar transform.
    pub fn quantize(&self, x: &[f64]) -> Result<PolarQuantized> {
        let polar = self.to_polar(x)?;

        // Quantize radius
        let radius_idx = self.quantize_angle(&self.radius_codebook, polar.radius);

        // Quantize angles at each level
        let angle_indices: Vec<Vec<u8>> = polar
            .angles
            .iter()
            .enumerate()
            .map(|(level, angles)| {
                let cb = &self.codebooks[level.min(self.codebooks.len() - 1)];
                angles.iter().map(|&a| self.quantize_angle(cb, a)).collect()
            })
            .collect();

        Ok(PolarQuantized {
            radius_idx,
            angle_indices,
            bit_width: self.bit_width,
            dim: self.dim,
            num_levels: self.num_levels,
        })
    }

    /// Decode `q` back into an approximate Cartesian vector.
    ///
    /// Indices are looked up in the radius and per-level angle codebooks, and
    /// the resulting polar coordinates are passed to `from_polar`.
    ///
    /// # Errors
    /// Returns the error from `validate_quantized` when `q` does not match this
    /// quantizer, or any error raised by `from_polar`.
    pub fn dequantize(&self, q: &PolarQuantized) -> Result<Vec<f64>> {
        self.validate_quantized(q)?;

        let radius = self.radius_codebook.dequantize_scalar(q.radius_idx);

        let mut angles: Vec<Vec<f64>> = Vec::with_capacity(q.angle_indices.len());
        for (level, level_codes) in q.angle_indices.iter().enumerate() {
            // Clamp to the last available codebook.
            let codebook_slot = level.min(self.codebooks.len() - 1);
            let cb = &self.codebooks[codebook_slot];

            let decoded: Vec<f64> = level_codes
                .iter()
                .map(|&code| cb.dequantize_scalar(code))
                .collect();
            angles.push(decoded);
        }

        self.from_polar(&PolarCoords {
            radius,
            angles,
            dim: self.dim,
        })
    }

    fn quantize_angle(&self, cb: &Codebook, angle: f64) -> u8 {
        cb.quantize_scalar(angle)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Deterministic standard-normal test vector of length `dim`.
    fn random_vector(dim: usize, seed: u64) -> Vec<f64> {
        use rand::SeedableRng;
        use rand_distr::{Distribution, Normal};
        let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
        let normal = Normal::new(0.0, 1.0).unwrap();
        (0..dim).map(|_| normal.sample(&mut rng)).collect()
    }

    /// `to_polar` followed by `from_polar` must reproduce the input (no quantization involved).
    #[test]
    fn test_polar_roundtrip() {
        let dim = 8;
        let pq = PolarQuant::new(dim, 42, 4).unwrap();
        let x = random_vector(dim, 1);
        let polar = pq.to_polar(&x).unwrap();
        let x_back = pq.from_polar(&polar).unwrap();

        for (a, b) in x.iter().zip(x_back.iter()) {
            assert!((a - b).abs() < 1e-8, "a={}, b={}", a, b);
        }
    }

    /// Quantize → dequantize must return a vector of the original length.
    #[test]
    fn test_quantize_dequantize_shape() {
        let dim = 16;
        let pq = PolarQuant::new(dim, 7, 4).unwrap();
        let x = random_vector(dim, 5);
        let q = pq.quantize(&x).unwrap();
        let recon = pq.dequantize(&q).unwrap();
        assert_eq!(recon.len(), dim);
    }

    #[test]
    fn test_polar_angles_count() {
        let dim = 8;
        let pq = PolarQuant::new(dim, 1, 2).unwrap();
        let x = random_vector(dim, 2);
        let polar = pq.to_polar(&x).unwrap();
        // Total angles: dim - 1 = 7 distributed across num_levels = 3 levels
        let total_angles: usize = polar.angles.iter().map(|v| v.len()).sum();
        assert_eq!(
            total_angles,
            dim - 1,
            "expected {} angles, got {}",
            dim - 1,
            total_angles
        );
    }

    #[test]
    fn test_invalid_dim_not_power_of_2() {
        assert!(PolarQuant::new(10, 1, 2).is_err());
    }

    #[test]
    fn test_invalid_dim_zero() {
        assert!(PolarQuant::new(0, 1, 2).is_err());
    }

    #[test]
    fn test_invalid_bit_width() {
        assert!(PolarQuant::new(8, 1, 0).is_err());
        assert!(PolarQuant::new(8, 1, 9).is_err());
    }

    /// Only `dim` is wrong here; every other field matches a dim-8 quantizer,
    /// so the test fails if the dimension check is ever removed.
    #[test]
    fn test_dequantize_dimension_mismatch() {
        let pq = PolarQuant::new(8, 42, 4).unwrap();
        let bad_q = PolarQuantized {
            radius_idx: 0,
            angle_indices: vec![vec![0; 4], vec![0; 2], vec![0; 1]],
            bit_width: 4,
            dim: 16, // wrong dim
            num_levels: 3,
        };
        assert!(matches!(
            pq.dequantize(&bad_q),
            Err(TurboQuantError::DimensionMismatch { .. })
        ));
    }

    #[test]
    fn test_to_polar_dimension_mismatch() {
        let pq = PolarQuant::new(8, 42, 4).unwrap();
        let bad_x = vec![1.0; 16]; // wrong dim
        assert!(pq.to_polar(&bad_x).is_err());
    }

    #[test]
    fn test_polar_compression_ratio() {
        let dim = 16;
        let pq = PolarQuant::new(dim, 42, 4).unwrap();
        let x = random_vector(dim, 1);
        let q = pq.quantize(&x).unwrap();
        let ratio = q.compression_ratio();
        assert!(ratio > 1.0, "compression ratio should be > 1: {}", ratio);
    }

    /// A dim-8 quantizer has three levels; supplying only two must be rejected.
    #[test]
    fn test_from_polar_rejects_missing_levels() {
        let pq = PolarQuant::new(8, 42, 4).unwrap();
        let coords = PolarCoords {
            radius: 1.0,
            angles: vec![vec![0.0; 4], vec![0.0; 2]],
            dim: 8,
        };
        assert!(matches!(
            pq.from_polar(&coords),
            Err(TurboQuantError::LengthMismatch { .. })
        ));
    }
}