Skip to main content

ruvector_dither/
quantize.rs

1//! Drop-in quantization helpers that apply dither before rounding.
2
3use crate::DitherSource;
4
5/// Quantize a single value with deterministic dither.
6///
7/// # Arguments
8/// - `x`      – input activation in `[-1.0, 1.0]`
9/// - `bits`   – quantizer bit-width (e.g. 3, 5, 7, 8)
10/// - `eps`    – dither amplitude in LSB units (0.0 = no dither, 0.5 = half-LSB recommended)
11/// - `source` – stateful dither sequence
12///
13/// Returns the quantized value in `[-1.0, 1.0]`.
14///
15/// # Example
16/// ```
17/// use ruvector_dither::{GoldenRatioDither, quantize_dithered};
18/// let mut d = GoldenRatioDither::new(0.0);
19/// let q = quantize_dithered(0.314, 8, 0.5, &mut d);
20/// assert!(q >= -1.0 && q <= 1.0);
21/// ```
22#[inline]
23pub fn quantize_dithered(x: f32, bits: u32, eps: f32, source: &mut impl DitherSource) -> f32 {
24    assert!(bits >= 2 && bits <= 31, "bits must be in [2, 31]");
25    let qmax = ((1u32 << (bits - 1)) - 1) as f32;
26    let lsb = 1.0 / qmax;
27    let dither = source.next(eps * lsb);
28    let shifted = (x + dither) * qmax;
29    let rounded = shifted.round().clamp(-qmax, qmax);
30    rounded / qmax
31}
32
33/// Quantize a slice in-place with deterministic dither.
34///
35/// Each element gets an independent dither sample from `source`.
36///
37/// # Example
38/// ```
39/// use ruvector_dither::{GoldenRatioDither, quantize_slice_dithered};
40/// let mut vals = vec![0.1_f32, 0.5, -0.3, 0.9, -0.8];
41/// let mut d = GoldenRatioDither::new(0.0);
42/// quantize_slice_dithered(&mut vals, 5, 0.5, &mut d);
43/// for &v in &vals {
44///     assert!(v >= -1.0 && v <= 1.0);
45/// }
46/// ```
47pub fn quantize_slice_dithered(
48    xs: &mut [f32],
49    bits: u32,
50    eps: f32,
51    source: &mut impl DitherSource,
52) {
53    assert!(bits >= 2 && bits <= 31, "bits must be in [2, 31]");
54    let qmax = ((1u32 << (bits - 1)) - 1) as f32;
55    let lsb = 1.0 / qmax;
56    for x in xs.iter_mut() {
57        let dither = source.next(eps * lsb);
58        let shifted = (*x + dither) * qmax;
59        *x = shifted.round().clamp(-qmax, qmax) / qmax;
60    }
61}
62
63/// Quantize to a raw integer code (signed, in `[-(2^(bits-1)), 2^(bits-1)-1]`).
64///
65/// Useful when you need the integer representation rather than a re-scaled float.
66#[inline]
67pub fn quantize_to_code(x: f32, bits: u32, eps: f32, source: &mut impl DitherSource) -> i32 {
68    assert!(bits >= 2 && bits <= 31, "bits must be in [2, 31]");
69    let qmax = ((1u32 << (bits - 1)) - 1) as f32;
70    let lsb = 1.0 / qmax;
71    let dither = source.next(eps * lsb);
72    ((x + dither) * qmax).round().clamp(-qmax, qmax) as i32
73}
74
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{GoldenRatioDither, PiDither};
    use std::collections::HashSet;

    /// Scalar quantization must stay inside the unit interval for several
    /// bit-widths and for inputs spanning the whole input range.
    #[test]
    fn output_in_unit_range() {
        const INPUTS: [f32; 5] = [-1.0, -0.5, 0.0, 0.5, 1.0];
        let mut dither = GoldenRatioDither::new(0.0);
        for bits in [3u32, 5, 7, 8] {
            for x in INPUTS {
                let q = quantize_dithered(x, bits, 0.5, &mut dither);
                assert!((-1.0..=1.0).contains(&q), "bits={bits}, x={x}, q={q}");
            }
        }
    }

    /// A constant input sitting at exactly half an LSB would map to a single
    /// code without dither (an idle tone). With dither enabled the emitted
    /// codes must vary, i.e. more than one distinct code is observed.
    #[test]
    fn dither_reduces_idle_tones() {
        let bits = 5u32;
        let qmax = ((1u32 << (bits - 1)) - 1) as f32;
        let lsb = 1.0 / qmax;
        let x = 0.5 * lsb; // exactly half an LSB

        let mut dither = GoldenRatioDither::new(0.0);
        let distinct: HashSet<i32> = (0..256)
            .map(|_| quantize_to_code(x, bits, 0.5, &mut dither))
            .collect();
        assert!(distinct.len() > 1, "dithered signal must produce >1 unique code");
    }

    /// In-place slice quantization keeps every element inside `[-1.0, 1.0]`.
    #[test]
    fn slice_quantize_in_bounds() {
        let mut vals: Vec<f32> = (-50..=50).map(|i| i as f32 * 0.02).collect();
        let mut pi = PiDither::new(0);
        quantize_slice_dithered(&mut vals, 7, 0.5, &mut pi);
        for &v in &vals {
            assert!((-1.0..=1.0).contains(&v), "out of range: {v}");
        }
    }

    /// Two runs that start from identically seeded dither sources must agree.
    #[test]
    fn deterministic_with_same_seed() {
        fn run(input: &[f32]) -> Vec<f32> {
            let mut buf = input.to_vec();
            let mut dither = GoldenRatioDither::new(0.5);
            quantize_slice_dithered(&mut buf, 8, 0.5, &mut dither);
            buf
        }

        let input = vec![0.1_f32, 0.4, -0.7, 0.9];
        assert_eq!(run(&input), run(&input));
    }
}