numquant/
int_range.rs

1use std::{marker::PhantomData, ops::Range};
2
3use crate::linear::Linear;
4
/// Linear quantizer over an integer-bounded range of unquantized values.
///
/// The unquantized value lies between `MIN` and `MAX`; values outside of this are clamped.
/// The quantized value lies between 0 and `Q_MAX` and is stored in type `T`.
///
/// The struct holds no data: `T` is only carried as a type-level marker.
pub struct IntRange<T, const Q_MAX: u32, const MIN: i64, const MAX: i64>(PhantomData<T>);
8
9impl<T, const Q_MAX: u32, const MIN: i64, const MAX: i64> Linear for IntRange<T, Q_MAX, MIN, MAX>
10where
11    T: 'static + Copy,
12    u32: TryInto<T>,
13{
14    type Type = T;
15
16    fn range() -> Range<f64> {
17        MIN as f64..MAX as f64
18    }
19
20    fn q_max() -> Self::Type {
21        match Q_MAX.try_into() {
22            Ok(v) => v,
23            Err(_) => panic!("Q_MAX not convertible to T"),
24        }
25    }
26}
27
28/// Quantizes/dequantizes to a value stored in an `u8`, using the full range of the `u8`.
29/// The range for the unquantized value is between `MIN` and `MAX`. Values outside of this are clamped.
30pub type U8<const MIN: i64, const MAX: i64> = IntRange<u8, 0xff, MIN, MAX>;
31
32/// Quantizes/dequantizes to a value stored in an `u16`, using the full range of the `u16`.
33/// The range for the unquantized value is between `MIN` and `MAX`. Values outside of this are clamped.
34pub type U16<const MIN: i64, const MAX: i64> = IntRange<u16, 0xffff, MIN, MAX>;
35
36/// Quantizes/dequantizes to a value stored in an `u32`, using the full range of the `u32`.
37/// The range for the unquantized value is between `MIN` and `MAX`. Values outside of this are clamped.
38pub type U32<const MIN: i64, const MAX: i64> = IntRange<u32, 0xffffffff, MIN, MAX>;
39
#[cfg(test)]
mod tests {
    use approx::assert_abs_diff_eq;

    use crate::{quantize::Quantize, Quantized, U8};

    /// A mid-range value survives a quantize/dequantize round trip within the maximum error.
    #[test]
    fn test_byte() {
        type Byte = U8<100, 150>;
        let original = 125.0;
        let quantized = Quantized::<Byte>::from_f64(original);
        assert_abs_diff_eq!(quantized.to_f64(), original, epsilon = Byte::max_error());
    }

    /// The lower bound, midpoint and upper bound map to the expected `u8` codes.
    #[test]
    fn quantize_values() {
        type Byte = U8<0, 60>;
        assert_eq!(Byte::quantize(0.0), 0x00);
        assert_eq!(Byte::quantize(30.0), 0x80);
        assert_eq!(Byte::quantize(60.0), 0xff);
    }

    /// The extreme codes dequantize exactly to the bounds; the middle code is within the
    /// maximum error of the midpoint.
    #[test]
    fn unquantize_values() {
        type Byte = U8<0, 100>;
        assert_eq!(Byte::dequantize(0x00), 0.0);
        assert_abs_diff_eq!(Byte::dequantize(0x80), 50.0, epsilon = Byte::max_error());
        assert_eq!(Byte::dequantize(0xff), 100.0);
    }

    /// Inputs below `MIN` or above `MAX` saturate to the lowest and highest code.
    #[test]
    fn quantize_out_of_range_clamps() {
        type Byte = U8<0, 100>;
        assert_eq!(Byte::quantize(-1.0), 0x00);
        assert_eq!(Byte::quantize(100.1), 0xff);
    }
}
75}