// common_traits/fastrange.rs

1use crate::Integer;
2
/// Fast division, modulo reduction, and a division-free alternative operation
/// that maps a number onto the range `[0, d)`.
///
/// Every method replaces a hardware division with a widening multiplication by
/// a precomputed double-width mask (Lemire's "fastmod" / "fastrange" trick).
///
/// # Reference
/// <https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/>
/// <https://github.com/lemire/fastmod>
/// <https://github.com/lemire/fastrange>
pub trait FastRange: Integer + Sized {
    /// Unsigned integer with twice the bits of `Self`; holds the mask
    /// precomputed by [`FastRange::compute_mask_fast`].
    type MaskType: Integer + Copy;
    /// Produces an integer in `[0, d)` without division.
    /// The function is as fair as possible in the sense that if you iterate
    /// through all possible values of `Self`, then you will generate all
    /// possible outputs as uniformly as possible.
    ///
    /// This is equivalent to computing `⌊d * (self / 2^w)⌋` in fixed point,
    /// where `w` is the bit width of `Self`.
    fn fast_range(&self, d: Self) -> Self;
    /// Computes `self / d` given the mask precomputed for `d`; only valid for
    /// `d > 1` (for `d == 1` the mask wraps to zero and the quotient is wrong).
    fn fast_div_mask(&self, mask: Self::MaskType) -> Self;
    /// Computes `self % d` given the mask precomputed for `d`.
    fn fast_mod_mask(&self, d: Self, mask: Self::MaskType) -> Self;

    /// Computes `self / d`, precomputing the mask on the fly; requires `d > 1`.
    #[inline(always)]
    fn fast_div(&self, d: Self) -> Self {
        let mask = d.compute_mask_fast();
        self.fast_div_mask(mask)
    }
    /// Computes `self % d`, precomputing the mask on the fly.
    #[inline(always)]
    fn fast_mod(&self, d: Self) -> Self {
        let mask = d.compute_mask_fast();
        self.fast_mod_mask(d, mask)
    }
    /// Checks whether `self % d == 0`, precomputing the mask on the fly.
    #[inline(always)]
    fn fast_is_divisible(&self, d: Self) -> bool {
        let mask = d.compute_mask_fast();
        self.fast_is_divisible_mask(mask)
    }

    /// Computes the mask needed by `fast_div_mask` and `fast_mod_mask`:
    /// `M = floor((2^(2w) - 1) / d) + 1`, where `w` is the bit width of `Self`
    /// and `d` is `*self`.
    ///
    /// The divisor `*self` must be non-zero (the division panics otherwise).
    /// For `d == 1` the addition wraps and `M == 0`, which is only usable with
    /// `fast_mod_mask` / `fast_is_divisible_mask`, not `fast_div_mask`.
    fn compute_mask_fast(&self) -> Self::MaskType;

    /// Checks `self % d == 0` given the mask precomputed for `d`.
    fn fast_is_divisible_mask(&self, mask: Self::MaskType) -> bool;
}
54
55impl FastRange for u8 {
56    type MaskType = u16;
57    #[inline(always)]
58    fn fast_range(&self, d: Self) -> Self {
59        ((*self as u16 * d as u16) >> 8) as u8
60    }
61    #[inline(always)]
62    fn fast_div_mask(&self, mask: Self::MaskType) -> Self {
63        ((*self as u32).wrapping_mul(mask as u32) >> 16) as u8
64    }
65    #[inline(always)]
66    fn fast_mod_mask(&self, d: Self, mask: Self::MaskType) -> Self {
67        debug_assert_eq!(mask, d.compute_mask_fast());
68        let low_bits = (*self as u16).wrapping_mul(mask);
69        ((low_bits as u32).wrapping_mul(d as u32) >> 16) as u8
70    }
71    #[inline(always)]
72    fn compute_mask_fast(&self) -> Self::MaskType {
73        (u16::MAX / *self as u16).wrapping_add(1)
74    }
75    #[inline(always)]
76    fn fast_is_divisible_mask(&self, mask: Self::MaskType) -> bool {
77        (*self as u16).wrapping_mul(mask) <= mask.wrapping_sub(1)
78    }
79}
80
81impl FastRange for u16 {
82    type MaskType = u32;
83    #[inline(always)]
84    fn fast_range(&self, d: Self) -> Self {
85        ((*self as u32 * d as u32) >> 16) as u16
86    }
87    #[inline(always)]
88    fn fast_div_mask(&self, mask: Self::MaskType) -> Self {
89        ((*self as u64).wrapping_mul(mask as u64) >> 32) as u16
90    }
91    #[inline(always)]
92    fn fast_mod_mask(&self, d: Self, mask: Self::MaskType) -> Self {
93        debug_assert_eq!(mask, d.compute_mask_fast());
94        let low_bits = (*self as u32).wrapping_mul(mask);
95        ((low_bits as u64).wrapping_mul(d as u64) >> 32) as u16
96    }
97    #[inline(always)]
98    fn compute_mask_fast(&self) -> Self::MaskType {
99        (u32::MAX / *self as u32).wrapping_add(1)
100    }
101    #[inline(always)]
102    fn fast_is_divisible_mask(&self, mask: Self::MaskType) -> bool {
103        (*self as u32).wrapping_mul(mask) <= mask.wrapping_sub(1)
104    }
105}
106
107impl FastRange for u32 {
108    type MaskType = u64;
109    #[inline(always)]
110    fn fast_range(&self, d: Self) -> Self {
111        ((*self as u64).wrapping_mul(d as u64) >> 32) as u32
112    }
113    #[inline(always)]
114    fn fast_div_mask(&self, mask: Self::MaskType) -> Self {
115        ((*self as u128).wrapping_mul(mask as u128) >> 64) as u32
116    }
117    #[inline(always)]
118    fn fast_mod_mask(&self, d: Self, mask: Self::MaskType) -> Self {
119        debug_assert_eq!(mask, d.compute_mask_fast());
120        let low_bits = (*self as u64).wrapping_mul(mask);
121        ((low_bits as u128).wrapping_mul(d as u128) >> 64) as u32
122    }
123    #[inline(always)]
124    fn compute_mask_fast(&self) -> Self::MaskType {
125        (u64::MAX / *self as u64).wrapping_add(1)
126    }
127    #[inline(always)]
128    fn fast_is_divisible_mask(&self, mask: Self::MaskType) -> bool {
129        (*self as u64).wrapping_mul(mask) <= mask.wrapping_sub(1)
130    }
131}
132
/// Multiplies a 128-bit value by a 64-bit value and returns bits `[128, 192)`
/// of the 192-bit product, i.e. `(low_bits * d) >> 128` truncated to `u64`.
#[inline(always)]
fn mul128_u64(low_bits: u128, d: u64) -> u64 {
    let d = d as u128;
    // High half of (low 64 bits of `low_bits`) * d: the discarded low half
    // lies entirely below bit 64 of the final product.
    let carry = (low_bits & u128::from(u64::MAX)).wrapping_mul(d) >> 64;
    // (high 64 bits of `low_bits`) * d, already aligned at bit 64.
    let upper = (low_bits >> 64).wrapping_mul(d);
    // Neither multiply nor this sum can exceed u128; shift extracts bits [128, 192).
    ((upper + carry) >> 64) as u64
}
143
144impl FastRange for u64 {
145    type MaskType = u128;
146    #[inline(always)]
147    fn fast_range(&self, d: Self) -> Self {
148        ((*self as u128).wrapping_mul(d as u128) >> 64) as u64
149    }
150    #[inline(always)]
151    fn fast_div_mask(&self, mask: Self::MaskType) -> Self {
152        mul128_u64(mask, *self)
153    }
154    #[inline(always)]
155    fn fast_mod_mask(&self, d: Self, mask: Self::MaskType) -> Self {
156        debug_assert_eq!(mask, d.compute_mask_fast());
157        let low_bits = (*self as u128).wrapping_mul(mask);
158        mul128_u64(low_bits, d)
159    }
160    #[inline(always)]
161    fn compute_mask_fast(&self) -> Self::MaskType {
162        // what follows is just ((__uint128_t)0 - 1) / d) + 1 spelled out
163        let mut mask: u128 = u64::MAX as u128;
164        mask <<= 64;
165        mask |= u64::MAX as u128;
166        mask /= *self as u128;
167        mask = mask.wrapping_add(1);
168        mask
169    }
170    #[inline(always)]
171    fn fast_is_divisible_mask(&self, mask: Self::MaskType) -> bool {
172        (*self as u128).wrapping_mul(mask) <= mask.wrapping_sub(1)
173    }
174}
175
/// Generates the [`FastRange`] impl for `usize` on targets whose pointer width
/// is `$pw` bits, delegating every method to the same-width fixed integer
/// `$ty` (whose mask type is `$mask`).
macro_rules! impl_usize {
    ($ty:ty, $pw:literal, $mask:ty) => {
        // Only the impl matching the compilation target's pointer width is built.
        #[cfg(target_pointer_width = $pw)]
        impl FastRange for usize {
            type MaskType = $mask;

            #[inline(always)]
            fn fast_range(&self, d: Self) -> Self {
                // usize <-> $ty casts are lossless here: the widths match by cfg.
                (*self as $ty).fast_range(d as $ty) as usize
            }

            #[inline(always)]
            fn fast_div_mask(&self, mask: Self::MaskType) -> Self {
                (*self as $ty).fast_div_mask(mask) as usize
            }
            #[inline(always)]
            fn fast_mod_mask(&self, d: Self, mask: Self::MaskType) -> Self {
                (*self as $ty).fast_mod_mask(d as $ty, mask) as usize
            }
            #[inline(always)]
            fn compute_mask_fast(&self) -> Self::MaskType {
                (*self as $ty).compute_mask_fast() as Self::MaskType
            }
            #[inline(always)]
            fn fast_is_divisible_mask(&self, mask: Self::MaskType) -> bool {
                (*self as $ty).fast_is_divisible_mask(mask)
            }
        }
    };
}

// One candidate impl per supported pointer width; exactly one survives `cfg`.
impl_usize!(u64, "64", u128);
impl_usize!(u32, "32", u64);
impl_usize!(u16, "16", u32);