fastdivide/
lib.rs

1/*!
2# Yes, but what is it really ?
3
4Division is a very costly operation for your CPU (probably between 10
5and 40 cycles).
6
7You may have noticed that when the divisor is known at compile time,
8your compiler transforms the operations into a cryptic combination of
9a multiplication and bitshift.
10
11Fastdivide is about doing the same trick your compiler uses but
12when the divisor is unknown at compile time.
13Of course, it requires preprocessing a datastructure that is
14specific to your divisor, and using it only makes sense if
15this preprocessing is amortized by a high number of division (with the
16same divisor).
17
18# When is it useful ?
19
20You should probably use `fastdivide`, if you do a lot (> 10) of division with the same divisor ;
21and these divisions are a bottleneck in your program.
22
23This is for instance useful to compute histograms.
24
25
26
27# Example
28
29```rust
30use fastdivide::DividerU64;
31
32fn histogram(vals: &[u64], min: u64, interval: u64, output: &mut [usize]) {
33
34    // Preprocessing a datastructure dedicated
35    // to dividing `u64` by `interval`.
36    //
37    // This preprocessing is not cheap.
38    let divide = DividerU64::divide_by(interval);
39
40    // We reuse the same `Divider` for all of the
41    // values in vals.
42    for &val in vals {
43        if val < min {
44            continue;
45        }
46        let bucket_id = divide.divide(val - min) as usize;
47        if bucket_id < output.len() {
48            output[bucket_id as usize] += 1;
49        }
50    }
51}
52
53# let mut output = vec![0; 3];
54# histogram(&[0u64, 1u64, 4u64, 36u64, 2u64, 1u64], 1u64, 3u64, &mut output[..]);
55# assert_eq!(output[0], 3);
56# assert_eq!(output[1], 1);
57# assert_eq!(output[2], 0);
58
59```
60
61*/
62#![no_std]
63
64#[cfg(feature = "std")]
65extern crate std;
66
67#[cfg(test)]
68#[macro_use]
69extern crate std;
70
71// ported from  libdivide.h by ridiculous_fish
72//
73//  This file is not the original library, it is an attempt to port part
74//  of it to rust.
75
76// This algorithm is described in https://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
77
78#[derive(Debug, Clone, Copy, PartialEq, Eq)]
79pub enum DividerU64 {
80    Fast { magic: u64, shift: u8 },
81    BitShift(u8),
82    General { magic_low: u64, shift: u8 },
83}
84
85#[inline(always)]
86fn libdivide_mullhi_u64(x: u64, y: u64) -> u64 {
87    let xl = x as u128;
88    let yl = y as u128;
89    ((xl * yl) >> 64) as u64
90}
91
92
93#[inline(always)]
94fn floor_log2(n: u64) -> u8 {
95    assert_ne!(n, 0);
96    63u8 - (n.leading_zeros() as u8)
97}
98
99impl DividerU64 {
100    fn power_of_2_division(divisor: u64) -> Option<DividerU64> {
101        if divisor == 0 {
102            return None;
103        }
104        if !divisor.is_power_of_two() {
105            return None;
106        }
107        // Divisor is a power of 2. We can just do a bit shift.
108        Some(DividerU64::BitShift(floor_log2(divisor)))
109    }
110
111    fn fast_path(divisor: u64) -> Option<DividerU64> {
112        if divisor.is_power_of_two() {
113            return None;
114        }
115        let floor_log_2_d: u8 = floor_log2(divisor);
116        let u = 1u128 << (floor_log_2_d + 64);
117        let proposed_magic_number: u128 = u / divisor as u128;
118        let reminder: u64 = (u - proposed_magic_number * (divisor as u128)) as u64;
119        assert!(reminder > 0 && reminder < divisor);
120        let e: u64 = divisor - reminder;
121        // This is a sufficient condition for our 64-bits magic number
122        // condition to work as described in
123        // See https://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
124        if e >= (1u64 << floor_log_2_d) {
125            return None;
126        }
127        Some(DividerU64::Fast {
128            magic: (proposed_magic_number as u64) + 1u64,
129            shift: floor_log_2_d,
130        })
131    }
132
133    fn general_path(divisor: u64) -> DividerU64 {
134        // p=⌈log2d⌉
135        let p: u8 = 64u8 - (divisor.leading_zeros() as u8);
136        // m=⌈2^{64+p} / d⌉. This is a 33 bit number, so keep only the low 32 bits.
137        // we do a little dance to avoid the overflow if p = 64.
138        let e = 1u128 << (63 + p);
139        let m = 2 + (e + (e - divisor as u128)) / divisor as u128;
140        DividerU64::General {
141            magic_low: m as u64,
142            shift: p - 1,
143        }
144    }
145
146    pub fn divide_by(divisor: u64) -> DividerU64 {
147        assert!(divisor > 0u64);
148        Self::power_of_2_division(divisor)
149            .or_else(|| DividerU64::fast_path(divisor))
150            .unwrap_or_else(|| DividerU64::general_path(divisor))
151    }
152
153    #[inline(always)]
154    pub fn divide(&self, n: u64) -> u64 {
155        match *self {
156            DividerU64::Fast { magic, shift } => {
157                // The divisor has a magic number that is lower than 32 bits.
158                // We get away with a multiplication and a bit-shift.
159                libdivide_mullhi_u64(magic, n) >> shift
160            }
161            DividerU64::BitShift(d) => n >> d,
162            DividerU64::General { magic_low, shift } => {
163                // magic only contains the low 64 bits of our actual magic number which actually has a 65 bits.
164                // The following dance computes n * (magic + 2^64) >> shift
165                let q = libdivide_mullhi_u64(magic_low, n);
166                let t = ((n - q) >> 1).wrapping_add(q);
167                t >> shift
168            }
169        }
170    }
171}
172
173impl core::ops::Div<DividerU64> for u64 {
174    type Output = u64;
175
176    #[inline(always)]
177    fn div(self, denom: DividerU64) -> Self::Output {
178        denom.divide(self)
179    }
180}
181
182#[cfg(test)]
183mod tests {
184    use super::DividerU64;
185    use proptest::prelude::*;
186
187    #[test]
188    fn test_divide_op() {
189        let divider = DividerU64::divide_by(2);
190        let res = 4u64 / divider;
191        assert_eq!(res, 2);
192        let res = 8u64 / divider;
193        assert_eq!(res, 4);
194    }
195
196    #[test]
197    fn test_divide_by_4() {
198        let divider = DividerU64::divide_by(4);
199        assert!(matches!(divider, DividerU64::BitShift(2)));
200    }
201
202    #[test]
203    fn test_divide_by_7() {
204        let divider = DividerU64::divide_by(7);
205        assert!(matches!(divider, DividerU64::General { .. }));
206    }
207
208    #[test]
209    fn test_divide_by_11() {
210        let divider = DividerU64::divide_by(11);
211        assert_eq!(
212            divider,
213            DividerU64::Fast {
214                magic: 13415813871788764812,
215                shift: 3
216            }
217        );
218    }
219
220
221    #[test]
222    fn test_floor_log2() {
223        for i in [1, 2, 3, 4, 10, 15, 16, 31, 32, 33, u64::MAX] {
224            let log_i = super::floor_log2(i);
225            let lower_bound = 1 << log_i;
226            let upper_bound = lower_bound - 1 + lower_bound;
227            assert!(lower_bound <= i);
228            assert!(upper_bound >= i);
229        }
230    }
231
232
233    proptest! {
234        #![proptest_config(ProptestConfig::with_cases(100000))]
235        #[test]
236        fn test_proptest(n in 0..u64::MAX, d in 1..u64::MAX) {
237            let divider = DividerU64::divide_by(d);
238            let quotient = divider.divide(n);
239            assert_eq!(quotient, n / d);
240        }
241    }
242
243    proptest! {
244        #![proptest_config(ProptestConfig::with_cases(100000))]
245        #[test]
246        fn test_proptest_divide_by_7(n in 0..u64::MAX) {
247            let divider = DividerU64::divide_by(7);
248            let quotient = divider.divide(n);
249            assert_eq!(quotient, n / 7);
250        }
251    }
252
253    proptest! {
254        #![proptest_config(ProptestConfig::with_cases(10000))]
255        #[test]
256        fn test_proptest_divide_by_11(n in 0..u64::MAX) {
257            let divider = DividerU64::divide_by(11);
258            let quotient = divider.divide(n);
259            assert_eq!(quotient, n / 11);
260        }
261    }
262
263    proptest! {
264        #![proptest_config(ProptestConfig::with_cases(10000))]
265        #[test]
266        fn test_proptest_divide_by_any(d in 1..u64::MAX) {
267            DividerU64::divide_by(d);
268        }
269    }
270
271    #[test]
272    fn test_libdivide() {
273        for d in (1u64..100u64)
274            .chain(vec![2048, 234234131223u64].into_iter())
275            .chain((5..63).map(|i| 1 << i))
276        {
277            let divider = DividerU64::divide_by(d);
278            for i in (0u64..10_000).chain(vec![2048, 234234131223u64, 1 << 43, 1 << 43 + 1]) {
279                assert_eq!(divider.divide(i), i / d);
280            }
281        }
282    }
283}
fastdivide/lib.rs

fastdivide/
lib.rs