1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
/*!
# Yes, but what is it really ?

Division is a very costly operation for your CPU.
You may have noticed that when the divisor is known at compile time, your compiler transforms the operations into a cryptic combination of
a multiplication and bitshift.

Fastdivide is about doing the same trick your compiler uses but 
when the divisor is unknown at compile time. 
Of course, it requires preprocessing a datastructure that is 
specific to your divisor, and using it only makes sense if 
this preprocessing is amortized by a high number of division (with the 
same divisor).

# When is it useful ?

If you do a lot (> 10) of division with the same divisor ; and this division is a bottleneck in your program.
This is for instance useful to compute histograms.

# Example

```rust
use fastdivide::DividerU64;

fn histogram(vals: &[u64], min: u64, interval: u64, output: &mut [usize]) {
    
    // Preprocessing a datastructure dedicated
    // to dividing `u64` by `interval`.
    //
    // This preprocessing is not cheap.
    let divide = DividerU64::divide_by(interval);
    
    // We reuse the same `Divider` for all of the 
    // values in vals.
    for &val in vals {
        if val < min {
            continue;
        }
        let bucket_id = divide.divide(val - min) as usize;
        if bucket_id < output.len() {
            output[bucket_id as usize] += 1; 
        }
    }
}

# let mut output = vec![0; 3];
# histogram(&[0u64, 1u64, 4u64, 36u64, 2u64, 1u64], 1u64, 3u64, &mut output[..]);
# assert_eq!(output[0], 3);
# assert_eq!(output[1], 1);
# assert_eq!(output[2], 0);

```


*/

#![feature(i128_type)]
#![cfg_attr(test, feature(test))]
#[cfg(test)]
extern crate test;

// ported from  libdivide.h by ridiculous_fish
//
//  This file is not the original library, it is an attempt to port part
//  of it to rust.
// 
const LIBDIVIDE_ADD_MARKER: u8 = 0x40;
const LIBDIVIDE_U64_SHIFT_PATH: u8 = 0x80;
const LIBDIVIDE_64_SHIFT_MASK: u8 = 0x3F;

fn count_leading_zeros(mut val: u64) -> u8 {
    if val == 0 {
        return 64;
    }
    let mut result = 0u8;
    while (val & (1u64 << 63)) == 0 {
        val <<= 1;
        result += 1;
    }
    result
}

#[derive(Debug)]
pub struct DividerU64 {
    magic: u64,
    more: u8,
}

fn libdivide_mullhi_u64(x: u64, y: u64) -> u64 {
    let xl = x as u128;
    let yl = y as u128;
    ((xl * yl) >> 64) as u64
}


impl DividerU64 {
    pub fn divide_by(divisor: u64) -> DividerU64 {
        assert!(divisor > 0u64);
        let floor_log_2_d: u8 = 63u8 - count_leading_zeros(divisor);
        if divisor & (divisor - 1) == 0 {
            DividerU64 {
                magic: 0u64,
                more: floor_log_2_d | LIBDIVIDE_U64_SHIFT_PATH
            }
        } else {
            let u = 1u128 << (floor_log_2_d + 64);
            let mut proposed_m: u128 = u / divisor as u128;
            let reminder: u64 = (u - proposed_m * divisor as u128) as u64;
            assert!(reminder > 0 && reminder < divisor);
            let e: u64 = divisor - reminder;
            let more: u8;
            if e < (1u64 << floor_log_2_d) {
                more = floor_log_2_d;
            } else {
                proposed_m += proposed_m;
                let twice_rem = reminder * 2;
                if twice_rem >= divisor || twice_rem < reminder {
                    proposed_m += 1;
                }
                more = floor_log_2_d | LIBDIVIDE_ADD_MARKER;
            }
            DividerU64 {
                more: more,
                magic: (proposed_m as u64) + 1u64,
            }
        }
    }

    #[inline(always)]
    pub fn divide(&self, n: u64) -> u64 {
        if self.more & LIBDIVIDE_U64_SHIFT_PATH != 0 {
            n >> (self.more & LIBDIVIDE_64_SHIFT_MASK)
        }
        else {
            let q = libdivide_mullhi_u64(self.magic, n);
            if self.more & LIBDIVIDE_ADD_MARKER != 0 {
                let t = ((n - q) >> 1).wrapping_add(q);
                t >> (self.more & LIBDIVIDE_64_SHIFT_MASK)
            }
            else {
                q >> self.more
            }
        }
    }
}



#[cfg(test)]
mod tests {
    use super::DividerU64;
    use test::Bencher;
    use test;

    #[test]
    fn test_libdivide() {
        for d in (1u64..100u64)
            .chain(vec![2048, 234234131223u64].into_iter())
            .chain((5..63).map(|i| 1 << i)) {
            let divider = DividerU64::divide_by(d);
            for i in (0u64..10_000).chain(vec![2048, 234234131223u64,  1 << 43,  1 << 43 + 1]) {
                assert_eq!(divider.divide(i), i / d);
            }

        }
    }

    #[bench]
    fn bench_normal_divide(b: &mut Bencher) {
        let q: u64 = test::black_box(112u64);
        b.iter(|| { 
            let n: u64 = test::black_box(152342341u64);
            n / q
        })
    }

    #[bench]
    fn bench_fast_divide(b: &mut Bencher) {
        let fast_divider = DividerU64::divide_by(112u64);
        b.iter(|| { 
            let n: u64 = test::black_box(152342341u64);
            fast_divider.divide(n)
        })
    }
}