dashu_base/
bit.rs

1//! Trait definitions for bitwise operations.
2//!
3//! Most traits are only implemented for unsigned integers yet.
4
5use core::num::FpCategory;
6
7use crate::{
8    Approximation::{self, *},
9    Sign::{self, *},
10};
11
12/// Bit query for integers
13///
14/// # Examples
15///
16/// ```
17/// # use dashu_base::BitTest;
18/// // query a bit of the number
19/// assert_eq!(0b10010.bit(1), true);
20/// assert_eq!(0b10010.bit(3), false);
21/// assert_eq!(0b10010.bit(100), false);
22/// assert_eq!((-0b10010).bit(1), true);
23/// assert_eq!((-0b10010).bit(3), true);
24/// assert_eq!((-0b10010).bit(100), true);
25///
26/// // query the bit length of the number
27/// assert_eq!(0.bit_len(), 0);
28/// assert_eq!(17.bit_len(), 5);
29/// assert_eq!((-17).bit_len(), 5);
30/// assert_eq!(0b101000000.bit_len(), 9);
31/// ```
32pub trait BitTest {
33    /// Effective bit length of the binary representation.
34    ///
35    /// For 0, the length is 0.
36    ///
37    /// For positive numbers it is:
38    /// * number of digits in base 2
39    /// * the index of the top 1 bit plus one
40    /// * the floored base-2 logarithm of the number plus one.
41    ///
42    /// For negative numbers it is:
43    /// * number of digits in base 2 without the sign
44    /// * the index of the top 0 bit plus one
45    /// * the floored base-2 logarithm of the absolute value of the number plus one.
46    fn bit_len(&self) -> usize;
47
48    /// Returns true if the `n`-th bit is set in its two's complement binary representation, n starts from 0.
49    fn bit(&self, n: usize) -> bool;
50}
51
52/// Functions related to the power of two.
53///
54/// # Examples
55/// ```
56/// use dashu_base::PowerOfTwo;
57///
58/// let n = 5u32;
59/// assert!(!n.is_power_of_two());
60/// assert_eq!(n.next_power_of_two(), 8);
61/// ```
62pub trait PowerOfTwo {
63    /// Test if self is a power of two (`2^k`)
64    fn is_power_of_two(&self) -> bool;
65    /// Get the smallest power of two greater than or equal to self.
66    fn next_power_of_two(self) -> Self;
67}
68
69macro_rules! impl_bit_ops_for_uint {
70    ($($T:ty)*) => {$(
71        impl BitTest for $T {
72            #[inline]
73            fn bit_len(&self) -> usize {
74                (<$T>::BITS - self.leading_zeros()) as usize
75            }
76            #[inline]
77            fn bit(&self, position: usize) -> bool {
78                if position >= <$T>::BITS as usize {
79                    return false;
80                } else {
81                    self & (1 << position) > 0
82                }
83            }
84        }
85
86        impl PowerOfTwo for $T {
87            #[inline]
88            fn is_power_of_two(&self) -> bool {
89                <$T>::is_power_of_two(*self)
90            }
91            #[inline]
92            fn next_power_of_two(self) -> $T {
93                <$T>::next_power_of_two(self)
94            }
95        }
96    )*}
97}
98impl_bit_ops_for_uint!(u8 u16 u32 u64 u128 usize);
99
100macro_rules! impl_bit_ops_for_int {
101    ($($T:ty)*) => {$(
102        impl BitTest for $T {
103            #[inline]
104            fn bit_len(&self) -> usize {
105                self.unsigned_abs().bit_len()
106            }
107            #[inline]
108            fn bit(&self, position: usize) -> bool {
109                if position >= <$T>::BITS as usize {
110                    return self < &0;
111                } else {
112                    self & (1 << position) > 0
113                }
114            }
115        }
116    )*}
117}
118impl_bit_ops_for_int!(i8 i16 i32 i64 i128 isize);
119
120/// Support encoding and decoding of floats into (mantissa, exponent) parts.
121///
122/// See the docs of each method for the details
123///
124/// # Examples
125///
126/// ```
127/// # use dashu_base::{FloatEncoding, Approximation::*, Sign::*};
128/// use core::num::FpCategory;
129///
130/// assert_eq!(0f64.decode(), Ok((0, -1074))); // exponent will not be reduced
131/// assert_eq!(1f32.decode(), Ok((1 << 23, -23)));
132/// assert_eq!(f32::INFINITY.decode(), Err(FpCategory::Infinite));
133///
134/// assert_eq!(f64::encode(0, 1), Exact(0f64));
135/// assert_eq!(f32::encode(1, 0), Exact(1f32));
136/// assert_eq!(f32::encode(i32::MAX, 100), Inexact(f32::INFINITY, Positive));
137/// ```
138pub trait FloatEncoding {
139    type Mantissa;
140    type Exponent;
141
142    /// Convert a float number `mantissa * 2^exponent` into `(mantissa, exponent)` parts faithfully.
143    ///
144    /// This method will not reduce the result (e.g. turn `2 * 2^-1` into `1 * 2^0`), and it
145    /// will return [Err] when the float number is nan or infinite.
146    fn decode(self) -> Result<(Self::Mantissa, Self::Exponent), FpCategory>;
147
148    /// Convert `(mantissa, exponent)` to `mantissa * 2^exponent` faithfully.
149    ///
150    /// It won't generate `NaN` values. However if the actual value is out of the
151    /// representation range, it might return an infinity or subnormal number.
152    ///
153    /// If any rounding happened during the conversion, it should follow the default
154    /// behavior defined by IEEE 754 (round to nearest, ties to even)
155    ///
156    /// The returned approximation is exact if the input can be exactly representable by f32,
157    /// otherwise the error field of the approximation contains the sign of `result - mantissa * 2^exp`.
158    fn encode(mantissa: Self::Mantissa, exponent: Self::Exponent) -> Approximation<Self, Sign>
159    where
160        Self: Sized;
161}
162
163/// Round to even floating point adjustment, based on the bottom
164/// bit of mantissa and additional 2 bits (i.e. 3 bits in units of ULP/4).
165#[inline]
166fn round_to_even_adjustment(bits: u8) -> bool {
167    bits >= 0b110 || bits == 0b011
168}
169
170impl FloatEncoding for f32 {
171    type Mantissa = i32;
172    type Exponent = i16;
173
174    #[inline]
175    fn decode(self) -> Result<(i32, i16), FpCategory> {
176        let bits: u32 = self.to_bits();
177        let sign_bit = bits >> 31;
178        let mantissa_bits = bits & 0x7fffff;
179
180        // deal with inf/nan values
181        let mut exponent = ((bits >> 23) & 0xff) as i16;
182        if exponent == 0xff {
183            return if mantissa_bits != 0 {
184                Err(FpCategory::Nan)
185            } else {
186                Err(FpCategory::Infinite)
187            };
188        }
189
190        // then parse values
191        let mantissa = if exponent == 0 {
192            // subnormal
193            exponent = -126 - 23;
194            mantissa_bits
195        } else {
196            // normal
197            exponent -= 127 + 23; // bias + mantissa shift
198            mantissa_bits | 0x800000
199        } as i32;
200
201        let sign = Sign::from(sign_bit > 0);
202        Ok((mantissa * sign, exponent))
203    }
204
205    #[inline]
206    fn encode(mantissa: i32, exponent: i16) -> Approximation<Self, Sign> {
207        if mantissa == 0 {
208            return Exact(0f32);
209        }
210
211        // clear sign
212        let sign = (mantissa < 0) as u32;
213        let mut mantissa = mantissa.unsigned_abs();
214
215        let zeros = mantissa.leading_zeros();
216        let top_bit = (u32::BITS - zeros) as i16 + exponent;
217
218        if top_bit > 128 {
219            // overflow
220            return if sign == 0 {
221                Inexact(f32::INFINITY, Sign::Positive)
222            } else {
223                Inexact(f32::NEG_INFINITY, Sign::Negative)
224            };
225        } else if top_bit < -125 - 23 {
226            // underflow
227            return if sign == 0 {
228                Inexact(0f32, Sign::Negative)
229            } else {
230                Inexact(-0f32, Sign::Positive)
231            };
232        };
233
234        let bits; // bit representation
235        let round_bits; // for rounding
236        if top_bit <= -125 {
237            // subnormal float
238            // (this branch includes 1e-125, the smallest positive normal f32)
239
240            // first remove the exponent
241            let shift = exponent + 126 + 23;
242            if shift >= 0 {
243                round_bits = 0; // not rounding is required
244                mantissa <<= shift as u32;
245            } else {
246                let shifted = mantissa << (30 + shift) as u32;
247                round_bits = (shifted >> 28 & 0b110) as u8 | ((shifted & 0xfffffff) != 0) as u8;
248                mantissa >>= (-shift) as u32;
249            }
250
251            // then compose the bit representation of f32
252            bits = (sign << 31) | mantissa;
253        } else {
254            // normal float
255            // first normalize the mantissa (and remove the top bit)
256            if mantissa == 1 {
257                mantissa = 0; // shl will overflow
258            } else {
259                mantissa <<= zeros + 1;
260            }
261
262            // then calculate the exponent (bias is 127)
263            let exponent = (exponent + 127 + u32::BITS as i16) as u32 - zeros - 1;
264
265            // then compose the bit representation of f32
266            bits = (sign << 31) | (exponent << 23) | (mantissa >> 9);
267
268            // get the low bit of mantissa and two extra bits, and adding round-to-even adjustment
269            round_bits = ((mantissa >> 7) & 0b110) as u8 | ((mantissa & 0x7f) != 0) as u8;
270        };
271
272        if round_bits & 0b11 == 0 {
273            // If two extra bits are all zeros, then the float is exact
274            Exact(f32::from_bits(bits))
275        } else {
276            let sign = Sign::from(sign > 0);
277            if round_to_even_adjustment(round_bits) {
278                // If the mantissa overflows, this correctly increases the exponent and sets the mantissa to 0.
279                // If the exponent overflows, we correctly get the representation of infinity.
280                Inexact(f32::from_bits(bits + 1), Positive * sign)
281            } else {
282                Inexact(f32::from_bits(bits), Negative * sign)
283            }
284        }
285    }
286}
287
288impl FloatEncoding for f64 {
289    type Mantissa = i64;
290    type Exponent = i16;
291
292    #[inline]
293    fn decode(self) -> Result<(i64, i16), FpCategory> {
294        let bits: u64 = self.to_bits();
295        let sign_bit = bits >> 63;
296        let mantissa_bits = bits & 0xfffffffffffff;
297
298        // deal with inf/nan values
299        let mut exponent = ((bits >> 52) & 0x7ff) as i16;
300        if exponent == 0x7ff {
301            return if mantissa_bits != 0 {
302                Err(FpCategory::Nan)
303            } else {
304                Err(FpCategory::Infinite)
305            };
306        }
307
308        // then parse values
309        let mantissa = if exponent == 0 {
310            // subnormal
311            exponent = -1022 - 52;
312            mantissa_bits
313        } else {
314            // normal
315            exponent -= 1023 + 52; // bias + mantissa shift
316            mantissa_bits | 0x10000000000000
317        } as i64;
318
319        if sign_bit == 0 {
320            Ok((mantissa, exponent))
321        } else {
322            Ok((-mantissa, exponent))
323        }
324    }
325
326    #[inline]
327    fn encode(mantissa: i64, exponent: i16) -> Approximation<Self, Sign> {
328        if mantissa == 0 {
329            return Exact(0f64);
330        }
331
332        // clear sign
333        let sign = (mantissa < 0) as u64;
334        let mut mantissa = mantissa.unsigned_abs();
335
336        let zeros = mantissa.leading_zeros();
337        let top_bit = (u64::BITS - zeros) as i16 + exponent;
338
339        if top_bit > 1024 {
340            // overflow
341            return if sign == 0 {
342                Inexact(f64::INFINITY, Sign::Positive)
343            } else {
344                Inexact(f64::NEG_INFINITY, Sign::Negative)
345            };
346        } else if top_bit < -1022 - 52 {
347            // underflow
348            return if sign == 0 {
349                Inexact(0f64, Sign::Negative)
350            } else {
351                Inexact(-0f64, Sign::Positive)
352            };
353        };
354
355        let bits; // bit representation
356        let round_bits; // for rounding
357        if top_bit <= -1022 {
358            // subnormal float
359            // (this branch includes 1e-1022, the smallest positive normal f32)
360
361            // first remove the exponent
362            let shift = exponent + 1022 + 52;
363            if shift >= 0 {
364                round_bits = 0; // not rounding is required
365                mantissa <<= shift as u32;
366            } else {
367                let shifted = mantissa << (62 + shift) as u64;
368                round_bits =
369                    (shifted >> 60 & 0b110) as u8 | ((shifted & 0xfffffffffffffff) != 0) as u8;
370                mantissa >>= (-shift) as u32;
371            }
372
373            // then compose the bit representation of f64
374            bits = (sign << 63) | mantissa;
375        } else {
376            // normal float
377            // first normalize the mantissa (and remove the top bit)
378            if mantissa == 1 {
379                mantissa = 0; // shl will overflow
380            } else {
381                mantissa <<= zeros + 1;
382            }
383
384            // then calculate the exponent (bias is 1023)
385            let exponent = (exponent + 1023 + u64::BITS as i16) as u64 - zeros as u64 - 1;
386
387            // then compose the bit representation of f64
388            bits = (sign << 63) | (exponent << 52) | (mantissa >> 12);
389
390            // get the low bit of mantissa and two extra bits, and adding round-to-even adjustment
391            round_bits = ((mantissa >> 10) & 0b110) as u8 | ((mantissa & 0x3ff) != 0) as u8;
392        };
393
394        if round_bits & 0b11 == 0 {
395            // If two extra bits are all zeros, then the float is exact
396            Exact(f64::from_bits(bits))
397        } else {
398            let sign = Sign::from(sign > 0);
399            if round_to_even_adjustment(round_bits) {
400                // If the mantissa overflows, this correctly increases the exponent and sets the mantissa to 0.
401                // If the exponent overflows, we correctly get the representation of infinity.
402                Inexact(f64::from_bits(bits + 1), Positive * sign)
403            } else {
404                Inexact(f64::from_bits(bits), Negative * sign)
405            }
406        }
407    }
408}
409
410#[cfg(test)]
411mod tests {
412    use super::*;
413
414    #[test]
415    fn test_float_encoding() {
416        // special values
417        assert_eq!(f32::INFINITY.decode(), Err(FpCategory::Infinite));
418        assert_eq!(f32::NEG_INFINITY.decode(), Err(FpCategory::Infinite));
419        assert_eq!(f32::NAN.decode(), Err(FpCategory::Nan));
420        assert_eq!(f64::INFINITY.decode(), Err(FpCategory::Infinite));
421        assert_eq!(f64::NEG_INFINITY.decode(), Err(FpCategory::Infinite));
422        assert_eq!(f64::NAN.decode(), Err(FpCategory::Nan));
423
424        // round trip test
425        let f32_cases = [
426            0.,
427            -1.,
428            1.,
429            f32::MIN,
430            f32::MAX,
431            f32::MIN_POSITIVE,
432            -f32::MIN_POSITIVE,
433            f32::EPSILON,
434            f32::from_bits(0x1),      // smallest f32
435            f32::from_bits(0x7ff),    // some subnormal value
436            f32::from_bits(0x7fffff), // largest subnormal number
437            f32::from_bits(0x800000), // smallest normal number
438            -123.4567,
439            core::f32::consts::PI,
440        ];
441        for f in f32_cases {
442            let (man, exp) = f.decode().unwrap();
443            assert_eq!(f32::encode(man, exp), Exact(f));
444        }
445
446        let f64_cases = [
447            0.,
448            -1.,
449            1.,
450            f64::MIN,
451            f64::MAX,
452            f64::MIN_POSITIVE,
453            -f64::MIN_POSITIVE,
454            f64::EPSILON,
455            f64::from_bits(0x1),              // smallest f64
456            f64::from_bits(0x7fffff),         // largest subnormal number
457            f64::from_bits(0xfffffffffffff),  // some subnormal value
458            f64::from_bits(0x10000000000000), // smallest normal number
459            -123456.789012345,
460            core::f64::consts::PI,
461        ];
462        for f in f64_cases {
463            let (man, exp) = f.decode().unwrap();
464            assert_eq!(f64::encode(man, exp), Exact(f));
465        }
466
467        // test out of ranges
468        assert_eq!(f32::encode(1, 128), Inexact(f32::INFINITY, Sign::Positive));
469        assert_eq!(f32::encode(-1, 128), Inexact(f32::NEG_INFINITY, Sign::Negative));
470        assert_eq!(f32::encode(1, -150), Inexact(0f32, Sign::Negative));
471        assert_eq!(f32::encode(-1, -150), Inexact(-0f32, Sign::Positive));
472        assert_eq!(f64::encode(1, 1024), Inexact(f64::INFINITY, Sign::Positive));
473        assert_eq!(f64::encode(-1, 1024), Inexact(f64::NEG_INFINITY, Sign::Negative));
474        assert_eq!(f64::encode(1, -1075), Inexact(0f64, Sign::Negative));
475        assert_eq!(f64::encode(-1, -1075), Inexact(-0f64, Sign::Positive));
476
477        // test rounding
478        assert_eq!(f32::encode(3, -150), Inexact(f32::from_bits(0x00000002), Sign::Positive));
479        assert_eq!(f32::encode(-5, -150), Inexact(f32::from_bits(0x80000002), Sign::Positive));
480        assert_eq!(f32::encode(i32::MAX, 50), Inexact(f32::from_bits(0x68000000), Sign::Positive));
481        assert_eq!(
482            f32::encode(i32::MAX, -150),
483            Inexact(f32::from_bits(0x04000000), Sign::Positive)
484        );
485        assert_eq!(
486            f32::encode(i32::MAX, -160),
487            Inexact(f32::from_bits(0x00100000), Sign::Positive)
488        );
489        assert_eq!(
490            f32::encode(i32::MAX, -170),
491            Inexact(f32::from_bits(0x00000400), Sign::Positive)
492        );
493        assert_eq!(
494            f64::encode(3, -1075),
495            Inexact(f64::from_bits(0x0000000000000002), Sign::Positive)
496        );
497        assert_eq!(
498            f64::encode(-5, -1075),
499            Inexact(f64::from_bits(0x8000000000000002), Sign::Positive)
500        );
501        assert_eq!(
502            f64::encode(i64::MAX, 500),
503            Inexact(f64::from_bits(0x6320000000000000), Sign::Positive)
504        );
505        assert_eq!(
506            f64::encode(i64::MAX, -1075),
507            Inexact(f64::from_bits(0x00b0000000000000), Sign::Positive)
508        );
509        assert_eq!(
510            f64::encode(i64::MAX, -1095),
511            Inexact(f64::from_bits(0x0000040000000000), Sign::Positive)
512        );
513        assert_eq!(f64::encode(i64::MAX, -1115), Inexact(f64::from_bits(0x400000), Sign::Positive));
514
515        // other cases
516        assert_eq!(f32::encode(1, 0), Exact(1f32));
517        assert_eq!(f64::encode(1, 0), Exact(1f64));
518        assert_eq!(f32::encode(0x1000000, -173), Exact(f32::from_bits(0x1)));
519        assert_eq!(f64::encode(0x40000000000000, -1128), Exact(f64::from_bits(0x1)));
520    }
521}