summavy_common/
lib.rs

#![allow(clippy::len_without_is_empty)]

use std::ops::Deref;

pub use byteorder::LittleEndian as Endianness;

mod bitset;
pub mod file_slice;
mod group_by;
mod serialize;
mod vint;
mod writer;
pub use bitset::*;
pub use group_by::GroupByIteratorExtended;
pub use ownedbytes::{OwnedBytes, StableDeref};
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
pub use vint::{
    deserialize_vint_u128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u128,
    serialize_vint_u32, write_u32_vint, VInt, VIntU128,
};
pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};

/// Trait for objects that have a length.
pub trait HasLen {
    /// Returns the length.
    fn len(&self) -> usize;

    /// Returns true iff empty.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

impl<T: Deref<Target = [u8]>> HasLen for T {
    fn len(&self) -> usize {
        self.deref().len()
    }
}

/// Mask selecting the most significant bit of a `u64`, used to flip the sign bit in the
/// mappings below.
const HIGHEST_BIT: u64 = 1 << 63;

/// Maps an `i64` to a `u64`.
///
/// For simplicity, tantivy internally handles `i64` as `u64`.
/// The mapping is defined by this function.
///
/// It maps `i64` to `u64` so that
/// `-2^63 .. 2^63-1` is mapped to
/// `0 .. 2^64-1`
/// in that order.
///
/// This is better suited than a simple cast (`val as u64`)
/// because of bitpacking.
///
/// Imagine a list of `i64` ranging from -10 to 10.
/// With a plain cast, the negative values are projected to values
/// above 2^63, and every value in the list ends up requiring 64 bits.
/// With this mapping, the values remain contiguous and bitpack into a few bits each.
///
/// # See also
/// The reverse mapping is [`u64_to_i64()`].
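///
/// # Example
///
/// A minimal illustrative sketch of the round-trip and order-preservation
/// properties (assuming the crate is named `summavy_common`, as the file path
/// suggests):
///
/// ```
/// // Crate name assumed from the `summavy_common/` path header.
/// use summavy_common::{i64_to_u64, u64_to_i64};
///
/// // The smallest `i64` maps to the smallest `u64`, and `u64` order matches `i64` order.
/// assert_eq!(i64_to_u64(i64::MIN), u64::MIN);
/// assert!(i64_to_u64(-1) < i64_to_u64(0));
///
/// // The mapping is lossless: `u64_to_i64` is its inverse.
/// assert_eq!(u64_to_i64(i64_to_u64(-42)), -42);
/// ```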
#[inline]
pub fn i64_to_u64(val: i64) -> u64 {
    // Flipping the sign bit turns two's complement order into natural `u64` order.
    (val as u64) ^ HIGHEST_BIT
}

/// Reverses the mapping given by [`i64_to_u64()`].
#[inline]
pub fn u64_to_i64(val: u64) -> i64 {
    (val ^ HIGHEST_BIT) as i64
}

/// Maps an `f64` to a `u64`.
///
/// For simplicity, tantivy internally handles `f64` as `u64`.
/// The mapping is defined by this function.
///
/// It maps `f64` to `u64` monotonically, so that the numerical order of the
/// floats is preserved by the resulting `u64` values.
///
/// This is better suited than a simple cast (`val as u64`),
/// which would truncate the value and lose the ordering.
///
/// # Reference
///
/// Daniel Lemire's [blog post](https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order/)
/// explains the mapping in a clear manner.
///
/// # See also
/// The reverse mapping is [`u64_to_f64()`].
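///
/// # Example
///
/// A minimal illustrative sketch of the order-preservation property (again
/// assuming the crate is named `summavy_common`):
///
/// ```
/// // Crate name assumed from the `summavy_common/` path header.
/// use summavy_common::{f64_to_u64, u64_to_f64};
///
/// // Order is preserved across the sign boundary and among negative values.
/// assert!(f64_to_u64(-2.5) < f64_to_u64(-1.0));
/// assert!(f64_to_u64(-1.0) < f64_to_u64(0.0));
/// assert!(f64_to_u64(0.0) < f64_to_u64(1.5));
///
/// // The mapping is lossless: `u64_to_f64` is its inverse.
/// assert_eq!(u64_to_f64(f64_to_u64(-1.5)), -1.5);
/// ```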
#[inline]
pub fn f64_to_u64(val: f64) -> u64 {
    let bits = val.to_bits();
    if val.is_sign_positive() {
        // Positive floats already sort correctly as raw bits; setting the sign
        // bit places them above all mapped negative values.
        bits ^ HIGHEST_BIT
    } else {
        // Negative floats sort in reverse order as raw bits; negating every bit
        // restores the order and clears the sign bit, placing them below the positives.
        !bits
    }
}

/// Reverses the mapping given by [`f64_to_u64()`].
#[inline]
pub fn u64_to_f64(val: u64) -> f64 {
    f64::from_bits(if val & HIGHEST_BIT != 0 {
        // The value came from a positive float: clear the sign bit again.
        val ^ HIGHEST_BIT
    } else {
        // The value came from a negative float: undo the bit negation.
        !val
    })
}

#[cfg(test)]
pub mod test {

    use proptest::prelude::*;

    use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64, BinarySerializable, FixedSize};

    fn test_i64_converter_helper(val: i64) {
        assert_eq!(u64_to_i64(i64_to_u64(val)), val);
    }

    fn test_f64_converter_helper(val: f64) {
        assert_eq!(u64_to_f64(f64_to_u64(val)), val);
    }

    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let mut buffer = Vec::new();
        O::default().serialize(&mut buffer).unwrap();
        assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
    }

    proptest! {
        #[test]
        fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) {
            let left_u64 = f64_to_u64(left);
            let right_u64 = f64_to_u64(right);
            assert_eq!(left_u64 < right_u64, left < right);
        }
    }

    #[test]
    fn test_i64_converter() {
        assert_eq!(i64_to_u64(i64::MIN), u64::MIN);
        assert_eq!(i64_to_u64(i64::MAX), u64::MAX);
        test_i64_converter_helper(0i64);
        test_i64_converter_helper(i64::MIN);
        test_i64_converter_helper(i64::MAX);
        for i in -1000i64..1000i64 {
            test_i64_converter_helper(i);
        }
    }

    #[test]
    fn test_f64_converter() {
        test_f64_converter_helper(f64::INFINITY);
        test_f64_converter_helper(f64::NEG_INFINITY);
        test_f64_converter_helper(0.0);
        test_f64_converter_helper(-0.0);
        test_f64_converter_helper(1.0);
        test_f64_converter_helper(-1.0);
    }

    #[test]
    fn test_f64_order() {
        assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
            .contains(&f64_to_u64(f64::NAN))); // NaN falls outside the ordered range
        assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); // same exponent, different mantissa
        assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); // same mantissa, different exponent
        assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); // different exponent and mantissa
        assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // positive > negative
        assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
        assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));
        assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5));
    }
}
174}