tantivy_common/
lib.rs

1#![allow(clippy::len_without_is_empty)]
2
3use std::ops::Deref;
4
5pub use byteorder::LittleEndian as Endianness;
6
7mod bitset;
8pub mod bounds;
9mod byte_count;
10mod datetime;
11pub mod file_slice;
12mod group_by;
13pub mod json_path_writer;
14mod serialize;
15mod vint;
16mod writer;
17pub use bitset::*;
18pub use byte_count::ByteCount;
19pub use datetime::{DateTime, DateTimePrecision};
20pub use group_by::GroupByIteratorExtended;
21pub use json_path_writer::JsonPathWriter;
22pub use ownedbytes::{OwnedBytes, StableDeref};
23pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
24pub use vint::{
25    VInt, VIntU128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint,
26};
27pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};
28
/// Trait for types that expose a length.
pub trait HasLen {
    /// Returns the length of `self`.
    fn len(&self) -> usize;

    /// Returns `true` if and only if [`len()`](HasLen::len) is zero.
    fn is_empty(&self) -> bool {
        matches!(self.len(), 0)
    }
}
39
40impl<T: Deref<Target = [u8]>> HasLen for T {
41    fn len(&self) -> usize {
42        self.deref().len()
43    }
44}
45
/// Mask in which only the most significant bit of a `u64` (bit 63) is set.
const HIGHEST_BIT: u64 = 0x8000_0000_0000_0000;
47
48/// Maps a `i64` to `u64`
49///
50/// For simplicity, tantivy internally handles `i64` as `u64`.
51/// The mapping is defined by this function.
52///
53/// Maps `i64` to `u64` so that
54/// `-2^63 .. 2^63-1` is mapped
55///     to
56/// `0 .. 2^64-1`
57/// in that order.
58///
59/// This is more suited than simply casting (`val as u64`)
60/// because of bitpacking.
61///
62/// Imagine a list of `i64` ranging from -10 to 10.
63/// When casting negative values, the negative values are projected
64/// to values over 2^63, and all values end up requiring 64 bits.
65///
66/// # See also
67/// The reverse mapping is [`u64_to_i64()`].
68#[inline]
69pub fn i64_to_u64(val: i64) -> u64 {
70    (val as u64) ^ HIGHEST_BIT
71}
72
73/// Reverse the mapping given by [`i64_to_u64()`].
74#[inline]
75pub fn u64_to_i64(val: u64) -> i64 {
76    (val ^ HIGHEST_BIT) as i64
77}
78
79/// Maps a `f64` to `u64`
80///
81/// For simplicity, tantivy internally handles `f64` as `u64`.
82/// The mapping is defined by this function.
83///
84/// Maps `f64` to `u64` in a monotonic manner, so that bytes lexical order is preserved.
85///
86/// This is more suited than simply casting (`val as u64`)
87/// which would truncate the result
88///
89/// # Reference
90///
91/// Daniel Lemire's [blog post](https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order/)
92/// explains the mapping in a clear manner.
93///
94/// # See also
95/// The reverse mapping is [`u64_to_f64()`].
96#[inline]
97pub fn f64_to_u64(val: f64) -> u64 {
98    let bits = val.to_bits();
99    if val.is_sign_positive() {
100        bits ^ HIGHEST_BIT
101    } else {
102        !bits
103    }
104}
105
106/// Reverse the mapping given by [`f64_to_u64()`].
107#[inline]
108pub fn u64_to_f64(val: u64) -> f64 {
109    f64::from_bits(if val & HIGHEST_BIT != 0 {
110        val ^ HIGHEST_BIT
111    } else {
112        !val
113    })
114}
115
/// Overwrites every occurrence of `needle` in `bytes` with `replacement`.
///
/// The needle is assumed to rarely occur in `bytes`: a membership check
/// runs first, and the slice is only rewritten when that check finds
/// at least one match.
#[inline]
pub fn replace_in_place(needle: u8, replacement: u8, bytes: &mut [u8]) {
    if bytes.contains(&needle) {
        bytes
            .iter_mut()
            .filter(|byte| **byte == needle)
            .for_each(|byte| *byte = replacement);
    }
}
131
#[cfg(test)]
pub(crate) mod test {

    use proptest::prelude::*;

    use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};

    /// Asserts that `val` survives an `i64 -> u64 -> i64` round trip.
    fn test_i64_converter_helper(val: i64) {
        assert_eq!(u64_to_i64(i64_to_u64(val)), val);
    }

    /// Asserts that `val` survives an `f64 -> u64 -> f64` round trip bit for bit.
    ///
    /// Bit patterns are compared instead of float values: float `==` treats
    /// `-0.0` and `0.0` as equal (so a lost sign would go unnoticed) and is
    /// never true for NaN (so NaN could not be checked at all).
    fn test_f64_converter_helper(val: f64) {
        assert_eq!(u64_to_f64(f64_to_u64(val)).to_bits(), val.to_bits());
    }

    proptest! {
        #[test]
        fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) {
            let left_u64 = f64_to_u64(left);
            let right_u64 = f64_to_u64(right);
            assert_eq!(left_u64 < right_u64,  left < right);
        }
    }

    #[test]
    fn test_i64_converter() {
        assert_eq!(i64_to_u64(i64::MIN), u64::MIN);
        assert_eq!(i64_to_u64(i64::MAX), u64::MAX);
        test_i64_converter_helper(0i64);
        test_i64_converter_helper(i64::MIN);
        test_i64_converter_helper(i64::MAX);
        for i in -1000i64..1000i64 {
            test_i64_converter_helper(i);
        }
    }

    #[test]
    fn test_f64_converter() {
        test_f64_converter_helper(f64::INFINITY);
        test_f64_converter_helper(f64::NEG_INFINITY);
        test_f64_converter_helper(0.0);
        test_f64_converter_helper(-0.0);
        test_f64_converter_helper(1.0);
        test_f64_converter_helper(-1.0);
        // Extremes of the finite range and the smallest positive normal value.
        test_f64_converter_helper(f64::MIN);
        test_f64_converter_helper(f64::MAX);
        test_f64_converter_helper(f64::MIN_POSITIVE);
        // Smallest positive subnormal value.
        test_f64_converter_helper(f64::from_bits(1));
        // NaN with either sign bit: only checkable through bit patterns.
        test_f64_converter_helper(f64::NAN);
        test_f64_converter_helper(-f64::NAN);
    }

    #[test]
    fn test_f64_order() {
        assert!(
            !(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
                .contains(&f64_to_u64(f64::NAN))
        ); // nan is not a number
        assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); // same exponent, different mantissa
        assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); // same mantissa, different exponent
        assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); // different exponent and mantissa
        assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg
        assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
        assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));
        assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5));
    }

    #[test]
    fn test_replace_in_place() {
        let test_aux = |before_replacement: &[u8], expected: &[u8]| {
            let mut bytes: Vec<u8> = before_replacement.to_vec();
            super::replace_in_place(b'b', b'c', &mut bytes);
            assert_eq!(&bytes[..], expected);
        };
        test_aux(b"", b"");
        test_aux(b"b", b"c");
        test_aux(b"baaa", b"caaa");
        test_aux(b"aaab", b"aaac");
        test_aux(b"aaabaa", b"aaacaa");
        test_aux(b"aaaaaa", b"aaaaaa");
        test_aux(b"bbbb", b"cccc");
    }
}