tantivy_columnar/column_values/u128_based/compact_space/mod.rs

1/// This codec takes a large number space (u128) and reduces it to a compact number space.
2///
3/// It will find spaces in the number range. For example:
4///
5/// 100, 101, 102, 103, 104, 50000, 50001
6/// could be mapped to
7/// 100..104 -> 0..4
8/// 50000..50001 -> 5..6
9///
10/// Compact space 0..=6 requires much less bits than 100..=50001
11///
12/// The codec is created to compress ip addresses, but may be employed in other use cases.
13use std::{
14    cmp::Ordering,
15    collections::BTreeSet,
16    io::{self, Write},
17    ops::{Range, RangeInclusive},
18};
19
20mod blank_range;
21mod build_compact_space;
22
23use build_compact_space::get_compact_space;
24use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
25use tantivy_bitpacker::{BitPacker, BitUnpacker};
26
27use crate::column_values::ColumnValues;
28use crate::RowId;
29
/// Cost (in bits) charged for each blank (gap) when deciding which gaps are worth keeping
/// out of the compact space.
///
/// The true cost per blank is hard to pin down: blanks are delta encoded, so the actual
/// cost of one blank depends on how many blanks there are overall.
///
/// This number was chosen by looking at a real dataset. It is optimized for larger datasets.
const COST_PER_BLANK_IN_BITS: usize = 36;
35
/// The compact space: an ordered list of disjoint value ranges from the original u128 space,
/// each mapped onto a contiguous run of the compact (u32) space.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct CompactSpace {
    // Sorted by `value_range.start()`. Compact positions are assigned consecutively,
    // starting at 1 (0 is reserved for `null`, see `deserialize`).
    ranges_mapping: Vec<RangeMapping>,
}
40
/// Maps one inclusive range of the original u128 space onto the compact space,
/// covering the compact positions `compact_start..compact_start + range.len()`.
#[derive(Debug, Clone, Eq, PartialEq)]
struct RangeMapping {
    value_range: RangeInclusive<u128>,
    compact_start: u32,
}

impl RangeMapping {
    /// Number of values covered by `value_range` (inclusive on both ends).
    fn range_length(&self) -> u32 {
        let span = self.value_range.end() - self.value_range.start();
        1 + span as u32
    }

    /// The last compact-space position occupied by this range.
    fn compact_end(&self) -> u32 {
        let len = self.range_length();
        self.compact_start + (len - 1)
    }
}
57
impl BinarySerializable for CompactSpace {
    /// Serializes the compact space as a count followed by, for each range, two VInt u128
    /// deltas: the gap from the previous boundary to the range start, then the width of the
    /// range (start to end). `compact_start` is not stored; it is recomputed on deserialize.
    fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
        VInt(self.ranges_mapping.len() as u64).serialize(writer)?;

        // Delta encoding: each boundary is written relative to the previous one, so the
        // write order (start then end, per range, in range order) is load-bearing.
        let mut prev_value = 0;
        for value_range in self
            .ranges_mapping
            .iter()
            .map(|range_mapping| &range_mapping.value_range)
        {
            let blank_delta_start = value_range.start() - prev_value;
            VIntU128(blank_delta_start).serialize(writer)?;
            prev_value = *value_range.start();

            let blank_delta_end = value_range.end() - prev_value;
            VIntU128(blank_delta_end).serialize(writer)?;
            prev_value = *value_range.end();
        }

        Ok(())
    }

    /// Reconstructs the ranges by accumulating the stored deltas and reassigns each range's
    /// `compact_start` consecutively, starting at 1.
    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
        let num_ranges = VInt::deserialize(reader)?.0;
        let mut ranges_mapping: Vec<RangeMapping> = vec![];
        let mut value = 0u128;
        let mut compact_start = 1u32; // 0 is reserved for `null`
        for _ in 0..num_ranges {
            let blank_delta_start = VIntU128::deserialize(reader)?.0;
            value += blank_delta_start;
            let blank_start = value;

            let blank_delta_end = VIntU128::deserialize(reader)?.0;
            value += blank_delta_end;
            let blank_end = value;

            let range_mapping = RangeMapping {
                value_range: blank_start..=blank_end,
                compact_start,
            };
            let range_length = range_mapping.range_length();
            ranges_mapping.push(range_mapping);
            // Next range starts right after this one in the compact space.
            compact_start += range_length;
        }

        Ok(Self { ranges_mapping })
    }
}
106
impl CompactSpace {
    /// Amplitude is the value range of the compact space including the sentinel value used to
    /// identify null values. The compact space is 0..=amplitude .
    ///
    /// It's only used to verify we don't exceed u64 number space, which would indicate a bug.
    fn amplitude_compact_space(&self) -> u128 {
        self.ranges_mapping
            .last()
            .map(|last_range| last_range.compact_end() as u128)
            .unwrap_or(1) // compact space starts at 1, 0 == null
    }

    /// Returns the range mapping at index `pos`. Panics if `pos` is out of bounds.
    fn get_range_mapping(&self, pos: usize) -> &RangeMapping {
        &self.ranges_mapping[pos]
    }

    /// Returns either Ok(the value in the compact space) or if it is outside the compact space the
    /// Err(position where it would be inserted)
    fn u128_to_compact(&self, value: u128) -> Result<u32, usize> {
        // Ranges are sorted and disjoint, so a binary search over the range bounds finds the
        // (unique) range containing `value`, if any.
        self.ranges_mapping
            .binary_search_by(|probe| {
                let value_range: &RangeInclusive<u128> = &probe.value_range;
                if value < *value_range.start() {
                    Ordering::Greater
                } else if value > *value_range.end() {
                    Ordering::Less
                } else {
                    Ordering::Equal
                }
            })
            .map(|pos| {
                let range_mapping = &self.ranges_mapping[pos];
                // Offset of `value` within its range, shifted into the compact space.
                let pos_in_range: u32 = (value - range_mapping.value_range.start()) as u32;
                range_mapping.compact_start + pos_in_range
            })
    }

    /// Unpacks a value from compact space u32 to u128 space
    fn compact_to_u128(&self, compact: u32) -> u128 {
        let pos = self
            .ranges_mapping
            .binary_search_by_key(&compact, |range_mapping| range_mapping.compact_start)
            // Correctness: Overflow. The first range's `compact_start` is 1 (0 is the null
            // sentinel, see `deserialize`), so for any stored compact value (>= 1) the binary
            // search error is >= 1 and `e - 1` cannot underflow.
            // NOTE(review): passing the null sentinel 0 here would underflow — callers must
            // only pass compact values that were actually stored. TODO confirm the build side
            // also assigns the first `compact_start` as 1.
            .unwrap_or_else(|e| e - 1);

        let range_mapping = &self.ranges_mapping[pos];
        let diff = compact - range_mapping.compact_start;
        range_mapping.value_range.start() + diff as u128
    }
}
158
/// Compresses u128 values into the compact space.
///
/// Built via `train_from`, which derives the compact space from the values; then consumed by
/// `compress_into`, which bitpacks the compact values and appends the codec footer.
pub struct CompactSpaceCompressor {
    params: IPCodecParams,
}
162
/// Codec parameters shared by the compressor and the decompressor; serialized in the footer.
#[derive(Debug, Clone)]
pub struct IPCodecParams {
    compact_space: CompactSpace,
    // Reads `num_bits`-wide compact values out of the bitpacked data.
    bit_unpacker: BitUnpacker,
    // Minimum value in the original u128 space.
    min_value: u128,
    // Maximum value in the original u128 space.
    max_value: u128,
    // Number of stored values, duplicates included.
    num_vals: RowId,
    // Bit width of a single bitpacked compact value.
    num_bits: u8,
}
172
impl CompactSpaceCompressor {
    /// Number of values that will be compressed (duplicates included).
    pub fn num_vals(&self) -> RowId {
        self.params.num_vals
    }

    /// Trains the compressor on the values, building the compact space.
    ///
    /// The distinct values are collected into a `BTreeSet`, which may cost a lot of memory;
    /// the set is used to get the values deduplicated and sorted.
    pub fn train_from(iter: impl Iterator<Item = u128>) -> Self {
        let mut values_sorted = BTreeSet::new();
        // Total number of values, with their redundancy.
        let mut total_num_values = 0u32;
        for val in iter {
            total_num_values += 1u32;
            values_sorted.insert(val);
        }
        // For an empty input both default to 0.
        let min_value = *values_sorted.iter().next().unwrap_or(&0);
        let max_value = *values_sorted.iter().last().unwrap_or(&0);

        let compact_space =
            get_compact_space(&values_sorted, total_num_values, COST_PER_BLANK_IN_BITS);
        let amplitude_compact_space = compact_space.amplitude_compact_space();

        assert!(
            amplitude_compact_space <= u64::MAX as u128,
            "case unsupported."
        );

        let num_bits = tantivy_bitpacker::compute_num_bits(amplitude_compact_space as u64);

        // Sanity check: the maximum value must map exactly onto the last compact position.
        assert_eq!(
            compact_space
                .u128_to_compact(max_value)
                .expect("could not convert max value to compact space"),
            amplitude_compact_space as u32
        );
        CompactSpaceCompressor {
            params: IPCodecParams {
                compact_space,
                bit_unpacker: BitUnpacker::new(num_bits),
                min_value,
                max_value,
                num_vals: total_num_values,
                num_bits,
            },
        }
    }

    /// Serializes the codec params, followed by the footer length (u32), at the end of the
    /// output. The decompressor reads the length from the last 4 bytes to locate the footer.
    fn write_footer(self, writer: &mut impl Write) -> io::Result<()> {
        let writer = &mut CountingWriter::wrap(writer);
        self.params.serialize(writer)?;

        let footer_len = writer.written_bytes() as u32;
        footer_len.serialize(writer)?;

        Ok(())
    }

    /// Bitpacks all values (mapped into the compact space) into `write`, then appends the
    /// footer.
    ///
    /// # Errors
    /// Returns `InvalidData` if a value does not map into the trained compact space, which
    /// means the iterator differs from the one used in `train_from` — a bug.
    pub fn compress_into(
        self,
        vals: impl Iterator<Item = u128>,
        write: &mut impl Write,
    ) -> io::Result<()> {
        let mut bitpacker = BitPacker::default();
        for val in vals {
            let compact = self
                .params
                .compact_space
                .u128_to_compact(val)
                .map_err(|_| {
                    io::Error::new(
                        io::ErrorKind::InvalidData,
                        "Could not convert value to compact_space. This is a bug.",
                    )
                })?;
            bitpacker.write(compact as u64, self.params.num_bits, write)?;
        }
        // Flush any partially filled word before writing the footer.
        bitpacker.close(write)?;
        self.write_footer(write)?;
        Ok(())
    }
}
253
/// Decompressor counterpart of [`CompactSpaceCompressor`]: reads bitpacked compact values
/// from `data` and maps them back into the original u128 space via the footer's params.
#[derive(Debug, Clone)]
pub struct CompactSpaceDecompressor {
    // Full serialized column (bitpacked values + footer).
    data: OwnedBytes,
    params: IPCodecParams,
}
259
impl BinarySerializable for IPCodecParams {
    /// Layout: header flags (u64), min/max/num_vals (VInt u128), num_bits (u8), then the
    /// compact space. `deserialize` must read fields in the same order.
    fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
        // header flags for future optional dictionary encoding
        let footer_flags = 0u64;
        footer_flags.serialize(writer)?;

        VIntU128(self.min_value).serialize(writer)?;
        VIntU128(self.max_value).serialize(writer)?;
        VIntU128(self.num_vals as u128).serialize(writer)?;
        self.num_bits.serialize(writer)?;

        self.compact_space.serialize(writer)?;

        Ok(())
    }

    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
        // Flags are currently unused but must still be consumed.
        let _header_flags = u64::deserialize(reader)?;
        let min_value = VIntU128::deserialize(reader)?.0;
        let max_value = VIntU128::deserialize(reader)?.0;
        let num_vals = VIntU128::deserialize(reader)?.0 as u32;
        let num_bits = u8::deserialize(reader)?;
        let compact_space = CompactSpace::deserialize(reader)?;

        Ok(Self {
            compact_space,
            // The unpacker is derived from `num_bits`, not stored.
            bit_unpacker: BitUnpacker::new(num_bits),
            min_value,
            max_value,
            num_vals,
            num_bits,
        })
    }
}
294
/// Exposes the compact space compressed values as u64.
///
/// This allows faster access to the values, as u64 is faster to work with than u128.
/// It also allows handling u128 values like u64, via `open_u64_lenient`, as a uniform
/// access interface.
///
/// When converting from the internal u64 to u128, `compact_to_u128` can be used.
pub struct CompactSpaceU64Accessor(CompactSpaceDecompressor);
impl CompactSpaceU64Accessor {
    /// Opens a serialized column and wraps the decompressor in the u64 accessor.
    pub(crate) fn open(data: OwnedBytes) -> io::Result<CompactSpaceU64Accessor> {
        let decompressor = CompactSpaceU64Accessor(CompactSpaceDecompressor::open(data)?);
        Ok(decompressor)
    }
    /// Convert a compact space value to u128
    pub fn compact_to_u128(&self, compact: u32) -> u128 {
        self.0.compact_to_u128(compact)
    }
}
313
// In this impl, u64 values are COMPACT-space values (not the original u128 space):
// `get_val` returns the raw compact value, and `min_value`/`max_value` map the original
// bounds into the compact space.
impl ColumnValues<u64> for CompactSpaceU64Accessor {
    #[inline]
    fn get_val(&self, doc: u32) -> u64 {
        let compact = self.0.get_compact(doc);
        compact as u64
    }

    fn min_value(&self) -> u64 {
        // unwrap is fine: min_value was observed in the data, so it maps into the
        // compact space.
        self.0.u128_to_compact(self.0.min_value()).unwrap() as u64
    }

    fn max_value(&self) -> u64 {
        // Same invariant as `min_value`.
        self.0.u128_to_compact(self.0.max_value()).unwrap() as u64
    }

    fn num_vals(&self) -> u32 {
        self.0.params.num_vals
    }

    #[inline]
    fn iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
        Box::new(self.0.iter_compact().map(|el| el as u64))
    }

    #[inline]
    fn get_row_ids_for_value_range(
        &self,
        value_range: RangeInclusive<u64>,
        position_range: Range<u32>,
        positions: &mut Vec<u32>,
    ) {
        // The incoming range is in compact space; translate it back to u128 space and
        // delegate to the decompressor's range scan.
        let value_range = self.0.compact_to_u128(*value_range.start() as u32)
            ..=self.0.compact_to_u128(*value_range.end() as u32);
        self.0
            .get_row_ids_for_value_range(value_range, position_range, positions)
    }
}
351
impl ColumnValues<u128> for CompactSpaceDecompressor {
    #[inline]
    fn get_val(&self, doc: u32) -> u128 {
        self.get(doc)
    }

    fn min_value(&self) -> u128 {
        self.min_value()
    }

    fn max_value(&self) -> u128 {
        self.max_value()
    }

    fn num_vals(&self) -> u32 {
        self.params.num_vals
    }

    #[inline]
    fn iter(&self) -> Box<dyn Iterator<Item = u128> + '_> {
        Box::new(self.iter())
    }

    /// Collects into `positions` the row ids (within `position_range`) whose value lies in
    /// `value_range`. The u128 bounds are translated into a compact-space range first, so
    /// the scan itself runs on the bitpacked u64 data.
    #[inline]
    fn get_row_ids_for_value_range(
        &self,
        value_range: RangeInclusive<u128>,
        position_range: Range<u32>,
        positions: &mut Vec<u32>,
    ) {
        // Empty (inverted) range — nothing can match.
        if value_range.start() > value_range.end() {
            return;
        }
        // Clamp to the number of stored values so out-of-bounds position ranges are safe.
        let position_range = position_range.start..position_range.end.min(self.num_vals());
        let from_value = *value_range.start();
        let to_value = *value_range.end();
        assert!(to_value >= from_value);
        let compact_from = self.u128_to_compact(from_value);
        let compact_to = self.u128_to_compact(to_value);

        // Quick return, if both ranges fall into the same non-mapped space, the range can't cover
        // any values, so we can early exit
        match (compact_to, compact_from) {
            (Err(pos1), Err(pos2)) if pos1 == pos2 => return,
            _ => {}
        }

        // If `from_value` is unmapped, snap up to the start of the next mapped range.
        let compact_from = compact_from.unwrap_or_else(|pos| {
            // Correctness: Out of bounds, if this value is Err(last_index + 1), we early exit,
            // since the to_value also maps into the same non-mapped space
            let range_mapping = self.params.compact_space.get_range_mapping(pos);
            range_mapping.compact_start
        });
        // If there is no compact space, we go to the closest upperbound compact space
        let compact_to = compact_to.unwrap_or_else(|pos| {
            // Correctness: Overflow, if this value is Err(0), we early exit,
            // since the from_value also maps into the same non-mapped space

            // Get end of previous range
            let pos = pos - 1;
            let range_mapping = self.params.compact_space.get_range_mapping(pos);
            range_mapping.compact_end()
        });

        let value_range = compact_from..=compact_to;
        self.get_positions_for_compact_value_range(value_range, position_range, positions);
    }
}
420
impl CompactSpaceDecompressor {
    /// Opens a serialized column: reads the footer length from the last 4 bytes, then
    /// deserializes the codec params from the footer. The bitpacked values remain in
    /// `data` and are accessed lazily via the bit unpacker.
    pub fn open(data: OwnedBytes) -> io::Result<CompactSpaceDecompressor> {
        let (data_slice, footer_len_bytes) = data.split_at(data.len() - 4);
        let footer_len = u32::deserialize(&mut &footer_len_bytes[..])?;

        let data_footer = &data_slice[data_slice.len() - footer_len as usize..];
        let params = IPCodecParams::deserialize(&mut &data_footer[..])?;
        let decompressor = CompactSpaceDecompressor { data, params };

        Ok(decompressor)
    }

    /// Converting to compact space for the decompressor is more complex, since we may get values
    /// which are outside the compact space. e.g. if we map
    /// 1000 => 5
    /// 2000 => 6
    ///
    /// and we want a mapping for 1005, there is no equivalent compact space. We instead return an
    /// error with the index of the next range.
    fn u128_to_compact(&self, value: u128) -> Result<u32, usize> {
        self.params.compact_space.u128_to_compact(value)
    }

    /// Unpacks a compact-space value back into the original u128 space.
    fn compact_to_u128(&self, compact: u32) -> u128 {
        self.params.compact_space.compact_to_u128(compact)
    }

    /// Iterates over all stored values in compact space, in row order.
    #[inline]
    fn iter_compact(&self) -> impl Iterator<Item = u32> + '_ {
        (0..self.params.num_vals)
            .map(move |idx| self.params.bit_unpacker.get(idx, &self.data) as u32)
    }

    /// Iterates over all stored values in the original u128 space, in row order.
    #[inline]
    fn iter(&self) -> impl Iterator<Item = u128> + '_ {
        // TODO: Performance. It would be better to iterate on the ranges and check existence via
        // the bit_unpacker.
        self.iter_compact()
            .map(|compact| self.compact_to_u128(compact))
    }

    /// Returns the compact-space value stored at row `idx`.
    #[inline]
    pub fn get_compact(&self, idx: u32) -> u32 {
        self.params.bit_unpacker.get(idx, &self.data) as u32
    }

    /// Returns the u128 value stored at row `idx`.
    #[inline]
    pub fn get(&self, idx: u32) -> u128 {
        let compact = self.get_compact(idx);
        self.compact_to_u128(compact)
    }

    /// Minimum value in the original u128 space (from the footer).
    pub fn min_value(&self) -> u128 {
        self.params.min_value
    }

    /// Maximum value in the original u128 space (from the footer).
    pub fn max_value(&self) -> u128 {
        self.params.max_value
    }

    /// Scans the bitpacked data for rows within `position_range` whose compact value falls in
    /// `value_range`, appending matching row ids to `positions`.
    fn get_positions_for_compact_value_range(
        &self,
        value_range: RangeInclusive<u32>,
        position_range: Range<u32>,
        positions: &mut Vec<u32>,
    ) {
        self.params.bit_unpacker.get_ids_for_value_range(
            *value_range.start() as u64..=*value_range.end() as u64,
            position_range,
            &self.data,
            positions,
        );
    }
}
495
#[cfg(test)]
mod tests {

    use itertools::Itertools;

    use super::*;
    use crate::column_values::u128_based::U128Header;
    use crate::column_values::{open_u128_mapped, serialize_column_values_u128};

    // Exercises compact-space construction, compact <-> u128 round-trips, and
    // (de)serialization on a small hand-picked value set.
    #[test]
    fn compact_space_test() {
        let ips: BTreeSet<u128> = [
            2u128, 4u128, 1000, 1001, 1002, 1003, 1004, 1005, 1008, 1010, 1012, 1260,
        ]
        .into_iter()
        .collect();
        let compact_space = get_compact_space(&ips, ips.len() as u32, 11);
        let amplitude = compact_space.amplitude_compact_space();
        assert_eq!(amplitude, 17);
        assert_eq!(1, compact_space.u128_to_compact(2).unwrap());
        assert_eq!(2, compact_space.u128_to_compact(3).unwrap());
        assert_eq!(compact_space.u128_to_compact(100).unwrap_err(), 1);

        // Consecutive ranges must occupy adjacent, non-overlapping compact intervals.
        for (num1, num2) in (0..3).tuple_windows() {
            assert_eq!(
                compact_space.get_range_mapping(num1).compact_end() + 1,
                compact_space.get_range_mapping(num2).compact_start
            );
        }

        let mut output: Vec<u8> = Vec::new();
        compact_space.serialize(&mut output).unwrap();

        assert_eq!(
            compact_space,
            CompactSpace::deserialize(&mut &output[..]).unwrap()
        );

        // Every stored value must round-trip through the compact space.
        for ip in ips {
            let compact = compact_space.u128_to_compact(ip).unwrap();
            assert_eq!(compact_space.compact_to_u128(compact), ip);
        }
    }

    // Two distant values with a low blank cost should collapse to amplitude 2.
    #[test]
    fn compact_space_amplitude_test() {
        let ips = &[100000u128, 1000000].into_iter().collect();
        let compact_space = get_compact_space(ips, ips.len() as u32, 1);
        let amplitude = compact_space.amplitude_compact_space();
        assert_eq!(amplitude, 2);
    }

    // Checks every stored value via `get` and probes small value ranges around each value
    // against a brute-force scan of `expected`.
    fn test_all(mut data: OwnedBytes, expected: &[u128]) {
        let _header = U128Header::deserialize(&mut data);
        let decompressor = CompactSpaceDecompressor::open(data).unwrap();
        for (idx, expected_val) in expected.iter().cloned().enumerate() {
            let val = decompressor.get(idx as u32);
            assert_eq!(val, expected_val);

            let test_range = |range: RangeInclusive<u128>| {
                let expected_positions = expected
                    .iter()
                    .positions(|val| range.contains(val))
                    .map(|pos| pos as u32)
                    .collect::<Vec<_>>();
                let mut positions = Vec::new();
                decompressor.get_row_ids_for_value_range(
                    range,
                    0..decompressor.num_vals(),
                    &mut positions,
                );
                assert_eq!(positions, expected_positions);
            };

            // Probe ranges touching the value and its immediate neighbors.
            test_range(expected_val.saturating_sub(1)..=expected_val);
            test_range(expected_val..=expected_val);
            test_range(expected_val..=expected_val.saturating_add(1));
            test_range(expected_val.saturating_sub(1)..=expected_val.saturating_add(1));
        }
    }

    // Serializes the values and runs `test_all` against the round-tripped data.
    fn test_aux_vals(u128_vals: &[u128]) -> OwnedBytes {
        let mut out = Vec::new();
        serialize_column_values_u128(&u128_vals, &mut out).unwrap();
        let data = OwnedBytes::new(out);
        test_all(data.clone(), u128_vals);
        data
    }

    // Range queries over a mix of close and far-apart values, including unmapped gaps.
    #[test]
    fn test_range_1() {
        let vals = &[
            1u128,
            100u128,
            3u128,
            99999u128,
            100000u128,
            100001u128,
            4_000_211_221u128,
            4_000_211_222u128,
            333u128,
        ];
        let mut data = test_aux_vals(vals);

        let _header = U128Header::deserialize(&mut data);
        let decomp = CompactSpaceDecompressor::open(data).unwrap();
        let complete_range = 0..vals.len() as u32;
        for (pos, val) in vals.iter().enumerate() {
            let val = *val;
            let pos = pos as u32;
            let mut positions = Vec::new();
            decomp.get_row_ids_for_value_range(val..=val, pos..pos + 1, &mut positions);
            assert_eq!(positions, vec![pos]);
        }

        // handle docid range out of bounds
        let positions: Vec<u32> = get_positions_for_value_range_helper(&decomp, 0..=1, 1..u32::MAX);
        assert!(positions.is_empty());

        let positions =
            get_positions_for_value_range_helper(&decomp, 0..=1, complete_range.clone());
        assert_eq!(positions, vec![0]);
        let positions =
            get_positions_for_value_range_helper(&decomp, 0..=2, complete_range.clone());
        assert_eq!(positions, vec![0]);
        let positions =
            get_positions_for_value_range_helper(&decomp, 0..=3, complete_range.clone());
        assert_eq!(positions, vec![0, 2]);
        assert_eq!(
            get_positions_for_value_range_helper(
                &decomp,
                99999u128..=99999u128,
                complete_range.clone()
            ),
            vec![3]
        );
        assert_eq!(
            get_positions_for_value_range_helper(
                &decomp,
                99999u128..=100000u128,
                complete_range.clone()
            ),
            vec![3, 4]
        );
        assert_eq!(
            get_positions_for_value_range_helper(
                &decomp,
                99998u128..=100000u128,
                complete_range.clone()
            ),
            vec![3, 4]
        );
        assert_eq!(
            &get_positions_for_value_range_helper(
                &decomp,
                99998u128..=99999u128,
                complete_range.clone()
            ),
            &[3]
        );
        assert!(get_positions_for_value_range_helper(
            &decomp,
            99998u128..=99998u128,
            complete_range.clone()
        )
        .is_empty());
        assert_eq!(
            &get_positions_for_value_range_helper(
                &decomp,
                333u128..=333u128,
                complete_range.clone()
            ),
            &[8]
        );
        assert_eq!(
            &get_positions_for_value_range_helper(
                &decomp,
                332u128..=333u128,
                complete_range.clone()
            ),
            &[8]
        );
        assert_eq!(
            &get_positions_for_value_range_helper(
                &decomp,
                332u128..=334u128,
                complete_range.clone()
            ),
            &[8]
        );
        assert_eq!(
            &get_positions_for_value_range_helper(
                &decomp,
                333u128..=334u128,
                complete_range.clone()
            ),
            &[8]
        );

        assert_eq!(
            &get_positions_for_value_range_helper(
                &decomp,
                4_000_211_221u128..=5_000_000_000u128,
                complete_range
            ),
            &[6, 7]
        );
    }

    // An empty column must serialize and open without error.
    #[test]
    fn test_empty() {
        let vals = &[];
        let data = test_aux_vals(vals);
        let _decomp = CompactSpaceDecompressor::open(data).unwrap();
    }

    // Range queries whose bounds fall entirely below / at the smallest stored value.
    #[test]
    fn test_range_2() {
        let vals = &[
            100u128,
            99999u128,
            100000u128,
            100001u128,
            4_000_211_221u128,
            4_000_211_222u128,
            333u128,
        ];
        let mut data = test_aux_vals(vals);
        let _header = U128Header::deserialize(&mut data);
        let decomp = CompactSpaceDecompressor::open(data).unwrap();
        let complete_range = 0..vals.len() as u32;
        assert!(
            &get_positions_for_value_range_helper(&decomp, 0..=5, complete_range.clone())
                .is_empty(),
        );
        assert_eq!(
            &get_positions_for_value_range_helper(&decomp, 0..=100, complete_range.clone()),
            &[0]
        );
        assert_eq!(
            &get_positions_for_value_range_helper(&decomp, 0..=105, complete_range),
            &[0]
        );
    }

    // Convenience wrapper: runs a range query and returns the collected positions.
    fn get_positions_for_value_range_helper<C: ColumnValues<T> + ?Sized, T: PartialOrd>(
        column: &C,
        value_range: RangeInclusive<T>,
        doc_id_range: Range<u32>,
    ) -> Vec<u32> {
        let mut positions = Vec::new();
        column.get_row_ids_for_value_range(value_range, doc_id_range, &mut positions);
        positions
    }

    // Range queries through the public `open_u128_mapped` entry point, including duplicates.
    #[test]
    fn test_range_3() {
        let vals = &[
            200u128,
            201,
            202,
            203,
            204,
            204,
            206,
            207,
            208,
            209,
            210,
            1_000_000,
            5_000_000_000,
        ];
        let mut out = Vec::new();
        serialize_column_values_u128(&&vals[..], &mut out).unwrap();
        let decomp = open_u128_mapped(OwnedBytes::new(out)).unwrap();
        let complete_range = 0..vals.len() as u32;

        assert_eq!(
            get_positions_for_value_range_helper(&*decomp, 199..=200, complete_range.clone()),
            vec![0]
        );

        assert_eq!(
            get_positions_for_value_range_helper(&*decomp, 199..=201, complete_range.clone()),
            vec![0, 1]
        );

        assert_eq!(
            get_positions_for_value_range_helper(&*decomp, 200..=200, complete_range.clone()),
            vec![0]
        );

        assert_eq!(
            get_positions_for_value_range_helper(&*decomp, 1_000_000..=1_000_000, complete_range),
            vec![11]
        );
    }

    // Regression: single value near i64::MAX.
    #[test]
    fn test_bug1() {
        let vals = &[9223372036854775806];
        let _data = test_aux_vals(vals);
    }

    // Regression: u128::MAX alone.
    #[test]
    fn test_bug2() {
        let vals = &[340282366920938463463374607431768211455u128];
        let _data = test_aux_vals(vals);
    }

    // Regression: u128::MAX - 1.
    #[test]
    fn test_bug3() {
        let vals = &[340282366920938463463374607431768211454];
        let _data = test_aux_vals(vals);
    }

    // Regression: full-amplitude pair (0 and u128::MAX).
    #[test]
    fn test_bug4() {
        let vals = &[340282366920938463463374607431768211455, 0];
        let _data = test_aux_vals(vals);
    }

    // Many duplicates of one large value.
    #[test]
    fn test_first_large_gaps() {
        let vals = &[1_000_000_000u128; 100];
        let _data = test_aux_vals(vals);
    }

    use proptest::prelude::*;

    // Mixes values clustered near interesting boundaries (u128::MAX, i64::MAX, i128::MAX,
    // zero) with uniformly random u128s.
    fn num_strategy() -> impl Strategy<Value = u128> {
        prop_oneof![
            1 => prop::num::u128::ANY.prop_map(|num| u128::MAX - (num % 10) ),
            1 => prop::num::u128::ANY.prop_map(|num| i64::MAX as u128 + 5 - (num % 10) ),
            1 => prop::num::u128::ANY.prop_map(|num| i128::MAX as u128 + 5 - (num % 10) ),
            1 => prop::num::u128::ANY.prop_map(|num| num % 10 ),
            20 => prop::num::u128::ANY,
        ]
    }

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(10))]

        // Round-trip property test over the mixed strategy above.
        #[test]
        fn compress_decompress_random(vals in proptest::collection::vec(num_strategy() , 1..1000)) {
            let _data = test_aux_vals(&vals);
        }
    }
}