tantivy/postings/
block_search.rs

1use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
2
3/// Search the first index containing an element greater or equal to
4/// the target.
5///
6/// The results should be equivalent to
7/// ```compile_fail
8/// block[..]
9//       .iter()
10//       .take_while(|&&val| val < target)
11//       .count()
12/// ```
13/// 
14/// the `start` argument is just used to hint that the response is
15/// greater than beyond `start`. The implementation may or may not use
16/// it for optimization.
17///
18/// # Assumption
19///
20/// - The block is sorted. Some elements may appear several times. This is the case at the
21///   end of the last block for instance.
22/// - The target is assumed smaller or equal to the last element of the block.
23pub fn branchless_binary_search(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
24    let mut start = 0;
25    let mut len = arr.len();
26    for _ in 0..7 {
27        len /= 2;
28        let pivot = unsafe { *arr.get_unchecked(start + len - 1) };
29        if pivot < target {
30            start += len;
31        }
32    }
33    start
34}
35
36#[cfg(test)]
37mod tests {
38    use std::collections::HashSet;
39
40    use proptest::prelude::*;
41
42    use super::branchless_binary_search;
43    use crate::docset::TERMINATED;
44    use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
45
46    fn search_in_block_trivial_but_slow(block: &[u32], target: u32) -> usize {
47        block.iter().take_while(|&&val| val < target).count()
48    }
49
50    fn util_test_search_in_block(block: &[u32], target: u32) {
51        let cursor = search_in_block_trivial_but_slow(block, target);
52        assert!(cursor < COMPRESSION_BLOCK_SIZE);
53        assert!(block[cursor] >= target);
54        if cursor > 0 {
55            assert!(block[cursor - 1] < target);
56        }
57        assert_eq!(block.len(), COMPRESSION_BLOCK_SIZE);
58        let mut output_buffer = [TERMINATED; COMPRESSION_BLOCK_SIZE];
59        output_buffer[..block.len()].copy_from_slice(block);
60        assert_eq!(branchless_binary_search(&output_buffer, target), cursor);
61    }
62
63    fn util_test_search_in_block_all(block: &[u32]) {
64        let mut targets = HashSet::new();
65        targets.insert(0);
66        for &val in block {
67            if val > 0 {
68                targets.insert(val - 1);
69            }
70            targets.insert(val);
71        }
72        for target in targets {
73            util_test_search_in_block(block, target);
74        }
75    }
76
77    #[test]
78    fn test_search_in_branchless_binary_search() {
79        let v: Vec<u32> = (0..COMPRESSION_BLOCK_SIZE).map(|i| i as u32 * 2).collect();
80        util_test_search_in_block_all(&v[..]);
81    }
82
83    fn monotonous_block() -> impl Strategy<Value = Vec<u32>> {
84        prop::collection::vec(0u32..5u32, COMPRESSION_BLOCK_SIZE).prop_map(|mut deltas| {
85            let mut el = 0;
86            for i in 0..COMPRESSION_BLOCK_SIZE {
87                el += deltas[i];
88                deltas[i] = el;
89            }
90            deltas
91        })
92    }
93
94    proptest! {
95        #[test]
96        fn test_proptest_branchless_binary_search(block in monotonous_block()) {
97            util_test_search_in_block_all(&block[..]);
98        }
99    }
100}