Skip to main content

lance_core/utils/
bit.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
/// Returns true if the given number is a power of two.
///
/// Zero is not a power of two, so `is_pwr_two(0)` returns `false`.
///
/// ```
/// use lance_core::utils::bit::is_pwr_two;
///
/// assert!(is_pwr_two(1));
/// assert!(is_pwr_two(2));
/// assert!(is_pwr_two(1024));
/// assert!(!is_pwr_two(0));
/// assert!(!is_pwr_two(3));
/// assert!(!is_pwr_two(1000));
/// ```
pub fn is_pwr_two(n: u64) -> bool {
    // The classic `n & (n - 1) == 0` trick underflows for `n == 0`: it panics
    // when overflow checks are on and reports `true` when they are off. The
    // standard-library check has neither problem.
    n.is_power_of_two()
}
18
/// Returns the number of padding bytes needed to align `n` to `ALIGN`.
///
/// The result is the smallest value in `0..ALIGN` that, added to `n`, yields
/// a multiple of `ALIGN`. `ALIGN` must be a power of two.
///
/// # Panics
///
/// When debug assertions are enabled, panics if `ALIGN` is not a power of two
/// (including zero).
///
/// ```
/// use lance_core::utils::bit::pad_bytes;
///
/// assert_eq!(pad_bytes::<8>(0), 0);
/// assert_eq!(pad_bytes::<8>(1), 7);
/// assert_eq!(pad_bytes::<8>(8), 0);
/// assert_eq!(pad_bytes::<8>(9), 7);
/// ```
pub fn pad_bytes<const ALIGN: usize>(n: usize) -> usize {
    debug_assert!(
        ALIGN.is_power_of_two(),
        "alignment must be a power of two"
    );
    // For a power-of-two alignment, `ALIGN - 1` masks off the offset of `n`
    // within its aligned slot.
    let mask = ALIGN - 1;
    // The outer mask folds the "already aligned" case (`ALIGN - 0`) back to 0.
    (ALIGN - (n & mask)) & mask
}
33
/// Returns the number of padding bytes needed to align `n` to `align`.
///
/// The result is the smallest value in `0..align` that, added to `n`, yields
/// a multiple of `align`. `align` must be a power of two.
///
/// # Panics
///
/// When debug assertions are enabled, panics if `align` is not a power of two
/// (including zero).
///
/// ```
/// use lance_core::utils::bit::pad_bytes_to;
///
/// assert_eq!(pad_bytes_to(0, 8), 0);
/// assert_eq!(pad_bytes_to(1, 8), 7);
/// assert_eq!(pad_bytes_to(8, 8), 0);
/// assert_eq!(pad_bytes_to(9, 8), 7);
/// ```
pub fn pad_bytes_to(n: usize, align: usize) -> usize {
    debug_assert!(
        align.is_power_of_two(),
        "alignment must be a power of two"
    );
    // For a power-of-two alignment, `align - 1` masks off the offset of `n`
    // within its aligned slot.
    let mask = align - 1;
    // The outer mask folds the "already aligned" case (`align - 0`) back to 0.
    (align - (n & mask)) & mask
}
48
/// Returns the number of padding bytes needed to align `n` to `ALIGN` (u64 version).
///
/// The result is the smallest value in `0..ALIGN` that, added to `n`, yields
/// a multiple of `ALIGN`. `ALIGN` must be a power of two.
///
/// # Panics
///
/// When debug assertions are enabled, panics if `ALIGN` is not a power of two
/// (including zero).
///
/// ```
/// use lance_core::utils::bit::pad_bytes_u64;
///
/// assert_eq!(pad_bytes_u64::<8>(0), 0);
/// assert_eq!(pad_bytes_u64::<8>(1), 7);
/// assert_eq!(pad_bytes_u64::<8>(8), 0);
/// assert_eq!(pad_bytes_u64::<8>(9), 7);
/// ```
pub fn pad_bytes_u64<const ALIGN: u64>(n: u64) -> u64 {
    debug_assert!(
        ALIGN.is_power_of_two(),
        "alignment must be a power of two"
    );
    // For a power-of-two alignment, `ALIGN - 1` masks off the offset of `n`
    // within its aligned slot.
    let mask = ALIGN - 1;
    // The outer mask folds the "already aligned" case (`ALIGN - 0`) back to 0.
    (ALIGN - (n & mask)) & mask
}
63
/// Returns the number of bits needed to represent the given number.
///
/// This is the 1-based position of the highest set bit, i.e.
/// `floor(log2(val)) + 1`. Despite the name, it is not the mathematical
/// ceiling of `log2(val)`: for example `log_2_ceil(2)` is `2`.
///
/// # Panics
///
/// Panics if `val` is zero.
///
/// ```
/// use lance_core::utils::bit::log_2_ceil;
///
/// assert_eq!(log_2_ceil(1), 1);
/// assert_eq!(log_2_ceil(2), 2);
/// assert_eq!(log_2_ceil(255), 8);
/// assert_eq!(log_2_ceil(256), 9);
/// ```
pub fn log_2_ceil(val: u32) -> u32 {
    assert!(val > 0);
    // A non-zero `u32` whose highest set bit sits at index `k` has `31 - k`
    // leading zeros, so its bit width `k + 1` is `32 - leading_zeros`.
    u32::BITS - val.leading_zeros()
}
111
#[cfg(test)]
pub mod tests {
    use crate::utils::bit::{is_pwr_two, log_2_ceil, pad_bytes, pad_bytes_to, pad_bytes_u64};

    #[test]
    fn test_bit_utils() {
        // Values that the doctests do not already exercise.
        assert!(is_pwr_two(4));
        assert!(is_pwr_two(1 << 40));
        assert!(!is_pwr_two(5));
        assert!(!is_pwr_two(u64::MAX));

        // A 64-byte alignment, which the doctests (all 8-byte) do not cover.
        assert_eq!(pad_bytes::<64>(100), 28);
        assert_eq!(pad_bytes_to(100, 64), 28);
        assert_eq!(pad_bytes_u64::<64>(100), 28);

        // An input that is already aligned needs no padding.
        assert_eq!(pad_bytes::<64>(128), 0);
        assert_eq!(pad_bytes_to(128, 64), 0);
        assert_eq!(pad_bytes_u64::<64>(128), 0);
    }

    #[test]
    fn test_log_2_ceil() {
        // Reference implementation: count how many right shifts empty the value.
        #[cfg_attr(coverage, coverage(off))]
        fn classic_approach(mut val: u32) -> u32 {
            let mut counter = 0;
            while val > 0 {
                val >>= 1;
                counter += 1;
            }
            counter
        }

        for i in 1..(16 * 1024) {
            assert_eq!(log_2_ceil(i), classic_approach(i));
        }
        assert_eq!(log_2_ceil(50 * 1024), classic_approach(50 * 1024));
        assert_eq!(
            log_2_ceil(1024 * 1024 * 1024),
            classic_approach(1024 * 1024 * 1024)
        );
        // A value between 2^16 and 2^24.
        assert_eq!(log_2_ceil(100_000), classic_approach(100_000));
    }

    #[test]
    fn test_log_2_ceil_boundaries() {
        // Each power of two needs one more bit than the value just below it.
        for shift in 0..u32::BITS {
            let power = 1u32 << shift;
            assert_eq!(log_2_ceil(power), shift + 1);
            if shift > 0 {
                assert_eq!(log_2_ceil(power - 1), shift);
            }
        }
        assert_eq!(log_2_ceil(u32::MAX), 32);
    }

    #[test]
    #[should_panic]
    fn test_log_2_ceil_rejects_zero() {
        let _ = log_2_ceil(0);
    }
}