// fastlanes/lib.rs

1#![no_std]
2
3extern crate alloc;
4extern crate core;
5
6use core::mem::size_of;
7use num_traits::{PrimInt, Unsigned};
8
// Codec kernel implementations, one module per encoding.
mod bitpacking;
mod bitpacking_cmp;
mod delta;
mod ffor;
mod macros;
mod rle;
mod transpose;

// Re-export the codec APIs flat at the crate root; callers use
// `fastlanes::BitPacking` etc. rather than the internal module paths.
pub use bitpacking::*;
pub use bitpacking_cmp::*;
pub use delta::*;
pub use ffor::*;
pub use rle::*;
pub use transpose::*;
23
24pub const FL_ORDER: [usize; 8] = [0, 4, 2, 6, 1, 5, 3, 7];
25
/// Unsigned integer types that FastLanes kernels operate on (`u8`..`u64`).
pub trait FastLanes: Sized + Unsigned + PrimInt {
    /// Width of the type in bits (e.g. 16 for `u16`).
    const T: usize = size_of::<Self>() * 8;
    /// Number of lanes of this type in a 1024-bit FastLanes virtual vector.
    const LANES: usize = 1024 / Self::T;
}
30
// All consts have defaults, so each impl is empty; `T` and `LANES` are
// derived from `size_of` per type (u8: 128 lanes .. u64: 16 lanes).
impl FastLanes for u8 {}
impl FastLanes for u16 {}
impl FastLanes for u32 {}
impl FastLanes for u64 {}
35
// Repeats a code block once per bit of the given unsigned type — i.e.
// `size_of::<T>() * 8` times — binding `$ident` to each value in
// `0..bit_width` via `seq_macro::seq!`. Dispatch is purely syntactic on the
// type token, so only the literal tokens `u8`/`u16`/`u32`/`u64` match.
#[macro_export]
macro_rules! seq_t {
    ($ident:ident in u8 $body:tt) => {seq_macro::seq!($ident in 0..8 $body)};
    ($ident:ident in u16 $body:tt) => {seq_macro::seq!($ident in 0..16 $body)};
    ($ident:ident in u32 $body:tt) => {seq_macro::seq!($ident in 0..32 $body)};
    ($ident:ident in u64 $body:tt) => {seq_macro::seq!($ident in 0..64 $body)};
}
44
/// Returns whether `width` is a valid packed bit width for an integer type
/// of `type_width` bits: anything from 0 up to and including the type's own
/// width.
///
/// # Panics
/// Panics (at compile time when evaluated in const context) if `type_width`
/// is not one of the supported FastLanes widths: 8, 16, 32, or 64.
const fn supported_bit_width(width: usize, type_width: usize) -> bool {
    match type_width {
        // All four arms had the identical body `width <= type_width`, so
        // they collapse into a single or-pattern that only validates
        // `type_width` itself.
        8 | 16 | 32 | 64 => width <= type_width,
        _ => unreachable!(),
    }
}
54
/// Value types that can be recovered bit-for-bit from a FastLanes packed
/// unsigned integer of the same width — each unsigned type itself, plus its
/// signed counterpart (see the `impl_fastlanes_comparable!` invocations
/// below).
pub trait FastLanesComparable: Copy {
    /// The same-width unsigned type used as the packed representation.
    type Bitpacked: FastLanes;

    /// Reinterprets the packed unsigned value as `Self`, preserving bits.
    fn as_unpacked(inner: Self::Bitpacked) -> Self;
}
60
61macro_rules! impl_fastlanes_comparable {
62    ($value_type:ty, $bitpacked_type:ty) => {
63        impl FastLanesComparable for $value_type {
64            type Bitpacked = $bitpacked_type;
65
66            #[inline]
67            #[allow(unnecessary_transmutes, clippy::useless_transmute)]
68            fn as_unpacked(inner: Self::Bitpacked) -> Self {
69                unsafe { core::mem::transmute(inner) }
70            }
71        }
72    };
73}
74
75impl_fastlanes_comparable!(u8, u8);
76impl_fastlanes_comparable!(i8, u8);
77
78impl_fastlanes_comparable!(u16, u16);
79impl_fastlanes_comparable!(i16, u16);
80
81impl_fastlanes_comparable!(u32, u32);
82impl_fastlanes_comparable!(i32, u32);
83
84impl_fastlanes_comparable!(u64, u64);
85impl_fastlanes_comparable!(i64, u64);
86
// Run the example code in the README as doctests: `include_str!` pulls the
// README in as this struct's rustdoc, so `cargo test --doc` executes its
// fenced code blocks. Compiled only under `cfg(doctest)`, so the struct
// never appears in a normal build.
#[doc = include_str!("../README.md")]
#[cfg(doctest)]
pub struct ReadmeDoctests;
91
#[cfg(test)]
mod tests {
    use crate::{BitPacking, FL_ORDER};

    #[test]
    fn test_ordering_is_own_inverse() {
        // FL_ORDER must be an involution: applying it twice is the identity.
        assert!((0..8).all(|i| FL_ORDER[FL_ORDER[i]] == i));
    }

    #[test]
    fn pack_u16_into_u3_no_unsafe() {
        const WIDTH: usize = 3;
        // 1024 values * WIDTH bits, held in 16-bit output words.
        const B: usize = 128 * WIDTH / size_of::<u16>();

        // Input cycles through 0..2^WIDTH so every representable value occurs.
        let mut values = [0u16; 1024];
        for (i, v) in values.iter_mut().enumerate() {
            *v = (i % (1 << WIDTH)) as u16;
        }

        // Pack, then unpack, and require an exact round-trip.
        let mut packed = [0; B];
        BitPacking::pack::<WIDTH, B>(&values, &mut packed);

        let mut unpacked = [0u16; 1024];
        BitPacking::unpack::<WIDTH, B>(&packed, &mut unpacked);
        assert_eq!(values, unpacked);

        // Random access: unpack every value individually. Note that for more
        // than ~10 values it can be faster to unpack everything and index in.
        for (i, &expected) in values.iter().enumerate() {
            assert_eq!(BitPacking::unpack_single::<WIDTH, B>(&packed, i), expected);
        }
    }
}
131}