compressed_intvec/variable/
traits.rs

1//! Core traits for the [`variable`] module.
2//!
3//! This module defines the [`Storable`] trait, a generic abstraction that allows
4//! different integer types to be stored in an [`IntVec`]. It handles the necessary
5//! conversions to the [`u64`] word representation required by the underlying
6//! variable-length compression codecs.
7//!
8//! [`IntVec`]: crate::variable::IntVec
9//! [`variable`]: crate::variable
10
11use dsi_bitstream::prelude::{ToInt, ToNat};
12
/// A trait for types that can be stored in a variable-length compressed vector.
///
/// `Storable` defines a bidirectional conversion between a user-facing element
/// type (e.g., [`i32`], [`u16`]) and the [`u64`] word that is actually encoded.
/// This abstraction lets [`IntVec`] support a variety of integer types while
/// sharing a single set of compression algorithms that operate on [`u64`].
///
/// The conversion is expected to round-trip: for every value `v` of an
/// implementing type, `Self::from_word(v.to_word())` yields `v` again.
///
/// # Portability and [`usize`]/[`isize`]
///
/// By default, this trait is implemented only for fixed-size integer types
/// (e.g., [`u8`], [`i16`], [`u32`], [`i64`]). This design choice guarantees that
/// data compressed on one machine architecture (e.g., 64-bit) can be safely
/// decompressed on another (e.g., 32-bit) without data loss or corruption.
///
/// Support for the architecture-dependent types [`usize`] and [`isize`] can be
/// enabled via the `arch-dependent-storable` feature flag.
///
/// ## Feature Flag: `arch-dependent-storable`
///
/// Activating this feature provides [`Storable`] implementations for [`usize`]
/// and [`isize`]. However, this breaks the portability guarantee: an
/// `IntVec<usize>` created on a 64-bit system with values greater than
/// [`u32::MAX`] will cause a panic if it is read on a 32-bit system.
///
/// **Enable this feature only if you are certain that the data will never be
/// shared across platforms with different pointer widths.**
///
/// # Zig-Zag Encoding for Signed Integers
///
/// For signed integer types, the implementation of this trait automatically
/// applies **Zig-Zag encoding**. This is a reversible transformation that maps
/// signed integers to unsigned integers, such that values close to zero (both
/// positive and negative) are mapped to small unsigned integers. This is highly
/// effective for variable-length codes, which use fewer bits to represent
/// smaller numbers.
///
/// | Original Signed | Zig-Zag Unsigned |
/// |-----------------|------------------|
/// | 0               | 0                |
/// | -1              | 1                |
/// | 1               | 2                |
/// | -2              | 3                |
/// | 2               | 4                |
/// | ...             | ...              |
///
/// [`IntVec`]: crate::variable::IntVec
pub trait Storable: Sized + Copy {
    /// Converts the element into its [`u64`] storage representation.
    ///
    /// For unsigned types, this is a lossless widening of the value. For
    /// signed types, this applies Zig-Zag encoding.
    fn to_word(self) -> u64;

    /// Converts a [`u64`] storage word back into the element type.
    ///
    /// For unsigned types, the word is converted back to the (possibly
    /// narrower) element type. For signed types, this reverses the Zig-Zag
    /// encoding.
    ///
    /// Callers should only pass words that were produced by
    /// [`to_word`](Storable::to_word) for the same type.
    ///
    /// # Panics
    ///
    /// Implementations may panic if `word` holds a value that is out of range
    /// for the target type. This can occur if the data is corrupted or, in the
    /// case of [`usize`]/[`isize`] with the `arch-dependent-storable` feature,
    /// if the data is read on an architecture with a smaller pointer width
    /// than the one it was written on.
    fn from_word(word: u64) -> Self;
}
80
// Implements `Storable` for the listed fixed-width unsigned integer types.
//
// Encoding is a lossless widening to `u64`. Decoding is a *checked* narrowing:
// a word that does not fit in the target type triggers a panic instead of
// being silently truncated, which would turn corrupted input into plausible
// looking but wrong values.
macro_rules! impl_storable_for_unsigned {
    ($($T:ty),*) => {$(
        impl Storable for $T {
            #[inline(always)]
            fn to_word(self) -> u64 {
                // Every listed type is at most 64 bits wide, so this never loses data.
                u64::from(self)
            }

            #[inline(always)]
            fn from_word(word: u64) -> Self {
                // The fully qualified path keeps the macro independent of which
                // prelude (edition) is in effect at the expansion site.
                <$T as ::std::convert::TryFrom<u64>>::try_from(word).unwrap_or_else(|_| {
                    panic!(
                        "Value {} out of range for type {}",
                        word,
                        stringify!($T)
                    )
                })
            }
        }
    )*};
}
96
// Implements `Storable` for the listed fixed-width signed integer types.
//
// Values are Zig-Zag encoded on the way in and decoded on the way out, so that
// small magnitudes (positive or negative) become small unsigned words.
macro_rules! impl_storable_for_signed {
    ($($T:ty),*) => {$(
        impl Storable for $T {
            #[inline(always)]
            fn to_word(self) -> u64 {
                // Zig-Zag encode into the same-width unsigned type, then widen.
                let zigzag = self.to_nat();
                u64::from(zigzag)
            }

            #[inline(always)]
            fn from_word(word: u64) -> Self {
                // Undo the Zig-Zag mapping at full width first, then narrow
                // with a range check so an oversized word cannot wrap around.
                let decoded = ToInt::to_int(word);
                <$T as ::std::convert::TryFrom<_>>::try_from(decoded)
                    .unwrap_or_else(|_| panic!("Value out of range for type"))
            }
        }
    )*};
}
114
// Implement `Storable` for the primitive, fixed-size integer types of up to
// 64 bits: unsigned types go through the plain-conversion macro, signed types
// through the Zig-Zag macro. 128-bit integers are not listed, since their
// values would not fit in the `u64` storage word.
impl_storable_for_unsigned!(u8, u16, u32, u64);
impl_storable_for_signed!(i8, i16, i32, i64);
118
/// `Storable` implementations for the pointer-width types `usize` and `isize`.
///
/// This module is compiled only when the `arch-dependent-storable` feature is
/// enabled. Decoding panics when a stored word does not fit in the `usize` of
/// the architecture doing the reading.
#[cfg(feature = "arch-dependent-storable")]
mod arch_dependent {
    use super::*;
    use std::convert::TryFrom;

    impl Storable for usize {
        /// Widens the value to the `u64` storage word.
        #[inline(always)]
        fn to_word(self) -> u64 {
            self as u64
        }

        /// Narrows the storage word to `usize`, panicking if it does not fit.
        #[inline(always)]
        fn from_word(word: u64) -> Self {
            match usize::try_from(word) {
                Ok(value) => value,
                Err(_) => panic!("Value {} out of range for this architecture's usize", word),
            }
        }
    }

    impl Storable for isize {
        /// Zig-Zag encodes the value and widens the result to `u64`.
        #[inline(always)]
        fn to_word(self) -> u64 {
            let zigzag = self.to_nat();
            zigzag as u64
        }

        /// Narrows the storage word to `usize` (panicking if it does not fit)
        /// and then reverses the Zig-Zag encoding.
        #[inline(always)]
        fn from_word(word: u64) -> Self {
            let zigzag = match usize::try_from(word) {
                Ok(value) => value,
                Err(_) => panic!(
                    "Value {} out of range for this architecture's usize (for isize decoding)",
                    word
                ),
            };
            ToInt::to_int(zigzag)
        }
    }
}
157}