compressed_intvec/variable/traits.rs
1//! Core traits for the [`variable`] module.
2//!
3//! This module defines the [`Storable`] trait, a generic abstraction that allows
4//! different integer types to be stored in an [`IntVec`]. It handles the necessary
5//! conversions to the [`u64`] word representation required by the underlying
6//! variable-length compression codecs.
7//!
8//! [`IntVec`]: crate::variable::IntVec
9//! [`variable`]: crate::variable
10
11use dsi_bitstream::prelude::{ToInt, ToNat};
12
/// A trait for types that can be stored in a variable-length compressed vector.
///
/// This trait provides a bidirectional, lossless conversion between a user-facing
/// element type (e.g., [`i32`], [`u16`]) and a [`u64`] representation. This abstraction
/// is essential for [`IntVec`] to support a variety of integer types while using
/// a common set of compression algorithms that operate on [`u64`].
///
/// Implementors must be [`Sized`] and [`Copy`]. For every value `x` of an
/// implementing type, the round trip `Self::from_word(x.to_word())` is
/// expected to yield `x` again.
///
/// # Portability and [`usize`]/[`isize`]
///
/// By default, this trait is implemented only for fixed-size integer types
/// (e.g., [`u8`], [`i16`], [`u32`], [`i64`]). This design choice guarantees that data
/// compressed on one machine architecture (e.g., 64-bit) can be safely
/// decompressed on another (e.g., 32-bit) without data loss or corruption.
///
/// Support for the architecture-dependent types [`usize`] and [`isize`] can be
/// enabled via the `arch-dependent-storable` feature flag.
///
/// ## Feature Flag: `arch-dependent-storable`
///
/// Activating this feature provides [`Storable`] implementations for [`usize`] and
/// [`isize`]. However, this breaks the portability guarantee. An `IntVec<usize>`
/// created on a 64-bit system with values greater than [`u32::MAX`] will cause a
/// panic if it is read on a 32-bit system.
///
/// **Enable this feature only if you are certain that the data will never be
/// shared across platforms with different pointer widths.**
///
/// # Zig-Zag Encoding for Signed Integers
///
/// For signed integer types, the implementation of this trait automatically
/// applies **Zig-Zag encoding**. This is a reversible transformation that maps
/// signed integers to unsigned integers, such that values close to zero (both
/// positive and negative) are mapped to small unsigned integers. This is highly
/// effective for variable-length codes, which use fewer bits to represent smaller
/// numbers.
///
/// | Original Signed | Zig-Zag Unsigned |
/// |-----------------|------------------|
/// | 0               | 0                |
/// | -1              | 1                |
/// | 1               | 2                |
/// | -2              | 3                |
/// | 2               | 4                |
/// | ...             | ...              |
///
/// [`IntVec`]: crate::variable::IntVec
pub trait Storable: Sized + Copy {
    /// Converts the element into its [`u64`] storage representation.
    ///
    /// For unsigned types, this is a direct (widening) cast. For signed types,
    /// this applies Zig-Zag encoding.
    fn to_word(self) -> u64;

    /// Converts a [`u64`] storage word back into the element type.
    ///
    /// For unsigned types, this converts the word back to the target width.
    /// For signed types, this reverses the Zig-Zag encoding.
    ///
    /// # Panics
    ///
    /// Implementations may panic if `word` contains a value that is out of
    /// range for the target type. This can occur if the data is corrupted or,
    /// in the case of [`usize`]/[`isize`] with the `arch-dependent-storable` feature,
    /// if the data is read on an architecture with a smaller pointer width than
    /// the one it was written on.
    fn from_word(word: u64) -> Self;
}
80
// Generates a `Storable` implementation for each listed unsigned integer type.
//
// Every listed type must be at most 64 bits wide so that `to_word` is a
// lossless widening. `from_word` performs a *checked* narrowing: a word that
// does not fit the target type (e.g. corrupted data, or data decoded with the
// wrong element type) triggers a panic instead of being silently truncated,
// which matches the contract documented on `Storable::from_word` and the
// behavior of the signed implementations.
macro_rules! impl_storable_for_unsigned {
    ($($T:ty),*) => {$(
        impl Storable for $T {
            /// Widens the value to its `u64` storage word.
            #[inline(always)]
            fn to_word(self) -> u64 {
                self as u64
            }

            /// Narrows a `u64` storage word back to the element type.
            ///
            /// # Panics
            ///
            /// Panics if `word` is larger than the maximum value of the
            /// element type.
            #[inline(always)]
            fn from_word(word: u64) -> Self {
                // A plain `as` cast would wrap out-of-range words modulo the
                // type's width and hide data corruption.
                <$T>::try_from(word).unwrap_or_else(|_| {
                    panic!("Value {} out of range for type {}", word, stringify!($T))
                })
            }
        }
    )*};
}
96
// Generates a `Storable` implementation for each listed signed integer type.
//
// Values are mapped to unsigned storage words with `ToNat::to_nat` and mapped
// back with `ToInt::to_int`; decoding narrows the 64-bit signed result to the
// element type and panics when it does not fit.
macro_rules! impl_storable_for_signed {
    ($($T:ty),*) => {$(
        impl Storable for $T {
            /// Encodes the value as an unsigned word and widens it to `u64`.
            #[inline(always)]
            fn to_word(self) -> u64 {
                u64::from(ToNat::to_nat(self))
            }

            /// Decodes a storage word back into the signed element type.
            ///
            /// # Panics
            ///
            /// Panics if the decoded value does not fit in the element type.
            #[inline(always)]
            fn from_word(word: u64) -> Self {
                let narrowed: Result<Self, _> = ToInt::to_int(word).try_into();
                match narrowed {
                    Ok(value) => value,
                    Err(_) => panic!("Value out of range for type"),
                }
            }
        }
    )*};
}
114
// Implement `Storable` for all primitive, fixed-size integer types of up to
// 64 bits. Unsigned types are stored as-is; signed types go through the
// zig-zag mapping provided by `ToNat`/`ToInt`. `u128`/`i128` are not included:
// they would not fit the `u64` storage word. `usize`/`isize` are handled
// separately in the feature-gated `arch_dependent` module.
impl_storable_for_unsigned!(u8, u16, u32, u64);
impl_storable_for_signed!(i8, i16, i32, i64);
118
/// `Storable` implementations for the pointer-width integer types.
///
/// Compiled only when the `arch-dependent-storable` feature is enabled. Words
/// that do not fit in this architecture's `usize` cause a panic on decoding.
#[cfg(feature = "arch-dependent-storable")]
mod arch_dependent {
    use super::*;
    use std::convert::TryFrom;

    impl Storable for usize {
        /// Casts the value to its `u64` storage word.
        #[inline(always)]
        fn to_word(self) -> u64 {
            self as u64
        }

        /// Narrows a storage word to `usize`.
        ///
        /// # Panics
        ///
        /// Panics if `word` does not fit in this architecture's `usize`.
        #[inline(always)]
        fn from_word(word: u64) -> Self {
            match usize::try_from(word) {
                Ok(value) => value,
                Err(_) => panic!("Value {} out of range for this architecture's usize", word),
            }
        }
    }

    impl Storable for isize {
        /// Maps the value to an unsigned word via `to_nat` and casts it to `u64`.
        #[inline(always)]
        fn to_word(self) -> u64 {
            let encoded = self.to_nat();
            encoded as u64
        }

        /// Narrows a storage word to `usize`, then maps it back to `isize`
        /// via `to_int`.
        ///
        /// # Panics
        ///
        /// Panics if `word` does not fit in this architecture's `usize`.
        #[inline(always)]
        fn from_word(word: u64) -> Self {
            match usize::try_from(word) {
                Ok(encoded) => ToInt::to_int(encoded),
                Err(_) => panic!(
                    "Value {} out of range for this architecture's usize (for isize decoding)",
                    word
                ),
            }
        }
    }
}