compact_bytes/lib.rs

1//! "Small string optimization" for a bytes.
2
3use std::alloc;
4use std::ptr::NonNull;
5
6mod fixed;
7mod growable;
8
9pub use fixed::CompactBytesSlice;
10pub use growable::CompactBytes;
11
12const INLINE_MASK: u8 = 0b1000_0000;
13
14/// Fixed capacity inline buffer of bytes.
15#[repr(C, align(8))]
16#[derive(Copy, Clone)]
17struct InlineBytes<const CAPACITY: usize> {
18    buffer: [u8; CAPACITY],
19    data: u8,
20}
21
22/// Inline storage for [`CompactBytes`].
23type InlineBytes23 = InlineBytes<23>;
24static_assertions::assert_eq_size!(InlineBytes23, Vec<u8>);
25
26/// Inline storage for [`CompactBytesSlice`].
27type InlineBytes15 = InlineBytes<15>;
28static_assertions::assert_eq_size!(InlineBytes15, Box<[u8]>);
29
30impl<const CAPACITY: usize> InlineBytes<CAPACITY> {
31    /// The amount of bytes this [`InlineBytes`] can store inline.
32    const CAPACITY: usize = CAPACITY;
33
34    /// Create an [`InlineBytes`] from the provided slice.
35    ///
36    /// Safety:
37    ///
38    /// * `slice` must have a length less than or equal to `CAPACITY`.
39    ///
40    #[inline]
41    pub unsafe fn new(slice: &[u8]) -> Self {
42        debug_assert!(slice.len() <= CAPACITY);
43
44        let len = slice.len();
45        let mut buffer = [0u8; CAPACITY];
46
47        // SAFETY: We know src and dst are valid for len bytes, nor do they overlap.
48        unsafe {
49            buffer
50                .as_mut_ptr()
51                .copy_from_nonoverlapping(slice.as_ptr(), len)
52        };
53
54        let data = INLINE_MASK | (len as u8);
55
56        InlineBytes { buffer, data }
57    }
58
59    #[inline]
60    pub const fn empty() -> Self {
61        let buffer = [0u8; CAPACITY];
62
63        // Even though the below statement as no effect, we leave it for better understanding.
64        #[allow(clippy::identity_op)]
65        let data = INLINE_MASK | 0;
66
67        InlineBytes { buffer, data }
68    }
69
70    pub fn len(&self) -> usize {
71        (self.data & !INLINE_MASK) as usize
72    }
73
74    /// Forces the length of [`InlineBytes`] to `new_len`.
75    ///
76    /// # Safety
77    /// * `new_len` must be less than or equal to the generic `CAPACITY`.
78    /// * `new_len` must be less than or equal to capacity.
79    /// * The bytes at `old_len..new_len` must be initialized.
80    ///
81    unsafe fn set_len(&mut self, new_len: usize) {
82        debug_assert!(new_len <= CAPACITY);
83        self.data = INLINE_MASK | (new_len as u8);
84    }
85}
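
// A hedged sketch of how the inline encoding behaves: the trailing `data` byte
// packs INLINE_MASK into the high bit and the length into the low bits, so an
// inline value needs no separate length field. The test module and test names
// below are illustrative additions, not part of the original API.
#[cfg(test)]
mod inline_bytes_tests {
    use super::*;

    #[test]
    fn length_is_packed_into_the_data_byte() {
        let empty = InlineBytes23::empty();
        assert_eq!(empty.len(), 0);
        assert_eq!(empty.data, INLINE_MASK);

        // SAFETY: "hello" is 5 bytes, which is within the 23 byte capacity.
        let inline = unsafe { InlineBytes23::new(b"hello") };
        assert_eq!(inline.len(), 5);
        assert_eq!(&inline.buffer[..inline.len()], b"hello");
        assert_eq!(inline.data, INLINE_MASK | 5);
    }
}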

/// A growable heap allocation of bytes.
#[repr(C)]
struct HeapBytesGrowable {
    ptr: NonNull<u8>,
    len: usize,
    cap: usize,
}
static_assertions::assert_eq_size!(HeapBytesGrowable, Vec<u8>);

impl HeapBytesGrowable {
    /// The minimum allocation size for a [`HeapBytesGrowable`].
    pub const MIN_ALLOCATION_SIZE: usize = std::mem::size_of::<usize>() * 2;
    /// The maximum allocation size for a [`HeapBytesGrowable`].
    pub const MAX_ALLOCATION_SIZE: usize = usize::MAX >> 1;

    #[inline]
    pub fn new(slice: &[u8]) -> Self {
        let len = slice.len();
        let cap = len.max(Self::MIN_ALLOCATION_SIZE);

        debug_assert!(cap <= Self::MAX_ALLOCATION_SIZE, "too large of allocation");
        let ptr = heap::alloc_ptr(cap);

        // SAFETY: `ptr` was just allocated for `cap >= len` bytes, so it does not overlap `slice`.
        unsafe { ptr.as_ptr().copy_from_nonoverlapping(slice.as_ptr(), len) };

        HeapBytesGrowable { ptr, len, cap }
    }

    pub fn with_capacity(capacity: usize) -> Self {
        assert!(
            capacity <= Self::MAX_ALLOCATION_SIZE,
            "too large of allocation"
        );

        let len = 0;
        let cap = capacity.max(Self::MIN_ALLOCATION_SIZE);
        let ptr = heap::alloc_ptr(cap);

        // Record the rounded-up `cap`, which is the size we actually allocated.
        HeapBytesGrowable { ptr, len, cap }
    }

    pub fn with_additional(slice: &[u8], additional: usize) -> Self {
        let new_capacity = Self::amortized_growth(slice.len(), additional);
        let mut heap = Self::with_capacity(new_capacity);

        debug_assert!(heap.cap > slice.len());

        // SAFETY: `src` and `dst` are both valid for `slice.len()` bytes, and they do not overlap.
        unsafe {
            std::ptr::copy_nonoverlapping(slice.as_ptr(), heap.ptr.as_ptr(), slice.len());
        };
        // Set our length.
        heap.len = slice.len();

        heap
    }

    /// Forces the length of this [`HeapBytesGrowable`] to `len`.
    ///
    /// # Safety
    ///
    /// * `len` must be less than or equal to `cap`.
    /// * The bytes at `old_len..len` must be initialized.
    pub unsafe fn set_len(&mut self, len: usize) {
        self.len = len;
    }

    pub fn realloc(&mut self, new_capacity: usize) -> Result<usize, ()> {
        // Can't shrink the heap allocation to be less than the length, because we'd lose data.
        if new_capacity < self.len {
            return Err(());
        }
        // Do not reallocate to 0 capacity.
        if new_capacity == 0 {
            return Err(());
        }

        // Always allocate some minimum number of bytes.
        let new_capacity = new_capacity.max(Self::MIN_ALLOCATION_SIZE);

        // Already at the appropriate size!
        if new_capacity == self.cap {
            return Ok(new_capacity);
        }

        let cur_layout = heap::layout(self.cap);
        let new_layout = heap::layout(new_capacity);

        // Check for overflow.
        let new_size = new_layout.size();
        if new_size < new_capacity {
            return Err(());
        }

        // SAFETY:
        // * Our pointer was allocated via the same allocator.
        // * We used the same layout for the previous allocation.
        // * `new_size` is correct.
        let raw_ptr = unsafe { alloc::realloc(self.ptr.as_ptr(), cur_layout, new_size) };
        let ptr = NonNull::new(raw_ptr).ok_or(())?;

        self.ptr = ptr;
        self.cap = new_capacity;

        Ok(new_capacity)
    }

    #[inline]
    fn dealloc(&mut self) {
        heap::dealloc_ptr(self.ptr, self.cap);
    }

    /// [`HeapBytesGrowable`] grows at an amortized rate of 1.5x.
    ///
    /// Note: this is different from [`std::vec::Vec`], which grows at a rate of 2x. It's debated
    /// which is better; for now we'll stick with a rate of 1.5x.
    #[inline(always)]
    pub fn amortized_growth(cur_len: usize, additional: usize) -> usize {
        let required = cur_len.saturating_add(additional);
        let amortized = cur_len.saturating_mul(3) / 2;
        amortized.max(required)
    }
}

impl Drop for HeapBytesGrowable {
    fn drop(&mut self) {
        self.dealloc()
    }
}
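
// A hedged sketch of the growable heap path: `new` copies the slice into an
// allocation of at least MIN_ALLOCATION_SIZE bytes, `amortized_growth` computes
// the 1.5x growth factor described above, and `realloc` refuses to shrink below
// the current length. The test module and test names are illustrative additions.
#[cfg(test)]
mod heap_bytes_growable_tests {
    use super::*;

    #[test]
    fn new_copies_the_slice_and_rounds_capacity_up() {
        let heap = HeapBytesGrowable::new(b"hello");
        assert_eq!(heap.len, 5);
        assert!(heap.cap >= HeapBytesGrowable::MIN_ALLOCATION_SIZE);

        // SAFETY: `ptr` points at an allocation of `cap >= len` bytes, and the
        // first `len` bytes were initialized by `new`.
        let bytes = unsafe { std::slice::from_raw_parts(heap.ptr.as_ptr(), heap.len) };
        assert_eq!(bytes, b"hello");
    }

    #[test]
    fn amortized_growth_is_one_point_five_x() {
        // Growth is max(cur_len + additional, cur_len * 3 / 2).
        assert_eq!(HeapBytesGrowable::amortized_growth(10, 1), 15);
        assert_eq!(HeapBytesGrowable::amortized_growth(0, 7), 7);
        assert_eq!(HeapBytesGrowable::amortized_growth(100, 80), 180);
    }

    #[test]
    fn realloc_refuses_to_shrink_below_len() {
        let mut heap = HeapBytesGrowable::new(b"hello world");
        assert!(heap.realloc(4).is_err());
        assert!(heap.realloc(64).is_ok());
        assert_eq!(heap.cap, 64);
    }
}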

/// A fixed size heap allocation of bytes.
#[repr(C)]
struct HeapBytesFixed {
    ptr: NonNull<u8>,
    len: usize,
}
static_assertions::assert_eq_size!(HeapBytesFixed, Box<[u8]>);

impl HeapBytesFixed {
    /// The maximum allocation size for a [`HeapBytesFixed`].
    pub const MAX_ALLOCATION_SIZE: usize = usize::MAX >> 1;

    #[inline]
    pub fn new(slice: &[u8]) -> Self {
        let len = slice.len();
        debug_assert!(len <= Self::MAX_ALLOCATION_SIZE, "too large of allocation");

        let ptr = heap::alloc_ptr(len);
        // SAFETY: `ptr` was just allocated for `len` bytes, so it does not overlap `slice`.
        unsafe { ptr.as_ptr().copy_from_nonoverlapping(slice.as_ptr(), len) };

        HeapBytesFixed { ptr, len }
    }

    #[inline]
    fn dealloc(&mut self) {
        heap::dealloc_ptr(self.ptr, self.len);
    }
}
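
// A hedged sketch of the fixed heap path: the allocation is exactly `len` bytes
// with no spare capacity. `HeapBytesFixed` has no `Drop` impl in this module
// (cleanup is presumably handled by its wrapper), so this sketch frees the
// allocation explicitly. The test module and test names are illustrative additions.
#[cfg(test)]
mod heap_bytes_fixed_tests {
    use super::*;

    #[test]
    fn new_allocates_exactly_len_bytes() {
        let mut heap = HeapBytesFixed::new(b"hello");
        assert_eq!(heap.len, 5);

        // SAFETY: `ptr` points at `len` bytes that were initialized from the slice.
        unsafe {
            let bytes = std::slice::from_raw_parts(heap.ptr.as_ptr(), heap.len);
            assert_eq!(bytes, b"hello");
        }

        // Free the allocation manually, since there is no `Drop` impl to do it for us.
        heap.dealloc();
    }
}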

mod heap {
    use std::alloc;
    use std::ptr::NonNull;

    /// Create an allocation with [`layout`].
    #[inline]
    pub(crate) fn alloc_ptr(capacity: usize) -> NonNull<u8> {
        let layout = layout(capacity);
        debug_assert!(layout.size() > 0);

        // SAFETY: We ensure the layout is not zero-sized by enforcing a minimum size.
        let ptr = unsafe { alloc::alloc(layout) };

        NonNull::new(ptr).expect("failed to allocate HeapBytes")
    }

    /// Deallocate an allocation that was created with [`layout`].
    #[inline]
    pub(crate) fn dealloc_ptr(ptr: NonNull<u8>, capacity: usize) {
        let layout = layout(capacity);

        // SAFETY:
        // * The pointer was allocated via this allocator.
        // * We used the same layout when allocating.
        unsafe { alloc::dealloc(ptr.as_ptr(), layout) };
    }

    /// Returns the [`alloc::Layout`] for [`HeapBytesGrowable`] and [`HeapBytesFixed`].
    ///
    /// [`HeapBytesGrowable`]: super::HeapBytesGrowable
    /// [`HeapBytesFixed`]: super::HeapBytesFixed
    #[inline(always)]
    pub(crate) fn layout(capacity: usize) -> alloc::Layout {
        debug_assert!(capacity > 0, "tried to allocate HeapBytes with 0 capacity");
        alloc::Layout::array::<u8>(capacity).expect("valid capacity")
    }
}
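
// A hedged round-trip sketch of the raw allocation helpers: `alloc_ptr` returns
// a non-null pointer to `capacity` bytes laid out as a `[u8]` array, and
// `dealloc_ptr` must be called with the same capacity to release it. The test
// module and test names are illustrative additions.
#[cfg(test)]
mod heap_alloc_tests {
    use super::heap;

    #[test]
    fn alloc_write_read_dealloc_round_trip() {
        let capacity = 8;
        let ptr = heap::alloc_ptr(capacity);

        // SAFETY: the allocation is `capacity` bytes, so writing and reading the
        // first byte stays in bounds.
        unsafe {
            ptr.as_ptr().write(42u8);
            assert_eq!(ptr.as_ptr().read(), 42);
        }

        heap::dealloc_ptr(ptr, capacity);
    }
}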