Skip to main content

kevy_bytes/
lib.rs

1//! `SmallBytes` — a 24-byte small-byte-string with inline-SSO optimization.
2//!
3//! Layout (**little-endian only**): a union of two 24-byte variants, distinguished
4//! by the byte at offset 23:
5//!
6//! - **Inline**: `[u8; 23]` data, then `u8` tag holding the inline length
7//!   (0..=22). The whole string lives in the value, no allocation.
8//! - **Heap (64-bit)**: `NonNull<u8>` ptr (8) + `usize` len (8) + `usize`
9//!   cap_and_tag (8). The high byte of `cap_and_tag` overlaps byte 23 of
10//!   the union and is fixed at `0xFF` (> 22) as the heap discriminator. The
11//!   low 56 bits hold the heap capacity (up to 72 PB).
12//! - **Heap (32-bit)**: `NonNull<u8>` ptr (4) + `u32` len (4) + `u32`
13//!   cap (4) + 11-byte pad, then `u8` tag fixed at `0xFF`. Same 24-byte
14//!   total, same discriminator byte at offset 23 — pointer / len fields
15//!   are 32-bit-native so a `wasm32-unknown-unknown` build picks up the
16//!   right size without shifting a `usize` past its bit width.
17//!
18//! The 64-bit layout is the one the kevy server runs on, and is locked
19//! against perf-affecting changes (cfg-gated 32-bit alternative lives
20//! alongside it without touching any 64-bit code path).
21//!
22//! This lets us store every byte string up to 22 bytes — covering the vast
23//! majority of Redis-style values — without any pointer-chase, while keeping
24//! `size_of::<SmallBytes>() == 24` (same as `Vec<u8>`). Used by `kevy-store`
25//! to make `Value::Str(SmallBytes)` fit alongside the boxed collection
26//! variants and keep `Entry` at 48 B.
27
28#![warn(missing_docs)]
29
30#[cfg(target_endian = "big")]
31compile_error!("kevy-bytes requires little-endian: heap-tag byte overlaps inline length byte");
32
33mod find_crlf;
34mod traits;
35
36pub use find_crlf::find_crlf;
37
38use std::alloc::{Layout, alloc, dealloc, handle_alloc_error};
39use std::mem::{self, ManuallyDrop};
40use std::ptr::NonNull;
41use std::slice;
42
43pub(crate) const INLINE_CAP: usize = 23;
44pub(crate) const INLINE_LEN_MAX: u8 = (INLINE_CAP - 1) as u8;
45
46#[cfg(target_pointer_width = "64")]
47const TAG_HEAP_BIT: usize = 0xFFusize << 56;
48#[cfg(target_pointer_width = "64")]
49const CAP_MASK: usize = (1usize << 56) - 1;
50
51/// Heap-rep marker byte at offset 23. Used by the 32-bit `Heap::new` to
52/// set its dedicated `tag` field; the 64-bit path encodes the same byte
53/// implicitly via the high byte of `cap_and_tag`.
54#[cfg(target_pointer_width = "32")]
55const HEAP_TAG_BYTE: u8 = 0xFF;
56
57#[repr(C)]
58#[derive(Copy, Clone)]
59struct Inline {
60    data: [u8; INLINE_CAP],
61    /// 0..=22 = inline length. The heap rep sets this byte to 0xFF either via
62    /// the high byte of `Heap::cap_and_tag` (64-bit, little-endian overlap)
63    /// or as a dedicated `tag` field at offset 23 (32-bit).
64    tag: u8,
65}
66
67/// 64-bit Heap rep — `ptr|len|cap_and_tag` × usize. High byte of
68/// `cap_and_tag` shadows `Inline::tag` (LE) so the discriminator byte at
69/// offset 23 = `0xFF`. Locked layout: the kevy server runs here and the
70/// perf budget assumes this exact shape.
71#[cfg(target_pointer_width = "64")]
72#[repr(C)]
73#[derive(Copy, Clone)]
74pub(crate) struct Heap {
75    pub(crate) ptr: NonNull<u8>,
76    pub(crate) len: usize,
77    /// High byte = 0xFF (heap marker, shadows `Inline::tag`); low 56 bits =
78    /// capacity (from the source `Vec<u8>` or our own alloc; ≥ len).
79    pub(crate) cap_and_tag: usize,
80}
81
82/// 32-bit Heap rep — `ptr(4)|len(4)|cap(4)|pad(11)|tag(1)`. The dedicated
83/// `tag` byte at offset 23 (= `0xFF`) plays the role the 64-bit `cap_and_tag`
84/// high byte does, so the discriminator check at offset 23 stays identical
85/// across both layouts. Unlocks `wasm32-unknown-unknown` (Wave 3 #7) without
86/// touching the 64-bit hot path.
87#[cfg(target_pointer_width = "32")]
88#[repr(C)]
89#[derive(Copy, Clone)]
90pub(crate) struct Heap {
91    pub(crate) ptr: NonNull<u8>,
92    pub(crate) len: u32,
93    pub(crate) cap: u32,
94    pub(crate) _pad: [u8; 11],
95    pub(crate) tag: u8,
96}
97
98impl Heap {
99    /// Build a Heap rep tagging the discriminator byte to `0xFF`. cfg-gated
100    /// so each pointer-width hits its native fields without runtime cost.
101    #[cfg(target_pointer_width = "64")]
102    #[inline]
103    pub(crate) fn new(ptr: NonNull<u8>, len: usize, cap: usize) -> Self {
104        debug_assert!(cap <= CAP_MASK, "kevy-bytes: capacity exceeds 56-bit field");
105        Self {
106            ptr,
107            len,
108            cap_and_tag: TAG_HEAP_BIT | (cap & CAP_MASK),
109        }
110    }
111    #[cfg(target_pointer_width = "32")]
112    #[inline]
113    pub(crate) fn new(ptr: NonNull<u8>, len: usize, cap: usize) -> Self {
114        // On 32-bit, `Vec<u8>` is bounded by the 4 GiB address space, so
115        // any source `len`/`cap` already fits in `u32`. Debug-assert to
116        // catch unexpected callers.
117        debug_assert!(
118            len <= u32::MAX as usize && cap <= u32::MAX as usize,
119            "kevy-bytes: len/cap exceeds u32 on 32-bit platform"
120        );
121        Self {
122            ptr,
123            len: len as u32,
124            cap: cap as u32,
125            _pad: [0; 11],
126            tag: HEAP_TAG_BYTE,
127        }
128    }
129
130    /// Live capacity (always returned as `usize` regardless of underlying
131    /// field width).
132    #[cfg(target_pointer_width = "64")]
133    #[inline]
134    fn capacity(&self) -> usize {
135        self.cap_and_tag & CAP_MASK
136    }
137    #[cfg(target_pointer_width = "32")]
138    #[inline]
139    fn capacity(&self) -> usize {
140        self.cap as usize
141    }
142
143    /// Live length (always `usize`).
144    #[cfg(target_pointer_width = "64")]
145    #[inline]
146    fn length(&self) -> usize {
147        self.len
148    }
149    #[cfg(target_pointer_width = "32")]
150    #[inline]
151    fn length(&self) -> usize {
152        self.len as usize
153    }
154}
155
156/// A 24-byte owned byte string with inline small-string optimization.
157///
158/// Strings of up to 22 bytes live entirely inside the value (no allocation,
159/// no pointer chase); larger strings spill to a heap buffer. The
160/// discriminator is a single byte at offset 23 (the tag, which doubles as
161/// the inline length 0..=22 OR equals 0xFF when the heap variant is active).
162///
163/// See the crate root for layout details.
164#[repr(C)]
165pub union SmallBytes {
166    inline: Inline,
167    heap: Heap,
168}
169
170const _: () = {
171    assert!(mem::size_of::<SmallBytes>() == 24);
172    assert!(mem::align_of::<SmallBytes>() == mem::align_of::<usize>());
173};
174
175unsafe impl Send for SmallBytes {}
176unsafe impl Sync for SmallBytes {}
177
178impl SmallBytes {
179    /// Empty inline `SmallBytes` (zero allocation).
180    pub const fn new() -> Self {
181        Self {
182            inline: Inline {
183                data: [0; INLINE_CAP],
184                tag: 0,
185            },
186        }
187    }
188
189    /// Construct from a byte slice — inline if `bytes.len() <= 22`, else heap.
190    pub fn from_slice(bytes: &[u8]) -> Self {
191        if bytes.len() <= INLINE_LEN_MAX as usize {
192            let mut data = [0u8; INLINE_CAP];
193            // SAFETY: bytes.len() ≤ 22 ≤ data.len(); non-overlapping regions.
194            unsafe {
195                std::ptr::copy_nonoverlapping(bytes.as_ptr(), data.as_mut_ptr(), bytes.len());
196            }
197            Self {
198                inline: Inline {
199                    data,
200                    tag: bytes.len() as u8,
201                },
202            }
203        } else {
204            Self::alloc_heap(bytes)
205        }
206    }
207
208    /// Take ownership of a `Vec<u8>` — inline if `vec.len() <= 22`, else **reuse
209    /// the vec's allocation** (no copy on the heap path).
210    pub fn from_vec(vec: Vec<u8>) -> Self {
211        if vec.len() <= INLINE_LEN_MAX as usize {
212            Self::from_slice(&vec)
213        } else {
214            let mut v = ManuallyDrop::new(vec);
215            // SAFETY: len > 22 ⇒ cap > 0 ⇒ Vec has an allocation, so the pointer
216            // is non-null. Vec guarantees a non-null pointer for any allocated
217            // Vec (and a dangling-but-non-null for empty, which we don't hit here).
218            let ptr = unsafe { NonNull::new_unchecked(v.as_mut_ptr()) };
219            let len = v.len();
220            let cap = v.capacity();
221            Self {
222                heap: Heap::new(ptr, len, cap),
223            }
224        }
225    }
226
227    #[inline]
228    fn alloc_heap(bytes: &[u8]) -> Self {
229        let len = bytes.len();
230        // `len > 22` (caller has already taken the heap branch) and `len` is
231        // a slice length ⇒ ≤ `isize::MAX` ⇒ well below the `usize::MAX -
232        // (align - 1)` bound `from_size_align_unchecked` needs. u8's align is 1.
233        // SAFETY: see above.
234        let layout = unsafe { Layout::from_size_align_unchecked(len, 1) };
235        // SAFETY: layout.size() > 0 (caller's heap branch guarantees len > 22).
236        let raw = unsafe { alloc(layout) };
237        let Some(ptr) = NonNull::new(raw) else {
238            handle_alloc_error(layout)
239        };
240        // SAFETY: alloc returned a writable region of `len` bytes; source is a
241        // disjoint slice.
242        unsafe {
243            std::ptr::copy_nonoverlapping(bytes.as_ptr(), ptr.as_ptr(), len);
244        }
245        Self {
246            heap: Heap::new(ptr, len, len),
247        }
248    }
249
250    /// True when stored inline; the byte at index 23 is the deciding tag in
251    /// either rep, so the check is a single load + compare.
252    #[inline]
253    fn is_inline(&self) -> bool {
254        // SAFETY: byte 23 is always initialised — either as Inline::tag (0..=22)
255        // or as the high byte of Heap::cap_and_tag (= 0xFF). Reading it through
256        // the Inline view is valid in either case (the union is `repr(C)`).
257        unsafe { self.inline.tag <= INLINE_LEN_MAX }
258    }
259
260    /// Number of bytes stored.
261    #[inline]
262    pub fn len(&self) -> usize {
263        if self.is_inline() {
264            // SAFETY: just verified `inline.tag` ≤ 22.
265            unsafe { self.inline.tag as usize }
266        } else {
267            // SAFETY: tag > 22 ⇒ heap variant is active.
268            unsafe { self.heap.length() }
269        }
270    }
271
272    /// Whether `len() == 0`.
273    #[inline]
274    pub fn is_empty(&self) -> bool {
275        self.len() == 0
276    }
277
278    /// Bytes this value holds on the heap (0 when inline). Lets memory-accounting
279    /// callers (e.g. `maxmemory` enforcement) charge only the off-stack footprint
280    /// without re-deriving the inline-length threshold.
281    #[inline]
282    pub fn heap_bytes(&self) -> usize {
283        if self.is_inline() { 0 } else { self.len() }
284    }
285
286    /// Borrow the bytes (no allocation; same for inline and heap variants).
287    #[inline]
288    pub fn as_slice(&self) -> &[u8] {
289        if self.is_inline() {
290            // SAFETY: first `tag` bytes of `data` are valid (zero-init at construction).
291            unsafe {
292                slice::from_raw_parts(self.inline.data.as_ptr(), self.inline.tag as usize)
293            }
294        } else {
295            // SAFETY: heap variant active; ptr/len originate from a Vec or our own alloc.
296            unsafe { slice::from_raw_parts(self.heap.ptr.as_ptr(), self.heap.length()) }
297        }
298    }
299
300    /// Copy into a fresh `Vec<u8>` (clone semantics).
301    pub fn to_vec(&self) -> Vec<u8> {
302        self.as_slice().to_vec()
303    }
304
305    /// Consume self and return an owned `Vec<u8>`. The heap path reuses the
306    /// existing allocation; the inline path copies into a new vec.
307    pub fn into_vec(self) -> Vec<u8> {
308        if self.is_inline() {
309            self.as_slice().to_vec()
310            // self drops as inline — nothing to free.
311        } else {
312            // SAFETY: heap variant active.
313            let (ptr, len, cap) = unsafe {
314                (
315                    self.heap.ptr.as_ptr(),
316                    self.heap.length(),
317                    self.heap.capacity(),
318                )
319            };
320            // Skip our Drop to avoid double-free; Vec::from_raw_parts now owns it.
321            let _do_not_drop = ManuallyDrop::new(self);
322            // SAFETY: ptr/len/cap originated from either a Vec<u8> (from_vec)
323            // or our own `alloc(Layout::array::<u8>(cap))` (alloc_heap, where
324            // cap == len) — both meet Vec::from_raw_parts' requirements.
325            unsafe { Vec::from_raw_parts(ptr, len, cap) }
326        }
327    }
328}
329
330impl Default for SmallBytes {
331    fn default() -> Self {
332        Self::new()
333    }
334}
335
336impl Drop for SmallBytes {
337    fn drop(&mut self) {
338        if self.is_inline() {
339            return;
340        }
341        // SAFETY: heap variant active; layout matches the one used at alloc
342        // time (either from Vec — Vec uses `Layout::array::<u8>(cap)` — or our
343        // own alloc_heap which used the same layout).
344        unsafe {
345            let cap = self.heap.capacity();
346            let layout = Layout::array::<u8>(cap).expect("kevy-bytes: drop layout");
347            dealloc(self.heap.ptr.as_ptr(), layout);
348        }
349    }
350}
351
352impl Clone for SmallBytes {
353    /// Specialised clone that bypasses `as_slice → from_slice → alloc_heap`'s
354    /// two layered length checks. Inline variant is a bitwise union copy (no
355    /// branch through the slice path); heap variant goes straight to a single
356    /// `alloc + memcpy` keyed on the already-known heap length.
357    #[inline]
358    fn clone(&self) -> Self {
359        if self.is_inline() {
360            // SAFETY: `Inline` is `repr(C)` + `Copy`; bitwise copy is sound
361            // when the source is currently in the inline variant (the tag
362            // byte ≤ 22 is part of the bit pattern we're copying, so the
363            // discriminator stays correct).
364            unsafe { Self { inline: self.inline } }
365        } else {
366            // SAFETY: tag > 22 ⇒ heap variant is active.
367            unsafe { self.clone_heap() }
368        }
369    }
370}
371
372impl SmallBytes {
373    /// Heap-fast-path clone. Caller must have established that `self` is in
374    /// the heap variant.
375    ///
376    /// # Safety
377    /// `self.heap` must be the active union variant (i.e. `is_inline()` is
378    /// false). `self.heap.ptr` must point to `self.heap.len` valid bytes.
379    #[inline]
380    unsafe fn clone_heap(&self) -> Self {
381        // SAFETY (covers the three `self.heap.*` reads): caller asserts the
382        // heap variant is active.
383        let (src_ptr, len) = unsafe { (self.heap.ptr.as_ptr(), self.heap.length()) };
384        // `len > 22 ⇒ len > 0`, and the high bits are guarded by `CAP_MASK`
385        // never letting cap exceed 2^56, well below `isize::MAX`, so the
386        // unchecked layout is sound. Allocator alignment for `u8` is 1.
387        let layout = unsafe { Layout::from_size_align_unchecked(len, 1) };
388        // SAFETY: layout.size() > 0.
389        let raw = unsafe { alloc(layout) };
390        let Some(ptr) = NonNull::new(raw) else {
391            handle_alloc_error(layout)
392        };
393        // SAFETY: src has `len` valid bytes; dst is freshly-allocated for `len`
394        // bytes; regions are disjoint.
395        unsafe { std::ptr::copy_nonoverlapping(src_ptr, ptr.as_ptr(), len) };
396        Self {
397            heap: Heap::new(ptr, len, len),
398        }
399    }
400}
401
402// `Debug`, `PartialOrd`, `Ord`, `Hash`, `AsRef<[u8]>`, `Borrow<[u8]>`,
403// `KevyHash`, `From<&[u8]>`, `From<Vec<u8>>` live in `crate::traits` —
404// they only need the public `as_slice()` view. `PartialEq` / `Eq` stay
405// here because the same-variant fast paths reach into `self.inline` /
406// `self.heap` directly.
407
408impl PartialEq for SmallBytes {
409    /// Specialised over the slice form (`as_slice == as_slice`) by branching
410    /// on variant **once** and reading the relevant length / pointer pair
411    /// directly. Same-variant cases (inline/inline + heap/heap, which are the
412    /// only ones produced by a single allocator) skip a redundant `as_slice`
413    /// dispatch on each side; the mixed case falls back to the slice form.
414    #[inline]
415    fn eq(&self, other: &Self) -> bool {
416        // SAFETY: byte 23 (`inline.tag`) is always a valid load in either
417        // variant — it's either the inline-length 0..=22 or 0xFF as the
418        // heap-discriminator overlap (see crate doc).
419        let self_tag = unsafe { self.inline.tag };
420        let other_tag = unsafe { other.inline.tag };
421        let self_inline = self_tag <= INLINE_LEN_MAX;
422        let other_inline = other_tag <= INLINE_LEN_MAX;
423        match (self_inline, other_inline) {
424            (true, true) => {
425                let len = self_tag as usize;
426                if len != other_tag as usize {
427                    return false;
428                }
429                // SAFETY: both in inline variant; first `len` bytes valid.
430                let a = unsafe {
431                    slice::from_raw_parts(self.inline.data.as_ptr(), len)
432                };
433                let b = unsafe {
434                    slice::from_raw_parts(other.inline.data.as_ptr(), len)
435                };
436                a == b
437            }
438            (false, false) => {
439                // SAFETY: both in heap variant.
440                let (a_len, b_len) =
441                    unsafe { (self.heap.length(), other.heap.length()) };
442                if a_len != b_len {
443                    return false;
444                }
445                // SAFETY: heap pointers + len are valid.
446                let a = unsafe {
447                    slice::from_raw_parts(self.heap.ptr.as_ptr(), a_len)
448                };
449                let b = unsafe {
450                    slice::from_raw_parts(other.heap.ptr.as_ptr(), b_len)
451                };
452                a == b
453            }
454            // Mixed inline/heap: this IS reachable in normal operation.
455            // It happens whenever HashMap (or any `==` consumer) compares
456            // an inline-length value (len ≤ 22) against a heap-length
457            // value (len > 22). Two SmallBytes of different lengths can
458            // *collide* on hashbrown's hash + quadratic probe, and the
459            // probe checks equality even though the lengths differ. The
460            // pre-fix `unreachable!()` here was a logic bug — it assumed
461            // the same-arm short-circuits cover all cases, but they only
462            // fire when both sides land in the same arm. Different-length
463            // collisions correctly fall through here. The right answer
464            // is just slice-form equality (which short-circuits on `len`
465            // internally), giving `false` whenever the lengths differ.
466            _ => self.as_slice() == other.as_slice(),
467        }
468    }
469}
470impl Eq for SmallBytes {}
471
472
473#[cfg(test)]
474mod tests;