kevy_bytes/lib.rs
1//! `SmallBytes` — a 24-byte small-byte-string with inline-SSO optimization.
2//!
3//! Layout (**little-endian only**): a union of two 24-byte variants, distinguished
4//! by the byte at offset 23:
5//!
6//! - **Inline**: `[u8; 23]` data, then `u8` tag holding the inline length
7//! (0..=22). The whole string lives in the value, no allocation.
8//! - **Heap (64-bit)**: `NonNull<u8>` ptr (8) + `usize` len (8) + `usize`
9//! cap_and_tag (8). The high byte of `cap_and_tag` overlaps byte 23 of
10//! the union and is fixed at `0xFF` (> 22) as the heap discriminator. The
11//! low 56 bits hold the heap capacity (up to 72 PB).
12//! - **Heap (32-bit)**: `NonNull<u8>` ptr (4) + `u32` len (4) + `u32`
13//! cap (4) + 11-byte pad, then `u8` tag fixed at `0xFF`. Same 24-byte
14//! total, same discriminator byte at offset 23 — pointer / len fields
15//! are 32-bit-native so a `wasm32-unknown-unknown` build picks up the
16//! right size without shifting a `usize` past its bit width.
17//!
18//! The 64-bit layout is the one the kevy server runs on, and is locked
19//! against perf-affecting changes (cfg-gated 32-bit alternative lives
20//! alongside it without touching any 64-bit code path).
21//!
22//! This lets us store every byte string up to 22 bytes — covering the vast
23//! majority of Redis-style values — without any pointer-chase, while keeping
24//! `size_of::<SmallBytes>() == 24` (same as `Vec<u8>`). Used by `kevy-store`
25//! to make `Value::Str(SmallBytes)` fit alongside the boxed collection
26//! variants and keep `Entry` at 48 B.
27
28#![warn(missing_docs)]
29
30#[cfg(target_endian = "big")]
31compile_error!("kevy-bytes requires little-endian: heap-tag byte overlaps inline length byte");
32
33mod traits;
34
35use std::alloc::{Layout, alloc, dealloc, handle_alloc_error};
36use std::mem::{self, ManuallyDrop};
37use std::ptr::NonNull;
38use std::slice;
39
/// Number of payload bytes in the inline representation. The tag byte that
/// follows them (offset 23) completes the 24-byte value.
const INLINE_CAP: usize = 23;
/// Largest length the inline tag byte may hold; any greater tag value means
/// the heap representation is active.
const INLINE_LEN_MAX: u8 = (INLINE_CAP - 1) as u8;

/// 64-bit only: `0xFF` in the most significant byte of `Heap::cap_and_tag`,
/// which is the byte that marks a value as heap-backed.
#[cfg(target_pointer_width = "64")]
const TAG_HEAP_BIT: usize = 0xFFusize << (usize::BITS - 8);
/// 64-bit only: selects the capacity bits of `Heap::cap_and_tag`, i.e. every
/// bit not claimed by `TAG_HEAP_BIT` (the low 56 bits).
#[cfg(target_pointer_width = "64")]
const CAP_MASK: usize = !TAG_HEAP_BIT;

/// Heap-rep marker byte at offset 23. The 32-bit `Heap::new` stores it in
/// its dedicated `tag` field; the 64-bit layout carries the same byte value
/// in the high byte of `cap_and_tag` instead.
#[cfg(target_pointer_width = "32")]
const HEAP_TAG_BYTE: u8 = 0xFF;
53
54#[repr(C)]
55#[derive(Copy, Clone)]
56struct Inline {
57 data: [u8; INLINE_CAP],
58 /// 0..=22 = inline length. The heap rep sets this byte to 0xFF either via
59 /// the high byte of `Heap::cap_and_tag` (64-bit, little-endian overlap)
60 /// or as a dedicated `tag` field at offset 23 (32-bit).
61 tag: u8,
62}
63
/// Heap representation on 64-bit targets: three machine words,
/// `ptr | len | cap_and_tag`.
///
/// On little-endian the most significant byte of `cap_and_tag` sits at
/// offset 23 and therefore shadows `Inline::tag`; it is kept at `0xFF` so
/// the discriminator byte reads as "heap". This layout is locked: the kevy
/// server runs here and the perf budget assumes this exact shape.
#[cfg(target_pointer_width = "64")]
#[repr(C)]
#[derive(Clone, Copy)]
struct Heap {
    /// Start of the owned heap buffer.
    ptr: NonNull<u8>,
    /// Number of payload bytes stored in the buffer.
    len: usize,
    /// High byte = 0xFF (heap marker, shadows `Inline::tag`); low 56 bits =
    /// capacity of the buffer (taken from the source `Vec<u8>` or from our
    /// own allocation; ≥ `len`).
    cap_and_tag: usize,
}
78
/// Heap representation on 32-bit targets:
/// `ptr(4) | len(4) | cap(4) | pad(11) | tag(1)`, 24 bytes in total.
///
/// The dedicated `tag` byte at offset 23 (always `0xFF`) does the job that
/// the high byte of `cap_and_tag` does on 64-bit, so the discriminator check
/// at offset 23 is the same for both layouts. Unlocks
/// `wasm32-unknown-unknown` (Wave 3 #7) without touching the 64-bit hot path.
#[cfg(target_pointer_width = "32")]
#[repr(C)]
#[derive(Clone, Copy)]
struct Heap {
    /// Start of the owned heap buffer.
    ptr: NonNull<u8>,
    /// Number of payload bytes stored in the buffer.
    len: u32,
    /// Capacity of the buffer in bytes.
    cap: u32,
    /// Filler that pushes `tag` out to offset 23; `Heap::new` zeroes it.
    _pad: [u8; 11],
    /// Heap discriminator, overlapping `Inline::tag`.
    tag: u8,
}
94
95impl Heap {
96 /// Build a Heap rep tagging the discriminator byte to `0xFF`. cfg-gated
97 /// so each pointer-width hits its native fields without runtime cost.
98 #[cfg(target_pointer_width = "64")]
99 #[inline]
100 fn new(ptr: NonNull<u8>, len: usize, cap: usize) -> Self {
101 debug_assert!(cap <= CAP_MASK, "kevy-bytes: capacity exceeds 56-bit field");
102 Self {
103 ptr,
104 len,
105 cap_and_tag: TAG_HEAP_BIT | (cap & CAP_MASK),
106 }
107 }
108 #[cfg(target_pointer_width = "32")]
109 #[inline]
110 fn new(ptr: NonNull<u8>, len: usize, cap: usize) -> Self {
111 // On 32-bit, `Vec<u8>` is bounded by the 4 GiB address space, so
112 // any source `len`/`cap` already fits in `u32`. Debug-assert to
113 // catch unexpected callers.
114 debug_assert!(
115 len <= u32::MAX as usize && cap <= u32::MAX as usize,
116 "kevy-bytes: len/cap exceeds u32 on 32-bit platform"
117 );
118 Self {
119 ptr,
120 len: len as u32,
121 cap: cap as u32,
122 _pad: [0; 11],
123 tag: HEAP_TAG_BYTE,
124 }
125 }
126
127 /// Live capacity (always returned as `usize` regardless of underlying
128 /// field width).
129 #[cfg(target_pointer_width = "64")]
130 #[inline]
131 fn capacity(&self) -> usize {
132 self.cap_and_tag & CAP_MASK
133 }
134 #[cfg(target_pointer_width = "32")]
135 #[inline]
136 fn capacity(&self) -> usize {
137 self.cap as usize
138 }
139
140 /// Live length (always `usize`).
141 #[cfg(target_pointer_width = "64")]
142 #[inline]
143 fn length(&self) -> usize {
144 self.len
145 }
146 #[cfg(target_pointer_width = "32")]
147 #[inline]
148 fn length(&self) -> usize {
149 self.len as usize
150 }
151}
152
153/// A 24-byte owned byte string with inline small-string optimization.
154///
155/// Strings of up to 22 bytes live entirely inside the value (no allocation,
156/// no pointer chase); larger strings spill to a heap buffer. The
157/// discriminator is a single byte at offset 23 (the tag, which doubles as
158/// the inline length 0..=22 OR equals 0xFF when the heap variant is active).
159///
160/// See the crate root for layout details.
161#[repr(C)]
162pub union SmallBytes {
163 inline: Inline,
164 heap: Heap,
165}
166
167const _: () = {
168 assert!(mem::size_of::<SmallBytes>() == 24);
169 assert!(mem::align_of::<SmallBytes>() == mem::align_of::<usize>());
170};
171
172unsafe impl Send for SmallBytes {}
173unsafe impl Sync for SmallBytes {}
174
175impl SmallBytes {
176 /// Empty inline `SmallBytes` (zero allocation).
177 pub const fn new() -> Self {
178 Self {
179 inline: Inline {
180 data: [0; INLINE_CAP],
181 tag: 0,
182 },
183 }
184 }
185
186 /// Construct from a byte slice — inline if `bytes.len() <= 22`, else heap.
187 pub fn from_slice(bytes: &[u8]) -> Self {
188 if bytes.len() <= INLINE_LEN_MAX as usize {
189 let mut data = [0u8; INLINE_CAP];
190 // SAFETY: bytes.len() ≤ 22 ≤ data.len(); non-overlapping regions.
191 unsafe {
192 std::ptr::copy_nonoverlapping(bytes.as_ptr(), data.as_mut_ptr(), bytes.len());
193 }
194 Self {
195 inline: Inline {
196 data,
197 tag: bytes.len() as u8,
198 },
199 }
200 } else {
201 Self::alloc_heap(bytes)
202 }
203 }
204
205 /// Take ownership of a `Vec<u8>` — inline if `vec.len() <= 22`, else **reuse
206 /// the vec's allocation** (no copy on the heap path).
207 pub fn from_vec(vec: Vec<u8>) -> Self {
208 if vec.len() <= INLINE_LEN_MAX as usize {
209 Self::from_slice(&vec)
210 } else {
211 let mut v = ManuallyDrop::new(vec);
212 // SAFETY: len > 22 ⇒ cap > 0 ⇒ Vec has an allocation, so the pointer
213 // is non-null. Vec guarantees a non-null pointer for any allocated
214 // Vec (and a dangling-but-non-null for empty, which we don't hit here).
215 let ptr = unsafe { NonNull::new_unchecked(v.as_mut_ptr()) };
216 let len = v.len();
217 let cap = v.capacity();
218 Self {
219 heap: Heap::new(ptr, len, cap),
220 }
221 }
222 }
223
224 #[inline]
225 fn alloc_heap(bytes: &[u8]) -> Self {
226 let len = bytes.len();
227 // `len > 22` (caller has already taken the heap branch) and `len` is
228 // a slice length ⇒ ≤ `isize::MAX` ⇒ well below the `usize::MAX -
229 // (align - 1)` bound `from_size_align_unchecked` needs. u8's align is 1.
230 // SAFETY: see above.
231 let layout = unsafe { Layout::from_size_align_unchecked(len, 1) };
232 // SAFETY: layout.size() > 0 (caller's heap branch guarantees len > 22).
233 let raw = unsafe { alloc(layout) };
234 let ptr = match NonNull::new(raw) {
235 Some(p) => p,
236 None => handle_alloc_error(layout),
237 };
238 // SAFETY: alloc returned a writable region of `len` bytes; source is a
239 // disjoint slice.
240 unsafe {
241 std::ptr::copy_nonoverlapping(bytes.as_ptr(), ptr.as_ptr(), len);
242 }
243 Self {
244 heap: Heap::new(ptr, len, len),
245 }
246 }
247
248 /// True when stored inline; the byte at index 23 is the deciding tag in
249 /// either rep, so the check is a single load + compare.
250 #[inline]
251 fn is_inline(&self) -> bool {
252 // SAFETY: byte 23 is always initialised — either as Inline::tag (0..=22)
253 // or as the high byte of Heap::cap_and_tag (= 0xFF). Reading it through
254 // the Inline view is valid in either case (the union is `repr(C)`).
255 unsafe { self.inline.tag <= INLINE_LEN_MAX }
256 }
257
258 /// Number of bytes stored.
259 #[inline]
260 pub fn len(&self) -> usize {
261 if self.is_inline() {
262 // SAFETY: just verified `inline.tag` ≤ 22.
263 unsafe { self.inline.tag as usize }
264 } else {
265 // SAFETY: tag > 22 ⇒ heap variant is active.
266 unsafe { self.heap.length() }
267 }
268 }
269
270 /// Whether `len() == 0`.
271 #[inline]
272 pub fn is_empty(&self) -> bool {
273 self.len() == 0
274 }
275
276 /// Bytes this value holds on the heap (0 when inline). Lets memory-accounting
277 /// callers (e.g. `maxmemory` enforcement) charge only the off-stack footprint
278 /// without re-deriving the inline-length threshold.
279 #[inline]
280 pub fn heap_bytes(&self) -> usize {
281 if self.is_inline() { 0 } else { self.len() }
282 }
283
284 /// Borrow the bytes (no allocation; same for inline and heap variants).
285 #[inline]
286 pub fn as_slice(&self) -> &[u8] {
287 if self.is_inline() {
288 // SAFETY: first `tag` bytes of `data` are valid (zero-init at construction).
289 unsafe {
290 slice::from_raw_parts(self.inline.data.as_ptr(), self.inline.tag as usize)
291 }
292 } else {
293 // SAFETY: heap variant active; ptr/len originate from a Vec or our own alloc.
294 unsafe { slice::from_raw_parts(self.heap.ptr.as_ptr(), self.heap.length()) }
295 }
296 }
297
298 /// Copy into a fresh `Vec<u8>` (clone semantics).
299 pub fn to_vec(&self) -> Vec<u8> {
300 self.as_slice().to_vec()
301 }
302
303 /// Consume self and return an owned `Vec<u8>`. The heap path reuses the
304 /// existing allocation; the inline path copies into a new vec.
305 pub fn into_vec(self) -> Vec<u8> {
306 if self.is_inline() {
307 self.as_slice().to_vec()
308 // self drops as inline — nothing to free.
309 } else {
310 // SAFETY: heap variant active.
311 let (ptr, len, cap) = unsafe {
312 (
313 self.heap.ptr.as_ptr(),
314 self.heap.length(),
315 self.heap.capacity(),
316 )
317 };
318 // Skip our Drop to avoid double-free; Vec::from_raw_parts now owns it.
319 let _do_not_drop = ManuallyDrop::new(self);
320 // SAFETY: ptr/len/cap originated from either a Vec<u8> (from_vec)
321 // or our own `alloc(Layout::array::<u8>(cap))` (alloc_heap, where
322 // cap == len) — both meet Vec::from_raw_parts' requirements.
323 unsafe { Vec::from_raw_parts(ptr, len, cap) }
324 }
325 }
326}
327
328impl Default for SmallBytes {
329 fn default() -> Self {
330 Self::new()
331 }
332}
333
334impl Drop for SmallBytes {
335 fn drop(&mut self) {
336 if self.is_inline() {
337 return;
338 }
339 // SAFETY: heap variant active; layout matches the one used at alloc
340 // time (either from Vec — Vec uses `Layout::array::<u8>(cap)` — or our
341 // own alloc_heap which used the same layout).
342 unsafe {
343 let cap = self.heap.capacity();
344 let layout = Layout::array::<u8>(cap).expect("kevy-bytes: drop layout");
345 dealloc(self.heap.ptr.as_ptr(), layout);
346 }
347 }
348}
349
350impl Clone for SmallBytes {
351 /// Specialised clone that bypasses `as_slice → from_slice → alloc_heap`'s
352 /// two layered length checks. Inline variant is a bitwise union copy (no
353 /// branch through the slice path); heap variant goes straight to a single
354 /// `alloc + memcpy` keyed on the already-known heap length.
355 #[inline]
356 fn clone(&self) -> Self {
357 if self.is_inline() {
358 // SAFETY: `Inline` is `repr(C)` + `Copy`; bitwise copy is sound
359 // when the source is currently in the inline variant (the tag
360 // byte ≤ 22 is part of the bit pattern we're copying, so the
361 // discriminator stays correct).
362 unsafe { Self { inline: self.inline } }
363 } else {
364 // SAFETY: tag > 22 ⇒ heap variant is active.
365 unsafe { self.clone_heap() }
366 }
367 }
368}
369
370impl SmallBytes {
371 /// Heap-fast-path clone. Caller must have established that `self` is in
372 /// the heap variant.
373 ///
374 /// # Safety
375 /// `self.heap` must be the active union variant (i.e. `is_inline()` is
376 /// false). `self.heap.ptr` must point to `self.heap.len` valid bytes.
377 #[inline]
378 unsafe fn clone_heap(&self) -> Self {
379 // SAFETY (covers the three `self.heap.*` reads): caller asserts the
380 // heap variant is active.
381 let (src_ptr, len) = unsafe { (self.heap.ptr.as_ptr(), self.heap.length()) };
382 // `len > 22 ⇒ len > 0`, and the high bits are guarded by `CAP_MASK`
383 // never letting cap exceed 2^56, well below `isize::MAX`, so the
384 // unchecked layout is sound. Allocator alignment for `u8` is 1.
385 let layout = unsafe { Layout::from_size_align_unchecked(len, 1) };
386 // SAFETY: layout.size() > 0.
387 let raw = unsafe { alloc(layout) };
388 let ptr = match NonNull::new(raw) {
389 Some(p) => p,
390 None => handle_alloc_error(layout),
391 };
392 // SAFETY: src has `len` valid bytes; dst is freshly-allocated for `len`
393 // bytes; regions are disjoint.
394 unsafe { std::ptr::copy_nonoverlapping(src_ptr, ptr.as_ptr(), len) };
395 Self {
396 heap: Heap::new(ptr, len, len),
397 }
398 }
399}
400
401// `Debug`, `PartialOrd`, `Ord`, `Hash`, `AsRef<[u8]>`, `Borrow<[u8]>`,
402// `KevyHash`, `From<&[u8]>`, `From<Vec<u8>>` live in `crate::traits` —
403// they only need the public `as_slice()` view. `PartialEq` / `Eq` stay
404// here because the same-variant fast paths reach into `self.inline` /
405// `self.heap` directly.
406
407impl PartialEq for SmallBytes {
408 /// Specialised over the slice form (`as_slice == as_slice`) by branching
409 /// on variant **once** and reading the relevant length / pointer pair
410 /// directly. Same-variant cases (inline/inline + heap/heap, which are the
411 /// only ones produced by a single allocator) skip a redundant `as_slice`
412 /// dispatch on each side; the mixed case falls back to the slice form.
413 #[inline]
414 fn eq(&self, other: &Self) -> bool {
415 // SAFETY: byte 23 (`inline.tag`) is always a valid load in either
416 // variant — it's either the inline-length 0..=22 or 0xFF as the
417 // heap-discriminator overlap (see crate doc).
418 let self_tag = unsafe { self.inline.tag };
419 let other_tag = unsafe { other.inline.tag };
420 let self_inline = self_tag <= INLINE_LEN_MAX;
421 let other_inline = other_tag <= INLINE_LEN_MAX;
422 match (self_inline, other_inline) {
423 (true, true) => {
424 let len = self_tag as usize;
425 if len != other_tag as usize {
426 return false;
427 }
428 // SAFETY: both in inline variant; first `len` bytes valid.
429 let a = unsafe {
430 slice::from_raw_parts(self.inline.data.as_ptr(), len)
431 };
432 let b = unsafe {
433 slice::from_raw_parts(other.inline.data.as_ptr(), len)
434 };
435 a == b
436 }
437 (false, false) => {
438 // SAFETY: both in heap variant.
439 let (a_len, b_len) =
440 unsafe { (self.heap.length(), other.heap.length()) };
441 if a_len != b_len {
442 return false;
443 }
444 // SAFETY: heap pointers + len are valid.
445 let a = unsafe {
446 slice::from_raw_parts(self.heap.ptr.as_ptr(), a_len)
447 };
448 let b = unsafe {
449 slice::from_raw_parts(other.heap.ptr.as_ptr(), b_len)
450 };
451 a == b
452 }
453 // Mixed inline/heap: should not happen via any safe constructor
454 // (heap variants always carry len > 22, inline always ≤ 22), so
455 // two equal-length values normally land in the same arm. But a
456 // database's query path MUST NOT panic on data shape — external
457 // causes (memory corruption, mmap/FFI bytes from a caller crate,
458 // a future unsafe transmute upstream) can violate the invariant.
459 // Falling back to slice-form equality is logically identical to
460 // the same-arm arms and stays sound (each side's `as_slice()`
461 // already chooses the right variant per `is_inline()`).
462 _ => self.as_slice() == other.as_slice(),
463 }
464 }
465}
466impl Eq for SmallBytes {}
467
#[cfg(test)]
mod tests {
    //! Unit tests for `SmallBytes`: layout, the inline/heap boundary,
    //! allocation reuse, trait impls, and an allocation-count proof that the
    //! inline path never touches the heap.

    use super::*;
    use kevy_hash::KevyHash as _;
    use std::hash::{Hash, Hasher};

    #[test]
    fn size_and_align() {
        assert_eq!(mem::size_of::<SmallBytes>(), 24);
        assert_eq!(mem::align_of::<SmallBytes>(), mem::align_of::<usize>());
    }

    #[test]
    fn empty_is_inline() {
        let s = SmallBytes::new();
        assert!(s.is_inline());
        assert_eq!(s.len(), 0);
        assert!(s.is_empty());
        assert_eq!(s.as_slice(), b"");
    }

    #[test]
    fn inline_one_byte() {
        let s = SmallBytes::from_slice(b"x");
        assert!(s.is_inline());
        assert_eq!(s.len(), 1);
        assert_eq!(s.as_slice(), b"x");
    }

    #[test]
    fn inline_at_boundary_22() {
        let v: Vec<u8> = (0u8..22).collect();
        let s = SmallBytes::from_slice(&v);
        assert!(s.is_inline());
        assert_eq!(s.len(), 22);
        assert_eq!(s.as_slice(), v);
    }

    #[test]
    fn heap_at_boundary_23() {
        let v: Vec<u8> = (0u8..23).collect();
        let s = SmallBytes::from_slice(&v);
        assert!(!s.is_inline());
        assert_eq!(s.len(), 23);
        assert_eq!(s.as_slice(), v);
    }

    #[test]
    fn heap_large() {
        let v: Vec<u8> = (0..4096).map(|i| (i & 0xFF) as u8).collect();
        let s = SmallBytes::from_slice(&v);
        assert!(!s.is_inline());
        assert_eq!(s.len(), 4096);
        assert_eq!(s.as_slice(), v.as_slice());
    }

    #[test]
    fn from_vec_inline() {
        let s = SmallBytes::from_vec(vec![1u8, 2, 3]);
        assert!(s.is_inline());
        assert_eq!(s.as_slice(), &[1, 2, 3]);
    }

    #[test]
    fn from_vec_heap_reuses_alloc() {
        let mut v: Vec<u8> = (0u8..100).collect();
        v.reserve(200);
        let ptr_before = v.as_ptr();
        let cap_before = v.capacity();
        let s = SmallBytes::from_vec(v);
        assert!(!s.is_inline());
        // SAFETY: we know it's heap; peek to verify pointer reuse.
        unsafe {
            assert_eq!(s.heap.ptr.as_ptr() as *const u8, ptr_before);
            assert_eq!(s.heap.capacity(), cap_before);
        }
    }

    #[test]
    fn into_vec_inline_copies() {
        let s = SmallBytes::from_slice(b"hello");
        let v = s.into_vec();
        assert_eq!(v, b"hello");
    }

    #[test]
    fn into_vec_heap_reuses_alloc() {
        let original: Vec<u8> = (0u8..200).collect();
        let ptr = original.as_ptr();
        let cap = original.capacity();
        let s = SmallBytes::from_vec(original);
        let v = s.into_vec();
        assert_eq!(v.as_ptr(), ptr);
        assert_eq!(v.capacity(), cap);
        assert_eq!(v.len(), 200);
    }

    #[test]
    fn clone_inline() {
        let s = SmallBytes::from_slice(b"abc");
        let c = s.clone();
        assert_eq!(s, c);
        assert!(c.is_inline());
    }

    #[test]
    fn clone_heap() {
        let v: Vec<u8> = (0u8..50).collect();
        let s = SmallBytes::from_slice(&v);
        let c = s.clone();
        assert_eq!(s, c);
        assert!(!c.is_inline());
    }

    #[test]
    fn eq_by_content() {
        let a = SmallBytes::from_slice(b"short");
        let b = SmallBytes::from_slice(b"short");
        assert_eq!(a, b);
        let c: Vec<u8> = (0u8..30).collect();
        let d: Vec<u8> = (0u8..30).collect();
        assert_eq!(SmallBytes::from_slice(&c), SmallBytes::from_slice(&d));
    }

    #[test]
    fn ord_lex() {
        let a = SmallBytes::from_slice(b"abc");
        let b = SmallBytes::from_slice(b"abd");
        assert!(a < b);
    }

    #[test]
    fn debug_format_matches_slice() {
        let s = SmallBytes::from_slice(&[1u8, 2, 3]);
        let dbg = format!("{s:?}");
        let exp = format!("{:?}", &[1u8, 2, 3][..]);
        assert_eq!(dbg, exp);
    }

    #[test]
    fn default_is_empty_inline() {
        let s = SmallBytes::default();
        assert!(s.is_inline());
        assert_eq!(s.len(), 0);
    }

    #[test]
    fn drop_heap_does_not_leak_or_double_free() {
        // Loop a bunch to give miri/asan something to catch.
        for n in [23usize, 64, 1024, 65536] {
            let v: Vec<u8> = (0..n).map(|i| (i & 0xFF) as u8).collect();
            let s = SmallBytes::from_slice(&v);
            drop(s);
        }
    }

    // ---- Effective coverage: trait impls + branch paths ---------------------

    #[test]
    fn eq_is_reflexive_and_symmetric_inline() {
        let a = SmallBytes::from_slice(b"hi");
        let b = SmallBytes::from_slice(b"hi");
        let c = SmallBytes::from_slice(b"no");
        assert_eq!(a, a);
        assert_eq!(a, b);
        assert_eq!(b, a);
        assert_ne!(a, c);
    }

    #[test]
    fn eq_is_reflexive_and_symmetric_heap() {
        let v: Vec<u8> = (0u8..40).collect();
        let a = SmallBytes::from_slice(&v);
        let b = SmallBytes::from_slice(&v);
        let mut w = v.clone();
        w[0] = w[0].wrapping_add(1);
        let c = SmallBytes::from_slice(&w);
        assert_eq!(a, a);
        assert_eq!(a, b);
        assert_eq!(b, a);
        assert_ne!(a, c);
    }

    #[test]
    fn partial_cmp_matches_cmp_inline() {
        let a = SmallBytes::from_slice(b"abc");
        let b = SmallBytes::from_slice(b"abd");
        assert_eq!(a.partial_cmp(&b), Some(std::cmp::Ordering::Less));
        assert_eq!(b.partial_cmp(&a), Some(std::cmp::Ordering::Greater));
        assert_eq!(a.partial_cmp(&a), Some(std::cmp::Ordering::Equal));
        // Same chain via the Ord impl directly.
        assert_eq!(a.cmp(&b), std::cmp::Ordering::Less);
        assert_eq!(a.cmp(&a), std::cmp::Ordering::Equal);
    }

    #[test]
    fn hash_agrees_with_byte_slice() {
        use std::collections::hash_map::DefaultHasher;
        let v: Vec<u8> = (0u8..40).collect();
        let s = SmallBytes::from_slice(&v);
        let mut h_slice = DefaultHasher::new();
        v.as_slice().hash(&mut h_slice);
        let mut h_sb = DefaultHasher::new();
        s.hash(&mut h_sb);
        // Same byte stream into the Hasher (Hash for [u8] writes len + bytes;
        // ours delegates to as_slice so it matches).
        assert_eq!(h_slice.finish(), h_sb.finish());
    }

    #[test]
    fn kevy_hash_agrees_with_byte_slice() {
        let v: Vec<u8> = (0u8..40).collect();
        let s = SmallBytes::from_slice(&v);
        assert_eq!(
            s.kevy_hash(),
            v.as_slice().kevy_hash(),
            "KevyHash impl must agree with &[u8] so a KevyMap<SmallBytes, V> can be queried by Borrow<[u8]>"
        );
        let small = SmallBytes::from_slice(b"foo");
        assert_eq!(small.kevy_hash(), (b"foo" as &[u8]).kevy_hash());
    }

    #[test]
    fn as_ref_is_zero_copy_view() {
        let s = SmallBytes::from_slice(b"abcdef");
        let r: &[u8] = s.as_ref();
        assert_eq!(r, b"abcdef");
        // Same slice address as as_slice (the impl delegates to as_slice).
        assert!(std::ptr::eq(r.as_ptr(), s.as_slice().as_ptr()));
    }

    #[test]
    fn borrow_lookup_works_in_collection() {
        use std::collections::HashMap;
        let mut m: HashMap<SmallBytes, i32> = HashMap::new();
        m.insert(SmallBytes::from_slice(b"key1"), 1);
        m.insert(SmallBytes::from_slice(b"key2"), 2);
        // Look up by &[u8] thanks to Borrow<[u8]>.
        assert_eq!(m.get(b"key1".as_slice()), Some(&1));
        assert_eq!(m.get(b"key2".as_slice()), Some(&2));
        assert_eq!(m.get(b"none".as_slice()), None);
    }

    #[test]
    fn from_byte_slice_round_trip() {
        let a: SmallBytes = (&b"short"[..]).into();
        assert_eq!(a.as_slice(), b"short");
        let v: Vec<u8> = (0u8..40).collect();
        let b: SmallBytes = v.as_slice().into();
        assert_eq!(b.as_slice(), v.as_slice());
        assert!(!b.is_inline());
    }

    #[test]
    fn from_vec_dispatches_inline_or_heap() {
        // ≤ 22 → inline (copies)
        let inline_src: SmallBytes = vec![1u8, 2, 3].into();
        assert!(inline_src.is_inline());
        assert_eq!(inline_src.as_slice(), &[1, 2, 3]);
        // > 22 → heap (reuses alloc; verified by from_vec_heap_reuses_alloc)
        let v: Vec<u8> = (0u8..30).collect();
        let heap_src: SmallBytes = v.clone().into();
        assert!(!heap_src.is_inline());
        assert_eq!(heap_src.as_slice(), v.as_slice());
    }

    #[test]
    fn clone_heap_keeps_data_and_is_independent() {
        // Cloned heap value must allocate a separate buffer (no shared
        // pointer), so dropping the source doesn't invalidate the clone.
        let v: Vec<u8> = (0u8..50).collect();
        let src = SmallBytes::from_slice(&v);
        let dup = src.clone();
        // SAFETY: both in heap variant by len > 22.
        unsafe {
            assert_ne!(
                src.heap.ptr.as_ptr(),
                dup.heap.ptr.as_ptr(),
                "clone must allocate a fresh buffer"
            );
        }
        drop(src);
        // dup remains valid.
        assert_eq!(dup.as_slice(), v.as_slice());
    }

    #[test]
    fn drop_inline_is_noop() {
        // Just exercise the inline path of Drop (the `if self.is_inline()
        // { return }` early-return); miri checks no UB.
        for &n in &[0usize, 1, 5, 22] {
            let s = SmallBytes::from_slice(&vec![b'x'; n]);
            assert!(s.is_inline());
            drop(s);
        }
    }

    #[test]
    fn into_vec_zero_size_path() {
        // Empty (inline) → into_vec returns empty Vec without panic.
        let s = SmallBytes::new();
        let v = s.into_vec();
        assert!(v.is_empty());
    }

    #[test]
    fn to_vec_copies_inline_and_heap() {
        let inline = SmallBytes::from_slice(b"hi");
        assert_eq!(inline.to_vec(), b"hi");
        let v: Vec<u8> = (0u8..30).collect();
        let heap = SmallBytes::from_slice(&v);
        let copy = heap.to_vec();
        assert_eq!(copy, v);
        // to_vec returns an owned independent Vec; the source is still
        // readable afterwards and unchanged.
        assert_eq!(heap.as_slice(), v.as_slice());
    }

    // ===== alloc-count test =====
    //
    // The whole point of SmallBytes' SSO is "no heap alloc when payload ≤ 22
    // bytes". We can prove it by swapping in a counting allocator and asserting
    // the inline path produces ZERO Allocator::alloc calls. A heap-bound payload
    // produces at least one. Wrapping the system allocator (not replacing it
    // wholesale with a fake) keeps the test compatible with Rust's std types
    // that the tests themselves use.
    //
    // Concurrency: the global allocator is shared by EVERY thread in the test
    // process, and `cargo test` runs ~30 unrelated tests in this crate in
    // parallel. A simple global flag would attribute their allocs to our
    // measurement window. We instead key the recording on a thread-local so
    // only the test thread *currently inside* `measure_allocs` counts.

    use std::alloc::{GlobalAlloc, Layout, System};
    use std::cell::Cell;

    /// Global allocator wrapper that forwards to `System` and counts `alloc`
    /// calls made by threads that have recording switched on.
    struct CountingAlloc {
        inner: System,
    }

    thread_local! {
        // `const { Cell::new(...) }` is lazily-zero-init at thread spawn — no
        // heap alloc — so the allocator itself can safely consult them.
        static THREAD_RECORDING: Cell<bool> = const { Cell::new(false) };
        static THREAD_ALLOC_CALLS: Cell<usize> = const { Cell::new(0) };
    }

    unsafe impl GlobalAlloc for CountingAlloc {
        unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
            // `try_with` so if the TLS is being destroyed (process teardown)
            // we still serve the alloc instead of panicking.
            let _ = THREAD_RECORDING.try_with(|r| {
                if r.get() {
                    let _ = THREAD_ALLOC_CALLS.try_with(|c| c.set(c.get() + 1));
                }
            });
            // SAFETY: forwarding to the system allocator with the same layout.
            unsafe { self.inner.alloc(layout) }
        }
        unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
            // SAFETY: forwarding to the system allocator with the same layout.
            unsafe { self.inner.dealloc(ptr, layout) }
        }
    }

    #[global_allocator]
    static COUNTING: CountingAlloc = CountingAlloc { inner: System };

    /// Run `f` with allocation recording enabled on the current thread and
    /// return how many `alloc` calls it made.
    fn measure_allocs<F: FnOnce()>(f: F) -> usize {
        THREAD_ALLOC_CALLS.with(|c| c.set(0));
        THREAD_RECORDING.with(|r| r.set(true));
        f();
        THREAD_RECORDING.with(|r| r.set(false));
        THREAD_ALLOC_CALLS.with(|c| c.get())
    }

    #[test]
    fn inline_payload_does_not_allocate() {
        // Warm + capture: every inline-sized SmallBytes constructor + access
        // must produce zero heap allocations. `INLINE_LEN_MAX` is the max
        // payload length the inline variant can hold (the byte after the
        // INLINE_CAP-byte buffer is the length+discriminant tag).
        let max_inline = INLINE_LEN_MAX as usize;
        let allocs = measure_allocs(|| {
            for n in 0..=max_inline {
                let s = SmallBytes::from_slice(&[0u8; INLINE_CAP][..n]);
                std::hint::black_box(&s);
                std::hint::black_box(s.as_slice());
                std::hint::black_box(s.len());
                let c = s.clone(); // Clone of an inline value is also alloc-free.
                std::hint::black_box(&c);
                drop(c);
                drop(s);
            }
        });
        assert_eq!(
            allocs, 0,
            "expected SSO inline path to be alloc-free, got {allocs} allocs"
        );
    }

    #[test]
    fn heap_payload_does_allocate() {
        // Control: payload just over the inline cap MUST allocate. If this
        // is 0 either SSO bumped its cap silently or the counter is broken —
        // either way the inline-zero assertion above is meaningless.
        let max_inline = INLINE_LEN_MAX as usize;
        let allocs = measure_allocs(|| {
            let s = SmallBytes::from_slice(&[7u8; INLINE_CAP + 8][..max_inline + 1]);
            std::hint::black_box(&s);
            drop(s);
        });
        assert!(
            allocs >= 1,
            "expected the heap path to allocate at least once, got {allocs}"
        );
    }

    /// PartialEq on a forged "heap variant with len ≤ 22" must NOT panic.
    /// The safe API never produces such a value, but external causes
    /// (mmap, FFI, future unsafe code, memory corruption) can. A DB's
    /// query path has to degrade to a correct boolean, not crash.
    /// Pre-fix: `unreachable!()` on the mixed arm would `panic!`.
    /// Post-fix: falls back to slice-form equality.
    #[test]
    fn partial_eq_mixed_arm_does_not_panic() {
        use std::mem::ManuallyDrop;

        let inline_hi = SmallBytes::from_slice(b"hi");
        let inline_no = SmallBytes::from_slice(b"no");

        // Forge a heap variant that claims to hold "hi" with len = 2 —
        // invariant-violating, but mechanically possible if the union
        // bytes were ever externally written. `storage` stays a normal
        // `Vec` that outlives every read through the forged pointer and
        // then frees its own buffer, so the test leaks nothing (keeps
        // miri's leak check clean).
        let mut storage = b"hi".to_vec();
        let ptr = NonNull::new(storage.as_mut_ptr()).expect("non-null Vec");
        // The forged value borrows `storage`'s buffer without owning it;
        // `ManuallyDrop` guarantees `SmallBytes::drop` never runs on it, so
        // the buffer is freed exactly once — by `storage`.
        let forged = ManuallyDrop::new(SmallBytes {
            heap: Heap::new(ptr, 2, 2),
        });

        // Equal content: must return true, must NOT panic.
        assert_eq!(inline_hi, *forged);
        assert_eq!(*forged, inline_hi);
        // Different content: must return false, must NOT panic.
        assert_ne!(inline_no, *forged);
        assert_ne!(*forged, inline_no);

        // Last use of `forged` is above; release the backing buffer through
        // its real owner.
        drop(storage);
    }
}