// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

#![cfg_attr(not(test), deny(clippy::panic))]
#![cfg_attr(not(test), deny(clippy::unwrap_used))]
#![cfg_attr(not(test), deny(clippy::expect_used))]
#![cfg_attr(not(test), deny(clippy::todo))]
#![cfg_attr(not(test), deny(clippy::unimplemented))]

#[cfg(feature = "serde")]
use serde::Serialize;
use std::{
    borrow, cmp, fmt, hash,
    ops::{self, RangeBounds},
    ptr::NonNull,
    sync::atomic::AtomicUsize,
};

/// Immutable bytes type with zero copy cloning and slicing.
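///
/// Cloning a `Bytes` (or slicing it, see [`Bytes::slice`]) does not copy the underlying buffer;
/// both handles share the same allocation and reference count. A minimal illustration:
///
/// ```
/// use libdd_tinybytes::Bytes;
///
/// let bytes = Bytes::copy_from_slice(b"hello world");
/// // Cloning only bumps the reference count; the data pointer is unchanged.
/// let clone = bytes.clone();
/// assert_eq!(clone.as_ptr(), bytes.as_ptr());
/// assert_eq!(clone, bytes);
/// ```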
#[derive(Clone)]
#[repr(C)] // fixed layout for ad-hoc conversion to slice
pub struct Bytes {
    data: NonNull<[u8]>,
    // The `bytes` field is used to ensure that the underlying bytes are freed when there are no
    // more references to the `Bytes` object. For static buffers the field is `None`.
    bytes: Option<RefCountedCell>,
}

/// The underlying bytes that the `Bytes` object references.
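///
/// Any owned buffer that satisfies these bounds can back a `Bytes`. Besides the provided
/// implementations (`Vec<u8>`, `Box<[u8]>`, `String`), a custom owner can opt in; a sketch with a
/// hypothetical wrapper type:
///
/// ```
/// use libdd_tinybytes::{Bytes, UnderlyingBytes};
///
/// // Hypothetical owner of a heap buffer.
/// struct MyBuffer(Vec<u8>);
///
/// impl AsRef<[u8]> for MyBuffer {
///     fn as_ref(&self) -> &[u8] {
///         &self.0
///     }
/// }
///
/// impl UnderlyingBytes for MyBuffer {}
///
/// let bytes = Bytes::from_underlying(MyBuffer(b"hello".to_vec()));
/// assert_eq!(bytes.as_ref(), b"hello");
/// ```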
pub trait UnderlyingBytes: AsRef<[u8]> + Send + Sync + 'static {}

/// Since the `Bytes` type is immutable, and `UnderlyingBytes` is `Send + Sync`, it is safe to share
/// `Bytes` across threads.
unsafe impl Send for Bytes {}
unsafe impl Sync for Bytes {}

impl Bytes {
    #[inline]
    /// Creates a new `Bytes` from the given slice data and the refcount.
    ///
    /// # Safety
    ///
    /// * the pointer should be valid for the given length
    /// * the pointer should be valid for reads as long as the refcount or any of its clones is not
    ///   dropped
    pub const unsafe fn from_raw_refcount(
        ptr: NonNull<u8>,
        len: usize,
        refcount: RefCountedCell,
    ) -> Self {
        Self::from_raw(ptr, len, Some(refcount))
    }

    #[inline]
    /// Creates a new `Bytes` from the given slice data and an optional refcount. Can be used
    /// after calling into_raw().
    ///
    /// # Safety
    ///
    /// * the pointer should be valid for the given length
    /// * the pointer should be valid for reads as long as the refcount or any of its clones is not
    ///   dropped
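    ///
    /// # Examples
    ///
    /// A round trip through the raw parts (a sketch; the parts come straight from
    /// [`Bytes::into_raw`], which is what makes the call sound):
    ///
    /// ```
    /// use libdd_tinybytes::Bytes;
    ///
    /// let bytes = Bytes::copy_from_slice(b"hello");
    /// let (ptr, len, refcount) = bytes.into_raw();
    /// // SAFETY: `ptr` is valid for `len` bytes for as long as `refcount` (or a clone of it)
    /// // is alive, because the parts were produced by `into_raw`.
    /// let rebuilt = unsafe { Bytes::from_raw(ptr, len, refcount) };
    /// assert_eq!(rebuilt.as_ref(), b"hello");
    /// ```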
    pub const unsafe fn from_raw(
        ptr: NonNull<u8>,
        len: usize,
        bytes: Option<RefCountedCell>,
    ) -> Self {
        Self {
            data: NonNull::slice_from_raw_parts(ptr, len),
            bytes,
        }
    }

    /// Creates empty `Bytes`.
    #[inline]
    pub const fn empty() -> Self {
        Self::from_static(b"")
    }

    /// Creates `Bytes` from a static slice.
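    ///
    /// No allocation or reference counting is involved; the returned `Bytes` simply borrows the
    /// static data for the lifetime of the program.
    ///
    /// # Examples
    ///
    /// ```
    /// use libdd_tinybytes::Bytes;
    ///
    /// static GREETING: &[u8] = b"hello";
    /// let bytes = Bytes::from_static(GREETING);
    /// assert_eq!(bytes.as_ref(), b"hello");
    /// ```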
    #[inline]
    pub const fn from_static(value: &'static [u8]) -> Self {
        Self {
            data: NonNull::slice_from_raw_parts(
                // SAFETY: static slices always have a valid pointer and length
                unsafe { NonNull::new_unchecked(value.as_ptr().cast_mut()) },
                value.len(),
            ),
            bytes: None,
        }
    }

    /// Creates `Bytes` from a slice, by copying.
    pub fn copy_from_slice(data: &[u8]) -> Self {
        Self::from_underlying(data.to_vec())
    }

    /// Returns the length of the `Bytes`.
    #[inline]
    pub const fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns `true` if the `Bytes` is empty.
    #[inline]
    pub const fn is_empty(&self) -> bool {
        self.data.len() == 0
    }

    /// Returns a slice of self for the provided range.
    ///
    /// This will return a new `Bytes` handle set to the slice, and will not copy the underlying
    /// data.
    ///
    /// This operation is `O(1)`.
    ///
    /// # Panics
    ///
    /// Slicing will panic if the range does not conform to `start <= end` and `end <= self.len()`.
    ///
    /// # Examples
    ///
    /// ```
    /// use libdd_tinybytes::Bytes;
    ///
    /// let bytes = Bytes::copy_from_slice(b"hello world");
    /// let slice = bytes.slice(0..5);
    /// assert_eq!(slice.as_ref(), b"hello");
    ///
    /// let slice = bytes.slice(6..11);
    /// assert_eq!(slice.as_ref(), b"world");
    /// ```
    pub fn slice(&self, range: impl RangeBounds<usize>) -> Self {
        use std::ops::Bound;

        let len = self.len();

        #[allow(clippy::expect_used)]
        let start = match range.start_bound() {
            Bound::Included(&n) => n,
            Bound::Excluded(&n) => n.checked_add(1).expect("range start overflow"),
            Bound::Unbounded => 0,
        };

        #[allow(clippy::expect_used)]
        let end = match range.end_bound() {
            Bound::Included(&n) => n.checked_add(1).expect("range end overflow"),
            Bound::Excluded(&n) => n,
            Bound::Unbounded => len,
        };

        assert!(
            start <= end,
            "range start must not be greater than end: {start:?} > {end:?}"
        );
        assert!(
            end <= len,
            "range end must not be greater than length: {end:?} > {len:?}"
        );

        if end == start {
            Bytes::empty()
        } else {
            self.safe_slice_ref(start, end)
        }
    }

    /// Returns a slice of self that is equivalent to the given `subset`, if it is a subset.
    ///
    /// When processing a `Bytes` buffer with other tools, one often gets a
    /// `&[u8]` which is in fact a slice of the `Bytes`, i.e. a subset of it.
    /// This function turns that `&[u8]` into another `Bytes`, as if one had
    /// called `self.slice()` with the range that corresponds to `subset`.
    ///
    /// This operation is `O(1)`.
    ///
    /// # Examples
    ///
    /// ```
    /// use libdd_tinybytes::Bytes;
    ///
    /// let bytes = Bytes::copy_from_slice(b"hello world");
    /// let subset = &bytes.as_ref()[0..5];
    /// let slice = bytes.slice_ref(subset).unwrap();
    /// assert_eq!(slice.as_ref(), b"hello");
    ///
    /// let subset = &bytes.as_ref()[6..11];
    /// let slice = bytes.slice_ref(subset).unwrap();
    /// assert_eq!(slice.as_ref(), b"world");
    ///
    /// let invalid_subset = b"invalid";
    /// assert!(bytes.slice_ref(invalid_subset).is_none());
    /// ```
    pub fn slice_ref(&self, subset: &[u8]) -> Option<Bytes> {
        // An empty slice can be a subset of any slice.
        if subset.is_empty() {
            return Some(Bytes::empty());
        }

        let subset_start = subset.as_ptr() as usize;
        let subset_end = subset_start + subset.len();
        let self_start = self.data.addr().get();
        let self_end = self_start + self.data.len();
        if subset_start >= self_start && subset_end <= self_end {
            Some(self.safe_slice_ref(subset_start - self_start, subset_end - self_start))
        } else {
            None
        }
    }

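    /// Creates `Bytes` from an owned buffer implementing [`UnderlyingBytes`], without copying.
    ///
    /// The buffer is moved into a reference-counted cell and is freed once the last `Bytes` handle
    /// referencing it is dropped.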
    pub fn from_underlying<T: UnderlyingBytes>(value: T) -> Self {
        unsafe {
            let refcounted = make_refcounted(value);
            let a = refcounted.data.cast::<CustomArc<T>>().as_ptr();

            // SAFETY:
            // * the pointer associated with a slice is non null and valid for the length of the
            //   slice
            // * it stays valid as long as value is not dropped
            let data: &T = &(*a).data;
            let (ptr, len) = {
                let s = data.as_ref();
                (NonNull::new_unchecked(s.as_ptr().cast_mut()), s.len())
            };
            Self::from_raw_refcount(ptr, len, refcounted)
        }
    }

    #[inline]
    fn ptr(&self) -> NonNull<u8> {
        self.data.cast::<u8>()
    }

    #[inline]
    fn safe_slice_ref(&self, start: usize, end: usize) -> Self {
        if !(start <= end && end <= self.len()) {
            #[allow(clippy::panic)]
            {
                panic!("Out of bound slicing of Bytes instance")
            }
        }
        // SAFETY:
        // * start is less than len, so the resulting pointer is
        // going either inside the allocation or one past
        // * we have 0 <= start <= end <= len so 0 <= end - start <= len - start. Since the new ptr
        // points to ptr + start, then memory span is between ptr + start and (ptr + start) + (len -
        // start) = ptr + len
        Self {
            data: NonNull::slice_from_raw_parts(unsafe { self.ptr().add(start) }, end - start),
            // ptr: unsafe { self.ptr.add(start) },
            // len: end - start,
            bytes: self.bytes.clone(),
        }
    }

    #[inline]
    fn as_slice(&self) -> &[u8] {
        // SAFETY: ptr is valid for the associated length
        unsafe { std::slice::from_raw_parts(self.ptr().as_ptr().cast_const(), self.len()) }
    }

    #[inline]
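    /// Decomposes the `Bytes` into its raw parts: the data pointer, the length, and the refcount
    /// cell (`None` for static buffers). The parts can be reassembled with [`Bytes::from_raw`].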
    pub fn into_raw(self) -> (NonNull<u8>, usize, Option<RefCountedCell>) {
        (self.ptr(), self.len(), self.bytes)
    }
}

// Implementations of `UnderlyingBytes` for common types.
impl UnderlyingBytes for Vec<u8> {}
impl UnderlyingBytes for Box<[u8]> {}
impl UnderlyingBytes for String {}

// Implementations of common traits for `Bytes`.
impl Default for Bytes {
    fn default() -> Self {
        Self::empty()
    }
}

impl<T: UnderlyingBytes> From<T> for Bytes {
    fn from(value: T) -> Self {
        Self::from_underlying(value)
    }
}

impl AsRef<[u8]> for Bytes {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.as_slice()
    }
}

impl borrow::Borrow<[u8]> for Bytes {
    #[inline]
    fn borrow(&self) -> &[u8] {
        self.as_slice()
    }
}

impl ops::Deref for Bytes {
    type Target = [u8];
    #[inline]
    fn deref(&self) -> &Self::Target {
        self.as_slice()
    }
}

impl<T: AsRef<[u8]>> PartialEq<T> for Bytes {
    #[inline]
    fn eq(&self, other: &T) -> bool {
        self.as_slice() == other.as_ref()
    }
}

impl Eq for Bytes {}

impl<T: AsRef<[u8]>> PartialOrd<T> for Bytes {
    fn partial_cmp(&self, other: &T) -> Option<cmp::Ordering> {
        self.as_slice().partial_cmp(other.as_ref())
    }
}

impl Ord for Bytes {
    fn cmp(&self, other: &Bytes) -> cmp::Ordering {
        self.as_slice().cmp(other.as_slice())
    }
}

impl hash::Hash for Bytes {
    // TODO should we cache the hash since we know the bytes are immutable?
    #[inline]
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        self.as_slice().hash(state);
    }
}

impl fmt::Debug for Bytes {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Debug::fmt(self.as_slice(), f)
    }
}

#[cfg(feature = "serde")]
impl Serialize for Bytes {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_bytes(self.as_slice())
    }
}

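/// A type-erased reference count that keeps the buffer behind a `Bytes` alive.
///
/// Cloning the cell increments the count and dropping it decrements the count; when the last
/// clone is dropped, the vtable's `drop` function releases the underlying storage.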
pub struct RefCountedCell {
    data: NonNull<()>,
    vtable: &'static RefCountedCellVTable,
}

unsafe impl Send for RefCountedCell {}
unsafe impl Sync for RefCountedCell {}

impl RefCountedCell {
    #[inline]
    /// Creates a new `RefCountedCell` from the given data and vtable.
    ///
    /// The data pointer can be used to store arbitrary data that won't be dropped until the last
    /// clone of the `RefCountedCell` is dropped.
    /// The vtable customizes the behavior of the cell: whenever the cell is cloned or dropped, the
    /// corresponding function in the vtable is called with the data pointer.
    ///
    /// # Safety
    ///
    /// * The value pointed to by `data` must be 'static + Send + Sync
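    ///
    /// # Examples
    ///
    /// An illustrative sketch (not part of the crate) that backs a `RefCountedCell` with a
    /// `std::sync::Arc`:
    ///
    /// ```
    /// use std::ptr::NonNull;
    /// use std::sync::Arc;
    /// use libdd_tinybytes::{RefCountedCell, RefCountedCellVTable};
    ///
    /// // Hypothetical adapter: the cell's data pointer is the raw `Arc<Vec<u8>>` pointer.
    /// unsafe fn arc_clone(data: NonNull<()>) -> RefCountedCell {
    ///     Arc::increment_strong_count(data.as_ptr() as *const Vec<u8>);
    ///     RefCountedCell::from_raw(data, &ARC_VTABLE)
    /// }
    ///
    /// unsafe fn arc_drop(data: NonNull<()>) {
    ///     Arc::decrement_strong_count(data.as_ptr() as *const Vec<u8>);
    /// }
    ///
    /// static ARC_VTABLE: RefCountedCellVTable = RefCountedCellVTable {
    ///     clone: arc_clone,
    ///     drop: arc_drop,
    /// };
    ///
    /// let buffer: Arc<Vec<u8>> = Arc::new(b"hello".to_vec());
    /// let data = NonNull::new(Arc::into_raw(buffer) as *mut ()).unwrap();
    /// // SAFETY: the `Vec<u8>` behind the `Arc` is 'static + Send + Sync.
    /// let cell = unsafe { RefCountedCell::from_raw(data, &ARC_VTABLE) };
    /// let clone = cell.clone();
    /// drop(cell);
    /// drop(clone); // the last drop releases the `Arc` and frees the buffer
    /// ```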
    pub const unsafe fn from_raw(data: NonNull<()>, vtable: &'static RefCountedCellVTable) -> Self {
        RefCountedCell { data, vtable }
    }
}

impl Clone for RefCountedCell {
    fn clone(&self) -> Self {
        unsafe { (self.vtable.clone)(self.data) }
    }
}

impl Drop for RefCountedCell {
    fn drop(&mut self) {
        unsafe { (self.vtable.drop)(self.data) }
    }
}

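/// The vtable of function pointers that defines how a [`RefCountedCell`] is cloned and dropped.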
pub struct RefCountedCellVTable {
    pub clone: unsafe fn(NonNull<()>) -> RefCountedCell,
    pub drop: unsafe fn(NonNull<()>),
}

/// A custom Arc implementation that contains only the strong count.
///
/// This struct is not exposed outside of this module and is only interacted with through the
/// `RefCountedCell` API.
struct CustomArc<T> {
    rc: AtomicUsize,
    #[allow(unused)]
    data: T,
}

/// Creates a refcounted cell.
///
/// The data passed to this cell will only be dropped when the last
/// clone of the cell is dropped.
fn make_refcounted<T: Send + Sync + 'static>(data: T) -> RefCountedCell {
    unsafe fn custom_arc_clone<T>(data: NonNull<()>) -> RefCountedCell {
        let custom_arc = data.cast::<CustomArc<T>>().as_ref();
        custom_arc
            .rc
            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        RefCountedCell::from_raw(
            data,
            &RefCountedCellVTable {
                clone: custom_arc_clone::<T>,
                drop: custom_arc_drop::<T>,
            },
        )
    }

    unsafe fn custom_arc_drop<T>(data: NonNull<()>) {
        let custom_arc = data.cast::<CustomArc<T>>().as_ref();
        if custom_arc
            .rc
            .fetch_sub(1, std::sync::atomic::Ordering::Release)
            != 1
        {
            return;
        }

        // Run drop + free memory on the data manually rather than casting back to a box
        // because otherwise miri complains

        // See standard library documentation for std::sync::Arc to see why this is needed.
        // https://github.com/rust-lang/rust/blob/2a5da7acd4c3eae638aa1c46f3a537940e60a0e4/library/alloc/src/sync.rs#L2647-L2675
        std::sync::atomic::fence(std::sync::atomic::Ordering::Acquire);
        {
            let custom_arc = data.cast::<CustomArc<T>>().as_mut();
            std::ptr::drop_in_place(custom_arc);
        }

        std::alloc::dealloc(
            data.as_ptr() as *mut u8,
            std::alloc::Layout::new::<CustomArc<T>>(),
        );
    }

    let rc = Box::leak(Box::new(CustomArc {
        rc: AtomicUsize::new(1),
        data,
    })) as *mut _ as *const ();
    RefCountedCell {
        data: unsafe { NonNull::new_unchecked(rc as *mut ()) },
        vtable: &RefCountedCellVTable {
            clone: custom_arc_clone::<T>,
            drop: custom_arc_drop::<T>,
        },
    }
}

#[cfg(feature = "bytes_string")]
mod bytes_string;
#[cfg(feature = "bytes_string")]
pub use bytes_string::BytesString;

#[cfg(test)]
mod test;