libdd-tinybytes 1.1.1

Tiny implementation of a `bytes::Bytes`-like type that supports `AsRef<[u8]>`
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

#![cfg_attr(not(test), deny(clippy::panic))]
#![cfg_attr(not(test), deny(clippy::unwrap_used))]
#![cfg_attr(not(test), deny(clippy::expect_used))]
#![cfg_attr(not(test), deny(clippy::todo))]
#![cfg_attr(not(test), deny(clippy::unimplemented))]

#[cfg(feature = "serde")]
use serde::Serialize;
use std::{
    borrow, cmp, fmt, hash,
    ops::{self, RangeBounds},
    ptr::NonNull,
    sync::atomic::AtomicUsize,
};

/// Immutable bytes type with zero copy cloning and slicing.
///
/// A `Bytes` is a view (pointer + length) over a byte buffer, optionally paired with a
/// reference-counted handle that keeps the buffer alive. Cloning a `Bytes` clones that handle
/// rather than the bytes themselves.
#[derive(Clone)]
#[repr(C)] // fixed layout for ad-hoc conversion to slice
pub struct Bytes {
    // Pointer to, and length of, the bytes visible through this handle.
    data: NonNull<[u8]>,
    // The `bytes` field is used to ensure that the underlying bytes are freed when there are no
    // more references to the `Bytes` object. For static buffers the field is `None`.
    bytes: Option<RefCountedCell>,
}

/// The underlying bytes that the `Bytes` object references.
///
/// This is a marker trait: it adds no methods of its own and only names the bounds
/// (`AsRef<[u8]> + Send + Sync + 'static`) a buffer type must satisfy to be wrapped by
/// `Bytes::from_underlying`.
pub trait UnderlyingBytes: AsRef<[u8]> + Send + Sync + 'static {}

// SAFETY: the `Bytes` type is immutable and `UnderlyingBytes` is `Send + Sync`, so it is safe to
// move a `Bytes` to another thread.
unsafe impl Send for Bytes {}
// SAFETY: same reasoning as for `Send`; a `Bytes` only hands out shared, read-only access to its
// bytes, so it is safe to share across threads.
unsafe impl Sync for Bytes {}

impl Bytes {
    /// Assembles a `Bytes` from a raw pointer, a length and the refcount that keeps the
    /// pointed-to memory alive.
    ///
    /// # Safety
    ///
    /// * `ptr` must be valid for reads of `len` bytes
    /// * `ptr` must remain valid for reads until `refcount` and every one of its clones has been
    ///   dropped
    #[inline]
    pub const unsafe fn from_raw_refcount(
        ptr: NonNull<u8>,
        len: usize,
        refcount: RefCountedCell,
    ) -> Self {
        Self {
            data: NonNull::slice_from_raw_parts(ptr, len),
            bytes: Some(refcount),
        }
    }

    /// Assembles a `Bytes` from a raw pointer, a length and an optional refcount. This is the
    /// inverse of `into_raw()` and can be used to rebuild a `Bytes` from its output.
    ///
    /// # Safety
    ///
    /// * `ptr` must be valid for reads of `len` bytes
    /// * `ptr` must remain valid for reads until the refcount and every one of its clones has
    ///   been dropped
    #[inline]
    pub const unsafe fn from_raw(
        ptr: NonNull<u8>,
        len: usize,
        bytes: Option<RefCountedCell>,
    ) -> Self {
        let data = NonNull::slice_from_raw_parts(ptr, len);
        Self { data, bytes }
    }

    /// Creates a `Bytes` of length zero, backed by a static buffer.
    #[inline]
    pub const fn empty() -> Self {
        Self::from_static(b"")
    }

    /// Creates a `Bytes` viewing a static slice. No refcount is attached since static data is
    /// never freed.
    #[inline]
    pub const fn from_static(value: &'static [u8]) -> Self {
        // SAFETY: the pointer of a slice reference is never null.
        let start = unsafe { NonNull::new_unchecked(value.as_ptr().cast_mut()) };
        Self {
            data: NonNull::slice_from_raw_parts(start, value.len()),
            bytes: None,
        }
    }

    /// Creates a `Bytes` holding its own copy of `data`.
    pub fn copy_from_slice(data: &[u8]) -> Self {
        Self::from_underlying(Vec::from(data))
    }

    /// Number of bytes visible through this handle.
    #[inline]
    pub const fn len(&self) -> usize {
        self.data.len()
    }

    /// Whether this handle views zero bytes.
    #[inline]
    pub const fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns a new `Bytes` restricted to `range`.
    ///
    /// The returned handle shares the underlying data with `self`; nothing is copied.
    ///
    /// This operation is `O(1)`.
    ///
    /// # Panics
    ///
    /// Panics if the range does not satisfy `start <= end` and `end <= self.len()`, or if
    /// converting an excluded start / included end bound overflows `usize`.
    ///
    /// # Examples
    ///
    /// ```
    /// use libdd_tinybytes::Bytes;
    ///
    /// let bytes = Bytes::copy_from_slice(b"hello world");
    /// let slice = bytes.slice(0..5);
    /// assert_eq!(slice.as_ref(), b"hello");
    ///
    /// let slice = bytes.slice(6..11);
    /// assert_eq!(slice.as_ref(), b"world");
    /// ```
    pub fn slice(&self, range: impl RangeBounds<usize>) -> Self {
        use std::ops::Bound::{Excluded, Included, Unbounded};

        let len = self.len();

        // Normalise both bounds to a half-open `start..end` pair.
        #[allow(clippy::expect_used)]
        let start = match range.start_bound() {
            Unbounded => 0,
            Included(&first) => first,
            Excluded(&before) => before.checked_add(1).expect("range start overflow"),
        };

        #[allow(clippy::expect_used)]
        let end = match range.end_bound() {
            Unbounded => len,
            Excluded(&past) => past,
            Included(&last) => last.checked_add(1).expect("range end overflow"),
        };

        assert!(
            start <= end,
            "range start must not be greater than end: {start:?} > {end:?}"
        );
        assert!(
            end <= len,
            "range end must not be greater than length: {end:?} > {len:?}"
        );

        // An empty result does not need to keep the underlying buffer alive.
        if start == end {
            return Bytes::empty();
        }
        self.safe_slice_ref(start, end)
    }

    /// Converts a `&[u8]` that lies inside `self` back into a `Bytes`.
    ///
    /// When a `Bytes` buffer is processed by other tools, one often ends up with a `&[u8]` that
    /// is really a sub-slice of the `Bytes`. This function turns such a `subset` into a `Bytes`
    /// sharing the same underlying data, as if `self.slice()` had been called with the matching
    /// range. `None` is returned when `subset` is not located within `self`. An empty `subset`
    /// always yields an empty `Bytes`.
    ///
    /// This operation is `O(1)`.
    ///
    /// # Examples
    ///
    /// ```
    /// use libdd_tinybytes::Bytes;
    ///
    /// let bytes = Bytes::copy_from_slice(b"hello world");
    /// let subset = &bytes.as_ref()[0..5];
    /// let slice = bytes.slice_ref(subset).unwrap();
    /// assert_eq!(slice.as_ref(), b"hello");
    ///
    /// let subset = &bytes.as_ref()[6..11];
    /// let slice = bytes.slice_ref(subset).unwrap();
    /// assert_eq!(slice.as_ref(), b"world");
    ///
    /// let invalid_subset = b"invalid";
    /// assert!(bytes.slice_ref(invalid_subset).is_none());
    /// ```
    pub fn slice_ref(&self, subset: &[u8]) -> Option<Bytes> {
        // An empty slice can be a subset of any slice.
        if subset.is_empty() {
            return Some(Bytes::empty());
        }

        // Compare the address ranges covered by `self` and by `subset`.
        let self_start = self.data.addr().get();
        let self_end = self_start + self.data.len();
        let subset_start = subset.as_ptr() as usize;
        let subset_end = subset_start + subset.len();

        let contained = self_start <= subset_start && subset_end <= self_end;
        contained.then(|| self.safe_slice_ref(subset_start - self_start, subset_end - self_start))
    }

    /// Creates a `Bytes` that takes ownership of `value` and views the slice returned by its
    /// `AsRef<[u8]>` implementation.
    ///
    /// `value` is moved into a refcounted cell and is kept alive until the returned `Bytes` and
    /// every handle sharing that cell have been dropped.
    pub fn from_underlying<T: UnderlyingBytes>(value: T) -> Self {
        let refcounted = make_refcounted(value);
        let arc_ptr = refcounted.data.cast::<CustomArc<T>>().as_ptr();

        // SAFETY: `make_refcounted` has just allocated a `CustomArc<T>` behind this pointer, and
        // it stays alive at least as long as `refcounted` is not dropped.
        let payload: &T = unsafe { &(*arc_ptr).data };
        let view = payload.as_ref();
        let len = view.len();
        // SAFETY: the pointer associated with a slice is never null.
        let ptr = unsafe { NonNull::new_unchecked(view.as_ptr().cast_mut()) };

        // SAFETY:
        // * the pointer associated with a slice is valid for the length of the slice
        // * it stays valid as long as `value` is not dropped, which `refcounted` guarantees
        unsafe { Self::from_raw_refcount(ptr, len, refcounted) }
    }

    /// Pointer to the first byte of the view.
    #[inline]
    fn ptr(&self) -> NonNull<u8> {
        self.data.cast()
    }

    /// Builds the sub-view `start..end` of `self`, sharing the same refcount.
    ///
    /// Panics if the bounds do not satisfy `start <= end <= self.len()`.
    #[inline]
    fn safe_slice_ref(&self, start: usize, end: usize) -> Self {
        let in_bounds = start <= end && end <= self.len();
        if !in_bounds {
            #[allow(clippy::panic)]
            {
                panic!("Out of bound slicing of Bytes instance")
            }
        }
        // SAFETY:
        // * start is at most len, so the resulting pointer is either inside the allocation or one
        //   past its end
        // * we have 0 <= start <= end <= len so 0 <= end - start <= len - start. Since the new
        //   pointer is ptr + start, the new view spans at most from ptr + start to
        //   (ptr + start) + (len - start) = ptr + len
        let sub_start = unsafe { self.ptr().add(start) };
        Self {
            data: NonNull::slice_from_raw_parts(sub_start, end - start),
            bytes: self.bytes.clone(),
        }
    }

    /// Borrows the viewed bytes as a plain slice.
    #[inline]
    fn as_slice(&self) -> &[u8] {
        // SAFETY: `data` is valid for reads for its associated length
        unsafe { self.data.as_ref() }
    }

    /// Decomposes the `Bytes` into its pointer, length and optional refcount.
    ///
    /// The parts can be handed back to `from_raw()` to rebuild the `Bytes`.
    #[inline]
    pub fn into_raw(self) -> (NonNull<u8>, usize, Option<RefCountedCell>) {
        let (ptr, len) = (self.ptr(), self.len());
        (ptr, len, self.bytes)
    }
}

// Implementations of `UnderlyingBytes` for common owned byte containers. Opting in here is what
// allows these types to be passed to `Bytes::from_underlying`, which views the slice returned by
// their `AsRef<[u8]>` implementation.
impl UnderlyingBytes for Vec<u8> {}
impl UnderlyingBytes for Box<[u8]> {}
impl UnderlyingBytes for String {}

// Implementations of common traits for `Bytes`.
impl Default for Bytes {
    fn default() -> Self {
        Self::empty()
    }
}

impl<T: UnderlyingBytes> From<T> for Bytes {
    fn from(value: T) -> Self {
        Self::from_underlying(value)
    }
}

impl AsRef<[u8]> for Bytes {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.as_slice()
    }
}

impl borrow::Borrow<[u8]> for Bytes {
    #[inline]
    fn borrow(&self) -> &[u8] {
        self.as_slice()
    }
}

impl ops::Deref for Bytes {
    type Target = [u8];
    #[inline]
    fn deref(&self) -> &Self::Target {
        self.as_slice()
    }
}

impl<T> PartialEq<T> for Bytes
where
    T: AsRef<[u8]>,
{
    /// Compares the viewed bytes with the bytes `other` exposes through `AsRef<[u8]>`.
    #[inline]
    fn eq(&self, other: &T) -> bool {
        let rhs: &[u8] = other.as_ref();
        self.as_slice() == rhs
    }
}

// Equality between two `Bytes` is plain byte-slice equality (see the `PartialEq` impl).
impl Eq for Bytes {}

impl<T: AsRef<[u8]>> PartialOrd<T> for Bytes {
    fn partial_cmp(&self, other: &T) -> Option<cmp::Ordering> {
        self.as_slice().partial_cmp(other.as_ref())
    }
}

impl Ord for Bytes {
    /// Total order between two `Bytes`, delegating to the ordering of their byte slices.
    fn cmp(&self, other: &Bytes) -> cmp::Ordering {
        let (lhs, rhs) = (self.as_slice(), other.as_slice());
        Ord::cmp(lhs, rhs)
    }
}

impl hash::Hash for Bytes {
    /// Hashes the viewed bytes exactly like the equivalent `[u8]` slice would be hashed.
    // TODO should we cache the hash since we know the bytes are immutable?
    #[inline]
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        hash::Hash::hash(self.as_slice(), state);
    }
}

impl fmt::Debug for Bytes {
    /// Formats the viewed bytes with the `Debug` implementation of `[u8]`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <[u8] as fmt::Debug>::fmt(self.as_slice(), f)
    }
}

#[cfg(feature = "serde")]
impl Serialize for Bytes {
    /// Hands the viewed bytes to the serializer through `serialize_bytes`.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let payload = self.as_slice();
        serializer.serialize_bytes(payload)
    }
}

/// A type-erased handle whose clone and drop behaviour is supplied by a vtable.
///
/// Cloning the cell calls the vtable's `clone` function and dropping it calls the vtable's
/// `drop` function, both with the stored `data` pointer.
pub struct RefCountedCell {
    // Opaque pointer passed to the vtable functions.
    data: NonNull<()>,
    // Functions invoked by the `Clone` and `Drop` implementations of the cell.
    vtable: &'static RefCountedCellVTable,
}

// SAFETY: `RefCountedCell::from_raw` requires the value pointed to by `data` to be
// `'static + Send + Sync`, and `make_refcounted` enforces the same bounds on its payload.
unsafe impl Send for RefCountedCell {}
// SAFETY: see the `Send` impl above; the same contract applies.
unsafe impl Sync for RefCountedCell {}

impl RefCountedCell {
    #[inline]
    /// Creates a new `RefCountedCell` from the given data and vtable.
    ///
    /// The data pointer can be used to store arbitrary data, that won't be dropped until the last
    /// clone to the `RefCountedCell` is dropped.
    /// The vtable customizes the behavior of a Waker which gets created from a RawWaker. For each
    /// operation on the Waker, the associated function in the vtable of the underlying RawWaker
    /// will be called.
    ///
    /// # Safety
    ///
    /// * The value pointed to by `data` must be 'static + Send + Sync
    pub const unsafe fn from_raw(data: NonNull<()>, vtable: &'static RefCountedCellVTable) -> Self {
        RefCountedCell { data, vtable }
    }
}

impl Clone for RefCountedCell {
    /// Produces a new cell by calling the vtable's `clone` function on the data pointer.
    fn clone(&self) -> Self {
        let clone_fn = self.vtable.clone;
        // SAFETY: `data` and `vtable` were supplied together when the cell was created, so
        // `data` is the pointer this vtable's functions expect.
        unsafe { clone_fn(self.data) }
    }
}

impl Drop for RefCountedCell {
    /// Releases this cell by calling the vtable's `drop` function on the data pointer.
    fn drop(&mut self) {
        let drop_fn = self.vtable.drop;
        // SAFETY: `data` and `vtable` were supplied together when the cell was created, so
        // `data` is the pointer this vtable's functions expect.
        unsafe { drop_fn(self.data) }
    }
}

/// Functions that define how a `RefCountedCell` is cloned and dropped.
///
/// Both functions receive the `data` pointer stored in the cell.
pub struct RefCountedCellVTable {
    /// Called when the cell is cloned; returns the new cell.
    pub clone: unsafe fn(NonNull<()>) -> RefCountedCell,
    /// Called when the cell is dropped.
    pub drop: unsafe fn(NonNull<()>),
}

/// A custom Arc implementation that contains only the strong count
///
/// This struct is not exposed outside of this module and is
/// only interacted with through the `RefCountedCell` API.
struct CustomArc<T> {
    // Number of live `RefCountedCell` handles pointing at this allocation.
    rc: AtomicUsize,
    // The payload kept alive by the count.
    #[allow(unused)]
    data: T,
}

/// Creates a refcounted cell.
///
/// The data passed to this cell will only be dopped when the last
/// clone of the cell is dropped.
fn make_refcounted<T: Send + Sync + 'static>(data: T) -> RefCountedCell {
    unsafe fn custom_arc_clone<T>(data: NonNull<()>) -> RefCountedCell {
        let custom_arc = data.cast::<CustomArc<T>>().as_ref();
        custom_arc
            .rc
            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        RefCountedCell::from_raw(
            data,
            &RefCountedCellVTable {
                clone: custom_arc_clone::<T>,
                drop: custom_arc_drop::<T>,
            },
        )
    }

    unsafe fn custom_arc_drop<T>(data: NonNull<()>) {
        let custom_arc = data.cast::<CustomArc<T>>().as_ref();
        if custom_arc
            .rc
            .fetch_sub(1, std::sync::atomic::Ordering::Release)
            != 1
        {
            return;
        }

        // Run drop + free memory on the data manually rather than casting back to a box
        // because otherwise miri complains

        // See standard library documentation for std::sync::Arc to see why this is needed.
        // https://github.com/rust-lang/rust/blob/2a5da7acd4c3eae638aa1c46f3a537940e60a0e4/library/alloc/src/sync.rs#L2647-L2675
        std::sync::atomic::fence(std::sync::atomic::Ordering::Acquire);
        {
            let custom_arc = data.cast::<CustomArc<T>>().as_mut();
            std::ptr::drop_in_place(custom_arc);
        }

        std::alloc::dealloc(
            data.as_ptr() as *mut u8,
            std::alloc::Layout::new::<CustomArc<T>>(),
        );
    }

    let rc = Box::leak(Box::new(CustomArc {
        rc: AtomicUsize::new(1),
        data,
    })) as *mut _ as *const ();
    RefCountedCell {
        data: unsafe { NonNull::new_unchecked(rc as *mut ()) },
        vtable: &RefCountedCellVTable {
            clone: custom_arc_clone::<T>,
            drop: custom_arc_drop::<T>,
        },
    }
}

#[cfg(feature = "bytes_string")]
mod bytes_string;
#[cfg(feature = "bytes_string")]
pub use bytes_string::BytesString;

#[cfg(test)]
mod test;