arc_bytes/
lib.rs

1#![doc = include_str!("../README.md")]
2#![forbid(unsafe_code)]
3#![warn(
4    clippy::cargo,
5    missing_docs,
6    // clippy::missing_docs_in_private_items,
7    clippy::nursery,
8    clippy::pedantic,
9    future_incompatible,
10    rust_2018_idioms,
11)]
12#![cfg_attr(doc, deny(rustdoc::all))]
13#![allow(
14    clippy::missing_errors_doc, // TODO clippy::missing_errors_doc
15    clippy::option_if_let_else,
16    clippy::module_name_repetitions,
17)]
18
19use std::{
20    borrow::Cow,
21    cmp::Ordering,
22    fmt::{Debug, Write},
23    io::{self, ErrorKind, Read},
24    ops::{Bound, Deref, DerefMut, RangeBounds},
25    sync::Arc,
26};
27
/// Formats the bytes contained in `slice` into the provided `writer`.
///
/// When `slice` is valid UTF-8 it is written as a quoted, escaped string (the
/// same output as formatting the `&str` with `{:?}`). Otherwise the bytes are
/// written in chunks of 8 hexadecimal characters (4 bytes), with a single
/// space between each chunk.
///
/// This function powers the [`Debug`] trait implementations within the crate.
///
/// ```rust
/// let mut printed = String::default();
/// arc_bytes::print_bytes(b"\x01\x23\x45\x67\x89", &mut printed).unwrap();
/// assert_eq!(printed, "01234567 89");
///
/// let mut printed = String::default();
/// arc_bytes::print_bytes(b"hello", &mut printed).unwrap();
/// assert_eq!(printed, "\"hello\"");
/// ```
pub fn print_bytes<W: Write>(slice: &[u8], writer: &mut W) -> std::fmt::Result {
    match std::str::from_utf8(slice) {
        Ok(text) => write!(writer, "{text:?}"),
        Err(_) => {
            for (index, group) in slice.chunks(4).enumerate() {
                // Separate groups with a space, but never emit a trailing one.
                if index > 0 {
                    writer.write_char(' ')?;
                }
                for byte in group {
                    write!(writer, "{:02x}", byte)?;
                }
            }
            Ok(())
        }
    }
}
56
/// An immutable buffer of bytes that can be cloned, sliced, and read into
/// multiple parts using a single reference to the underlying buffer.
///
/// The read operations do not mutate the buffer when shortening the `self`
/// instance. Instead, the position is tracked within the original source buffer.
#[derive(Clone)]
pub struct ArcBytes<'a> {
    // Backing storage for the bytes; cloning an instance clones this handle.
    buffer: Bytes<'a>,
    // Exclusive end offset into `buffer` of the bytes visible through this instance.
    end: usize,
    // Offset into `buffer` of the first byte visible through this instance.
    position: usize,
}
68
69impl<'a> std::hash::Hash for ArcBytes<'a> {
70    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
71        self.as_slice().hash(state);
72    }
73}
74
// The storage behind an `ArcBytes`. Dereferences to the full stored byte slice.
#[derive(Clone)]
enum Bytes<'a> {
    // No bytes are stored; dereferences to an empty slice.
    None,
    // Bytes borrowed from elsewhere for the lifetime `'a`.
    Borrowed(&'a [u8]),
    // Owned bytes held in an `Arc`, so clones share the same `Vec`.
    Owned(Arc<Vec<u8>>),
}
81
82impl<'a> Deref for Bytes<'a> {
83    type Target = [u8];
84
85    fn deref(&self) -> &Self::Target {
86        match self {
87            Bytes::None => b"",
88            Bytes::Borrowed(bytes) => bytes,
89            Bytes::Owned(vec) => vec,
90        }
91    }
92}
93
94impl<'a> Default for ArcBytes<'a> {
95    fn default() -> Self {
96        Self::new()
97    }
98}
99
100impl<'a> AsRef<[u8]> for Bytes<'a> {
101    fn as_ref(&self) -> &[u8] {
102        &**self
103    }
104}
105
#[test]
fn default_is_new() {
    // `Default` must produce the same empty value as the explicit constructor.
    let from_new = ArcBytes::new();
    let from_default = ArcBytes::default();
    assert_eq!(from_new, from_default);
}
110
111impl<'a> Debug for ArcBytes<'a> {
112    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
113        let slice = self.as_slice();
114        write!(f, "ArcBytes {{ length: {}, bytes: [", slice.len())?;
115        print_bytes(slice, f)?;
116        f.write_str("] }")
117    }
118}
119
#[test]
fn debug_fmt() {
    // The trailing 0x89 byte is not valid UTF-8, so the hex rendering is used.
    let rendered = format!("{:?}", ArcBytes::borrowed(b"\x01\x23\x45\x67\x89"));
    let expected = "ArcBytes { length: 5, bytes: [01234567 89] }";
    assert_eq!(rendered, expected);
}
128
// Marker impl: equality between `ArcBytes` values is decided by `Ord::cmp`
// (see the `PartialEq` impl), which is a total ordering.
impl<'a> Eq for ArcBytes<'a> {}
130
131impl<'a, 'b> PartialEq<ArcBytes<'b>> for ArcBytes<'a> {
132    fn eq(&self, other: &ArcBytes<'b>) -> bool {
133        self.cmp(other) == Ordering::Equal
134    }
135}
136
// Implements equality and ordering between `$self` and byte slices / byte
// arrays (by value and by reference). `$self` must provide an `as_slice()`
// method returning `&[u8]` and must be written using the lifetime `'a`.
macro_rules! impl_std_cmp {
    ($self:ty) => {
        // --- Equality -----------------------------------------------------

        impl<'a> PartialEq<[u8]> for $self {
            fn eq(&self, other: &[u8]) -> bool {
                self.as_slice() == other
            }
        }

        impl<'a, 'b> PartialEq<&'b [u8]> for $self {
            fn eq(&self, other: &&'b [u8]) -> bool {
                self.as_slice() == *other
            }
        }

        impl<'a, const SIZE: usize> PartialEq<[u8; SIZE]> for $self {
            fn eq(&self, other: &[u8; SIZE]) -> bool {
                self.as_slice() == &other[..]
            }
        }

        impl<'a, 'b, const N: usize> PartialEq<&'b [u8; N]> for $self {
            fn eq(&self, other: &&'b [u8; N]) -> bool {
                self.as_slice() == &other[..]
            }
        }

        // --- Ordering -----------------------------------------------------

        impl<'a> PartialOrd<[u8]> for $self {
            fn partial_cmp(&self, other: &[u8]) -> Option<std::cmp::Ordering> {
                PartialOrd::partial_cmp(self.as_slice(), other)
            }
        }

        impl<'b, 'a> PartialOrd<&'b [u8]> for $self {
            fn partial_cmp(&self, other: &&'b [u8]) -> Option<std::cmp::Ordering> {
                PartialOrd::partial_cmp(self.as_slice(), *other)
            }
        }

        impl<'a, 'b, const SIZE: usize> PartialOrd<&'b [u8; SIZE]> for $self {
            fn partial_cmp(&self, other: &&'b [u8; SIZE]) -> Option<std::cmp::Ordering> {
                PartialOrd::partial_cmp(self.as_slice(), &other[..])
            }
        }
    };
}
182
// Generate the slice/array `PartialEq` and `PartialOrd` impls for `ArcBytes`.
impl_std_cmp!(ArcBytes<'a>);
184
185impl<'a> Ord for ArcBytes<'a> {
186    fn cmp(&self, other: &Self) -> Ordering {
187        match (&self.buffer, &other.buffer) {
188            (Bytes::Owned(a), Bytes::Owned(b))
189                if Arc::ptr_eq(a, b)
190                    && self.position == other.position
191                    && self.end == other.end =>
192            {
193                Ordering::Equal
194            }
195            _ => (&**self).cmp(&**other),
196        }
197    }
198}
199
#[test]
fn ord_tests() {
    // Simple comparisons between borrowed instances.
    assert_eq!(ArcBytes::borrowed(b"eq"), ArcBytes::borrowed(b"eq"));
    assert!(ArcBytes::borrowed(b"hello") < ArcBytes::borrowed(b"world"));

    // Simple comparisons with two separately allocated arcs: the pointers
    // differ, so the comparison must fall back to the byte contents.
    assert_eq!(
        ArcBytes::owned(b"eq".to_vec()),
        ArcBytes::owned(b"eq".to_vec())
    );
    assert!(ArcBytes::owned(b"hello".to_vec()) < ArcBytes::owned(b"world".to_vec()));

    // Owned and borrowed instances compare by content.
    assert_eq!(ArcBytes::owned(b"eq".to_vec()), ArcBytes::borrowed(b"eq"));

    // Clones of a borrowed buffer.
    let buffer = ArcBytes::borrowed(b"eq");
    let mut buffer_clone = buffer.clone();
    assert_eq!(buffer_clone, buffer);
    buffer_clone.read_bytes(1).unwrap();
    assert_ne!(buffer_clone, buffer);
    assert!(buffer_clone > buffer);

    // Clones using the same underlying arc: identical windows hit the
    // pointer-equality fast path, while differing windows compare by content.
    let buffer = ArcBytes::owned(b"eq".to_vec());
    let mut buffer_clone = buffer.clone();
    assert_eq!(buffer_clone, buffer);
    buffer_clone.read_bytes(1).unwrap();
    assert_ne!(buffer_clone, buffer);
    assert!(buffer_clone > buffer);
}
213
214impl<'a, 'b> PartialOrd<ArcBytes<'b>> for ArcBytes<'a> {
215    fn partial_cmp(&self, other: &ArcBytes<'b>) -> Option<Ordering> {
216        Some(self.cmp(other))
217    }
218}
219
220impl<'a> ArcBytes<'a> {
221    /// Returns an empty instance.
222    ///
223    /// ```rust
224    /// # use arc_bytes::ArcBytes;
225    /// assert!(ArcBytes::new().is_empty());
226    /// ```
227    #[must_use]
228    pub const fn new() -> Self {
229        Self {
230            buffer: Bytes::None,
231            end: 0,
232            position: 0,
233        }
234    }
235
236    /// Returns an instance with the owned bytes.
237    ///
238    /// ```rust
239    /// # use arc_bytes::ArcBytes;
240    /// assert_eq!(ArcBytes::owned(b"hello".to_vec()), b"hello");
241    /// ```
242    #[must_use]
243    pub fn owned(buffer: Vec<u8>) -> Self {
244        Self::from(Cow::Owned(buffer))
245    }
246
247    /// Returns a borrowed instance.
248    ///
249    /// ```rust
250    /// # use arc_bytes::ArcBytes;
251    /// assert_eq!(ArcBytes::borrowed(b"hello"), b"hello");
252    /// ```
253    #[must_use]
254    pub fn borrowed(buffer: &'a [u8]) -> Self {
255        Self::from(Cow::Borrowed(buffer))
256    }
257
258    /// Converts this instance into a static lifetime, re-allocating if
259    /// necessary.
260    ///
261    /// ```rust
262    /// # use arc_bytes::ArcBytes;
263    /// assert_eq!(ArcBytes::borrowed(b"hello").to_owned(), b"hello");
264    /// ```
265    #[must_use]
266    pub fn into_owned(self) -> ArcBytes<'static> {
267        let buffer = match self.buffer {
268            Bytes::Owned(owned) => {
269                return ArcBytes {
270                    buffer: Bytes::Owned(owned),
271                    end: self.end,
272                    position: self.position,
273                }
274            }
275            other => other,
276        };
277        ArcBytes::from(buffer[self.position..self.end].to_vec())
278    }
279
280    /// Converts a clone of this instance into a static lifetime.
281    #[must_use]
282    pub fn to_owned(&self) -> ArcBytes<'static> {
283        self.clone().into_owned()
284    }
285
286    /// Converts this instance into a `Vec<u8>`, attempting to do so without
287    /// extra copying if possible.
288    #[must_use]
289    pub fn into_vec(self) -> Vec<u8> {
290        let buffer = match self.buffer {
291            Bytes::Owned(owned) => {
292                let owned = if self.position == 0 && self.end == owned.len() {
293                    match Arc::try_unwrap(owned) {
294                        Ok(vec) => return vec,
295                        Err(arc) => arc,
296                    }
297                } else {
298                    owned
299                };
300                Bytes::Owned(owned)
301            }
302            other => other,
303        };
304        buffer[self.position..self.end].to_vec()
305    }
306
307    /// Returns this instance as a slice of `u8`s.
308    ///
309    /// ```rust
310    /// # use arc_bytes::ArcBytes;
311    /// assert_eq!(ArcBytes::borrowed(b"hello").as_slice(), b"hello");
312    /// ```
313    #[must_use]
314    pub fn as_slice(&self) -> &[u8] {
315        if self.position < self.end {
316            &self.buffer[self.position..self.end]
317        } else {
318            b""
319        }
320    }
321
322    /// Returns a slice of these bytes as its own `ArcBytes` instance. This
323    /// performs no allocations, and instead references the original bytes.
324    ///
325    /// ```rust
326    /// # use arc_bytes::ArcBytes;
327    /// let original = ArcBytes::borrowed(b"abc");
328    /// let b = original.slice(1..=1);
329    /// assert_eq!(b, b"b");
330    /// ```
331    #[must_use]
332    pub fn slice<R: RangeBounds<usize>>(&self, range: R) -> Self {
333        let start = self.position.saturating_add(match range.start_bound() {
334            Bound::Included(&start) => start,
335            Bound::Excluded(start) => start.saturating_add(1),
336            Bound::Unbounded => 0,
337        });
338        let end = match range.end_bound() {
339            Bound::Included(&end) => self.position.saturating_add(end).saturating_add(1),
340            Bound::Excluded(&end) => self.position.saturating_add(end),
341            Bound::Unbounded => self.end,
342        }
343        .min(self.end);
344
345        Self {
346            buffer: self.buffer.clone(),
347            position: start,
348            end,
349        }
350    }
351
352    /// Reads `count` bytes from the front of the bytes, returning a new
353    /// instance that shares the same underlying bytes. `self` is advanced
354    /// inside of the buffer to point.
355    ///
356    /// ```rust
357    /// # use arc_bytes::ArcBytes;
358    /// let mut buffer = ArcBytes::borrowed(b"abc");
359    /// let ab = buffer.read_bytes(2).unwrap();
360    /// assert_eq!(ab, b"ab");
361    /// let c = buffer.read_bytes(1).unwrap();
362    /// assert_eq!(c, b"c");
363    /// assert_eq!(buffer, b"");
364    /// ```
365    pub fn read_bytes(&mut self, count: usize) -> Result<Self, std::io::Error> {
366        let start = self.position;
367        let end = self.position + count;
368        if end > self.end {
369            Err(std::io::Error::from(ErrorKind::UnexpectedEof))
370        } else {
371            self.position = end;
372            let result = Self {
373                buffer: self.buffer.clone(),
374                end,
375                position: start,
376            };
377            self.deallocate_if_empty();
378            Ok(result)
379        }
380    }
381
382    /// Splits the bytes into two parts at `offset`. This method will not panic
383    /// of `offset` is too large, instead it will be treated as if `offset` is
384    /// `self.len()` -- the first instance will contain all of the bytes, and
385    /// the second instance will be empty.
386    ///
387    /// ```rust
388    /// # use arc_bytes::ArcBytes;
389    /// let buffer = ArcBytes::borrowed(b"abc");
390    /// let (ab, c) = buffer.split_at(2);
391    /// assert_eq!(ab, b"ab");
392    ///
393    /// let (c, empty) = c.split_at(usize::MAX);
394    /// assert_eq!(c, b"c");
395    /// assert_eq!(empty, b"");
396    /// ```
397    #[must_use]
398    pub fn split_at(self, offset: usize) -> (Self, Self) {
399        let split_end = self.position.saturating_add(offset).min(self.end);
400        (
401            Self {
402                buffer: self.buffer.clone(),
403                position: self.position,
404                end: split_end,
405            },
406            Self {
407                buffer: self.buffer,
408                position: split_end,
409                end: self.end,
410            },
411        )
412    }
413
414    /// Returns an iterator for the contained bytes.
415    #[must_use]
416    pub const fn iter(&self) -> Iter<'_> {
417        Iter {
418            buffer: Cow::Borrowed(self),
419            offset: 0,
420        }
421    }
422
423    fn deallocate_if_empty(&mut self) {
424        if self.position == self.end {
425            self.buffer = Bytes::None;
426            self.position = 0;
427            self.end = 0;
428        }
429    }
430}
431
#[test]
fn slice_tests() {
    let original = ArcBytes::borrowed(b"abc");

    // Inclusive and exclusive forms of the same single-byte range.
    assert_eq!(original.slice(1..=1), b"b");
    assert_eq!(original.slice(1..2), b"b");

    // Ranges with one or both ends unbounded.
    assert_eq!(original.slice(..2), b"ab");
    assert_eq!(original.slice(..), b"abc");
    assert_eq!(original.slice(1..), b"bc");
}
446
447impl<'a> From<Cow<'a, [u8]>> for ArcBytes<'a> {
448    fn from(buffer: Cow<'a, [u8]>) -> Self {
449        let buffer = match buffer {
450            Cow::Borrowed(borrowed) => Bytes::Borrowed(borrowed),
451            Cow::Owned(vec) => Bytes::Owned(Arc::new(vec)),
452        };
453        let end = buffer.len();
454        Self {
455            end,
456            buffer: if end > 0 { buffer } else { Bytes::None },
457            position: 0,
458        }
459    }
460}
461
#[test]
fn from_cow_tests() {
    // A non-empty borrowed slice is wrapped as-is.
    let one_byte: &[u8] = b"a";
    assert_eq!(ArcBytes::from(Cow::Borrowed(one_byte)), b"a");

    // An empty slice must not keep any storage around.
    let no_bytes: &[u8] = b"";
    let empty = ArcBytes::from(Cow::Borrowed(no_bytes));
    assert!(matches!(empty.buffer, Bytes::None));
}
470
471impl<'a> From<Vec<u8>> for ArcBytes<'a> {
472    fn from(buffer: Vec<u8>) -> Self {
473        Self::owned(buffer)
474    }
475}
476
477impl<'a> From<String> for ArcBytes<'a> {
478    fn from(buffer: String) -> Self {
479        Self::owned(buffer.into_bytes())
480    }
481}
482
483impl<'a> From<&'a str> for ArcBytes<'a> {
484    fn from(buffer: &'a str) -> Self {
485        Self::borrowed(buffer.as_bytes())
486    }
487}
488
489impl<'a> From<&'a [u8]> for ArcBytes<'a> {
490    fn from(buffer: &'a [u8]) -> Self {
491        Self::borrowed(buffer)
492    }
493}
494
495impl<'a, const N: usize> From<&'a [u8; N]> for ArcBytes<'a> {
496    fn from(buffer: &'a [u8; N]) -> Self {
497        Self::borrowed(buffer)
498    }
499}
500
501impl<'a, const N: usize> From<[u8; N]> for ArcBytes<'a> {
502    fn from(buffer: [u8; N]) -> Self {
503        Self::owned(buffer.to_vec())
504    }
505}
506
#[test]
fn conversion_tests() {
    // Owned sources.
    let from_vec = ArcBytes::from(b"hello".to_vec());
    assert_eq!(from_vec, b"hello");
    let from_string = ArcBytes::from(String::from("hello"));
    assert_eq!(from_string, b"hello");

    // Borrowed sources.
    let from_str = ArcBytes::from("hello");
    assert_eq!(from_str, b"hello");
    let from_slice = ArcBytes::from(&b"hello"[..]);
    assert_eq!(from_slice, b"hello");
    let from_array_ref = ArcBytes::from(b"hello");
    assert_eq!(from_array_ref, b"hello");
}
515
516impl<'a> Deref for ArcBytes<'a> {
517    type Target = [u8];
518
519    fn deref(&self) -> &Self::Target {
520        self.as_slice()
521    }
522}
523
524impl<'a> std::borrow::Borrow<[u8]> for ArcBytes<'a> {
525    fn borrow(&self) -> &[u8] {
526        &**self
527    }
528}
529
530impl<'a> AsRef<[u8]> for ArcBytes<'a> {
531    fn as_ref(&self) -> &[u8] {
532        &**self
533    }
534}
535
536impl<'a> Read for ArcBytes<'a> {
537    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
538        let end = self.buffer.len().min(self.position + buf.len());
539        let bytes_read = buf.len().min(end.saturating_sub(self.position));
540
541        if bytes_read == 0 {
542            return Err(io::Error::from(ErrorKind::UnexpectedEof));
543        }
544
545        buf[..bytes_read].copy_from_slice(&self.buffer[self.position..end]);
546        self.position = end;
547
548        self.deallocate_if_empty();
549
550        Ok(bytes_read)
551    }
552}
553
#[test]
fn read_tests() {
    let mut source = ArcBytes::borrowed(b"abc");
    let mut scratch = [0_u8; 2];

    // The first read fills the whole scratch buffer.
    let first = source.read(&mut scratch).unwrap();
    assert_eq!(first, 2);
    assert_eq!(&scratch, b"ab");

    // The second read only has one byte left; the stale second byte remains.
    let second = source.read(&mut scratch).unwrap();
    assert_eq!(second, 1);
    assert_eq!(&scratch, b"cb");

    // Nothing remains, so reading reports an error and the source is empty.
    assert!(source.read(&mut scratch).is_err());
    assert!(source.is_empty());
}
565
566impl<'a> IntoIterator for ArcBytes<'a> {
567    type Item = u8;
568
569    type IntoIter = Iter<'a>;
570
571    fn into_iter(self) -> Self::IntoIter {
572        Iter {
573            buffer: Cow::Owned(self),
574            offset: 0,
575        }
576    }
577}
578
/// An iterator for an [`ArcBytes`].
pub struct Iter<'a> {
    // The bytes being iterated: borrowed when created by `ArcBytes::iter`,
    // owned when created by `IntoIterator::into_iter`.
    buffer: Cow<'a, ArcBytes<'a>>,
    // Index, relative to the visible bytes of `buffer`, of the next byte to yield.
    offset: usize,
}
584
585impl<'a> Iterator for Iter<'a> {
586    type Item = u8;
587
588    fn next(&mut self) -> Option<Self::Item> {
589        if self.offset < self.buffer.len() {
590            let byte = self.buffer[self.offset];
591            self.offset += 1;
592            Some(byte)
593        } else {
594            None
595        }
596    }
597
598    fn size_hint(&self) -> (usize, Option<usize>) {
599        let length = self.buffer.len() - self.offset;
600        (length, Some(length))
601    }
602}
603
#[test]
fn iterator_tests() {
    // An empty instance yields nothing.
    let empty = ArcBytes::new();
    assert_eq!(empty.iter().count(), 0);

    // Bytes are yielded in order.
    let source = ArcBytes::from(vec![0, 1, 2]);
    let iterated: Vec<u8> = source.iter().collect();
    assert_eq!(iterated, vec![0, 1, 2]);
}
610
#[test]
fn read_zero_bytes_at_end() {
    let mut source = ArcBytes::from(&[0, 1, 2, 3]);
    // Consume everything so the source has released its buffer.
    source.read_bytes(4).unwrap();

    // A zero-length read from the drained source must still succeed, and the
    // result must convert to an owned, empty instance.
    let nothing = source.read_bytes(0).unwrap().into_owned();
    assert!(nothing.is_empty());
}
619
/// An instance of [`ArcBytes`] that is not borrowing its underlying data.
///
/// The wrapped value has the `'static` lifetime and is reachable through the
/// public field as well as through `Deref`/`DerefMut`.
#[derive(Debug, Clone, Hash, Eq, PartialEq, Ord, PartialOrd)]
pub struct OwnedBytes(pub ArcBytes<'static>);
623
624impl Deref for OwnedBytes {
625    type Target = ArcBytes<'static>;
626
627    fn deref(&self) -> &Self::Target {
628        &self.0
629    }
630}
631
632impl DerefMut for OwnedBytes {
633    fn deref_mut(&mut self) -> &mut Self::Target {
634        &mut self.0
635    }
636}
637
638impl From<Vec<u8>> for OwnedBytes {
639    fn from(vec: Vec<u8>) -> Self {
640        Self(ArcBytes::from(vec))
641    }
642}
643
644impl<'a> From<ArcBytes<'a>> for OwnedBytes {
645    fn from(bytes: ArcBytes<'a>) -> Self {
646        Self(bytes.into_owned())
647    }
648}
649
650impl<'a> From<&'a [u8]> for OwnedBytes {
651    fn from(bytes: &'a [u8]) -> Self {
652        Self(ArcBytes::owned(bytes.to_vec()))
653    }
654}
655
656impl<'a, const SIZE: usize> From<&'a [u8; SIZE]> for OwnedBytes {
657    fn from(bytes: &'a [u8; SIZE]) -> Self {
658        Self(ArcBytes::owned(bytes.to_vec()))
659    }
660}
661
662impl std::borrow::Borrow<[u8]> for OwnedBytes {
663    fn borrow(&self) -> &[u8] {
664        &**self
665    }
666}
667
668impl PartialEq<[u8]> for OwnedBytes {
669    fn eq(&self, other: &[u8]) -> bool {
670        self.0 == other
671    }
672}
673
674impl PartialOrd<[u8]> for OwnedBytes {
675    fn partial_cmp(&self, other: &[u8]) -> Option<Ordering> {
676        self.0.partial_cmp(other)
677    }
678}
679
680impl<'a> PartialEq<&'a [u8]> for OwnedBytes {
681    fn eq(&self, other: &&'a [u8]) -> bool {
682        self.0 == *other
683    }
684}
685
686impl<'a> PartialOrd<&'a [u8]> for OwnedBytes {
687    fn partial_cmp(&self, other: &&'a [u8]) -> Option<Ordering> {
688        self.0.partial_cmp(other)
689    }
690}
691
692impl<'a, const SIZE: usize> PartialEq<&'a [u8; SIZE]> for OwnedBytes {
693    fn eq(&self, other: &&'a [u8; SIZE]) -> bool {
694        self.0 == *other
695    }
696}
697
698impl<'a, const SIZE: usize> PartialOrd<&'a [u8; SIZE]> for OwnedBytes {
699    fn partial_cmp(&self, other: &&'a [u8; SIZE]) -> Option<Ordering> {
700        self.0.partial_cmp(other)
701    }
702}
703
704/// Efficient serialization implementation, ensuring bytes are written as a
705/// buffer of bytes not as a sequence.
706#[cfg(feature = "serde")]
707pub mod serde;