append_only_bytes/lib.rs

#![deny(clippy::undocumented_unsafe_blocks)]
#![doc = include_str!("../README.md")]

mod raw_bytes;
use std::{
    fmt::Debug,
    ops::{Deref, Index, RangeBounds},
    slice::SliceIndex,
    sync::Arc,
};

use raw_bytes::RawBytes;
#[cfg(feature = "serde")]
mod serde;

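/// An append-only byte buffer backed by a reference-counted allocation.
///
/// Existing bytes are never mutated or removed, so cheap [`BytesSlice`]
/// views can be handed out and stay valid while new bytes are appended.
///
/// A minimal usage sketch (assuming this crate is depended on as
/// `append_only_bytes`, matching the directory name):
///
/// ```
/// use append_only_bytes::AppendOnlyBytes;
///
/// let mut bytes = AppendOnlyBytes::new();
/// bytes.push_str("hello");
/// let slice = bytes.slice(..);
/// bytes.push_str(" world");
/// // The earlier slice still sees exactly the bytes it was created over.
/// assert_eq!(slice.as_bytes(), b"hello");
/// assert_eq!(&bytes[..], b"hello world");
/// ```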
pub struct AppendOnlyBytes {
    raw: Arc<RawBytes>,
    len: usize,
}

impl Debug for AppendOnlyBytes {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("AppendOnlyBytes")
            .field("data", &self.as_bytes())
            .field("len", &self.len)
            .finish()
    }
}

// `Clone` deep-copies the contents into a fresh allocation, so the clone can
// append independently of the original.
impl Clone for AppendOnlyBytes {
    fn clone(&self) -> Self {
        let new = RawBytes::with_capacity(self.capacity());
        // SAFETY: both buffers have capacity of at least `self.len`, and the
        // first `self.len` bytes of the source are initialized
        unsafe {
            std::ptr::copy_nonoverlapping(self.raw.ptr(), new.ptr(), self.len);
        }

        Self {
            #[allow(clippy::arc_with_non_send_sync)]
            raw: Arc::new(new),
            len: self.len,
        }
    }
}

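/// An immutable, cheaply cloneable view into a region of an
/// [`AppendOnlyBytes`] buffer (or a standalone buffer created via
/// [`BytesSlice::from_bytes`]).
///
/// The slice keeps its backing allocation alive through an `Arc`, so it
/// remains valid even after the originating buffer is dropped or grows.
/// With the `u32_range` feature enabled, the bounds are stored as `u32`
/// to make the struct smaller.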
#[derive(Clone)]
pub struct BytesSlice {
    raw: Arc<RawBytes>,
    #[cfg(not(feature = "u32_range"))]
    start: usize,
    #[cfg(not(feature = "u32_range"))]
    end: usize,
    #[cfg(feature = "u32_range")]
    start: u32,
    #[cfg(feature = "u32_range")]
    end: u32,
}

impl Debug for BytesSlice {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("BytesSlice")
            .field("data", &&self[..])
            .field("start", &self.start)
            .field("end", &self.end)
            .finish()
    }
}

impl PartialEq for BytesSlice {
    fn eq(&self, other: &Self) -> bool {
        self.as_bytes() == other.as_bytes()
    }
}

impl Eq for BytesSlice {}

impl PartialOrd for BytesSlice {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for BytesSlice {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.as_bytes().cmp(other.as_bytes())
    }
}

// SAFETY: `AppendOnlyBytes` has no interior mutability, and only its (unique)
// owner can append; bytes that have been written are never mutated, so the
// type is safe to send across threads.
unsafe impl Send for AppendOnlyBytes {}
// SAFETY: `AppendOnlyBytes` has no interior mutability, and only its (unique)
// owner can append; bytes that have been written are never mutated, so the
// type is safe to share across threads.
unsafe impl Sync for AppendOnlyBytes {}

const MIN_CAPACITY: usize = 32;
impl AppendOnlyBytes {
    #[inline(always)]
    pub fn new() -> Self {
        Self::with_capacity(0)
    }

    #[inline(always)]
    pub fn as_bytes(&self) -> &[u8] {
        // SAFETY: the first `self.len` bytes are initialized
        unsafe { self.raw.slice(..self.len) }
    }

    #[inline(always)]
    pub fn with_capacity(capacity: usize) -> Self {
        #[allow(clippy::arc_with_non_send_sync)]
        let raw = Arc::new(RawBytes::with_capacity(capacity));
        Self { raw, len: 0 }
    }

    #[inline(always)]
    pub fn len(&self) -> usize {
        self.len
    }

    #[inline(always)]
    pub fn capacity(&self) -> usize {
        self.raw.capacity()
    }

    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    #[inline(always)]
    pub fn push_slice(&mut self, slice: &[u8]) {
        self.reserve(slice.len());
        // SAFETY: We have reserved enough space for the slice
        unsafe {
            std::ptr::copy_nonoverlapping(
                slice.as_ptr(),
                self.raw.ptr().add(self.len),
                slice.len(),
            );
            self.len += slice.len();
        }
    }

    #[inline(always)]
    pub fn push_str(&mut self, slice: &str) {
        self.push_slice(slice.as_bytes());
    }

    #[inline(always)]
    pub fn push(&mut self, byte: u8) {
        self.reserve(1);
        // SAFETY: We have reserved enough space for the byte
        unsafe {
            std::ptr::write(self.raw.ptr().add(self.len), byte);
            self.len += 1;
        }
    }

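    // Note: when `reserve` has to grow, it swaps in a brand-new allocation and
    // copies the existing bytes over. Slices created earlier keep the previous
    // allocation alive through their own `Arc<RawBytes>`, which is why existing
    // `BytesSlice` values remain valid across reallocation.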
    #[inline]
    pub fn reserve(&mut self, size: usize) {
        let target_capacity = self.len() + size;
        if target_capacity > self.capacity() {
            // Double the capacity (starting from MIN_CAPACITY) until it fits.
            let mut new_capacity = (self.capacity() * 2).max(MIN_CAPACITY);
            while new_capacity < target_capacity {
                new_capacity *= 2;
            }

            let src = std::mem::replace(self, Self::with_capacity(new_capacity));
            // SAFETY: both buffers have capacity of at least `src.len()`, and
            // the first `src.len()` bytes of the source are initialized
            unsafe {
                std::ptr::copy_nonoverlapping(src.raw.ptr(), self.raw.ptr(), src.len());
                self.len = src.len();
            }
        }
    }

    #[inline]
    pub fn slice_str(&self, range: impl RangeBounds<usize>) -> Result<&str, std::str::Utf8Error> {
        let (start, end) = get_range(range, self.len());
        // SAFETY: data inside start..end is initialized
        std::str::from_utf8(unsafe { self.raw.slice(start..end) })
    }

    #[inline]
    pub fn slice(&self, range: impl RangeBounds<usize>) -> BytesSlice {
        let (start, end) = get_range(range, self.len());
        BytesSlice::new(self.raw.clone(), start, end)
    }

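    /// Consumes the buffer and returns a [`BytesSlice`] covering all of its
    /// contents, without copying the underlying bytes.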
    #[inline(always)]
    pub fn to_slice(self) -> BytesSlice {
        let end = self.len();
        BytesSlice::new(self.raw, 0, end)
    }
}

impl Default for AppendOnlyBytes {
    #[inline(always)]
    fn default() -> Self {
        Self::new()
    }
}

// Resolves a `RangeBounds` into concrete `(start, end)` indices, panicking on
// inverted or out-of-range bounds.
#[inline(always)]
fn get_range(range: impl RangeBounds<usize>, max_len: usize) -> (usize, usize) {
    let start = match range.start_bound() {
        std::ops::Bound::Included(&v) => v,
        std::ops::Bound::Excluded(&v) => v + 1,
        std::ops::Bound::Unbounded => 0,
    };
    let end = match range.end_bound() {
        std::ops::Bound::Included(&v) => v + 1,
        std::ops::Bound::Excluded(&v) => v,
        std::ops::Bound::Unbounded => max_len,
    };
    assert!(start <= end);
    assert!(end <= max_len);
    (start, end)
}

impl<I: SliceIndex<[u8]>> Index<I> for AppendOnlyBytes {
    type Output = I::Output;

    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        // SAFETY: data inside 0..self.len is initialized
        unsafe { Index::index(self.raw.slice(..self.len), index) }
    }
}

// SAFETY: `BytesSlice` has no interior mutability, and the bytes it can access
// are never mutated once written, so it is safe to send across threads.
unsafe impl Send for BytesSlice {}
// SAFETY: `BytesSlice` has no interior mutability, and the bytes it can access
// are never mutated once written, so it is safe to share across threads.
unsafe impl Sync for BytesSlice {}

// With the `u32_range` feature, slice bounds are stored as `u32` to shrink
// `BytesSlice`.
#[cfg(not(feature = "u32_range"))]
type Int = usize;
#[cfg(feature = "u32_range")]
type Int = u32;

impl BytesSlice {
    #[inline(always)]
    fn new(raw: Arc<RawBytes>, start: usize, end: usize) -> Self {
        Self {
            raw,
            start: start as Int,
            end: end as Int,
        }
    }

    #[inline(always)]
    pub fn empty() -> Self {
        Self {
            #[allow(clippy::arc_with_non_send_sync)]
            raw: Arc::new(RawBytes::with_capacity(0)),
            start: 0,
            end: 0,
        }
    }

    #[inline(always)]
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    #[inline(always)]
    pub fn as_bytes(&self) -> &[u8] {
        // SAFETY: data inside this range is guaranteed to be initialized
        unsafe { self.raw.slice(self.start()..self.end()) }
    }

    #[inline(always)]
    #[allow(clippy::unnecessary_cast)]
    pub fn len(&self) -> usize {
        (self.end - self.start) as usize
    }

    #[allow(clippy::arc_with_non_send_sync)]
    pub fn from_bytes(bytes: &[u8]) -> Self {
        let new = RawBytes::with_capacity(bytes.len());
        // SAFETY: `new` has capacity of at least `bytes.len()`, so the copy
        // stays in bounds
        unsafe {
            std::ptr::copy_nonoverlapping(bytes.as_ptr(), new.ptr(), bytes.len());
        }

        Self {
            raw: Arc::new(new),
            start: 0,
            end: bytes.len() as Int,
        }
    }

    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.end == self.start
    }

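    /// Returns a new slice over `range`, interpreted relative to this slice,
    /// sharing the same underlying allocation (no bytes are copied).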
    #[inline(always)]
    #[allow(clippy::unnecessary_cast)]
    pub fn slice_clone(&self, range: impl std::ops::RangeBounds<usize>) -> Self {
        let (start, end) = get_range(range, (self.end - self.start) as usize);
        Self::new(self.raw.clone(), self.start() + start, self.start() + end)
    }

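    /// Narrows this slice in place to `range`, interpreted relative to the
    /// current bounds.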
    #[inline(always)]
    #[allow(clippy::unnecessary_cast)]
    pub fn slice_(&mut self, range: impl std::ops::RangeBounds<usize>) {
        let (start, end) = get_range(range, (self.end - self.start) as usize);
        // Update `end` first: it is computed from the old value of `start`.
        self.end = self.start + end as Int;
        self.start += start as Int;
    }

    #[inline(always)]
    pub fn ptr_eq(&self, other: &Self) -> bool {
        Arc::ptr_eq(&self.raw, &other.raw)
    }

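    /// Returns `true` if `other` starts exactly where `self` ends within the
    /// same underlying allocation, i.e. the two slices are contiguous.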
    #[inline(always)]
    pub fn can_merge(&self, other: &Self) -> bool {
        self.ptr_eq(other) && self.end == other.start
    }

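    /// Extends `self` to also cover `other` if the two slices are contiguous
    /// (see [`Self::can_merge`]); otherwise returns `Err(MergeFailed)`.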
    #[inline(always)]
    pub fn try_merge(&mut self, other: &Self) -> Result<(), MergeFailed> {
        if self.can_merge(other) {
            self.end = other.end;
            Ok(())
        } else {
            Err(MergeFailed)
        }
    }

    #[inline]
    pub fn slice_str(&self, range: impl RangeBounds<usize>) -> Result<&str, std::str::Utf8Error> {
        let (start, end) = get_range(range, self.len());
        std::str::from_utf8(&self.deref()[start..end])
    }

    #[inline(always)]
    #[allow(clippy::unnecessary_cast)]
    pub fn start(&self) -> usize {
        self.start as usize
    }

    #[inline(always)]
    #[allow(clippy::unnecessary_cast)]
    pub fn end(&self) -> usize {
        self.end as usize
    }
}

/// Error returned by [`BytesSlice::try_merge`] when the two slices are not
/// contiguous parts of the same allocation.
#[derive(Debug)]
pub struct MergeFailed;

impl Deref for BytesSlice {
    type Target = [u8];

    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        self.bytes()
    }
}

#[cfg(test)]
mod tests {
    use std::{
        sync::mpsc::{self, Receiver, Sender},
        thread,
    };

    use super::*;

    #[test]
    fn test() {
        let mut a = AppendOnlyBytes::new();
        let mut count = 0;
        for _ in 0..100 {
            a.push(8);
            count += 1;
            assert_eq!(a.len(), count);
        }

        for _ in 0..100 {
            a.push_slice(&[1, 2]);
            count += 2;
            assert_eq!(a.len(), count);
        }
    }

    #[test]
    fn it_works() {
        let mut a = AppendOnlyBytes::new();
        a.push_str("123");
        assert_eq!(a.slice_str(0..1).unwrap(), "1");
        let b = a.slice(..);
        for _ in 0..10 {
            a.push_str("456");
            dbg!(a.slice_str(..).unwrap());
        }
        let c = a.slice(..);
        drop(a);
        dbg!(c.slice_str(..).unwrap());
        assert_eq!(c.len(), 33);
        assert_eq!(c.slice_str(..6).unwrap(), "123456");

        assert_eq!(b.deref(), "123".as_bytes());
    }

    #[test]
    fn push_large() {
        let mut a = AppendOnlyBytes::new();
        a.push_slice(&[1; 10000]);
        assert_eq!(a.as_bytes(), &[1; 10000]);
    }

    #[test]
    fn threads() {
        let mut a = AppendOnlyBytes::new();
        a.push_str("123");
        assert_eq!(a.slice_str(0..1).unwrap(), "1");
        let (tx, rx): (Sender<AppendOnlyBytes>, Receiver<AppendOnlyBytes>) = mpsc::channel();
        let b = a.slice(..);
        let t = thread::spawn(move || {
            for _ in 0..10 {
                a.push_str("456");
                dbg!(a.slice_str(..).unwrap());
            }
            let c = a.slice(..);
            tx.send(a).unwrap();
            dbg!(c.slice_str(..).unwrap());
            assert_eq!(c.len(), 33);
            assert_eq!(c.slice_str(..6).unwrap(), "123456");
        });
        let t1 = thread::spawn(move || {
            assert_eq!(b.deref(), "123".as_bytes());
            for _ in 0..10 {
                let c = b.slice_clone(0..1);
                assert_eq!(c.deref(), "1".as_bytes());
            }
        });

        let a = rx.recv().unwrap();
        assert_eq!(a.len(), 33);
        assert_eq!(&a[..6], "123456".as_bytes());
        t.join().unwrap();
        t1.join().unwrap()
    }

    #[test]
    fn from_bytes() {
        let a = BytesSlice::from_bytes(b"123");
        assert_eq!(a.len(), 3);
        assert_eq!(a.slice_str(..).unwrap(), "123");
    }
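
    // The tests below are additional sketches (not part of the original test
    // suite) exercising documented behavior of the public API.

    // Cloning deep-copies into a fresh allocation, so the clone and the
    // original can append independently.
    #[test]
    fn clone_appends_independently() {
        let mut a = AppendOnlyBytes::new();
        a.push_str("abc");
        let mut b = a.clone();
        b.push_str("def");
        assert_eq!(a.as_bytes(), b"abc");
        assert_eq!(b.as_bytes(), b"abcdef");
    }

    // Two slices of the same buffer merge back into one when they are
    // contiguous; non-adjacent slices fail with `MergeFailed`.
    #[test]
    fn merge_contiguous_slices() {
        let mut a = AppendOnlyBytes::new();
        a.push_str("123456");
        let mut left = a.slice(0..3);
        let right = a.slice(3..6);
        assert!(left.can_merge(&right));
        left.try_merge(&right).unwrap();
        assert_eq!(left.as_bytes(), b"123456");

        let disjoint = a.slice(5..6);
        let mut head = a.slice(0..1);
        assert!(!head.can_merge(&disjoint));
        assert!(head.try_merge(&disjoint).is_err());
    }

    // `slice_` narrows a slice in place; each range is interpreted relative
    // to the slice's current bounds.
    #[test]
    fn narrow_in_place() {
        let mut s = BytesSlice::from_bytes(b"hello world");
        s.slice_(6..);
        assert_eq!(&*s, b"world");
        s.slice_(..2);
        assert_eq!(&*s, b"wo");
    }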
}