// snap_buf/lib.rs

1#![no_std]
2//! A [SnapBuf] is like a `Vec<u8>` with cheap snapshotting using copy on write.
3//!
4//! Internally, the data is broken up into segments that are organized in a tree structure.
5//! Only modified subtrees are cloned, so buffers with only little differences can share most of their memory.
6//! Moreover, subtrees which contain only zeros take up no memory.
7
8extern crate alloc;
9#[cfg(feature = "std")]
10extern crate std;
11
12use alloc::sync::Arc;
13use core::cmp::Ordering;
14use core::ops::Range;
15use core::{iter, mem, slice};
16use smallvec::SmallVec;
17
/// A copy on write buffer.
///
/// See the crate level documentation for more details.
#[derive(Debug, Clone)]
pub struct SnapBuf {
    // Logical length in bytes; may greatly exceed allocated memory because
    // all-zero subtrees are not stored.
    size: usize,
    // Height of the tree below `root`; 0 means `root` points at a single leaf.
    root_height: usize,
    // Root of the copy-on-write tree; `NodePointer(None)` means all zeros.
    root: NodePointer,
}
27
// Bytes of data stored per leaf node (small under the "test" feature so the
// tree structure is exercised with little data).
const LEAF_SIZE: usize = if cfg!(feature = "test") { 32 } else { 4000 };
// Number of children per inner node.
const INNER_SIZE: usize = if cfg!(feature = "test") { 4 } else { 500 };
30
31#[cfg(feature = "test")]
32pub mod test;
33
34#[cfg(feature = "std")]
35mod cursor;
36
37#[cfg(feature = "std")]
38pub use cursor::SnapBufCursor;
39
/// A node of the buffer tree.
#[derive(Clone, Debug)]
enum Node {
    /// Inner node holding `INNER_SIZE` child subtrees.
    Inner([NodePointer; INNER_SIZE]),
    /// Leaf holding `LEAF_SIZE` bytes of buffer contents.
    Leaf([u8; LEAF_SIZE]),
}
45
/// A possibly-empty shared pointer to a subtree.
///
/// `None` represents a subtree consisting entirely of zeros; cloning the
/// inner `Arc` is what makes snapshots cheap (copy on write).
#[derive(Clone, Debug)]
struct NodePointer(Option<Arc<Node>>);
48
// Binds `$start`/`$end` as locals from a range expression and debug-asserts
// that the range overlaps a subtree of height `$height`.
//
// NOTE: expands to plain `let` bindings and references `tree_size`, so both
// the identifiers and `tree_size` must be valid at the call site.
macro_rules! deconstruct_range{
    {$start:ident .. $end:ident = $range:expr,$height:expr} => {
         let $start = $range.start;
        let $end = $range.end;
         // assert range overlaps
        debug_assert!($start < tree_size($height) as isize);
        debug_assert!($end > 0);
    }
}
58
/// Returns whether `f` holds for every element of `x`.
///
/// The last element is tested first (short-circuiting the scan of the rest
/// when it fails), then the remaining elements front to back.
///
/// # Panics
/// Panics if `C == 0`.
fn range_all<T, const C: usize>(x: &[T; C], mut f: impl FnMut(&T) -> bool) -> bool {
    let (last, rest) = x.split_last().unwrap();
    f(last) && rest.iter().all(f)
}
63
64impl NodePointer {
65    fn children(&self) -> Option<&[NodePointer; INNER_SIZE]> {
66        match &**(self.0.as_ref()?) {
67            Node::Inner(x) => Some(x),
68            Node::Leaf(_) => None,
69        }
70    }
71
72    fn get_mut(&mut self, height: usize) -> &mut Node {
73        let arc = self.0.get_or_insert_with(|| {
74            Arc::new({
75                if height == 0 {
76                    Node::Leaf([0; LEAF_SIZE])
77                } else {
78                    Node::Inner(array_init::array_init(|_| NodePointer(None)))
79                }
80            })
81        });
82        Arc::make_mut(arc)
83    }
84
85    fn set_range<const FREE_ZEROS: bool>(&mut self, height: usize, start: isize, values: &[u8]) {
86        deconstruct_range!(start..end = start .. start + values.len() as isize ,height);
87        match self.get_mut(height) {
88            Node::Inner(children) => {
89                for (child_offset, child) in
90                    Self::affected_children(children, height - 1, start..end)
91                {
92                    child.set_range::<FREE_ZEROS>(height - 1, start - child_offset, values);
93                }
94                if FREE_ZEROS && range_all(children, |c| c.0.is_none()) {
95                    self.0 = None;
96                }
97            }
98            Node::Leaf(bytes) => {
99                let (src, dst) = if start < 0 {
100                    (&values[-start as usize..], &mut bytes[..])
101                } else {
102                    (values, &mut bytes[start as usize..])
103                };
104                let len = src.len().min(dst.len());
105                dst[..len].copy_from_slice(&src[..len]);
106                if FREE_ZEROS && range_all(bytes, |b| *b == 0) {
107                    self.0 = None;
108                }
109            }
110        }
111    }
112
113    fn affected_children(
114        children: &mut [NodePointer; INNER_SIZE],
115        child_height: usize,
116        range: Range<isize>,
117    ) -> impl Iterator<Item = (isize, &mut NodePointer)> {
118        let start = range.start.max(0) as usize;
119        let child_size = tree_size(child_height);
120        children
121            .iter_mut()
122            .enumerate()
123            .skip(start / child_size)
124            .map(move |(i, c)| ((i * child_size) as isize, c))
125            .take_while(move |(offset, _)| (*offset) < range.end)
126    }
127
128    fn fill_range(&mut self, height: usize, range: Range<isize>, value: u8) {
129        deconstruct_range!(start..end=range,height);
130        match self.get_mut(height) {
131            Node::Inner(children) => {
132                for (child_offset, child) in
133                    Self::affected_children(children, height - 1, range.clone())
134                {
135                    child.fill_range(height - 1, start - child_offset..end - child_offset, value);
136                }
137            }
138            Node::Leaf(bytes) => {
139                let write_start = start.max(0) as usize;
140                let write_end = (end as usize).min(bytes.len());
141                bytes[write_start..write_end].fill(value);
142            }
143        }
144    }
145
146    fn clear_range(&mut self, height: usize, range: Range<isize>) {
147        deconstruct_range!(start..end = range,height);
148        if start <= 0 && end as usize >= tree_size(height) || self.0.is_none() {
149            self.0 = None;
150            return;
151        }
152        match self.get_mut(height) {
153            Node::Inner(children) => {
154                for (child_offset, child) in
155                    Self::affected_children(children, height - 1, range.clone())
156                {
157                    child.clear_range(height - 1, start - child_offset..end - child_offset);
158                }
159                if range_all(children, |c| c.0.is_none()) {
160                    self.0 = None;
161                }
162            }
163            Node::Leaf(bytes) => {
164                let write_start = start.max(0) as usize;
165                let write_end = (end as usize).min(bytes.len());
166                bytes[write_start..write_end].fill(0);
167                if range_all(bytes, |b| *b == 0) {
168                    self.0 = None;
169                }
170            }
171        }
172    }
173
174    fn put_leaf(&mut self, height: usize, offset: usize, leaf: NodePointer) {
175        match self.get_mut(height) {
176            Node::Inner(children) => {
177                let range = offset as isize..offset as isize + 1;
178                let (co, c) = Self::affected_children(children, height - 1, range)
179                    .next()
180                    .unwrap();
181                c.put_leaf(height - 1, offset - co as usize, leaf);
182            }
183            Node::Leaf(_) => {
184                debug_assert_eq!(offset, 0);
185                *self = leaf;
186            }
187        }
188    }
189
190    fn locate_leaf(
191        &mut self,
192        height: usize,
193        offset: usize,
194    ) -> Option<(usize, &mut [u8; LEAF_SIZE])> {
195        self.0.as_ref()?;
196        match self.get_mut(height) {
197            Node::Inner(children) => {
198                let range = offset as isize..offset as isize + 1;
199                let (co, c) = Self::affected_children(children, height - 1, range)
200                    .next()
201                    .unwrap();
202                c.locate_leaf(height - 1, offset - co as usize)
203            }
204            Node::Leaf(x) => Some((offset, x)),
205        }
206    }
207
208    fn locate_leaf_ref(&self, heigth: usize, offset: usize) -> (usize, &[u8; LEAF_SIZE]) {
209        if let Some(x) = &self.0 {
210            match &**x {
211                Node::Inner(children) => {
212                    let child_size = tree_size(heigth - 1);
213                    children[offset / child_size].locate_leaf_ref(heigth - 1, offset % child_size)
214                }
215                Node::Leaf(data) => (offset, data),
216            }
217        } else {
218            (offset % LEAF_SIZE, &ZERO_LEAF)
219        }
220    }
221}
222
223const fn const_tree_size(height: usize) -> usize {
224    if height == 0 {
225        LEAF_SIZE
226    } else {
227        INNER_SIZE * const_tree_size(height - 1)
228    }
229}
230
// Runtime convenience wrapper around [const_tree_size]: number of bytes a
// subtree of the given height can hold.
fn tree_size(height: usize) -> usize {
    const_tree_size(height)
}
234
impl Default for SnapBuf {
    /// Returns an empty buffer, equivalent to [SnapBuf::new].
    fn default() -> Self {
        Self::new()
    }
}
240
241static ZERO_LEAF: [u8; LEAF_SIZE] = [0; LEAF_SIZE];
242
243impl SnapBuf {
244    /// Creates an empty buffer.
245    pub fn new() -> Self {
246        Self {
247            root_height: 0,
248            size: 0,
249            root: NodePointer(None),
250        }
251    }
252
253    fn shrink(&mut self, new_len: usize) {
254        self.root.clear_range(
255            self.root_height,
256            new_len as isize..tree_size(self.root_height) as isize,
257        );
258        self.size = new_len;
259    }
260
261    fn grow_height_until(&mut self, min_size: usize) {
262        while tree_size(self.root_height) < min_size {
263            if self.root.0.is_some() {
264                let new_root = Arc::new(Node::Inner(array_init::array_init(|x| {
265                    if x == 0 {
266                        self.root.clone()
267                    } else {
268                        NodePointer(None)
269                    }
270                })));
271                self.root = NodePointer(Some(new_root.clone()));
272            }
273            self.root_height += 1;
274        }
275    }
276
277    fn grow_zero(&mut self, new_len: usize) {
278        self.grow_height_until(new_len);
279        self.size = new_len;
280    }
281
282    /// Resizes the buffer, to the given length.
283    ///
284    /// If `new_len` is greater than `len`, the new space in the buffer is filled with copies of value.
285    /// This is more efficient if `value == 0`.
286    #[inline]
287    pub fn resize(&mut self, new_len: usize, value: u8) {
288        match new_len.cmp(&self.size) {
289            Ordering::Less => {
290                self.shrink(new_len);
291            }
292            Ordering::Equal => {}
293            Ordering::Greater => {
294                let old_len = self.size;
295                self.grow_zero(new_len);
296                if value != 0 {
297                    self.fill_range(old_len..new_len, value);
298                }
299            }
300        }
301    }
302
303    /// Shortens the buffer, keeping the first `new_len` bytes and discarding the rest.
304    ///
305    /// If `new_len` is greater or equal to the buffer’s current length, this has no effect.
306    pub fn truncate(&mut self, new_len: usize) {
307        if new_len < self.size {
308            self.shrink(new_len);
309        }
310    }
311
312    /// Fill the given range with copies of value.
313    ///
314    /// This is equivalent to calling [write](Self::write) with a slice filled with value.
315    /// Calling this with `value = 0` is not guaranteed to free up the zeroed segments,
316    /// use [clear_range](Self::clear_range) if that is required.
317    pub fn fill_range(&mut self, range: Range<usize>, value: u8) {
318        if self.size < range.end {
319            self.grow_zero(range.end);
320        }
321        if range.is_empty() {
322            return;
323        }
324        let range = range.start as isize..range.end as isize;
325        self.root.fill_range(self.root_height, range, value);
326    }
327
328    pub fn read(&self, offset: usize) -> &[u8] {
329        if offset == self.size {
330            return &[];
331        }
332        assert!(offset < self.size);
333        let max_len = self.size - offset;
334        let (offset, leaf) = self.root.locate_leaf_ref(self.root_height, offset);
335        let data = &leaf[offset..];
336        &data[..data.len().min(max_len)]
337    }
338
339    /// Writes data at the specified offset.
340    ///
341    /// If this extends past the current end of the buffer, the buffer is automatically resized.
342    /// If offset is larger than the current buffer length, the space between the current buffer
343    /// end and the written region is filled with zeros.
344    ///
345    /// Zeroing parts of the buffer using this method is not guaranteed to free up the zeroed segments,
346    /// use [write_with_zeros](Self::write_with_zeros) if that is required.
347    pub fn write(&mut self, offset: usize, data: &[u8]) {
348        self.write_inner::<false>(offset, data);
349    }
350
351    /// Like [write](Self::write), but detects and frees newly zeroed segments in the buffer.
352    pub fn write_with_zeros(&mut self, offset: usize, data: &[u8]) {
353        self.write_inner::<true>(offset, data);
354    }
355
356    fn write_inner<const FREE_ZEROS: bool>(&mut self, offset: usize, data: &[u8]) {
357        let write_end = offset + data.len();
358        if self.size < write_end {
359            self.resize(write_end, 0);
360        }
361        if data.is_empty() {
362            return;
363        }
364        self.root
365            .set_range::<FREE_ZEROS>(self.root_height, offset as isize, data);
366    }
367
368    /// Returns `true` if the buffer length is zero.
369    pub fn is_empty(&self) -> bool {
370        self.len() == 0
371    }
372
373    /// Returns the length of the buffer, the number of bytes it contains.
374    ///
375    /// The memory footprint of the buffer may be much smaller than this due to omission of zero segments and sharing with other buffers.
376    pub fn len(&self) -> usize {
377        self.size
378    }
379
380    /// Fill a range with zeros, freeing memory if possible.
381    ///
382    /// # Panics
383    /// Panics if range end is `range.end` > `self.len()`.
384    pub fn clear_range(&mut self, range: Range<usize>) {
385        assert!(range.end <= self.size);
386        if range.is_empty() {
387            return;
388        }
389        self.root
390            .clear_range(self.root_height, range.start as isize..range.end as isize);
391    }
392
393    /// Clears all data and sets length to 0.
394    pub fn clear(&mut self) {
395        *self = Self::new();
396    }
397
398    fn iter_nodes_pre_order(&self) -> impl Iterator<Item = (&NodePointer, usize)> {
399        struct IterStack<'a> {
400            stack_end_height: usize,
401            stack: SmallVec<[&'a [NodePointer]; 5]>,
402        }
403
404        #[allow(clippy::needless_lifetimes)]
405        fn split_first_in_place<'x, 's, T>(x: &'x mut &'s [T]) -> &'s T {
406            let (first, rest) = mem::take(x).split_first().unwrap();
407            *x = rest;
408            first
409        }
410
411        impl<'a> Iterator for IterStack<'a> {
412            type Item = (&'a NodePointer, usize);
413
414            fn next(&mut self) -> Option<Self::Item> {
415                let visit_now = loop {
416                    let last_level = self.stack.last_mut()?;
417                    if last_level.is_empty() {
418                        self.stack.pop();
419                        self.stack_end_height += 1;
420                    } else {
421                        break split_first_in_place(last_level);
422                    }
423                };
424                let ret = (visit_now, self.stack_end_height);
425                if let Some(children) = visit_now.children() {
426                    self.stack.push(children);
427                    self.stack_end_height -= 1;
428                }
429                Some(ret)
430            }
431        }
432
433        let mut stack = SmallVec::new();
434        stack.push(slice::from_ref(&self.root));
435        IterStack {
436            stack_end_height: self.root_height,
437            stack,
438        }
439    }
440
441    /// Returns an iterator over the byte slices constituting the buffer.
442    ///
443    /// The returned slices may overlap.
444    pub fn chunks(&self) -> impl Iterator<Item = &[u8]> {
445        let mut emitted = 0;
446        self.iter_nodes_pre_order()
447            .flat_map(|(node, height)| match node.0.as_deref() {
448                None => {
449                    let leaf_count = INNER_SIZE.pow(height as u32);
450                    iter::repeat_n(&ZERO_LEAF, leaf_count)
451                }
452                Some(Node::Inner(_)) => iter::repeat_n(&ZERO_LEAF, 0),
453                Some(Node::Leaf(b)) => iter::repeat_n(b, 1),
454            })
455            .map(move |x| {
456                let emit = (self.size - emitted).min(x.len());
457                emitted += emit;
458                &x[..emit]
459            })
460            .filter(|x| !x.is_empty())
461    }
462
463    /// Returns an iterator over the buffer.
464    pub fn iter(&self) -> impl Iterator<Item = &u8> {
465        self.chunks().flat_map(|x| x.iter())
466    }
467
468    #[doc(hidden)]
469    pub fn bytes(&self) -> impl Iterator<Item = u8> + '_ {
470        self.iter().copied()
471    }
472
473    pub fn extend_from_slice(&mut self, data: &[u8]) {
474        self.write_with_zeros(self.size, data)
475    }
476}
477
impl Extend<u8> for SnapBuf {
    fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
        // Consumes up to one leaf worth of bytes, starting at in-leaf offset
        // `start_at`. Returns how many bytes of the leaf are occupied after
        // consuming, plus the new leaf pointer — `NodePointer(None)` when
        // every byte seen so far was zero, so all-zero leaves stay free.
        fn generate_leaf(
            start_at: usize,
            iter: &mut impl Iterator<Item = u8>,
        ) -> (usize, NodePointer) {
            let mut consumed = start_at;
            // Skip leading zeros; only allocate once a non-zero byte appears.
            let first_non_zero = loop {
                if let Some(x) = iter.next() {
                    consumed += 1;
                    if x != 0 {
                        break x;
                    }
                } else {
                    return (consumed, NodePointer(None));
                }
                if consumed == LEAF_SIZE {
                    return (LEAF_SIZE, NodePointer(None));
                }
            };
            let mut leaf = Arc::new(Node::Leaf([0u8; LEAF_SIZE]));
            // The Arc was just created, so `get_mut` cannot fail.
            let leaf_mut = if let Node::Leaf(x) = Arc::get_mut(&mut leaf).unwrap() {
                x
            } else {
                unreachable!()
            };
            // `consumed` was already advanced past the first non-zero byte.
            leaf_mut[consumed - 1] = first_non_zero;
            while consumed < LEAF_SIZE {
                if let Some(x) = iter.next() {
                    leaf_mut[consumed] = x;
                    consumed += 1;
                } else {
                    break;
                }
            }
            (consumed, NodePointer(Some(leaf)))
        }

        let it = &mut iter.into_iter();
        // First top up the partially filled leaf at the current end, if any.
        // A `None` result means that leaf is all zeros; `generate_leaf` below
        // handles the partial offset itself in that case.
        if self.size < tree_size(self.root_height) {
            if let Some((offset, first_leaf)) = self.root.locate_leaf(self.root_height, self.size) {
                for i in offset..LEAF_SIZE {
                    let Some(x) = it.next() else { return };
                    first_leaf[i] = x;
                    self.size += 1;
                }
                assert_eq!(self.size % LEAF_SIZE, 0);
            }
        } else {
            assert_eq!(self.size % LEAF_SIZE, 0);
        }
        // Append leaf by leaf until the iterator runs dry.
        loop {
            let in_leaf_offset = self.size % LEAF_SIZE;
            let (consumed, leaf) = generate_leaf(in_leaf_offset, it);
            let old_size = self.size;
            self.size = old_size - in_leaf_offset + consumed;
            self.grow_height_until(self.size);
            if leaf.0.is_some() {
                // `old_size - in_leaf_offset` is the leaf-aligned position.
                self.root
                    .put_leaf(self.root_height, old_size - in_leaf_offset, leaf);
            }
            if consumed < LEAF_SIZE {
                // Iterator exhausted mid-leaf; done.
                return;
            }
            assert_eq!(self.size % LEAF_SIZE, 0);
        }
    }
}
546
547impl FromIterator<u8> for SnapBuf {
548    fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self {
549        let mut iter = iter.into_iter();
550        let mut ret = Self::new();
551        ret.extend(&mut iter);
552        ret
553    }
554}