// snap_buf/lib.rs

1#![no_std]
2//! A [SnapBuf] is like a `Vec<u8>` with cheap snapshotting using copy on write.
3//!
4//! Internally, the data is broken up into segments that are organized in a tree structure.
//! Only modified subtrees are cloned, so buffers that differ only slightly can share most of their memory.
6//! Moreover, subtrees which contain only zeros take up no memory.
7
8extern crate alloc;
9#[cfg(feature = "test")]
10extern crate std;
11
12use alloc::sync::Arc;
13use core::cmp::Ordering;
14use core::ops::Range;
15use core::{iter, mem, slice};
16use smallvec::SmallVec;
17
#[derive(Debug, Clone)]
pub struct SnapBuf {
    // Logical length in bytes. May be far larger than the memory actually
    // allocated, because all-zero subtrees are represented by `None` pointers.
    size: usize,
    // Height of `root`: 0 means the root is (at most) a single leaf; each
    // additional level multiplies the addressable capacity by `INNER_SIZE`.
    root_height: usize,
    // Root of the copy-on-write tree; `None` encodes an all-zero buffer.
    root: NodePointer,
}
24
// Bytes of payload per leaf node. The "test" feature shrinks both constants so
// unit tests exercise multi-level trees without large inputs.
const LEAF_SIZE: usize = if cfg!(feature = "test") { 32 } else { 4000 };
// Fan-out of an inner node (children per `Node::Inner`).
const INNER_SIZE: usize = if cfg!(feature = "test") { 4 } else { 500 };
27
28#[cfg(feature = "test")]
29pub mod test;
30
// One tree node: an inner node holding `INNER_SIZE` child pointers, or a leaf
// holding `LEAF_SIZE` raw bytes. Leaves are only created at height 0 (see
// `NodePointer::get_mut`).
#[derive(Clone, Debug)]
enum Node {
    Inner([NodePointer; INNER_SIZE]),
    Leaf([u8; LEAF_SIZE]),
}
36
// A refcounted edge of the tree. `None` stands for an entire all-zero subtree
// (of whatever height the context implies) without allocating; `Arc` sharing
// is what makes snapshots cheap, with copy-on-write via `Arc::make_mut`.
#[derive(Clone, Debug)]
struct NodePointer(Option<Arc<Node>>);
39
// Binds `$start`/`$end` from a `Range<isize>` and debug-checks that the range
// actually overlaps a subtree of height `$height`. Offsets are signed because
// ranges are rebased against each child's start as the recursion descends, so
// `$start` may be negative and `$end` may point past this subtree.
macro_rules! deconstruct_range {
    {$start:ident .. $end:ident = $range:expr,$height:expr} => {
        let $start = $range.start;
        let $end = $range.end;
        // Callers must only recurse into subtrees that the range touches.
        debug_assert!($start < tree_size($height) as isize);
        debug_assert!($end > 0);
    }
}
49
// Returns true iff `f` holds for every element of the array.
// The final element is probed first (short-circuiting the rest), preserving
// the original evaluation order.
fn range_all<T, const C: usize>(x: &[T; C], mut f: impl FnMut(&T) -> bool) -> bool {
    if !f(x.last().unwrap()) {
        return false;
    }
    x[..C - 1].iter().all(f)
}
54
55impl NodePointer {
56    fn children(&self) -> Option<&[NodePointer; INNER_SIZE]> {
57        match &**(self.0.as_ref()?) {
58            Node::Inner(x) => Some(x),
59            Node::Leaf(_) => None,
60        }
61    }
62
63    fn get_mut(&mut self, height: usize) -> &mut Node {
64        let arc = self.0.get_or_insert_with(|| {
65            Arc::new({
66                if height == 0 {
67                    Node::Leaf([0; LEAF_SIZE])
68                } else {
69                    Node::Inner(array_init::array_init(|_| NodePointer(None)))
70                }
71            })
72        });
73        Arc::make_mut(arc)
74    }
75
    /// Copies `values` into this subtree starting at (possibly negative) `start`.
    ///
    /// `start` is relative to this subtree; a negative value means the write began
    /// in an earlier sibling and only the tail of `values` lands here. With
    /// `FREE_ZEROS`, nodes that end up entirely zero are dropped back to `None`.
    fn set_range<const FREE_ZEROS: bool>(&mut self, height: usize, start: isize, values: &[u8]) {
        deconstruct_range!(start..end = start .. start + values.len() as isize ,height);
        match self.get_mut(height) {
            Node::Inner(children) => {
                for (child_offset, child) in
                    Self::affected_children(children, height - 1, start..end)
                {
                    // Rebase the write onto the child's coordinate space.
                    child.set_range::<FREE_ZEROS>(height - 1, start - child_offset, values);
                }
                if FREE_ZEROS && range_all(children, |c| c.0.is_none()) {
                    self.0 = None;
                }
            }
            Node::Leaf(bytes) => {
                // Negative start: skip the bytes of `values` owned by earlier leaves.
                let (src, dst) = if start < 0 {
                    (&values[-start as usize..], &mut bytes[..])
                } else {
                    (values, &mut bytes[start as usize..])
                };
                let len = src.len().min(dst.len());
                dst[..len].copy_from_slice(&src[..len]);
                if FREE_ZEROS && range_all(bytes, |b| *b == 0) {
                    self.0 = None;
                }
            }
        }
    }
103
104    fn affected_children(
105        children: &mut [NodePointer; INNER_SIZE],
106        child_height: usize,
107        range: Range<isize>,
108    ) -> impl Iterator<Item = (isize, &mut NodePointer)> {
109        let start = range.start.max(0) as usize;
110        let child_size = tree_size(child_height);
111        children
112            .iter_mut()
113            .enumerate()
114            .skip(start / child_size)
115            .map(move |(i, c)| ((i * child_size) as isize, c))
116            .take_while(move |(offset, _)| (*offset) < range.end)
117    }
118
    /// Fills the intersection of `range` with this subtree with `value`.
    ///
    /// Offsets are relative to this subtree and may extend beyond it on either
    /// side; the leaf case clamps them. Unlike `clear_range`, nodes are never
    /// freed here, even when `value == 0`.
    fn fill_range(&mut self, height: usize, range: Range<isize>, value: u8) {
        deconstruct_range!(start..end=range,height);
        match self.get_mut(height) {
            Node::Inner(children) => {
                for (child_offset, child) in
                    Self::affected_children(children, height - 1, range.clone())
                {
                    // Rebase the range onto the child's coordinate space.
                    child.fill_range(height - 1, start - child_offset..end - child_offset, value);
                }
            }
            Node::Leaf(bytes) => {
                // Clamp: the range may start before or end after this leaf.
                let write_start = start.max(0) as usize;
                let write_end = (end as usize).min(bytes.len());
                bytes[write_start..write_end].fill(value);
            }
        }
    }
136
    /// Zeroes the intersection of `range` with this subtree, freeing every node
    /// (and transitively its children) that becomes entirely zero.
    fn clear_range(&mut self, height: usize, range: Range<isize>) {
        deconstruct_range!(start..end = range,height);
        // Fast path: the range covers this whole subtree, or it is already empty.
        if start <= 0 && end as usize >= tree_size(height) || self.0.is_none() {
            self.0 = None;
            return;
        }
        match self.get_mut(height) {
            Node::Inner(children) => {
                for (child_offset, child) in
                    Self::affected_children(children, height - 1, range.clone())
                {
                    child.clear_range(height - 1, start - child_offset..end - child_offset);
                }
                // Prune this node if every child was cleared away.
                if range_all(children, |c| c.0.is_none()) {
                    self.0 = None;
                }
            }
            Node::Leaf(bytes) => {
                let write_start = start.max(0) as usize;
                let write_end = (end as usize).min(bytes.len());
                bytes[write_start..write_end].fill(0);
                // Drop the leaf entirely if it is now all zeros.
                if range_all(bytes, |b| *b == 0) {
                    self.0 = None;
                }
            }
        }
    }
164
    /// Replaces the leaf containing byte `offset` with `leaf` (used by the
    /// `Extend` impl to splice pre-built leaves into the tree).
    ///
    /// `offset` must be leaf-aligned: by the time recursion reaches height 0
    /// the remaining offset has to be exactly 0.
    fn put_leaf(&mut self, height: usize, offset: usize, leaf: NodePointer) {
        match self.get_mut(height) {
            Node::Inner(children) => {
                // A one-byte range pinpoints the single child containing `offset`.
                let range = offset as isize..offset as isize + 1;
                let (co, c) = Self::affected_children(children, height - 1, range)
                    .next()
                    .unwrap();
                c.put_leaf(height - 1, offset - co as usize, leaf);
            }
            Node::Leaf(_) => {
                debug_assert_eq!(offset, 0);
                *self = leaf;
            }
        }
    }
180
    /// Returns the leaf containing byte `offset` for mutation, together with
    /// the offset rebased into that leaf, or `None` if this entire subtree is
    /// an unmaterialized zero subtree.
    ///
    /// The early return on an empty pointer ensures `get_mut` below only runs
    /// on already-materialized nodes, so this never allocates.
    fn locate_leaf(
        &mut self,
        height: usize,
        offset: usize,
    ) -> Option<(usize, &mut [u8; LEAF_SIZE])> {
        self.0.as_ref()?;
        match self.get_mut(height) {
            Node::Inner(children) => {
                // A one-byte range pinpoints the single child containing `offset`.
                let range = offset as isize..offset as isize + 1;
                let (co, c) = Self::affected_children(children, height - 1, range)
                    .next()
                    .unwrap();
                c.locate_leaf(height - 1, offset - co as usize)
            }
            Node::Leaf(x) => Some((offset, x)),
        }
    }
198
199    fn locate_leaf_ref(&self, heigth: usize, offset: usize) -> (usize, &[u8; LEAF_SIZE]) {
200        if let Some(x) = &self.0 {
201            match &**x {
202                Node::Inner(children) => {
203                    let child_size = tree_size(heigth - 1);
204                    children[offset / child_size].locate_leaf_ref(heigth - 1, offset % child_size)
205                }
206                Node::Leaf(data) => (offset, data),
207            }
208        } else {
209            (offset % LEAF_SIZE, &ZERO_LEAF)
210        }
211    }
212}
213
214const fn const_tree_size(height: usize) -> usize {
215    if height == 0 {
216        LEAF_SIZE
217    } else {
218        INNER_SIZE * const_tree_size(height - 1)
219    }
220}
221
// Runtime convenience wrapper over `const_tree_size`: total bytes addressable
// by a subtree of the given height.
fn tree_size(height: usize) -> usize {
    const_tree_size(height)
}
225
226impl Default for SnapBuf {
227    fn default() -> Self {
228        Self::new()
229    }
230}
231
232static ZERO_LEAF: [u8; LEAF_SIZE] = [0; LEAF_SIZE];
233
234impl SnapBuf {
235    /// Creates an empty buffer.
236    pub fn new() -> Self {
237        Self {
238            root_height: 0,
239            size: 0,
240            root: NodePointer(None),
241        }
242    }
243
    /// Zeroes everything from `new_len` up to the tree's capacity and records
    /// the shorter length. The tree height is left unchanged; emptied subtrees
    /// are pruned by `clear_range`.
    ///
    /// Callers guarantee `new_len < self.size`, so the cleared range is non-empty.
    fn shrink(&mut self, new_len: usize) {
        self.root.clear_range(
            self.root_height,
            new_len as isize..tree_size(self.root_height) as isize,
        );
        self.size = new_len;
    }
251
252    fn grow_height_until(&mut self, min_size: usize) {
253        while tree_size(self.root_height) < min_size {
254            if self.root.0.is_some() {
255                let new_root = Arc::new(Node::Inner(array_init::array_init(|x| {
256                    if x == 0 {
257                        self.root.clone()
258                    } else {
259                        NodePointer(None)
260                    }
261                })));
262                self.root = NodePointer(Some(new_root.clone()));
263            }
264            self.root_height += 1;
265        }
266    }
267
    /// Extends the buffer to `new_len` bytes, with the new tail reading as
    /// zeros. Only the tree height grows: absent subtrees already read as zero,
    /// so no nodes need to be written.
    fn grow_zero(&mut self, new_len: usize) {
        self.grow_height_until(new_len);
        self.size = new_len;
    }
272
273    /// Resizes the buffer, to the given length.
274    ///
275    /// If `new_len` is greater than `len`, the new space in the buffer is filled with copies of value.
276    /// This is more efficient if `value == 0`.
277    #[inline]
278    pub fn resize(&mut self, new_len: usize, value: u8) {
279        match new_len.cmp(&self.size) {
280            Ordering::Less => {
281                self.shrink(new_len);
282            }
283            Ordering::Equal => {}
284            Ordering::Greater => {
285                let old_len = self.size;
286                self.grow_zero(new_len);
287                if value != 0 {
288                    self.fill_range(old_len..new_len, value);
289                }
290            }
291        }
292    }
293
294    /// Shortens the buffer, keeping the first `new_len` bytes and discarding the rest.
295    ///
296    /// If `new_len` is greater or equal to the buffer’s current length, this has no effect.
297    pub fn truncate(&mut self, new_len: usize) {
298        if new_len < self.size {
299            self.shrink(new_len);
300        }
301    }
302
    /// Fill the given range with copies of value.
    ///
    /// This is equivalent to calling [write](Self::write) with a slice filled with value.
    /// Calling this with `value = 0` is not guaranteed to free up the zeroed segments,
    /// use [clear_range](Self::clear_range) if that is required.
    pub fn fill_range(&mut self, range: Range<usize>, value: u8) {
        // Grow before the empty-range check so that e.g. `fill_range(n..n, _)`
        // still extends the buffer to `n`, mirroring `write` of empty data at `n`.
        if self.size < range.end {
            self.grow_zero(range.end);
        }
        if range.is_empty() {
            return;
        }
        let range = range.start as isize..range.end as isize;
        self.root.fill_range(self.root_height, range, value);
    }
318
    /// Returns a slice of the buffer starting at `offset`.
    ///
    /// The slice extends at most to the end of the containing segment, so it
    /// may be shorter than the remaining buffer; call again with an advanced
    /// offset to continue. An empty slice is returned exactly at the buffer end.
    ///
    /// # Panics
    /// Panics if `offset > self.len()`.
    pub fn read(&self, offset: usize) -> &[u8] {
        if offset == self.size {
            return &[];
        }
        assert!(offset < self.size);
        let max_len = self.size - offset;
        // `offset` is rebased by `locate_leaf_ref` to within the located leaf.
        let (offset, leaf) = self.root.locate_leaf_ref(self.root_height, offset);
        let data = &leaf[offset..];
        // Never hand out bytes past the logical end of the buffer.
        &data[..data.len().min(max_len)]
    }
329
    /// Writes data at the specified offset.
    ///
    /// If this extends past the current end of the buffer, the buffer is automatically resized.
    /// If offset is larger than the current buffer length, the space between the current buffer
    /// end and the written region is filled with zeros.
    ///
    /// Zeroing parts of the buffer using this method is not guaranteed to free up the zeroed segments,
    /// use [write_with_zeros](Self::write_with_zeros) if that is required.
    pub fn write(&mut self, offset: usize, data: &[u8]) {
        // FREE_ZEROS = false: newly zeroed segments are not reclaimed.
        self.write_inner::<false>(offset, data);
    }
341
    /// Like [write](Self::write), but detects and frees newly zeroed segments in the buffer.
    pub fn write_with_zeros(&mut self, offset: usize, data: &[u8]) {
        // FREE_ZEROS = true: subtrees that become all-zero are dropped.
        self.write_inner::<true>(offset, data);
    }
346
347    fn write_inner<const FREE_ZEROS: bool>(&mut self, offset: usize, data: &[u8]) {
348        let write_end = offset + data.len();
349        if self.size < write_end {
350            self.resize(write_end, 0);
351        }
352        if data.is_empty() {
353            return;
354        }
355        self.root
356            .set_range::<FREE_ZEROS>(self.root_height, offset as isize, data);
357    }
358
359    /// Returns `true` if the buffer length is zero.
360    pub fn is_empty(&self) -> bool {
361        self.len() == 0
362    }
363
    /// Returns the length of the buffer, the number of bytes it contains.
    ///
    /// The memory footprint of the buffer may be much smaller than this due to omission of zero segments and sharing with other buffers.
    pub fn len(&self) -> usize {
        self.size
    }
370
    /// Fill a range with zeros, freeing memory if possible.
    ///
    /// # Panics
    /// Panics if `range.end > self.len()`.
    pub fn clear_range(&mut self, range: Range<usize>) {
        assert!(range.end <= self.size);
        if range.is_empty() {
            return;
        }
        self.root
            .clear_range(self.root_height, range.start as isize..range.end as isize);
    }
383
384    /// Clears all data and sets length to 0.
385    pub fn clear(&mut self) {
386        *self = Self::new();
387    }
388
    /// Visits every node of the tree pre-order, yielding each pointer together
    /// with its height. `None` pointers (zero subtrees) are yielded but not
    /// descended into.
    fn iter_nodes_pre_order(&self) -> impl Iterator<Item = (&NodePointer, usize)> {
        // Explicit DFS stack: one slice of not-yet-visited siblings per level.
        struct IterStack<'a> {
            // Height of the nodes in the deepest (last) stack entry.
            stack_end_height: usize,
            stack: SmallVec<[&'a [NodePointer]; 5]>,
        }

        // Pops the first element off the slice, shrinking it in place, while
        // keeping the returned reference tied to the longer lifetime 's.
        #[allow(clippy::needless_lifetimes)]
        fn split_first_in_place<'x, 's, T>(x: &'x mut &'s [T]) -> &'s T {
            let (first, rest) = mem::take(x).split_first().unwrap();
            *x = rest;
            first
        }

        impl<'a> Iterator for IterStack<'a> {
            type Item = (&'a NodePointer, usize);

            fn next(&mut self) -> Option<Self::Item> {
                let visit_now = loop {
                    // Drop exhausted levels; iteration ends when the stack empties.
                    let last_level = self.stack.last_mut()?;
                    if last_level.is_empty() {
                        self.stack.pop();
                        self.stack_end_height += 1;
                    } else {
                        break split_first_in_place(last_level);
                    }
                };
                let ret = (visit_now, self.stack_end_height);
                // Descend into inner nodes; leaves and `None` pointers have no children.
                if let Some(children) = visit_now.children() {
                    self.stack.push(children);
                    self.stack_end_height -= 1;
                }
                Some(ret)
            }
        }

        let mut stack = SmallVec::new();
        stack.push(slice::from_ref(&self.root));
        IterStack {
            stack_end_height: self.root_height,
            stack,
        }
    }
431
    /// Returns an iterator over the byte slices constituting the buffer.
    ///
    /// The returned slices may overlap.
    pub fn chunks(&self) -> impl Iterator<Item = &[u8]> {
        // Running total of bytes already yielded; used to clip to `self.size`.
        let mut emitted = 0;
        self.iter_nodes_pre_order()
            .flat_map(|(node, height)| match node.0.as_deref() {
                None => {
                    // A `None` subtree at this height stands in for this many zero leaves.
                    let leaf_count = INNER_SIZE.pow(height as u32);
                    iter::repeat_n(&ZERO_LEAF, leaf_count)
                }
                // Inner nodes carry no bytes themselves; their leaves follow in pre-order.
                Some(Node::Inner(_)) => iter::repeat_n(&ZERO_LEAF, 0),
                Some(Node::Leaf(b)) => iter::repeat_n(b, 1),
            })
            .map(move |x| {
                // Trim the final chunk(s) so the total equals the buffer length.
                let emit = (self.size - emitted).min(x.len());
                emitted += emit;
                &x[..emit]
            })
            .filter(|x| !x.is_empty())
    }
453
454    /// Returns an iterator over the buffer.
455    pub fn iter(&self) -> impl Iterator<Item = &u8> {
456        self.chunks().flat_map(|x| x.iter())
457    }
458
459    #[doc(hidden)]
460    pub fn bytes(&self) -> impl Iterator<Item = u8> + '_ {
461        self.iter().copied()
462    }
463
464    pub fn extend_from_slice(&mut self, data: &[u8]) {
465        self.write_with_zeros(self.size, data)
466    }
467}
468
impl Extend<u8> for SnapBuf {
    fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
        // Builds up to one leaf worth of data, starting `start_at` bytes into
        // the leaf (earlier bytes stay zero). Returns how far into the leaf the
        // iterator reached, and the leaf pointer — `None` if every byte seen
        // was zero, so all-zero leaves never allocate.
        fn generate_leaf(
            start_at: usize,
            iter: &mut impl Iterator<Item = u8>,
        ) -> (usize, NodePointer) {
            let mut consumed = start_at;
            // Scan for the first non-zero byte; bail out with a `None` leaf if
            // the iterator ends or the leaf fills up while still all-zero.
            let first_non_zero = loop {
                if let Some(x) = iter.next() {
                    consumed += 1;
                    if x != 0 {
                        break x;
                    }
                } else {
                    return (consumed, NodePointer(None));
                }
                if consumed == LEAF_SIZE {
                    return (LEAF_SIZE, NodePointer(None));
                }
            };
            let mut leaf = Arc::new(Node::Leaf([0u8; LEAF_SIZE]));
            // The Arc was just created, so `get_mut` cannot fail.
            let leaf_mut = if let Node::Leaf(x) = Arc::get_mut(&mut leaf).unwrap() {
                x
            } else {
                unreachable!()
            };
            // `consumed` was already advanced past the first non-zero byte.
            leaf_mut[consumed - 1] = first_non_zero;
            while consumed < LEAF_SIZE {
                if let Some(x) = iter.next() {
                    leaf_mut[consumed] = x;
                    consumed += 1;
                } else {
                    break;
                }
            }
            (consumed, NodePointer(Some(leaf)))
        }

        let it = &mut iter.into_iter();
        // If the buffer ends inside the tree it may end mid-leaf; top up that
        // existing leaf first so the main loop starts leaf-aligned (unless the
        // trailing leaf is an unmaterialized zero subtree, handled below).
        if self.size < tree_size(self.root_height) {
            if let Some((offset, first_leaf)) = self.root.locate_leaf(self.root_height, self.size) {
                for i in offset..LEAF_SIZE {
                    let Some(x) = it.next() else { return };
                    first_leaf[i] = x;
                    self.size += 1;
                }
                assert_eq!(self.size % LEAF_SIZE, 0);
            }
        } else {
            // Buffer exactly fills the tree; capacity is a multiple of LEAF_SIZE.
            assert_eq!(self.size % LEAF_SIZE, 0);
        }
        // Append one leaf at a time, growing the tree height as needed.
        loop {
            // Non-zero only when the buffer's tail is an implicit zero leaf.
            let in_leaf_offset = self.size % LEAF_SIZE;
            let (consumed, leaf) = generate_leaf(in_leaf_offset, it);
            let old_size = self.size;
            self.size = old_size - in_leaf_offset + consumed;
            self.grow_height_until(self.size);
            if leaf.0.is_some() {
                // Splice the pre-built leaf in at its leaf-aligned offset.
                self.root
                    .put_leaf(self.root_height, old_size - in_leaf_offset, leaf);
            }
            if consumed < LEAF_SIZE {
                // Iterator exhausted before the leaf was full: done.
                return;
            }
            assert_eq!(self.size % LEAF_SIZE, 0);
        }
    }
}
537
538impl FromIterator<u8> for SnapBuf {
539    fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self {
540        let mut iter = iter.into_iter();
541        let mut ret = Self::new();
542        ret.extend(&mut iter);
543        ret
544    }
545}