snap_buf/
lib.rs

1#![no_std]
2//! A [SnapBuf] is like a `Vec<u8>` with cheap snapshotting using copy on write.
3//!
4//! Internally, the data is broken up into segments that are organized in a tree structure.
5//! Only modified subtrees are cloned, so buffers with only little differences can share most of their memory.
6//! Moreover, subtrees which contain only zeros take up no memory.
7
8extern crate alloc;
9#[cfg(feature = "test")]
10extern crate std;
11
12use alloc::sync::Arc;
13use core::cmp::Ordering;
14use core::ops::Range;
15use core::{iter, mem, slice};
16use smallvec::SmallVec;
17
18#[derive(Debug)]
19pub struct SnapBuf {
20    size: usize,
21    root_height: usize,
22    root: NodePointer,
23}
24
25const LEAF_SIZE: usize = if cfg!(feature = "test") { 32 } else { 4000 };
26const INNER_SIZE: usize = if cfg!(feature = "test") { 4 } else { 500 };
27
28#[cfg(feature = "test")]
29pub mod test;
30
31#[derive(Clone, Debug)]
32enum Node {
33    Inner([NodePointer; INNER_SIZE]),
34    Leaf([u8; LEAF_SIZE]),
35}
36
37#[derive(Clone, Debug)]
38struct NodePointer(Option<Arc<Node>>);
39
40impl NodePointer {
41    fn children(&self) -> Option<&[NodePointer; INNER_SIZE]> {
42        match &**(self.0.as_ref()?) {
43            Node::Inner(x) => Some(x),
44            Node::Leaf(_) => None,
45        }
46    }
47
48    fn get_mut(&mut self, height: usize) -> &mut Node {
49        let arc = self.0.get_or_insert_with(|| {
50            Arc::new({
51                if height == 0 {
52                    Node::Leaf([0; LEAF_SIZE])
53                } else {
54                    Node::Inner(array_init::array_init(|_| NodePointer(None)))
55                }
56            })
57        });
58        Arc::make_mut(arc)
59    }
60
61    fn set_range(&mut self, height: usize, start: usize, values: &[u8]) {
62        debug_assert!(start < tree_size(height));
63        match self.get_mut(height) {
64            Node::Inner(children) => {
65                let child_size = tree_size(height - 1);
66                let first_child = start / child_size;
67                let last_child = ((start + values.len() - 1) / child_size).min(children.len() - 1);
68                #[allow(clippy::needless_range_loop)]
69                for child in first_child..=last_child {
70                    let child_offset = child * child_size;
71                    if child_offset <= start {
72                        children[child].set_range(height - 1, start - child_offset, values)
73                    } else {
74                        let values = &values[child_offset - start..];
75                        children[child].set_range(
76                            height - 1,
77                            start.saturating_sub(child_offset),
78                            values,
79                        )
80                    }
81                }
82            }
83            Node::Leaf(bytes) => {
84                let write_len = (bytes.len() - start).min(values.len());
85                bytes[start..start + write_len].copy_from_slice(&values[..write_len]);
86            }
87        }
88    }
89
90    fn clear_range(&mut self, height: usize, range: Range<usize>) {
91        let start = range.start;
92        let end = range.end;
93        let self_size = tree_size(height);
94        debug_assert!(start < self_size);
95        if start == 0 && end >= self_size || self.0.is_none() {
96            self.0 = None;
97            return;
98        }
99        match self.get_mut(height) {
100            Node::Inner(children) => {
101                let child_size = tree_size(height - 1);
102                let first_child = start / child_size;
103                let last_child = ((end - 1) / child_size).min(children.len() - 1);
104                #[allow(clippy::needless_range_loop)]
105                for child in first_child..=last_child {
106                    let child_offset = child * child_size;
107                    children[child].clear_range(
108                        height - 1,
109                        start.saturating_sub(child_offset)..end - child_offset,
110                    );
111                }
112                if children.first().unwrap().0.is_none()
113                    && children.last().unwrap().0.is_none()
114                    && children.iter().all(|x| x.0.is_none())
115                {
116                    self.0 = None;
117                }
118            }
119            Node::Leaf(bytes) => {
120                let write_end = range.end.min(bytes.len());
121                bytes[start..write_end].fill(0);
122                if bytes[0] == 0 && *bytes.last().unwrap() == 0 && bytes.iter().all(|x| *x == 0) {
123                    self.0 = None;
124                }
125            }
126        }
127    }
128}
129
130const fn const_tree_size(height: usize) -> usize {
131    if height == 0 {
132        LEAF_SIZE
133    } else {
134        INNER_SIZE * const_tree_size(height - 1)
135    }
136}
137
138fn tree_size(height: usize) -> usize {
139    const_tree_size(height)
140}
141
142impl Default for SnapBuf {
143    fn default() -> Self {
144        Self::new()
145    }
146}
147
148impl SnapBuf {
149    /// Creates an empty buffer.
150    pub fn new() -> Self {
151        Self {
152            root_height: 0,
153            size: 0,
154            root: NodePointer(None),
155        }
156    }
157
158    /// Resizes the buffer, so that `len == new_len`.
159    ///
160    /// If `new_len` is greater than `len`, the new space in the buffer is filled with zeros.
161    pub fn resize_zero(&mut self, new_len: usize) {
162        match new_len.cmp(&self.size) {
163            Ordering::Less => {
164                self.root
165                    .clear_range(self.root_height, new_len..tree_size(self.root_height));
166                self.size = new_len;
167            }
168            Ordering::Equal => {}
169            Ordering::Greater => {
170                while tree_size(self.root_height) < new_len {
171                    if self.root.0.is_some() {
172                        let new_root = Arc::new(Node::Inner(array_init::array_init(|x| {
173                            if x == 0 {
174                                self.root.clone()
175                            } else {
176                                NodePointer(None)
177                            }
178                        })));
179                        self.root = NodePointer(Some(new_root.clone()));
180                    }
181                    self.root_height += 1;
182                }
183                self.size = new_len;
184            }
185        }
186    }
187
188    /// Writes data at the specified offset.
189    ///
190    /// If this extends past the current end of the buffer, the buffer is automatically resized.
191    /// If offset is larger than the current buffer length, the space between the current buffer
192    /// end and the written region is filled with zeros.
193    pub fn write(&mut self, offset: usize, data: &[u8]) {
194        let write_end = offset + data.len();
195        if self.size < write_end {
196            self.resize_zero(write_end);
197        }
198        if data.is_empty() {
199            return;
200        }
201        self.root.set_range(self.root_height, offset, data);
202    }
203
204    /// Returns `true` if the buffer length is zero.
205    pub fn is_empty(&self) -> bool {
206        self.len() == 0
207    }
208
209    /// Returns the length of the buffer, the number of bytes it contains.
210    ///
211    /// The memory footprint of the buffer may be much smaller than this due to omission of zero segments and sharing with other buffers.
212    pub fn len(&self) -> usize {
213        self.size
214    }
215
216    /// Clears data in range, possibly freeing memory.
217    ///
218    /// # Panics
219    /// Panics if range end is `range.end` > `self.len()`.
220    pub fn clear_range(&mut self, range: Range<usize>) {
221        assert!(range.end <= self.size);
222        if range.is_empty() {
223            return;
224        }
225        self.root.clear_range(self.root_height, range);
226    }
227
228    /// Clears all data and sets length to 0.
229    pub fn clear(&mut self) {
230        *self = Self::new();
231    }
232
233    fn iter_nodes_pre_order(&self) -> impl Iterator<Item = (&NodePointer, usize)> {
234        struct IterStack<'a> {
235            stack_end_height: usize,
236            stack: SmallVec<[&'a [NodePointer]; 5]>,
237        }
238
239        #[allow(clippy::needless_lifetimes)]
240        fn split_first_in_place<'x, 's, T>(x: &'x mut &'s [T]) -> &'s T {
241            let (first, rest) = mem::take(x).split_first().unwrap();
242            *x = rest;
243            first
244        }
245
246        impl<'a> Iterator for IterStack<'a> {
247            type Item = (&'a NodePointer, usize);
248
249            fn next(&mut self) -> Option<Self::Item> {
250                let visit_now = loop {
251                    let last_level = self.stack.last_mut()?;
252                    if last_level.is_empty() {
253                        self.stack.pop();
254                        self.stack_end_height += 1;
255                    } else {
256                        break split_first_in_place(last_level);
257                    }
258                };
259                let ret = (visit_now, self.stack_end_height);
260                if let Some(children) = visit_now.children() {
261                    self.stack.push(children);
262                    self.stack_end_height -= 1;
263                }
264                Some(ret)
265            }
266        }
267
268        let mut stack = SmallVec::new();
269        stack.push(slice::from_ref(&self.root));
270        IterStack {
271            stack_end_height: self.root_height,
272            stack,
273        }
274    }
275
276    /// Returns an iterator over the byte slices constituting the buffer.
277    ///
278    /// The returned slices may overlap.
279    pub fn chunks(&self) -> impl Iterator<Item = &[u8]> {
280        let mut emitted = 0;
281        self.iter_nodes_pre_order()
282            .flat_map(|(node, height)| {
283                let zero_leaf = &[0u8; LEAF_SIZE];
284                match node.0.as_deref() {
285                    None => {
286                        let leaf_count = INNER_SIZE.pow(height as u32);
287                        iter::repeat_n(zero_leaf, leaf_count)
288                    }
289                    Some(Node::Inner(_)) => iter::repeat_n(zero_leaf, 0),
290                    Some(Node::Leaf(b)) => iter::repeat_n(b, 1),
291                }
292            })
293            .map(move |x| {
294                let emit = (self.size - emitted).min(x.len());
295                emitted += emit;
296                &x[..emit]
297            })
298            .filter(|x| !x.is_empty())
299    }
300
301    /// Returns an iterator over the buffer.
302    pub fn iter(&self) -> impl Iterator<Item = &u8> {
303        self.chunks().flat_map(|x| x.iter())
304    }
305
306    #[doc(hidden)]
307    pub fn bytes(&self) -> impl Iterator<Item = u8> + '_ {
308        self.iter().copied()
309    }
310}