contigious_tree/
lib.rs

1//! Write and read tree graphs to and from contigious blocks of memory.
2
3use std::{
4    io::{self, Write},
5    marker::PhantomData,
6    mem::size_of,
7    ops::Deref,
8};
9
10/// Used to store the binary sizes of [`TreeVec`]s and [`TreeSlice`]s in bytes. This would usually be
11/// done utilizing `usize`, yet the size of `usize` is platform dependend. Since part of the appeal
12/// of a serializable tree data structure is to store it to a filesystem and load it, it seems
13/// beneficial to fix this to 64Bit on any platform to not introduce a dependency of the fileformat
14/// to the platform it has been generated on.
15pub type TreeSize = u64;
16
17/// Helpful if we want to extract a value of [`TreeSize`] out of a raw binary representation of
18/// binary slices or in calculating the size of a subtree.
19const TREE_SIZE_SIZE: usize = size_of::<TreeSize>();
20
21/// [`TreeVec`] is generic over the value types associated with each node. Furthermore it is also
22/// generic about the way these are serialized. E.g. A value type of `i64` could be stored in
23/// little endian, big endian or a bitpacked representation. This allows us to adapt the tree to a
24/// wide variaty of usecases.
25pub trait Node {
26    /// The value type associated with each node in the tree.
27    type Value;
28
29    /// Writes the value, so [`Self::read_value`] can extract it again. In case of success the
30    /// number of bytes written is returned.
31    fn write_value<W>(writer: &mut W, value: &Self::Value) -> io::Result<usize>
32    where
33        W: Write;
34
35    /// Reads the value from a raw binary representation. Reads the value from the back of the
36    /// passed slice.
37    fn read_value(bytes: &[u8]) -> (usize, Self::Value);
38}
39
40/// Serializes a tree data structure in a depth first manner.
41pub struct TreeBuilder<N, W> {
42    /// Since we serialize each value of any node right away, we do not hold them as members per se.
43    /// To get the type safety still, we hold PhantomData of N
44    _node_type: PhantomData<N>,
45    /// Remember the subtrees and their sizes, which are not connected to a parent node yet.
46    open_node_sizes: Vec<TreeSize>,
47    /// Writer we serialize the stream into.
48    writer: W,
49}
50
51impl<N, W> TreeBuilder<N, W> {
52    pub fn new(writer: W) -> Self {
53        Self {
54            _node_type: PhantomData,
55            open_node_sizes: Vec::new(),
56            writer,
57        }
58    }
59
60    /// Adds a node to the tree.
61    ///
62    /// # Parameters
63    ///
64    /// * `value`: Value associated with the node
65    /// * `num_children`: This node will be the parent node of the last `num_children` nodes written
66    ///   which do not have a parent yet.
67    pub fn write_node(&mut self, value: &N::Value, num_children: usize) -> io::Result<()>
68    where
69        N: Node,
70        W: Write,
71    {
72        // All previous children have been written and are immediate predecessors to this node.
73        // Layout: children, value, totalsize
74        let size_value: TreeSize = N::write_value(&mut self.writer, value)? as TreeSize;
75        let size_children: TreeSize = self
76            .open_node_sizes
77            .drain((self.open_node_sizes.len() - num_children)..)
78            .sum();
79        let total_size = size_value + size_children;
80        self.writer.write_all(&total_size.to_le_bytes())?;
81        // We write the size, without the size of the size value itself. However, then accounting
82        // for all the childern it must of course be added.
83        self.open_node_sizes
84            .push(total_size + TREE_SIZE_SIZE as TreeSize);
85        Ok(())
86    }
87
88    /// Call this once every node has been written. Flushes the output and returns the inner writer
89    /// in case you want to use it for something else.
90    pub fn finish(mut self) -> io::Result<W>
91    where
92        W: Write,
93    {
94        self.writer.flush()?;
95        Ok(self.writer)
96    }
97}
98
99/// An owned tree, which is stored in contigious memory. Fast traversal and query times.
100pub struct TreeVec<N> {
101    _node_type: PhantomData<N>,
102    bytes: Vec<u8>,
103}
104
105impl<N> TreeVec<N> {
106    /// Takes ownership of the bytes, and interprets them as a tree. No checks are performed wether
107    /// these actually describe a sensible tree. None of Rusts safety guarantees are violated if
108    /// providing 'random' bytes in this constructor. For bugfree code utilizing bytes written with
109    /// [`TreeBuilder`] is recommended, though.
110    pub fn new(bytes: Vec<u8>) -> TreeVec<N> {
111        TreeVec {
112            _node_type: PhantomData,
113            bytes,
114        }
115    }
116
117    pub fn as_tree_slice(&self) -> &TreeSlice<N> {
118        TreeSlice::from_slice(&self.bytes)
119    }
120}
121
122impl<N> Deref for TreeVec<N> {
123    type Target = TreeSlice<N>;
124
125    fn deref(&self) -> &Self::Target {
126        self.as_tree_slice()
127    }
128}
129
130/// Each subtree is contigious in memory and can borrowed independently similarly to a slice of
131/// bytes.
132pub struct TreeSlice<N> {
133    _node_type: PhantomData<N>,
134    bytes: [u8],
135}
136
137impl<N> TreeSlice<N> {
138    pub fn from_slice(slice: &[u8]) -> &Self {
139        let ptr: *const [u8] = slice;
140        unsafe { &*(ptr as *const TreeSlice<N>) }
141    }
142
143    /// Deserializes the value of the root node of this silce, and returns an iterator over its
144    /// children.
145    pub fn read_node(&self) -> (N::Value, Branches<'_, N>)
146    where
147        N: Node,
148    {
149        let total_size = self.bytes.len();
150        let (size_value, value) = N::read_value(&self.bytes[..(total_size - TREE_SIZE_SIZE)]);
151        let branches = Branches {
152            _node_type: PhantomData,
153            bytes: &self.bytes[..(total_size - TREE_SIZE_SIZE - size_value)],
154        };
155        (value, branches)
156    }
157}
158
159/// Iterates over the individual root nodes of subtrees
160pub struct Branches<'a, N> {
161    _node_type: PhantomData<N>,
162    bytes: &'a [u8],
163}
164
165impl<'a, N: 'a> Iterator for Branches<'a, N> {
166    type Item = &'a TreeSlice<N>;
167
168    fn next(&mut self) -> Option<Self::Item> {
169        if self.bytes.is_empty() {
170            None
171        } else {
172            let total_size = self.bytes.len();
173            let tree_size_bytes: &[u8; TREE_SIZE_SIZE] = self.bytes
174                [(total_size - TREE_SIZE_SIZE)..]
175                .try_into()
176                .unwrap();
177            let tree_size = TreeSize::from_le_bytes(*tree_size_bytes) as usize;
178            let (remainder, tree_slice) =
179                self.bytes.split_at(total_size - tree_size - TREE_SIZE_SIZE);
180            let tree_slice = TreeSlice::from_slice(tree_slice);
181
182            // Advance iterator by assigning all bytes **not** part of the tree slice just returned.
183            self.bytes = remainder;
184
185            Some(tree_slice)
186        }
187    }
188}
189
190/// 32 Bit signed integer stored in little endian byte order
191pub struct LeI32;
192
193impl Node for LeI32 {
194    type Value = i32;
195
196    fn write_value<W>(writer: &mut W, value: &Self::Value) -> std::io::Result<usize>
197    where
198        W: Write,
199    {
200        let bytes = value.to_le_bytes();
201        writer.write_all(&bytes)?;
202        Ok(bytes.len()) // Should always be 4
203    }
204
205    fn read_value(bytes: &[u8]) -> (usize, i32) {
206        let total_len = bytes.len();
207        let last_four_bytes: &[u8; 4] = bytes[(total_len - 4)..].try_into().unwrap();
208        (4, i32::from_le_bytes(*last_four_bytes))
209    }
210}
211
212/// 8 Bit unsigned integer stored in little endian byte order
213pub struct U8;
214
215impl Node for U8 {
216    type Value = u8;
217
218    fn write_value<W>(writer: &mut W, value: &Self::Value) -> std::io::Result<usize>
219    where
220        W: Write,
221    {
222        let bytes = value.to_le_bytes();
223        writer.write_all(&bytes)?;
224        Ok(bytes.len()) // Should always be 1
225    }
226
227    fn read_value(bytes: &[u8]) -> (usize, u8) {
228        let total_len = bytes.len();
229        let last_four_bytes: &[u8; 1] = bytes[(total_len - 1)..].try_into().unwrap();
230        (1, u8::from_le_bytes(*last_four_bytes))
231    }
232}