contigious_tree/lib.rs
1//! Write and read tree graphs to and from contigious blocks of memory.
2
3use std::{
4 io::{self, Write},
5 marker::PhantomData,
6 mem::size_of,
7 ops::Deref,
8};
9
10/// Used to store the binary sizes of [`TreeVec`]s and [`TreeSlice`]s in bytes. This would usually be
11/// done utilizing `usize`, yet the size of `usize` is platform dependend. Since part of the appeal
12/// of a serializable tree data structure is to store it to a filesystem and load it, it seems
13/// beneficial to fix this to 64Bit on any platform to not introduce a dependency of the fileformat
14/// to the platform it has been generated on.
15pub type TreeSize = u64;
16
17/// Helpful if we want to extract a value of [`TreeSize`] out of a raw binary representation of
18/// binary slices or in calculating the size of a subtree.
19const TREE_SIZE_SIZE: usize = size_of::<TreeSize>();
20
21/// [`TreeVec`] is generic over the value types associated with each node. Furthermore it is also
22/// generic about the way these are serialized. E.g. A value type of `i64` could be stored in
23/// little endian, big endian or a bitpacked representation. This allows us to adapt the tree to a
24/// wide variaty of usecases.
25pub trait Node {
26 /// The value type associated with each node in the tree.
27 type Value;
28
29 /// Writes the value, so [`Self::read_value`] can extract it again. In case of success the
30 /// number of bytes written is returned.
31 fn write_value<W>(writer: &mut W, value: &Self::Value) -> io::Result<usize>
32 where
33 W: Write;
34
35 /// Reads the value from a raw binary representation. Reads the value from the back of the
36 /// passed slice.
37 fn read_value(bytes: &[u8]) -> (usize, Self::Value);
38}
39
40/// Serializes a tree data structure in a depth first manner.
41pub struct TreeBuilder<N, W> {
42 /// Since we serialize each value of any node right away, we do not hold them as members per se.
43 /// To get the type safety still, we hold PhantomData of N
44 _node_type: PhantomData<N>,
45 /// Remember the subtrees and their sizes, which are not connected to a parent node yet.
46 open_node_sizes: Vec<TreeSize>,
47 /// Writer we serialize the stream into.
48 writer: W,
49}
50
51impl<N, W> TreeBuilder<N, W> {
52 pub fn new(writer: W) -> Self {
53 Self {
54 _node_type: PhantomData,
55 open_node_sizes: Vec::new(),
56 writer,
57 }
58 }
59
60 /// Adds a node to the tree.
61 ///
62 /// # Parameters
63 ///
64 /// * `value`: Value associated with the node
65 /// * `num_children`: This node will be the parent node of the last `num_children` nodes written
66 /// which do not have a parent yet.
67 pub fn write_node(&mut self, value: &N::Value, num_children: usize) -> io::Result<()>
68 where
69 N: Node,
70 W: Write,
71 {
72 // All previous children have been written and are immediate predecessors to this node.
73 // Layout: children, value, totalsize
74 let size_value: TreeSize = N::write_value(&mut self.writer, value)? as TreeSize;
75 let size_children: TreeSize = self
76 .open_node_sizes
77 .drain((self.open_node_sizes.len() - num_children)..)
78 .sum();
79 let total_size = size_value + size_children;
80 self.writer.write_all(&total_size.to_le_bytes())?;
81 // We write the size, without the size of the size value itself. However, then accounting
82 // for all the childern it must of course be added.
83 self.open_node_sizes
84 .push(total_size + TREE_SIZE_SIZE as TreeSize);
85 Ok(())
86 }
87
88 /// Call this once every node has been written. Flushes the output and returns the inner writer
89 /// in case you want to use it for something else.
90 pub fn finish(mut self) -> io::Result<W>
91 where
92 W: Write,
93 {
94 self.writer.flush()?;
95 Ok(self.writer)
96 }
97}
98
99/// An owned tree, which is stored in contigious memory. Fast traversal and query times.
100pub struct TreeVec<N> {
101 _node_type: PhantomData<N>,
102 bytes: Vec<u8>,
103}
104
105impl<N> TreeVec<N> {
106 /// Takes ownership of the bytes, and interprets them as a tree. No checks are performed wether
107 /// these actually describe a sensible tree. None of Rusts safety guarantees are violated if
108 /// providing 'random' bytes in this constructor. For bugfree code utilizing bytes written with
109 /// [`TreeBuilder`] is recommended, though.
110 pub fn new(bytes: Vec<u8>) -> TreeVec<N> {
111 TreeVec {
112 _node_type: PhantomData,
113 bytes,
114 }
115 }
116
117 pub fn as_tree_slice(&self) -> &TreeSlice<N> {
118 TreeSlice::from_slice(&self.bytes)
119 }
120}
121
122impl<N> Deref for TreeVec<N> {
123 type Target = TreeSlice<N>;
124
125 fn deref(&self) -> &Self::Target {
126 self.as_tree_slice()
127 }
128}
129
130/// Each subtree is contigious in memory and can borrowed independently similarly to a slice of
131/// bytes.
132pub struct TreeSlice<N> {
133 _node_type: PhantomData<N>,
134 bytes: [u8],
135}
136
137impl<N> TreeSlice<N> {
138 pub fn from_slice(slice: &[u8]) -> &Self {
139 let ptr: *const [u8] = slice;
140 unsafe { &*(ptr as *const TreeSlice<N>) }
141 }
142
143 /// Deserializes the value of the root node of this silce, and returns an iterator over its
144 /// children.
145 pub fn read_node(&self) -> (N::Value, Branches<'_, N>)
146 where
147 N: Node,
148 {
149 let total_size = self.bytes.len();
150 let (size_value, value) = N::read_value(&self.bytes[..(total_size - TREE_SIZE_SIZE)]);
151 let branches = Branches {
152 _node_type: PhantomData,
153 bytes: &self.bytes[..(total_size - TREE_SIZE_SIZE - size_value)],
154 };
155 (value, branches)
156 }
157}
158
159/// Iterates over the individual root nodes of subtrees
160pub struct Branches<'a, N> {
161 _node_type: PhantomData<N>,
162 bytes: &'a [u8],
163}
164
165impl<'a, N: 'a> Iterator for Branches<'a, N> {
166 type Item = &'a TreeSlice<N>;
167
168 fn next(&mut self) -> Option<Self::Item> {
169 if self.bytes.is_empty() {
170 None
171 } else {
172 let total_size = self.bytes.len();
173 let tree_size_bytes: &[u8; TREE_SIZE_SIZE] = self.bytes
174 [(total_size - TREE_SIZE_SIZE)..]
175 .try_into()
176 .unwrap();
177 let tree_size = TreeSize::from_le_bytes(*tree_size_bytes) as usize;
178 let (remainder, tree_slice) =
179 self.bytes.split_at(total_size - tree_size - TREE_SIZE_SIZE);
180 let tree_slice = TreeSlice::from_slice(tree_slice);
181
182 // Advance iterator by assigning all bytes **not** part of the tree slice just returned.
183 self.bytes = remainder;
184
185 Some(tree_slice)
186 }
187 }
188}
189
190/// 32 Bit signed integer stored in little endian byte order
191pub struct LeI32;
192
193impl Node for LeI32 {
194 type Value = i32;
195
196 fn write_value<W>(writer: &mut W, value: &Self::Value) -> std::io::Result<usize>
197 where
198 W: Write,
199 {
200 let bytes = value.to_le_bytes();
201 writer.write_all(&bytes)?;
202 Ok(bytes.len()) // Should always be 4
203 }
204
205 fn read_value(bytes: &[u8]) -> (usize, i32) {
206 let total_len = bytes.len();
207 let last_four_bytes: &[u8; 4] = bytes[(total_len - 4)..].try_into().unwrap();
208 (4, i32::from_le_bytes(*last_four_bytes))
209 }
210}
211
212/// 8 Bit unsigned integer stored in little endian byte order
213pub struct U8;
214
215impl Node for U8 {
216 type Value = u8;
217
218 fn write_value<W>(writer: &mut W, value: &Self::Value) -> std::io::Result<usize>
219 where
220 W: Write,
221 {
222 let bytes = value.to_le_bytes();
223 writer.write_all(&bytes)?;
224 Ok(bytes.len()) // Should always be 1
225 }
226
227 fn read_value(bytes: &[u8]) -> (usize, u8) {
228 let total_len = bytes.len();
229 let last_four_bytes: &[u8; 1] = bytes[(total_len - 1)..].try_into().unwrap();
230 (1, u8::from_le_bytes(*last_four_bytes))
231 }
232}