Skip to main content

yulang_runtime/runtime/
bytes_tree.rs

1use std::rc::Rc;
2
3pub const MAX_LEAF_BYTES: usize = 64;
4
5#[derive(Debug, PartialEq, Eq)]
6pub enum BytesTree {
7    Empty,
8    Leaf(Rc<[u8]>),
9    Node(Rc<BytesNode>),
10}
11
12impl Clone for BytesTree {
13    fn clone(&self) -> Self {
14        match self {
15            Self::Empty => Self::Empty,
16            Self::Leaf(value) => Self::Leaf(value.clone()),
17            Self::Node(node) => Self::Node(node.clone()),
18        }
19    }
20}
21
22impl BytesTree {
23    pub fn empty() -> Self {
24        Self::Empty
25    }
26
27    pub fn from_bytes(value: &[u8]) -> Self {
28        let leaves = chunk_bytes(value)
29            .into_iter()
30            .map(Self::leaf)
31            .collect::<Vec<_>>();
32        build_balanced(leaves)
33    }
34
35    pub fn len(&self) -> usize {
36        match self {
37            Self::Empty => 0,
38            Self::Leaf(value) => value.len(),
39            Self::Node(node) => node.len,
40        }
41    }
42
43    pub fn is_empty(&self) -> bool {
44        matches!(self, Self::Empty)
45    }
46
47    pub fn concat(left: Self, right: Self) -> Self {
48        match (left, right) {
49            (Self::Empty, right) => right,
50            (left, Self::Empty) => left,
51            (Self::Leaf(left), Self::Leaf(right)) if left.len() + right.len() <= MAX_LEAF_BYTES => {
52                let mut merged = Vec::with_capacity(left.len() + right.len());
53                merged.extend_from_slice(&left);
54                merged.extend_from_slice(&right);
55                Self::leaf(merged)
56            }
57            (left, right) if left.black_height() == right.black_height() => {
58                Self::black_node(left, right)
59            }
60            (left, right) => build_balanced({
61                let mut leaves = Vec::new();
62                left.push_leaves(&mut leaves);
63                right.push_leaves(&mut leaves);
64                compact_leaves(leaves)
65            }),
66        }
67    }
68
69    pub fn index(&self, index: usize) -> Option<u8> {
70        match self {
71            Self::Empty => None,
72            Self::Leaf(value) => value.get(index).copied(),
73            Self::Node(node) => {
74                let left_len = node.left.len();
75                if index < left_len {
76                    node.left.index(index)
77                } else {
78                    node.right.index(index - left_len)
79                }
80            }
81        }
82    }
83
84    pub fn index_range(&self, start: usize, end: usize) -> Option<Self> {
85        if start > end || end > self.len() {
86            return None;
87        }
88        if start == 0 && end == self.len() {
89            return Some(self.clone());
90        }
91        match self {
92            Self::Empty => Some(Self::Empty),
93            Self::Leaf(value) => Some(Self::from_bytes(&value[start..end])),
94            Self::Node(node) => {
95                let left_len = node.left.len();
96                if end <= left_len {
97                    node.left.index_range(start, end)
98                } else if start >= left_len {
99                    node.right.index_range(start - left_len, end - left_len)
100                } else {
101                    let left = node.left.index_range(start, left_len)?;
102                    let right = node.right.index_range(0, end - left_len)?;
103                    Some(Self::concat(left, right))
104                }
105            }
106        }
107    }
108
109    pub fn splice(&self, start: usize, end: usize, insert: Self) -> Option<Self> {
110        if start > end || end > self.len() {
111            return None;
112        }
113        let prefix = self.index_range(0, start)?;
114        let suffix = self.index_range(end, self.len())?;
115        Some(Self::concat(prefix, Self::concat(insert, suffix)))
116    }
117
118    pub fn to_flat_vec(&self) -> Vec<u8> {
119        let mut out = Vec::with_capacity(self.len());
120        self.push_bytes(&mut out);
121        out
122    }
123
124    pub fn view(&self) -> BytesView {
125        match self {
126            Self::Empty => BytesView::Empty,
127            Self::Leaf(value) => BytesView::Leaf(value.clone()),
128            Self::Node(node) => BytesView::Node {
129                color: node.color,
130                len: node.len,
131                left: node.left.clone(),
132                right: node.right.clone(),
133            },
134        }
135    }
136
137    pub fn black_height(&self) -> usize {
138        match self {
139            Self::Empty | Self::Leaf(_) => 0,
140            Self::Node(node) => {
141                let child_height = node.left.black_height();
142                child_height + usize::from(node.color == Color::Black)
143            }
144        }
145    }
146
147    pub fn is_red_black_well_formed(&self) -> bool {
148        self.red_black_status().is_some()
149    }
150
151    fn leaf(value: impl Into<Vec<u8>>) -> Self {
152        let value = value.into();
153        if value.is_empty() {
154            Self::Empty
155        } else {
156            Self::Leaf(Rc::from(value.into_boxed_slice()))
157        }
158    }
159
160    fn black_node(left: Self, right: Self) -> Self {
161        Self::node(Color::Black, left, right)
162    }
163
164    fn red_node(left: Self, right: Self) -> Self {
165        Self::node(Color::Red, left, right)
166    }
167
168    fn node(color: Color, left: Self, right: Self) -> Self {
169        Self::Node(Rc::new(BytesNode {
170            color,
171            len: left.len() + right.len(),
172            left,
173            right,
174        }))
175    }
176
177    fn push_bytes(&self, out: &mut Vec<u8>) {
178        match self {
179            Self::Empty => {}
180            Self::Leaf(value) => out.extend_from_slice(value),
181            Self::Node(node) => {
182                node.left.push_bytes(out);
183                node.right.push_bytes(out);
184            }
185        }
186    }
187
188    fn push_leaves(&self, out: &mut Vec<Vec<u8>>) {
189        match self {
190            Self::Empty => {}
191            Self::Leaf(value) => out.push(value.to_vec()),
192            Self::Node(node) => {
193                node.left.push_leaves(out);
194                node.right.push_leaves(out);
195            }
196        }
197    }
198
199    fn red_black_status(&self) -> Option<usize> {
200        match self {
201            Self::Empty | Self::Leaf(_) => Some(0),
202            Self::Node(node) => {
203                let left = node.left.red_black_status()?;
204                let right = node.right.red_black_status()?;
205                if left != right {
206                    return None;
207                }
208                if node.color == Color::Red
209                    && (node.left.node_color() == Some(Color::Red)
210                        || node.right.node_color() == Some(Color::Red))
211                {
212                    return None;
213                }
214                Some(left + usize::from(node.color == Color::Black))
215            }
216        }
217    }
218
219    fn node_color(&self) -> Option<Color> {
220        match self {
221            Self::Node(node) => Some(node.color),
222            _ => None,
223        }
224    }
225}
226
227impl From<&[u8]> for BytesTree {
228    fn from(value: &[u8]) -> Self {
229        Self::from_bytes(value)
230    }
231}
232
233impl From<Vec<u8>> for BytesTree {
234    fn from(value: Vec<u8>) -> Self {
235        Self::from_bytes(&value)
236    }
237}
238
239#[derive(Debug, Clone, PartialEq, Eq)]
240pub enum BytesView {
241    Empty,
242    Leaf(Rc<[u8]>),
243    Node {
244        color: Color,
245        len: usize,
246        left: BytesTree,
247        right: BytesTree,
248    },
249}
250
251#[derive(Debug, Clone, Copy, PartialEq, Eq)]
252pub enum Color {
253    Red,
254    Black,
255}
256
257#[derive(Debug, Clone, PartialEq, Eq)]
258pub struct BytesNode {
259    pub color: Color,
260    pub len: usize,
261    pub left: BytesTree,
262    pub right: BytesTree,
263}
264
265fn chunk_bytes(value: &[u8]) -> Vec<Vec<u8>> {
266    value
267        .chunks(MAX_LEAF_BYTES)
268        .map(|chunk| chunk.to_vec())
269        .collect()
270}
271
272fn compact_leaves(leaves: Vec<Vec<u8>>) -> Vec<BytesTree> {
273    let mut compacted = Vec::new();
274    let mut current = Vec::new();
275    for leaf in leaves {
276        for byte in leaf {
277            if current.len() >= MAX_LEAF_BYTES {
278                compacted.push(BytesTree::leaf(std::mem::take(&mut current)));
279            }
280            current.push(byte);
281        }
282    }
283    if !current.is_empty() {
284        compacted.push(BytesTree::leaf(current));
285    }
286    compacted
287}
288
289fn build_balanced(mut items: Vec<BytesTree>) -> BytesTree {
290    items.retain(|item| !item.is_empty());
291    if items.is_empty() {
292        return BytesTree::Empty;
293    }
294    while items.len() > 1 {
295        let count = items.len();
296        let triple_count = if count % 2 == 1 && count >= 3 { 1 } else { 0 };
297        let mut next = Vec::with_capacity(items.len().div_ceil(2));
298        let mut pairs = items.into_iter();
299        let mut remaining_triples = triple_count;
300        while let Some(first) = pairs.next() {
301            if remaining_triples > 0 {
302                let Some(second) = pairs.next() else {
303                    next.push(first);
304                    break;
305                };
306                let Some(third) = pairs.next() else {
307                    next.push(BytesTree::black_node(first, second));
308                    break;
309                };
310                next.push(BytesTree::black_node(
311                    BytesTree::red_node(first, second),
312                    third,
313                ));
314                remaining_triples -= 1;
315                continue;
316            }
317            match pairs.next() {
318                Some(second) => next.push(BytesTree::black_node(first, second)),
319                None => next.push(first),
320            }
321        }
322        items = next;
323    }
324    items.pop().unwrap_or(BytesTree::Empty)
325}
326
327#[cfg(test)]
328mod tests {
329    use super::{BytesTree, BytesView, Color, MAX_LEAF_BYTES};
330
331    #[test]
332    fn bytes_tree_tracks_length() {
333        let value = BytesTree::from_bytes(b"hello");
334        assert_eq!(value.len(), 5);
335        assert_eq!(value.to_flat_vec(), b"hello".to_vec());
336    }
337
338    #[test]
339    fn bytes_tree_chunks_large_leaves() {
340        let source = vec![b'x'; MAX_LEAF_BYTES + 1];
341        let value = BytesTree::from_bytes(&source);
342
343        assert!(matches!(value.view(), BytesView::Node { .. }));
344        assert_eq!(value.to_flat_vec(), source);
345        assert!(value.is_red_black_well_formed());
346    }
347
348    #[test]
349    fn bytes_tree_concat_range_and_splice_use_tree_operations() {
350        let value = BytesTree::concat(BytesTree::from_bytes(b"ab"), BytesTree::from_bytes(b"cd"));
351
352        assert_eq!(value.index(1), Some(b'b'));
353        assert_eq!(
354            value.index_range(1, 3).unwrap().to_flat_vec(),
355            b"bc".to_vec()
356        );
357        assert_eq!(
358            value
359                .splice(1, 3, BytesTree::from_bytes(b"XY"))
360                .unwrap()
361                .to_flat_vec(),
362            b"aXYd".to_vec()
363        );
364    }
365
366    #[test]
367    fn bytes_tree_view_reports_node_metadata() {
368        let value = BytesTree::concat(
369            BytesTree::from_bytes(&vec![b'a'; MAX_LEAF_BYTES]),
370            BytesTree::from_bytes(&vec![b'b'; MAX_LEAF_BYTES]),
371        );
372        let BytesView::Node { color, len, .. } = value.view() else {
373            panic!("expected node view");
374        };
375
376        assert_eq!(color, Color::Black);
377        assert_eq!(len, MAX_LEAF_BYTES * 2);
378    }
379}