content_tree/
root.rs

1#![allow(clippy::needless_lifetimes)] // Clippy doesn't understand the need for some lifetimes below
2
3use std::mem::size_of;
4
5use humansize::{file_size_opts, FileSize};
6use smallvec::SmallVec;
7use rle::{Searchable, merge_items};
8use super::*;
9
/// Result collection for delete operations. The `SmallVec` stores up to 8
/// entries inline before spilling to the heap.
pub type DeleteResult<E> = SmallVec<[E; 8]>;
11
12impl<E: ContentTraits, I: TreeMetrics<E>, const IE: usize, const LE: usize> ContentTreeRaw<E, I, IE, LE> {
    /// Create a new, empty tree, pinned on the heap.
    ///
    /// The tree starts out as a single empty leaf. Nodes store raw pointers to
    /// their parent, so the tree must stay pinned - which is why this returns
    /// `Pin<Box<Self>>` and the struct carries `PhantomPinned`.
    pub fn new() -> Pin<Box<Self>> {
        let mut tree = Box::pin(Self {
            count: I::Value::default(),
            // The root leaf is created with no parent (None); its parent pointer
            // is patched immediately below, once the tree has a stable address.
            root: unsafe { Node::Leaf(Box::pin(NodeLeaf::new(None))) },
            // last_cursor: Cell::new(None),
            _pin: marker::PhantomPinned,
        });

        // What a mess. I'm sure there's a nicer way to write this, somehow O_o.
        // Point the root's parent pointer back at the pinned tree itself.
        let parent_ref = unsafe { tree.as_ref().get_ref().to_parent_ptr() };
        tree.as_mut().root_ref_mut().set_parent(parent_ref);

        tree
    }
27
    /// Project a mutable reference to the root node through the pin.
    ///
    /// Uses `get_unchecked_mut`; the caller must not move the root out of the
    /// tree, since child nodes hold raw parent pointers into it.
    fn root_ref_mut(self: Pin<&mut Self>) -> &mut Node<E, I, IE, LE> {
        unsafe {
            &mut self.get_unchecked_mut().root
        }
    }
33
    /// Returns the tree's cached aggregate metric (the running total maintained
    /// in `self.count` by the index type `I`).
    pub fn len(&self) -> I::Value {
        self.count
    }
37
38    // pub fn get(&self, pos: usize) -> Option<E::Item> {
39    //     let cursor = self.cursor_at_pos(pos, false);
40    //     cursor.get_item()
41    // }
42
    /// Make a `ParentPtr` referencing this tree as the root parent.
    ///
    /// # Safety
    ///
    /// The returned raw pointer is only valid while `self` remains pinned at its
    /// current address.
    pub(crate) unsafe fn to_parent_ptr(&self) -> ParentPtr<E, I, IE, LE> {
        ParentPtr::Root(ref_to_nonnull(self))
    }
46
    /// WARNING: This method doesn't actually figure out the cursor position at the item. The offset
    /// stored in the cursor contains the final offset. For cursor_at_offset this will be correct,
    /// or any time the content size corresponds to offset size.
    ///
    /// Descends from the root to the leaf containing `raw_pos`. `offset_to_num`
    /// converts an internal node's aggregated metric to a usize position, and
    /// `entry_to_num` does the same for a single leaf entry. `stick_end`
    /// controls which side of an entry boundary the cursor lands on.
    pub fn unsafe_cursor_at_query<F, G>(&self, raw_pos: usize, stick_end: bool, offset_to_num: F, entry_to_num: G) -> UnsafeCursor<E, I, IE, LE>
            where F: Fn(I::Value) -> usize, G: Fn(E) -> usize {
        // if let Some((pos, mut cursor)) = self.last_cursor.get() {
        //     if pos == raw_pos {
        //         if cursor.offset == 0 {
        //             cursor.prev_entry();
        //         }
        //         return cursor;
        //     }
        // }

        unsafe {
            // Walk down through internal nodes, narrowing the remaining offset
            // at each level until we reach a leaf.
            let mut node = self.root.as_ptr();
            let mut offset_remaining = raw_pos;
            while let NodePtr::Internal(data) = node {
                let (new_offset_remaining, next) = data.as_ref()
                    .find_child_at_offset(offset_remaining, stick_end, &offset_to_num)
                    .expect("Internal consistency violation");
                offset_remaining = new_offset_remaining;
                node = next;
            };

            let leaf_ptr = node.unwrap_leaf();
            let node = leaf_ptr.as_ref();

            let (idx, offset_remaining) = if node.num_entries == 0 {
                // Empty tree (root is an empty leaf). usize::MAX marks a cursor
                // with no valid entry, matching unsafe_cursor_at_start/_at_end.
                (0, usize::MAX)
            } else {
                node.find_offset(offset_remaining, stick_end, entry_to_num)
                    .expect("Element does not contain entry")
            };

            UnsafeCursor {
                node: leaf_ptr,
                idx,
                offset: offset_remaining,
                // _marker: marker::PhantomData
            }
        }
    }
90
    /// Return a reference to the leftmost leaf in the tree.
    pub(crate) fn leaf_at_start(&self) -> &NodeLeaf<E, I, IE, LE> {
        // There is always at least one leaf, so this is safe!
        unsafe {
            // Follow the first child at each level until we hit a leaf.
            let mut node = self.root.as_ptr();
            while let NodePtr::Internal(data) = node {
                node = data.as_ref().children[0].as_ref().unwrap().as_ptr()
            };

            node.unwrap_leaf().as_ref()
        }
    }
102
    /// Create an unsafe cursor pointing at the very start of the tree.
    pub fn unsafe_cursor_at_start(&self) -> UnsafeCursor<E, I, IE, LE> {
        // TODO: Consider moving this into unsafe_cursor
        unsafe {
            let leaf_ref = self.leaf_at_start();
            UnsafeCursor {
                node: NonNull::new_unchecked(leaf_ref as *const _ as *mut _),
                idx: 0,
                // usize::MAX flags a cursor into an empty tree (no valid entry),
                // consistent with unsafe_cursor_at_query and _at_end.
                offset: if leaf_ref.num_entries == 0 { usize::MAX } else { 0 },
                // _marker: marker::PhantomData
            }
        }
    }
115
    /// Create an unsafe cursor pointing past the last entry in the tree.
    pub fn unsafe_cursor_at_end(&self) -> UnsafeCursor<E, I, IE, LE> {
        // There's ways to write this to be faster, but this method is called rarely enough that it
        // should be fine.
        // let cursor = self.cursor_at_query(offset_to_num(self.count), true, offset_to_num, entry_to_num);

        let cursor = unsafe {
            // Descend via the last child at each level to reach the last leaf.
            let mut node = self.root.as_ptr();
            while let NodePtr::Internal(ptr) = node {
                node = ptr.as_ref().last_child();
            };

            // Now scan to the end of the leaf
            let leaf_ptr = node.unwrap_leaf();
            let leaf = leaf_ptr.as_ref();
            let (idx, offset) = if leaf.len_entries() == 0 {
                // We're creating a cursor into an empty range tree.
                (0, usize::MAX)
            } else {
                // Park the cursor at the very end of the last entry.
                let idx = leaf.len_entries() - 1;
                let offset = leaf.data[idx].len();
                (idx, offset)
            };
            UnsafeCursor {
                node: leaf_ptr,
                idx,
                offset
            }
        };

        if cfg!(debug_assertions) {
            // Make sure nothing went wrong while we're here.
            let mut cursor = cursor.clone();
            let node = unsafe { cursor.node.as_ref() };
            if let Some(entry) = cursor.try_get_raw_entry() {
                // The cursor's offset should sit exactly at the entry's end.
                assert_eq!(entry.len(), cursor.offset);
            }
            if node.len_entries() > 0 {
                assert_eq!(cursor.idx, node.len_entries() - 1);
            }
            // An end cursor must have nothing after it.
            assert!(!cursor.next_entry());
        }

        cursor
    }
160
161    // pub fn clear_cursor_cache(self: &Pin<Box<Self>>) {
162    //     self.as_ref().last_cursor.set(None);
163    // }
164    // pub fn cache_cursor(self: &Pin<Box<Self>>, pos: usize, cursor: Cursor<E>) {
165    //     self.as_ref().last_cursor.set(Some((pos, cursor)));
166    // }
167
168    pub fn next_entry_or_panic(cursor: &mut UnsafeCursor<E, I, IE, LE>, marker: &mut I::Update) {
169        if !cursor.next_entry_marker(Some(marker)) {
170            panic!("Local delete past the end of the document");
171        }
172    }
173
174    // Returns size.
175    fn check_leaf(leaf: &NodeLeaf<E, I, IE, LE>, expected_parent: ParentPtr<E, I, IE, LE>) -> I::Value {
176        assert_eq!(leaf.parent, expected_parent);
177        
178        // let mut count: usize = 0;
179        let mut count = I::Value::default();
180
181        for e in &leaf.data[..leaf.num_entries as usize] {
182            // assert!(e.is_valid());
183
184            // Make sure there's no data after an invalid entry
185            assert_ne!(e.len(), 0, "Invalid leaf - 0 length");
186            // count += e.content_len() as usize;
187            I::increment_offset(&mut count, e);
188        }
189
190        // An empty leaf is only valid if we're the root element.
191        if let ParentPtr::Internal(_) = leaf.parent {
192            assert_ne!(leaf.num_entries, 0, "Non-root leaf is empty");
193        }
194
195        // Check the next pointer makes sense.
196        // Note we're using adjacent_leaf_by_traversal, which forces the full traversal.
197        let next = leaf.adjacent_leaf_by_traversal(true);
198        assert_eq!(next, leaf.next);
199
200        count
201    }
202    
    // Returns size.
    /// Consistency-check an internal node and, recursively, its whole subtree:
    /// - its parent pointer must equal `expected_parent`
    /// - populated children must be contiguous (nothing after the first `None`)
    /// - all children must be the same kind (all internal or all leaf)
    /// - each child's cached metric must equal its recomputed size
    ///
    /// Returns the recomputed aggregate size of this subtree.
    fn check_internal(node: &NodeInternal<E, I, IE, LE>, expected_parent: ParentPtr<E, I, IE, LE>) -> I::Value {
        assert_eq!(node.parent, expected_parent);

        // let mut count_total: usize = 0;
        let mut count_total = I::Value::default();
        let mut done = false; // set when we see the first empty child slot
        let mut child_type = None; // Make sure all the children have the same type.
        // let self_parent = ParentPtr::Internal(NonNull::new(node as *const _ as *mut _).unwrap());
        let self_parent = unsafe { node.to_parent_ptr() };

        for idx in 0..node.metrics.len() {
            let child_count_expected = node.metrics[idx];
            let child = &node.children[idx];

            if let Some(child) = child {
                // Make sure there's no data after an invalid entry
                assert!(!done);

                let child_ref = child;

                let actual_type = match child_ref {
                    Node::Internal(_) => 1,
                    Node::Leaf(_) => 2,
                };
                // Make sure all children have the same type.
                if child_type.is_none() { child_type = Some(actual_type) }
                else { assert_eq!(child_type, Some(actual_type)); }

                // Recurse
                let count_actual = match child_ref {
                    Node::Leaf(ref n) => { Self::check_leaf(n.as_ref().get_ref(), self_parent) },
                    Node::Internal(ref n) => { Self::check_internal(n.as_ref().get_ref(), self_parent) },
                };

                // Make sure all the individual counts match.
                // if *child_count_expected as usize != count_actual {
                //     eprintln!("xxx {:#?}", node);
                // }
                assert_eq!(child_count_expected, count_actual, "Child node count does not match");
                count_total += count_actual;
            } else {
                done = true;
            }
        }

        count_total
    }
251
252    pub fn check(&self) {
253        // Check the parent of each node is its correct parent
254        // Check the size of each node is correct up and down the tree
255        // println!("check tree {:#?}", self);
256        let root = &self.root;
257        let expected_parent = ParentPtr::Root(unsafe { ref_to_nonnull(self) });
258        let expected_size = match root {
259            Node::Internal(n) => { Self::check_internal(n, expected_parent) },
260            Node::Leaf(n) => { Self::check_leaf(n, expected_parent) },
261        };
262        assert_eq!(self.count, expected_size, "tree.count is incorrect");
263    }
264
265    fn print_node_tree(node: &Node<E, I, IE, LE>, depth: usize) {
266        for _ in 0..depth { eprint!("  "); }
267        match node {
268            Node::Internal(n) => {
269                let n = n.as_ref().get_ref();
270                eprintln!("Internal {:?} (parent: {:?})", n as *const _, n.parent);
271                let mut unused = 0;
272                for e in &n.children[..] {
273                    if let Some(e) = e {
274                        Self::print_node_tree(e, depth + 1);
275                    } else { unused += 1; }
276                }
277
278                if unused > 0 {
279                    for _ in 0..=depth { eprint!("  "); }
280                    eprintln!("({} empty places)", unused);
281                }
282            },
283            Node::Leaf(n) => {
284                eprintln!("Leaf {:?} (parent: {:?}) - {} filled", n as *const _, n.parent, n.len_entries());
285            }
286        }
287    }
288
    /// Debug dump: print the whole tree's node/pointer structure to stderr.
    #[allow(unused)]
    pub fn print_ptr_tree(&self) {
        eprintln!("Tree count {:?} ptr {:?}", self.count, self as *const _);
        Self::print_node_tree(&self.root, 1);
    }
294
295    #[allow(unused)]
296    pub fn print_stats(&self, name: &str, detailed: bool) {
297        // We'll get the distribution of entry sizes
298        let mut size_counts = vec!();
299
300        for entry in self.raw_iter() {
301            // println!("entry {:?}", entry);
302            let bucket = entry.len() as usize;
303            if bucket >= size_counts.len() {
304                size_counts.resize(bucket + 1, 0);
305            }
306            size_counts[bucket] += 1;
307        }
308
309        let (num_internal_nodes, num_leaf_nodes) = self.count_nodes();
310        let leaf_node_size = num_leaf_nodes * size_of::<NodeLeaf<E, I, IE, LE>>();
311        let internal_node_size = num_internal_nodes * size_of::<NodeInternal<E, I, IE, LE>>();
312        let num_entries = self.count_entries();
313
314        println!("-------- Range tree {} stats --------", name);
315        println!("Number of {} byte entries: {} ({} bytes of entries)",
316             size_of::<E>(),
317             num_entries,
318             (num_entries * size_of::<E>()).file_size(file_size_opts::CONVENTIONAL).unwrap()
319        );
320        println!("Number of {} byte internal nodes {} ({})",
321             size_of::<NodeInternal<E, I, IE, LE>>(),
322             num_internal_nodes,
323             internal_node_size.file_size(file_size_opts::CONVENTIONAL).unwrap());
324        println!("Number of {} byte leaf nodes {} ({}) (space for {} entries)",
325             size_of::<NodeLeaf<E, I, IE, LE>>(),
326             num_leaf_nodes,
327             leaf_node_size.file_size(file_size_opts::CONVENTIONAL).unwrap(),
328             num_leaf_nodes * LE
329        );
330
331        println!("Depth {}", self.get_depth());
332        println!("Total range tree memory usage {}",
333             self.count_total_memory().file_size(file_size_opts::CONVENTIONAL).unwrap());
334
335        let compacted_entries = merge_items(self.raw_iter()).count();
336        // println!("(efficient size: {})", (self.count_entries() * size_of::<E>()).file_size(file_size_opts::CONVENTIONAL).unwrap());
337        println!("Compacts to {} entries / {} bytes",
338             compacted_entries,
339             (compacted_entries * size_of::<E>()).file_size(file_size_opts::CONVENTIONAL).unwrap()
340        );
341
342        // This prints the first 100 items of the real entries, and maximally compacted entries:
343        // for e in self.iter().take(100) {
344        //     println!("{:?}", e);
345        // }
346        // println!("\n\n");
347        // for e in compacted.iter().take(100) {
348        //     println!("{:?}", e);
349        // }
350
351        if detailed {
352            println!("Entry distribution {:?}", size_counts);
353            println!("Internal node size {}", std::mem::size_of::<NodeInternal<E, I, IE, LE>>());
354            println!("Node entry size {} alignment {}",
355                     std::mem::size_of::<Option<Node<E, I, IE, LE>>>(),
356                     std::mem::align_of::<Option<Node<E, I, IE, LE>>>());
357            println!("Leaf size {}", std::mem::size_of::<NodeLeaf<E, I, IE, LE>>());
358        }
359    }
360
    /// Depth of the tree: 0 when the root is a leaf, otherwise the number of
    /// internal levels descended before reaching a leaf.
    fn get_depth(&self) -> usize {
        unsafe {
            let mut depth = 0;
            // Follow first-child pointers down until we hit a leaf.
            let mut node = self.root.as_ptr();
            while let NodePtr::Internal(data) = node {
                depth += 1;
                node = data.as_ref().children[0].as_ref().unwrap().as_ptr()
            };
            depth
        }
    }
372
373    #[allow(unused)]
374    pub fn count_entries(&self) -> usize {
375        self.raw_iter().fold(0, |a, _| a + 1)
376    }
377
378    // Passing (num internal nodes, num leaf nodes).
379    fn count_nodes_internal(node: &Node<E, I, IE, LE>, num: &mut (usize, usize)) {
380        if let Node::Internal(n) = node {
381            num.0 += 1;
382
383            for e in n.children[..].iter().flatten() {
384                Self::count_nodes_internal(e, num);
385            }
386        } else { num.1 += 1; }
387    }
388
389    #[allow(unused)]
390    pub fn count_nodes(&self) -> (usize, usize) {
391        let mut num = (0, 0);
392        Self::count_nodes_internal(&self.root, &mut num);
393        num
394    }
395
396    fn count_memory_internal(node: &Node<E, I, IE, LE>, size: &mut usize) {
397        match node {
398            Node::Internal(n) => {
399                *size += size_of::<NodeInternal<E, I, IE, LE>>();
400
401                for e in n.children[..].iter().flatten() {
402                    Self::count_memory_internal(e, size);
403                }
404            }
405            Node::Leaf(_) => {
406                *size += std::mem::size_of::<NodeLeaf<E, I, IE, LE>>();
407            }
408        }
409    }
410
411    #[allow(unused)]
412    pub fn count_total_memory(&self) -> usize {
413        let mut size = size_of::<ContentTreeRaw<E, I, IE, LE>>();
414        Self::count_memory_internal(&self.root, &mut size);
415        size
416    }
417}
418
impl<E: ContentTraits + Searchable, I: TreeMetrics<E>, const IE: usize, const LE: usize> ContentTreeRaw<E, I, IE, LE> {
    /// Returns a cursor right before the named location, referenced by the pointer.
    ///
    /// # Safety
    ///
    /// `ptr` must point to a valid leaf belonging to this tree, and that leaf
    /// must actually contain `loc` (otherwise this panics via `expect`).
    #[inline]
    pub unsafe fn unsafe_cursor_before_item(loc: E::Item, ptr: NonNull<NodeLeaf<E, I, IE, LE>>) -> UnsafeCursor<E, I, IE, LE> {
        // First make a cursor to the specified item
        let leaf = ptr.as_ref();
        leaf.find(loc).expect("Position not in named leaf")
    }

    /// Safe wrapper: a read-only cursor right before `loc`, bound to the
    /// lifetime of `&self`.
    pub fn cursor_before_item(&self, loc: E::Item, ptr: NonNull<NodeLeaf<E, I, IE, LE>>) -> Cursor<E, I, IE, LE> {
        unsafe {
            // Safe because &self is valid and the returned cursor is bound to the lifetime of &self
            Cursor::unchecked_from_raw(self, Self::unsafe_cursor_before_item(loc, ptr))
        }
    }

    /// Like `cursor_before_item`, but returns a mutable cursor. Takes the
    /// pinned tree by mutable reference so the cursor may mutate the tree.
    pub fn mut_cursor_before_item<'a>(self: &'a mut Pin<Box<Self>>, loc: E::Item, ptr: NonNull<NodeLeaf<E, I, IE, LE>>) -> MutCursor<'a, E, I, IE, LE> {
        unsafe {
            // Safe because the tree reference is valid and the returned cursor is
            // bound to the lifetime of the &mut borrow.
            MutCursor::unchecked_from_raw(self, Self::unsafe_cursor_before_item(loc, ptr))
        }
    }
}
442
impl<E: ContentTraits + ContentLength, I: FindContent<E>, const IE: usize, const LE: usize> ContentTreeRaw<E, I, IE, LE> {
    /// Total length of the tree measured in content units.
    pub fn content_len(&self) -> usize {
        I::index_to_content(self.count)
    }

    /// Unsafe cursor at `pos`, measured in content units. See
    /// `unsafe_cursor_at_query` for the meaning of `stick_end`.
    pub fn unsafe_cursor_at_content_pos(&self, pos: usize, stick_end: bool) -> UnsafeCursor<E, I, IE, LE> {
        self.unsafe_cursor_at_query(pos, stick_end, I::index_to_content, |e| e.content_len())
    }

    /// Safe cursor at `pos`, measured in content units.
    pub fn cursor_at_content_pos(&self, pos: usize, stick_end: bool) -> Cursor<E, I, IE, LE> {
        self.cursor_at_query(pos, stick_end, I::index_to_content, |e| e.content_len())
    }

    /// Mutable cursor at `pos`, measured in content units.
    pub fn mut_cursor_at_content_pos<'a>(self: &'a mut Pin<Box<Self>>, pos: usize, stick_end: bool) -> MutCursor<'a, E, I, IE, LE> {
        self.mut_cursor_at_query(pos, stick_end, I::index_to_content, |e| e.content_len())
    }
}
460
impl<E: ContentTraits, I: FindOffset<E>, const IE: usize, const LE: usize> ContentTreeRaw<E, I, IE, LE> {
    /// Total length of the tree measured in offset units.
    pub fn offset_len(&self) -> usize {
        I::index_to_offset(self.count)
    }

    /// Unsafe cursor at `pos`, measured in offset units. See
    /// `unsafe_cursor_at_query` for the meaning of `stick_end`.
    pub fn unsafe_cursor_at_offset_pos(&self, pos: usize, stick_end: bool) -> UnsafeCursor<E, I, IE, LE> {
        self.unsafe_cursor_at_query(pos, stick_end, I::index_to_offset, |e| e.len())
    }

    /// Safe cursor at `pos`, measured in offset units.
    pub fn cursor_at_offset_pos(&self, pos: usize, stick_end: bool) -> Cursor<E, I, IE, LE> {
        self.cursor_at_query(pos, stick_end, I::index_to_offset, |e| e.len())
    }

    /// Mutable cursor at `pos`, measured in offset units.
    pub fn mut_cursor_at_offset_pos<'a>(self: &'a mut Pin<Box<Self>>, pos: usize, stick_end: bool) -> MutCursor<'a, E, I, IE, LE> {
        self.mut_cursor_at_query(pos, stick_end, I::index_to_offset, |e| e.len())
    }
}
478    
impl<E: ContentTraits + Searchable, I: FindOffset<E>, const IE: usize, const LE: usize> ContentTreeRaw<E, I, IE, LE> {
    /// Look up the item at offset position `pos`, if any.
    pub fn at_offset(&self, pos: usize) -> Option<E::Item> {
        let cursor = self.unsafe_cursor_at_offset_pos(pos, false);
        // SAFETY: the cursor was just created from &self and hasn't been moved.
        unsafe { cursor.unsafe_get_item() }
    }
}
485
impl<E: ContentTraits + ContentLength + Searchable, I: FindContent<E>, const IE: usize, const LE: usize> ContentTreeRaw<E, I, IE, LE> {
    /// Look up the item at content position `pos`, if any.
    pub fn at_content(&self, pos: usize) -> Option<E::Item> {
        let cursor = self.unsafe_cursor_at_content_pos(pos, false);
        // SAFETY: the cursor was just created from &self and hasn't been moved.
        unsafe { cursor.unsafe_get_item() }
    }
}
492
/// Two trees compare equal when they yield the same entries in the same order,
/// regardless of how those entries are distributed across nodes.
impl<E: ContentTraits + PartialEq, I: TreeMetrics<E>, const IE: usize, const LE: usize> PartialEq for ContentTreeRaw<E, I, IE, LE> {
    fn eq(&self, other: &Self) -> bool {
        self.iter().eq(other.iter())
    }
}
498
// NOTE(review): Eq here only requires E: PartialEq; full reflexivity would
// normally want E: Eq — confirm this is intentional.
impl<E: ContentTraits + PartialEq, I: TreeMetrics<E>, const IE: usize, const LE: usize> Eq for ContentTreeRaw<E, I, IE, LE> {}