content_tree/
lib.rs

1// The btree here is used to map character -> document positions. It could also
2// be extended to inline a rope, but I haven't done that here.
3
4#![allow(clippy::missing_safety_doc)]
5
6// use std::cell::Cell;
7use std::fmt::Debug;
8use std::marker;
9use std::marker::PhantomPinned;
10use std::pin::Pin;
11use std::ptr::NonNull;
12
13pub use metrics::*;
14pub use root::DeleteResult;
15
16// Types re-exported from rle for convenience
17pub use rle::{SplitAndJoinSpan, RleRun, ReverseSpan, Single};
18
19// The common data structures are:
20mod unsafe_cursor;
21mod root;
22mod leaf;
23mod internal;
24mod mutations;
25mod metrics;
26mod safe_cursor;
27pub mod testrange;
28mod iter;
29mod debug;
30
31// pub(crate) use cursor::Cursor;
32
33#[cfg(debug_assertions)]
34pub const DEFAULT_IE: usize = 8; // This needs to be minimum 8.
35#[cfg(not(debug_assertions))]
36pub const DEFAULT_IE: usize = 10;
37
38// Must fit in u8, and must be >= 4 due to limitations in splice_insert.
39#[cfg(debug_assertions)]
40pub const DEFAULT_LE: usize = 4;
41#[cfg(not(debug_assertions))]
42pub const DEFAULT_LE: usize = 32;
43
44/// Simple empty helper trait naming all the properties needed by ContentTree.
45pub trait ContentTraits: SplitAndJoinSpan + Copy + Debug + Default {}
46impl<T: SplitAndJoinSpan + Copy + Debug + Default> ContentTraits for T {}
47
48/// A ContentTree is an efficient packed list of RLE entries, allowing for arbitrary inserts and
49/// deletes anywhere in the range.
50///
51/// ```rust
52/// use content_tree::ContentTree;
53/// use content_tree::testrange::TestRange;
54///
55/// let mut tree = ContentTree::new();
56/// tree.push(TestRange { id: 0, len: 100, is_activated: true });
57/// tree.push(TestRange { id: 100, len: 50, is_activated: true });
58///
59/// assert_eq!(tree.iter().collect::<Vec<TestRange>>(), vec![
60///     TestRange { id: 0, len: 150, is_activated: true }
61/// ]);
62/// ```
63pub struct ContentTreeRaw<E: ContentTraits, I: TreeMetrics<E>, const INT_ENTRIES: usize = DEFAULT_IE, const LEAF_ENTRIES: usize = DEFAULT_LE> {
64    count: I::Value,
65
66    // There's a bit of double-deref going on when you access the first node in the tree, but I
67    // can't think of a clean way around it.
68    root: Node<E, I, INT_ENTRIES, LEAF_ENTRIES>,
69
70    // Usually inserts and deletes are followed by more inserts / deletes at the same location.
71    // We cache the last cursor position so we can reuse cursors between edits.
72    // TODO: Currently unused.
73    // last_cursor: Cell<Option<(usize, Cursor<E, I, IE, LE>)>>,
74
75    _pin: marker::PhantomPinned,
76}
77
78pub trait Cursors {
79    type UnsafeCursor;
80    type Cursor;
81    type MutCursor;
82}
83
84// This is a simple helper to make it easier to reference the type of a content tree cursor.
85impl<'a, E: ContentTraits, I: TreeMetrics<E>, const IE: usize, const LE: usize> Cursors for &'a ContentTreeRaw<E, I, IE, LE> {
86    type UnsafeCursor = UnsafeCursor<E, I, IE, LE>;
87    type Cursor = Cursor<'a, E, I, IE, LE>;
88    type MutCursor = MutCursor<'a, E, I, IE, LE>;
89}
90impl<'a, E: ContentTraits, I: TreeMetrics<E>, const IE: usize, const LE: usize> Cursors for &'a Pin<Box<ContentTreeRaw<E, I, IE, LE>>> {
91    type UnsafeCursor = UnsafeCursor<E, I, IE, LE>;
92    type Cursor = Cursor<'a, E, I, IE, LE>;
93    type MutCursor = MutCursor<'a, E, I, IE, LE>;
94}
95
96/// An alias for ContentTreeRaw which uses the default sizes for internal elements.
97#[deprecated]
98pub type ContentTreeWithIndex<E, I> = ContentTreeRaw<E, I, DEFAULT_IE, DEFAULT_LE>;
99pub type ContentTree<E> = ContentTreeRaw<E, RawPositionMetricsU32, DEFAULT_IE, DEFAULT_LE>;
100
101/// An internal node in the B-tree
102#[derive(Debug)]
103pub(crate) struct NodeInternal<E: ContentTraits, I: TreeMetrics<E>, const INT_ENTRIES: usize, const LEAF_ENTRIES: usize> {
104    parent: ParentPtr<E, I, INT_ENTRIES, LEAF_ENTRIES>,
105    // Pairs of (count of subtree elements, subtree contents).
106    // Left packed. The nodes are all the same type.
107    // ItemCount only includes items which haven't been deleted.
108    metrics: [I::Value; INT_ENTRIES],
109    children: [Option<Node<E, I, INT_ENTRIES, LEAF_ENTRIES>>; INT_ENTRIES],
110    _pin: PhantomPinned, // Needed because children have parent pointers here.
111}
112
113/// A leaf node in the B-tree. Except the root, each child stores MAX_CHILDREN/2 - MAX_CHILDREN
114/// entries.
115///
116/// The details in a leaf node are private. This is exposed to allow consumers to build custom
117/// indexes, allowing for fast cursor creation without needing to iterate through the tree.
118///
119/// See diamond-types for an example of this.
120#[derive(Debug)]
121pub struct NodeLeaf<E: ContentTraits, I: TreeMetrics<E>, const INT_ENTRIES: usize = DEFAULT_IE, const LEAF_ENTRIES: usize = DEFAULT_LE> {
122    parent: ParentPtr<E, I, INT_ENTRIES, LEAF_ENTRIES>,
123    num_entries: u8, // Number of entries which have been populated
124    data: [E; LEAF_ENTRIES],
125    _pin: PhantomPinned, // Needed because cursors point here.
126
127    next: Option<NonNull<Self>>,
128}
129
130#[derive(Debug)]
131pub(crate) enum Node<E: ContentTraits, I: TreeMetrics<E>, const IE: usize = DEFAULT_IE, const LE: usize = DEFAULT_LE> {
132    Internal(Pin<Box<NodeInternal<E, I, IE, LE>>>),
133    Leaf(Pin<Box<NodeLeaf<E, I, IE, LE>>>),
134}
135
136// I hate that I need this, but its used all over the place when traversing the tree.
137#[derive(Copy, Clone, Debug, PartialEq, Eq)]
138pub(crate) enum NodePtr<E: ContentTraits, I: TreeMetrics<E>, const IE: usize = DEFAULT_IE, const LE: usize = DEFAULT_LE> {
139    Internal(NonNull<NodeInternal<E, I, IE, LE>>),
140    Leaf(NonNull<NodeLeaf<E, I, IE, LE>>),
141}
142
143// TODO: Consider just reusing NodePtr for this.
144#[derive(Copy, Clone, Debug, Eq)]
145pub(crate) enum ParentPtr<E: ContentTraits, I: TreeMetrics<E>, const IE: usize = DEFAULT_IE, const LE: usize = DEFAULT_LE> {
146    Root(NonNull<ContentTreeRaw<E, I, IE, LE>>),
147    Internal(NonNull<NodeInternal<E, I, IE, LE>>)
148}
149
150/// A cursor into some location in a range tree.
151///
152/// Note the state of a cursor is weird in two situations:
153/// - When a cursor points to a location in between two entries, the cursor could either point to
154/// the end of the first entry or the start of the subsequent entry.
155/// - When a tree is empty, the cursor points past the end of the tree.
156///
157/// Safety: This is unsafe because there's no associated lifetime on a cursor (its 'static).
158///
159/// The caller must ensure any reads and mutations through an UnsafeCursor are valid WRT the
160/// mutability and lifetime of the implicitly referenced content tree. Use Cursor and MutCursor.
161#[derive(Clone, Debug)]
162pub struct UnsafeCursor<E: ContentTraits, I: TreeMetrics<E>, const IE: usize = DEFAULT_IE, const LE: usize = DEFAULT_LE> {
163    node: NonNull<NodeLeaf<E, I, IE, LE>>,
164    idx: usize,
165    pub offset: usize, // This doesn't need to be usize, but the memory size of Cursor doesn't matter.
166}
167
168#[repr(transparent)]
169#[derive(Clone, Debug)]
170pub struct SafeCursor<R, E: ContentTraits, I: TreeMetrics<E>, const IE: usize = DEFAULT_IE, const LE: usize = DEFAULT_LE> {
171    pub inner: UnsafeCursor<E, I, IE, LE>,
172    marker: marker::PhantomData<R>
173}
174
175/// A cursor into an immutable ContentTree. A cursor is the primary way to read entries in the
176/// content tree. A cursor points to a specific offset at a specific entry in a specific node in
177/// the content tree.
178pub type Cursor<'a, E, I, const IE: usize = DEFAULT_IE, const LE: usize = DEFAULT_LE>
179    = SafeCursor<&'a ContentTreeRaw<E, I, IE, LE>, E, I, IE, LE>;
180
181/// A mutable cursor into a ContentTree. Mutable cursors inherit all the functionality of Cursor,
182/// and can also be used to modify the content tree.
183///
184/// A mutable cursor mutably borrows the content tree. Only one mutable cursor can exist at a time.
185pub type MutCursor<'a, E, I, const IE: usize, const LE: usize> = SafeCursor<&'a mut ContentTreeRaw<E, I, IE, LE>, E, I, IE, LE>;
186
187// I can't use the derive() implementation of this because EntryTraits does not always implement
188// PartialEq.
189impl<E: ContentTraits, I: TreeMetrics<E>, const IE: usize, const LE: usize> PartialEq for ParentPtr<E, I, IE, LE> {
190    fn eq(&self, other: &Self) -> bool {
191        use ParentPtr::*;
192        match (self, other) {
193            (Root(a), Root(b)) => a == b,
194            (Internal(a), Internal(b)) => a == b,
195            _ => false
196        }
197    }
198}
199
200/// Unsafe because NonNull wraps a mutable pointer. Callers must take care of mutability!
201unsafe fn ref_to_nonnull<T>(val: &T) -> NonNull<T> {
202    NonNull::new_unchecked(val as *const _ as *mut _)
203}
204
205/// Helper when a notify function is not needed
206pub fn null_notify<E, Node>(_e: E, _node: Node) {}
207
208impl<E: ContentTraits, I: TreeMetrics<E>, const IE: usize, const LE: usize> Node<E, I, IE, LE> {
209    /// Unsafe: Created leaf has a dangling parent pointer. Must be set after initialization.
210    // unsafe fn new_leaf() -> Self {
211    //     Node::Leaf(Box::pin(NodeLeaf::new()))
212    // }
213    // fn new_with_parent(parent: ParentPtr) -> Self {
214    //     Node::Leaf(Box::pin(NodeLeaf::new_with_parent(parent)))
215    // }
216
217    fn set_parent(&mut self, parent: ParentPtr<E, I, IE, LE>) {
218        unsafe {
219            match self {
220                Node::Leaf(l) => l.as_mut().get_unchecked_mut().parent = parent,
221                Node::Internal(i) => i.as_mut().get_unchecked_mut().parent = parent,
222            }
223        }
224    }
225
226    // pub fn get_parent(&self) -> ParentPtr {
227    //     match self {
228    //         Node::Leaf(l) => l.parent,
229    //         Node::Internal(i) => i.parent,
230    //     }
231    // }
232
233    pub fn is_leaf(&self) -> bool {
234        match self {
235            Node::Leaf(_) => true,
236            Node::Internal(_) => false,
237        }
238    }
239
240    // fn unwrap_leaf(&self) -> &NodeLeaf<E, I, IE, LE> {
241    //     match self {
242    //         Node::Leaf(l) => l.as_ref().get_ref(),
243    //         Node::Internal(_) => panic!("Expected leaf - found internal node"),
244    //     }
245    // }
246    //
247    // fn unwrap_into_leaf(self) -> Pin<Box<NodeLeaf<E, I, IE, LE>>> {
248    //     match self {
249    //         Node::Leaf(l) => l,
250    //         Node::Internal(_) => panic!("Expected leaf - found internal node"),
251    //     }
252    // }
253
254    fn unwrap_leaf_mut(&mut self) -> Pin<&mut NodeLeaf<E, I, IE, LE>> {
255        match self {
256            Node::Leaf(l) => l.as_mut(),
257            Node::Internal(_) => panic!("Expected leaf - found internal node"),
258        }
259    }
260    // fn unwrap_internal(&self) -> &NodeInternal {
261    //     match self {
262    //         Node::Internal(n) => n.as_ref().get_ref(),
263    //         Node::Leaf(_) => panic!("Expected internal node"),
264    //     }
265    // }
266    fn unwrap_internal_mut(&mut self) -> Pin<&mut NodeInternal<E, I, IE, LE>> {
267        match self {
268            Node::Internal(n) => n.as_mut(),
269            Node::Leaf(_) => panic!("Expected internal node"),
270        }
271    }
272
273    /// Unsafe: The resulting NodePtr is mutable and doesn't have an associated lifetime.
274    unsafe fn as_ptr(&self) -> NodePtr<E, I, IE, LE> {
275        match self {
276            Node::Internal(n) => {
277                NodePtr::Internal(ref_to_nonnull(n.as_ref().get_ref()))
278            },
279            Node::Leaf(n) => {
280                NodePtr::Leaf(ref_to_nonnull(n.as_ref().get_ref()))
281            },
282        }
283    }
284
285    fn ptr_eq(&self, ptr: NodePtr<E, I, IE, LE>) -> bool {
286        match (self, ptr) {
287            (Node::Internal(n), NodePtr::Internal(ptr)) => {
288                std::ptr::eq(n.as_ref().get_ref(), ptr.as_ptr())
289            },
290            (Node::Leaf(n), NodePtr::Leaf(ptr)) => {
291                std::ptr::eq(n.as_ref().get_ref(), ptr.as_ptr())
292            },
293            _ => panic!("Pointer type does not match")
294        }
295    }
296}
297
298impl<E: ContentTraits, I: TreeMetrics<E>, const IE: usize, const LE: usize> NodePtr<E, I, IE, LE> {
299    fn unwrap_leaf(self) -> NonNull<NodeLeaf<E, I, IE, LE>> {
300        match self {
301            NodePtr::Leaf(l) => l,
302            NodePtr::Internal(_) => panic!("Expected leaf - found internal node"),
303        }
304    }
305
306    unsafe fn get_parent(&self) -> ParentPtr<E, I, IE, LE> {
307        match self {
308            NodePtr::Internal(n) => { n.as_ref().parent }
309            NodePtr::Leaf(n) => { n.as_ref().parent }
310        }
311    }
312}
313
314impl<E: ContentTraits, I: TreeMetrics<E>, const IE: usize, const LE: usize> ParentPtr<E, I, IE, LE> {
315    fn unwrap_internal(self) -> NonNull<NodeInternal<E, I, IE, LE>> {
316        match self {
317            ParentPtr::Root(_) => { panic!("Expected internal node"); }
318            ParentPtr::Internal(ptr) => { ptr }
319        }
320    }
321
322    fn is_root(&self) -> bool {
323        match self {
324            ParentPtr::Root(_) => { true }
325            ParentPtr::Internal(_) => { false }
326        }
327    }
328}
329
330#[cfg(test)]
331mod test {
332    use std::mem::size_of;
333    use super::*;
334    use crate::testrange::TestRange;
335
336// use std::pin::Pin;
337
338    #[test]
339    fn option_node_size_is_transparent() {
340        let node_size = size_of::<Node<TestRange, RawPositionMetricsU32, DEFAULT_IE, DEFAULT_LE>>();
341        let opt_node_size = size_of::<Option<Node<TestRange, RawPositionMetricsU32, DEFAULT_IE, DEFAULT_LE>>>();
342        assert_eq!(node_size, opt_node_size);
343
344        // TODO: This fails, which means we're burning 8 bytes to simply store tags for each
345        // pointer in a node. Despite all the items inside a node being the same type.
346        // let item_size = size_of::<Pin<Box<NodeInternal<OrderSpan, RawPositionIndex>>>>();
347        // assert_eq!(node_size, item_size);
348    }
349}