lapce_xi_rope/
tree.rs

1// Copyright 2016 The xi-editor Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! A general b-tree structure suitable for ropes and the like.
16
17use std::cmp::{min, Ordering};
18use std::marker::PhantomData;
19use std::sync::Arc;
20
21use crate::interval::{Interval, IntervalBounds};
22
23const MIN_CHILDREN: usize = 4;
24const MAX_CHILDREN: usize = 8;
25
26pub trait NodeInfo: Clone {
27    /// The type of the leaf.
28    ///
29    /// A given `NodeInfo` is for exactly one type of leaf. That is why
30    /// the leaf type is an associated type rather than a type parameter.
31    type L: Leaf;
32
33    /// An operator that combines info from two subtrees. It is intended
34    /// (but not strictly enforced) that this operator be associative and
35    /// obey an identity property. In mathematical terms, the accumulate
36    /// method is the operation of a monoid.
37    fn accumulate(&mut self, other: &Self);
38
39    /// A mapping from a leaf into the info type. It is intended (but
40    /// not strictly enforced) that applying the accumulate method to
41    /// the info derived from two leaves gives the same result as
42    /// deriving the info from the concatenation of the two leaves. In
43    /// mathematical terms, the compute_info method is a monoid
44    /// homomorphism.
45    fn compute_info(_: &Self::L) -> Self;
46
47    /// The identity of the monoid. Need not be implemented because it
48    /// can be computed from the leaf default.
49    ///
50    /// This is here to demonstrate that this is a monoid.
51    fn identity() -> Self {
52        Self::compute_info(&Self::L::default())
53    }
54
55    /// The interval covered by the first `len` base units of this node. The
56    /// default impl is sufficient for most types, but interval trees may need
57    /// to override it.
58    fn interval(&self, len: usize) -> Interval {
59        Interval::new(0, len)
60    }
61}
62
63/// A trait indicating the default metric of a NodeInfo.
64///
65/// Adds quality of life functions to
66/// Node\<N\>, where N is a DefaultMetric.
67/// For example, [Node\<DefaultMetric\>.count](struct.Node.html#method.count).
68pub trait DefaultMetric: NodeInfo {
69    type DefaultMetric: Metric<Self>;
70}
71
72/// A trait for the leaves of trees of type [Node](struct.Node.html).
73///
74/// Two leafs can be concatenated using `push_maybe_split`.
75pub trait Leaf: Sized + Clone + Default {
76    /// Measurement of leaf in base units.
77    /// A 'base unit' refers to the smallest discrete unit
78    /// by which a given concrete type can be indexed.
79    /// Concretely, for Rust's String type the base unit is the byte.
80    fn len(&self) -> usize;
81
82    /// Generally a minimum size requirement for leaves.
83    fn is_ok_child(&self) -> bool;
84
85    /// Combine the part `other` denoted by the `Interval` `iv` into `self`,
86    /// optionly splitting off a new `Leaf` if `self` would have become too big.
87    /// Returns either `None` if no splitting was needed, or `Some(rest)` if
88    /// `rest` was split off.
89    ///
90    /// Interval is in "base units".  Generally implements a maximum size.
91    ///
92    /// # Invariants:
93    /// - If one or the other input is empty, then no split.
94    /// - If either input satisfies `is_ok_child`, then, on return, `self`
95    ///   satisfies this, as does the optional split.
96    fn push_maybe_split(&mut self, other: &Self, iv: Interval) -> Option<Self>;
97
98    /// Same meaning as push_maybe_split starting from an empty
99    /// leaf, but maybe can be implemented more efficiently?
100    ///
101    // TODO: remove if it doesn't pull its weight
102    fn subseq(&self, iv: Interval) -> Self {
103        let mut result = Self::default();
104        if result.push_maybe_split(self, iv).is_some() {
105            panic!("unexpected split");
106        }
107        result
108    }
109}
110
111/// A b-tree node storing leaves at the bottom, and with info
112/// retained at each node. It is implemented with atomic reference counting
113/// and copy-on-write semantics, so an immutable clone is a very cheap
114/// operation, and nodes can be shared across threads. Even so, it is
115/// designed to be updated in place, with efficiency similar to a mutable
116/// data structure, using uniqueness of reference count to detect when
117/// this operation is safe.
118///
119/// When the leaf is a string, this is a rope data structure (a persistent
120/// rope in functional programming jargon). However, it is not restricted
121/// to strings, and it is expected to be the basis for a number of data
122/// structures useful for text processing.
123#[derive(Clone)]
124pub struct Node<N: NodeInfo>(Arc<NodeBody<N>>);
125
126#[derive(Clone)]
127struct NodeBody<N: NodeInfo> {
128    height: usize,
129    len: usize,
130    info: N,
131    val: NodeVal<N>,
132}
133
134#[derive(Clone)]
135enum NodeVal<N: NodeInfo> {
136    Leaf(N::L),
137    Internal(Vec<Node<N>>),
138}
139
140// also consider making Metric a newtype for usize, so type system can
141// help separate metrics
142
143/// A trait for quickly processing attributes of a
144/// [NodeInfo](struct.NodeInfo.html).
145///
146/// For the conceptual background see the
147/// [blog post, Rope science, part 2: metrics](https://github.com/google/xi-editor/blob/master/docs/docs/rope_science_02.md).
148pub trait Metric<N: NodeInfo> {
149    /// Return the size of the
150    /// [NodeInfo::L](trait.NodeInfo.html#associatedtype.L), as measured by this
151    /// metric.
152    ///
153    /// The usize argument is the total size/length of the node, in base units.
154    ///
155    /// # Examples
156    /// For the [LinesMetric](../rope/struct.LinesMetric.html), this gives the number of
157    /// lines in string contained in the leaf. For the
158    /// [BaseMetric](../rope/struct.BaseMetric.html), this gives the size of the string
159    /// in uft8 code units, that is, bytes.
160    ///
161    fn measure(info: &N, len: usize) -> usize;
162
163    /// Returns the smallest offset, in base units, for an offset in measured units.
164    ///
165    /// # Invariants:
166    ///
167    /// - `from_base_units(to_base_units(x)) == x` is True for valid `x`
168    fn to_base_units(l: &N::L, in_measured_units: usize) -> usize;
169
170    /// Returns the smallest offset in measured units corresponding to an offset in base units.
171    ///
172    /// # Invariants:
173    ///
174    /// - `from_base_units(to_base_units(x)) == x` is True for valid `x`
175    fn from_base_units(l: &N::L, in_base_units: usize) -> usize;
176
177    /// Return whether the offset in base units is a boundary of this metric.
178    /// If a boundary is at end of a leaf then this method must return true.
179    /// However, a boundary at the beginning of a leaf is optional
180    /// (the previous leaf will be queried).
181    fn is_boundary(l: &N::L, offset: usize) -> bool;
182
183    /// Returns the index of the boundary directly preceding offset,
184    /// or None if no such boundary exists. Input and result are in base units.
185    fn prev(l: &N::L, offset: usize) -> Option<usize>;
186
187    /// Returns the index of the first boundary for which index > offset,
188    /// or None if no such boundary exists. Input and result are in base units.
189    fn next(l: &N::L, offset: usize) -> Option<usize>;
190
191    /// Returns true if the measured units in this metric can span multiple
192    /// leaves.  As an example, in a metric that measures lines in a rope, a
193    /// line may start in one leaf and end in another; however in a metric
194    /// measuring bytes, storage of a single byte cannot extend across leaves.
195    fn can_fragment() -> bool;
196}
197
198impl<N: NodeInfo> Node<N> {
199    pub fn from_leaf(l: N::L) -> Node<N> {
200        let len = l.len();
201        let info = N::compute_info(&l);
202        Node(Arc::new(NodeBody { height: 0, len, info, val: NodeVal::Leaf(l) }))
203    }
204
205    /// Create a node from a vec of nodes.
206    ///
207    /// The input must satisfy the following balancing requirements:
208    /// * The length of `nodes` must be <= MAX_CHILDREN and > 1.
209    /// * All the nodes are the same height.
210    /// * All the nodes must satisfy is_ok_child.
211    fn from_nodes(nodes: Vec<Node<N>>) -> Node<N> {
212        debug_assert!(nodes.len() > 1);
213        debug_assert!(nodes.len() <= MAX_CHILDREN);
214        let height = nodes[0].0.height + 1;
215        let mut len = nodes[0].0.len;
216        let mut info = nodes[0].0.info.clone();
217        debug_assert!(nodes[0].is_ok_child());
218        for child in &nodes[1..] {
219            debug_assert_eq!(child.height() + 1, height);
220            debug_assert!(child.is_ok_child());
221            len += child.0.len;
222            info.accumulate(&child.0.info);
223        }
224        Node(Arc::new(NodeBody { height, len, info, val: NodeVal::Internal(nodes) }))
225    }
226
227    pub fn len(&self) -> usize {
228        self.0.len
229    }
230
231    pub fn is_empty(&self) -> bool {
232        self.len() == 0
233    }
234
235    /// Returns `true` if these two `Node`s share the same underlying data.
236    ///
237    /// This is principally intended to be used by the druid crate, without needing
238    /// to actually add a feature and implement druid's `Data` trait.
239    pub fn ptr_eq(&self, other: &Self) -> bool {
240        Arc::ptr_eq(&self.0, &other.0)
241    }
242
243    fn height(&self) -> usize {
244        self.0.height
245    }
246
247    fn is_leaf(&self) -> bool {
248        self.0.height == 0
249    }
250
251    fn interval(&self) -> Interval {
252        self.0.info.interval(self.0.len)
253    }
254
255    fn get_children(&self) -> &[Node<N>] {
256        if let NodeVal::Internal(ref v) = self.0.val {
257            v
258        } else {
259            panic!("get_children called on leaf node");
260        }
261    }
262
263    fn get_leaf(&self) -> &N::L {
264        if let NodeVal::Leaf(ref l) = self.0.val {
265            l
266        } else {
267            panic!("get_leaf called on internal node");
268        }
269    }
270
271    /// Call a callback with a mutable reference to a leaf.
272    ///
273    /// This clones the leaf if the reference is shared. It also recomputes
274    /// length and info after the leaf is mutated.
275    fn with_leaf_mut<T>(&mut self, f: impl FnOnce(&mut N::L) -> T) -> T {
276        let inner = Arc::make_mut(&mut self.0);
277        if let NodeVal::Leaf(ref mut l) = inner.val {
278            let result = f(l);
279            inner.len = l.len();
280            inner.info = N::compute_info(l);
281            result
282        } else {
283            panic!("with_leaf_mut called on internal node");
284        }
285    }
286
287    fn is_ok_child(&self) -> bool {
288        match self.0.val {
289            NodeVal::Leaf(ref l) => l.is_ok_child(),
290            NodeVal::Internal(ref nodes) => (nodes.len() >= MIN_CHILDREN),
291        }
292    }
293
294    fn merge_nodes(children1: &[Node<N>], children2: &[Node<N>]) -> Node<N> {
295        let n_children = children1.len() + children2.len();
296        if n_children <= MAX_CHILDREN {
297            Node::from_nodes([children1, children2].concat())
298        } else {
299            // Note: this leans left. Splitting at midpoint is also an option
300            let splitpoint = min(MAX_CHILDREN, n_children - MIN_CHILDREN);
301            let mut iter = children1.iter().chain(children2.iter()).cloned();
302            let left = iter.by_ref().take(splitpoint).collect();
303            let right = iter.collect();
304            let parent_nodes = vec![Node::from_nodes(left), Node::from_nodes(right)];
305            Node::from_nodes(parent_nodes)
306        }
307    }
308
309    fn merge_leaves(mut rope1: Node<N>, rope2: Node<N>) -> Node<N> {
310        debug_assert!(rope1.is_leaf() && rope2.is_leaf());
311
312        let both_ok = rope1.get_leaf().is_ok_child() && rope2.get_leaf().is_ok_child();
313        if both_ok {
314            return Node::from_nodes(vec![rope1, rope2]);
315        }
316        match {
317            let node1 = Arc::make_mut(&mut rope1.0);
318            let leaf2 = rope2.get_leaf();
319            if let NodeVal::Leaf(ref mut leaf1) = node1.val {
320                let leaf2_iv = Interval::new(0, leaf2.len());
321                let new = leaf1.push_maybe_split(leaf2, leaf2_iv);
322                node1.len = leaf1.len();
323                node1.info = N::compute_info(leaf1);
324                new
325            } else {
326                panic!("merge_leaves called on non-leaf");
327            }
328        } {
329            Some(new) => Node::from_nodes(vec![rope1, Node::from_leaf(new)]),
330            None => rope1,
331        }
332    }
333
334    pub fn concat(rope1: Node<N>, rope2: Node<N>) -> Node<N> {
335        let h1 = rope1.height();
336        let h2 = rope2.height();
337
338        match h1.cmp(&h2) {
339            Ordering::Less => {
340                let children2 = rope2.get_children();
341                if h1 == h2 - 1 && rope1.is_ok_child() {
342                    return Node::merge_nodes(&[rope1], children2);
343                }
344                let newrope = Node::concat(rope1, children2[0].clone());
345                if newrope.height() == h2 - 1 {
346                    Node::merge_nodes(&[newrope], &children2[1..])
347                } else {
348                    Node::merge_nodes(newrope.get_children(), &children2[1..])
349                }
350            }
351            Ordering::Equal => {
352                if rope1.is_ok_child() && rope2.is_ok_child() {
353                    return Node::from_nodes(vec![rope1, rope2]);
354                }
355                if h1 == 0 {
356                    return Node::merge_leaves(rope1, rope2);
357                }
358                Node::merge_nodes(rope1.get_children(), rope2.get_children())
359            }
360            Ordering::Greater => {
361                let children1 = rope1.get_children();
362                if h2 == h1 - 1 && rope2.is_ok_child() {
363                    return Node::merge_nodes(children1, &[rope2]);
364                }
365                let lastix = children1.len() - 1;
366                let newrope = Node::concat(children1[lastix].clone(), rope2);
367                if newrope.height() == h1 - 1 {
368                    Node::merge_nodes(&children1[..lastix], &[newrope])
369                } else {
370                    Node::merge_nodes(&children1[..lastix], newrope.get_children())
371                }
372            }
373        }
374    }
375
376    pub fn measure<M: Metric<N>>(&self) -> usize {
377        M::measure(&self.0.info, self.0.len)
378    }
379
380    pub(crate) fn push_subseq(&self, b: &mut TreeBuilder<N>, iv: Interval) {
381        if iv.is_empty() {
382            return;
383        }
384        if iv == self.interval() {
385            b.push(self.clone());
386            return;
387        }
388        match self.0.val {
389            NodeVal::Leaf(ref l) => {
390                b.push_leaf_slice(l, iv);
391            }
392            NodeVal::Internal(ref v) => {
393                let mut offset = 0;
394                for child in v {
395                    if iv.is_before(offset) {
396                        break;
397                    }
398                    let child_iv = child.interval();
399                    // easier just to use signed ints?
400                    let rec_iv = iv.intersect(child_iv.translate(offset)).translate_neg(offset);
401                    child.push_subseq(b, rec_iv);
402                    offset += child.len();
403                }
404            }
405        }
406    }
407
408    pub fn subseq<T: IntervalBounds>(&self, iv: T) -> Node<N> {
409        let iv = iv.into_interval(self.len());
410        let mut b = TreeBuilder::new();
411        self.push_subseq(&mut b, iv);
412        b.build()
413    }
414
415    pub fn edit<T, IV>(&mut self, iv: IV, new: T)
416    where
417        T: Into<Node<N>>,
418        IV: IntervalBounds,
419    {
420        let mut b = TreeBuilder::new();
421        let iv = iv.into_interval(self.len());
422        let self_iv = self.interval();
423        self.push_subseq(&mut b, self_iv.prefix(iv));
424        b.push(new.into());
425        self.push_subseq(&mut b, self_iv.suffix(iv));
426        *self = b.build();
427    }
428
429    // doesn't deal with endpoint, handle that specially if you need it
430    pub fn convert_metrics<M1: Metric<N>, M2: Metric<N>>(&self, mut m1: usize) -> usize {
431        if m1 == 0 {
432            return 0;
433        }
434        // If M1 can fragment, then we must land on the leaf containing
435        // the m1 boundary. Otherwise, we can land on the beginning of
436        // the leaf immediately following the M1 boundary, which may be
437        // more efficient.
438        let m1_fudge = if M1::can_fragment() { 1 } else { 0 };
439        let mut m2 = 0;
440        let mut node = self;
441        while node.height() > 0 {
442            for child in node.get_children() {
443                let child_m1 = child.measure::<M1>();
444                if m1 < child_m1 + m1_fudge {
445                    node = child;
446                    break;
447                }
448                m2 += child.measure::<M2>();
449                m1 -= child_m1;
450            }
451        }
452        let l = node.get_leaf();
453        let base = M1::to_base_units(l, m1);
454        m2 + M2::from_base_units(l, base)
455    }
456}
457
458impl<N: DefaultMetric> Node<N> {
459    /// Measures the length of the text bounded by ``DefaultMetric::measure(offset)`` with another metric.
460    ///
461    /// # Examples
462    /// ```
463    /// use crate::xi_rope::{Rope, LinesMetric};
464    ///
465    /// // the default metric of Rope is BaseMetric (aka number of bytes)
466    /// let my_rope = Rope::from("first line \n second line \n");
467    ///
468    /// // count the number of lines in my_rope
469    /// let num_lines = my_rope.count::<LinesMetric>(my_rope.len());
470    /// assert_eq!(2, num_lines);
471    /// ```
472    pub fn count<M: Metric<N>>(&self, offset: usize) -> usize {
473        self.convert_metrics::<N::DefaultMetric, M>(offset)
474    }
475
476    /// Measures the length of the text bounded by ``M::measure(offset)`` with the default metric.
477    ///
478    /// # Examples
479    /// ```
480    /// use crate::xi_rope::{Rope, LinesMetric};
481    ///
482    /// // the default metric of Rope is BaseMetric (aka number of bytes)
483    /// let my_rope = Rope::from("first line \n second line \n");
484    ///
485    /// // get the byte offset of the line at index 1
486    /// let byte_offset = my_rope.count_base_units::<LinesMetric>(1);
487    /// assert_eq!(12, byte_offset);
488    /// ```
489    pub fn count_base_units<M: Metric<N>>(&self, offset: usize) -> usize {
490        self.convert_metrics::<M, N::DefaultMetric>(offset)
491    }
492}
493
494impl<N: NodeInfo> Default for Node<N> {
495    fn default() -> Node<N> {
496        Node::from_leaf(N::L::default())
497    }
498}
499
500/// A builder for creating new trees.
501pub struct TreeBuilder<N: NodeInfo> {
502    // A stack of partially built trees. These are kept in order of
503    // strictly descending height, and all vectors have a length less
504    // than MAX_CHILDREN and greater than zero.
505    //
506    // In addition, there is a balancing invariant: for each vector
507    // of length greater than one, all elements satisfy `is_ok_child`.
508    stack: Vec<Vec<Node<N>>>,
509}
510
511impl<N: NodeInfo> TreeBuilder<N> {
512    /// A new, empty builder.
513    pub fn new() -> TreeBuilder<N> {
514        TreeBuilder { stack: Vec::new() }
515    }
516
517    /// Append a node to the tree being built.
518    pub fn push(&mut self, mut n: Node<N>) {
519        loop {
520            let ord = if let Some(last) = self.stack.last() {
521                last[0].height().cmp(&n.height())
522            } else {
523                Ordering::Greater
524            };
525            match ord {
526                Ordering::Less => {
527                    n = Node::concat(self.pop(), n);
528                }
529                Ordering::Equal => {
530                    let tos = self.stack.last_mut().unwrap();
531                    if tos.last().unwrap().is_ok_child() && n.is_ok_child() {
532                        tos.push(n);
533                    } else if n.height() == 0 {
534                        let iv = Interval::new(0, n.len());
535                        let new_leaf = tos
536                            .last_mut()
537                            .unwrap()
538                            .with_leaf_mut(|l| l.push_maybe_split(n.get_leaf(), iv));
539                        if let Some(new_leaf) = new_leaf {
540                            tos.push(Node::from_leaf(new_leaf));
541                        }
542                    } else {
543                        let last = tos.pop().unwrap();
544                        let children1 = last.get_children();
545                        let children2 = n.get_children();
546                        let n_children = children1.len() + children2.len();
547                        if n_children <= MAX_CHILDREN {
548                            tos.push(Node::from_nodes([children1, children2].concat()));
549                        } else {
550                            // Note: this leans left. Splitting at midpoint is also an option
551                            let splitpoint = min(MAX_CHILDREN, n_children - MIN_CHILDREN);
552                            let mut iter = children1.iter().chain(children2.iter()).cloned();
553                            let left = iter.by_ref().take(splitpoint).collect();
554                            let right = iter.collect();
555                            tos.push(Node::from_nodes(left));
556                            tos.push(Node::from_nodes(right));
557                        }
558                    }
559                    if tos.len() < MAX_CHILDREN {
560                        break;
561                    }
562                    n = self.pop()
563                }
564                Ordering::Greater => {
565                    self.stack.push(vec![n]);
566                    break;
567                }
568            }
569        }
570    }
571
572    /// Append a sequence of leaves.
573    pub fn push_leaves(&mut self, leaves: impl IntoIterator<Item = N::L>) {
574        for leaf in leaves.into_iter() {
575            self.push(Node::from_leaf(leaf));
576        }
577    }
578
579    /// Append a single leaf.
580    pub fn push_leaf(&mut self, l: N::L) {
581        self.push(Node::from_leaf(l))
582    }
583
584    /// Append a slice of a single leaf.
585    pub fn push_leaf_slice(&mut self, l: &N::L, iv: Interval) {
586        self.push(Node::from_leaf(l.subseq(iv)))
587    }
588
589    /// Build the final tree.
590    ///
591    /// The tree is the concatenation of all the nodes and leaves that have been pushed
592    /// on the builder, in order.
593    pub fn build(mut self) -> Node<N> {
594        if self.stack.is_empty() {
595            Node::from_leaf(N::L::default())
596        } else {
597            let mut n = self.pop();
598            while !self.stack.is_empty() {
599                n = Node::concat(self.pop(), n);
600            }
601            n
602        }
603    }
604
605    /// Pop the last vec-of-nodes off the stack, resulting in a node.
606    fn pop(&mut self) -> Node<N> {
607        let nodes = self.stack.pop().unwrap();
608        if nodes.len() == 1 {
609            nodes.into_iter().next().unwrap()
610        } else {
611            Node::from_nodes(nodes)
612        }
613    }
614}
615
616const CURSOR_CACHE_SIZE: usize = 4;
617
618/// A data structure for traversing boundaries in a tree.
619///
620/// It is designed to be efficient both for random access and for iteration. The
621/// cursor itself is agnostic to which [`Metric`] is used to determine boundaries, but
622/// the methods to find boundaries are parametrized on the [`Metric`].
623///
624/// A cursor can be valid or invalid. It is always valid when created or after
625/// [`set`](#method.set) is called, and becomes invalid after [`prev`](#method.prev)
626/// or [`next`](#method.next) fails to find a boundary.
627///
628/// [`Metric`]: struct.Metric.html
629pub struct Cursor<'a, N: 'a + NodeInfo> {
630    /// The tree being traversed by this cursor.
631    root: &'a Node<N>,
632    /// The current position of the cursor.
633    ///
634    /// It is always less than or equal to the tree length.
635    position: usize,
636    /// The cache holds the tail of the path from the root to the current leaf.
637    ///
638    /// Each entry is a reference to the parent node and the index of the child. It
639    /// is stored bottom-up; `cache[0]` is the parent of the leaf and the index of
640    /// the leaf within that parent.
641    ///
642    /// The main motivation for this being a fixed-size array is to keep the cursor
643    /// an allocation-free data structure.
644    cache: [Option<(&'a Node<N>, usize)>; CURSOR_CACHE_SIZE],
645    /// The leaf containing the current position, when the cursor is valid.
646    ///
647    /// The position is only at the end of the leaf when it is at the end of the tree.
648    leaf: Option<&'a N::L>,
649    /// The offset of `leaf` within the tree.
650    offset_of_leaf: usize,
651}
652
653impl<'a, N: NodeInfo> Cursor<'a, N> {
654    /// Create a new cursor at the given position.
655    pub fn new(n: &'a Node<N>, position: usize) -> Cursor<'a, N> {
656        let mut result = Cursor {
657            root: n,
658            position,
659            cache: [None; CURSOR_CACHE_SIZE],
660            leaf: None,
661            offset_of_leaf: 0,
662        };
663        result.descend();
664        result
665    }
666
667    /// The length of the tree.
668    pub fn total_len(&self) -> usize {
669        self.root.len()
670    }
671
672    /// Return a reference to the root node of the tree.
673    pub fn root(&self) -> &'a Node<N> {
674        self.root
675    }
676
677    /// Get the current leaf of the cursor.
678    ///
679    /// If the cursor is valid, returns the leaf containing the current position,
680    /// and the offset of the current position within the leaf. That offset is equal
681    /// to the leaf length only at the end, otherwise it is less than the leaf length.
682    pub fn get_leaf(&self) -> Option<(&'a N::L, usize)> {
683        self.leaf.map(|l| (l, self.position - self.offset_of_leaf))
684    }
685
686    /// Set the position of the cursor.
687    ///
688    /// The cursor is valid after this call.
689    ///
690    /// Precondition: `position` is less than or equal to the length of the tree.
691    pub fn set(&mut self, position: usize) {
692        self.position = position;
693        if let Some(l) = self.leaf {
694            if self.position >= self.offset_of_leaf && self.position < self.offset_of_leaf + l.len()
695            {
696                return;
697            }
698        }
699        // TODO: walk up tree to find leaf if nearby
700        self.descend();
701    }
702
703    /// Get the position of the cursor.
704    pub fn pos(&self) -> usize {
705        self.position
706    }
707
708    /// Determine whether the current position is a boundary.
709    ///
710    /// Note: the beginning and end of the tree may or may not be boundaries, depending on the
711    /// metric. If the metric is not `can_fragment`, then they always are.
712    pub fn is_boundary<M: Metric<N>>(&mut self) -> bool {
713        if self.leaf.is_none() {
714            // not at a valid position
715            return false;
716        }
717        if self.position == self.offset_of_leaf && !M::can_fragment() {
718            return true;
719        }
720        if self.position == 0 || self.position > self.offset_of_leaf {
721            return M::is_boundary(self.leaf.unwrap(), self.position - self.offset_of_leaf);
722        }
723        // tricky case, at beginning of leaf, need to query end of previous
724        // leaf; TODO: would be nice if we could do it another way that didn't
725        // make the method &mut self.
726        let l = self.prev_leaf().unwrap().0;
727        let result = M::is_boundary(l, l.len());
728        let _ = self.next_leaf();
729        result
730    }
731
732    /// Moves the cursor to the previous boundary.
733    ///
734    /// When there is no previous boundary, returns `None` and the cursor becomes invalid.
735    ///
736    /// Return value: the position of the boundary, if it exists.
737    pub fn prev<M: Metric<N>>(&mut self) -> Option<usize> {
738        if self.position == 0 || self.leaf.is_none() {
739            self.leaf = None;
740            return None;
741        }
742        let orig_pos = self.position;
743        let offset_in_leaf = orig_pos - self.offset_of_leaf;
744        if offset_in_leaf > 0 {
745            let l = self.leaf.unwrap();
746            if let Some(offset_in_leaf) = M::prev(l, offset_in_leaf) {
747                self.position = self.offset_of_leaf + offset_in_leaf;
748                return Some(self.position);
749            }
750        }
751
752        // not in same leaf, need to scan backwards
753        self.prev_leaf()?;
754        if let Some(offset) = self.last_inside_leaf::<M>(orig_pos) {
755            return Some(offset);
756        }
757
758        // Not found in previous leaf, find using measurement.
759        let measure = self.measure_leaf::<M>(self.position);
760        if measure == 0 {
761            self.leaf = None;
762            self.position = 0;
763            return None;
764        }
765        self.descend_metric::<M>(measure);
766        self.last_inside_leaf::<M>(orig_pos)
767    }
768
769    /// Moves the cursor to the next boundary.
770    ///
771    /// When there is no next boundary, returns `None` and the cursor becomes invalid.
772    ///
773    /// Return value: the position of the boundary, if it exists.
774    pub fn next<M: Metric<N>>(&mut self) -> Option<usize> {
775        if self.position >= self.root.len() || self.leaf.is_none() {
776            self.leaf = None;
777            return None;
778        }
779
780        if let Some(offset) = self.next_inside_leaf::<M>() {
781            return Some(offset);
782        }
783
784        self.next_leaf()?;
785        if let Some(offset) = self.next_inside_leaf::<M>() {
786            return Some(offset);
787        }
788
789        // Leaf is 0-measure (otherwise would have already succeeded).
790        let measure = self.measure_leaf::<M>(self.position);
791        self.descend_metric::<M>(measure + 1);
792        if let Some(offset) = self.next_inside_leaf::<M>() {
793            return Some(offset);
794        }
795
796        // Not found, properly invalidate cursor.
797        self.position = self.root.len();
798        self.leaf = None;
799        None
800    }
801
802    /// Returns the current position if it is a boundary in this [`Metric`],
803    /// else behaves like [`next`](#method.next).
804    ///
805    /// [`Metric`]: struct.Metric.html
806    pub fn at_or_next<M: Metric<N>>(&mut self) -> Option<usize> {
807        if self.is_boundary::<M>() {
808            Some(self.pos())
809        } else {
810            self.next::<M>()
811        }
812    }
813
814    /// Returns the current position if it is a boundary in this [`Metric`],
815    /// else behaves like [`prev`](#method.prev).
816    ///
817    /// [`Metric`]: struct.Metric.html
818    pub fn at_or_prev<M: Metric<N>>(&mut self) -> Option<usize> {
819        if self.is_boundary::<M>() {
820            Some(self.pos())
821        } else {
822            self.prev::<M>()
823        }
824    }
825
826    /// Returns an iterator with this cursor over the given [`Metric`].
827    ///
828    /// # Examples:
829    ///
830    /// ```
831    /// # use xi_rope::{Cursor, LinesMetric, Rope};
832    /// #
833    /// let text: Rope = "one line\ntwo line\nred line\nblue".into();
834    /// let mut cursor = Cursor::new(&text, 0);
835    /// let line_offsets = cursor.iter::<LinesMetric>().collect::<Vec<_>>();
836    /// assert_eq!(line_offsets, vec![9, 18, 27]);
837    ///
838    /// ```
839    /// [`Metric`]: struct.Metric.html
840    pub fn iter<'c, M: Metric<N>>(&'c mut self) -> CursorIter<'c, 'a, N, M> {
841        CursorIter { cursor: self, _metric: PhantomData }
842    }
843
844    /// Tries to find the last boundary in the leaf the cursor is currently in.
845    ///
846    /// If the last boundary is at the end of the leaf, it is only counted if
847    /// it is less than `orig_pos`.
848    #[inline]
849    fn last_inside_leaf<M: Metric<N>>(&mut self, orig_pos: usize) -> Option<usize> {
850        let l = self.leaf.expect("inconsistent, shouldn't get here");
851        let len = l.len();
852        if self.offset_of_leaf + len < orig_pos && M::is_boundary(l, len) {
853            let _ = self.next_leaf();
854            return Some(self.position);
855        }
856        let offset_in_leaf = M::prev(l, len)?;
857        self.position = self.offset_of_leaf + offset_in_leaf;
858        Some(self.position)
859    }
860
861    /// Tries to find the next boundary in the leaf the cursor is currently in.
862    #[inline]
863    fn next_inside_leaf<M: Metric<N>>(&mut self) -> Option<usize> {
864        let l = self.leaf.expect("inconsistent, shouldn't get here");
865        let offset_in_leaf = self.position - self.offset_of_leaf;
866        let offset_in_leaf = M::next(l, offset_in_leaf)?;
867        if offset_in_leaf == l.len() && self.offset_of_leaf + offset_in_leaf != self.root.len() {
868            let _ = self.next_leaf();
869        } else {
870            self.position = self.offset_of_leaf + offset_in_leaf;
871        }
872        Some(self.position)
873    }
874
875    /// Move to beginning of next leaf.
876    ///
877    /// Return value: same as [`get_leaf`](#method.get_leaf).
878    pub fn next_leaf(&mut self) -> Option<(&'a N::L, usize)> {
879        let leaf = self.leaf?;
880        self.position = self.offset_of_leaf + leaf.len();
881        for i in 0..CURSOR_CACHE_SIZE {
882            if self.cache[i].is_none() {
883                // this probably can't happen
884                self.leaf = None;
885                return None;
886            }
887            let (node, j) = self.cache[i].unwrap();
888            if j + 1 < node.get_children().len() {
889                self.cache[i] = Some((node, j + 1));
890                let mut node_down = &node.get_children()[j + 1];
891                for k in (0..i).rev() {
892                    self.cache[k] = Some((node_down, 0));
893                    node_down = &node_down.get_children()[0];
894                }
895                self.leaf = Some(node_down.get_leaf());
896                self.offset_of_leaf = self.position;
897                return self.get_leaf();
898            }
899        }
900        if self.offset_of_leaf + self.leaf.unwrap().len() == self.root.len() {
901            self.leaf = None;
902            return None;
903        }
904        self.descend();
905        self.get_leaf()
906    }
907
908    /// Move to beginning of previous leaf.
909    ///
910    /// Return value: same as [`get_leaf`](#method.get_leaf).
911    pub fn prev_leaf(&mut self) -> Option<(&'a N::L, usize)> {
912        if self.offset_of_leaf == 0 {
913            self.leaf = None;
914            self.position = 0;
915            return None;
916        }
917        for i in 0..CURSOR_CACHE_SIZE {
918            if self.cache[i].is_none() {
919                // this probably can't happen
920                self.leaf = None;
921                return None;
922            }
923            let (node, j) = self.cache[i].unwrap();
924            if j > 0 {
925                self.cache[i] = Some((node, j - 1));
926                let mut node_down = &node.get_children()[j - 1];
927                for k in (0..i).rev() {
928                    let last_ix = node_down.get_children().len() - 1;
929                    self.cache[k] = Some((node_down, last_ix));
930                    node_down = &node_down.get_children()[last_ix];
931                }
932                let leaf = node_down.get_leaf();
933                self.leaf = Some(leaf);
934                self.offset_of_leaf -= leaf.len();
935                self.position = self.offset_of_leaf;
936                return self.get_leaf();
937            }
938        }
939        self.position = self.offset_of_leaf - 1;
940        self.descend();
941        self.position = self.offset_of_leaf;
942        self.get_leaf()
943    }
944
945    /// Go to the leaf containing the current position.
946    ///
947    /// Sets `leaf` to the leaf containing `position`, and updates `cache` and
948    /// `offset_of_leaf` to be consistent.
949    fn descend(&mut self) {
950        let mut node = self.root;
951        let mut offset = 0;
952        while node.height() > 0 {
953            let children = node.get_children();
954            let mut i = 0;
955            loop {
956                if i + 1 == children.len() {
957                    break;
958                }
959                let nextoff = offset + children[i].len();
960                if nextoff > self.position {
961                    break;
962                }
963                offset = nextoff;
964                i += 1;
965            }
966            let cache_ix = node.height() - 1;
967            if cache_ix < CURSOR_CACHE_SIZE {
968                self.cache[cache_ix] = Some((node, i));
969            }
970            node = &children[i];
971        }
972        self.leaf = Some(node.get_leaf());
973        self.offset_of_leaf = offset;
974    }
975
976    /// Returns the measure at the beginning of the leaf containing `pos`.
977    ///
978    /// This method is O(log n) no matter the current cursor state.
979    fn measure_leaf<M: Metric<N>>(&self, mut pos: usize) -> usize {
980        let mut node = self.root;
981        let mut metric = 0;
982        while node.height() > 0 {
983            for child in node.get_children() {
984                let len = child.len();
985                if pos < len {
986                    node = child;
987                    break;
988                }
989                pos -= len;
990                metric += child.measure::<M>();
991            }
992        }
993        metric
994    }
995
996    /// Find the leaf having the given measure.
997    ///
998    /// This function sets `self.position` to the beginning of the leaf
999    /// containing the smallest offset with the given metric, and also updates
1000    /// state as if [`descend`](#method.descend) was called.
1001    ///
1002    /// If `measure` is greater than the measure of the whole tree, then moves
1003    /// to the last node.
1004    fn descend_metric<M: Metric<N>>(&mut self, mut measure: usize) {
1005        let mut node = self.root;
1006        let mut offset = 0;
1007        while node.height() > 0 {
1008            let children = node.get_children();
1009            let mut i = 0;
1010            loop {
1011                if i + 1 == children.len() {
1012                    break;
1013                }
1014                let child = &children[i];
1015                let child_m = child.measure::<M>();
1016                if child_m >= measure {
1017                    break;
1018                }
1019                offset += child.len();
1020                measure -= child_m;
1021                i += 1;
1022            }
1023            let cache_ix = node.height() - 1;
1024            if cache_ix < CURSOR_CACHE_SIZE {
1025                self.cache[cache_ix] = Some((node, i));
1026            }
1027            node = &children[i];
1028        }
1029        self.leaf = Some(node.get_leaf());
1030        self.position = offset;
1031        self.offset_of_leaf = offset;
1032    }
1033}
1034
1035/// An iterator generated by a [`Cursor`], for some [`Metric`].
1036///
1037/// [`Cursor`]: struct.Cursor.html
1038/// [`Metric`]: struct.Metric.html
1039pub struct CursorIter<'c, 'a: 'c, N: 'a + NodeInfo, M: 'a + Metric<N>> {
1040    cursor: &'c mut Cursor<'a, N>,
1041    _metric: PhantomData<&'a M>,
1042}
1043
1044impl<'c, 'a, N: NodeInfo, M: Metric<N>> Iterator for CursorIter<'c, 'a, N, M> {
1045    type Item = usize;
1046
1047    fn next(&mut self) -> Option<usize> {
1048        self.cursor.next::<M>()
1049    }
1050}
1051
1052impl<'c, 'a, N: NodeInfo, M: Metric<N>> CursorIter<'c, 'a, N, M> {
1053    /// Returns the current position of the underlying [`Cursor`].
1054    ///
1055    /// [`Cursor`]: struct.Cursor.html
1056    pub fn pos(&self) -> usize {
1057        self.cursor.pos()
1058    }
1059}
1060
1061#[cfg(test)]
1062mod test {
1063    use super::*;
1064    use crate::rope::*;
1065
1066    fn build_triangle(n: u32) -> String {
1067        let mut s = String::new();
1068        let mut line = String::new();
1069        for _ in 0..n {
1070            s += &line;
1071            s += "\n";
1072            line += "a";
1073        }
1074        s
1075    }
1076
1077    #[test]
1078    fn eq_rope_with_pieces() {
1079        let n = 2_000;
1080        let s = build_triangle(n);
1081        let mut builder_default = TreeBuilder::new();
1082        let mut concat_rope = Rope::default();
1083        builder_default.push_str(&s);
1084        let mut i = 0;
1085        while i < s.len() {
1086            let j = (i + 1000).min(s.len());
1087            concat_rope = concat_rope + s[i..j].into();
1088            i = j;
1089        }
1090        let built_rope = builder_default.build();
1091        assert_eq!(built_rope, concat_rope);
1092    }
1093
1094    #[test]
1095    fn cursor_next_triangle() {
1096        let n = 2_000;
1097        let text = Rope::from(build_triangle(n));
1098
1099        let mut cursor = Cursor::new(&text, 0);
1100        let mut prev_offset = cursor.pos();
1101        for i in 1..(n + 1) as usize {
1102            let offset = cursor.next::<LinesMetric>().expect("arrived at the end too soon");
1103            assert_eq!(offset - prev_offset, i);
1104            prev_offset = offset;
1105        }
1106        assert_eq!(cursor.next::<LinesMetric>(), None);
1107    }
1108
1109    #[test]
1110    fn node_is_empty() {
1111        let text = Rope::from(String::new());
1112        assert_eq!(text.is_empty(), true);
1113    }
1114
1115    #[test]
1116    fn cursor_next_empty() {
1117        let text = Rope::from(String::new());
1118        let mut cursor = Cursor::new(&text, 0);
1119        assert_eq!(cursor.next::<LinesMetric>(), None);
1120        assert_eq!(cursor.pos(), 0);
1121    }
1122
1123    #[test]
1124    fn cursor_iter() {
1125        let text: Rope = build_triangle(50).into();
1126        let mut cursor = Cursor::new(&text, 0);
1127        let mut manual = Vec::new();
1128        while let Some(nxt) = cursor.next::<LinesMetric>() {
1129            manual.push(nxt);
1130        }
1131
1132        cursor.set(0);
1133        let auto = cursor.iter::<LinesMetric>().collect::<Vec<_>>();
1134        assert_eq!(manual, auto);
1135    }
1136
1137    #[test]
1138    fn cursor_next_misc() {
1139        cursor_next_for("toto");
1140        cursor_next_for("toto\n");
1141        cursor_next_for("toto\ntata");
1142        cursor_next_for("歴史\n科学的");
1143        cursor_next_for("\n歴史\n科学的\n");
1144        cursor_next_for(&build_triangle(100));
1145    }
1146
1147    fn cursor_next_for(s: &str) {
1148        let r = Rope::from(s.to_owned());
1149        for i in 0..r.len() {
1150            let mut c = Cursor::new(&r, i);
1151            let it = c.next::<LinesMetric>();
1152            let pos = c.pos();
1153            assert!(s.as_bytes()[i..pos - 1].iter().all(|c| *c != b'\n'), "missed linebreak");
1154            if pos < s.len() {
1155                assert!(it.is_some(), "must be Some(_)");
1156                assert!(s.as_bytes()[pos - 1] == b'\n', "not a linebreak");
1157            } else {
1158                if s.as_bytes()[s.len() - 1] == b'\n' {
1159                    assert!(it.is_some(), "must be Some(_)");
1160                } else {
1161                    assert!(it.is_none());
1162                    assert!(c.get_leaf().is_none());
1163                }
1164            }
1165        }
1166    }
1167
1168    #[test]
1169    fn cursor_prev_misc() {
1170        cursor_prev_for("toto");
1171        cursor_prev_for("a\na\n");
1172        cursor_prev_for("toto\n");
1173        cursor_prev_for("toto\ntata");
1174        cursor_prev_for("歴史\n科学的");
1175        cursor_prev_for("\n歴史\n科学的\n");
1176        cursor_prev_for(&build_triangle(100));
1177    }
1178
1179    fn cursor_prev_for(s: &str) {
1180        let r = Rope::from(s.to_owned());
1181        for i in 0..r.len() {
1182            let mut c = Cursor::new(&r, i);
1183            let it = c.prev::<LinesMetric>();
1184            let pos = c.pos();
1185
1186            //Should countain at most one linebreak
1187            assert!(
1188                s.as_bytes()[pos..i].iter().filter(|c| **c == b'\n').count() <= 1,
1189                "missed linebreak"
1190            );
1191
1192            if i == 0 && s.as_bytes()[i] == b'\n' {
1193                assert_eq!(pos, 0);
1194            }
1195
1196            if pos > 0 {
1197                assert!(it.is_some(), "must be Some(_)");
1198                assert!(s.as_bytes()[pos - 1] == b'\n', "not a linebreak");
1199            }
1200        }
1201    }
1202
1203    #[test]
1204    fn at_or_next() {
1205        let text: Rope = "this\nis\nalil\nstring".into();
1206        let mut cursor = Cursor::new(&text, 0);
1207        assert_eq!(cursor.at_or_next::<LinesMetric>(), Some(5));
1208        assert_eq!(cursor.at_or_next::<LinesMetric>(), Some(5));
1209        cursor.set(1);
1210        assert_eq!(cursor.at_or_next::<LinesMetric>(), Some(5));
1211        assert_eq!(cursor.at_or_prev::<LinesMetric>(), Some(5));
1212        cursor.set(6);
1213        assert_eq!(cursor.at_or_prev::<LinesMetric>(), Some(5));
1214        cursor.set(6);
1215        assert_eq!(cursor.at_or_next::<LinesMetric>(), Some(8));
1216        assert_eq!(cursor.at_or_next::<LinesMetric>(), Some(8));
1217    }
1218
1219    #[test]
1220    fn next_zero_measure_large() {
1221        let mut text = Rope::from("a");
1222        for _ in 0..24 {
1223            text = Node::concat(text.clone(), text);
1224            let mut cursor = Cursor::new(&text, 0);
1225            assert_eq!(cursor.next::<LinesMetric>(), None);
1226            // Test that cursor is properly invalidated and at end of text.
1227            assert_eq!(cursor.get_leaf(), None);
1228            assert_eq!(cursor.pos(), text.len());
1229
1230            cursor.set(text.len());
1231            assert_eq!(cursor.prev::<LinesMetric>(), None);
1232            // Test that cursor is properly invalidated and at beginning of text.
1233            assert_eq!(cursor.get_leaf(), None);
1234            assert_eq!(cursor.pos(), 0);
1235        }
1236    }
1237
1238    #[test]
1239    fn prev_line_large() {
1240        let s: String = format!("{}{}", "\n", build_triangle(1000));
1241        let rope = Rope::from(s);
1242        let mut expected_pos = rope.len();
1243        let mut cursor = Cursor::new(&rope, rope.len());
1244
1245        for i in (1..1001).rev() {
1246            expected_pos = expected_pos - i;
1247            assert_eq!(expected_pos, cursor.prev::<LinesMetric>().unwrap());
1248        }
1249
1250        assert_eq!(None, cursor.prev::<LinesMetric>());
1251    }
1252
1253    #[test]
1254    fn prev_line_small() {
1255        let empty_rope = Rope::from("\n");
1256        let mut cursor = Cursor::new(&empty_rope, empty_rope.len());
1257        assert_eq!(None, cursor.prev::<LinesMetric>());
1258
1259        let rope = Rope::from("\n\n\n\n\n\n\n\n\n\n");
1260        cursor = Cursor::new(&rope, rope.len());
1261        let mut expected_pos = rope.len();
1262        for _ in (1..10).rev() {
1263            expected_pos -= 1;
1264            assert_eq!(expected_pos, cursor.prev::<LinesMetric>().unwrap());
1265        }
1266
1267        assert_eq!(None, cursor.prev::<LinesMetric>());
1268    }
1269
1270    #[test]
1271    fn balance_invariant() {
1272        let mut tb = TreeBuilder::<RopeInfo>::new();
1273        let leaves: Vec<String> = (0..1000).map(|i| i.to_string().into()).collect();
1274        tb.push_leaves(leaves);
1275        let tree = tb.build();
1276        println!("height {}", tree.height());
1277    }
1278}
lapce_xi_rope/tree.rs

lapce_xi_rope/
tree.rs