sp_ropey/
rope_builder.rs

1use smallvec::alloc::string::String;
2use smallvec::SmallVec;
3use alloc::sync::Arc;
4
5use crate::crlf;
6use crate::rope::Rope;
7use crate::tree::{Node, NodeChildren, NodeText, MAX_BYTES, MAX_CHILDREN};
8
9/// An efficient incremental `Rope` builder.
10///
11/// This is used to efficiently build ropes from sequences of text
12/// chunks.  It is useful for creating ropes from:
13///
14/// - ...large text files, without pre-loading their entire contents into
15///   memory (but see
16///   [`Rope::from_reader()`](struct.Rope.html#method.from_reader) for a
17///   convenience function that does this for utf8 text).
18/// - ...streaming data sources.
19/// - ...non-utf8 text data, doing the encoding conversion incrementally
20///   as you go.
21///
22/// Unlike repeatedly calling `Rope::insert()` on the end of a rope,
23/// this API runs in time linear to the amount of data fed to it, and
24/// is overall much faster.
25///
26/// # Example
27/// ```
28/// # use sp_ropey::RopeBuilder;
29/// #
30/// let mut builder = RopeBuilder::new();
31///
32/// builder.append("Hello ");
33/// builder.append("world!\n");
34/// builder.append("How's ");
35/// builder.append("it goin");
36/// builder.append("g?");
37///
38/// let rope = builder.finish();
39///
40/// assert_eq!(rope, "Hello world!\nHow's it going?");
41/// ```
42#[derive(Debug, Clone, Default)]
43pub struct RopeBuilder {
44    stack: SmallVec<[Arc<Node>; 4]>,
45    buffer: String,
46}
47
48impl RopeBuilder {
49    /// Creates a new RopeBuilder, ready for input.
50    pub fn new() -> Self {
51        RopeBuilder {
52            stack: {
53                let mut stack = SmallVec::new();
54                stack.push(Arc::new(Node::new()));
55                stack
56            },
57            buffer: String::new(),
58        }
59    }
60
61    /// Appends `chunk` to the end of the in-progress `Rope`.
62    ///
63    /// This method is called repeatedly to incrementally build up a
64    /// `Rope`.  The passed text chunk can be as large or small as
65    /// desired, but larger chunks are more efficient.
66    ///
67    /// `chunk` must be valid utf8 text.
68    pub fn append(&mut self, chunk: &str) {
69        self.append_internal(chunk, false);
70    }
71
72    /// Finishes the build, and returns the `Rope`.
73    ///
74    /// Note: this method consumes the builder.  If you want to continue
75    /// building other ropes with the same prefix, you can clone the builder
76    /// before calling `finish()`.
77    pub fn finish(mut self) -> Rope {
78        // Append the last leaf
79        self.append_internal("", true);
80        self.finish_internal()
81    }
82
83    /// Builds a rope all at once from a single string slice.
84    ///
85    /// This avoids the creation and use of the internal buffer.  This is
86    /// for internal use only, because the public-facing API has
87    /// Rope::from_str(), which actually uses this for its implementation.
88    pub(crate) fn build_at_once(mut self, chunk: &str) -> Rope {
89        self.append_internal(chunk, true);
90        self.finish_internal()
91    }
92
93    //-----------------------------------------------------------------
94
95    // Internal workings of `append()`.
96    fn append_internal(&mut self, chunk: &str, is_last_chunk: bool) {
97        let mut chunk = chunk;
98
99        // Repeatedly chop text off the end of the input, creating
100        // leaf nodes out of them and appending them to the tree.
101        while !chunk.is_empty() || (!self.buffer.is_empty() && is_last_chunk) {
102            // Get the text for the next leaf
103            let (leaf_text, remainder) = self.get_next_leaf_text(chunk, is_last_chunk);
104            chunk = remainder;
105
106            // Append the leaf to the rope
107            match leaf_text {
108                NextText::None => break,
109                NextText::UseBuffer => {
110                    let leaf_text = NodeText::from_str(&self.buffer);
111                    self.append_leaf_node(Arc::new(Node::Leaf(leaf_text)));
112                    self.buffer.clear();
113                }
114                NextText::String(s) => {
115                    self.append_leaf_node(Arc::new(Node::Leaf(NodeText::from_str(s))));
116                }
117            }
118        }
119    }
120
121    // Internal workings of `finish()`.
122    fn finish_internal(mut self) -> Rope {
123        // Zip up all the remaining nodes on the stack
124        let mut stack_idx = self.stack.len() - 1;
125        while stack_idx >= 1 {
126            let node = self.stack.pop().unwrap();
127            if let Node::Internal(ref mut children) = *Arc::make_mut(&mut self.stack[stack_idx - 1])
128            {
129                children.push((node.text_info(), node));
130            } else {
131                unreachable!();
132            }
133            stack_idx -= 1;
134        }
135
136        // Get root and fix any right-side nodes with too few children.
137        let mut root = self.stack.pop().unwrap();
138        Arc::make_mut(&mut root).zip_fix_right();
139
140        // Create the rope, make sure it's well-formed, and return it.
141        let mut rope = Rope { root: root };
142        rope.pull_up_singular_nodes();
143        return rope;
144    }
145
146    // Returns (next_leaf_text, remaining_text)
147    #[inline(always)]
148    fn get_next_leaf_text<'a>(
149        &mut self,
150        text: &'a str,
151        is_last_chunk: bool,
152    ) -> (NextText<'a>, &'a str) {
153        assert!(
154            self.buffer.len() < MAX_BYTES,
155            "RopeBuilder: buffer is already full when receiving a chunk! \
156             This should never happen!",
157        );
158
159        // Simplest case: empty buffer and enough in `text` for a full
160        // chunk, so just chop a chunk off from `text` and use that.
161        if self.buffer.is_empty() && text.len() >= MAX_BYTES {
162            let split_idx = crlf::find_good_split(
163                MAX_BYTES.min(text.len() - 1), // - 1 to avoid CRLF split.
164                text.as_bytes(),
165                true,
166            );
167            return (NextText::String(&text[..split_idx]), &text[split_idx..]);
168        }
169        // If the buffer + `text` is enough for a full chunk, push enough
170        // of `text` onto the buffer to fill it and use that.
171        else if (text.len() + self.buffer.len()) >= MAX_BYTES {
172            let mut split_idx =
173                crlf::find_good_split(MAX_BYTES - self.buffer.len(), text.as_bytes(), true);
174            if split_idx == text.len() && text.as_bytes()[text.len() - 1] == 0x0D {
175                // Avoid CRLF split.
176                split_idx -= 1;
177            };
178            self.buffer.push_str(&text[..split_idx]);
179            return (NextText::UseBuffer, &text[split_idx..]);
180        }
181        // If we don't have enough text for a full chunk.
182        else {
183            // If it's our last chunk, wrap it all up!
184            if is_last_chunk {
185                if self.buffer.is_empty() {
186                    return if text.is_empty() {
187                        (NextText::None, "")
188                    } else {
189                        (NextText::String(text), "")
190                    };
191                } else {
192                    self.buffer.push_str(text);
193                    return (NextText::UseBuffer, "");
194                }
195            }
196            // Otherwise, just push to the buffer.
197            else {
198                self.buffer.push_str(text);
199                return (NextText::None, "");
200            }
201        }
202    }
203
204    fn append_leaf_node(&mut self, leaf: Arc<Node>) {
205        let last = self.stack.pop().unwrap();
206        match *last {
207            Node::Leaf(_) => {
208                if last.leaf_text().is_empty() {
209                    self.stack.push(leaf);
210                } else {
211                    let mut children = NodeChildren::new();
212                    children.push((last.text_info(), last));
213                    children.push((leaf.text_info(), leaf));
214                    self.stack.push(Arc::new(Node::Internal(children)));
215                }
216            }
217
218            Node::Internal(_) => {
219                self.stack.push(last);
220                let mut left = leaf;
221                let mut stack_idx = (self.stack.len() - 1) as isize;
222                loop {
223                    if stack_idx < 0 {
224                        // We're above the root, so do a root split.
225                        let mut children = NodeChildren::new();
226                        children.push((left.text_info(), left));
227                        self.stack.insert(0, Arc::new(Node::Internal(children)));
228                        break;
229                    } else if self.stack[stack_idx as usize].child_count() < (MAX_CHILDREN - 1) {
230                        // There's room to add a child, so do that.
231                        Arc::make_mut(&mut self.stack[stack_idx as usize])
232                            .children_mut()
233                            .push((left.text_info(), left));
234                        break;
235                    } else {
236                        // Not enough room to fit a child, so split.
237                        left = Arc::new(Node::Internal(
238                            Arc::make_mut(&mut self.stack[stack_idx as usize])
239                                .children_mut()
240                                .push_split((left.text_info(), left)),
241                        ));
242                        core::mem::swap(&mut left, &mut self.stack[stack_idx as usize]);
243                        stack_idx -= 1;
244                    }
245                }
246            }
247        }
248    }
249}
250
251enum NextText<'a> {
252    None,
253    UseBuffer,
254    String(&'a str),
255}
256
257//===========================================================================
258
259#[cfg(test)]
260mod tests {
261    use super::*;
262
263    // 127 bytes, 103 chars, 4 lines
264    const TEXT: &str = "Hello there!  How're you doing?\r\nIt's \
265                        a fine day, isn't it?\r\nAren't you glad \
266                        we're alive?\r\nこんにちは、みんなさん!";
267
268    #[test]
269    fn rope_builder_01() {
270        let mut b = RopeBuilder::new();
271
272        b.append("Hello there!  How're you doing?\r");
273        b.append("\nIt's a fine ");
274        b.append("d");
275        b.append("a");
276        b.append("y,");
277        b.append(" ");
278        b.append("isn't it?");
279        b.append("\r");
280        b.append("\nAren't you ");
281        b.append("glad we're alive?\r");
282        b.append("\n");
283        b.append("こんにち");
284        b.append("は、みんなさ");
285        b.append("ん!");
286
287        let r = b.finish();
288
289        assert_eq!(r, TEXT);
290
291        r.assert_integrity();
292        r.assert_invariants();
293    }
294}