hi_doc_jumprope/
jumprope.rs

1// This is an implementation of a Rope (fancy string) based on a skip list. This
2// implementation is a rust port of librope:
3// https://github.com/josephg/librope
4// It does not support wide characters.
5
6// Unlike other rust rope implementations, this implementation should be very
7// fast; but it manages that through heavy use of unsafe pointers and C-style
8// dynamic arrays.
9
10// use rope::*;
11
12use std::str;
13use std::cmp::min;
14use std::fmt::{Debug, Display, Formatter};
15use std::marker::PhantomData;
16use std::ops::Range;
17use std::ptr::null_mut;
18use rand::prelude::*;
19use rand::Rng;
20use crate::fast_str_tools::*;
21use crate::gapbuffer::GapBuffer;
22// use crate::utils::*;
23// use crate::params::*;
24
25// Must be <= UINT16_MAX. Benchmarking says this is pretty close to optimal
26// (tested on a mac using clang 4.0 and x86_64).
27//const NODE_SIZE: usize = 136;
28
29// The likelyhood (out of 256) a node will have height (n+1) instead of n
30const BIAS: u8 = 65;
31// const BIAS: u8 = XX_BIAS;
32
33// The rope will become less efficient after the string is 2 ^ ROPE_MAX_HEIGHT nodes.
34
35#[cfg(debug_assertions)]
36pub(crate) const NODE_STR_SIZE: usize = 10;
37#[cfg(not(debug_assertions))]
38pub(crate) const NODE_STR_SIZE: usize = 392;
39// pub(crate) const NODE_STR_SIZE: usize = XX_SIZE;
40
41const MAX_HEIGHT: usize = 20;//NODE_STR_SIZE / mem::size_of::<SkipEntry>();
42const MAX_HEIGHT_U8: u8 = MAX_HEIGHT as u8;
43
44// Using StdRng notably increases wasm code size, providing some tiny extra protection against
45// ddos attacks. See main module documentation for details.
46#[cfg(feature = "ddos_protection")]
47type RopeRng = StdRng;
48#[cfg(not(feature = "ddos_protection"))]
49type RopeRng = SmallRng;
50
51
52// The node structure is designed in a very fancy way which would be more at home in C or something
53// like that. The basic idea is that the node structure is fixed size in memory, but the proportion
54// of that space taken up by characters and by the height are different depentant on a node's
55// height.
56#[repr(C)]
57pub struct JumpRope {
58    rng: RopeRng,
59    // The total number of characters in the rope
60    // num_chars: usize,
61
62    // The total number of bytes which the characters in the rope take up
63    num_bytes: usize,
64
65    // The first node is inline. The height is the max height we've ever used in the rope + 1. The
66    // highest entry points "past the end" of the list, including the entire list length.
67    // TODO: Get rid of this and just rely on nexts out of here.
68    pub(super) head: Node,
69
70    // This is so dirty. The first node is embedded in JumpRope; but we need to allocate enough room
71    // for height to get arbitrarily large. I could insist on JumpRope always getting allocated on
72    // the heap, but for small strings its better that the first string is just on the stack. So
73    // this struct is repr(C) and I'm just padding out the struct directly.
74    // nexts: [SkipEntry; MAX_HEIGHT+1],
75
76    // The nexts array contains an extra entry at [head.height-1] the which points past the skip
77    // list. The size is the size of the entire list.
78}
79
80/// JumpRope is Send and Sync, because the only way to (safely) mutate the rope is via a &mut
81/// reference.
82unsafe impl Send for JumpRope {}
83unsafe impl Sync for JumpRope {}
84
85pub(super) struct Node {
86    // The first num_bytes of this store a valid utf8 string.
87    // str: [u8; NODE_STR_SIZE],
88    //
89    // // Number of bytes in str in use
90    // num_bytes: u8,
91    pub(super) str: GapBuffer<NODE_STR_SIZE>,
92
93    // Height of nexts array.
94    pub(super) height: u8,
95
96    // #[repr(align(std::align_of::<SkipEntry>()))]
97
98    // Only the first height items are used in this. Earlier versions made explicit allocator calls
99    // to reduce memory usage, but that makes miri quite sad, so I'm now just wasting some memory
100    // in each nexts[] array.
101    nexts: [SkipEntry; MAX_HEIGHT+1],
102}
103
104#[derive(Copy, Clone, Debug, Eq, PartialEq)]
105pub(super) struct SkipEntry {
106    pub(super) node: *mut Node,
107    /// The number of *characters* between the start of the current node and the start of the next
108    /// node.
109    pub(super) skip_chars: usize,
110
111    #[cfg(feature = "wchar_conversion")]
112    pub(super) skip_pairs: usize,
113}
114
115// Make sure nexts uses correct alignment. This should be guaranteed by repr(C)
116// This test will fail if this ever stops being true.
117#[test]
118fn test_align() {
119    #[repr(C)] struct Check([SkipEntry; 0]);
120    assert!(std::mem::align_of::<Check>() >= std::mem::align_of::<SkipEntry>());
121}
122
123fn random_height(rng: &mut RopeRng) -> u8 {
124    let mut h: u8 = 1;
125    // TODO: This is using the thread_local rng, which is secure (?!). Check
126    // this is actually fast.
127    while h < MAX_HEIGHT_U8 && rng.gen::<u8>() < BIAS { h+=1; }
128    h
129}
130
131impl SkipEntry {
132    fn new() -> Self {
133        SkipEntry {
134            node: null_mut(),
135            skip_chars: 0,
136            #[cfg(feature = "wchar_conversion")]
137            skip_pairs: 0
138        }
139    }
140}
141
142impl Default for SkipEntry {
143    fn default() -> Self {
144        Self::new()
145    }
146}
147
148impl Node {
149    pub(super) fn next_ptr(&self) -> *const Self { // TODO: Pin.
150        self.first_next().node
151    }
152
153    // Do I need to be explicit about the lifetime of the references being tied
154    // to the lifetime of the node?
155    fn nexts(&self) -> &[SkipEntry] {
156        &self.nexts[..self.height as usize]
157        // unsafe {
158        //     std::slice::from_raw_parts(self.nexts.as_ptr(), self.height as usize)
159        // }
160    }
161
162    fn nexts_mut(&mut self) -> &mut [SkipEntry] {
163        &mut self.nexts[..self.height as usize]
164        // unsafe {
165        //     std::slice::from_raw_parts_mut(self.nexts.as_mut_ptr(), self.height as usize)
166        // }
167    }
168
169    fn new_with_height(height: u8, content: &str) -> Self {
170        Self {
171            str: GapBuffer::new_from_str(content),
172            height,
173            nexts: [SkipEntry::default(); MAX_HEIGHT+1]
174        }
175    }
176
177    // fn layout_with_height(height: u8) -> Layout {
178    //     Layout::from_size_align(
179    //         mem::size_of::<Node>() + mem::size_of::<SkipEntry>() * (height as usize),
180    //         mem::align_of::<Node>()).unwrap()
181    // }
182
183    // fn alloc_with_height(height: u8, content: &str) -> *mut Node {
184    //     //println!("height {} {}", height, max_height());
185    //     #![allow(clippy::manual_range_contains)]
186    //     assert!(height >= 1 && height <= MAX_HEIGHT_U8);
187    //
188    //     unsafe {
189    //         let node = alloc(Self::layout_with_height(height)) as *mut Node;
190    //         (*node) = Node {
191    //             str: GapBuffer::new_from_str(content),
192    //             height,
193    //             nexts: [SkipEntry::default(); MAX_HEIGHT+1],
194    //         };
195    //
196    //         for next in (*node).nexts_mut() {
197    //             *next = SkipEntry::new();
198    //         }
199    //
200    //         node
201    //     }
202    // }
203
204    // fn alloc(rng: &mut RopeRng, content: &str) -> *mut Node {
205    //     Self::alloc_with_height(random_height(rng), content)
206    // }
207
208    // fn new_random_height(rng: &mut RopeRng, content: &str) -> Node {
209    //     Self::new_with_height(random_height(rng), content)
210    // }
211
212    // unsafe fn free(p: *mut Node) {
213    //     dealloc(p as *mut u8, Self::layout_with_height((*p).height));
214    // }
215
216    fn as_str_1(&self) -> &str {
217        self.str.start_as_str()
218    }
219    fn as_str_2(&self) -> &str {
220        self.str.end_as_str()
221    }
222
223    // The height is at least 1, so this is always valid.
224    pub(super) fn first_next(&self) -> &SkipEntry {
225        unsafe { &*self.nexts.as_ptr() }
226    }
227
228    fn first_next_mut(&mut self) -> &mut SkipEntry {
229        unsafe { &mut *self.nexts.as_mut_ptr() }
230    }
231
232    pub(super) fn num_chars(&self) -> usize {
233        self.first_next().skip_chars
234    }
235
236    #[cfg(feature = "wchar_conversion")]
237    pub(super) fn num_surrogate_pairs(&self) -> usize {
238        self.first_next().skip_pairs
239    }
240}
241
242/// Cursors are a bit weird, and they deserve an explanation.
243///
244/// Cursors express the location that an edit will happen. But because this is a skip list, when
245/// items are added or removed we need to not just splice in / remove elements, but also update:
246///
247/// - The next pointers of *previous* items
248/// - The index item. Each next pointer in a node names how many items are being "skipped over" by
249///   that pointer. Those "skipped over" counts need to be updated based on the change.
250///
251/// Anyway, to do all of this, a cursor names the item which *points to* the current location.
252///
253/// A cursor also implicitly references a &mut JumpRope. So we store some "deep pointers" in to
254/// the jumprope itself so the jumprope reference can stay unused while the cursor is live.
255#[derive(Debug)]
256pub(super) struct MutCursor<'a> {
257    inner: [SkipEntry; MAX_HEIGHT+1],
258
259    // head_nexts: &'a mut [SkipEntry; MAX_HEIGHT+1],
260
261    // head_height: &'a mut u8,
262    rng: &'a mut RopeRng,
263    num_bytes: &'a mut usize,
264
265    phantom: PhantomData<&'a mut JumpRope>,
266}
267
268impl<'a> MutCursor<'a> {
269    fn head_height_u8(&self) -> u8 {
270        unsafe {
271            (*self.inner[MAX_HEIGHT].node).height
272        }
273    }
274
275    fn head_height(&self) -> usize {
276        self.head_height_u8() as usize
277    }
278
279    fn set_height(&mut self, new_height: usize) {
280        unsafe {
281            (*self.inner[MAX_HEIGHT].node).height = new_height as u8
282        }
283    }
284
285    fn is_head(&self, ptr: *const Node) -> bool {
286        std::ptr::eq(ptr, self.inner[MAX_HEIGHT].node)
287    }
288
289    fn update_offsets(&mut self, height: usize, by_chars: isize, #[cfg(feature = "wchar_conversion")] by_pairs: isize) {
290        for i in 0..height {
291            unsafe {
292                // This is weird but makes sense when you realise the nexts in
293                // the cursor are pointers into the elements that have the
294                // actual pointers.
295
296                // Also adding a usize + isize is awful in rust :/
297                let entry = &mut (*self.inner[i].node).nexts[i];
298                entry.skip_chars = entry.skip_chars.wrapping_add(by_chars as usize);
299                #[cfg(feature = "wchar_conversion")] {
300                    entry.skip_pairs = entry.skip_pairs.wrapping_add(by_pairs as usize);
301                }
302            }
303        }
304    }
305
306    fn move_within_node(&mut self, height: usize, by_chars: isize, #[cfg(feature = "wchar_conversion")] by_pairs: isize) {
307        for e in &mut self.inner[..height] {
308            e.skip_chars = e.skip_chars.wrapping_add(by_chars as usize);
309            #[cfg(feature = "wchar_conversion")] {
310                e.skip_pairs = e.skip_pairs.wrapping_add(by_pairs as usize);
311            }
312        }
313    }
314
315    pub(crate) fn here_ptr(&self) -> *mut Node {
316        self.inner[0].node
317    }
318
319    pub(crate) fn here_mut_ptr(&mut self) -> *mut Node {
320        self.inner[0].node
321    }
322
323    pub(crate) fn global_char_pos(&self) -> usize {
324        self.inner[self.head_height() - 1].skip_chars
325    }
326
327    #[cfg(feature = "wchar_conversion")]
328    pub(crate) fn wchar_pos(&self) -> usize {
329        let entry = &self.inner[self.head_height() - 1];
330        entry.skip_chars + entry.skip_pairs
331    }
332
333    pub(crate) fn local_char_pos(&self) -> usize {
334        self.inner[0].skip_chars
335    }
336}
337
338pub(crate) struct ReadCursor<'a> {
339    pub(super) node: &'a Node,
340
341    /// The number of *characters* between the start of the current node and the start of the next
342    /// node.
343    pub(super) offset_chars: usize,
344
345    // We can populate this, but we aren't using it anywhere.
346    // #[cfg(feature = "wchar_conversion")]
347    // pub(super) offset_pairs: usize,
348
349    // This is a bit gross, but its useful.
350    #[cfg(feature = "wchar_conversion")]
351    global_pairs: usize,
352
353    phantom: PhantomData<&'a JumpRope>
354}
355
356// impl ReadCursor {
357//
358// }
359
360/// A rope is a "rich string" data structure for storing fancy strings, like the contents of a
361/// text editor. See module level documentation for more information.
362impl JumpRope {
363    fn new_with_rng(rng: RopeRng) -> Self {
364        JumpRope {
365            rng,
366            num_bytes: 0,
367            // nexts: [SkipEntry::new(); MAX_HEIGHT],
368
369            // We don't ever store characters in the head node, but the height
370            // here is the maximum height of the entire rope.
371            head: Node::new_with_height(1, ""),
372            // head: Node {
373            //     str: GapBuffer::new(),
374            //     height: 1,
375            //     nexts: [],
376            // },
377            // nexts: [SkipEntry::new(); MAX_HEIGHT+1],
378        }
379    }
380
381    /// Creates and returns a new, empty rope.
382    ///
383    /// In release mode this method is an alias for [`new_from_entropy`](Self::new_from_entropy).
384    /// But when compiled for testing (or in debug mode), we use a fixed seed in order to keep tests
385    /// fully deterministic.
386    ///
387    /// Note using this method in wasm significantly increases bundle size. Use
388    /// [`new_with_seed`](Self::new_from_seed) instead.
389    pub fn new() -> Self {
390        if cfg!(test) || cfg!(debug_assertions) || !cfg!(feature = "ddos_protection") {
391            Self::new_from_seed(123)
392        } else {
393            Self::new_from_entropy()
394        }
395    }
396
397    /// Creates a new, empty rope seeded from an entropy source.
398    pub fn new_from_entropy() -> Self {
399        Self::new_with_rng(RopeRng::from_entropy())
400    }
401
402    /// Creates a new, empty rope using an RNG seeded from the passed u64 parameter.
403    ///
404    /// The performance of this library with any particular data set will vary by a few percent
405    /// within a range based on the seed provided. It may be useful to fix the seed within tests or
406    /// benchmarks in order to make the program entirely deterministic, though bear in mind:
407    ///
408    /// - Jumprope will always use a fixed seed
409    pub fn new_from_seed(seed: u64) -> Self {
410        Self::new_with_rng(RopeRng::seed_from_u64(seed))
411    }
412
413    fn new_from_str(s: &str) -> Self {
414        let mut rope = Self::new();
415        rope.insert(0, s);
416        rope
417    }
418
419    /// Return the length of the rope in unicode characters. Note this is not the same as either
420    /// the number of bytes the characters take, or the number of grapheme clusters in the string.
421    ///
422    /// This method returns the length in constant-time (*O(1)*).
423    ///
424    /// # Example
425    ///
426    /// ```
427    /// # use hi_doc_jumprope::*;
428    /// assert_eq!("↯".len(), 3);
429    ///
430    /// let rope = JumpRope::from("↯");
431    /// assert_eq!(rope.len_chars(), 1);
432    ///
433    /// // The unicode snowman grapheme cluster needs 2 unicode characters.
434    /// let snowman = JumpRope::from("☃️");
435    /// assert_eq!(snowman.len_chars(), 2);
436    /// ```
437    pub fn len_chars(&self) -> usize {
438        self.head.nexts[self.head.height as usize - 1].skip_chars
439    }
440
441    /// String length in wide characters (as would be reported by javascript / C# / etc).
442    ///
443    /// The byte length of this string when encoded to UTF16 will be exactly
444    /// `rope.len_wchars() * 2`.
445    #[cfg(feature = "wchar_conversion")]
446    pub fn len_wchars(&self) -> usize {
447        let SkipEntry {
448            skip_chars,
449            skip_pairs,
450            ..
451        } = self.head.nexts[self.head.height as usize - 1];
452
453        skip_pairs + skip_chars
454    }
455
456    /// Does the rope only contain ASCII characters? (Unicode codepoints < 128). There are some
457    /// optimizations that can be done if this is true.
458    #[cfg(feature = "wchar_conversion")]
459    pub fn is_ascii_only(&self) -> bool {
460        self.head.nexts[self.head.height as usize - 1].skip_pairs == 0
461    }
462
463    /// Returns read cursor and global surrogate pair position.
464    ///
465    /// Surrogate pairs are only counted if wchar_conversion feature enabled.
466    pub(crate) fn read_cursor_at_char(&self, char_pos: usize, stick_end: bool) -> ReadCursor<'_> {
467        assert!(char_pos <= self.len_chars());
468
469        let mut e: *const Node = &self.head;
470        let mut height = self.head.height as usize - 1;
471
472        let mut offset_chars = char_pos; // How many more chars to skip
473
474        #[cfg(feature = "wchar_conversion")]
475        let mut global_pairs = 0; // Current wchar pos from the start of the rope
476
477        loop { // while height >= 0
478            let en = unsafe { &*e };
479            let next = en.nexts[height];
480            let skip = next.skip_chars;
481            if offset_chars > skip || (!stick_end && offset_chars == skip && !next.node.is_null()) {
482                // Go right.
483                // debug_assert!(e == &self.head || !en.str.is_empty());
484                offset_chars -= skip;
485                #[cfg(feature = "wchar_conversion")] {
486                    global_pairs += next.skip_pairs;
487                }
488                e = next.node;
489                assert!(!e.is_null(), "Internal constraint violation: Reached rope end prematurely");
490            } else {
491                // Go down.
492                if height != 0 {
493                    height -= 1;
494                } else {
495                    #[cfg(feature = "wchar_conversion")]
496                    let offset_pairs = en.str.count_surrogate_pairs(offset_chars);
497                    #[cfg(feature = "wchar_conversion")] {
498                        global_pairs += offset_pairs;
499                    }
500
501                    return ReadCursor {
502                        node: unsafe { &*e },
503                        offset_chars,
504                        // #[cfg(feature = "wchar_conversion")]
505                        // offset_pairs,
506                        phantom: PhantomData,
507                        #[cfg(feature = "wchar_conversion")]
508                        global_pairs,
509                    }
510                }
511            }
512        };
513    }
514
515    pub(super) fn mut_cursor_at_char(&mut self, char_pos: usize, stick_end: bool) -> MutCursor<'_> {
516        assert!(char_pos <= self.len_chars());
517
518        let mut e: *mut Node = &mut self.head;
519        let head_height = self.head.height as usize;
520        let mut height = head_height - 1;
521
522        let mut offset = char_pos; // How many more chars to skip
523
524        #[cfg(feature = "wchar_conversion")]
525        let mut surrogate_pairs = 0; // Current wchar pos from the start of the rope
526
527        // It would be nice to pop this into a function, but miri gets confused if we pass the node
528        // pointer out of this method. So I'm keeping this inline.
529        let mut cursor = MutCursor {
530            inner: [SkipEntry {
531                node: e,
532                skip_chars: 0,
533                #[cfg(feature = "wchar_conversion")]
534                skip_pairs: 0
535            }; MAX_HEIGHT+1],
536            rng: &mut self.rng,
537            num_bytes: &mut self.num_bytes,
538            phantom: PhantomData,
539        };
540
541        loop { // while height >= 0
542            let en = unsafe { &*e };
543            let next = en.nexts[height];
544            let skip = next.skip_chars;
545            if offset > skip || (!stick_end && offset == skip && !next.node.is_null()) {
546                // Go right.
547
548                // This breaks miri for some reason.
549                // assert!(e == &mut self.head || !en.str.is_empty());
550                offset -= skip;
551                #[cfg(feature = "wchar_conversion")] {
552                    surrogate_pairs += next.skip_pairs;
553                }
554                e = next.node;
555                assert!(!e.is_null(), "Internal constraint violation: Reached rope end prematurely");
556            } else {
557                // Record this and go down.
558                cursor.inner[height] = SkipEntry {
559                    // node: e as *mut Node, // This is pretty gross
560                    node: e,
561                    skip_chars: offset,
562                    #[cfg(feature = "wchar_conversion")]
563                    skip_pairs: surrogate_pairs
564                };
565
566                if height != 0 {
567                    height -= 1;
568                } else {
569                    #[cfg(feature = "wchar_conversion")] {
570                        // Add on the wchar length at the current node.
571                        surrogate_pairs += en.str.count_surrogate_pairs(offset);
572                        if surrogate_pairs > 0 {
573                            for entry in &mut cursor.inner[0..head_height] {
574                                entry.skip_pairs = surrogate_pairs - entry.skip_pairs;
575                            }
576                        }
577                    }
578                    break;
579                }
580            }
581        };
582
583        assert!(offset <= NODE_STR_SIZE);
584
585        cursor
586    }
587
588    #[cfg(feature = "wchar_conversion")]
589    pub(crate) fn count_chars_at_wchar(&self, wchar_pos: usize) -> usize {
590        assert!(wchar_pos <= self.len_wchars());
591
592        let mut height = self.head.height as usize - 1;
593        let mut e: *const Node = &self.head;
594
595        let mut offset = wchar_pos; // How many more chars to skip
596
597        let mut char_pos = 0; // Char pos from the start of the rope
598
599        loop {
600            let en = unsafe { &*e };
601            let next = en.nexts[height];
602            let skip = next.skip_chars + next.skip_pairs;
603            if offset > skip {
604                // Go right.
605                // assert!(e == &self.head || !en.str.is_empty());
606                offset -= skip;
607                char_pos += next.skip_chars;
608                e = next.node;
609                assert!(!e.is_null(), "Internal constraint violation: Reached rope end prematurely");
610            } else {
611                // Go down.
612                if height != 0 {
613                    height -= 1;
614                } else {
615                    char_pos += en.str.count_chars_in_wchars(offset);
616                    return char_pos;
617                }
618            }
619        };
620    }
621
622    /// Create a cursor pointing wchar characters into the rope
623    #[cfg(feature = "wchar_conversion")]
624    pub(crate) fn mut_cursor_at_wchar(&mut self, wchar_pos: usize, stick_end: bool) -> MutCursor {
625        assert!(wchar_pos <= self.len_wchars());
626
627        let head_height = self.head.height as usize;
628        let mut e: *mut Node = &mut self.head;
629        let mut height = self.head.height as usize - 1;
630
631        let mut offset = wchar_pos; // How many more chars to skip
632
633        let mut char_pos = 0; // Char pos from the start of the rope
634
635        let mut cursor = MutCursor {
636            inner: [SkipEntry {
637                node: e,
638                skip_chars: 0,
639                #[cfg(feature = "wchar_conversion")]
640                skip_pairs: 0
641            }; MAX_HEIGHT+1],
642            rng: &mut self.rng,
643            num_bytes: &mut self.num_bytes,
644            phantom: PhantomData,
645        };
646
647        loop {
648            let en = unsafe { &*e };
649            let next = en.nexts[height];
650            let skip = next.skip_chars + next.skip_pairs;
651            if offset > skip || (!stick_end && offset == skip && !next.node.is_null()) {
652                // Go right.
653                // assert!(e == &self.head || !en.str.is_empty());
654                offset -= skip;
655                char_pos += next.skip_chars;
656                e = next.node;
657                assert!(!e.is_null(), "Internal constraint violation: Reached rope end prematurely");
658            } else {
659                // Record this and go down.
660                cursor.inner[height] = SkipEntry {
661                    node: e,
662                    skip_chars: char_pos,
663                    skip_pairs: offset
664                };
665
666                if height != 0 {
667                    height -= 1;
668                } else {
669                    char_pos += en.str.count_chars_in_wchars(offset);
670                    for entry in &mut cursor.inner[0..head_height] {
671                        let skip_chars = char_pos - entry.skip_chars;
672                        entry.skip_chars = skip_chars;
673                        entry.skip_pairs -= skip_chars;
674                    }
675                    break;
676                }
677            }
678        };
679
680        assert!(offset <= NODE_STR_SIZE);
681
682        cursor
683    }
684
685    fn mut_cursor_at_start(&mut self) -> MutCursor<'_> {
686        MutCursor {
687            inner: [SkipEntry {
688                node: &mut self.head,
689                skip_chars: 0,
690                #[cfg(feature = "wchar_conversion")]
691                skip_pairs: 0
692            }; MAX_HEIGHT+1],
693            rng: &mut self.rng,
694            num_bytes: &mut self.num_bytes,
695            phantom: PhantomData,
696        }
697    }
698
699    fn mut_cursor_at_end(&mut self) -> MutCursor {
700        self.mut_cursor_at_char(self.len_chars(), true)
701    }
702
703    fn insert_node_at(cursor: &mut MutCursor, contents: &str, num_chars: usize, update_cursor: bool, #[cfg(feature = "wchar_conversion")] num_pairs: usize) {
704        // println!("Insert_node_at {} len {}", contents.len(), self.num_bytes);
705        // assert!(contents.len() < NODE_STR_SIZE);
706        debug_assert_eq!(count_chars(contents), num_chars);
707        #[cfg(feature = "wchar_conversion")] {
708            debug_assert_eq!(count_utf16_surrogates(contents), num_pairs);
709        }
710        debug_assert!(num_chars <= NODE_STR_SIZE);
711
712        // TODO: Pin this sucka.
713        // let new_node = Pin::new(Node::alloc());
714        // let new_node = Node::alloc(cursor.rng, contents);
715
716        let new_height = random_height(cursor.rng);
717        let new_node = Box::into_raw(Box::new(Node::new_with_height(new_height, contents)));
718
719        let new_height = new_height as usize;
720
721        // let new_height = unsafe { (*new_node).height as usize };
722
723        let mut head_height = cursor.head_height();
724        while head_height <= new_height {
725            // TODO: Why do we copy here? Explain it in a comment. This is
726            // currently lifted from the C code.
727            // cursor.head_nexts[head_height] = cursor.head_nexts[head_height - 1];
728            unsafe {
729                let head = &mut (*cursor.inner[head_height].node);
730                head.nexts[head_height] = head.nexts[head_height - 1];
731            }
732
733            cursor.inner[head_height] = cursor.inner[head_height - 1];
734
735            // *cursor.head_height += 1; // Ends up 1 more than the max node height.
736            head_height += 1;
737            cursor.set_height(head_height);
738        }
739
740        for i in 0..new_height {
741            let prev_skip = unsafe { &mut (*cursor.inner[i].node).nexts[i] };
742            let nexts = unsafe { &mut (*new_node).nexts };
743            nexts[i].node = prev_skip.node;
744            nexts[i].skip_chars = num_chars + prev_skip.skip_chars - cursor.inner[i].skip_chars;
745
746            prev_skip.node = new_node;
747            prev_skip.skip_chars = cursor.inner[i].skip_chars;
748
749            #[cfg(feature = "wchar_conversion")] {
750                nexts[i].skip_pairs = num_pairs + prev_skip.skip_pairs - cursor.inner[i].skip_pairs;
751                prev_skip.skip_pairs = cursor.inner[i].skip_pairs;
752            }
753
754            // & move the iterator to the end of the newly inserted node.
755            if update_cursor {
756                cursor.inner[i].node = new_node;
757                cursor.inner[i].skip_chars = num_chars;
758                #[cfg(feature = "wchar_conversion")] {
759                    cursor.inner[i].skip_pairs = num_pairs;
760                }
761            }
762        }
763
764        for i in new_height..head_height {
765            // I don't know why miri needs me to use nexts[] rather than nexts_mut() here but ??.
766            unsafe {
767                (*cursor.inner[i].node).nexts[i].skip_chars += num_chars;
768                #[cfg(feature = "wchar_conversion")] {
769                    (*cursor.inner[i].node).nexts[i].skip_pairs += num_pairs;
770                }
771            }
772            if update_cursor {
773                cursor.inner[i].skip_chars += num_chars;
774                #[cfg(feature = "wchar_conversion")] {
775                    cursor.inner[i].skip_pairs += num_pairs;
776                }
777            }
778        }
779
780        // self.nexts[self.head.height as usize - 1].skip_chars += num_chars;
781        *cursor.num_bytes += contents.len();
782    }
783
784    fn insert_at_cursor(cursor: &mut MutCursor, contents: &str) {
785        if contents.is_empty() { return; }
786        // iter contains how far (in characters) into the current element to
787        // skip. Figure out how much that is in bytes.
788        let mut offset_bytes: usize = 0;
789        // The insertion offset into the destination node.
790        let offset_chars: usize = cursor.inner[0].skip_chars;
791        let head_height = cursor.head_height();
792
793        let mut e = cursor.here_mut_ptr();
794
795        // We might be able to insert the new data into the current node, depending on
796        // how big it is. We'll count the bytes, and also check that its valid utf8.
797        let num_inserted_bytes = contents.len();
798        let mut num_inserted_chars = count_chars(contents);
799        #[cfg(feature = "wchar_conversion")]
800            let mut num_inserted_pairs = if num_inserted_bytes != num_inserted_chars {
801            count_utf16_surrogates(contents)
802        } else { 0 };
803
804        // Adding this short circuit makes the code about 2% faster for 1% more code
805        unsafe {
806            if (*e).str.gap_start_chars as usize == offset_chars && (*e).str.gap_len as usize >= num_inserted_bytes {
807                // Short circuit. If we can just insert all the content right here in the gap, do so.
808                (*e).str.insert_in_gap(contents);
809
810                #[cfg(feature = "wchar_conversion")] {
811                    cursor.update_offsets(head_height, num_inserted_chars as isize, num_inserted_pairs as isize);
812                    cursor.move_within_node(head_height, num_inserted_chars as isize, num_inserted_pairs as isize);
813                }
814                #[cfg(not(feature = "wchar_conversion"))] {
815                    cursor.update_offsets(head_height, num_inserted_chars as isize);
816                    cursor.move_within_node(head_height, num_inserted_chars as isize);
817                }
818
819                *cursor.num_bytes += num_inserted_bytes;
820                return;
821            }
822
823            if offset_chars > 0 {
824                // Changing this to debug_assert reduces performance by a few % for some reason.
825                assert!(offset_chars <= (*e).nexts[0].skip_chars);
826                // This could be faster, but its not a big deal.
827                offset_bytes = (*e).str.count_bytes(offset_chars);
828            }
829
830            // Can we insert into the current node?
831            let current_len_bytes = (*e).str.len_bytes();
832            let mut insert_here = current_len_bytes + num_inserted_bytes <= NODE_STR_SIZE;
833
834            // If we can't insert here, see if we can move the cursor forward and insert into the
835            // subsequent node.
836            if !insert_here && offset_bytes == current_len_bytes {
837                // We can insert into the subsequent node if:
838                // - We can't insert into the current node
839                // - There _is_ a next node to insert into
840                // - The insert would be at the start of the next node
841                // - There's room in the next node
842                if let Some(next) = (*e).first_next_mut().node.as_mut() {
843                    if next.str.len_bytes() + num_inserted_bytes <= NODE_STR_SIZE {
844                        offset_bytes = 0;
845
846                        // Could do this with slice::fill but this seems slightly faster.
847                        for e in &mut cursor.inner[..next.height as usize] {
848                            *e = SkipEntry {
849                                node: next,
850                                skip_chars: 0,
851                                #[cfg(feature = "wchar_conversion")]
852                                skip_pairs: 0
853                            };
854                        }
855                        e = next;
856
857                        insert_here = true;
858                    }
859                }
860            }
861
862            if insert_here {
863                // First move the current bytes later on in the string.
864                let c = &mut (*e).str;
865                c.try_insert(offset_bytes, contents).unwrap();
866
867                *cursor.num_bytes += num_inserted_bytes;
868                // .... aaaand update all the offset amounts.
869
870                #[cfg(feature = "wchar_conversion")] {
871                    cursor.update_offsets(head_height, num_inserted_chars as isize, num_inserted_pairs as isize);
872                    cursor.move_within_node(head_height, num_inserted_chars as isize, num_inserted_pairs as isize);
873                }
874                #[cfg(not(feature = "wchar_conversion"))] {
875                    cursor.update_offsets(head_height, num_inserted_chars as isize);
876                    cursor.move_within_node(head_height, num_inserted_chars as isize);
877                }
878            } else {
879                // There isn't room. We'll need to add at least one new node to the rope.
880
881                // If we're not at the end of the current node, we'll need to remove
882                // the end of the current node's data and reinsert it later.
883                (*e).str.move_gap(offset_bytes);
884
885                let num_end_bytes = (*e).str.len_bytes() - offset_bytes;
886                let mut num_end_chars: usize = 0;
887                #[cfg(feature = "wchar_conversion")]
888                let mut num_end_pairs: usize = 0;
889
890                // let end_str = if num_end_bytes > 0 {
891                if num_end_bytes > 0 {
892                    // We'll truncate the node, but leave the bytes themselves there (for later).
893
894                    // It would also be correct (and slightly more space efficient) to pack some of the
895                    // new string's characters into this node after trimming it.
896                    num_end_chars = (*e).num_chars() - offset_chars;
897
898                    #[cfg(feature = "wchar_conversion")] {
899                        num_end_pairs = (*e).num_surrogate_pairs() - (*e).str.gap_start_surrogate_pairs as usize;
900                        debug_assert_eq!(num_end_pairs, count_utf16_surrogates((*e).str.end_as_str()));
901                        cursor.update_offsets(head_height, -(num_end_chars as isize), -(num_end_pairs as isize));
902                    }
903                    #[cfg(not(feature = "wchar_conversion"))]
904                    cursor.update_offsets(head_height, -(num_end_chars as isize));
905
906                    *cursor.num_bytes -= num_end_bytes;
907                }
908
909                // Now we insert new nodes containing the new character data. The
910                // data must be broken into pieces of with a maximum size of
911                // NODE_STR_SIZE. Node boundaries must not occur in the middle of a
912                // utf8 codepoint.
913                // let mut str_offset: usize = 0;
914                let mut remainder = contents;
915                // while !remainder.is_empty() {
916                loop {
917                    // println!(". {}", remainder);
918                    // Find the first index after STR_SIZE bytes
919
920                    if remainder.len() <= NODE_STR_SIZE {
921                        Self::insert_node_at(cursor, remainder, num_inserted_chars, true, #[cfg(feature = "wchar_conversion")] num_inserted_pairs);
922                        break;
923                    } else {
924                        // Find a suitable cut point. We should take as many characters as we can fit in
925                        // the node, without splitting any unicode codepoints.
926                        let mut byte_pos = NODE_STR_SIZE;
927                        loop { // Slide back to a character boundary.
928                            let c = remainder.as_bytes()[byte_pos];
929                            if c & 0b1100_0000 != 0b1000_0000 {
930                                break;
931                            }
932                            byte_pos -= 1;
933                        }
934
935                        let slice = &remainder.as_bytes()[..byte_pos];
936                        let char_pos = count_chars_in_bytes(slice);
937                        num_inserted_chars -= char_pos;
938
939                        #[cfg(feature = "wchar_conversion")]
940                        let pairs = count_utf16_surrogates_in_bytes(slice);
941                        #[cfg(feature = "wchar_conversion")] {
942                            num_inserted_pairs -= pairs;
943                        }
944
945                        let (next, rem) = remainder.split_at(byte_pos);
946                        assert!(!next.is_empty());
947                        Self::insert_node_at(cursor, next, char_pos, true, #[cfg(feature = "wchar_conversion")] pairs);
948                        remainder = rem;
949                    }
950                }
951
952                if num_end_bytes > 0 {
953                    let end_str = (*e).str.take_rest();
954                    Self::insert_node_at(cursor, end_str, num_end_chars, false, #[cfg(feature = "wchar_conversion")] num_end_pairs);
955                }
956                // if let Some(end_str) = end_str {
957                //     Self::insert_node_at(cursor, end_str, num_end_chars, false, #[cfg(feature = "wchar_conversion")] num_end_pairs);
958                // }
959            }
960
961            assert_ne!(cursor.local_char_pos(), 0);
962        }
963    }
964
965    fn del_at_cursor(cursor: &mut MutCursor, mut length: usize) {
966        if length == 0 { return; }
967        let mut offset_chars = cursor.local_char_pos();
968        let mut node = cursor.here_ptr();
969        unsafe {
970            while length > 0 {
971                {
972                    let s = (*node).first_next();
973                    if offset_chars == s.skip_chars {
974                        // End of current node. Skip to the start of the next one.
975                        node = s.node;
976                        offset_chars = 0;
977                    }
978                }
979
980                let num_chars = (*node).num_chars();
981                let removed = std::cmp::min(length, num_chars - offset_chars);
982                assert!(removed > 0);
983
984                // TODO: Figure out a better way to calculate this.
985                #[cfg(feature = "wchar_conversion")]
986                    let removed_pairs = (*node).str.count_surrogate_pairs(offset_chars + removed)
987                    - (*node).str.count_surrogate_pairs(offset_chars);
988
989                let height = (*node).height as usize;
990                if removed < num_chars || cursor.is_head(node) {
991                    // Just trim the node down.
992                    let s = &mut (*node).str;
993                    let removed_bytes = s.remove_chars(offset_chars, removed);
994                    *cursor.num_bytes -= removed_bytes;
995
996                    for s in (*node).nexts_mut() {
997                        s.skip_chars -= removed;
998                        #[cfg(feature = "wchar_conversion")] {
999                            s.skip_pairs -= removed_pairs;
1000                        }
1001                    }
1002                } else {
1003                    // Remove the node from the skip list. This works because the cursor must be
1004                    // pointing from the previous element to the start of this element.
1005                    assert_ne!(cursor.inner[0].node, node);
1006
1007                    for i in 0..(*node).height as usize {
1008                        let s = &mut (*cursor.inner[i].node).nexts_mut()[i];
1009                        s.node = (*node).nexts[i].node;
1010                        s.skip_chars += (*node).nexts[i].skip_chars - removed;
1011                        #[cfg(feature = "wchar_conversion")] {
1012                            s.skip_pairs += (*node).nexts[i].skip_pairs - removed_pairs;
1013                        }
1014                    }
1015
1016                    *cursor.num_bytes -= (*node).str.len_bytes();
1017                    let next = (*node).first_next().node;
1018                    // Node::free(node);
1019                    drop(Box::from_raw(node));
1020                    node = next;
1021                }
1022
1023                for i in height..cursor.head_height() {
1024                    let s = &mut (*cursor.inner[i].node).nexts[i];
1025                    s.skip_chars -= removed;
1026                    #[cfg(feature = "wchar_conversion")] {
1027                        s.skip_pairs -= removed_pairs;
1028                    }
1029                }
1030
1031                length -= removed;
1032            }
1033        }
1034    }
1035
1036    fn eq_str(&self, mut other: &str) -> bool {
1037        if self.len_bytes() != other.len() { return false; }
1038
1039        for s in self.substrings() {
1040            let (start, rem) = other.split_at(s.len());
1041            if start != s { return false; }
1042            other = rem;
1043        }
1044
1045        true
1046    }
1047}
1048
1049impl Default for JumpRope {
1050    fn default() -> Self {
1051        Self::new()
1052    }
1053}
1054
1055impl Drop for JumpRope {
1056    fn drop(&mut self) {
1057        let mut node = self.head.first_next().node;
1058        unsafe {
1059            while !node.is_null() {
1060                let next = (*node).first_next().node;
1061                // Node::free(node);
1062                drop(Box::from_raw(node));
1063                node = next;
1064            }
1065        }
1066    }
1067}
1068
1069impl<S: AsRef<str>> From<S> for JumpRope {
1070    fn from(str: S) -> Self {
1071        JumpRope::new_from_str(str.as_ref())
1072    }
1073}
1074
1075impl PartialEq for JumpRope {
1076    // This is quite complicated. It would be cleaner to just write a bytes
1077    // iterator, then iterate over the bytes of both strings comparing along the
1078    // way.
1079    // However, this should be faster because it can memcmp().
1080
1081    // Another way to implement this would be to rewrite it as a comparison with
1082    // an iterator over &str. Then the rope vs rope comparison would be trivial,
1083    // but also we could add comparison functions with a single &str and stuff
1084    // very easily.
1085    fn eq(&self, other: &JumpRope) -> bool {
1086        if self.num_bytes != other.num_bytes
1087                || self.len_chars() != other.len_chars() {
1088            return false
1089        }
1090
1091        let mut other_iter = other.substrings();
1092
1093        // let mut os = other_iter.next();
1094        let mut os = "";
1095
1096        for mut s in self.substrings() {
1097            // Walk s.len() bytes through the other rope
1098            while !s.is_empty() {
1099                if os.is_empty() {
1100                    os = other_iter.next().unwrap();
1101                }
1102                debug_assert!(!os.is_empty());
1103
1104                let amt = min(s.len(), os.len());
1105                debug_assert!(amt > 0);
1106
1107                let (s_start, s_rem) = s.split_at(amt);
1108                let (os_start, os_rem) = os.split_at(amt);
1109
1110                if s_start != os_start { return false; }
1111
1112                s = s_rem;
1113                os = os_rem;
1114            }
1115        }
1116
1117        true
1118    }
1119}
1120impl Eq for JumpRope {}
1121
1122impl Debug for JumpRope {
1123    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1124        f.debug_list()
1125            .entries(self.substrings())
1126            .finish()
1127    }
1128}
1129
1130impl Display for JumpRope {
1131    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
1132        for s in self.substrings() {
1133            f.write_str(s)?;
1134        }
1135        Ok(())
1136    }
1137}
1138
1139// I don't know why I need all three of these, but I do.
1140
1141impl<T: AsRef<str>> PartialEq<T> for JumpRope {
1142    fn eq(&self, other: &T) -> bool {
1143        self.eq_str(other.as_ref())
1144    }
1145}
1146
1147// Needed for assert_eq!(&rope, "Hi there");
1148impl PartialEq<str> for JumpRope {
1149    fn eq(&self, other: &str) -> bool {
1150        self.eq_str(other)
1151    }
1152}
1153
1154// Needed for assert_eq!(&rope, String::from("Hi there"));
1155impl PartialEq<String> for &JumpRope {
1156    fn eq(&self, other: &String) -> bool {
1157        self.eq_str(other.as_str())
1158    }
1159}
1160
1161impl<'a> Extend<&'a str> for JumpRope {
1162    fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
1163        let mut cursor = self.mut_cursor_at_end();
1164        iter.into_iter().for_each(|s| {
1165            Self::insert_at_cursor(&mut cursor, s);
1166        });
1167    }
1168}
1169
1170impl Clone for JumpRope {
1171    fn clone(&self) -> Self {
1172        // This method could be a little bit more efficient, but I think improving clone()
1173        // performance isn't worth the extra effort.
1174        let mut r = JumpRope::new();
1175        let mut cursor = r.mut_cursor_at_start();
1176        for node in self.node_iter_at_start() {
1177            JumpRope::insert_at_cursor(&mut cursor, node.as_str_1());
1178            JumpRope::insert_at_cursor(&mut cursor, node.as_str_2());
1179        }
1180        r
1181    }
1182}
1183
1184impl JumpRope {
1185    /// Insert new content into the rope. The content is inserted at the specified unicode character
1186    /// offset, which is different from a byte offset for non-ASCII characters.
1187    ///
1188    /// # Example
1189    ///
1190    /// ```
1191    /// # use hi_doc_jumprope::*;
1192    /// let mut rope = JumpRope::from("--");
1193    /// rope.insert(1, "hi there");
1194    /// assert_eq!(rope.to_string(), "-hi there-");
1195    /// ```
1196    ///
1197    /// If the position names a location past the end of the rope, it is truncated.
1198    pub fn insert(&mut self, mut pos: usize, contents: &str) {
1199        // if cfg!(debug_assertions) { self.check(); }
1200
1201        if contents.is_empty() { return; }
1202        pos = std::cmp::min(pos, self.len_chars());
1203
1204        let mut cursor = self.mut_cursor_at_char(pos, true);
1205
1206        Self::insert_at_cursor(&mut cursor, contents);
1207
1208        debug_assert_eq!(cursor.global_char_pos(), pos + count_chars(contents));
1209        // dbg!(&cursor.0[..self.head.height as usize]);
1210    }
1211
1212    /// Delete a span of unicode characters from the rope. The span is specified in unicode
1213    /// characters, not bytes.
1214    ///
1215    /// Any attempt to delete past the end of the rope will be silently ignored.
1216    ///
1217    /// # Example
1218    ///
1219    /// ```
1220    /// # use hi_doc_jumprope::*;
1221    /// let mut rope = JumpRope::from("Whoa dawg!");
1222    /// rope.remove(4..9); // delete " dawg"
1223    /// assert_eq!(rope.to_string(), "Whoa!");
1224    /// ```
1225    pub fn remove(&mut self, mut range: Range<usize>) {
1226        // if cfg!(debug_assertions) { self.check(); }
1227
1228        range.end = range.end.min(self.len_chars());
1229        if range.start >= range.end { return; }
1230
1231        // We need to stick_end so we can delete entries.
1232        let mut cursor = self.mut_cursor_at_char(range.start, true);
1233        Self::del_at_cursor(&mut cursor, range.end - range.start);
1234
1235        debug_assert_eq!(cursor.global_char_pos(), range.start);
1236    }
1237
1238    /// Replace the specified range with new content. This is equivalent to calling
1239    /// [`remove`](Self::remove) followed by [`insert`](Self::insert), but it is simpler and faster.
1240    ///
1241    /// # Example
1242    ///
1243    /// ```
1244    /// # use hi_doc_jumprope::*;
1245    /// let mut rope = JumpRope::from("Hi Mike!");
1246    /// rope.replace(3..7, "Duane"); // replace "Mike" with "Duane"
1247    /// assert_eq!(rope.to_string(), "Hi Duane!");
1248    /// ```
1249    pub fn replace(&mut self, range: Range<usize>, content: &str) {
1250        let len = self.len_chars();
1251        let pos = usize::min(range.start, len);
1252        let del_len = usize::min(range.end, len) - pos;
1253
1254        let mut cursor = self.mut_cursor_at_char(pos, true);
1255        if del_len > 0 {
1256            Self::del_at_cursor(&mut cursor, del_len);
1257        }
1258        if !content.is_empty() {
1259            Self::insert_at_cursor(&mut cursor, content);
1260        }
1261
1262        debug_assert_eq!(cursor.global_char_pos(), pos + count_chars(content));
1263    }
1264
1265    /// Get the number of bytes used for the UTF8 representation of the rope. This will always match
1266    /// the .len() property of the equivalent String.
1267    ///
1268    /// Note: This is only useful in specific situations - like preparing a byte buffer for saving
1269    /// or sending over the internet. In many cases it is preferable to use
1270    /// [`len_chars`](Self::len_chars).
1271    ///
1272    /// # Example
1273    ///
1274    /// ```
1275    /// # use hi_doc_jumprope::*;
1276    /// let str = "κόσμε"; // "Cosmos" in ancient greek
1277    /// assert_eq!(str.len(), 11); // 11 bytes over the wire
1278    ///
1279    /// let rope = JumpRope::from(str);
1280    /// assert_eq!(rope.len_bytes(), str.len());
1281    /// ```
1282    pub fn len_bytes(&self) -> usize { self.num_bytes }
1283
1284    /// Returns `true` if the rope contains no elements.
1285    pub fn is_empty(&self) -> bool { self.num_bytes == 0 }
1286
1287    pub fn check(&self) {
1288        assert!(self.head.height >= 1);
1289        assert!(self.head.height < MAX_HEIGHT_U8 + 1);
1290
1291        let skip_over = &self.head.nexts[self.head.height as usize - 1];
1292        // println!("Skip over skip chars {}, num bytes {}", skip_over.skip_chars, self.num_bytes);
1293        assert!(skip_over.skip_chars <= self.num_bytes as usize);
1294        #[cfg(feature = "wchar_conversion")] {
1295            assert!(skip_over.skip_pairs <= skip_over.skip_chars);
1296        }
1297        assert!(skip_over.node.is_null());
1298
1299        // The offsets store the total distance travelled since the start.
1300        let mut iter = [SkipEntry::new(); MAX_HEIGHT];
1301        for i in 0..self.head.height {
1302            // Bleh.
1303            iter[i as usize].node = &self.head as *const Node as *mut Node;
1304        }
1305
1306        let mut num_bytes: usize = 0;
1307        let mut num_chars = 0;
1308        #[cfg(feature = "wchar_conversion")]
1309        let mut num_pairs = 0;
1310
1311        for n in self.node_iter_at_start() {
1312            // println!("visiting {:?}", n.as_str());
1313            assert!(!n.str.is_empty() || std::ptr::eq(n, &self.head));
1314            assert!(n.height <= MAX_HEIGHT_U8);
1315            assert!(n.height >= 1);
1316            n.str.check();
1317
1318            assert_eq!(count_chars(n.as_str_1()) + count_chars(n.as_str_2()), n.num_chars());
1319            for (i, entry) in iter[0..n.height as usize].iter_mut().enumerate() {
1320                assert_eq!(entry.node as *const Node, n as *const Node);
1321                assert_eq!(entry.skip_chars, num_chars);
1322                #[cfg(feature = "wchar_conversion")] {
1323                    assert_eq!(entry.skip_pairs, num_pairs);
1324                }
1325
1326                // println!("replacing entry {:?} with {:?}", entry, n.nexts()[i].node);
1327                entry.node = n.nexts[i].node;
1328                entry.skip_chars += n.nexts[i].skip_chars;
1329                #[cfg(feature = "wchar_conversion")] {
1330                    entry.skip_pairs += n.nexts[i].skip_pairs;
1331                }
1332            }
1333
1334            num_bytes += n.str.len_bytes();
1335            num_chars += n.num_chars();
1336
1337            #[cfg(feature = "wchar_conversion")] {
1338                assert_eq!(n.num_surrogate_pairs(), n.str.count_surrogate_pairs(n.num_chars()));
1339                num_pairs += n.num_surrogate_pairs();
1340            }
1341        }
1342
1343        for entry in iter[0..self.head.height as usize].iter() {
1344            // println!("{:?}", entry);
1345            assert!(entry.node.is_null());
1346            assert_eq!(entry.skip_chars, num_chars);
1347            #[cfg(feature = "wchar_conversion")] {
1348                assert_eq!(entry.skip_pairs, num_pairs);
1349            }
1350        }
1351
1352        // println!("self bytes: {}, count bytes {}", self.num_bytes, num_bytes);
1353        assert_eq!(self.num_bytes, num_bytes);
1354        assert_eq!(self.len_chars(), num_chars);
1355        #[cfg(feature = "wchar_conversion")] {
1356            assert_eq!(self.len_wchars(), num_chars + num_pairs);
1357        }
1358    }
1359
1360    /// This method counts the number of bytes of memory allocated in the rope. This is purely for
1361    /// debugging.
1362    ///
1363    /// Notes:
1364    ///
1365    /// - This method (its existence, its signature and its return value) is not considered part of
1366    ///   the stable API provided by jumprope. This may disappear or change in point releases.
1367    /// - This method walks the entire rope. It has time complexity O(n).
1368    /// - If a rope is owned inside another structure, this method will double-count the bytes
1369    ///   stored in the rope's head.
1370    pub fn mem_size(&self) -> usize {
1371        let mut nodes = self.node_iter_at_start();
1372        let mut size = 0;
1373        // The first node is the head. Count the actual head size.
1374        size += std::mem::size_of::<Self>();
1375        nodes.next(); // And discard it from the iterator.
1376
1377        for _n in nodes {
1378            // let layout = Node::layout_with_height(n.height);
1379            // size += layout.size();
1380            size += std::mem::size_of::<Node>();
1381        }
1382
1383        size
1384    }
1385
1386    #[allow(unused)]
1387    // pub fn print(&self) {
1388    pub(crate) fn print(&self) {
1389        println!("chars: {}\tbytes: {}\theight: {}", self.len_chars(), self.num_bytes, self.head.height);
1390
1391        print!("HEAD:");
1392        for s in self.head.nexts() {
1393            print!(" |{} ", s.skip_chars);
1394            #[cfg(feature = "wchar_conversion")] {
1395                print!("({}) ", s.skip_pairs);
1396            }
1397        }
1398        println!();
1399
1400        for (i, node) in self.node_iter_at_start().enumerate() {
1401            print!("{}:", i);
1402            for s in node.nexts() {
1403                print!(" |{} ", s.skip_chars);
1404                #[cfg(feature = "wchar_conversion")] {
1405                    print!("({}) ", s.skip_pairs);
1406                }
1407            }
1408            println!("      : {:?}(s{}) + {:?}(s{})",
1409                     node.as_str_1(), count_utf16_surrogates(node.as_str_1()),
1410                     node.as_str_2(), count_utf16_surrogates(node.as_str_2())
1411            );
1412        }
1413    }
1414}
1415
1416/// These methods are only available if the `wchar_conversion` feature is enabled.
1417#[cfg_attr(doc_cfg, doc(cfg(feature = "wchar_conversion")))]
1418#[cfg(feature = "wchar_conversion")]
1419impl JumpRope {
1420    /// Convert from a unicode character count to a wchar index, like what you'd use in Javascript,
1421    /// Java or C#.
1422    pub fn chars_to_wchars(&self, chars: usize) -> usize {
1423        // IF the rope is ascii-only then chars_to_wchars is the identity function.
1424        if self.is_ascii_only() {
1425            chars
1426        } else {
1427            let cursor = self.read_cursor_at_char(chars, true);
1428            cursor.global_pairs + chars
1429        }
1430    }
1431
1432    /// Convert a wchar index back to a unicode character count.
1433    ///
1434    /// **NOTE:** This method's behaviour is undefined if the wchar offset is invalid. Eg, given a
1435    /// rope with contents `𐆚` (a single character with wchar length 2), `wchars_to_chars(1)` is
1436    /// undefined and may panic / change in future versions of diamond types.
1437    pub fn wchars_to_chars(&self, wchars: usize) -> usize {
1438        if self.is_ascii_only() {
1439            wchars
1440        } else {
1441            self.count_chars_at_wchar(wchars)
1442        }
1443    }
1444
1445    /// Insert the given utf8 string into the rope at the specified wchar position.
1446    /// This is compatible with NSString, Javascript, etc.
1447    ///
1448    /// Returns the insertion position in characters.
1449    ///
1450    /// **NOTE:** This method's behaviour is undefined if the wchar offset is invalid. Eg, given a
1451    /// rope with contents `𐆚` (a single character with wchar length 2), `insert_at_wchar(1, ...)`
1452    /// is undefined and may panic / change in future versions of diamond types.
1453    pub fn insert_at_wchar(&mut self, mut pos_wchar: usize, contents: &str) -> usize {
1454        pos_wchar = pos_wchar.min(self.len_wchars());
1455
1456        let mut cursor = self.mut_cursor_at_wchar(pos_wchar, true);
1457        // dbg!(pos_wchar, &cursor.0[0..3]);
1458        Self::insert_at_cursor(&mut cursor, contents);
1459
1460        debug_assert_eq!(
1461            cursor.wchar_pos(),
1462            pos_wchar + count_chars(contents) + count_utf16_surrogates(contents)
1463        );
1464
1465        cursor.global_char_pos()
1466    }
1467
1468    /// Remove items from the rope, specified by the passed range. The indexes are interpreted
1469    /// as wchar offsets (like you'd get in javascript / C# / etc).
1470    ///
1471    /// **NOTE:** This method's behaviour is undefined if the wchar offset is invalid. Eg, given a
1472    /// rope with contents `𐆚` (a single character with wchar length 2), `remove_at_wchar(1..2)`
1473    /// is undefined and may panic / change in future versions of diamond types.
1474    pub fn remove_at_wchar(&mut self, mut range: Range<usize>) {
1475        range.end = range.end.min(self.len_wchars());
1476        if range.is_empty() { return; }
1477
1478        // Rather than making some fancy custom remove function, I'm just going to convert the
1479        // removed range into a char range and delete that.
1480        let char_end = self.wchars_to_chars(range.end);
1481
1482        // We need to stick_end so we can delete entries.
1483        let mut cursor = self.mut_cursor_at_wchar(range.start, true);
1484        let char_start = cursor.global_char_pos();
1485
1486        Self::del_at_cursor(&mut cursor, char_end - char_start);
1487
1488        debug_assert_eq!(cursor.wchar_pos(), range.start);
1489    }
1490
1491    /// Replace the characters in the specified wchar range with content.
1492    ///
1493    /// **NOTE:** This method's behaviour is undefined if the wchar offset is invalid. Eg, given a
1494    /// rope with contents `𐆚` (a single character with wchar length 2),
1495    /// `replace_at_wchar(1..2, ...)` is undefined and may panic / change in future versions of
1496    /// diamond types.
1497    pub fn replace_at_wchar(&mut self, range: Range<usize>, content: &str) {
1498        // TODO: Optimize this. This method should work similarly to replace(), where we create
1499        // a single cursor and use it in both contexts.
1500        if !range.is_empty() {
1501            self.remove_at_wchar(range.clone());
1502        }
1503        if !content.is_empty() {
1504            self.insert_at_wchar(range.start, content);
1505        }
1506    }
1507}
hi_doc_jumprope/jumprope.rs

hi_doc_jumprope/
jumprope.rs