hi_doc_jumprope/jumprope.rs
1// This is an implementation of a Rope (fancy string) based on a skip list. This
2// implementation is a rust port of librope:
3// https://github.com/josephg/librope
4// It does not support wide characters.
5
6// Unlike other rust rope implementations, this implementation should be very
7// fast; but it manages that through heavy use of unsafe pointers and C-style
8// dynamic arrays.
9
10// use rope::*;
11
12use std::str;
13use std::cmp::min;
14use std::fmt::{Debug, Display, Formatter};
15use std::marker::PhantomData;
16use std::ops::Range;
17use std::ptr::null_mut;
18use rand::prelude::*;
19use rand::Rng;
20use crate::fast_str_tools::*;
21use crate::gapbuffer::GapBuffer;
22// use crate::utils::*;
23// use crate::params::*;
24
25// Must be <= UINT16_MAX. Benchmarking says this is pretty close to optimal
26// (tested on a mac using clang 4.0 and x86_64).
27//const NODE_SIZE: usize = 136;
28
29// The likelyhood (out of 256) a node will have height (n+1) instead of n
30const BIAS: u8 = 65;
31// const BIAS: u8 = XX_BIAS;
32
33// The rope will become less efficient after the string is 2 ^ ROPE_MAX_HEIGHT nodes.
34
35#[cfg(debug_assertions)]
36pub(crate) const NODE_STR_SIZE: usize = 10;
37#[cfg(not(debug_assertions))]
38pub(crate) const NODE_STR_SIZE: usize = 392;
39// pub(crate) const NODE_STR_SIZE: usize = XX_SIZE;
40
41const MAX_HEIGHT: usize = 20;//NODE_STR_SIZE / mem::size_of::<SkipEntry>();
42const MAX_HEIGHT_U8: u8 = MAX_HEIGHT as u8;
43
44// Using StdRng notably increases wasm code size, providing some tiny extra protection against
45// ddos attacks. See main module documentation for details.
46#[cfg(feature = "ddos_protection")]
47type RopeRng = StdRng;
48#[cfg(not(feature = "ddos_protection"))]
49type RopeRng = SmallRng;
50
51
52// The node structure is designed in a very fancy way which would be more at home in C or something
53// like that. The basic idea is that the node structure is fixed size in memory, but the proportion
54// of that space taken up by characters and by the height are different depentant on a node's
55// height.
56#[repr(C)]
57pub struct JumpRope {
58 rng: RopeRng,
59 // The total number of characters in the rope
60 // num_chars: usize,
61
62 // The total number of bytes which the characters in the rope take up
63 num_bytes: usize,
64
65 // The first node is inline. The height is the max height we've ever used in the rope + 1. The
66 // highest entry points "past the end" of the list, including the entire list length.
67 // TODO: Get rid of this and just rely on nexts out of here.
68 pub(super) head: Node,
69
70 // This is so dirty. The first node is embedded in JumpRope; but we need to allocate enough room
71 // for height to get arbitrarily large. I could insist on JumpRope always getting allocated on
72 // the heap, but for small strings its better that the first string is just on the stack. So
73 // this struct is repr(C) and I'm just padding out the struct directly.
74 // nexts: [SkipEntry; MAX_HEIGHT+1],
75
76 // The nexts array contains an extra entry at [head.height-1] the which points past the skip
77 // list. The size is the size of the entire list.
78}
79
80/// JumpRope is Send and Sync, because the only way to (safely) mutate the rope is via a &mut
81/// reference.
82unsafe impl Send for JumpRope {}
83unsafe impl Sync for JumpRope {}
84
85pub(super) struct Node {
86 // The first num_bytes of this store a valid utf8 string.
87 // str: [u8; NODE_STR_SIZE],
88 //
89 // // Number of bytes in str in use
90 // num_bytes: u8,
91 pub(super) str: GapBuffer<NODE_STR_SIZE>,
92
93 // Height of nexts array.
94 pub(super) height: u8,
95
96 // #[repr(align(std::align_of::<SkipEntry>()))]
97
98 // Only the first height items are used in this. Earlier versions made explicit allocator calls
99 // to reduce memory usage, but that makes miri quite sad, so I'm now just wasting some memory
100 // in each nexts[] array.
101 nexts: [SkipEntry; MAX_HEIGHT+1],
102}
103
104#[derive(Copy, Clone, Debug, Eq, PartialEq)]
105pub(super) struct SkipEntry {
106 pub(super) node: *mut Node,
107 /// The number of *characters* between the start of the current node and the start of the next
108 /// node.
109 pub(super) skip_chars: usize,
110
111 #[cfg(feature = "wchar_conversion")]
112 pub(super) skip_pairs: usize,
113}
114
115// Make sure nexts uses correct alignment. This should be guaranteed by repr(C)
116// This test will fail if this ever stops being true.
117#[test]
118fn test_align() {
119 #[repr(C)] struct Check([SkipEntry; 0]);
120 assert!(std::mem::align_of::<Check>() >= std::mem::align_of::<SkipEntry>());
121}
122
123fn random_height(rng: &mut RopeRng) -> u8 {
124 let mut h: u8 = 1;
125 // TODO: This is using the thread_local rng, which is secure (?!). Check
126 // this is actually fast.
127 while h < MAX_HEIGHT_U8 && rng.gen::<u8>() < BIAS { h+=1; }
128 h
129}
130
131impl SkipEntry {
132 fn new() -> Self {
133 SkipEntry {
134 node: null_mut(),
135 skip_chars: 0,
136 #[cfg(feature = "wchar_conversion")]
137 skip_pairs: 0
138 }
139 }
140}
141
142impl Default for SkipEntry {
143 fn default() -> Self {
144 Self::new()
145 }
146}
147
148impl Node {
149 pub(super) fn next_ptr(&self) -> *const Self { // TODO: Pin.
150 self.first_next().node
151 }
152
153 // Do I need to be explicit about the lifetime of the references being tied
154 // to the lifetime of the node?
155 fn nexts(&self) -> &[SkipEntry] {
156 &self.nexts[..self.height as usize]
157 // unsafe {
158 // std::slice::from_raw_parts(self.nexts.as_ptr(), self.height as usize)
159 // }
160 }
161
162 fn nexts_mut(&mut self) -> &mut [SkipEntry] {
163 &mut self.nexts[..self.height as usize]
164 // unsafe {
165 // std::slice::from_raw_parts_mut(self.nexts.as_mut_ptr(), self.height as usize)
166 // }
167 }
168
169 fn new_with_height(height: u8, content: &str) -> Self {
170 Self {
171 str: GapBuffer::new_from_str(content),
172 height,
173 nexts: [SkipEntry::default(); MAX_HEIGHT+1]
174 }
175 }
176
177 // fn layout_with_height(height: u8) -> Layout {
178 // Layout::from_size_align(
179 // mem::size_of::<Node>() + mem::size_of::<SkipEntry>() * (height as usize),
180 // mem::align_of::<Node>()).unwrap()
181 // }
182
183 // fn alloc_with_height(height: u8, content: &str) -> *mut Node {
184 // //println!("height {} {}", height, max_height());
185 // #![allow(clippy::manual_range_contains)]
186 // assert!(height >= 1 && height <= MAX_HEIGHT_U8);
187 //
188 // unsafe {
189 // let node = alloc(Self::layout_with_height(height)) as *mut Node;
190 // (*node) = Node {
191 // str: GapBuffer::new_from_str(content),
192 // height,
193 // nexts: [SkipEntry::default(); MAX_HEIGHT+1],
194 // };
195 //
196 // for next in (*node).nexts_mut() {
197 // *next = SkipEntry::new();
198 // }
199 //
200 // node
201 // }
202 // }
203
204 // fn alloc(rng: &mut RopeRng, content: &str) -> *mut Node {
205 // Self::alloc_with_height(random_height(rng), content)
206 // }
207
208 // fn new_random_height(rng: &mut RopeRng, content: &str) -> Node {
209 // Self::new_with_height(random_height(rng), content)
210 // }
211
212 // unsafe fn free(p: *mut Node) {
213 // dealloc(p as *mut u8, Self::layout_with_height((*p).height));
214 // }
215
216 fn as_str_1(&self) -> &str {
217 self.str.start_as_str()
218 }
219 fn as_str_2(&self) -> &str {
220 self.str.end_as_str()
221 }
222
223 // The height is at least 1, so this is always valid.
224 pub(super) fn first_next(&self) -> &SkipEntry {
225 unsafe { &*self.nexts.as_ptr() }
226 }
227
228 fn first_next_mut(&mut self) -> &mut SkipEntry {
229 unsafe { &mut *self.nexts.as_mut_ptr() }
230 }
231
232 pub(super) fn num_chars(&self) -> usize {
233 self.first_next().skip_chars
234 }
235
236 #[cfg(feature = "wchar_conversion")]
237 pub(super) fn num_surrogate_pairs(&self) -> usize {
238 self.first_next().skip_pairs
239 }
240}
241
242/// Cursors are a bit weird, and they deserve an explanation.
243///
244/// Cursors express the location that an edit will happen. But because this is a skip list, when
245/// items are added or removed we need to not just splice in / remove elements, but also update:
246///
247/// - The next pointers of *previous* items
248/// - The index item. Each next pointer in a node names how many items are being "skipped over" by
249/// that pointer. Those "skipped over" counts need to be updated based on the change.
250///
251/// Anyway, to do all of this, a cursor names the item which *points to* the current location.
252///
253/// A cursor also implicitly references a &mut JumpRope. So we store some "deep pointers" in to
254/// the jumprope itself so the jumprope reference can stay unused while the cursor is live.
255#[derive(Debug)]
256pub(super) struct MutCursor<'a> {
257 inner: [SkipEntry; MAX_HEIGHT+1],
258
259 // head_nexts: &'a mut [SkipEntry; MAX_HEIGHT+1],
260
261 // head_height: &'a mut u8,
262 rng: &'a mut RopeRng,
263 num_bytes: &'a mut usize,
264
265 phantom: PhantomData<&'a mut JumpRope>,
266}
267
268impl<'a> MutCursor<'a> {
269 fn head_height_u8(&self) -> u8 {
270 unsafe {
271 (*self.inner[MAX_HEIGHT].node).height
272 }
273 }
274
275 fn head_height(&self) -> usize {
276 self.head_height_u8() as usize
277 }
278
279 fn set_height(&mut self, new_height: usize) {
280 unsafe {
281 (*self.inner[MAX_HEIGHT].node).height = new_height as u8
282 }
283 }
284
285 fn is_head(&self, ptr: *const Node) -> bool {
286 std::ptr::eq(ptr, self.inner[MAX_HEIGHT].node)
287 }
288
289 fn update_offsets(&mut self, height: usize, by_chars: isize, #[cfg(feature = "wchar_conversion")] by_pairs: isize) {
290 for i in 0..height {
291 unsafe {
292 // This is weird but makes sense when you realise the nexts in
293 // the cursor are pointers into the elements that have the
294 // actual pointers.
295
296 // Also adding a usize + isize is awful in rust :/
297 let entry = &mut (*self.inner[i].node).nexts[i];
298 entry.skip_chars = entry.skip_chars.wrapping_add(by_chars as usize);
299 #[cfg(feature = "wchar_conversion")] {
300 entry.skip_pairs = entry.skip_pairs.wrapping_add(by_pairs as usize);
301 }
302 }
303 }
304 }
305
306 fn move_within_node(&mut self, height: usize, by_chars: isize, #[cfg(feature = "wchar_conversion")] by_pairs: isize) {
307 for e in &mut self.inner[..height] {
308 e.skip_chars = e.skip_chars.wrapping_add(by_chars as usize);
309 #[cfg(feature = "wchar_conversion")] {
310 e.skip_pairs = e.skip_pairs.wrapping_add(by_pairs as usize);
311 }
312 }
313 }
314
315 pub(crate) fn here_ptr(&self) -> *mut Node {
316 self.inner[0].node
317 }
318
319 pub(crate) fn here_mut_ptr(&mut self) -> *mut Node {
320 self.inner[0].node
321 }
322
323 pub(crate) fn global_char_pos(&self) -> usize {
324 self.inner[self.head_height() - 1].skip_chars
325 }
326
327 #[cfg(feature = "wchar_conversion")]
328 pub(crate) fn wchar_pos(&self) -> usize {
329 let entry = &self.inner[self.head_height() - 1];
330 entry.skip_chars + entry.skip_pairs
331 }
332
333 pub(crate) fn local_char_pos(&self) -> usize {
334 self.inner[0].skip_chars
335 }
336}
337
338pub(crate) struct ReadCursor<'a> {
339 pub(super) node: &'a Node,
340
341 /// The number of *characters* between the start of the current node and the start of the next
342 /// node.
343 pub(super) offset_chars: usize,
344
345 // We can populate this, but we aren't using it anywhere.
346 // #[cfg(feature = "wchar_conversion")]
347 // pub(super) offset_pairs: usize,
348
349 // This is a bit gross, but its useful.
350 #[cfg(feature = "wchar_conversion")]
351 global_pairs: usize,
352
353 phantom: PhantomData<&'a JumpRope>
354}
355
356// impl ReadCursor {
357//
358// }
359
360/// A rope is a "rich string" data structure for storing fancy strings, like the contents of a
361/// text editor. See module level documentation for more information.
362impl JumpRope {
363 fn new_with_rng(rng: RopeRng) -> Self {
364 JumpRope {
365 rng,
366 num_bytes: 0,
367 // nexts: [SkipEntry::new(); MAX_HEIGHT],
368
369 // We don't ever store characters in the head node, but the height
370 // here is the maximum height of the entire rope.
371 head: Node::new_with_height(1, ""),
372 // head: Node {
373 // str: GapBuffer::new(),
374 // height: 1,
375 // nexts: [],
376 // },
377 // nexts: [SkipEntry::new(); MAX_HEIGHT+1],
378 }
379 }
380
381 /// Creates and returns a new, empty rope.
382 ///
383 /// In release mode this method is an alias for [`new_from_entropy`](Self::new_from_entropy).
384 /// But when compiled for testing (or in debug mode), we use a fixed seed in order to keep tests
385 /// fully deterministic.
386 ///
387 /// Note using this method in wasm significantly increases bundle size. Use
388 /// [`new_with_seed`](Self::new_from_seed) instead.
389 pub fn new() -> Self {
390 if cfg!(test) || cfg!(debug_assertions) || !cfg!(feature = "ddos_protection") {
391 Self::new_from_seed(123)
392 } else {
393 Self::new_from_entropy()
394 }
395 }
396
397 /// Creates a new, empty rope seeded from an entropy source.
398 pub fn new_from_entropy() -> Self {
399 Self::new_with_rng(RopeRng::from_entropy())
400 }
401
402 /// Creates a new, empty rope using an RNG seeded from the passed u64 parameter.
403 ///
404 /// The performance of this library with any particular data set will vary by a few percent
405 /// within a range based on the seed provided. It may be useful to fix the seed within tests or
406 /// benchmarks in order to make the program entirely deterministic, though bear in mind:
407 ///
408 /// - Jumprope will always use a fixed seed
409 pub fn new_from_seed(seed: u64) -> Self {
410 Self::new_with_rng(RopeRng::seed_from_u64(seed))
411 }
412
413 fn new_from_str(s: &str) -> Self {
414 let mut rope = Self::new();
415 rope.insert(0, s);
416 rope
417 }
418
419 /// Return the length of the rope in unicode characters. Note this is not the same as either
420 /// the number of bytes the characters take, or the number of grapheme clusters in the string.
421 ///
422 /// This method returns the length in constant-time (*O(1)*).
423 ///
424 /// # Example
425 ///
426 /// ```
427 /// # use hi_doc_jumprope::*;
428 /// assert_eq!("↯".len(), 3);
429 ///
430 /// let rope = JumpRope::from("↯");
431 /// assert_eq!(rope.len_chars(), 1);
432 ///
433 /// // The unicode snowman grapheme cluster needs 2 unicode characters.
434 /// let snowman = JumpRope::from("☃️");
435 /// assert_eq!(snowman.len_chars(), 2);
436 /// ```
437 pub fn len_chars(&self) -> usize {
438 self.head.nexts[self.head.height as usize - 1].skip_chars
439 }
440
441 /// String length in wide characters (as would be reported by javascript / C# / etc).
442 ///
443 /// The byte length of this string when encoded to UTF16 will be exactly
444 /// `rope.len_wchars() * 2`.
445 #[cfg(feature = "wchar_conversion")]
446 pub fn len_wchars(&self) -> usize {
447 let SkipEntry {
448 skip_chars,
449 skip_pairs,
450 ..
451 } = self.head.nexts[self.head.height as usize - 1];
452
453 skip_pairs + skip_chars
454 }
455
456 /// Does the rope only contain ASCII characters? (Unicode codepoints < 128). There are some
457 /// optimizations that can be done if this is true.
458 #[cfg(feature = "wchar_conversion")]
459 pub fn is_ascii_only(&self) -> bool {
460 self.head.nexts[self.head.height as usize - 1].skip_pairs == 0
461 }
462
463 /// Returns read cursor and global surrogate pair position.
464 ///
465 /// Surrogate pairs are only counted if wchar_conversion feature enabled.
466 pub(crate) fn read_cursor_at_char(&self, char_pos: usize, stick_end: bool) -> ReadCursor<'_> {
467 assert!(char_pos <= self.len_chars());
468
469 let mut e: *const Node = &self.head;
470 let mut height = self.head.height as usize - 1;
471
472 let mut offset_chars = char_pos; // How many more chars to skip
473
474 #[cfg(feature = "wchar_conversion")]
475 let mut global_pairs = 0; // Current wchar pos from the start of the rope
476
477 loop { // while height >= 0
478 let en = unsafe { &*e };
479 let next = en.nexts[height];
480 let skip = next.skip_chars;
481 if offset_chars > skip || (!stick_end && offset_chars == skip && !next.node.is_null()) {
482 // Go right.
483 // debug_assert!(e == &self.head || !en.str.is_empty());
484 offset_chars -= skip;
485 #[cfg(feature = "wchar_conversion")] {
486 global_pairs += next.skip_pairs;
487 }
488 e = next.node;
489 assert!(!e.is_null(), "Internal constraint violation: Reached rope end prematurely");
490 } else {
491 // Go down.
492 if height != 0 {
493 height -= 1;
494 } else {
495 #[cfg(feature = "wchar_conversion")]
496 let offset_pairs = en.str.count_surrogate_pairs(offset_chars);
497 #[cfg(feature = "wchar_conversion")] {
498 global_pairs += offset_pairs;
499 }
500
501 return ReadCursor {
502 node: unsafe { &*e },
503 offset_chars,
504 // #[cfg(feature = "wchar_conversion")]
505 // offset_pairs,
506 phantom: PhantomData,
507 #[cfg(feature = "wchar_conversion")]
508 global_pairs,
509 }
510 }
511 }
512 };
513 }
514
515 pub(super) fn mut_cursor_at_char(&mut self, char_pos: usize, stick_end: bool) -> MutCursor<'_> {
516 assert!(char_pos <= self.len_chars());
517
518 let mut e: *mut Node = &mut self.head;
519 let head_height = self.head.height as usize;
520 let mut height = head_height - 1;
521
522 let mut offset = char_pos; // How many more chars to skip
523
524 #[cfg(feature = "wchar_conversion")]
525 let mut surrogate_pairs = 0; // Current wchar pos from the start of the rope
526
527 // It would be nice to pop this into a function, but miri gets confused if we pass the node
528 // pointer out of this method. So I'm keeping this inline.
529 let mut cursor = MutCursor {
530 inner: [SkipEntry {
531 node: e,
532 skip_chars: 0,
533 #[cfg(feature = "wchar_conversion")]
534 skip_pairs: 0
535 }; MAX_HEIGHT+1],
536 rng: &mut self.rng,
537 num_bytes: &mut self.num_bytes,
538 phantom: PhantomData,
539 };
540
541 loop { // while height >= 0
542 let en = unsafe { &*e };
543 let next = en.nexts[height];
544 let skip = next.skip_chars;
545 if offset > skip || (!stick_end && offset == skip && !next.node.is_null()) {
546 // Go right.
547
548 // This breaks miri for some reason.
549 // assert!(e == &mut self.head || !en.str.is_empty());
550 offset -= skip;
551 #[cfg(feature = "wchar_conversion")] {
552 surrogate_pairs += next.skip_pairs;
553 }
554 e = next.node;
555 assert!(!e.is_null(), "Internal constraint violation: Reached rope end prematurely");
556 } else {
557 // Record this and go down.
558 cursor.inner[height] = SkipEntry {
559 // node: e as *mut Node, // This is pretty gross
560 node: e,
561 skip_chars: offset,
562 #[cfg(feature = "wchar_conversion")]
563 skip_pairs: surrogate_pairs
564 };
565
566 if height != 0 {
567 height -= 1;
568 } else {
569 #[cfg(feature = "wchar_conversion")] {
570 // Add on the wchar length at the current node.
571 surrogate_pairs += en.str.count_surrogate_pairs(offset);
572 if surrogate_pairs > 0 {
573 for entry in &mut cursor.inner[0..head_height] {
574 entry.skip_pairs = surrogate_pairs - entry.skip_pairs;
575 }
576 }
577 }
578 break;
579 }
580 }
581 };
582
583 assert!(offset <= NODE_STR_SIZE);
584
585 cursor
586 }
587
588 #[cfg(feature = "wchar_conversion")]
589 pub(crate) fn count_chars_at_wchar(&self, wchar_pos: usize) -> usize {
590 assert!(wchar_pos <= self.len_wchars());
591
592 let mut height = self.head.height as usize - 1;
593 let mut e: *const Node = &self.head;
594
595 let mut offset = wchar_pos; // How many more chars to skip
596
597 let mut char_pos = 0; // Char pos from the start of the rope
598
599 loop {
600 let en = unsafe { &*e };
601 let next = en.nexts[height];
602 let skip = next.skip_chars + next.skip_pairs;
603 if offset > skip {
604 // Go right.
605 // assert!(e == &self.head || !en.str.is_empty());
606 offset -= skip;
607 char_pos += next.skip_chars;
608 e = next.node;
609 assert!(!e.is_null(), "Internal constraint violation: Reached rope end prematurely");
610 } else {
611 // Go down.
612 if height != 0 {
613 height -= 1;
614 } else {
615 char_pos += en.str.count_chars_in_wchars(offset);
616 return char_pos;
617 }
618 }
619 };
620 }
621
622 /// Create a cursor pointing wchar characters into the rope
623 #[cfg(feature = "wchar_conversion")]
624 pub(crate) fn mut_cursor_at_wchar(&mut self, wchar_pos: usize, stick_end: bool) -> MutCursor {
625 assert!(wchar_pos <= self.len_wchars());
626
627 let head_height = self.head.height as usize;
628 let mut e: *mut Node = &mut self.head;
629 let mut height = self.head.height as usize - 1;
630
631 let mut offset = wchar_pos; // How many more chars to skip
632
633 let mut char_pos = 0; // Char pos from the start of the rope
634
635 let mut cursor = MutCursor {
636 inner: [SkipEntry {
637 node: e,
638 skip_chars: 0,
639 #[cfg(feature = "wchar_conversion")]
640 skip_pairs: 0
641 }; MAX_HEIGHT+1],
642 rng: &mut self.rng,
643 num_bytes: &mut self.num_bytes,
644 phantom: PhantomData,
645 };
646
647 loop {
648 let en = unsafe { &*e };
649 let next = en.nexts[height];
650 let skip = next.skip_chars + next.skip_pairs;
651 if offset > skip || (!stick_end && offset == skip && !next.node.is_null()) {
652 // Go right.
653 // assert!(e == &self.head || !en.str.is_empty());
654 offset -= skip;
655 char_pos += next.skip_chars;
656 e = next.node;
657 assert!(!e.is_null(), "Internal constraint violation: Reached rope end prematurely");
658 } else {
659 // Record this and go down.
660 cursor.inner[height] = SkipEntry {
661 node: e,
662 skip_chars: char_pos,
663 skip_pairs: offset
664 };
665
666 if height != 0 {
667 height -= 1;
668 } else {
669 char_pos += en.str.count_chars_in_wchars(offset);
670 for entry in &mut cursor.inner[0..head_height] {
671 let skip_chars = char_pos - entry.skip_chars;
672 entry.skip_chars = skip_chars;
673 entry.skip_pairs -= skip_chars;
674 }
675 break;
676 }
677 }
678 };
679
680 assert!(offset <= NODE_STR_SIZE);
681
682 cursor
683 }
684
685 fn mut_cursor_at_start(&mut self) -> MutCursor<'_> {
686 MutCursor {
687 inner: [SkipEntry {
688 node: &mut self.head,
689 skip_chars: 0,
690 #[cfg(feature = "wchar_conversion")]
691 skip_pairs: 0
692 }; MAX_HEIGHT+1],
693 rng: &mut self.rng,
694 num_bytes: &mut self.num_bytes,
695 phantom: PhantomData,
696 }
697 }
698
699 fn mut_cursor_at_end(&mut self) -> MutCursor {
700 self.mut_cursor_at_char(self.len_chars(), true)
701 }
702
703 fn insert_node_at(cursor: &mut MutCursor, contents: &str, num_chars: usize, update_cursor: bool, #[cfg(feature = "wchar_conversion")] num_pairs: usize) {
704 // println!("Insert_node_at {} len {}", contents.len(), self.num_bytes);
705 // assert!(contents.len() < NODE_STR_SIZE);
706 debug_assert_eq!(count_chars(contents), num_chars);
707 #[cfg(feature = "wchar_conversion")] {
708 debug_assert_eq!(count_utf16_surrogates(contents), num_pairs);
709 }
710 debug_assert!(num_chars <= NODE_STR_SIZE);
711
712 // TODO: Pin this sucka.
713 // let new_node = Pin::new(Node::alloc());
714 // let new_node = Node::alloc(cursor.rng, contents);
715
716 let new_height = random_height(cursor.rng);
717 let new_node = Box::into_raw(Box::new(Node::new_with_height(new_height, contents)));
718
719 let new_height = new_height as usize;
720
721 // let new_height = unsafe { (*new_node).height as usize };
722
723 let mut head_height = cursor.head_height();
724 while head_height <= new_height {
725 // TODO: Why do we copy here? Explain it in a comment. This is
726 // currently lifted from the C code.
727 // cursor.head_nexts[head_height] = cursor.head_nexts[head_height - 1];
728 unsafe {
729 let head = &mut (*cursor.inner[head_height].node);
730 head.nexts[head_height] = head.nexts[head_height - 1];
731 }
732
733 cursor.inner[head_height] = cursor.inner[head_height - 1];
734
735 // *cursor.head_height += 1; // Ends up 1 more than the max node height.
736 head_height += 1;
737 cursor.set_height(head_height);
738 }
739
740 for i in 0..new_height {
741 let prev_skip = unsafe { &mut (*cursor.inner[i].node).nexts[i] };
742 let nexts = unsafe { &mut (*new_node).nexts };
743 nexts[i].node = prev_skip.node;
744 nexts[i].skip_chars = num_chars + prev_skip.skip_chars - cursor.inner[i].skip_chars;
745
746 prev_skip.node = new_node;
747 prev_skip.skip_chars = cursor.inner[i].skip_chars;
748
749 #[cfg(feature = "wchar_conversion")] {
750 nexts[i].skip_pairs = num_pairs + prev_skip.skip_pairs - cursor.inner[i].skip_pairs;
751 prev_skip.skip_pairs = cursor.inner[i].skip_pairs;
752 }
753
754 // & move the iterator to the end of the newly inserted node.
755 if update_cursor {
756 cursor.inner[i].node = new_node;
757 cursor.inner[i].skip_chars = num_chars;
758 #[cfg(feature = "wchar_conversion")] {
759 cursor.inner[i].skip_pairs = num_pairs;
760 }
761 }
762 }
763
764 for i in new_height..head_height {
765 // I don't know why miri needs me to use nexts[] rather than nexts_mut() here but ??.
766 unsafe {
767 (*cursor.inner[i].node).nexts[i].skip_chars += num_chars;
768 #[cfg(feature = "wchar_conversion")] {
769 (*cursor.inner[i].node).nexts[i].skip_pairs += num_pairs;
770 }
771 }
772 if update_cursor {
773 cursor.inner[i].skip_chars += num_chars;
774 #[cfg(feature = "wchar_conversion")] {
775 cursor.inner[i].skip_pairs += num_pairs;
776 }
777 }
778 }
779
780 // self.nexts[self.head.height as usize - 1].skip_chars += num_chars;
781 *cursor.num_bytes += contents.len();
782 }
783
784 fn insert_at_cursor(cursor: &mut MutCursor, contents: &str) {
785 if contents.is_empty() { return; }
786 // iter contains how far (in characters) into the current element to
787 // skip. Figure out how much that is in bytes.
788 let mut offset_bytes: usize = 0;
789 // The insertion offset into the destination node.
790 let offset_chars: usize = cursor.inner[0].skip_chars;
791 let head_height = cursor.head_height();
792
793 let mut e = cursor.here_mut_ptr();
794
795 // We might be able to insert the new data into the current node, depending on
796 // how big it is. We'll count the bytes, and also check that its valid utf8.
797 let num_inserted_bytes = contents.len();
798 let mut num_inserted_chars = count_chars(contents);
799 #[cfg(feature = "wchar_conversion")]
800 let mut num_inserted_pairs = if num_inserted_bytes != num_inserted_chars {
801 count_utf16_surrogates(contents)
802 } else { 0 };
803
804 // Adding this short circuit makes the code about 2% faster for 1% more code
805 unsafe {
806 if (*e).str.gap_start_chars as usize == offset_chars && (*e).str.gap_len as usize >= num_inserted_bytes {
807 // Short circuit. If we can just insert all the content right here in the gap, do so.
808 (*e).str.insert_in_gap(contents);
809
810 #[cfg(feature = "wchar_conversion")] {
811 cursor.update_offsets(head_height, num_inserted_chars as isize, num_inserted_pairs as isize);
812 cursor.move_within_node(head_height, num_inserted_chars as isize, num_inserted_pairs as isize);
813 }
814 #[cfg(not(feature = "wchar_conversion"))] {
815 cursor.update_offsets(head_height, num_inserted_chars as isize);
816 cursor.move_within_node(head_height, num_inserted_chars as isize);
817 }
818
819 *cursor.num_bytes += num_inserted_bytes;
820 return;
821 }
822
823 if offset_chars > 0 {
824 // Changing this to debug_assert reduces performance by a few % for some reason.
825 assert!(offset_chars <= (*e).nexts[0].skip_chars);
826 // This could be faster, but its not a big deal.
827 offset_bytes = (*e).str.count_bytes(offset_chars);
828 }
829
830 // Can we insert into the current node?
831 let current_len_bytes = (*e).str.len_bytes();
832 let mut insert_here = current_len_bytes + num_inserted_bytes <= NODE_STR_SIZE;
833
834 // If we can't insert here, see if we can move the cursor forward and insert into the
835 // subsequent node.
836 if !insert_here && offset_bytes == current_len_bytes {
837 // We can insert into the subsequent node if:
838 // - We can't insert into the current node
839 // - There _is_ a next node to insert into
840 // - The insert would be at the start of the next node
841 // - There's room in the next node
842 if let Some(next) = (*e).first_next_mut().node.as_mut() {
843 if next.str.len_bytes() + num_inserted_bytes <= NODE_STR_SIZE {
844 offset_bytes = 0;
845
846 // Could do this with slice::fill but this seems slightly faster.
847 for e in &mut cursor.inner[..next.height as usize] {
848 *e = SkipEntry {
849 node: next,
850 skip_chars: 0,
851 #[cfg(feature = "wchar_conversion")]
852 skip_pairs: 0
853 };
854 }
855 e = next;
856
857 insert_here = true;
858 }
859 }
860 }
861
862 if insert_here {
863 // First move the current bytes later on in the string.
864 let c = &mut (*e).str;
865 c.try_insert(offset_bytes, contents).unwrap();
866
867 *cursor.num_bytes += num_inserted_bytes;
868 // .... aaaand update all the offset amounts.
869
870 #[cfg(feature = "wchar_conversion")] {
871 cursor.update_offsets(head_height, num_inserted_chars as isize, num_inserted_pairs as isize);
872 cursor.move_within_node(head_height, num_inserted_chars as isize, num_inserted_pairs as isize);
873 }
874 #[cfg(not(feature = "wchar_conversion"))] {
875 cursor.update_offsets(head_height, num_inserted_chars as isize);
876 cursor.move_within_node(head_height, num_inserted_chars as isize);
877 }
878 } else {
879 // There isn't room. We'll need to add at least one new node to the rope.
880
881 // If we're not at the end of the current node, we'll need to remove
882 // the end of the current node's data and reinsert it later.
883 (*e).str.move_gap(offset_bytes);
884
885 let num_end_bytes = (*e).str.len_bytes() - offset_bytes;
886 let mut num_end_chars: usize = 0;
887 #[cfg(feature = "wchar_conversion")]
888 let mut num_end_pairs: usize = 0;
889
890 // let end_str = if num_end_bytes > 0 {
891 if num_end_bytes > 0 {
892 // We'll truncate the node, but leave the bytes themselves there (for later).
893
894 // It would also be correct (and slightly more space efficient) to pack some of the
895 // new string's characters into this node after trimming it.
896 num_end_chars = (*e).num_chars() - offset_chars;
897
898 #[cfg(feature = "wchar_conversion")] {
899 num_end_pairs = (*e).num_surrogate_pairs() - (*e).str.gap_start_surrogate_pairs as usize;
900 debug_assert_eq!(num_end_pairs, count_utf16_surrogates((*e).str.end_as_str()));
901 cursor.update_offsets(head_height, -(num_end_chars as isize), -(num_end_pairs as isize));
902 }
903 #[cfg(not(feature = "wchar_conversion"))]
904 cursor.update_offsets(head_height, -(num_end_chars as isize));
905
906 *cursor.num_bytes -= num_end_bytes;
907 }
908
909 // Now we insert new nodes containing the new character data. The
910 // data must be broken into pieces of with a maximum size of
911 // NODE_STR_SIZE. Node boundaries must not occur in the middle of a
912 // utf8 codepoint.
913 // let mut str_offset: usize = 0;
914 let mut remainder = contents;
915 // while !remainder.is_empty() {
916 loop {
917 // println!(". {}", remainder);
918 // Find the first index after STR_SIZE bytes
919
920 if remainder.len() <= NODE_STR_SIZE {
921 Self::insert_node_at(cursor, remainder, num_inserted_chars, true, #[cfg(feature = "wchar_conversion")] num_inserted_pairs);
922 break;
923 } else {
924 // Find a suitable cut point. We should take as many characters as we can fit in
925 // the node, without splitting any unicode codepoints.
926 let mut byte_pos = NODE_STR_SIZE;
927 loop { // Slide back to a character boundary.
928 let c = remainder.as_bytes()[byte_pos];
929 if c & 0b1100_0000 != 0b1000_0000 {
930 break;
931 }
932 byte_pos -= 1;
933 }
934
935 let slice = &remainder.as_bytes()[..byte_pos];
936 let char_pos = count_chars_in_bytes(slice);
937 num_inserted_chars -= char_pos;
938
939 #[cfg(feature = "wchar_conversion")]
940 let pairs = count_utf16_surrogates_in_bytes(slice);
941 #[cfg(feature = "wchar_conversion")] {
942 num_inserted_pairs -= pairs;
943 }
944
945 let (next, rem) = remainder.split_at(byte_pos);
946 assert!(!next.is_empty());
947 Self::insert_node_at(cursor, next, char_pos, true, #[cfg(feature = "wchar_conversion")] pairs);
948 remainder = rem;
949 }
950 }
951
952 if num_end_bytes > 0 {
953 let end_str = (*e).str.take_rest();
954 Self::insert_node_at(cursor, end_str, num_end_chars, false, #[cfg(feature = "wchar_conversion")] num_end_pairs);
955 }
956 // if let Some(end_str) = end_str {
957 // Self::insert_node_at(cursor, end_str, num_end_chars, false, #[cfg(feature = "wchar_conversion")] num_end_pairs);
958 // }
959 }
960
961 assert_ne!(cursor.local_char_pos(), 0);
962 }
963 }
964
965 fn del_at_cursor(cursor: &mut MutCursor, mut length: usize) {
966 if length == 0 { return; }
967 let mut offset_chars = cursor.local_char_pos();
968 let mut node = cursor.here_ptr();
969 unsafe {
970 while length > 0 {
971 {
972 let s = (*node).first_next();
973 if offset_chars == s.skip_chars {
974 // End of current node. Skip to the start of the next one.
975 node = s.node;
976 offset_chars = 0;
977 }
978 }
979
980 let num_chars = (*node).num_chars();
981 let removed = std::cmp::min(length, num_chars - offset_chars);
982 assert!(removed > 0);
983
984 // TODO: Figure out a better way to calculate this.
985 #[cfg(feature = "wchar_conversion")]
986 let removed_pairs = (*node).str.count_surrogate_pairs(offset_chars + removed)
987 - (*node).str.count_surrogate_pairs(offset_chars);
988
989 let height = (*node).height as usize;
990 if removed < num_chars || cursor.is_head(node) {
991 // Just trim the node down.
992 let s = &mut (*node).str;
993 let removed_bytes = s.remove_chars(offset_chars, removed);
994 *cursor.num_bytes -= removed_bytes;
995
996 for s in (*node).nexts_mut() {
997 s.skip_chars -= removed;
998 #[cfg(feature = "wchar_conversion")] {
999 s.skip_pairs -= removed_pairs;
1000 }
1001 }
1002 } else {
1003 // Remove the node from the skip list. This works because the cursor must be
1004 // pointing from the previous element to the start of this element.
1005 assert_ne!(cursor.inner[0].node, node);
1006
1007 for i in 0..(*node).height as usize {
1008 let s = &mut (*cursor.inner[i].node).nexts_mut()[i];
1009 s.node = (*node).nexts[i].node;
1010 s.skip_chars += (*node).nexts[i].skip_chars - removed;
1011 #[cfg(feature = "wchar_conversion")] {
1012 s.skip_pairs += (*node).nexts[i].skip_pairs - removed_pairs;
1013 }
1014 }
1015
1016 *cursor.num_bytes -= (*node).str.len_bytes();
1017 let next = (*node).first_next().node;
1018 // Node::free(node);
1019 drop(Box::from_raw(node));
1020 node = next;
1021 }
1022
1023 for i in height..cursor.head_height() {
1024 let s = &mut (*cursor.inner[i].node).nexts[i];
1025 s.skip_chars -= removed;
1026 #[cfg(feature = "wchar_conversion")] {
1027 s.skip_pairs -= removed_pairs;
1028 }
1029 }
1030
1031 length -= removed;
1032 }
1033 }
1034 }
1035
1036 fn eq_str(&self, mut other: &str) -> bool {
1037 if self.len_bytes() != other.len() { return false; }
1038
1039 for s in self.substrings() {
1040 let (start, rem) = other.split_at(s.len());
1041 if start != s { return false; }
1042 other = rem;
1043 }
1044
1045 true
1046 }
1047}
1048
1049impl Default for JumpRope {
1050 fn default() -> Self {
1051 Self::new()
1052 }
1053}
1054
1055impl Drop for JumpRope {
1056 fn drop(&mut self) {
1057 let mut node = self.head.first_next().node;
1058 unsafe {
1059 while !node.is_null() {
1060 let next = (*node).first_next().node;
1061 // Node::free(node);
1062 drop(Box::from_raw(node));
1063 node = next;
1064 }
1065 }
1066 }
1067}
1068
1069impl<S: AsRef<str>> From<S> for JumpRope {
1070 fn from(str: S) -> Self {
1071 JumpRope::new_from_str(str.as_ref())
1072 }
1073}
1074
1075impl PartialEq for JumpRope {
1076 // This is quite complicated. It would be cleaner to just write a bytes
1077 // iterator, then iterate over the bytes of both strings comparing along the
1078 // way.
1079 // However, this should be faster because it can memcmp().
1080
1081 // Another way to implement this would be to rewrite it as a comparison with
1082 // an iterator over &str. Then the rope vs rope comparison would be trivial,
1083 // but also we could add comparison functions with a single &str and stuff
1084 // very easily.
1085 fn eq(&self, other: &JumpRope) -> bool {
1086 if self.num_bytes != other.num_bytes
1087 || self.len_chars() != other.len_chars() {
1088 return false
1089 }
1090
1091 let mut other_iter = other.substrings();
1092
1093 // let mut os = other_iter.next();
1094 let mut os = "";
1095
1096 for mut s in self.substrings() {
1097 // Walk s.len() bytes through the other rope
1098 while !s.is_empty() {
1099 if os.is_empty() {
1100 os = other_iter.next().unwrap();
1101 }
1102 debug_assert!(!os.is_empty());
1103
1104 let amt = min(s.len(), os.len());
1105 debug_assert!(amt > 0);
1106
1107 let (s_start, s_rem) = s.split_at(amt);
1108 let (os_start, os_rem) = os.split_at(amt);
1109
1110 if s_start != os_start { return false; }
1111
1112 s = s_rem;
1113 os = os_rem;
1114 }
1115 }
1116
1117 true
1118 }
1119}
1120impl Eq for JumpRope {}
1121
1122impl Debug for JumpRope {
1123 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1124 f.debug_list()
1125 .entries(self.substrings())
1126 .finish()
1127 }
1128}
1129
1130impl Display for JumpRope {
1131 fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
1132 for s in self.substrings() {
1133 f.write_str(s)?;
1134 }
1135 Ok(())
1136 }
1137}
1138
1139// I don't know why I need all three of these, but I do.
1140
1141impl<T: AsRef<str>> PartialEq<T> for JumpRope {
1142 fn eq(&self, other: &T) -> bool {
1143 self.eq_str(other.as_ref())
1144 }
1145}
1146
1147// Needed for assert_eq!(&rope, "Hi there");
1148impl PartialEq<str> for JumpRope {
1149 fn eq(&self, other: &str) -> bool {
1150 self.eq_str(other)
1151 }
1152}
1153
1154// Needed for assert_eq!(&rope, String::from("Hi there"));
1155impl PartialEq<String> for &JumpRope {
1156 fn eq(&self, other: &String) -> bool {
1157 self.eq_str(other.as_str())
1158 }
1159}
1160
1161impl<'a> Extend<&'a str> for JumpRope {
1162 fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
1163 let mut cursor = self.mut_cursor_at_end();
1164 iter.into_iter().for_each(|s| {
1165 Self::insert_at_cursor(&mut cursor, s);
1166 });
1167 }
1168}
1169
1170impl Clone for JumpRope {
1171 fn clone(&self) -> Self {
1172 // This method could be a little bit more efficient, but I think improving clone()
1173 // performance isn't worth the extra effort.
1174 let mut r = JumpRope::new();
1175 let mut cursor = r.mut_cursor_at_start();
1176 for node in self.node_iter_at_start() {
1177 JumpRope::insert_at_cursor(&mut cursor, node.as_str_1());
1178 JumpRope::insert_at_cursor(&mut cursor, node.as_str_2());
1179 }
1180 r
1181 }
1182}
1183
1184impl JumpRope {
1185 /// Insert new content into the rope. The content is inserted at the specified unicode character
1186 /// offset, which is different from a byte offset for non-ASCII characters.
1187 ///
1188 /// # Example
1189 ///
1190 /// ```
1191 /// # use hi_doc_jumprope::*;
1192 /// let mut rope = JumpRope::from("--");
1193 /// rope.insert(1, "hi there");
1194 /// assert_eq!(rope.to_string(), "-hi there-");
1195 /// ```
1196 ///
1197 /// If the position names a location past the end of the rope, it is truncated.
1198 pub fn insert(&mut self, mut pos: usize, contents: &str) {
1199 // if cfg!(debug_assertions) { self.check(); }
1200
1201 if contents.is_empty() { return; }
1202 pos = std::cmp::min(pos, self.len_chars());
1203
1204 let mut cursor = self.mut_cursor_at_char(pos, true);
1205
1206 Self::insert_at_cursor(&mut cursor, contents);
1207
1208 debug_assert_eq!(cursor.global_char_pos(), pos + count_chars(contents));
1209 // dbg!(&cursor.0[..self.head.height as usize]);
1210 }
1211
1212 /// Delete a span of unicode characters from the rope. The span is specified in unicode
1213 /// characters, not bytes.
1214 ///
1215 /// Any attempt to delete past the end of the rope will be silently ignored.
1216 ///
1217 /// # Example
1218 ///
1219 /// ```
1220 /// # use hi_doc_jumprope::*;
1221 /// let mut rope = JumpRope::from("Whoa dawg!");
1222 /// rope.remove(4..9); // delete " dawg"
1223 /// assert_eq!(rope.to_string(), "Whoa!");
1224 /// ```
1225 pub fn remove(&mut self, mut range: Range<usize>) {
1226 // if cfg!(debug_assertions) { self.check(); }
1227
1228 range.end = range.end.min(self.len_chars());
1229 if range.start >= range.end { return; }
1230
1231 // We need to stick_end so we can delete entries.
1232 let mut cursor = self.mut_cursor_at_char(range.start, true);
1233 Self::del_at_cursor(&mut cursor, range.end - range.start);
1234
1235 debug_assert_eq!(cursor.global_char_pos(), range.start);
1236 }
1237
1238 /// Replace the specified range with new content. This is equivalent to calling
1239 /// [`remove`](Self::remove) followed by [`insert`](Self::insert), but it is simpler and faster.
1240 ///
1241 /// # Example
1242 ///
1243 /// ```
1244 /// # use hi_doc_jumprope::*;
1245 /// let mut rope = JumpRope::from("Hi Mike!");
1246 /// rope.replace(3..7, "Duane"); // replace "Mike" with "Duane"
1247 /// assert_eq!(rope.to_string(), "Hi Duane!");
1248 /// ```
1249 pub fn replace(&mut self, range: Range<usize>, content: &str) {
1250 let len = self.len_chars();
1251 let pos = usize::min(range.start, len);
1252 let del_len = usize::min(range.end, len) - pos;
1253
1254 let mut cursor = self.mut_cursor_at_char(pos, true);
1255 if del_len > 0 {
1256 Self::del_at_cursor(&mut cursor, del_len);
1257 }
1258 if !content.is_empty() {
1259 Self::insert_at_cursor(&mut cursor, content);
1260 }
1261
1262 debug_assert_eq!(cursor.global_char_pos(), pos + count_chars(content));
1263 }
1264
1265 /// Get the number of bytes used for the UTF8 representation of the rope. This will always match
1266 /// the .len() property of the equivalent String.
1267 ///
1268 /// Note: This is only useful in specific situations - like preparing a byte buffer for saving
1269 /// or sending over the internet. In many cases it is preferable to use
1270 /// [`len_chars`](Self::len_chars).
1271 ///
1272 /// # Example
1273 ///
1274 /// ```
1275 /// # use hi_doc_jumprope::*;
1276 /// let str = "κόσμε"; // "Cosmos" in ancient greek
1277 /// assert_eq!(str.len(), 11); // 11 bytes over the wire
1278 ///
1279 /// let rope = JumpRope::from(str);
1280 /// assert_eq!(rope.len_bytes(), str.len());
1281 /// ```
1282 pub fn len_bytes(&self) -> usize { self.num_bytes }
1283
1284 /// Returns `true` if the rope contains no elements.
1285 pub fn is_empty(&self) -> bool { self.num_bytes == 0 }
1286
1287 pub fn check(&self) {
1288 assert!(self.head.height >= 1);
1289 assert!(self.head.height < MAX_HEIGHT_U8 + 1);
1290
1291 let skip_over = &self.head.nexts[self.head.height as usize - 1];
1292 // println!("Skip over skip chars {}, num bytes {}", skip_over.skip_chars, self.num_bytes);
1293 assert!(skip_over.skip_chars <= self.num_bytes as usize);
1294 #[cfg(feature = "wchar_conversion")] {
1295 assert!(skip_over.skip_pairs <= skip_over.skip_chars);
1296 }
1297 assert!(skip_over.node.is_null());
1298
1299 // The offsets store the total distance travelled since the start.
1300 let mut iter = [SkipEntry::new(); MAX_HEIGHT];
1301 for i in 0..self.head.height {
1302 // Bleh.
1303 iter[i as usize].node = &self.head as *const Node as *mut Node;
1304 }
1305
1306 let mut num_bytes: usize = 0;
1307 let mut num_chars = 0;
1308 #[cfg(feature = "wchar_conversion")]
1309 let mut num_pairs = 0;
1310
1311 for n in self.node_iter_at_start() {
1312 // println!("visiting {:?}", n.as_str());
1313 assert!(!n.str.is_empty() || std::ptr::eq(n, &self.head));
1314 assert!(n.height <= MAX_HEIGHT_U8);
1315 assert!(n.height >= 1);
1316 n.str.check();
1317
1318 assert_eq!(count_chars(n.as_str_1()) + count_chars(n.as_str_2()), n.num_chars());
1319 for (i, entry) in iter[0..n.height as usize].iter_mut().enumerate() {
1320 assert_eq!(entry.node as *const Node, n as *const Node);
1321 assert_eq!(entry.skip_chars, num_chars);
1322 #[cfg(feature = "wchar_conversion")] {
1323 assert_eq!(entry.skip_pairs, num_pairs);
1324 }
1325
1326 // println!("replacing entry {:?} with {:?}", entry, n.nexts()[i].node);
1327 entry.node = n.nexts[i].node;
1328 entry.skip_chars += n.nexts[i].skip_chars;
1329 #[cfg(feature = "wchar_conversion")] {
1330 entry.skip_pairs += n.nexts[i].skip_pairs;
1331 }
1332 }
1333
1334 num_bytes += n.str.len_bytes();
1335 num_chars += n.num_chars();
1336
1337 #[cfg(feature = "wchar_conversion")] {
1338 assert_eq!(n.num_surrogate_pairs(), n.str.count_surrogate_pairs(n.num_chars()));
1339 num_pairs += n.num_surrogate_pairs();
1340 }
1341 }
1342
1343 for entry in iter[0..self.head.height as usize].iter() {
1344 // println!("{:?}", entry);
1345 assert!(entry.node.is_null());
1346 assert_eq!(entry.skip_chars, num_chars);
1347 #[cfg(feature = "wchar_conversion")] {
1348 assert_eq!(entry.skip_pairs, num_pairs);
1349 }
1350 }
1351
1352 // println!("self bytes: {}, count bytes {}", self.num_bytes, num_bytes);
1353 assert_eq!(self.num_bytes, num_bytes);
1354 assert_eq!(self.len_chars(), num_chars);
1355 #[cfg(feature = "wchar_conversion")] {
1356 assert_eq!(self.len_wchars(), num_chars + num_pairs);
1357 }
1358 }
1359
1360 /// This method counts the number of bytes of memory allocated in the rope. This is purely for
1361 /// debugging.
1362 ///
1363 /// Notes:
1364 ///
1365 /// - This method (its existence, its signature and its return value) is not considered part of
1366 /// the stable API provided by jumprope. This may disappear or change in point releases.
1367 /// - This method walks the entire rope. It has time complexity O(n).
1368 /// - If a rope is owned inside another structure, this method will double-count the bytes
1369 /// stored in the rope's head.
1370 pub fn mem_size(&self) -> usize {
1371 let mut nodes = self.node_iter_at_start();
1372 let mut size = 0;
1373 // The first node is the head. Count the actual head size.
1374 size += std::mem::size_of::<Self>();
1375 nodes.next(); // And discard it from the iterator.
1376
1377 for _n in nodes {
1378 // let layout = Node::layout_with_height(n.height);
1379 // size += layout.size();
1380 size += std::mem::size_of::<Node>();
1381 }
1382
1383 size
1384 }
1385
1386 #[allow(unused)]
1387 // pub fn print(&self) {
1388 pub(crate) fn print(&self) {
1389 println!("chars: {}\tbytes: {}\theight: {}", self.len_chars(), self.num_bytes, self.head.height);
1390
1391 print!("HEAD:");
1392 for s in self.head.nexts() {
1393 print!(" |{} ", s.skip_chars);
1394 #[cfg(feature = "wchar_conversion")] {
1395 print!("({}) ", s.skip_pairs);
1396 }
1397 }
1398 println!();
1399
1400 for (i, node) in self.node_iter_at_start().enumerate() {
1401 print!("{}:", i);
1402 for s in node.nexts() {
1403 print!(" |{} ", s.skip_chars);
1404 #[cfg(feature = "wchar_conversion")] {
1405 print!("({}) ", s.skip_pairs);
1406 }
1407 }
1408 println!(" : {:?}(s{}) + {:?}(s{})",
1409 node.as_str_1(), count_utf16_surrogates(node.as_str_1()),
1410 node.as_str_2(), count_utf16_surrogates(node.as_str_2())
1411 );
1412 }
1413 }
1414}
1415
1416/// These methods are only available if the `wchar_conversion` feature is enabled.
1417#[cfg_attr(doc_cfg, doc(cfg(feature = "wchar_conversion")))]
1418#[cfg(feature = "wchar_conversion")]
1419impl JumpRope {
1420 /// Convert from a unicode character count to a wchar index, like what you'd use in Javascript,
1421 /// Java or C#.
1422 pub fn chars_to_wchars(&self, chars: usize) -> usize {
1423 // IF the rope is ascii-only then chars_to_wchars is the identity function.
1424 if self.is_ascii_only() {
1425 chars
1426 } else {
1427 let cursor = self.read_cursor_at_char(chars, true);
1428 cursor.global_pairs + chars
1429 }
1430 }
1431
1432 /// Convert a wchar index back to a unicode character count.
1433 ///
1434 /// **NOTE:** This method's behaviour is undefined if the wchar offset is invalid. Eg, given a
1435 /// rope with contents `𐆚` (a single character with wchar length 2), `wchars_to_chars(1)` is
1436 /// undefined and may panic / change in future versions of diamond types.
1437 pub fn wchars_to_chars(&self, wchars: usize) -> usize {
1438 if self.is_ascii_only() {
1439 wchars
1440 } else {
1441 self.count_chars_at_wchar(wchars)
1442 }
1443 }
1444
1445 /// Insert the given utf8 string into the rope at the specified wchar position.
1446 /// This is compatible with NSString, Javascript, etc.
1447 ///
1448 /// Returns the insertion position in characters.
1449 ///
1450 /// **NOTE:** This method's behaviour is undefined if the wchar offset is invalid. Eg, given a
1451 /// rope with contents `𐆚` (a single character with wchar length 2), `insert_at_wchar(1, ...)`
1452 /// is undefined and may panic / change in future versions of diamond types.
1453 pub fn insert_at_wchar(&mut self, mut pos_wchar: usize, contents: &str) -> usize {
1454 pos_wchar = pos_wchar.min(self.len_wchars());
1455
1456 let mut cursor = self.mut_cursor_at_wchar(pos_wchar, true);
1457 // dbg!(pos_wchar, &cursor.0[0..3]);
1458 Self::insert_at_cursor(&mut cursor, contents);
1459
1460 debug_assert_eq!(
1461 cursor.wchar_pos(),
1462 pos_wchar + count_chars(contents) + count_utf16_surrogates(contents)
1463 );
1464
1465 cursor.global_char_pos()
1466 }
1467
1468 /// Remove items from the rope, specified by the passed range. The indexes are interpreted
1469 /// as wchar offsets (like you'd get in javascript / C# / etc).
1470 ///
1471 /// **NOTE:** This method's behaviour is undefined if the wchar offset is invalid. Eg, given a
1472 /// rope with contents `𐆚` (a single character with wchar length 2), `remove_at_wchar(1..2)`
1473 /// is undefined and may panic / change in future versions of diamond types.
1474 pub fn remove_at_wchar(&mut self, mut range: Range<usize>) {
1475 range.end = range.end.min(self.len_wchars());
1476 if range.is_empty() { return; }
1477
1478 // Rather than making some fancy custom remove function, I'm just going to convert the
1479 // removed range into a char range and delete that.
1480 let char_end = self.wchars_to_chars(range.end);
1481
1482 // We need to stick_end so we can delete entries.
1483 let mut cursor = self.mut_cursor_at_wchar(range.start, true);
1484 let char_start = cursor.global_char_pos();
1485
1486 Self::del_at_cursor(&mut cursor, char_end - char_start);
1487
1488 debug_assert_eq!(cursor.wchar_pos(), range.start);
1489 }
1490
1491 /// Replace the characters in the specified wchar range with content.
1492 ///
1493 /// **NOTE:** This method's behaviour is undefined if the wchar offset is invalid. Eg, given a
1494 /// rope with contents `𐆚` (a single character with wchar length 2),
1495 /// `replace_at_wchar(1..2, ...)` is undefined and may panic / change in future versions of
1496 /// diamond types.
1497 pub fn replace_at_wchar(&mut self, range: Range<usize>, content: &str) {
1498 // TODO: Optimize this. This method should work similarly to replace(), where we create
1499 // a single cursor and use it in both contexts.
1500 if !range.is_empty() {
1501 self.remove_at_wchar(range.clone());
1502 }
1503 if !content.is_empty() {
1504 self.insert_at_wchar(range.start, content);
1505 }
1506 }
1507}