Skip to main content

piece_tree/
lib.rs

1#![cfg_attr(not(feature = "write"), no_std)]
2
3#![warn(clippy::all, clippy::pedantic, dead_code, clippy::cargo)]
4#![allow(
5    unused_assignments,
6    clippy::inline_always,
7    clippy::uninlined_format_args, // ?...
8    clippy::borrow_as_ptr,
9    clippy::negative_feature_names,
10    clippy::redundant_closure_for_method_calls,
11    clippy::sliced_string_as_bytes,
12    clippy::should_implement_trait,
13    clippy::collapsible_if,
14    clippy::new_without_default,
15    clippy::comparison_chain,
16    clippy::redundant_field_names,
17    clippy::semicolon_if_nothing_returned,
18    clippy::pub_underscore_fields,
19    clippy::struct_field_names,
20    clippy::ptr_as_ptr,
21    clippy::missing_transmute_annotations,
22    clippy::multiple_crate_versions,
23    clippy::cast_precision_loss,
24    clippy::cast_possible_truncation,
25    clippy::similar_names,
26    clippy::cast_sign_loss,
27    clippy::cast_possible_wrap,
28    clippy::used_underscore_binding,
29    clippy::nonstandard_macro_braces,
30    clippy::used_underscore_items,
31    clippy::enum_glob_use,
32    clippy::cast_lossless,
33    clippy::match_same_arms,
34    clippy::too_many_lines,
35    clippy::unnested_or_patterns,
36    clippy::blocks_in_conditions,
37    clippy::missing_errors_doc,
38    clippy::missing_panics_doc,
39)]
40
41#[cfg(feature = "write")]
42extern crate std;
43
44extern crate alloc;
45
46#[allow(unused)]
47use alloc::vec;
48use alloc::vec::Vec;
49use alloc::sync::Arc;
50use alloc::string::{String, ToString};
51
52use core::str;
53use core::cmp::Ordering;
54use core::ops::{Bound, Deref, RangeBounds};
55
56#[cfg(not(feature = "dont_vendor"))]
57mod cranelift_entity_vendor;
58#[cfg(not(feature = "dont_vendor"))]
59use cranelift_entity_vendor as cranelift_entity;
60
61#[cfg(not(feature = "dont_vendor"))]
62mod smallvec_vendor;
63#[cfg(not(feature = "dont_vendor"))]
64use smallvec_vendor as smallvec;
65
66#[cfg(not(feature = "dont_vendor"))]
67mod bytecount_vendor;
68#[cfg(not(feature = "dont_vendor"))]
69use bytecount_vendor as bytecount;
70
71use smallvec::SmallVec;
72use cranelift_entity::{EntityRef, PrimaryMap};
73#[cfg(feature = "dont_vendor")]
74use cranelift_entity::entity_impl;
75
76#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
77pub struct BufferRef(pub u32);
78entity_impl!(BufferRef);
79
80#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
81pub struct NodeRef(pub u32);
82entity_impl!(NodeRef);
83
84pub const NIL: NodeRef = NodeRef(0);
85
86pub const MOD_BUFFER: BufferRef = BufferRef(u32::MAX >> 1);
87
88pub const CHECKPOINT_INTERVAL: u32 = 64;
89
90pub const MAX_PIECE_SIZE: u32 = 64 * 1024;
91
92#[derive(Clone, Copy, PartialEq, Eq, Debug)]
93pub enum Color { Black = 0, Red = 1 }
94
95#[derive(Clone, Copy, Debug)]
96pub struct HistoryEntry {
97    pub root:          NodeRef,
98    pub cursor_offset: u32,
99}
100
101#[derive(Default, Clone, Copy, Debug, PartialEq, Eq)]
102pub struct Piece {
103    pub buffer:            BufferRef,
104
105    pub byte_offset:       u32,
106    pub byte_length:       u32,
107
108    pub newline_count:     u32,
109    pub char_count:        u32,
110
111    pub buffer_start_line: u32,  // Index into that buffer's `newline_offsets` array of this piece's first '\n'
112
113    pub piece_start_char:  u32,  // Absolute char index of p.offset in its buffer
114}
115
116// For tests and other stuff
117#[derive(Clone, Copy, Debug)]
118pub enum Edit {
119    Insert { offset: u32, text: &'static str },
120    Remove { offset: u32, length: u32 },
121}
122
123impl Edit {
124    #[inline(always)]
125    #[must_use]
126    pub fn offset(&self) -> u32 {
127        match self {
128            Edit::Insert { offset, .. } => *offset,
129            Edit::Remove { offset, .. } => *offset,
130        }
131    }
132}
133
134#[derive(Clone, Copy, Debug)]
135#[repr(C)]
136pub struct Node {
137    pub left:              NodeRef, // 4 bytes
138    pub right:             NodeRef, // 4 bytes
139    pub offset:            u32,     // 4 bytes
140    pub subtree_len:       u32,     // 4 bytes
141    pub subtree_chars:     u32,     // 4 bytes
142    pub subtree_newlines:  u32,     // 4 bytes
143    pub meta:              u32,     // 4 bytes (Bit 0: Color, Bits 1..31: BufferIndex)
144    pub buffer_start_line: u32,     // 4 bytes
145    pub piece_start_char:  u32,     // 4 bytes
146}
147
148const _: () = assert!(size_of::<Node>() == 36);
149
150impl Node {
151    #[inline(always)]
152    #[must_use]
153    pub fn color(&self) -> Color {
154        if (self.meta & 1) == 1 { Color::Red } else { Color::Black }
155    }
156
157    #[inline(always)]
158    pub fn set_color(&mut self, color: Color) {
159        self.meta = (self.meta & !1) | (color as u32);
160    }
161
162    #[inline(always)]
163    #[must_use]
164    pub fn buffer_index(&self) -> BufferRef {
165        BufferRef(self.meta >> 1)
166    }
167
168    #[inline(always)]
169    pub fn set_buffer(&mut self, buffer: BufferRef) {
170        self.meta = (buffer.as_u32() << 1) | (self.meta & 1);
171    }
172}
173
174#[derive(Debug, Clone)]
175pub struct OriginalBuffer {
176    pub text:             Arc<str>,
177    pub newline_offsets:  Arc<[u32]>,
178    pub char_checkpoints: Arc<[(u32, u32)]>,
179}
180
181impl Deref for OriginalBuffer {
182    type Target = str;
183
184    #[inline(always)]
185    fn deref(&self) -> &Self::Target {
186        self.text.as_ref()
187    }
188}
189
190#[derive(Debug, Clone)]
191pub struct Buffers {
192    pub original_buffers: PrimaryMap<BufferRef, OriginalBuffer>,
193
194    pub modifications_buffer:           String,
195    pub modifications_newline_offsets:  Vec<u32>,
196    pub modifications_char_checkpoints: Vec<(u32, u32)>,
197}
198
199impl Default for Buffers {
200    #[inline(always)]
201    fn default() -> Self {
202        Self {
203            original_buffers: PrimaryMap::new(),
204            modifications_newline_offsets: Vec::with_capacity(1024),
205            modifications_char_checkpoints: Vec::with_capacity(128),
206            modifications_buffer: String::with_capacity(1024 * 64),
207        }
208    }
209}
210
211#[must_use]
212#[inline(always)]
213pub fn count_chars_and_newlines(bytes: &[u8]) -> (u32, u32) {
214    let mut chars    = 0u32;
215    let mut newlines = 0u32;
216
217    for &b in bytes {
218        newlines += (b == b'\n')         as u32;
219        chars    += ((b & 0xC0) != 0x80) as u32;
220    }
221
222    (chars, newlines)
223}
224
225#[must_use]
226#[inline(always)]
227pub fn count_chars_and_newlines_with_offsets_and_checkpoints(
228    bytes: &[u8],
229    offsets: &mut Vec<u32>,
230    checkpoints: &mut Vec<(u32, u32)>,
231) -> (u32, u32) {
232    let mut chars    = 0u32;
233    let mut newlines = 0u32;
234
235    let mut next_checkpoint = CHECKPOINT_INTERVAL;
236
237    for (i, &b) in bytes.iter().enumerate() {
238        if (b as i8) >= -0x40 {
239            if chars == next_checkpoint {
240                checkpoints.push((chars, i as u32));
241                next_checkpoint += CHECKPOINT_INTERVAL;
242            }
243            chars += 1;
244        }
245
246        if b == b'\n' {
247            offsets.push(i as u32);
248            newlines += 1;
249        }
250    }
251
252    (chars, newlines)
253}
254
255impl Buffers {
256    #[inline(always)]
257    #[must_use]
258    pub fn new() -> Self { Self::default() }
259
260    #[inline(always)]
261    #[must_use]
262    pub fn get(&self, buffer: BufferRef) -> &str {
263        if buffer == MOD_BUFFER {
264            &self.modifications_buffer
265        } else {
266            &self.original_buffers[buffer]
267        }
268    }
269
270    #[inline(always)]
271    #[must_use]
272    pub fn get_slice(&self, buffer: BufferRef, offset: u32, len: u32) -> &str {
273        let buf = self.get(buffer);
274        let start = offset as usize;
275        let end = start + len as usize;
276        unsafe { str::from_utf8_unchecked(buf.as_bytes().get_unchecked(start..end)) }
277    }
278
279    #[inline]
280    #[must_use]
281    pub fn count_chars_and_newlines(&self, buffer: BufferRef, start_offset: u32, len: u32) -> (u32, u32) {
282        if len == 0 { return (0, 0) }
283
284        let end_offset = start_offset + len;
285
286        //
287        // O(log N) newlines count
288        //
289        let nl_offsets = self.get_newlines(buffer);
290        let start_nl_index = nl_offsets.binary_search(&start_offset).unwrap_or_else(|x| x);
291        let end_nl_index = nl_offsets.binary_search(&end_offset).unwrap_or_else(|x| x);
292        let newlines = (end_nl_index - start_nl_index) as u32;
293
294        //
295        // O(log N) char count
296        //
297
298        let checkpoints = self.get_checkpoints(buffer);
299        let buffer_text = self.get(buffer);
300
301        //
302        // Gets absolute char count from start of buffer to `target_offset`
303        //
304        let chars_up_to = |target_offset: u32| -> u32 {
305            if target_offset == 0 { return 0; }
306
307            //
308            // Find the index of the first checkpoint that is strictly strictly greater than our target
309            //
310            let partition_index = checkpoints.partition_point(|&(_char_cnt, byte_offset)| byte_offset <= target_offset);
311
312            let (base_chars, base_bytes) = if partition_index > 0 {
313                checkpoints[partition_index - 1]
314            } else {
315                (0, 0)
316            };
317
318            //
319            // Linearly scan only the remaining bytes from the checkpoint to the target
320            //
321            let tail_bytes = &buffer_text[base_bytes as usize .. target_offset as usize];
322            base_chars + bytecount::num_chars(tail_bytes.as_bytes()) as u32
323        };
324
325        let chars = chars_up_to(end_offset) - chars_up_to(start_offset);
326
327        (chars, newlines)
328    }
329
330    #[inline(always)]
331    #[must_use]
332    pub fn count_chars(&self, buffer: BufferRef, offset: u32, len: u32) -> u32 {
333        bytecount::num_chars(self.get_slice(buffer, offset, len).as_bytes()) as u32
334    }
335
336    #[inline(always)]
337    #[must_use]
338    pub fn count_newlines(&self, buffer: BufferRef, offset: u32, len: u32) -> u32 {
339        bytecount::count(self.get_slice(buffer, offset, len).as_bytes(), b'\n') as _
340    }
341
342    #[inline(always)]
343    fn get_newlines(&self, buffer: BufferRef) -> &[u32] {
344        if buffer == MOD_BUFFER {
345            &self.modifications_newline_offsets
346        } else {
347            &self.original_buffers[buffer].newline_offsets
348        }
349    }
350
351    #[inline(always)]
352    fn get_checkpoints(&self, buffer: BufferRef) -> &[(u32, u32)] {
353        if buffer == MOD_BUFFER {
354            &self.modifications_char_checkpoints
355        } else {
356            &self.original_buffers[buffer].char_checkpoints
357        }
358    }
359
360    /// Converts an absolute char index to an absolute byte offset for a specific buffer
361    #[must_use]
362    #[inline]
363    pub fn char_to_byte_absolute(&self, buffer: BufferRef, target_char: u32) -> u32 {
364        if target_char == 0 { return 0; }
365
366        let checkpoints = self.get_checkpoints(buffer);
367        let cp_index = checkpoints.partition_point(|&(c, _)| c <= target_char);
368
369        let (current_char, current_byte) = if cp_index == 0 {
370            (0, 0)
371        } else {
372            unsafe { *checkpoints.get_unchecked(cp_index - 1) }
373        };
374
375        let remainder_chars = target_char - current_char;
376        if remainder_chars == 0 { return current_byte; }
377
378        let text = self.get(buffer);
379        let slice = unsafe { text.get_unchecked(current_byte as usize..) };
380
381        let additional_bytes = slice
382            .char_indices()
383            .nth(remainder_chars as usize).map_or_else(|| slice.len() as u32, |(b, _)| b as u32);  // Fallback to end if out of bounds
384
385        current_byte + additional_bytes
386    }
387
388    /// Converts an absolute byte offset to an absolute char index for a specific buffer
389    #[must_use]
390    #[inline]
391    pub fn byte_to_char_absolute(&self, buffer: BufferRef, target_byte: u32) -> u32 {
392        if target_byte == 0 { return 0; }
393
394        let checkpoints = self.get_checkpoints(buffer);
395        let cp_index = checkpoints.partition_point(|&(_, b)| b <= target_byte);
396
397        let (current_char, current_byte) = if cp_index == 0 {
398            (0, 0)
399        } else {
400            unsafe { *checkpoints.get_unchecked(cp_index - 1) }
401        };
402
403        let remainder_bytes = target_byte - current_byte;
404        if remainder_bytes == 0 { return current_char; }
405
406        let text = self.get(buffer);
407        let slice = unsafe { text.get_unchecked(current_byte as usize..target_byte as usize) };
408        let additional_chars = bytecount::num_chars(slice.as_bytes()) as u32;
409
410        current_char + additional_chars
411    }
412}
413
414#[derive(Debug, Clone)]
415pub struct Pieces {
416    pub nodes: PrimaryMap<NodeRef, Node>,
417}
418
419impl Default for Pieces {
420    #[inline(always)]
421    fn default() -> Self {
422        Self::new()
423    }
424}
425
426impl Pieces {
427    #[inline(always)]
428    #[must_use]
429    pub fn new() -> Self {
430        let mut nodes = PrimaryMap::new();
431        nodes.push(Node {
432            left: NIL, right: NIL, offset: 0,
433            subtree_len: 0, subtree_chars: 0, subtree_newlines: 0,
434            meta: 0, buffer_start_line: 0,
435            piece_start_char: 0
436        });
437        Self { nodes }
438    }
439
440    #[inline(always)]
441    #[must_use]
442    pub fn len(&self) -> usize { self.nodes.len() }
443
444    #[inline(always)]
445    #[must_use]
446    pub fn is_empty(&self) -> bool { self.nodes.is_empty() }
447
448    #[inline(always)]
449    #[must_use]
450    pub fn get(&self, index: NodeRef) -> &Node { &self.nodes[index] }
451
452    #[inline(always)]
453    #[must_use]
454    pub fn get_piece(&self, index: NodeRef) -> Piece {
455        if index == NIL { return Piece::default(); }
456
457        let node = &self.nodes[index];
458        let l = &self.nodes[node.left];
459        let r = &self.nodes[node.right];
460
461        Piece {
462            buffer: node.buffer_index(),
463            byte_offset: node.offset,
464            byte_length: node.subtree_len - l.subtree_len - r.subtree_len,
465            char_count: node.subtree_chars - l.subtree_chars - r.subtree_chars,
466            newline_count: node.subtree_newlines - l.subtree_newlines - r.subtree_newlines,
467            buffer_start_line: node.buffer_start_line,
468            piece_start_char: node.piece_start_char,
469        }
470    }
471
472    #[inline(always)]
473    pub fn alloc(&mut self, color: Color, left: NodeRef, piece: Piece, right: NodeRef) -> NodeRef {
474        let l = &self.nodes[left];
475        let r = &self.nodes[right];
476
477        let mut node = Node {
478            left, right,
479            offset: piece.byte_offset,
480            subtree_len: l.subtree_len + piece.byte_length + r.subtree_len,
481            subtree_chars: l.subtree_chars + piece.char_count + r.subtree_chars,
482            subtree_newlines: l.subtree_newlines + piece.newline_count + r.subtree_newlines,
483            buffer_start_line: piece.buffer_start_line,
484            piece_start_char: piece.piece_start_char,
485            meta: 0,
486        };
487        node.set_color(color);
488        node.set_buffer(piece.buffer);
489
490        self.nodes.push(node)
491    }
492
493    /// Extends a piece without triggering Red-Black structural changes.
494    /// It creates a single new path to the root to maintain persistence.
495    pub fn extend_piece(
496        &mut self,
497        root: NodeRef,
498        piece_start_offset: u32,
499        add_len: u32,
500        add_chars: u32,
501        add_nl: u32,
502    ) -> NodeRef {
503        if root == NIL { return NIL }
504
505        let node = self.nodes[root];
506        let mut new_node = node;
507
508        let left_len = self.nodes[node.left].subtree_len;
509        let piece_len = node.subtree_len - left_len - self.nodes[node.right].subtree_len;
510
511        if piece_start_offset < left_len {
512            new_node.left  = self.extend_piece(node.left, piece_start_offset, add_len, add_chars, add_nl);
513        } else if piece_start_offset > left_len {
514            new_node.right = self.extend_piece(node.right, piece_start_offset - (left_len + piece_len), add_len, add_chars, add_nl);
515        } else {
516            // piece_start_offset == left_len
517            // We found the exact node! No further recursion needed.
518        }
519
520        new_node.subtree_len      += add_len;
521        new_node.subtree_chars    += add_chars;
522        new_node.subtree_newlines += add_nl;
523
524        self.nodes.push(new_node)
525    }
526
527    #[inline]
528    pub fn insert_node(&mut self, root: NodeRef, piece: Piece, at: u32) -> NodeRef {
529        let new_root = self.ins(root, piece, at, 0);
530        let r_node = self.nodes[new_root];
531        let p = self.get_piece(new_root);
532        self.alloc(Color::Black, r_node.left, p, r_node.right)
533    }
534
535    #[inline]
536    pub fn remove_node(&mut self, root: NodeRef, at: u32) -> NodeRef {
537        let new_root = self.rem(root, at, 0);
538        if new_root == NIL { return NIL }
539
540        let r_node = self.nodes[new_root];
541        self.alloc(Color::Black, r_node.left, self.get_piece(new_root), r_node.right)
542    }
543
544    #[inline]
545    fn rem(&mut self, root: NodeRef, at: u32, total: u32) -> NodeRef {
546        if root == NIL { return NIL }
547
548        let node = self.nodes[root];
549        let node_piece = self.get_piece(root);
550        let left_len = self.nodes[node.left].subtree_len;
551
552        match at.cmp(&(total + left_len)) {
553            Ordering::Less => self.remove_left(root, at, total),
554            Ordering::Equal => self.fuse(node.left, node.right),
555            Ordering::Greater => {
556                let next_total = total + left_len + node_piece.byte_length;
557                self.remove_right(root, at, next_total)
558            }
559        }
560    }
561
562    #[inline]
563    fn remove_left(&mut self, root: NodeRef, at: u32, total: u32) -> NodeRef {
564        let node = self.nodes[root];
565        let new_left = self.rem(node.left, at, total);
566        let new_node = self.alloc(Color::Red, new_left, self.get_piece(root), node.right);
567        if self.nodes[node.left].color() == Color::Black {
568            self.balance_left(new_node)
569        } else {
570            new_node
571        }
572    }
573
574    #[inline]
575    fn remove_right(&mut self, root: NodeRef, at: u32, total: u32) -> NodeRef {
576        let node = self.nodes[root];
577        let new_right = self.rem(node.right, at, total);
578        let new_node = self.alloc(Color::Red, node.left, self.get_piece(root), new_right);
579        if self.nodes[node.right].color() == Color::Black {
580            self.balance_right(new_node)
581        } else {
582            new_node
583        }
584    }
585
586    #[inline]
587    fn ins(&mut self, root: NodeRef, p: Piece, at: u32, total_offset: u32) -> NodeRef {
588        if root == NIL { return self.alloc(Color::Red, NIL, p, NIL); }
589
590        let node = self.nodes[root];
591        let node_piece = self.get_piece(root);
592        let left_len = self.nodes[node.left].subtree_len;
593
594        if at < total_offset + left_len + node_piece.byte_length {
595            let lft = self.ins(node.left, p, at, total_offset);
596            self.balance(node.color(), lft, node_piece, node.right)
597        } else {
598            let next_offset = total_offset + left_len + node_piece.byte_length;
599            let rgt = self.ins(node.right, p, at, next_offset);
600            self.balance(node.color(), node.left, node_piece, rgt)
601        }
602    }
603
604    #[inline]
605    fn balance(&mut self, c: Color, l_index: NodeRef, p: Piece, r_index: NodeRef) -> NodeRef {
606        let l = self.nodes[l_index];
607        let r = self.nodes[r_index];
608
609        if c == Color::Black {
610            if l.color() == Color::Red {
611                let ll = self.nodes[l.left];
612                let lr = self.nodes[l.right];
613                if ll.color() == Color::Red {
614                    let new_l = self.alloc(Color::Black, ll.left, self.get_piece(l.left), ll.right);
615                    let new_r = self.alloc(Color::Black, l.right, p, r_index);
616                    return self.alloc(Color::Red, new_l, self.get_piece(l_index), new_r);
617                } else if lr.color() == Color::Red {
618                    let new_l = self.alloc(Color::Black, l.left, self.get_piece(l_index), lr.left);
619                    let new_r = self.alloc(Color::Black, lr.right, p, r_index);
620                    return self.alloc(Color::Red, new_l, self.get_piece(l.right), new_r);
621                }
622            }
623
624            if r.color() == Color::Red {
625                let rl = self.nodes[r.left];
626                let rr = self.nodes[r.right];
627                if rl.color() == Color::Red {
628                    let new_l = self.alloc(Color::Black, l_index, p, rl.left);
629                    let new_r = self.alloc(Color::Black, rl.right, self.get_piece(r_index), r.right);
630                    return self.alloc(Color::Red, new_l, self.get_piece(r.left), new_r);
631                } else if rr.color() == Color::Red {
632                    let new_l = self.alloc(Color::Black, l_index, p, r.left);
633                    let new_r = self.alloc(Color::Black, rr.left, self.get_piece(r.right), rr.right);
634                    return self.alloc(Color::Red, new_l, self.get_piece(r_index), new_r);
635                }
636            }
637        }
638
639        self.alloc(c, l_index, p, r_index)
640    }
641
642    #[inline]
643    fn fuse(&mut self, left: NodeRef, right: NodeRef) -> NodeRef {
644        // match: (left, right)
645
646        // case: (None, r)
647        if left  == NIL { return right }
648        if right == NIL { return left }
649
650        let l_node = self.nodes[left];
651        let r_node = self.nodes[right];
652
653        // match: (left.color, right.color)
654
655        // case: (B, R)
656        if l_node.color() == Color::Black && r_node.color() == Color::Red {
657            let fused = self.fuse(left, r_node.left);
658            return self.alloc(Color::Red, fused, self.get_piece(right), r_node.right);
659        }
660
661        // case: (R, B)
662        if l_node.color() == Color::Red && r_node.color() == Color::Black {
663            let fused = self.fuse(l_node.right, right);
664            return self.alloc(Color::Red, l_node.left, self.get_piece(left), fused);
665        }
666
667        // case: (R, R)
668        if l_node.color() == Color::Red && r_node.color() == Color::Red {
669            let fused = self.fuse(l_node.right, r_node.left);
670            let f_node = self.nodes[fused];
671            if fused != NIL && f_node.color() == Color::Red {
672                let new_l = self.alloc(Color::Red, l_node.left, self.get_piece(left), f_node.left);
673                let new_r = self.alloc(Color::Red, f_node.right, self.get_piece(right), r_node.right);
674                return self.alloc(Color::Red, new_l, self.get_piece(fused), new_r);
675            }
676            let new_r = self.alloc(Color::Red, fused, self.get_piece(right), r_node.right);
677            return self.alloc(Color::Red, l_node.left, self.get_piece(left), new_r);
678        }
679
680        // case: (B, B)
681
682        let fused = self.fuse(l_node.right, r_node.left);
683        let f_node = self.nodes[fused];
684        if fused != NIL && f_node.color() == Color::Red {
685            let new_l = self.alloc(Color::Black, l_node.left, self.get_piece(left), f_node.left);
686            let new_r = self.alloc(Color::Black, f_node.right, self.get_piece(right), r_node.right);
687            return self.alloc(Color::Red, new_l, self.get_piece(fused), new_r);
688        }
689
690        let new_r = self.alloc(Color::Black, fused, self.get_piece(right), r_node.right);
691        let new_node = self.alloc(Color::Red, l_node.left, self.get_piece(left), new_r);
692        self.balance_left(new_node)
693    }
694
695    #[inline]
696    fn balance_left(&mut self, left: NodeRef) -> NodeRef {
697        let l_node = self.nodes[left];
698        let ll_node = self.nodes[l_node.left];
699        let lr_node = self.nodes[l_node.right];
700
701        // match: (color_l, color_r, color_r_l)
702
703        // case: (Some(R), ..)
704        if l_node.left != NIL && ll_node.color() == Color::Red {
705            let new_ll = self.alloc(Color::Black, ll_node.left, self.get_piece(l_node.left), ll_node.right);
706            return self.alloc(Color::Red, new_ll, self.get_piece(left), l_node.right);
707        }
708
709        // case: (_, Some(B), _)
710        if l_node.right != NIL && lr_node.color() == Color::Black {
711            let new_lr = self.alloc(Color::Red, lr_node.left, self.get_piece(l_node.right), lr_node.right);
712            let new_l = self.alloc(Color::Black, l_node.left, self.get_piece(left), new_lr);
713            let nl = self.nodes[new_l];
714            return self.balance(Color::Black, nl.left, self.get_piece(new_l), nl.right);
715        }
716
717        // case: (_, Some(R), Some(B))
718        if l_node.right != NIL && lr_node.color() == Color::Red {
719            let lrl_node = self.nodes[lr_node.left];
720            if lr_node.left != NIL && lrl_node.color() == Color::Black {
721                let lrr_node = self.nodes[lr_node.right];
722                let new_lrr = self.alloc(Color::Red, lrr_node.left, self.get_piece(lr_node.right), lrr_node.right);
723                let unbal_r = self.alloc(Color::Black, lrl_node.right, self.get_piece(l_node.right), new_lrr);
724                let ur_node = self.nodes[unbal_r];
725                let new_r = self.balance(ur_node.color(), ur_node.left, self.get_piece(unbal_r), ur_node.right);
726                let new_l = self.alloc(Color::Black, l_node.left, self.get_piece(left), lrl_node.left);
727                return self.alloc(Color::Red, new_l, self.get_piece(lr_node.left), new_r);
728            }
729        }
730
731        left
732    }
733
734    #[inline]
735    fn balance_right(&mut self, right: NodeRef) -> NodeRef {
736        let r_node = self.nodes[right];
737        let rl_node = self.nodes[r_node.left];
738        let rr_node = self.nodes[r_node.right];
739
740        // match: (color_l, color_l_r, color_r)
741
742        // case: (.., Some(R))
743        if r_node.right != NIL && rr_node.color() == Color::Red {
744            let new_rr = self.alloc(Color::Black, rr_node.left, self.get_piece(r_node.right), rr_node.right);
745            return self.alloc(Color::Red, r_node.left, self.get_piece(right), new_rr);
746        }
747
748        // case: (Some(B), ..)
749        if r_node.left != NIL && rl_node.color() == Color::Black {
750            let new_rl = self.alloc(Color::Red, rl_node.left, self.get_piece(r_node.left), rl_node.right);
751            let new_r = self.alloc(Color::Black, new_rl, self.get_piece(right), r_node.right);
752            let nr = self.nodes[new_r];
753            return self.balance(Color::Black, nr.left, self.get_piece(new_r), nr.right);
754        }
755
756        // case: (Some(R), Some(B), _)
757        if r_node.left != NIL && rl_node.color() == Color::Red {
758            let rlr_node = self.nodes[rl_node.right];
759            if rl_node.right != NIL && rlr_node.color() == Color::Black {
760                let rll_node = self.nodes[rl_node.left];
761                let new_rll = self.alloc(Color::Red, rll_node.left, self.get_piece(rl_node.left), rll_node.right);
762                let unbal_l = self.alloc(Color::Black, new_rll, self.get_piece(r_node.left), rlr_node.left);
763                let ul_node = self.nodes[unbal_l];
764                let new_l = self.balance(ul_node.color(), ul_node.left, self.get_piece(unbal_l), ul_node.right);
765                let new_r = self.alloc(Color::Black, rlr_node.right, self.get_piece(right), r_node.right);
766                return self.alloc(Color::Red, new_l, self.get_piece(rl_node.right), new_r);
767            }
768        }
769
770        right
771    }
772
773    #[inline]
774    #[must_use]
775    pub fn find_offset(
776        &self,
777        mut root: NodeRef,
778        mut target_offset: u32,
779        prefer_left: bool
780    ) -> Option<(NodeRef, u32)> {
781        while root != NIL {
782            let node = &self.nodes[root];
783            let left_len = self.nodes[node.left].subtree_len;
784            let piece_len = self.get_piece(root).byte_length;
785
786            if target_offset < left_len {
787                root = node.left;
788
789            } else if target_offset == left_len + piece_len && prefer_left {
790                // We are perfectly on the boundary and want to merge with the left piece
791                return Some((root, piece_len));
792
793            } else if target_offset < left_len + piece_len {
794                return Some((root, target_offset - left_len));
795
796            } else {
797                target_offset -= left_len + piece_len;
798                root = node.right;
799            }
800        }
801
802        None
803    }
804}
805
806impl PieceTree {
807    /// Slices a tree exactly at `offset`, returning (`LeftTree`, `RightTree`)
808    #[inline]
809    pub fn split(&mut self, root: NodeRef, offset: u32) -> (NodeRef, NodeRef) {
810        if root == NIL {
811            return (NIL, NIL);
812        }
813
814        let node = self.pieces.nodes[root];
815        let p = self.pieces.get_piece(root);
816        let left_len = self.pieces.nodes[node.left].subtree_len;
817
818        if offset < left_len {
819            let (ll, lr) = self.split(node.left, offset);
820
821            // Fast path: Use join_with_middle directly
822            let new_right = self.pieces.join_with_middle(lr, p, node.right);
823            (ll, new_right)
824
825        } else if offset > left_len + p.byte_length {
826            let (rl, rr) = self.split(node.right, offset - left_len - p.byte_length);
827
828            // Fast path: Use join_with_middle directly
829            let new_left = self.pieces.join_with_middle(node.left, p, rl);
830            (new_left, rr)
831
832        } else {
833            let rel_offset = offset - left_len;
834
835            if rel_offset == 0 {
836                (node.left, self.pieces.join_with_middle(NIL, p, node.right))
837
838            } else if rel_offset == p.byte_length {
839                (self.pieces.join_with_middle(node.left, p, NIL), node.right)
840
841            } else {
842                //
843                // We have to slice the Piece exactly in half!
844                //
845                let left_len = rel_offset;
846                let right_len = p.byte_length - rel_offset;
847
848                //
849                // Only scan the smaller half of the piece to avoid O(N) bottlenecks
850                //
851                let (left_chars, left_nl) = if left_len <= right_len {
852                    // Left side is shorter, scan normally
853                    self.buffers.count_chars_and_newlines(p.buffer, p.byte_offset, left_len)
854                } else {
855                    // Right side is shorter, scan it and subtract from the known totals
856                    let (r_chars, r_nl) = self.buffers.count_chars_and_newlines(
857                        p.buffer, p.byte_offset + left_len, right_len
858                    );
859
860                    (p.char_count - r_chars, p.newline_count - r_nl)
861                };
862
863                let left_stump = Piece {
864                    buffer: p.buffer,
865                    byte_offset: p.byte_offset,
866                    byte_length: left_len,
867                    newline_count: left_nl,
868                    char_count: left_chars,
869                    buffer_start_line: p.buffer_start_line,
870                    piece_start_char: p.piece_start_char,
871                };
872
873                let right_stump = Piece {
874                    buffer: p.buffer,
875                    byte_offset: p.byte_offset + left_len,
876                    byte_length: right_len,
877                    newline_count: p.newline_count - left_nl,
878                    char_count: p.char_count - left_chars,
879                    buffer_start_line: p.buffer_start_line + left_nl,
880                    piece_start_char: p.piece_start_char + left_chars,
881                };
882
883                let new_left = self.pieces.join_with_middle(node.left, left_stump, NIL);
884                let new_right = self.pieces.join_with_middle(NIL, right_stump, node.right);
885                (new_left, new_right)
886            }
887        }
888    }
889
890    /// Glues two arbitrary trees together by extracting the max element of the left tree
891    #[inline]
892    pub fn concat(&mut self, left: NodeRef, right: NodeRef) -> NodeRef {
893        //
894        // Group the evaluation so we can force a Black root on the way out!
895        //
896        let new_root = if left == NIL {
897            right
898        } else if right == NIL {
899            left
900        } else {
901            let mut curr = left;
902            while self.pieces.nodes[curr].right != NIL {
903                curr = self.pieces.nodes[curr].right;
904            }
905            let max_piece = self.pieces.get_piece(curr);
906            let max_offset = self.pieces.nodes[left].subtree_len - max_piece.byte_length;
907
908            let left_without_max = self.pieces.remove_node(left, max_offset);
909
910            self.pieces.join_with_middle(left_without_max, max_piece, right)
911        };
912
913        //
914        // Even if we early-returned right, we MUST ensure the final returned root is Black.
915        //
916        if new_root != NIL && self.pieces.nodes[new_root].color() == Color::Red {
917            let p = self.pieces.get_piece(new_root);
918            let r = self.pieces.nodes[new_root];
919            return self.pieces.alloc(Color::Black, r.left, p, r.right);
920        }
921
922        new_root
923    }
924}
925
926impl Pieces {
927    /// Recursively counts the black height of a given subtree
928    #[inline(always)]
929    #[must_use]
930    pub fn black_height(&self, mut node: NodeRef) -> u32 {
931        let mut h = 0;
932        while node != NIL {
933            if self.nodes[node].color() == Color::Black {
934                h += 1;
935            }
936            node = self.nodes[node].left;
937        }
938        h
939    }
940
941    /// Safely joins two arbitrary Red-Black trees using a middle element
942    #[inline(always)]
943    pub fn join_with_middle(&mut self, left: NodeRef, piece: Piece, right: NodeRef) -> NodeRef {
944        let hl = self.black_height(left);
945        let hr = self.black_height(right);
946
947        let new_root = if hl > hr {
948            self.join_right(left, piece, right, hl, hr)
949        } else if hr > hl {
950            self.join_left(left, piece, right, hl, hr)
951        } else {
952            self.alloc(Color::Red, left, piece, right)
953        };
954
955        // Enforce the Red-Black root constraint!
956        if new_root != NIL && self.nodes[new_root].color() == Color::Red {
957            let p = self.get_piece(new_root);
958            let r = self.nodes[new_root];
959            return self.alloc(Color::Black, r.left, p, r.right);
960        }
961
962        new_root
963    }
964
965    #[inline(always)]
966    fn join_right(&mut self, left: NodeRef, piece: Piece, right: NodeRef, hl: u32, hr: u32) -> NodeRef {
967        if hl == hr {
968            return self.alloc(Color::Red, left, piece, right);
969        }
970
971        let l_node = self.nodes[left];
972        let next_hl = if l_node.color() == Color::Black { hl - 1 } else { hl };
973
974        let new_right = self.join_right(l_node.right, piece, right, next_hl, hr);
975        let p = self.get_piece(left);
976
977        self.balance(l_node.color(), l_node.left, p, new_right)
978    }
979
980    #[inline(always)]
981    fn join_left(&mut self, left: NodeRef, piece: Piece, right: NodeRef, hl: u32, hr: u32) -> NodeRef {
982        if hl == hr {
983            return self.alloc(Color::Red, left, piece, right);
984        }
985
986        let r_node = self.nodes[right];
987        let next_hr = if r_node.color() == Color::Black { hr - 1 } else { hr };
988
989        let new_left = self.join_left(left, piece, r_node.left, hl, next_hr);
990        let p = self.get_piece(right);
991
992        self.balance(r_node.color(), new_left, p, r_node.right)
993    }
994}
995
996#[derive(Default, Debug, Clone)]
997pub struct PieceTree {
998    //
999    // `last_insert_end`      is the mod-buffer offset one past the last inserted byte.
1000    // `last_insert_tree_end` is the document   offset one past that insertion.
1001    //
1002    // Both are u32::MAX when unset (after removes, undos, or on init).
1003    //
1004    last_mod_end:  u32,  // fredbuf's last_insert     (BufferCursor)
1005    last_tree_end: u32,  // fredbuf's end_last_insert (CharOffset)
1006
1007    pub root: NodeRef,
1008
1009    /// Tracks nested undo groups. 0 if we are outside any group.
1010    pub transaction_depth: usize,
1011
1012    pub undo_stack: Vec<HistoryEntry>,
1013    pub redo_stack: Vec<HistoryEntry>,
1014
1015    pub pieces:  Pieces,
1016    pub buffers: Buffers,
1017
1018    pub scratch_index_map: Vec<u32>,
1019}
1020
1021impl core::fmt::Display for PieceTree {
1022    #[inline(always)]
1023    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1024        let walker = TreeWalker::new(self);
1025        for char in walker {
1026            write!(f, "{char}")?;
1027        }
1028
1029        Ok(())
1030    }
1031}
1032
1033impl<T> From<T> for PieceTree where T: AsRef<str> {
1034    #[inline(always)]
1035    fn from(value: T) -> Self {
1036        Self::from_str(value.as_ref())
1037    }
1038}
1039
1040impl str::FromStr for PieceTree {
1041    type Err = core::convert::Infallible;
1042
1043    #[inline(always)]
1044    fn from_str(s: &str) -> Result<Self, Self::Err> {
1045        Ok(PieceTree::from(s))
1046    }
1047}
1048
1049impl PieceTree {
1050    #[inline(always)]
1051    #[must_use]
1052    pub fn new() -> Self {
1053        Self::default()
1054    }
1055
1056    #[inline(always)]
1057    #[must_use]
1058    pub fn from_str(text: &str) -> Self {
1059        #[inline(always)]
1060        fn is_utf8_char_boundary(b: u8) -> bool { (b as i8) >= -0x40 }
1061
1062        let mut tree = PieceTree::new();
1063
1064        let mut offset = 0;
1065
1066        let text_bytes = text.as_bytes();
1067        let total_len = text.len() as u32;
1068
1069        while offset < total_len {
1070            // Find the end of the chunk
1071            let mut chunk_len = (total_len - offset).min(MAX_PIECE_SIZE);
1072
1073            // SAFETY: Ensure we don't slice a UTF-8 character in half!
1074            while offset + chunk_len < total_len && !is_utf8_char_boundary(text_bytes[(offset + chunk_len) as usize]) {
1075                chunk_len += 1;
1076            }
1077
1078            let chunk_text = &text[offset as usize .. (offset + chunk_len) as usize];
1079
1080            // Insert the chunk at the very end of the tree
1081            tree.insert(tree.total_length(), chunk_text);
1082
1083            offset += chunk_len;
1084        }
1085
1086        tree
1087    }
1088}
1089
1090impl PieceTree {
1091    /// Captures the current state of the document.
1092    #[inline]
1093    #[must_use]
1094    pub fn take_snapshot(&self, current_cursor: u32) -> HistoryEntry {
1095        HistoryEntry {
1096            root: self.root,
1097            cursor_offset: current_cursor,
1098        }
1099    }
1100
1101    /// Restores the tree to a previously saved snapshot.
1102    /// Returns the cursor offset from the snapshot.
1103    #[inline]
1104    #[must_use]
1105    pub fn snap_to(&mut self, snapshot: HistoryEntry, current_cursor: u32) -> u32 {
1106        if self.root == snapshot.root {
1107            return snapshot.cursor_offset;
1108        }
1109
1110        //
1111        // Prevent snapping while in the middle of an active transaction
1112        //
1113        assert!(self.transaction_depth == 0, "Cannot snap_to during an active undo group");
1114
1115        //
1116        // Save the current state to the undo stack so the user can undo the jump
1117        //
1118        self.undo_stack.push(HistoryEntry {
1119            root: self.root,
1120            cursor_offset: current_cursor,
1121        });
1122
1123        //
1124        // Clear the redo stack
1125        //
1126        self.redo_stack.clear();
1127
1128        //
1129        // Invalidate caches
1130        //
1131        self.last_tree_end = u32::MAX;
1132        self.last_mod_end  = u32::MAX;
1133
1134        //
1135        //  Restore the tree
1136        //
1137        self.root = snapshot.root;
1138
1139        snapshot.cursor_offset
1140    }
1141}
1142
1143impl PieceTree {
1144    /// Starts a new undo group, saves the current state if this is the outermost group.
1145    #[inline]
1146    pub fn begin_undo_group(&mut self, cursor_offset: u32) {
1147        if self.transaction_depth == 0 {
1148            self.commit_head(cursor_offset);
1149        }
1150
1151        self.transaction_depth += 1;
1152    }
1153
1154    /// Ends the current undo group
1155    #[inline]
1156    pub fn end_undo_group(&mut self) {
1157        if self.transaction_depth > 0 {
1158            self.transaction_depth -= 1;
1159        }
1160    }
1161
1162    #[inline(always)]
1163    pub fn commit_head(&mut self, cursor_offset: u32) {
1164        if let Some(last) = self.undo_stack.last() {
1165            if last.root == self.root {
1166                return;  // Root hasn't changed, skip duplication
1167            }
1168        }
1169
1170        self.undo_stack.push(HistoryEntry { root: self.root, cursor_offset });
1171        self.redo_stack.clear();
1172    }
1173
1174    #[inline(always)]
1175    pub fn try_undo(&mut self, current_cursor: u32) -> Option<u32> {
1176        if self.transaction_depth > 0 {
1177            return None;
1178        }
1179
1180        if let Some(entry) = self.undo_stack.pop() {
1181            self.last_tree_end = u32::MAX;
1182            self.last_mod_end  = u32::MAX;
1183
1184            self.redo_stack.push(HistoryEntry {
1185                root: self.root, cursor_offset: current_cursor
1186            });
1187            self.root = entry.root;
1188            return Some(entry.cursor_offset);
1189        }
1190
1191        None
1192    }
1193
1194    #[inline(always)]
1195    pub fn try_redo(&mut self, current_cursor: u32) -> Option<u32> {
1196        if let Some(entry) = self.redo_stack.pop() {
1197            self.last_tree_end = u32::MAX;
1198            self.last_mod_end  = u32::MAX;
1199
1200            self.undo_stack.push(HistoryEntry {
1201                root: self.root, cursor_offset: current_cursor
1202            });
1203            self.root = entry.root;
1204            return Some(entry.cursor_offset);
1205        }
1206
1207        None
1208    }
1209
1210    #[inline(always)]
1211    #[must_use]
1212    pub fn total_length(&self) -> u32 {
1213        self.pieces.get(self.root).subtree_len
1214    }
1215
1216    #[inline(always)]
1217    pub fn apply_edits(&mut self, primary_cursor_offset: u32, edits: &mut [Edit]) {
1218        if edits.is_empty() { return }
1219
1220        self.begin_undo_group(primary_cursor_offset);
1221
1222        edits.sort_by_key(|b| core::cmp::Reverse(b.offset()));
1223        for edit in edits {
1224            match edit {
1225                Edit::Insert { offset, text }   => self.insert_no_commit(*offset, text),
1226                Edit::Remove { offset, length } => self.remove_no_commit(*offset, *length),
1227            }
1228        }
1229
1230        self.end_undo_group();
1231    }
1232
1233    /// Inserts a single character at the specified logical byte offset.
1234    #[inline(always)]
1235    pub fn insert_char(&mut self, offset: u32, ch: char) {
1236        self.commit_head(offset);
1237        self.insert_char_no_commit(offset, ch);
1238    }
1239
1240    /// Inserts a single character at the specified logical byte offset.
1241    #[inline(always)]
1242    pub fn insert_char_no_commit(&mut self, offset: u32, ch: char) {
1243        let mut buf = [0; 4];
1244        self.insert_no_commit(offset, ch.encode_utf8(&mut buf));
1245    }
1246
1247    #[inline(always)]
1248    pub fn insert(&mut self, offset: u32, text: &str) {
1249        if text.is_empty() { return }
1250
1251        let auto_group = self.transaction_depth == 0;
1252        if auto_group {
1253            self.begin_undo_group(offset);
1254        }
1255
1256        self.insert_no_commit(offset, text);
1257
1258        if auto_group {
1259            self.end_undo_group();
1260        }
1261    }
1262
1263    #[inline(always)]
1264    pub fn remove_at(&mut self, offset: u32, length: u32) {
1265        if length == 0 || self.root == NIL { return }
1266
1267        let auto_group = self.transaction_depth == 0;
1268        if auto_group {
1269            self.begin_undo_group(offset);
1270        }
1271
1272        self.remove_no_commit(offset, length);
1273
1274        if auto_group {
1275            self.end_undo_group();
1276        }
1277    }
1278
1279    pub fn insert_no_commit(&mut self, offset: u32, text: &str) {
1280        let mod_offset = self.buffers.modifications_buffer.len() as u32;
1281        let start_line_in_buffer = self.buffers.modifications_newline_offsets.len() as u32;
1282
1283        let mut total_char_count = if let Some(&(c, b)) = self.buffers.modifications_char_checkpoints.last() {
1284            let tail_bytes = &self.buffers.modifications_buffer[b as usize..mod_offset as usize];
1285            c + bytecount::num_chars(tail_bytes.as_bytes()) as u32
1286        } else {
1287            bytecount::num_chars(self.buffers.modifications_buffer[..mod_offset as usize].as_bytes()) as u32
1288        };
1289
1290        let mut newline_count = 0;
1291        let mut char_count = 0;
1292        {
1293            let rem = total_char_count % CHECKPOINT_INTERVAL;
1294            let mut next_checkpoint = total_char_count + ((CHECKPOINT_INTERVAL - rem) % CHECKPOINT_INTERVAL);
1295
1296            if next_checkpoint == 0 {
1297                next_checkpoint = CHECKPOINT_INTERVAL;
1298            }
1299
1300            //
1301            // @Cutnpaste from count_chars_and_newlines_with_offsets_and_checkpoints
1302            //
1303            for (i, b) in text.bytes().enumerate() {
1304                if (b as i8) >= -0x40 {
1305                    if total_char_count == next_checkpoint {
1306                        self.buffers.modifications_char_checkpoints.push((total_char_count, mod_offset + i as u32));
1307                        next_checkpoint += CHECKPOINT_INTERVAL;
1308                    }
1309
1310                    total_char_count += 1;
1311                    char_count += 1;
1312                }
1313
1314                if b == b'\n' {
1315                    self.buffers.modifications_newline_offsets.push(mod_offset + i as u32);
1316                    newline_count += 1;
1317                }
1318            }
1319        }
1320
1321        self.buffers.modifications_buffer.push_str(text);
1322        let text_len = text.len() as u32;
1323
1324        let new_mod_end  = mod_offset + text_len;
1325        let new_tree_end = offset + text_len;
1326
1327        //
1328        // Fast path for sequential typing
1329        //
1330        if offset > 0
1331        && self.last_tree_end == offset
1332        && self.last_mod_end == mod_offset
1333        && let Some((prev_index, prev_rel)) = self.find_position(offset - 1, false)
1334        {
1335            let prev = self.pieces.get_piece(prev_index);
1336
1337            //
1338            // Predecessor must be a mod-buf piece ending at mod_offset.
1339            //
1340            if prev.buffer == MOD_BUFFER && prev.byte_offset + prev.byte_length == mod_offset {
1341                let prev_start = (offset - 1) - prev_rel;
1342
1343                self.root = self.pieces.extend_piece(
1344                    self.root,
1345                    prev_start,
1346                    text_len,
1347                    char_count,
1348                    newline_count
1349                );
1350
1351                self.last_mod_end  = new_mod_end;
1352                self.last_tree_end = new_tree_end;
1353                return;
1354            }
1355        }
1356
1357        let new_piece = Piece {
1358            buffer: MOD_BUFFER,
1359            byte_offset: mod_offset,
1360            byte_length: text_len,
1361            newline_count,
1362            char_count,
1363            buffer_start_line: start_line_in_buffer,
1364            piece_start_char:  total_char_count - char_count,
1365        };
1366
1367        if self.root == NIL {
1368            self.root = self.pieces.insert_node(self.root, new_piece, offset);
1369        } else {
1370            //
1371            // Snip the tree exactly at the insertion offset
1372            //
1373            let (left, right) = self.split(self.root, offset);
1374
1375            //
1376            // Sandwich the new piece directly between the left and right trees!
1377            //
1378            self.root = self.pieces.join_with_middle(left, new_piece, right);
1379
1380            //
1381            // Clean up the resulting seams
1382            //
1383
1384            // Attempt to merge the left side with new_piece
1385            self.try_merge_at(offset);
1386
1387            // Attempt to merge new_piece with the right side
1388            self.try_merge_at(offset + text_len);
1389        }
1390
1391        self.last_mod_end  = new_mod_end;
1392        self.last_tree_end = new_tree_end;
1393    }
1394
1395    pub fn remove_no_commit(&mut self, offset: u32, mut length: u32) {
1396        let total = self.total_length();
1397        if offset >= total { return; }
1398        if offset + length > total { length = total - offset; }
1399        if length == 0 { return }
1400
1401        //
1402        // Snip off the portion of the tree that comes BEFORE the deletion
1403        //
1404        let (left, remainder) = self.split(self.root, offset);
1405
1406        //
1407        // Snip the deleted portion out of the remainder
1408        //
1409        let (_deleted, right) = self.split(remainder, length);
1410
1411        //
1412        // Glue the surviving outer halves back together
1413        //
1414        self.root = self.concat(left, right);
1415
1416        //
1417        // Clean up the resulting seam
1418        //
1419        self.try_merge_at(offset);
1420    }
1421
1422    #[inline]
1423    fn try_merge_at(&mut self, pos: u32) {
1424        if pos == 0 { return }
1425
1426        let Some((right_index, right_rel)) = self.find_position(pos, false) else { return };
1427
1428        if right_rel != 0 { return }
1429
1430        let right = self.pieces.get_piece(right_index);
1431        self.try_merge_right_with_left(pos, right);
1432    }
1433
1434    // Returns Some((merged_start, merged_piece)) if merge happened, None otherwise.
1435    #[inline]
1436    fn try_merge_right_with_left(&mut self, right_start: u32, right: Piece) -> Option<(u32, Piece)> {
1437        if right_start == 0 { return None; }
1438        if right.buffer != MOD_BUFFER { return None; }
1439
1440        let (left_index, left_rel) = self.find_position(right_start - 1, false)?;
1441        let left = self.pieces.get_piece(left_index);
1442
1443        if left.buffer != MOD_BUFFER { return None; }
1444        if left.byte_offset + left.byte_length != right.byte_offset { return None; }
1445
1446        let left_start = (right_start - 1) - left_rel;
1447
1448        self.root = self.pieces.remove_node(self.root, left_start);
1449        self.root = self.pieces.remove_node(self.root, left_start);
1450
1451        let merged = Piece {
1452            buffer:  MOD_BUFFER,
1453            byte_offset:        left.byte_offset,
1454            byte_length:        left.byte_length        + right.byte_length,
1455            newline_count: left.newline_count + right.newline_count,
1456            char_count:    left.char_count    + right.char_count,
1457            buffer_start_line: left.buffer_start_line,
1458            piece_start_char: left.piece_start_char,  // right side extends, left start unchanged
1459        };
1460        self.root = self.pieces.insert_node(self.root, merged, left_start);
1461        Some((left_start, merged))
1462    }
1463
1464    #[inline(always)]
1465    #[must_use]
1466    pub fn get_piece(&self, index: NodeRef) -> Piece {
1467        self.pieces.get_piece(index)
1468    }
1469
1470    #[inline]
1471    #[must_use]
1472    pub fn find_position(&self, offset: u32, prefer_left: bool) -> Option<(NodeRef, u32)> {
1473        let total = self.total_length();
1474        if offset > total { return None; }
1475
1476        if offset == total && total > 0 {
1477            let mut current = self.root;
1478            let mut last_valid = NIL;
1479            while current != NIL {
1480                last_valid = current;
1481                current = self.pieces.get(current).right;
1482            }
1483            let len = self.pieces.get_piece(last_valid).byte_length;
1484            return Some((last_valid, len));
1485        }
1486
1487        self.pieces.find_offset(self.root, offset, prefer_left)
1488    }
1489
1490    #[cfg(feature = "write")]
1491    #[inline]
1492    pub fn write_to<W: std::io::Write>(&self, mut writer: W) -> std::io::Result<()> {
1493        if self.root == NIL {
1494            return Ok(());
1495        }
1496
1497        for (_, piece) in self.pieces() {
1498            let slice = self.buffers.get_slice(piece.buffer, piece.byte_offset, piece.byte_length);
1499            writer.write_all(slice.as_bytes())?;
1500        }
1501
1502        writer.flush()
1503    }
1504}
1505
1506impl PieceTree {
1507    #[inline]
1508    pub fn compact(&mut self) {
1509        #[inline(always)]
1510        fn copy_node(
1511            old_index: NodeRef, old_arena: &Pieces,
1512            new_arena: &mut Pieces, index_map: &mut [u32]
1513        ) -> NodeRef {
1514            if old_index == NIL { return NIL }
1515
1516            if index_map[old_index.index()] != 0 {
1517                return NodeRef::new(index_map[old_index.index()] as usize);
1518            }
1519
1520            let node = old_arena.get(old_index);
1521            let left_new = copy_node(node.left, old_arena, new_arena, index_map);
1522            let right_new = copy_node(node.right, old_arena, new_arena, index_map);
1523
1524            let new_index = NodeRef::new(new_arena.nodes.len());
1525            new_arena.nodes.push(Node {
1526                left: left_new,
1527                right: right_new,
1528                offset: node.offset,
1529                subtree_len: node.subtree_len,
1530                subtree_chars: node.subtree_chars,
1531                subtree_newlines: node.subtree_newlines,
1532                piece_start_char: node.piece_start_char,
1533                meta: node.meta,
1534                buffer_start_line: node.buffer_start_line
1535            });
1536
1537            index_map[old_index.index()] = new_index.index() as u32;
1538            new_index
1539        }
1540
1541        let mut new_pieces = Pieces::new();
1542
1543        self.scratch_index_map.clear();
1544        self.scratch_index_map.resize(self.pieces.nodes.len(), 0);
1545
1546        self.root = copy_node(self.root, &self.pieces, &mut new_pieces, &mut self.scratch_index_map);
1547        for entry in &mut self.undo_stack {
1548            entry.root = copy_node(entry.root, &self.pieces, &mut new_pieces, &mut self.scratch_index_map);
1549        }
1550        for entry in &mut self.redo_stack {
1551            entry.root = copy_node(entry.root, &self.pieces, &mut new_pieces, &mut self.scratch_index_map);
1552        }
1553
1554        self.pieces = new_pieces;
1555    }
1556
1557    #[inline]
1558    pub fn squash(&mut self) {  // @Memory
1559        if self.root == NIL { return }
1560
1561        let squashed_text = self.to_string();  // @Memory
1562        let length = squashed_text.len() as u32;
1563
1564        let mut offsets = Vec::new();
1565        let mut checkpoints = Vec::new();
1566
1567        let (char_count, newline_count) =
1568            count_chars_and_newlines_with_offsets_and_checkpoints(
1569                squashed_text.as_bytes(),
1570                &mut offsets,
1571                &mut checkpoints
1572            );
1573
1574        self.buffers = Buffers::new();
1575        let buffer = self.buffers.original_buffers.push(OriginalBuffer {
1576            newline_offsets: offsets.into(),
1577            char_checkpoints: checkpoints.into(),
1578            text: squashed_text.into()
1579        });
1580        self.pieces = Pieces::new();
1581
1582        let piece = Piece {
1583            byte_length: length, newline_count, char_count,
1584            buffer, byte_offset: 0,
1585            buffer_start_line: 0, piece_start_char: 0
1586        };
1587        self.root = self.pieces.insert_node(NIL, piece, 0);
1588
1589        self.undo_stack.clear();
1590        self.redo_stack.clear();
1591    }
1592}
1593
1594impl PieceTree {
1595    /// Total bytes allocated for the node arena (includes NIL sentinel and
1596    /// all historical nodes retained for undo/redo).
1597    #[inline(always)]
1598    #[must_use]
1599    pub fn node_arena_bytes(&self) -> u32 {
1600        (self.pieces.nodes.len() * size_of::<Node>()) as _
1601    }
1602
1603    /// Bytes consumed by the modifications buffer (append-only, never shrinks).
1604    #[inline(always)]
1605    #[must_use]
1606    pub fn mod_buffer_bytes(&self) -> u32 {
1607        self.buffers.modifications_buffer.len() as _
1608    }
1609
1610    /// Bytes consumed by all original (read) buffers.
1611    #[inline(always)]
1612    #[must_use]
1613    pub fn original_buffers_bytes(&self) -> u32 {
1614        self.buffers.original_buffers.values().map(|s| s.len()).sum::<usize>() as _
1615    }
1616
1617    /// Bytes consumed by undo + redo history entries.
1618    #[inline(always)]
1619    #[must_use]
1620    pub fn history_bytes(&self) -> u32 {
1621        ((self.undo_stack.len() + self.redo_stack.len()) * size_of::<HistoryEntry>()) as _
1622    }
1623
1624    /// Number of live nodes in the arena (includes NIL and all historical nodes).
1625    #[inline(always)]
1626    #[must_use]
1627    pub fn node_count(&self) -> u32 {
1628        self.pieces.nodes.len() as _
1629    }
1630
1631    /// Aggregate memory usage breakdown.
1632    #[inline(always)]
1633    #[must_use]
1634    pub fn memory_usage(&self) -> MemoryUsage {
1635        MemoryUsage {
1636            node_arena:       self.node_arena_bytes(),
1637            mod_buffer:       self.mod_buffer_bytes(),
1638            original_buffers: self.original_buffers_bytes(),
1639            history:          self.history_bytes(),
1640        }
1641    }
1642}
1643
1644#[derive(Debug, Clone, Copy)]
1645pub struct MemoryUsage {
1646    pub node_arena:       u32,
1647    pub mod_buffer:       u32,
1648    pub original_buffers: u32,
1649    pub history:          u32,
1650}
1651
1652impl MemoryUsage {
1653    #[inline(always)]
1654    #[must_use]
1655    pub const fn total(&self) -> u32 {
1656        self.node_arena + self.mod_buffer + self.original_buffers + self.history
1657    }
1658
1659    /// Overhead = everything except the actual document content in buffers.
1660    #[inline(always)]
1661    #[must_use]
1662    pub const fn overhead(&self) -> u32 {
1663        self.node_arena + self.history
1664    }
1665}
1666
1667#[derive(Debug)]
1668pub struct LinesIter<'a> {
1669    tree: &'a PieceTree,
1670    current_line: u32,
1671    total_lines: u32,
1672}
1673
1674impl Iterator for LinesIter<'_> {
1675    type Item = String;
1676
1677    fn next(&mut self) -> Option<Self::Item> {
1678        if self.current_line >= self.total_lines { return None }
1679
1680        let line = self.tree.get_line_content_allocating(self.current_line)?;
1681        self.current_line += 1;
1682        Some(line)
1683    }
1684}
1685
1686/// A lazy, non-allocating slice view over a byte range of the tree
1687#[derive(Debug)]
1688pub struct ChunkIter<'a> {
1689    tree:       &'a PieceTree,
1690    pieces:     PieceTreeIter<'a>,
1691    // byte range we care about
1692    start:      u32,
1693    end:        u32,
1694    // bytes consumed so far across all pieces
1695    piece_start: u32,
1696}
1697
1698impl<'a> ChunkIter<'a> {
1699    #[inline]
1700    #[must_use]
1701    pub fn new(tree: &'a PieceTree, start: u32, end: u32) -> Self {
1702        Self {
1703            tree,
1704            pieces: PieceTreeIter::new(&tree.pieces, tree.root),
1705            start,
1706            end,
1707            piece_start: 0,
1708        }
1709    }
1710}
1711
1712impl<'a> Iterator for ChunkIter<'a> {
1713    type Item = &'a str;
1714
1715    #[inline]
1716    fn next(&mut self) -> Option<&'a str> {
1717        loop {
1718            let (_, p) = self.pieces.next()?;
1719            let piece_end = self.piece_start + p.byte_length;
1720
1721            // Skip pieces entirely before our window
1722            if piece_end <= self.start {
1723                self.piece_start = piece_end;
1724                continue;
1725            }
1726            // Stop once we're past our window
1727            if self.piece_start >= self.end {
1728                return None;
1729            }
1730
1731            let slice_start = self.start.saturating_sub(self.piece_start);
1732            let slice_end   = (self.end - self.piece_start).min(p.byte_length);
1733
1734            self.piece_start = piece_end;
1735
1736            let text = self.tree.buffers.get_slice(p.buffer, p.byte_offset + slice_start, slice_end - slice_start);
1737            if text.is_empty() { continue; }
1738            return Some(text);
1739        }
1740    }
1741}
1742
1743/// Non-allocating char iterator over a byte-bounded window of the tree.
1744#[derive(Debug)]
1745pub struct SliceChars<'a> {
1746    walker:   TreeWalker<'a>,
1747    byte_end: u32,
1748}
1749
1750impl Iterator for SliceChars<'_> {
1751    type Item = char;
1752    #[inline]
1753    fn next(&mut self) -> Option<char> {
1754        if self.walker.offset >= self.byte_end { return None; }
1755        self.walker.next()
1756    }
1757}
1758
1759/// A zero-copy borrowed view over a byte range of the tree.
1760/// All offsets are relative to the slice start.
1761pub struct TreeSlice<'a> {
1762    tree:  &'a PieceTree,
1763    start: u32,   // byte offset into tree (inclusive)
1764    end:   u32,   // byte offset into tree (exclusive)
1765}
1766
1767impl core::fmt::Display for TreeSlice<'_> {
1768    #[inline(always)]
1769    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1770        for char in self.chars() {
1771            write!(f, "{char}")?;
1772        }
1773        Ok(())
1774    }
1775}
1776
1777impl<'a> TreeSlice<'a> {
1778    #[inline(always)]
1779    #[must_use]
1780    pub fn new(tree: &'a PieceTree, start: u32, end: u32) -> Self {
1781        let end = end.min(tree.len_bytes());
1782        debug_assert!(start <= end);
1783        Self { tree, start, end }
1784    }
1785
1786    #[inline(always)]
1787    #[must_use]
1788    pub fn len_bytes(&self) -> u32  { self.end - self.start }
1789
1790    #[inline(always)]
1791    #[must_use]
1792    pub fn is_empty(&self)  -> bool { self.start == self.end }
1793
1794    #[inline(always)]
1795    #[must_use]
1796    pub fn len_chars(&self) -> u32 {
1797        let a = self.tree.byte_to_char(self.start).unwrap_or(0);
1798        let b = self.tree.byte_to_char(self.end).unwrap_or_else(|| self.tree.len_chars());
1799        b - a
1800    }
1801
1802    #[inline(always)]
1803    #[must_use]
1804    pub fn len_lines(&self) -> u32 {
1805        self.tree.chunks_at_byte(self.start, self.end)
1806            .map(|s| bytecount::count(s.as_bytes(), b'\n') as u32)
1807            .sum::<u32>() + 1
1808    }
1809
1810    /// Non-allocating chunk iterator over the slice's byte range.
1811    #[inline(always)]
1812    #[must_use]
1813    pub fn chunks(&self) -> ChunkIter<'a> {
1814        self.tree.chunks_at_byte(self.start, self.end)
1815    }
1816
1817    /// Non-allocating char iterator over the slice.
1818    #[inline(always)]
1819    #[must_use]
1820    pub fn chars(&self) -> SliceChars<'a> {
1821        self.tree.slice_bytes(self.start, self.end)
1822    }
1823
1824    /// Byte at a slice-relative byte offset.
1825    #[inline(always)]
1826    #[must_use]
1827    pub fn byte(&self, offset: u32) -> Option<u8> {
1828        if offset >= self.len_bytes() { return None; }
1829        self.tree.byte(self.start + offset)
1830    }
1831
1832    /// Char at a slice-relative char index.
1833    #[inline(always)]
1834    #[must_use]
1835    pub fn char(&self, char_index: u32) -> Option<char> {
1836        let abs_char = self.tree.byte_to_char(self.start)? + char_index;
1837        self.tree.char(abs_char)
1838    }
1839
1840    /// Non-allocating chunk iterator over a slice-relative line.
1841    #[inline(always)]
1842    #[must_use]
1843    pub fn line(&self, line: u32) -> Option<ChunkIter<'a>> {
1844        let (abs_start, abs_end) = self.abs_line_range(line)?;
1845        Some(self.tree.chunks_at_byte(abs_start, abs_end))
1846    }
1847
1848    /// Returns a sub-slice over a byte range (relative to this slice).
1849    #[inline(always)]
1850    #[must_use]
1851    pub fn slice<R: RangeBounds<u32>>(&self, range: R) -> TreeSlice<'a> {
1852        let s = match range.start_bound() {
1853            Bound::Included(&n) => n,
1854            Bound::Excluded(&n) => n + 1,
1855            Bound::Unbounded    => 0,
1856        };
1857        let e = match range.end_bound() {
1858            Bound::Included(&n) => n + 1,
1859            Bound::Excluded(&n) => n,
1860            Bound::Unbounded    => self.len_bytes(),
1861        };
1862        TreeSlice::new(self.tree, self.start + s, self.start + e)
1863    }
1864
1865    #[inline(always)]
1866    #[must_use]
1867    pub fn byte_to_char(&self, byte_offset: u32) -> Option<u32> {
1868        let base = self.tree.byte_to_char(self.start)?;
1869        let abs  = self.tree.byte_to_char(self.start + byte_offset)?;
1870        Some(abs - base)
1871    }
1872
1873    #[inline(always)]
1874    #[must_use]
1875    pub fn char_to_byte(&self, char_index: u32) -> Option<u32> {
1876        let base_char = self.tree.byte_to_char(self.start)?;
1877        let abs_byte  = self.tree.char_to_byte(base_char + char_index)?;
1878        Some(abs_byte - self.start)
1879    }
1880
1881    #[inline(always)]
1882    #[must_use]
1883    pub fn byte_to_line(&self, byte_offset: u32) -> Option<u32> {
1884        let (abs_line, _) = self.tree.byte_to_line_col(self.start + byte_offset)?;
1885        let base_line     = self.tree.byte_to_line_col(self.start).map(|(l, _)| l)?;
1886        Some(abs_line - base_line)
1887    }
1888
1889    #[inline(always)]
1890    #[must_use]
1891    pub fn line_to_byte(&self, line: u32) -> Option<u32> {
1892        let (abs_start, _) = self.abs_line_range(line)?;
1893        Some(abs_start - self.start)
1894    }
1895
1896    #[inline]
1897    #[must_use]
1898    pub fn abs_line_range(&self, rel_line: u32) -> Option<(u32, u32)> {
1899        let base_line = self.tree.byte_to_line_col(self.start).map(|(l, _)| l)?;
1900        let abs_line  = base_line + rel_line;
1901
1902        let line_start = self.tree.line_to_byte(abs_line)?;
1903        let line_end   = self.tree.line_to_byte(abs_line + 1)
1904            .unwrap_or_else(|| self.tree.len_bytes());
1905
1906        //
1907        // Clamp to slice bounds.
1908        //
1909        let s = line_start.max(self.start);
1910        let e = line_end.min(self.end);
1911        if s > e { return None; }
1912
1913        Some((s, e))
1914    }
1915}
1916
1917// Allow slicing directly from PieceTree.
1918impl PieceTree {
1919    #[inline]
1920    pub fn slice<R: RangeBounds<u32>>(&self, range: R) -> TreeSlice<'_> {
1921        let s = match range.start_bound() {
1922            Bound::Included(&n) => n,
1923            Bound::Excluded(&n) => n + 1,
1924            Bound::Unbounded    => 0,
1925        };
1926        let e = match range.end_bound() {
1927            Bound::Included(&n) => n + 1,
1928            Bound::Excluded(&n) => n,
1929            Bound::Unbounded    => self.len_bytes(),
1930        };
1931        TreeSlice::new(self, s, e)
1932    }
1933
1934    /// Slice by char range.
1935    #[inline]
1936    pub fn slice_chars_range<R: RangeBounds<u32>>(&self, range: R) -> TreeSlice<'_> {
1937        let sc = match range.start_bound() {
1938            Bound::Included(&n) => n,
1939            Bound::Excluded(&n) => n + 1,
1940            Bound::Unbounded    => 0,
1941        };
1942        let ec = match range.end_bound() {
1943            Bound::Included(&n) => n + 1,
1944            Bound::Excluded(&n) => n,
1945            Bound::Unbounded    => self.len_chars(),
1946        };
1947        let s = self.char_to_byte(sc).unwrap_or(0);
1948        let e = self.char_to_byte(ec).unwrap_or_else(|| self.len_bytes());
1949        TreeSlice::new(self, s, e)
1950    }
1951}
1952
1953impl PieceTree {
1954    /// Fast-path chunk reader specifically designed for the Tree-sitter C API.
1955    /// Given an absolute byte offset, it returns the largest contiguous byte
1956    /// slice available starting exactly at that offset.
1957    #[inline]
1958    #[must_use]
1959    pub fn read_largest_contigous_chunk_at_byte(&self, offset: u32) -> &[u8] {
1960        let total = self.total_length();
1961        if offset >= total {
1962            return &[];
1963        }
1964
1965        let mut current = self.root;
1966        let mut current_offset = offset;
1967
1968        while current != NIL {
1969            let node = self.pieces.get(current);
1970            let p = self.pieces.get_piece(current);
1971            let left_len = self.pieces.get(node.left).subtree_len;
1972            let piece_len = p.byte_length;
1973
1974            if current_offset < left_len {
1975                current = node.left;
1976
1977            } else if current_offset < left_len + piece_len {
1978                //
1979                // The requested offset falls inside this exact piece
1980                //
1981
1982                let rel_offset = current_offset - left_len;
1983                let text = self.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
1984
1985                return &text.as_bytes()[rel_offset as usize..];
1986
1987            } else {
1988                current_offset -= left_len + piece_len;
1989                current = node.right;
1990            }
1991        }
1992
1993        &[]
1994    }
1995
1996    #[inline]
1997    #[must_use]
1998    pub fn line_to_byte(&self, target_line: u32) -> Option<u32> {
1999        if target_line == 0 { return Some(0) }
2000        let mut current = self.root;
2001        let mut current_offset = 0;
2002        let mut current_line = 0;
2003        while current != NIL {
2004            let node = self.pieces.get(current);
2005            let p = self.pieces.get_piece(current);
2006            let left_newlines = self.pieces.get(node.left).subtree_newlines;
2007            let left_len      = self.pieces.get(node.left).subtree_len;
2008            let piece_newlines = p.newline_count;
2009            let piece_len      = p.byte_length;
2010
2011            if target_line <= current_line + left_newlines {
2012                //
2013                // The target line starts somewhere in the left subtree
2014                //
2015                current = node.left;
2016
2017            } else if target_line <= current_line + left_newlines + piece_newlines {
2018                //
2019                // The target line starts inside this piece
2020                //
2021
2022                current_line   += left_newlines;
2023                current_offset += left_len;
2024
2025                let rel_line = target_line - current_line; // >= 1, guaranteed
2026                let absolute_newline_index = p.buffer_start_line + rel_line - 1;
2027                let absolute_byte_offset = if p.buffer == MOD_BUFFER {
2028                    self.buffers.modifications_newline_offsets[absolute_newline_index as usize]
2029                } else {
2030                    self.buffers.original_buffers[p.buffer].newline_offsets[absolute_newline_index as usize]
2031                };
2032
2033                let local_piece_offset = absolute_byte_offset - p.byte_offset + 1;
2034                return Some(current_offset + local_piece_offset);
2035
2036            } else {
2037                current_line   += left_newlines + piece_newlines;
2038                current_offset += left_len + piece_len;
2039                current = node.right;
2040            }
2041        }
2042
2043        None
2044    }
2045
2046    #[inline]
2047    #[must_use]
2048    pub fn char_to_byte(&self, char_index: u32) -> Option<u32> {
2049        let total_chars = self.len_chars();
2050        if char_index > total_chars { return None; }
2051        if char_index == total_chars { return Some(self.len_bytes()); }
2052
2053        let mut current = self.root;
2054        let mut current_byte = 0;
2055        let mut current_char = 0;
2056
2057        while current != NIL {
2058            let node = self.pieces.get(current);
2059            let left_node = self.pieces.get(node.left);
2060
2061            let left_chars = left_node.subtree_chars;
2062            let left_bytes = left_node.subtree_len;
2063
2064            let p = self.pieces.get_piece(current);
2065            let piece_chars = p.char_count;
2066
2067            if char_index < current_char + left_chars {
2068                current = node.left;
2069
2070            } else if char_index < current_char + left_chars + piece_chars {
2071                //
2072                // Target char is inside this piece,
2073                // p.piece_start_char is the absolute char index of p.offset in its buffer,
2074                // so (p.piece_start_char + rel_char) is the absolute char target.
2075                //
2076
2077                let rel_char = char_index - (current_char + left_chars);
2078                let absolute_target_byte =
2079                    self.buffers.char_to_byte_absolute(p.buffer, p.piece_start_char + rel_char);
2080
2081                return Some(current_byte + left_bytes + (absolute_target_byte - p.byte_offset));
2082
2083            } else {
2084                current_char += left_chars + piece_chars;
2085                current_byte += left_bytes + p.byte_length;
2086                current = node.right;
2087            }
2088        }
2089
2090        None
2091    }
2092
2093    #[inline]
2094    #[must_use]
2095    pub fn byte_to_char(&self, byte_offset: u32) -> Option<u32> {
2096        let total_bytes = self.len_bytes();
2097        if byte_offset > total_bytes { return None; }
2098        if byte_offset == total_bytes { return Some(self.len_chars()); }
2099
2100        let mut current = self.root;
2101        let mut current_byte = 0;
2102        let mut current_char = 0;
2103
2104        while current != NIL {
2105            let node = self.pieces.get(current);
2106            let left_node = self.pieces.get(node.left);
2107
2108            let left_bytes = left_node.subtree_len;
2109            let left_chars = left_node.subtree_chars;
2110
2111            let p = self.pieces.get_piece(current);
2112            let piece_bytes = p.byte_length;
2113
2114            if byte_offset < current_byte + left_bytes {
2115                current = node.left;
2116
2117            } else if byte_offset < current_byte + left_bytes + piece_bytes {
2118                // Target byte is inside this piece.
2119                // byte_to_char_absolute gives the absolute char index, so subtracting
2120                // piece_start_char converts it to a piece-relative char count.
2121
2122                let rel_byte = byte_offset - (current_byte + left_bytes);
2123                let absolute_target_char =
2124                    self.buffers.byte_to_char_absolute(p.buffer, p.byte_offset + rel_byte);
2125
2126                return Some(current_char + left_chars + (absolute_target_char - p.piece_start_char));
2127
2128            } else {
2129                current_byte += left_bytes + piece_bytes;
2130                current_char += left_chars + p.char_count;
2131                current = node.right;
2132            }
2133        }
2134
2135        None
2136    }
2137
2138    #[inline]
2139    #[must_use]
2140    pub fn byte_to_line_col(&self, offset: u32) -> Option<(u32, u32)> {
2141        let total_bytes = self.len_bytes();
2142        if offset > total_bytes { return None }
2143        if offset == total_bytes {
2144            //
2145            // Get offset of the last line
2146            //
2147            let line            = self.pieces.get(self.root).subtree_newlines;
2148            let line_start_byte = self.line_to_byte(line).unwrap_or(0);
2149            let target_char     = self.len_chars();
2150            let line_start_char = self.byte_to_char(line_start_byte)?;
2151            return Some((line, target_char - line_start_char));
2152        }
2153
2154        let mut current         = self.root;
2155        let mut current_line    = 0u32;
2156        let mut current_byte    = 0u32;
2157        let mut current_char    = 0u32;
2158
2159        while current != NIL {
2160            let node          = self.pieces.get(current);
2161            let left_node     = self.pieces.get(node.left);
2162            let left_bytes    = left_node.subtree_len;
2163            let left_newlines = left_node.subtree_newlines;
2164            let left_chars    = left_node.subtree_chars;
2165            let p             = self.pieces.get_piece(current);
2166            let piece_bytes   = p.byte_length;
2167
2168            if offset < current_byte + left_bytes {
2169                current = node.left;
2170
2171            } else if offset < current_byte + left_bytes + piece_bytes {
2172                current_line += left_newlines;
2173                current_char += left_chars;
2174
2175                let rel_byte = offset - (current_byte + left_bytes);
2176
2177                let (local_newlines, col) = if p.newline_count == 0 {
2178                    let target_char = self.buffers.byte_to_char_absolute(p.buffer, p.byte_offset + rel_byte);
2179                    let rel_char    = target_char - p.piece_start_char;
2180                    let abs_char    = current_char + rel_char;
2181
2182                    let line_start_byte = self.line_to_byte(current_line)?;
2183                    let line_start_char = self.byte_to_char(line_start_byte)?;
2184
2185                    (0, abs_char - line_start_char)
2186                } else {
2187                    let start_index       = p.buffer_start_line as usize;
2188                    let end_index         = start_index + p.newline_count as usize;
2189                    let absolute_target = p.byte_offset + rel_byte;
2190
2191                    let offsets = &self.buffers.get_newlines(p.buffer)[start_index..end_index];
2192
2193                    let local_nl = offsets.partition_point(|&off| off < absolute_target) as u32;
2194
2195                    let target_char = self.buffers.byte_to_char_absolute(p.buffer, p.byte_offset + rel_byte);
2196                    let col = if local_nl > 0 {
2197                        let last_nl_abs_byte = offsets[local_nl as usize - 1];
2198                        let last_nl_char     = self.buffers.byte_to_char_absolute(p.buffer, last_nl_abs_byte);
2199                        target_char - (last_nl_char + 1)
2200                    } else {
2201                        let line_start_byte = self.line_to_byte(current_line)?;
2202                        let line_start_char = self.byte_to_char(line_start_byte)?;
2203                        let rel_char        = target_char - p.piece_start_char;
2204                        (current_char + rel_char) - line_start_char
2205                    };
2206
2207                    (local_nl, col)
2208                };
2209
2210                return Some((current_line + local_newlines, col));
2211
2212            } else {
2213                current_line += left_newlines + p.newline_count;
2214                current_byte += left_bytes + piece_bytes;
2215                current_char += left_chars + p.char_count;
2216                current = node.right;
2217            }
2218        }
2219
2220        None
2221    }
2222
2223    #[inline]
2224    #[must_use]
2225    pub fn pieces(&self) -> PieceTreeIter<'_> {
2226        PieceTreeIter::new(&self.pieces, self.root)
2227    }
2228
2229    #[inline]
2230    #[must_use]
2231    pub fn get_line_range(&self, line: u32) -> Option<(u32, u32)> {
2232        let start = self.line_to_byte(line)?;
2233        let end = self.line_to_byte(line + 1).unwrap_or_else(|| self.total_length());
2234        Some((start, end))
2235    }
2236
2237    #[inline]
2238    #[must_use]
2239    pub fn get_line_content_allocating(&self, line: u32) -> Option<String> {
2240        let (start, end) = self.get_line_range(line)?;
2241
2242        let mut content = String::with_capacity((end - start) as usize);
2243        let mut walker = TreeWalker::new(self);
2244
2245        walker.seek(start);
2246        while walker.offset < end {
2247            if let Some(c) = walker.next() {
2248                content.push(c);
2249            } else {
2250                break;
2251            }
2252        }
2253
2254        Some(content)
2255    }
2256
2257        /// Returns an iterator of &str chunks over the given byte range.
2258    /// Zero allocation.
2259    #[inline]
2260    #[must_use]
2261    pub fn chunks_at_byte(&self, start: u32, end: u32) -> ChunkIter<'_> {
2262        let end = end.min(self.total_length());
2263        ChunkIter::new(self, start, end)
2264    }
2265
2266    /// Byte at a given byte offset. O(log n).
2267    #[inline]
2268    #[must_use]
2269    pub fn byte(&self, offset: u32) -> Option<u8> {
2270        let (node, rel) = self.find_position(offset, false)?;
2271        let p = self.get_piece(node);
2272        let text = self.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
2273        text.as_bytes().get(rel as usize).copied()
2274    }
2275
2276    /// Char at a given char index. O(log n) to find the piece, then
2277    /// a short scan within the piece.
2278    #[inline]
2279    #[must_use]
2280    pub fn char(&self, char_index: u32) -> Option<char> {
2281        let byte_offset = self.char_to_byte(char_index)?;
2282        let (node, rel) = self.find_position(byte_offset, false)?;
2283        let p = self.get_piece(node);
2284        let text = self.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
2285        text[rel as usize..].chars().next()
2286    }
2287
2288    /// Returns a non-allocating iterator of chars over the given char range.
2289    /// Backed by `TreeWalker::seek` so it reuses existing infrastructure.
2290    #[inline]
2291    #[must_use]
2292    pub fn slice_chars(&self, char_start: u32, char_end: u32) -> SliceChars<'_> {
2293        let byte_start = self.char_to_byte(char_start).unwrap_or(0);
2294        let byte_end   = self.char_to_byte(char_end).unwrap_or_else(|| self.total_length());
2295        SliceChars {
2296            walker:   { let mut w = TreeWalker::new(self); w.seek(byte_start); w },
2297            byte_end,
2298        }
2299    }
2300
2301    /// Returns a non-allocating iterator of chars over the given byte range.
2302    #[inline]
2303    #[must_use]
2304    pub fn slice_bytes(&self, byte_start: u32, byte_end: u32) -> SliceChars<'_> {
2305        SliceChars {
2306            walker:   { let mut w = TreeWalker::new(self); w.seek(byte_start); w },
2307            byte_end,
2308        }
2309    }
2310
2311    /// Non-allocating line view: returns a `ChunkIter` over the byte range of
2312    /// `line`. Line numbers are 0-based. The trailing \n is included if present.
2313    #[inline]
2314    #[must_use]
2315    pub fn line(&self, line: u32) -> Option<ChunkIter<'_>> {
2316        let start = self.line_to_byte(line)?;
2317        let end   = self.line_to_byte(line + 1)
2318                        .unwrap_or_else(|| self.total_length());
2319        Some(self.chunks_at_byte(start, end))
2320    }
2321
2322    /// Number of lines (= newline count + 1)
2323    #[inline]
2324    #[must_use]
2325    pub fn len_lines(&self) -> u32 { self.pieces.get(self.root).subtree_newlines + 1 }
2326
2327    #[inline(always)]
2328    #[must_use]
2329    pub fn len_chars(&self) -> u32 { self.pieces.get(self.root).subtree_chars }
2330
2331    #[inline]
2332    #[must_use]
2333    pub fn len_bytes(&self) -> u32 { self.total_length() }
2334
2335    #[inline]
2336    #[must_use]
2337    pub fn chars(&self) -> TreeWalker<'_> { TreeWalker::new(self) }
2338
2339    #[inline(always)]
2340    #[must_use]
2341    pub fn chars_rev(&self) -> ReverseTreeWalker<'_> { ReverseTreeWalker::new(self) }
2342
2343    #[inline]
2344    #[must_use]
2345    pub fn lines(&self) -> LinesIter<'_> {
2346        LinesIter { tree: self, current_line: 0, total_lines: self.len_lines() }
2347    }
2348
2349    #[inline]
2350    pub fn remove<R>(&mut self, range: R) where R: RangeBounds<u32> {
2351        let start = match range.start_bound() {
2352            Bound::Included(&n) => n,
2353            Bound::Excluded(&n) => n + 1,
2354            Bound::Unbounded => 0,
2355        };
2356        let end = match range.end_bound() {
2357            Bound::Included(&n) => n + 1,
2358            Bound::Excluded(&n) => n,
2359            Bound::Unbounded => self.len_bytes(),
2360        };
2361        if end > start { self.remove_at(start, end - start) }
2362    }
2363
2364    #[inline]
2365    #[must_use]
2366    pub fn char_to_line(&self, char_index: u32) -> Option<u32> {
2367        let byte_index = self.char_to_byte(char_index)?;
2368        self.byte_to_line_col(byte_index).map(|(line, _)| line)
2369    }
2370
2371    #[inline]
2372    #[must_use]
2373    pub fn char_to_line_col(&self, char_index: u32) -> Option<(u32, u32)> {
2374        let byte_index = self.char_to_byte(char_index)?;
2375        self.byte_to_line_col(byte_index)
2376    }
2377
2378    #[inline]
2379    #[must_use]
2380    pub fn line_to_char(&self, line: u32) -> Option<u32> {
2381        let byte_index = self.line_to_byte(line)?;
2382        self.byte_to_char(byte_index)
2383    }
2384}
2385
2386#[derive(Debug)]
2387pub struct PieceTreeIter<'a, const MAX_INLINE_TREE_DEPTH: usize = 32> {
2388    arena: &'a Pieces,
2389    stack: SmallVec<[NodeRef; MAX_INLINE_TREE_DEPTH]>,
2390}
2391
2392impl<'a, const MAX_INLINE_TREE_DEPTH: usize> PieceTreeIter<'a, MAX_INLINE_TREE_DEPTH> {
2393    #[inline]
2394    #[must_use]
2395    pub fn new(arena: &'a Pieces, mut root: NodeRef) -> Self {
2396        let mut stack = SmallVec::new();
2397        while root != NIL {
2398            stack.push(root);
2399            root = arena.get(root).left;
2400        }
2401
2402        Self { arena, stack }
2403    }
2404}
2405
2406impl<const MAX_INLINE_TREE_DEPTH: usize> Iterator for PieceTreeIter<'_, MAX_INLINE_TREE_DEPTH> {
2407    type Item = (NodeRef, Piece);
2408
2409    #[inline]
2410    fn next(&mut self) -> Option<Self::Item> {
2411        let node_index = self.stack.pop()?;
2412        let node = self.arena.get(node_index);
2413        let p = self.arena.get_piece(node_index);
2414
2415        let mut current = node.right;
2416        while current != NIL {
2417            self.stack.push(current);
2418            current = self.arena.get(current).left;
2419        }
2420
2421        Some((node_index, p))
2422    }
2423}
2424
2425#[derive(Clone, Copy, PartialEq, Debug)]
2426enum Direction { Left, Center, Right }
2427
2428#[derive(Debug)]
2429pub struct TreeWalker<'a, const MAX_INLINE_TREE_DEPTH: usize = 32> {
2430    tree: &'a PieceTree,
2431    stack: SmallVec<[(NodeRef, Direction); MAX_INLINE_TREE_DEPTH]>,
2432    current_str: str::Chars<'a>,
2433    pub offset: u32,
2434}
2435
2436impl<'a, const MAX_INLINE_TREE_DEPTH: usize> TreeWalker<'a, MAX_INLINE_TREE_DEPTH> {
2437    #[inline]
2438    #[must_use]
2439    pub fn new(tree: &'a PieceTree) -> Self {
2440        let mut walker = Self {
2441            tree,
2442            stack: SmallVec::new(),
2443            current_str: "".chars(),
2444            offset: 0,
2445        };
2446        if tree.root != NIL {
2447            walker.stack.push((tree.root, Direction::Left));
2448        }
2449        walker.populate_chars();
2450        walker
2451    }
2452
2453    #[inline]
2454    pub fn seek(&mut self, target: u32) {
2455        self.stack.clear();
2456        self.offset = target;
2457        self.current_str = "".chars();
2458
2459        if self.tree.root == NIL { return; }
2460
2461        let mut current = self.tree.root;
2462        let mut current_offset = target;
2463
2464        while current != NIL {
2465            let node = self.tree.pieces.get(current);
2466            let p = self.tree.pieces.get_piece(current);
2467            let left_len = self.tree.pieces.get(node.left).subtree_len;
2468            let piece_len = p.byte_length;
2469
2470            if current_offset < left_len {
2471                self.stack.push((current, Direction::Center));
2472                current = node.left;
2473            } else if current_offset < left_len + piece_len {
2474                self.stack.push((current, Direction::Right));
2475                let text = self.tree.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
2476                let rel_offset = current_offset - left_len;
2477                self.current_str = text[rel_offset as usize..].chars();
2478                break;
2479            } else {
2480                current_offset -= left_len + piece_len;
2481                current = node.right;
2482            }
2483        }
2484    }
2485
2486    #[inline]
2487    fn populate_chars(&mut self) {
2488        while let Some((node_index, dir)) = self.stack.pop() {
2489            let node = self.tree.pieces.get(node_index);
2490            match dir {
2491                Direction::Left => {
2492                    self.stack.push((node_index, Direction::Center));
2493                    if node.left != NIL { self.stack.push((node.left, Direction::Left)); }
2494                }
2495
2496                Direction::Center => {
2497                    self.stack.push((node_index, Direction::Right));
2498                    let p = self.tree.pieces.get_piece(node_index);
2499                    let text = self.tree.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
2500                    self.current_str = text.chars();
2501                    return;
2502                }
2503
2504                Direction::Right => {
2505                    if node.right != NIL { self.stack.push((node.right, Direction::Left)); }
2506                }
2507            }
2508        }
2509    }
2510}
2511
2512impl Iterator for TreeWalker<'_> {
2513    type Item = char;
2514
2515    #[inline(always)]
2516    fn next(&mut self) -> Option<char> {
2517        loop {
2518            if let Some(c) = self.current_str.next() {
2519                self.offset += c.len_utf8() as u32;
2520                return Some(c);
2521            }
2522
2523            if self.stack.is_empty() { return None; }
2524            self.populate_chars();
2525        }
2526    }
2527}
2528
2529#[derive(Debug)]
2530pub struct ReverseTreeWalker<'a, const MAX_INLINE_TREE_DEPTH: usize = 32> {
2531    tree: &'a PieceTree,
2532    stack: SmallVec<[(NodeRef, bool); MAX_INLINE_TREE_DEPTH]>,
2533    current_str: str::Chars<'a>,
2534}
2535
2536impl<'a, const MAX_INLINE_TREE_DEPTH: usize> ReverseTreeWalker<'a, MAX_INLINE_TREE_DEPTH> {
2537    #[inline(always)]
2538    #[must_use]
2539    pub fn new(tree: &'a PieceTree) -> Self {
2540        let mut walker = Self {
2541            tree,
2542            stack: SmallVec::new(),
2543            current_str: "".chars(),
2544        };
2545        walker.push_rightmost(tree.root);
2546        walker
2547    }
2548
2549    #[inline(always)]
2550    fn push_rightmost(&mut self, mut node: NodeRef) {
2551        while node != NIL {
2552            self.stack.push((node, false));
2553            node = self.tree.pieces.get(node).right;
2554        }
2555    }
2556
2557    #[inline]
2558    pub fn seek(&mut self, mut target_offset: u32) {
2559        self.stack.clear();
2560        self.current_str = "".chars();
2561
2562        if self.tree.root == NIL { return; }
2563
2564        let total_bytes = self.tree.len_bytes();
2565
2566        if target_offset > total_bytes {
2567            target_offset = total_bytes;
2568        }
2569        if target_offset == total_bytes {
2570            self.push_rightmost(self.tree.root);
2571            return;
2572        }
2573
2574        let mut current = self.tree.root;
2575        let mut current_offset = target_offset;
2576
2577        while current != NIL {
2578            let node = self.tree.pieces.get(current);
2579            let p = self.tree.pieces.get_piece(current);
2580            let left_len = self.tree.pieces.get(node.left).subtree_len;
2581            let piece_len = p.byte_length;
2582
2583            if current_offset < left_len {
2584                self.stack.push((current, true));
2585                current = node.left;
2586
2587            } else if current_offset < left_len + piece_len {
2588                self.stack.push((current, true));
2589
2590                let text = self.tree.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
2591                let rel_offset = current_offset - left_len;
2592
2593                // Keep the subset of the string before the offset
2594                self.current_str = text[..rel_offset as usize].chars();
2595
2596                break;
2597
2598            } else {
2599                self.stack.push((current, false));
2600                current_offset -= left_len + piece_len;
2601                current = node.right;
2602            }
2603        }
2604    }
2605}
2606
2607impl Iterator for ReverseTreeWalker<'_> {
2608    type Item = char;
2609
2610    #[inline]
2611    fn next(&mut self) -> Option<Self::Item> {
2612        if let Some(c) = self.current_str.next_back() { return Some(c); }
2613
2614        while let Some((node_index, visited_right)) = self.stack.pop() {
2615            let node = self.tree.pieces.get(node_index);
2616            if visited_right {
2617                self.push_rightmost(node.left);
2618            } else {
2619                self.stack.push((node_index, true));
2620
2621                let p = self.tree.pieces.get_piece(node_index);
2622                let text_slice = self.tree.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
2623
2624                self.current_str = text_slice.chars();
2625
2626                if let Some(c) = self.current_str.next_back() { return Some(c); }
2627            }
2628        }
2629
2630        None
2631    }
2632}
2633
2634impl PieceTree {
2635    #[inline]
2636    pub fn debug(&self, f: &mut impl core::fmt::Write) -> core::fmt::Result {
2637        writeln!(f, "\n--- Tree State (Root: {:?}) ---", self.root)?;
2638        self.print_inorder(f, self.root, &mut None, 0)?;
2639        writeln!(f, "------------------------------\n")
2640    }
2641
2642    fn print_inorder(&self, f: &mut impl core::fmt::Write, node: NodeRef, last: &mut Option<Piece>, depth: usize) -> core::fmt::Result {
2643        if node == NIL { return Ok(()) }
2644
2645        let n = self.pieces.nodes[node];
2646
2647        self.print_inorder(f, n.left, last, depth + 1)?;
2648
2649        let cur = self.get_piece(node);
2650
2651        //
2652        // Check for the mergeable invariant
2653        //
2654        let warning = if let Some(prev) = last {
2655            if prev.buffer == cur.buffer
2656               && prev.byte_offset + prev.byte_length == cur.byte_offset
2657            {
2658                " --------- [!!! MERGEABLE NEIGHBORS NOT MERGED !!!] ---------"
2659            } else {
2660                ""
2661            }
2662        } else {
2663            ""
2664        };
2665
2666        let Piece { buffer, byte_offset: offset, byte_length: length, .. } = cur;
2667
2668        writeln!(
2669            f,
2670            "{:indent$}Node {node:?}: Buf={}, Off={offset}, Len={length}{warning}",
2671            "", buffer.as_u32(), indent = depth * 4
2672        )?;
2673
2674        *last = Some(cur);
2675
2676        self.print_inorder(f, n.right, last, depth + 1)
2677    }
2678}
2679
2680pub fn assert_state(tree: &PieceTree, expected: &str) {
2681    let tree_text = tree.to_string();
2682
2683    assert_eq!(tree_text, expected, "Text mismatch");
2684
2685    if !expected.is_empty() {
2686        let offsets = [0, expected.len() / 2, expected.len() - 1];
2687        for off in offsets {
2688            let chunk = tree.read_largest_contigous_chunk_at_byte(off as u32);
2689            let chunk_str = str::from_utf8(chunk).unwrap();
2690            assert!(
2691                expected[off..].starts_with(chunk_str),
2692                "Chunk mismatch at offset {}",
2693                off
2694            );
2695        }
2696    }
2697}
2698
2699pub fn assert_invariants(tree: &PieceTree) {
2700    fn check(tree: &PieceTree, node: NodeRef) -> (usize, usize) {
2701        if node == NIL { return (0, 1) }
2702
2703        let n = tree.pieces.nodes[node];
2704        let piece = tree.get_piece(node);
2705
2706        let (l_len, l_bh) = check(tree, n.left);
2707        let (r_len, r_bh) = check(tree, n.right);
2708
2709        let expected_len = l_len + piece.byte_length as usize + r_len;
2710        assert_eq!(n.subtree_len as usize, expected_len, "subtree_len mismatch");
2711
2712        if n.color() == Color::Red {
2713            assert_eq!(
2714                tree.pieces.nodes[n.left].color(),
2715                Color::Black,
2716                "red node with red left child"
2717            );
2718            assert_eq!(
2719                tree.pieces.nodes[n.right].color(),
2720                Color::Black,
2721                "red node with red right child"
2722            );
2723        }
2724
2725        assert_eq!(l_bh, r_bh, "black height mismatch");
2726
2727        let bh = l_bh + usize::from(n.color() == Color::Black);
2728        (expected_len, bh)
2729    }
2730
2731    if tree.root == NIL {
2732        assert_eq!(tree.total_length(), 0,            "Empty tree has nonzero length");
2733    } else {
2734        assert_eq!(
2735            tree.pieces.nodes[tree.root].color(),
2736            Color::Black,
2737            "root is not black"
2738        );
2739        let (len, _) = check(tree, tree.root);
2740        assert_eq!(len, tree.total_length() as usize, "Tree length and root length differ");
2741
2742    }
2743}
2744
2745pub fn assert_piece_metadata(tree: &PieceTree) {
2746    fn collect(tree: &PieceTree, node: NodeRef, out: &mut Vec<Piece>) {
2747        if node == NIL { return }
2748
2749        let n = tree.pieces.nodes[node];
2750
2751        collect(tree, n.left, out);
2752        out.push(tree.pieces.get_piece(node));
2753        collect(tree, n.right, out);
2754    }
2755
2756    let mut pieces = Vec::new();
2757    collect(tree, tree.root, &mut pieces);
2758
2759    for p in &pieces {
2760        let buf   = tree.buffers.get(p.buffer);
2761        let start = p.byte_offset as usize;
2762        let end   = start + p.byte_length as usize;
2763
2764        assert!(end <= buf.len(), "Piece points past end of buffer");
2765        assert!(p.byte_length > 0, "Zero-length piece found");
2766
2767        let slice = &buf[start..end];
2768        let (actual_chars, actual_nl) = count_chars_and_newlines(slice.as_bytes());
2769        assert_eq!(p.char_count,    actual_chars, "char_count mismatch");
2770        assert_eq!(p.newline_count, actual_nl,    "newline_count mismatch");
2771
2772        //
2773        // piece_start_char and buffer_start_line: scan only the prefix once
2774        //
2775        let prefix = &buf.as_bytes()[..p.byte_offset as usize];
2776        let actual_start_char = bytecount::num_chars(prefix) as u32;
2777        let actual_start_line = bytecount::count(prefix, b'\n') as u32;
2778
2779        assert_eq!(p.piece_start_char,  actual_start_char,
2780            "piece_start_char mismatch buffer={} offset={}", p.buffer.as_u32(), p.byte_offset);
2781
2782        assert_eq!(p.buffer_start_line, actual_start_line,
2783            "buffer_start_line mismatch buffer={} offset={}", p.buffer.as_u32(), p.byte_offset);
2784
2785        //
2786        // Verify the newline_offsets slice
2787        //
2788        let nl_offsets = &tree.buffers.get_newlines(p.buffer)
2789            [p.buffer_start_line as usize..(p.buffer_start_line + p.newline_count) as usize];
2790
2791        for (i, &abs_byte) in nl_offsets.iter().enumerate() {
2792            assert!(
2793                abs_byte >= p.byte_offset && abs_byte < p.byte_offset + p.byte_length,
2794                "newline_offsets[{}] outside piece", p.buffer_start_line as usize + i
2795            );
2796
2797            assert_eq!(
2798                buf.as_bytes()[abs_byte as usize], b'\n',
2799                "newline_offsets[{}] not a newline", p.buffer_start_line as usize + i
2800            );
2801        }
2802    }
2803}
2804
2805pub fn assert_no_mergeable_neighbors(tree: &PieceTree) {
2806    fn inorder(tree: &PieceTree, node: NodeRef, last: &mut Option<Piece>) {
2807        if node == NIL { return }
2808
2809        let n = tree.pieces.nodes[node];
2810        inorder(tree, n.left, last);
2811
2812        let cur = tree.get_piece(node);
2813
2814        if let Some(prev) = *last {
2815            if prev.buffer == cur.buffer
2816                && prev.byte_offset + prev.byte_length == cur.byte_offset
2817            {
2818                panic!("Mergeable neighboring pieces were left unmerged");
2819            }
2820        }
2821
2822        *last = Some(cur);
2823        inorder(tree, n.right, last);
2824    }
2825
2826    let mut last = None;
2827    inorder(tree, tree.root, &mut last);
2828}
2829
2830pub fn assert_coordinates(tree: &PieceTree, oracle: &str) {
2831    #[inline]
2832    fn oracle_offset_to_line_col(s: &str, byte_offset: usize) -> (usize, usize) {
2833        let slice = &s[..byte_offset];
2834        let line  = bytecount::count(slice.as_bytes(), b'\n');
2835        let last_nl = slice.rfind('\n').map_or(0, |i| i + 1);
2836        let col   = bytecount::num_chars(s[last_nl..byte_offset].as_bytes());
2837        (line, col)
2838    }
2839
2840    #[inline]
2841    fn oracle_line_to_offset(s: &str, target_line: usize) -> usize {
2842        if target_line == 0 { return 0 }
2843
2844        let mut line = 0;
2845        for (i, c) in s.char_indices() {
2846            if c == '\n' {
2847                line += 1;
2848                if line == target_line { return i + 1; }
2849            }
2850        }
2851
2852        s.len()
2853    }
2854
2855    fn check_coordinate(tree: &PieceTree, oracle: &str, byte_index: u32) {
2856        let char_index = bytecount::num_chars(oracle[..byte_index as usize].as_bytes()) as u32;
2857
2858        assert_eq!(tree.char_to_byte(char_index), Some(byte_index),
2859                   "char_to_byte({}) wrong", char_index);
2860
2861        assert_eq!(tree.byte_to_char(byte_index), Some(char_index),
2862                   "byte_to_char({}) wrong", byte_index);
2863
2864        let expected_lc = oracle_offset_to_line_col(oracle, byte_index as usize);
2865        assert_eq!(
2866            tree.byte_to_line_col(byte_index),
2867            Some((expected_lc.0 as u32, expected_lc.1 as u32)),
2868            "offset_to_line_col({}) wrong", byte_index
2869        );
2870    }
2871
2872    let total_bytes = oracle.len() as u32;
2873    let total_chars = oracle.chars().count() as u32;
2874
2875    //
2876    // Always check these
2877    //
2878    check_coordinate(tree, oracle, 0);
2879    if total_bytes > 0 {
2880        check_coordinate(tree, oracle, total_bytes - 1);
2881        check_coordinate(tree, oracle, total_bytes / 2);
2882    }
2883
2884
2885    //
2886    // Sample ~8 positions spread across the document
2887    //
2888    for i in 0u32..8 {
2889        let byte =
2890            ((total_bytes as u64 * ((i as u64 * 2_654_435_761) & 0xFFFF_FFFF)) >> 32) as u32 % total_bytes.max(1);
2891
2892        // Snap to char boundary
2893        let byte = oracle.as_bytes()[..byte as usize]
2894            .iter().rposition(|&b| !(0x80..0xC0).contains(&b))
2895            .map_or(0, |i| i as u32);
2896
2897        check_coordinate(tree, oracle, byte);
2898    }
2899
2900    //
2901    // EOF and out-of-bounds always
2902    //
2903    assert_eq!(tree.char_to_byte(total_chars), Some(total_bytes));
2904    assert_eq!(tree.byte_to_char(total_bytes), Some(total_chars));
2905    assert_eq!(tree.char_to_byte(total_chars + 1), None);
2906    assert_eq!(tree.byte_to_char(total_bytes + 1), None);
2907    assert_eq!(tree.byte_to_line_col(total_bytes + 1), None);
2908
2909    //
2910    // line_to_offset: check first, last, middle line only
2911    //
2912    let total_lines = oracle.chars().filter(|&c| c == '\n').count() + 1;
2913    for line in [0, total_lines / 2, total_lines.saturating_sub(1)] {
2914        let expected = oracle_line_to_offset(oracle, line) as u32;
2915        assert_eq!(tree.line_to_byte(line as u32), Some(expected),
2916            "line_to_offset({}) wrong", line);
2917    }
2918
2919    assert_eq!(tree.line_to_byte(total_lines as u32), None);
2920}