Skip to main content

piece_tree/
lib.rs

1#![cfg_attr(not(feature = "write"), no_std)]
2
3#![cfg_attr(all(not(feature = "runtime-dispatch-simd"), feature = "generic-simd"), feature(portable_simd))]
4
5#![warn(clippy::all, clippy::pedantic, dead_code, clippy::cargo)]
6#![allow(
7    unused_assignments,
8    clippy::inline_always,
9    clippy::uninlined_format_args, // ?...
10    clippy::borrow_as_ptr,
11    clippy::negative_feature_names,
12    clippy::redundant_closure_for_method_calls,
13    clippy::sliced_string_as_bytes,
14    clippy::should_implement_trait,
15    clippy::collapsible_if,
16    clippy::new_without_default,
17    clippy::comparison_chain,
18    clippy::redundant_field_names,
19    clippy::semicolon_if_nothing_returned,
20    clippy::pub_underscore_fields,
21    clippy::struct_field_names,
22    clippy::ptr_as_ptr,
23    clippy::missing_transmute_annotations,
24    clippy::multiple_crate_versions,
25    clippy::cast_precision_loss,
26    clippy::cast_possible_truncation,
27    clippy::similar_names,
28    clippy::cast_sign_loss,
29    clippy::cast_possible_wrap,
30    clippy::used_underscore_binding,
31    clippy::nonstandard_macro_braces,
32    clippy::used_underscore_items,
33    clippy::enum_glob_use,
34    clippy::cast_lossless,
35    clippy::match_same_arms,
36    clippy::too_many_lines,
37    clippy::unnested_or_patterns,
38    clippy::blocks_in_conditions,
39    clippy::missing_errors_doc,
40    clippy::missing_panics_doc,
41)]
42
43#[cfg(feature = "write")]
44extern crate std;
45
46extern crate alloc;
47
48#[allow(unused)]
49use alloc::vec;
50use alloc::vec::Vec;
51use alloc::sync::Arc;
52use alloc::string::{String, ToString};
53
54use core::str;
55use core::cmp::Ordering;
56use core::ops::{Bound, Deref, RangeBounds};
57
58#[cfg(not(feature = "dont_vendor"))]
59mod cranelift_entity_vendor;
60#[cfg(not(feature = "dont_vendor"))]
61use cranelift_entity_vendor as cranelift_entity;
62
63#[cfg(not(feature = "dont_vendor"))]
64mod smallvec_vendor;
65#[cfg(not(feature = "dont_vendor"))]
66use smallvec_vendor as smallvec;
67
68#[cfg(not(feature = "dont_vendor"))]
69mod bytecount_vendor;
70#[cfg(not(feature = "dont_vendor"))]
71use bytecount_vendor as bytecount;
72
73use smallvec::SmallVec;
74use cranelift_entity::{EntityRef, PrimaryMap};
75#[cfg(feature = "dont_vendor")]
76use cranelift_entity::entity_impl;
77
78#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
79pub struct BufferRef(pub u32);
80entity_impl!(BufferRef);
81
82#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
83pub struct NodeRef(pub u32);
84entity_impl!(NodeRef);
85
86pub const NIL: NodeRef = NodeRef(0);
87
88pub const MOD_BUFFER: BufferRef = BufferRef(u32::MAX >> 1);
89
90pub const CHECKPOINT_INTERVAL: u32 = 64;
91
92#[derive(Clone, Copy, PartialEq, Eq, Debug)]
93pub enum Color { Black = 0, Red = 1 }
94
95#[derive(Clone, Copy, Debug)]
96pub struct HistoryEntry {
97    pub root:          NodeRef,
98    pub cursor_offset: u32,
99}
100
101#[derive(Default, Clone, Copy, Debug, PartialEq, Eq)]
102pub struct Piece {
103    pub buffer:            BufferRef,
104
105    pub byte_offset:       u32,
106    pub byte_length:       u32,
107
108    pub newline_count:     u32,
109    pub char_count:        u32,
110
111    pub buffer_start_line: u32,  // Index into that buffer's `newline_offsets` array of this piece's first '\n'
112
113    pub piece_start_char:  u32,  // Absolute char index of p.offset in its buffer
114}
115
116// For tests and other stuff
117#[derive(Clone, Copy, Debug)]
118pub enum Edit {
119    Insert { offset: u32, text: &'static str },
120    Remove { offset: u32, length: u32 },
121}
122
123impl Edit {
124    #[inline(always)]
125    #[must_use]
126    pub fn offset(&self) -> u32 {
127        match self {
128            Edit::Insert { offset, .. } => *offset,
129            Edit::Remove { offset, .. } => *offset,
130        }
131    }
132}
133
134#[derive(Clone, Copy, Debug)]
135#[repr(C)]
136pub struct Node {
137    pub left:              NodeRef, // 4 bytes
138    pub right:             NodeRef, // 4 bytes
139    pub offset:            u32,     // 4 bytes
140    pub subtree_len:       u32,     // 4 bytes
141    pub subtree_chars:     u32,     // 4 bytes
142    pub subtree_newlines:  u32,     // 4 bytes
143    pub meta:              u32,     // 4 bytes (Bit 0: Color, Bits 1..31: BufferIndex)
144    pub buffer_start_line: u32,     // 4 bytes
145    pub piece_start_char:  u32,     // 4 bytes
146}
147
148const _: () = assert!(size_of::<Node>() == 36);
149
150impl Node {
151    #[inline(always)]
152    #[must_use]
153    pub fn color(&self) -> Color {
154        if (self.meta & 1) == 1 { Color::Red } else { Color::Black }
155    }
156
157    #[inline(always)]
158    pub fn set_color(&mut self, color: Color) {
159        self.meta = (self.meta & !1) | (color as u32);
160    }
161
162    #[inline(always)]
163    #[must_use]
164    pub fn buffer_index(&self) -> BufferRef {
165        BufferRef(self.meta >> 1)
166    }
167
168    #[inline(always)]
169    pub fn set_buffer(&mut self, buffer: BufferRef) {
170        self.meta = (buffer.as_u32() << 1) | (self.meta & 1);
171    }
172}
173
174#[derive(Debug, Clone)]
175pub struct OriginalBuffer {
176    pub text:             Arc<str>,
177    pub newline_offsets:  Arc<[u32]>,
178    pub char_checkpoints: Arc<[(u32, u32)]>,
179}
180
181impl Deref for OriginalBuffer {
182    type Target = str;
183
184    #[inline(always)]
185    fn deref(&self) -> &Self::Target {
186        self.text.as_ref()
187    }
188}
189
190#[derive(Debug, Clone)]
191pub struct Buffers {
192    pub original_buffers: PrimaryMap<BufferRef, OriginalBuffer>,
193
194    pub modifications_char_count:       u32,
195    pub modifications_buffer:           String,
196    pub modifications_newline_offsets:  Vec<u32>,
197    pub modifications_char_checkpoints: Vec<(u32, u32)>,
198}
199
200impl Default for Buffers {
201    #[inline(always)]
202    fn default() -> Self {
203        Self {
204            original_buffers: PrimaryMap::new(),
205            modifications_char_count: 0,
206            modifications_newline_offsets: Vec::with_capacity(1024),
207            modifications_char_checkpoints: Vec::with_capacity(128),
208            modifications_buffer: String::with_capacity(1024 * 64),
209        }
210    }
211}
212
213#[must_use]
214#[inline(always)]
215pub fn count_chars_and_newlines(bytes: &[u8]) -> (u32, u32) {
216    let mut chars    = 0u32;
217    let mut newlines = 0u32;
218
219    for &b in bytes {
220        newlines += (b == b'\n')         as u32;
221        chars    += ((b & 0xC0) != 0x80) as u32;
222    }
223
224    (chars, newlines)
225}
226
227#[must_use]
228#[inline(always)]
229pub fn count_chars_and_newlines_with_offsets_and_checkpoints(
230    bytes: &[u8],
231    offsets: &mut Vec<u32>,
232    checkpoints: &mut Vec<(u32, u32)>,
233) -> (u32, u32) {
234    let mut chars    = 0u32;
235    let mut newlines = 0u32;
236
237    let mut next_checkpoint = CHECKPOINT_INTERVAL;
238
239    for (i, &b) in bytes.iter().enumerate() {
240        if (b as i8) >= -0x40 {
241            if chars == next_checkpoint {
242                checkpoints.push((chars, i as u32));
243                next_checkpoint += CHECKPOINT_INTERVAL;
244            }
245            chars += 1;
246        }
247
248        if b == b'\n' {
249            offsets.push(i as u32);
250            newlines += 1;
251        }
252    }
253
254    (chars, newlines)
255}
256
257impl Buffers {
258    #[inline(always)]
259    #[must_use]
260    pub fn new() -> Self { Self::default() }
261
262    #[inline(always)]
263    #[must_use]
264    pub fn get(&self, buffer: BufferRef) -> &str {
265        if buffer == MOD_BUFFER {
266            &self.modifications_buffer
267        } else {
268            &self.original_buffers[buffer]
269        }
270    }
271
272    #[inline(always)]
273    #[must_use]
274    pub fn get_slice(&self, buffer: BufferRef, offset: u32, len: u32) -> &str {
275        let buf = self.get(buffer);
276        let start = offset as usize;
277        let end = start + len as usize;
278        unsafe { str::from_utf8_unchecked(buf.as_bytes().get_unchecked(start..end)) }
279    }
280
281    #[inline]
282    #[must_use]
283    pub fn count_chars_and_newlines(&self, buffer: BufferRef, start_offset: u32, len: u32) -> (u32, u32) {
284        if len == 0 { return (0, 0) }
285
286        let end_offset = start_offset + len;
287
288        //
289        // O(log N) newlines count
290        //
291        let nl_offsets = self.get_newlines(buffer);
292        let start_nl_index = nl_offsets.binary_search(&start_offset).unwrap_or_else(|x| x);
293        let end_nl_index = nl_offsets.binary_search(&end_offset).unwrap_or_else(|x| x);
294        let newlines = (end_nl_index - start_nl_index) as u32;
295
296        //
297        // O(log N) char count
298        //
299
300        let checkpoints = self.get_checkpoints(buffer);
301        let buffer_text = self.get(buffer);
302
303        //
304        // Gets absolute char count from start of buffer to `target_offset`
305        //
306        let chars_up_to = |target_offset: u32| -> u32 {
307            if target_offset == 0 { return 0; }
308
309            //
310            // Find the index of the first checkpoint that is strictly strictly greater than our target
311            //
312            let partition_index = checkpoints.partition_point(|&(_char_cnt, byte_offset)| byte_offset <= target_offset);
313
314            let (base_chars, base_bytes) = if partition_index > 0 {
315                checkpoints[partition_index - 1]
316            } else {
317                (0, 0)
318            };
319
320            //
321            // Linearly scan only the remaining bytes from the checkpoint to the target
322            //
323            let tail_bytes = &buffer_text[base_bytes as usize .. target_offset as usize];
324            base_chars + bytecount::num_chars(tail_bytes.as_bytes()) as u32
325        };
326
327        let chars = chars_up_to(end_offset) - chars_up_to(start_offset);
328
329        (chars, newlines)
330    }
331
332    #[inline(always)]
333    #[must_use]
334    pub fn count_chars(&self, buffer: BufferRef, offset: u32, len: u32) -> u32 {
335        bytecount::num_chars(self.get_slice(buffer, offset, len).as_bytes()) as u32
336    }
337
338    #[inline(always)]
339    #[must_use]
340    pub fn count_newlines(&self, buffer: BufferRef, offset: u32, len: u32) -> u32 {
341        bytecount::count(self.get_slice(buffer, offset, len).as_bytes(), b'\n') as _
342    }
343
344    #[inline(always)]
345    fn get_newlines(&self, buffer: BufferRef) -> &[u32] {
346        if buffer == MOD_BUFFER {
347            &self.modifications_newline_offsets
348        } else {
349            &self.original_buffers[buffer].newline_offsets
350        }
351    }
352
353    #[inline(always)]
354    fn get_checkpoints(&self, buffer: BufferRef) -> &[(u32, u32)] {
355        if buffer == MOD_BUFFER {
356            &self.modifications_char_checkpoints
357        } else {
358            &self.original_buffers[buffer].char_checkpoints
359        }
360    }
361
362    /// Converts an absolute char index to an absolute byte offset for a specific buffer
363    #[must_use]
364    #[inline]
365    pub fn char_to_byte_absolute(&self, buffer: BufferRef, target_char: u32) -> u32 {
366        if target_char == 0 { return 0; }
367
368        let checkpoints = self.get_checkpoints(buffer);
369        let cp_index = checkpoints.partition_point(|&(c, _)| c <= target_char);
370
371        let (current_char, current_byte) = if cp_index == 0 {
372            (0, 0)
373        } else {
374            unsafe { *checkpoints.get_unchecked(cp_index - 1) }
375        };
376
377        let remainder_chars = target_char - current_char;
378        if remainder_chars == 0 { return current_byte; }
379
380        let text = self.get(buffer);
381        let slice = unsafe { text.get_unchecked(current_byte as usize..) };
382
383        let additional_bytes = slice
384            .char_indices()
385            .nth(remainder_chars as usize).map_or_else(|| slice.len() as u32, |(b, _)| b as u32);  // Fallback to end if out of bounds
386
387        current_byte + additional_bytes
388    }
389
390    /// Converts an absolute byte offset to an absolute char index for a specific buffer
391    #[must_use]
392    #[inline]
393    pub fn byte_to_char_absolute(&self, buffer: BufferRef, target_byte: u32) -> u32 {
394        if target_byte == 0 { return 0; }
395
396        let checkpoints = self.get_checkpoints(buffer);
397        let cp_index = checkpoints.partition_point(|&(_, b)| b <= target_byte);
398
399        let (current_char, current_byte) = if cp_index == 0 {
400            (0, 0)
401        } else {
402            unsafe { *checkpoints.get_unchecked(cp_index - 1) }
403        };
404
405        let remainder_bytes = target_byte - current_byte;
406        if remainder_bytes == 0 { return current_char; }
407
408        let text = self.get(buffer);
409        let slice = unsafe { text.get_unchecked(current_byte as usize..target_byte as usize) };
410        let additional_chars = bytecount::num_chars(slice.as_bytes()) as u32;
411
412        current_char + additional_chars
413    }
414}
415
416#[derive(Debug, Clone)]
417pub struct Pieces {
418    pub nodes: PrimaryMap<NodeRef, Node>,
419}
420
421impl Default for Pieces {
422    #[inline(always)]
423    fn default() -> Self {
424        Self::new()
425    }
426}
427
428impl Pieces {
429    #[inline(always)]
430    #[must_use]
431    pub fn new() -> Self {
432        let mut nodes = PrimaryMap::new();
433        nodes.push(Node {
434            left: NIL, right: NIL, offset: 0,
435            subtree_len: 0, subtree_chars: 0, subtree_newlines: 0,
436            meta: 0, buffer_start_line: 0,
437            piece_start_char: 0
438        });
439        Self { nodes }
440    }
441
442    #[inline(always)]
443    #[must_use]
444    pub fn len(&self) -> usize { self.nodes.len() }
445
446    #[inline(always)]
447    #[must_use]
448    pub fn is_empty(&self) -> bool { self.nodes.is_empty() }
449
450    #[inline(always)]
451    #[must_use]
452    pub fn get(&self, index: NodeRef) -> &Node { &self.nodes[index] }
453
454    #[inline(always)]
455    #[must_use]
456    pub fn get_piece(&self, index: NodeRef) -> Piece {
457        if index == NIL { return Piece::default(); }
458
459        let node = &self.nodes[index];
460        let l = &self.nodes[node.left];
461        let r = &self.nodes[node.right];
462
463        Piece {
464            buffer: node.buffer_index(),
465            byte_offset: node.offset,
466            byte_length: node.subtree_len - l.subtree_len - r.subtree_len,
467            char_count: node.subtree_chars - l.subtree_chars - r.subtree_chars,
468            newline_count: node.subtree_newlines - l.subtree_newlines - r.subtree_newlines,
469            buffer_start_line: node.buffer_start_line,
470            piece_start_char: node.piece_start_char,
471        }
472    }
473
474    #[inline(always)]
475    pub fn alloc(&mut self, color: Color, left: NodeRef, piece: Piece, right: NodeRef) -> NodeRef {
476        let l = &self.nodes[left];
477        let r = &self.nodes[right];
478
479        let mut node = Node {
480            left, right,
481            offset: piece.byte_offset,
482            subtree_len: l.subtree_len + piece.byte_length + r.subtree_len,
483            subtree_chars: l.subtree_chars + piece.char_count + r.subtree_chars,
484            subtree_newlines: l.subtree_newlines + piece.newline_count + r.subtree_newlines,
485            buffer_start_line: piece.buffer_start_line,
486            piece_start_char: piece.piece_start_char,
487            meta: 0,
488        };
489        node.set_color(color);
490        node.set_buffer(piece.buffer);
491
492        self.nodes.push(node)
493    }
494
495    /// Extends a piece without triggering Red-Black structural changes.
496    /// It creates a single new path to the root to maintain persistence.
497    pub fn extend_piece(
498        &mut self,
499        root: NodeRef,
500        piece_start_offset: u32,
501        add_len: u32,
502        add_chars: u32,
503        add_nl: u32,
504    ) -> NodeRef {
505        if root == NIL { return NIL }
506
507        let node = self.nodes[root];
508        let mut new_node = node;
509
510        let left_len = self.nodes[node.left].subtree_len;
511        let piece_len = node.subtree_len - left_len - self.nodes[node.right].subtree_len;
512
513        if piece_start_offset < left_len {
514            new_node.left  = self.extend_piece(node.left, piece_start_offset, add_len, add_chars, add_nl);
515        } else if piece_start_offset > left_len {
516            new_node.right = self.extend_piece(node.right, piece_start_offset - (left_len + piece_len), add_len, add_chars, add_nl);
517        } else {
518            // piece_start_offset == left_len
519            // We found the exact node! No further recursion needed.
520        }
521
522        new_node.subtree_len      += add_len;
523        new_node.subtree_chars    += add_chars;
524        new_node.subtree_newlines += add_nl;
525
526        self.nodes.push(new_node)
527    }
528
529    #[inline]
530    pub fn insert_node(&mut self, root: NodeRef, piece: Piece, at: u32) -> NodeRef {
531        let new_root = self.ins(root, piece, at, 0);
532        let r_node = self.nodes[new_root];
533        let p = self.get_piece(new_root);
534        self.alloc(Color::Black, r_node.left, p, r_node.right)
535    }
536
537    #[inline]
538    pub fn remove_node(&mut self, root: NodeRef, at: u32) -> NodeRef {
539        let new_root = self.rem(root, at, 0);
540        if new_root == NIL { return NIL }
541
542        let r_node = self.nodes[new_root];
543        self.alloc(Color::Black, r_node.left, self.get_piece(new_root), r_node.right)
544    }
545
546    #[inline]
547    fn rem(&mut self, root: NodeRef, at: u32, total: u32) -> NodeRef {
548        if root == NIL { return NIL }
549
550        let node = self.nodes[root];
551        let node_piece = self.get_piece(root);
552        let left_len = self.nodes[node.left].subtree_len;
553
554        match at.cmp(&(total + left_len)) {
555            Ordering::Less => self.remove_left(root, at, total),
556            Ordering::Equal => self.fuse(node.left, node.right),
557            Ordering::Greater => {
558                let next_total = total + left_len + node_piece.byte_length;
559                self.remove_right(root, at, next_total)
560            }
561        }
562    }
563
564    #[inline]
565    fn remove_left(&mut self, root: NodeRef, at: u32, total: u32) -> NodeRef {
566        let node = self.nodes[root];
567        let new_left = self.rem(node.left, at, total);
568        let new_node = self.alloc(Color::Red, new_left, self.get_piece(root), node.right);
569        if self.nodes[node.left].color() == Color::Black {
570            self.balance_left(new_node)
571        } else {
572            new_node
573        }
574    }
575
576    #[inline]
577    fn remove_right(&mut self, root: NodeRef, at: u32, total: u32) -> NodeRef {
578        let node = self.nodes[root];
579        let new_right = self.rem(node.right, at, total);
580        let new_node = self.alloc(Color::Red, node.left, self.get_piece(root), new_right);
581        if self.nodes[node.right].color() == Color::Black {
582            self.balance_right(new_node)
583        } else {
584            new_node
585        }
586    }
587
588    #[inline]
589    fn ins(&mut self, root: NodeRef, p: Piece, at: u32, total_offset: u32) -> NodeRef {
590        if root == NIL { return self.alloc(Color::Red, NIL, p, NIL); }
591
592        let node = self.nodes[root];
593        let node_piece = self.get_piece(root);
594        let left_len = self.nodes[node.left].subtree_len;
595
596        if at < total_offset + left_len + node_piece.byte_length {
597            let lft = self.ins(node.left, p, at, total_offset);
598            self.balance(node.color(), lft, node_piece, node.right)
599        } else {
600            let next_offset = total_offset + left_len + node_piece.byte_length;
601            let rgt = self.ins(node.right, p, at, next_offset);
602            self.balance(node.color(), node.left, node_piece, rgt)
603        }
604    }
605
606    #[inline]
607    fn balance(&mut self, c: Color, l_index: NodeRef, p: Piece, r_index: NodeRef) -> NodeRef {
608        let l = self.nodes[l_index];
609        let r = self.nodes[r_index];
610
611        if c == Color::Black {
612            if l.color() == Color::Red {
613                let ll = self.nodes[l.left];
614                let lr = self.nodes[l.right];
615                if ll.color() == Color::Red {
616                    let new_l = self.alloc(Color::Black, ll.left, self.get_piece(l.left), ll.right);
617                    let new_r = self.alloc(Color::Black, l.right, p, r_index);
618                    return self.alloc(Color::Red, new_l, self.get_piece(l_index), new_r);
619                } else if lr.color() == Color::Red {
620                    let new_l = self.alloc(Color::Black, l.left, self.get_piece(l_index), lr.left);
621                    let new_r = self.alloc(Color::Black, lr.right, p, r_index);
622                    return self.alloc(Color::Red, new_l, self.get_piece(l.right), new_r);
623                }
624            }
625
626            if r.color() == Color::Red {
627                let rl = self.nodes[r.left];
628                let rr = self.nodes[r.right];
629                if rl.color() == Color::Red {
630                    let new_l = self.alloc(Color::Black, l_index, p, rl.left);
631                    let new_r = self.alloc(Color::Black, rl.right, self.get_piece(r_index), r.right);
632                    return self.alloc(Color::Red, new_l, self.get_piece(r.left), new_r);
633                } else if rr.color() == Color::Red {
634                    let new_l = self.alloc(Color::Black, l_index, p, r.left);
635                    let new_r = self.alloc(Color::Black, rr.left, self.get_piece(r.right), rr.right);
636                    return self.alloc(Color::Red, new_l, self.get_piece(r_index), new_r);
637                }
638            }
639        }
640
641        self.alloc(c, l_index, p, r_index)
642    }
643
644    #[inline]
645    fn fuse(&mut self, left: NodeRef, right: NodeRef) -> NodeRef {
646        // match: (left, right)
647
648        // case: (None, r)
649        if left  == NIL { return right }
650        if right == NIL { return left }
651
652        let l_node = self.nodes[left];
653        let r_node = self.nodes[right];
654
655        // match: (left.color, right.color)
656
657        // case: (B, R)
658        if l_node.color() == Color::Black && r_node.color() == Color::Red {
659            let fused = self.fuse(left, r_node.left);
660            return self.alloc(Color::Red, fused, self.get_piece(right), r_node.right);
661        }
662
663        // case: (R, B)
664        if l_node.color() == Color::Red && r_node.color() == Color::Black {
665            let fused = self.fuse(l_node.right, right);
666            return self.alloc(Color::Red, l_node.left, self.get_piece(left), fused);
667        }
668
669        // case: (R, R)
670        if l_node.color() == Color::Red && r_node.color() == Color::Red {
671            let fused = self.fuse(l_node.right, r_node.left);
672            let f_node = self.nodes[fused];
673            if fused != NIL && f_node.color() == Color::Red {
674                let new_l = self.alloc(Color::Red, l_node.left, self.get_piece(left), f_node.left);
675                let new_r = self.alloc(Color::Red, f_node.right, self.get_piece(right), r_node.right);
676                return self.alloc(Color::Red, new_l, self.get_piece(fused), new_r);
677            }
678            let new_r = self.alloc(Color::Red, fused, self.get_piece(right), r_node.right);
679            return self.alloc(Color::Red, l_node.left, self.get_piece(left), new_r);
680        }
681
682        // case: (B, B)
683
684        let fused = self.fuse(l_node.right, r_node.left);
685        let f_node = self.nodes[fused];
686        if fused != NIL && f_node.color() == Color::Red {
687            let new_l = self.alloc(Color::Black, l_node.left, self.get_piece(left), f_node.left);
688            let new_r = self.alloc(Color::Black, f_node.right, self.get_piece(right), r_node.right);
689            return self.alloc(Color::Red, new_l, self.get_piece(fused), new_r);
690        }
691
692        let new_r = self.alloc(Color::Black, fused, self.get_piece(right), r_node.right);
693        let new_node = self.alloc(Color::Red, l_node.left, self.get_piece(left), new_r);
694        self.balance_left(new_node)
695    }
696
697    #[inline]
698    fn balance_left(&mut self, left: NodeRef) -> NodeRef {
699        let l_node = self.nodes[left];
700        let ll_node = self.nodes[l_node.left];
701        let lr_node = self.nodes[l_node.right];
702
703        // match: (color_l, color_r, color_r_l)
704
705        // case: (Some(R), ..)
706        if l_node.left != NIL && ll_node.color() == Color::Red {
707            let new_ll = self.alloc(Color::Black, ll_node.left, self.get_piece(l_node.left), ll_node.right);
708            return self.alloc(Color::Red, new_ll, self.get_piece(left), l_node.right);
709        }
710
711        // case: (_, Some(B), _)
712        if l_node.right != NIL && lr_node.color() == Color::Black {
713            let new_lr = self.alloc(Color::Red, lr_node.left, self.get_piece(l_node.right), lr_node.right);
714            let new_l = self.alloc(Color::Black, l_node.left, self.get_piece(left), new_lr);
715            let nl = self.nodes[new_l];
716            return self.balance(Color::Black, nl.left, self.get_piece(new_l), nl.right);
717        }
718
719        // case: (_, Some(R), Some(B))
720        if l_node.right != NIL && lr_node.color() == Color::Red {
721            let lrl_node = self.nodes[lr_node.left];
722            if lr_node.left != NIL && lrl_node.color() == Color::Black {
723                let lrr_node = self.nodes[lr_node.right];
724                let new_lrr = self.alloc(Color::Red, lrr_node.left, self.get_piece(lr_node.right), lrr_node.right);
725                let unbal_r = self.alloc(Color::Black, lrl_node.right, self.get_piece(l_node.right), new_lrr);
726                let ur_node = self.nodes[unbal_r];
727                let new_r = self.balance(ur_node.color(), ur_node.left, self.get_piece(unbal_r), ur_node.right);
728                let new_l = self.alloc(Color::Black, l_node.left, self.get_piece(left), lrl_node.left);
729                return self.alloc(Color::Red, new_l, self.get_piece(lr_node.left), new_r);
730            }
731        }
732
733        left
734    }
735
736    #[inline]
737    fn balance_right(&mut self, right: NodeRef) -> NodeRef {
738        let r_node = self.nodes[right];
739        let rl_node = self.nodes[r_node.left];
740        let rr_node = self.nodes[r_node.right];
741
742        // match: (color_l, color_l_r, color_r)
743
744        // case: (.., Some(R))
745        if r_node.right != NIL && rr_node.color() == Color::Red {
746            let new_rr = self.alloc(Color::Black, rr_node.left, self.get_piece(r_node.right), rr_node.right);
747            return self.alloc(Color::Red, r_node.left, self.get_piece(right), new_rr);
748        }
749
750        // case: (Some(B), ..)
751        if r_node.left != NIL && rl_node.color() == Color::Black {
752            let new_rl = self.alloc(Color::Red, rl_node.left, self.get_piece(r_node.left), rl_node.right);
753            let new_r = self.alloc(Color::Black, new_rl, self.get_piece(right), r_node.right);
754            let nr = self.nodes[new_r];
755            return self.balance(Color::Black, nr.left, self.get_piece(new_r), nr.right);
756        }
757
758        // case: (Some(R), Some(B), _)
759        if r_node.left != NIL && rl_node.color() == Color::Red {
760            let rlr_node = self.nodes[rl_node.right];
761            if rl_node.right != NIL && rlr_node.color() == Color::Black {
762                let rll_node = self.nodes[rl_node.left];
763                let new_rll = self.alloc(Color::Red, rll_node.left, self.get_piece(rl_node.left), rll_node.right);
764                let unbal_l = self.alloc(Color::Black, new_rll, self.get_piece(r_node.left), rlr_node.left);
765                let ul_node = self.nodes[unbal_l];
766                let new_l = self.balance(ul_node.color(), ul_node.left, self.get_piece(unbal_l), ul_node.right);
767                let new_r = self.alloc(Color::Black, rlr_node.right, self.get_piece(right), r_node.right);
768                return self.alloc(Color::Red, new_l, self.get_piece(rl_node.right), new_r);
769            }
770        }
771
772        right
773    }
774
775    #[inline]
776    #[must_use]
777    pub fn find_offset(
778        &self,
779        mut root: NodeRef,
780        mut target_offset: u32,
781        prefer_left: bool
782    ) -> Option<(NodeRef, u32)> {
783        while root != NIL {
784            let node = &self.nodes[root];
785            let left_len = self.nodes[node.left].subtree_len;
786            let piece_len = self.get_piece(root).byte_length;
787
788            if target_offset < left_len {
789                root = node.left;
790
791            } else if target_offset == left_len + piece_len && prefer_left {
792                // We are perfectly on the boundary and want to merge with the left piece
793                return Some((root, piece_len));
794
795            } else if target_offset < left_len + piece_len {
796                return Some((root, target_offset - left_len));
797
798            } else {
799                target_offset -= left_len + piece_len;
800                root = node.right;
801            }
802        }
803
804        None
805    }
806}
807
808impl PieceTree {
809    /// Slices a tree exactly at `offset`, returning (`LeftTree`, `RightTree`)
810    #[inline]
811    pub fn split(&mut self, root: NodeRef, offset: u32) -> (NodeRef, NodeRef) {
812        if root == NIL {
813            return (NIL, NIL);
814        }
815
816        let node = self.pieces.nodes[root];
817        let p = self.pieces.get_piece(root);
818        let left_len = self.pieces.nodes[node.left].subtree_len;
819
820        if offset < left_len {
821            let (ll, lr) = self.split(node.left, offset);
822
823            // Fast path: Use join_with_middle directly
824            let new_right = self.pieces.join_with_middle(lr, p, node.right);
825            (ll, new_right)
826
827        } else if offset > left_len + p.byte_length {
828            let (rl, rr) = self.split(node.right, offset - left_len - p.byte_length);
829
830            // Fast path: Use join_with_middle directly
831            let new_left = self.pieces.join_with_middle(node.left, p, rl);
832            (new_left, rr)
833
834        } else {
835            let rel_offset = offset - left_len;
836
837            if rel_offset == 0 {
838                (node.left, self.pieces.join_with_middle(NIL, p, node.right))
839
840            } else if rel_offset == p.byte_length {
841                (self.pieces.join_with_middle(node.left, p, NIL), node.right)
842
843            } else {
844                //
845                // We have to slice the Piece exactly in half!
846                //
847                let left_len = rel_offset;
848                let right_len = p.byte_length - rel_offset;
849
850                //
851                // Only scan the smaller half of the piece to avoid O(N) bottlenecks
852                //
853                let (left_chars, left_nl) = if left_len <= right_len {
854                    // Left side is shorter, scan normally
855                    self.buffers.count_chars_and_newlines(p.buffer, p.byte_offset, left_len)
856                } else {
857                    // Right side is shorter, scan it and subtract from the known totals
858                    let (r_chars, r_nl) = self.buffers.count_chars_and_newlines(
859                        p.buffer, p.byte_offset + left_len, right_len
860                    );
861
862                    (p.char_count - r_chars, p.newline_count - r_nl)
863                };
864
865                let left_stump = Piece {
866                    buffer: p.buffer,
867                    byte_offset: p.byte_offset,
868                    byte_length: left_len,
869                    newline_count: left_nl,
870                    char_count: left_chars,
871                    buffer_start_line: p.buffer_start_line,
872                    piece_start_char: p.piece_start_char,
873                };
874
875                let right_stump = Piece {
876                    buffer: p.buffer,
877                    byte_offset: p.byte_offset + left_len,
878                    byte_length: right_len,
879                    newline_count: p.newline_count - left_nl,
880                    char_count: p.char_count - left_chars,
881                    buffer_start_line: p.buffer_start_line + left_nl,
882                    piece_start_char: p.piece_start_char + left_chars,
883                };
884
885                let new_left = self.pieces.join_with_middle(node.left, left_stump, NIL);
886                let new_right = self.pieces.join_with_middle(NIL, right_stump, node.right);
887                (new_left, new_right)
888            }
889        }
890    }
891
892    /// Glues two arbitrary trees together by extracting the max element of the left tree
893    #[inline]
894    pub fn concat(&mut self, left: NodeRef, right: NodeRef) -> NodeRef {
895        //
896        // Group the evaluation so we can force a Black root on the way out!
897        //
898        let new_root = if left == NIL {
899            right
900        } else if right == NIL {
901            left
902        } else {
903            let mut curr = left;
904            while self.pieces.nodes[curr].right != NIL {
905                curr = self.pieces.nodes[curr].right;
906            }
907            let max_piece = self.pieces.get_piece(curr);
908            let max_offset = self.pieces.nodes[left].subtree_len - max_piece.byte_length;
909
910            let left_without_max = self.pieces.remove_node(left, max_offset);
911
912            self.pieces.join_with_middle(left_without_max, max_piece, right)
913        };
914
915        //
916        // Even if we early-returned right, we MUST ensure the final returned root is Black.
917        //
918        if new_root != NIL && self.pieces.nodes[new_root].color() == Color::Red {
919            let p = self.pieces.get_piece(new_root);
920            let r = self.pieces.nodes[new_root];
921            return self.pieces.alloc(Color::Black, r.left, p, r.right);
922        }
923
924        new_root
925    }
926}
927
928impl Pieces {
929    /// Recursively counts the black height of a given subtree
930    #[inline(always)]
931    #[must_use]
932    pub fn black_height(&self, mut node: NodeRef) -> u32 {
933        let mut h = 0;
934        while node != NIL {
935            if self.nodes[node].color() == Color::Black {
936                h += 1;
937            }
938            node = self.nodes[node].left;
939        }
940        h
941    }
942
943    /// Safely joins two arbitrary Red-Black trees using a middle element
944    #[inline(always)]
945    pub fn join_with_middle(&mut self, left: NodeRef, piece: Piece, right: NodeRef) -> NodeRef {
946        let hl = self.black_height(left);
947        let hr = self.black_height(right);
948
949        let new_root = if hl > hr {
950            self.join_right(left, piece, right, hl, hr)
951        } else if hr > hl {
952            self.join_left(left, piece, right, hl, hr)
953        } else {
954            self.alloc(Color::Red, left, piece, right)
955        };
956
957        // Enforce the Red-Black root constraint!
958        if new_root != NIL && self.nodes[new_root].color() == Color::Red {
959            let p = self.get_piece(new_root);
960            let r = self.nodes[new_root];
961            return self.alloc(Color::Black, r.left, p, r.right);
962        }
963
964        new_root
965    }
966
967    #[inline(always)]
968    fn join_right(&mut self, left: NodeRef, piece: Piece, right: NodeRef, hl: u32, hr: u32) -> NodeRef {
969        if hl == hr {
970            return self.alloc(Color::Red, left, piece, right);
971        }
972
973        let l_node = self.nodes[left];
974        let next_hl = if l_node.color() == Color::Black { hl - 1 } else { hl };
975
976        let new_right = self.join_right(l_node.right, piece, right, next_hl, hr);
977        let p = self.get_piece(left);
978
979        self.balance(l_node.color(), l_node.left, p, new_right)
980    }
981
982    #[inline(always)]
983    fn join_left(&mut self, left: NodeRef, piece: Piece, right: NodeRef, hl: u32, hr: u32) -> NodeRef {
984        if hl == hr {
985            return self.alloc(Color::Red, left, piece, right);
986        }
987
988        let r_node = self.nodes[right];
989        let next_hr = if r_node.color() == Color::Black { hr - 1 } else { hr };
990
991        let new_left = self.join_left(left, piece, r_node.left, hl, next_hr);
992        let p = self.get_piece(right);
993
994        self.balance(r_node.color(), new_left, p, r_node.right)
995    }
996}
997
998#[derive(Debug, Clone)]
999pub struct PieceTree {
1000    //
1001    // `last_insert_end`      is the mod-buffer offset one past the last inserted byte.
1002    // `last_insert_tree_end` is the document   offset one past that insertion.
1003    //
1004    // Both are u32::MAX when unset (after removes, undos, or on init).
1005    //
1006    pub last_mod_end:  u32,  // fredbuf's last_insert     (BufferCursor)
1007    pub last_tree_end: u32,  // fredbuf's end_last_insert (CharOffset)
1008
1009    pub root: NodeRef,
1010
1011    /// Tracks nested undo groups. 0 if we are outside any group.
1012    pub transaction_depth: usize,
1013
1014    pub undo_stack: Vec<HistoryEntry>,
1015    pub redo_stack: Vec<HistoryEntry>,
1016
1017    pub pieces:  Pieces,
1018    pub buffers: Buffers,
1019
1020    pub scratch_index_map: Vec<u32>,
1021}
1022
1023impl Default for PieceTree {
1024    fn default() -> Self {
1025        Self {
1026            last_mod_end:      u32::MAX,
1027            last_tree_end:     u32::MAX,
1028            root:              NIL,
1029            transaction_depth: 0,
1030            undo_stack:        Vec::new(),
1031            redo_stack:        Vec::new(),
1032            pieces:            Pieces::new(),
1033            buffers:           Buffers::new(),
1034            scratch_index_map: Vec::new(),
1035        }
1036    }
1037}
1038
1039impl core::fmt::Display for PieceTree {
1040    #[inline(always)]
1041    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1042        for chunk in self.slice(..).chunks() {
1043            f.write_str(chunk)?;
1044        }
1045
1046        Ok(())
1047    }
1048}
1049
1050impl<T> From<T> for PieceTree where T: AsRef<str> {
1051    #[inline(always)]
1052    fn from(value: T) -> Self {
1053        Self::from_arc_str(value.as_ref())
1054    }
1055}
1056
1057impl str::FromStr for PieceTree {
1058    type Err = core::convert::Infallible;
1059
1060    #[inline(always)]
1061    fn from_str(s: &str) -> Result<Self, Self::Err> {
1062        Ok(PieceTree::from(s))
1063    }
1064}
1065
1066impl PieceTree {
1067    #[inline(always)]
1068    #[must_use]
1069    pub fn new() -> Self {
1070        Self::default()
1071    }
1072
1073    #[inline(always)]
1074    #[must_use]
1075    pub fn from_arc_str(text: impl Into<Arc<str>>) -> Self {
1076        let text = text.into();
1077
1078        let mut tree = PieceTree::new();
1079        if text.is_empty() { return tree; }
1080
1081        let bytes = text.as_bytes();
1082        let byte_length = bytes.len() as u32;
1083
1084        let mut offsets     = Vec::with_capacity(bytes.len() / 40);
1085        let mut checkpoints = Vec::with_capacity(bytes.len() / 256);
1086
1087        let (char_count, newline_count) =
1088            count_chars_and_newlines_with_offsets_and_checkpoints(
1089                text.as_bytes(),
1090                &mut offsets,
1091                &mut checkpoints,
1092            );
1093
1094        let buffer = tree.buffers.original_buffers.push(OriginalBuffer {
1095            newline_offsets:  offsets.into(),
1096            char_checkpoints: checkpoints.into(),
1097            text:             text.into(),
1098        });
1099
1100        let piece = Piece {
1101            buffer,
1102            byte_offset:       0,
1103            byte_length,
1104            char_count,
1105            newline_count,
1106            buffer_start_line: 0,
1107            piece_start_char:  0,
1108        };
1109
1110        tree.root = tree.pieces.insert_node(NIL, piece, 0);
1111        tree
1112    }
1113
1114    /// Splits the tree at `byte_offset`, returning the right half as a new
1115    /// `PieceTree`.
1116    ///
1117    /// After this call, `self` contains `[0, byte_offset)` and
1118    /// the returned tree contains `[byte_offset, end)`.
1119    ///
1120    /// `byte_offset` must be on a UTF-8 char boundary. Panics if `byte_offset > self.len_bytes()`.
1121    ///
1122    /// Runs in O(log n).
1123    pub fn split_off(&mut self, byte_offset: u32) -> Self {
1124        assert!(
1125            byte_offset <= self.len_bytes(),
1126            "split_off: byte_offset {} out of bounds (len = {})",
1127            byte_offset, self.len_bytes()
1128        );
1129
1130        if byte_offset == 0 {
1131            //
1132            // Everything goes to the right half -> self becomes empty
1133            //
1134            let mut right = Self::default();
1135            right.pieces  = self.pieces.clone();
1136            right.buffers = self.buffers.clone();
1137            right.root    = self.root;
1138            self.root     = NIL;
1139            self.last_mod_end  = u32::MAX;
1140            self.last_tree_end = u32::MAX;
1141
1142            return right;
1143        }
1144
1145        if byte_offset == self.len_bytes() {
1146            //
1147            // Everything stays in self          -> return empty right half
1148            //
1149
1150            let mut right = Self::default();
1151            right.pieces  = self.pieces.clone();
1152            right.buffers = self.buffers.clone();
1153            // right.root stays NIL
1154
1155            return right;
1156        }
1157
1158        //
1159        // Split the tree at the byte boundary
1160        //
1161        let (left_root, right_root) = self.split(self.root, byte_offset);
1162
1163        //
1164        // Self keeps the left half
1165        //
1166        self.root          = left_root;
1167        self.last_mod_end  = u32::MAX;
1168        self.last_tree_end = u32::MAX;
1169        self.undo_stack.clear();
1170        self.redo_stack.clear();
1171
1172        //
1173        // Right half gets a new PieceTree sharing the same arena and buffers
1174        //
1175        let mut right = Self::default();
1176        right.pieces  = self.pieces.clone();
1177        right.buffers = self.buffers.clone();
1178        right.root    = right_root;
1179
1180        right
1181    }
1182
1183    #[cfg(feature = "write")]
1184    pub fn from_reader<R: std::io::Read>(mut reader: R) -> std::io::Result<Self> {
1185        const CHUNK: usize = 64 * 1024;
1186
1187        let mut buf  = [0u8; CHUNK];
1188        let mut text = String::new();
1189        let mut tail = 0usize;
1190
1191        loop {
1192            let n = reader.read(&mut buf[tail..])?;
1193
1194            if n == 0 {
1195                // EOF
1196                if tail > 0 {
1197                    return Err(std::io::Error::new(
1198                        std::io::ErrorKind::InvalidData,
1199                        "stream ended with incomplete UTF-8 sequence",
1200                    ));
1201                }
1202
1203                break;
1204            }
1205
1206            let filled = tail + n;
1207            let slice  = &buf[..filled];
1208
1209            //
1210            // Find how much is valid UTF-8
1211            //
1212            let valid_up_to = match str::from_utf8(slice) {
1213                Ok(_)  => filled,
1214                Err(e) => {
1215                    if e.error_len().is_some() {
1216                        return Err(std::io::Error::new(
1217                            std::io::ErrorKind::InvalidData,
1218                            "stream did not contain valid UTF-8",
1219                        ));
1220                    }
1221                    e.valid_up_to()
1222                }
1223            };
1224
1225            //
1226            // SAFETY: validated above
1227            //
1228            text.push_str(unsafe { std::str::from_utf8_unchecked(&slice[..valid_up_to]) });
1229
1230            //
1231            // Carry over the incomplete sequence (0-3 bytes) to next iteration
1232            //
1233            tail = filled - valid_up_to;
1234            if tail > 0 {
1235                buf.copy_within(valid_up_to..filled, 0);
1236            }
1237        }
1238
1239        Ok(Self::from(text))
1240    }
1241}
1242
1243impl PieceTree {
1244    /// Captures the current state of the document.
1245    #[inline]
1246    #[must_use]
1247    pub fn take_snapshot(&self, current_cursor: u32) -> HistoryEntry {
1248        HistoryEntry {
1249            root: self.root,
1250            cursor_offset: current_cursor,
1251        }
1252    }
1253
1254    /// Restores the tree to a previously saved snapshot.
1255    /// Returns the cursor offset from the snapshot.
1256    #[inline]
1257    #[must_use]
1258    pub fn snap_to(&mut self, snapshot: HistoryEntry, current_cursor: u32) -> u32 {
1259        if self.root == snapshot.root {
1260            return snapshot.cursor_offset;
1261        }
1262
1263        //
1264        // Prevent snapping while in the middle of an active transaction
1265        //
1266        assert!(self.transaction_depth == 0, "Cannot snap_to during an active undo group");
1267
1268        //
1269        // Save the current state to the undo stack so the user can undo the jump
1270        //
1271        self.undo_stack.push(HistoryEntry {
1272            root: self.root,
1273            cursor_offset: current_cursor,
1274        });
1275
1276        //
1277        // Clear the redo stack
1278        //
1279        self.redo_stack.clear();
1280
1281        //
1282        // Invalidate caches
1283        //
1284        self.last_tree_end = u32::MAX;
1285        self.last_mod_end  = u32::MAX;
1286
1287        //
1288        //  Restore the tree
1289        //
1290        self.root = snapshot.root;
1291
1292        snapshot.cursor_offset
1293    }
1294}
1295
1296impl PieceTree {
1297    /// Starts a new undo group, saves the current state if this is the outermost group.
1298    #[inline]
1299    pub fn begin_undo_group(&mut self, cursor_offset: u32) {
1300        if self.transaction_depth == 0 {
1301            self.commit_head(cursor_offset);
1302        }
1303
1304        self.transaction_depth += 1;
1305    }
1306
1307    /// Ends the current undo group
1308    #[inline]
1309    pub fn end_undo_group(&mut self) {
1310        if self.transaction_depth > 0 {
1311            self.transaction_depth -= 1;
1312        }
1313    }
1314
1315    #[inline(always)]
1316    pub fn commit_head(&mut self, cursor_offset: u32) {
1317        if let Some(last) = self.undo_stack.last() {
1318            if last.root == self.root {
1319                return;  // Root hasn't changed, skip duplication
1320            }
1321        }
1322
1323        self.undo_stack.push(HistoryEntry { root: self.root, cursor_offset });
1324        self.redo_stack.clear();
1325    }
1326
1327    #[inline(always)]
1328    pub fn try_undo(&mut self, current_cursor: u32) -> Option<u32> {
1329        if self.transaction_depth > 0 {
1330            return None;
1331        }
1332
1333        if let Some(entry) = self.undo_stack.pop() {
1334            self.last_tree_end = u32::MAX;
1335            self.last_mod_end  = u32::MAX;
1336
1337            self.redo_stack.push(HistoryEntry {
1338                root: self.root, cursor_offset: current_cursor
1339            });
1340            self.root = entry.root;
1341            return Some(entry.cursor_offset);
1342        }
1343
1344        None
1345    }
1346
1347    #[inline(always)]
1348    pub fn try_redo(&mut self, current_cursor: u32) -> Option<u32> {
1349        if let Some(entry) = self.redo_stack.pop() {
1350            self.last_tree_end = u32::MAX;
1351            self.last_mod_end  = u32::MAX;
1352
1353            self.undo_stack.push(HistoryEntry {
1354                root: self.root, cursor_offset: current_cursor
1355            });
1356            self.root = entry.root;
1357            return Some(entry.cursor_offset);
1358        }
1359
1360        None
1361    }
1362
1363    #[inline(always)]
1364    #[must_use]
1365    pub fn total_length(&self) -> u32 {
1366        self.pieces.get(self.root).subtree_len
1367    }
1368
1369    #[inline(always)]
1370    pub fn apply_edits(&mut self, primary_cursor_offset: u32, edits: &mut [Edit]) {
1371        if edits.is_empty() { return }
1372
1373        self.begin_undo_group(primary_cursor_offset);
1374
1375        edits.sort_by_key(|b| core::cmp::Reverse(b.offset()));
1376        for edit in edits {
1377            match edit {
1378                Edit::Insert { offset, text }   => self.insert_no_commit(*offset, text),
1379                Edit::Remove { offset, length } => self.remove_no_commit(*offset, *length),
1380            }
1381        }
1382
1383        self.end_undo_group();
1384    }
1385
1386    /// Inserts a single character at the specified logical byte offset.
1387    #[inline(always)]
1388    pub fn insert_char(&mut self, offset: u32, ch: char) {
1389        self.commit_head(offset);
1390        self.insert_char_no_commit(offset, ch);
1391    }
1392
1393    /// Inserts a single character at the specified logical byte offset.
1394    #[inline(always)]
1395    pub fn insert_char_no_commit(&mut self, offset: u32, ch: char) {
1396        let mut buf = [0; 4];
1397        self.insert_no_commit(offset, ch.encode_utf8(&mut buf));
1398    }
1399
1400    #[inline(always)]
1401    pub fn insert(&mut self, offset: u32, text: &str) {
1402        if text.is_empty() { return }
1403
1404        let auto_group = self.transaction_depth == 0;
1405        if auto_group {
1406            self.begin_undo_group(offset);
1407        }
1408
1409        self.insert_no_commit(offset, text);
1410
1411        if auto_group {
1412            self.end_undo_group();
1413        }
1414    }
1415
1416    #[inline(always)]
1417    pub fn remove_at(&mut self, offset: u32, length: u32) {
1418        if length == 0 || self.root == NIL { return }
1419
1420        let auto_group = self.transaction_depth == 0;
1421        if auto_group {
1422            self.begin_undo_group(offset);
1423        }
1424
1425        self.remove_no_commit(offset, length);
1426
1427        if auto_group {
1428            self.end_undo_group();
1429        }
1430    }
1431
1432    pub fn insert_no_commit(&mut self, offset: u32, text: &str) {
1433        let mod_offset = self.buffers.modifications_buffer.len() as u32;
1434        let start_line_in_buffer = self.buffers.modifications_newline_offsets.len() as u32;
1435
1436        let total_char_count_before = self.buffers.modifications_char_count;
1437        let mut total_char_count = total_char_count_before;
1438
1439        self.buffers.modifications_newline_offsets.reserve(text.len() / 20 + 1);
1440        self.buffers.modifications_char_checkpoints.reserve(text.len() / CHECKPOINT_INTERVAL as usize + 1);
1441
1442        let mut newline_count = 0;
1443        let mut char_count = 0;
1444        {
1445            let rem = total_char_count % CHECKPOINT_INTERVAL;
1446            let mut next_checkpoint = total_char_count + ((CHECKPOINT_INTERVAL - rem) % CHECKPOINT_INTERVAL);
1447
1448            if next_checkpoint == 0 {
1449                next_checkpoint = CHECKPOINT_INTERVAL;
1450            }
1451
1452            //
1453            // @Cutnpaste from count_chars_and_newlines_with_offsets_and_checkpoints
1454            //
1455            for (i, b) in text.bytes().enumerate() {
1456                if (b as i8) >= -0x40 {
1457                    if total_char_count == next_checkpoint {
1458                        self.buffers.modifications_char_checkpoints.push((total_char_count, mod_offset + i as u32));
1459                        next_checkpoint += CHECKPOINT_INTERVAL;
1460                    }
1461
1462                    total_char_count += 1;
1463                    char_count += 1;
1464                }
1465
1466                if b == b'\n' {
1467                    self.buffers.modifications_newline_offsets.push(mod_offset + i as u32);
1468                    newline_count += 1;
1469                }
1470            }
1471        }
1472
1473        self.buffers.modifications_char_count = total_char_count;
1474        self.buffers.modifications_buffer.push_str(text);
1475        let text_len = text.len() as u32;
1476
1477        let new_mod_end  = mod_offset + text_len;
1478        let new_tree_end = offset + text_len;
1479
1480        //
1481        // Fast path for sequential typing
1482        //
1483        if offset > 0
1484        && self.last_tree_end == offset
1485        && self.last_mod_end == mod_offset
1486        && let Some((prev_index, prev_rel)) = self.find_position(offset - 1, false)
1487        {
1488            let prev = self.pieces.get_piece(prev_index);
1489
1490            //
1491            // Predecessor must be a mod-buf piece ending at mod_offset.
1492            //
1493            if prev.buffer == MOD_BUFFER && prev.byte_offset + prev.byte_length == mod_offset {
1494                let prev_start = (offset - 1) - prev_rel;
1495
1496                self.root = self.pieces.extend_piece(
1497                    self.root,
1498                    prev_start,
1499                    text_len,
1500                    char_count,
1501                    newline_count
1502                );
1503
1504                self.last_mod_end  = new_mod_end;
1505                self.last_tree_end = new_tree_end;
1506                return;
1507            }
1508        }
1509
1510        let new_piece = Piece {
1511            buffer: MOD_BUFFER,
1512            byte_offset: mod_offset,
1513            byte_length: text_len,
1514            newline_count,
1515            char_count,
1516            buffer_start_line: start_line_in_buffer,
1517            piece_start_char:  total_char_count - char_count,
1518        };
1519
1520        if self.root == NIL {
1521            self.root = self.pieces.insert_node(self.root, new_piece, offset);
1522        } else {
1523            //
1524            // Snip the tree exactly at the insertion offset
1525            //
1526            let (left, right) = self.split(self.root, offset);
1527
1528            //
1529            // Sandwich the new piece directly between the left and right trees!
1530            //
1531            self.root = self.pieces.join_with_middle(left, new_piece, right);
1532
1533            //
1534            // Clean up the resulting seams
1535            //
1536
1537            // Attempt to merge the left side with new_piece
1538            self.try_merge_at(offset);
1539
1540            // Attempt to merge new_piece with the right side
1541            self.try_merge_at(offset + text_len);
1542        }
1543
1544        self.last_mod_end  = new_mod_end;
1545        self.last_tree_end = new_tree_end;
1546    }
1547
1548    pub fn remove_no_commit(&mut self, offset: u32, mut length: u32) {
1549        let total = self.total_length();
1550        if offset >= total { return; }
1551        if offset + length > total { length = total - offset; }
1552        if length == 0 { return }
1553
1554        //
1555        // Snip off the portion of the tree that comes BEFORE the deletion
1556        //
1557        let (left, remainder) = self.split(self.root, offset);
1558
1559        //
1560        // Snip the deleted portion out of the remainder
1561        //
1562        let (_deleted, right) = self.split(remainder, length);
1563
1564        //
1565        // Glue the surviving outer halves back together
1566        //
1567        self.root = self.concat(left, right);
1568
1569        //
1570        // Clean up the resulting seam
1571        //
1572        self.try_merge_at(offset);
1573    }
1574
1575    #[inline]
1576    fn try_merge_at(&mut self, pos: u32) {
1577        if pos == 0 { return }
1578
1579        let Some((right_index, right_rel)) = self.find_position(pos, false) else { return };
1580
1581        if right_rel != 0 { return }
1582
1583        let right = self.pieces.get_piece(right_index);
1584        self.try_merge_right_with_left(pos, right);
1585    }
1586
1587    // Returns Some((merged_start, merged_piece)) if merge happened, None otherwise.
1588    #[inline]
1589    fn try_merge_right_with_left(&mut self, right_start: u32, right: Piece) -> Option<(u32, Piece)> {
1590        if right_start == 0 { return None; }
1591        if right.buffer != MOD_BUFFER { return None; }
1592
1593        let (left_index, left_rel) = self.find_position(right_start - 1, false)?;
1594        let left = self.pieces.get_piece(left_index);
1595
1596        if left.buffer != MOD_BUFFER { return None; }
1597        if left.byte_offset + left.byte_length != right.byte_offset { return None; }
1598
1599        let left_start = (right_start - 1) - left_rel;
1600
1601        self.root = self.pieces.remove_node(self.root, left_start);
1602        self.root = self.pieces.remove_node(self.root, left_start);
1603
1604        let merged = Piece {
1605            buffer:  MOD_BUFFER,
1606            byte_offset:        left.byte_offset,
1607            byte_length:        left.byte_length        + right.byte_length,
1608            newline_count: left.newline_count + right.newline_count,
1609            char_count:    left.char_count    + right.char_count,
1610            buffer_start_line: left.buffer_start_line,
1611            piece_start_char: left.piece_start_char,  // right side extends, left start unchanged
1612        };
1613        self.root = self.pieces.insert_node(self.root, merged, left_start);
1614        Some((left_start, merged))
1615    }
1616
1617    #[inline(always)]
1618    #[must_use]
1619    pub fn get_piece(&self, index: NodeRef) -> Piece {
1620        self.pieces.get_piece(index)
1621    }
1622
1623    #[inline]
1624    #[must_use]
1625    pub fn find_position(&self, offset: u32, prefer_left: bool) -> Option<(NodeRef, u32)> {
1626        let total = self.total_length();
1627        if offset > total { return None; }
1628
1629        if offset == total && total > 0 {
1630            let mut current = self.root;
1631            let mut last_valid = NIL;
1632            while current != NIL {
1633                last_valid = current;
1634                current = self.pieces.get(current).right;
1635            }
1636            let len = self.pieces.get_piece(last_valid).byte_length;
1637            return Some((last_valid, len));
1638        }
1639
1640        self.pieces.find_offset(self.root, offset, prefer_left)
1641    }
1642
1643    #[cfg(feature = "write")]
1644    #[inline]
1645    pub fn write_to<W: std::io::Write>(&self, mut writer: W) -> std::io::Result<()> {
1646        if self.root == NIL {
1647            return Ok(());
1648        }
1649
1650        for (_, piece) in self.pieces() {
1651            let slice = self.buffers.get_slice(piece.buffer, piece.byte_offset, piece.byte_length);
1652            writer.write_all(slice.as_bytes())?;
1653        }
1654
1655        writer.flush()
1656    }
1657}
1658
1659impl PieceTree {
1660    #[inline]
1661    pub fn compact(&mut self) {
1662        #[inline(always)]
1663        fn copy_node(
1664            old_index: NodeRef, old_arena: &Pieces,
1665            new_arena: &mut Pieces, index_map: &mut [u32]
1666        ) -> NodeRef {
1667            if old_index == NIL { return NIL }
1668
1669            if index_map[old_index.index()] != 0 {
1670                return NodeRef::new(index_map[old_index.index()] as usize);
1671            }
1672
1673            let node = old_arena.get(old_index);
1674            let left_new = copy_node(node.left, old_arena, new_arena, index_map);
1675            let right_new = copy_node(node.right, old_arena, new_arena, index_map);
1676
1677            let new_index = NodeRef::new(new_arena.nodes.len());
1678            new_arena.nodes.push(Node {
1679                left: left_new,
1680                right: right_new,
1681                offset: node.offset,
1682                subtree_len: node.subtree_len,
1683                subtree_chars: node.subtree_chars,
1684                subtree_newlines: node.subtree_newlines,
1685                piece_start_char: node.piece_start_char,
1686                meta: node.meta,
1687                buffer_start_line: node.buffer_start_line
1688            });
1689
1690            index_map[old_index.index()] = new_index.index() as u32;
1691            new_index
1692        }
1693
1694        let mut new_pieces = Pieces::new();
1695
1696        self.scratch_index_map.clear();
1697        self.scratch_index_map.resize(self.pieces.nodes.len(), 0);
1698
1699        self.root = copy_node(self.root, &self.pieces, &mut new_pieces, &mut self.scratch_index_map);
1700        for entry in &mut self.undo_stack {
1701            entry.root = copy_node(entry.root, &self.pieces, &mut new_pieces, &mut self.scratch_index_map);
1702        }
1703        for entry in &mut self.redo_stack {
1704            entry.root = copy_node(entry.root, &self.pieces, &mut new_pieces, &mut self.scratch_index_map);
1705        }
1706
1707        self.pieces = new_pieces;
1708    }
1709
1710    #[inline]
1711    pub fn squash(&mut self) {  // @Memory
1712        if self.root == NIL { return }
1713
1714        let squashed_text = self.to_string();  // @Memory
1715        let bytes  = squashed_text.as_bytes();
1716        let length = squashed_text.len() as u32;
1717
1718        let mut offsets     = Vec::with_capacity(bytes.len() / 40);
1719        let mut checkpoints = Vec::with_capacity(bytes.len() / 256);
1720
1721        let (char_count, newline_count) =
1722            count_chars_and_newlines_with_offsets_and_checkpoints(
1723                squashed_text.as_bytes(),
1724                &mut offsets,
1725                &mut checkpoints
1726            );
1727
1728        self.buffers = Buffers::new();
1729        let buffer = self.buffers.original_buffers.push(OriginalBuffer {
1730            newline_offsets: offsets.into(),
1731            char_checkpoints: checkpoints.into(),
1732            text: squashed_text.into()
1733        });
1734        self.pieces = Pieces::new();
1735
1736        let piece = Piece {
1737            byte_length: length, newline_count, char_count,
1738            buffer, byte_offset: 0,
1739            buffer_start_line: 0, piece_start_char: 0
1740        };
1741        self.root = self.pieces.insert_node(NIL, piece, 0);
1742
1743        self.undo_stack.clear();
1744        self.redo_stack.clear();
1745    }
1746}
1747
1748#[derive(Debug, Clone, Copy)]
1749pub struct MemoryUsage {
1750    pub node_arena:       u32,
1751    pub mod_buffer:       u32,
1752    pub original_buffers: u32,
1753    pub history:          u32,
1754}
1755
1756impl MemoryUsage {
1757    #[inline(always)]
1758    #[must_use]
1759    pub const fn total(&self) -> u32 {
1760        self.node_arena + self.mod_buffer + self.original_buffers + self.history
1761    }
1762
1763    /// Overhead = everything except the actual document content in buffers.
1764    #[inline(always)]
1765    #[must_use]
1766    pub const fn overhead(&self) -> u32 {
1767        self.node_arena + self.history
1768    }
1769}
1770
1771impl PieceTree {
1772    /// Total bytes allocated for the node arena (includes NIL sentinel and
1773    /// all historical nodes retained for undo/redo).
1774    #[inline(always)]
1775    #[must_use]
1776    pub fn node_arena_bytes(&self) -> u32 {
1777        (self.pieces.nodes.len() * size_of::<Node>()) as _
1778    }
1779
1780    /// Bytes consumed by the modifications buffer (append-only, never shrinks).
1781    #[inline(always)]
1782    #[must_use]
1783    pub fn mod_buffer_bytes(&self) -> u32 {
1784        self.buffers.modifications_buffer.len() as u32 +
1785            (self.buffers.modifications_char_checkpoints.len() * size_of::<(u32, u32)>()) as u32 +
1786            (self.buffers.modifications_newline_offsets.len() * size_of::<u32>()) as u32
1787    }
1788
1789    /// Bytes consumed by all original (read) buffers.
1790    #[inline(always)]
1791    #[must_use]
1792    pub fn original_buffers_bytes(&self) -> u32 {
1793        self.buffers.original_buffers.values().map(|s| {
1794            s.len() +
1795                s.char_checkpoints.len() * size_of::<(u32, u32)>() +
1796                s.newline_offsets.len() * size_of::<u32>()
1797        }).sum::<usize>() as _
1798    }
1799
1800    /// Bytes consumed by undo + redo history entries.
1801    #[inline(always)]
1802    #[must_use]
1803    pub fn history_bytes(&self) -> u32 {
1804        ((self.undo_stack.len() + self.redo_stack.len()) * size_of::<HistoryEntry>()) as _
1805    }
1806
1807    /// Number of live nodes in the arena (includes NIL and all historical nodes).
1808    #[inline(always)]
1809    #[must_use]
1810    pub fn node_count(&self) -> u32 {
1811        self.pieces.nodes.len() as _
1812    }
1813
1814    /// Aggregate memory usage breakdown.
1815    #[inline(always)]
1816    #[must_use]
1817    pub fn memory_usage(&self) -> MemoryUsage {
1818        MemoryUsage {
1819            node_arena:       self.node_arena_bytes(),
1820            mod_buffer:       self.mod_buffer_bytes(),
1821            original_buffers: self.original_buffers_bytes(),
1822            history:          self.history_bytes(),
1823        }
1824    }
1825}
1826
1827#[derive(Debug)]
1828pub struct LinesIter<'a> {
1829    pub tree: &'a PieceTree,
1830    pub current_line: u32,
1831    pub total_lines: u32,
1832}
1833
1834impl Iterator for LinesIter<'_> {
1835    type Item = String;
1836
1837    fn next(&mut self) -> Option<Self::Item> {
1838        if self.current_line >= self.total_lines { return None }
1839
1840        let line = self.tree.get_line_content_allocating(self.current_line)?;
1841        self.current_line += 1;
1842        Some(line)
1843    }
1844}
1845
1846/// A lazy, non-allocating slice view over a byte range of the tree
1847#[derive(Debug)]
1848pub struct ChunkIter<'a> {
1849    pub tree:       &'a PieceTree,
1850    pub pieces:     PieceTreeIter<'a>,
1851
1852    // Byte range we care about
1853    pub start:      u32,
1854    pub end:        u32,
1855
1856    // Bytes consumed so far across all pieces
1857    pub piece_start: u32,
1858}
1859
1860impl<'a> ChunkIter<'a> {
1861    #[inline]
1862    #[must_use]
1863    pub fn new(tree: &'a PieceTree, start: u32, end: u32) -> Self {
1864        Self {
1865            tree,
1866            pieces: PieceTreeIter::new(&tree.pieces, tree.root),
1867            start,
1868            end,
1869            piece_start: 0,
1870        }
1871    }
1872
1873    /// Returns the total number of characters within this chunk view boundary.
1874    #[inline]
1875    #[must_use]
1876    pub fn len_chars(&self) -> u32 {
1877        let start_char = self.tree.try_byte_to_char(self.start).unwrap_or(0);
1878        let end_char = self.tree.try_byte_to_char(self.end).unwrap_or_else(|| self.tree.len_chars());
1879        end_char.saturating_sub(start_char)
1880    }
1881
1882    /// Returns the total number of characters within this chunk view boundary.
1883    #[inline]
1884    #[must_use]
1885    pub fn len_bytes(&self) -> u32 {
1886        self.end.saturating_sub(self.start)
1887    }
1888}
1889
1890impl<'a> Iterator for ChunkIter<'a> {
1891    type Item = &'a str;
1892
1893    #[inline]
1894    fn next(&mut self) -> Option<&'a str> {
1895        loop {
1896            let (_, p) = self.pieces.next()?;
1897            let piece_end = self.piece_start + p.byte_length;
1898
1899            // Skip pieces entirely before our window
1900            if piece_end <= self.start {
1901                self.piece_start = piece_end;
1902                continue;
1903            }
1904            // Stop once we're past our window
1905            if self.piece_start >= self.end {
1906                return None;
1907            }
1908
1909            let slice_start = self.start.saturating_sub(self.piece_start);
1910            let slice_end   = (self.end - self.piece_start).min(p.byte_length);
1911
1912            self.piece_start = piece_end;
1913
1914            let text = self.tree.buffers.get_slice(p.buffer, p.byte_offset + slice_start, slice_end - slice_start);
1915            if text.is_empty() { continue; }
1916            return Some(text);
1917        }
1918    }
1919}
1920
1921/// A zero-copy iterator that yields lines as perfectly bounded `TreeSlice`s.
1922#[derive(Debug, Clone)]
1923pub struct SliceLines<'a> {
1924    pub slice: TreeSlice<'a>,
1925    pub current_abs_line: u32,
1926    pub end_abs_line: u32,
1927    pub yielded_all: bool,
1928}
1929
1930impl<'a> Iterator for SliceLines<'a> {
1931    type Item = TreeSlice<'a>;
1932
1933    #[inline]
1934    fn next(&mut self) -> Option<Self::Item> {
1935        if self.yielded_all { return None; }
1936
1937        // Fetch the absolute start byte of the current line from the tree
1938        let line_start = self.slice.tree.try_line_to_byte(self.current_abs_line).unwrap_or(self.slice.start);
1939
1940        //
1941        // If we are on the very last line that this slice overlaps
1942        //
1943        if self.current_abs_line >= self.end_abs_line {
1944            self.yielded_all = true;
1945
1946            // Clamp the start byte between the slice start and end boundaries
1947            let start_byte = line_start.max(self.slice.start).min(self.slice.end);
1948
1949            return Some(TreeSlice {
1950                tree: self.slice.tree,
1951                start: start_byte,
1952                end: self.slice.end,
1953            });
1954        }
1955
1956        // We are on an intermediate line, find where the next line starts.
1957        let next_line_start = self.slice.tree.try_line_to_byte(self.current_abs_line + 1).unwrap_or(self.slice.end);
1958
1959        // Clamp boundaries to ensure we never bleed outside the TreeSlice
1960        let start_byte = line_start.max(self.slice.start).min(self.slice.end);
1961        let end_byte = next_line_start.min(self.slice.end);
1962
1963        self.current_abs_line += 1;
1964
1965        Some(TreeSlice {
1966            tree: self.slice.tree,
1967            start: start_byte,
1968            end: end_byte,
1969        })
1970    }
1971}
1972
1973#[derive(Debug)]
1974pub struct SliceCharsRev<'a> {
1975    pub walker: ReverseTreeWalker<'a>,
1976    pub current_byte: u32,
1977    pub start_byte: u32,
1978}
1979
1980impl<'a> Iterator for SliceCharsRev<'a> {
1981    type Item = char;
1982
1983    #[inline]
1984    fn next(&mut self) -> Option<char> {
1985        // If we've hit or crossed the start boundary, stop
1986        if self.current_byte <= self.start_byte {
1987            return None;
1988        }
1989
1990        let c = self.walker.next()?;
1991
1992        // Subtract the exact byte width of the character we just yielded
1993        self.current_byte = self.current_byte.saturating_sub(c.len_utf8() as u32);
1994
1995        // If yielding this character pushed us before the slice's start
1996        // boundary, we discard it and end the iterator.
1997        if self.current_byte < self.start_byte {
1998            return None;
1999        }
2000
2001        Some(c)
2002    }
2003}
2004
2005impl core::fmt::Display for SliceCharsRev<'_> {
2006    #[inline]
2007    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2008        let mut chars = Vec::new();
2009        let mut temp_walker = ReverseTreeWalker::new_at(self.walker.tree, self.current_byte);
2010
2011        let mut temp_byte = self.current_byte;
2012        while temp_byte > self.start_byte {
2013            let Some(c) = temp_walker.next() else { break };
2014            let new_byte = temp_byte.saturating_sub(c.len_utf8() as u32);
2015            if new_byte < self.start_byte { break; }
2016            temp_byte = new_byte;
2017            chars.push(c);
2018        }
2019
2020        chars.reverse();
2021
2022        let mut buf = Vec::with_capacity(chars.iter().map(|c| c.len_utf8()).sum());
2023        let mut tmp = [0u8; 4];
2024        for c in &chars {
2025            buf.extend_from_slice(c.encode_utf8(&mut tmp).as_bytes());
2026        }
2027
2028        f.write_str(unsafe { core::str::from_utf8_unchecked(&buf) })
2029    }
2030}
2031
2032/// Non-allocating char iterator over a byte-bounded window of the tree.
2033#[derive(Debug)]
2034pub struct SliceChars<'a> {
2035    pub walker:   TreeWalker<'a>,
2036    pub byte_end: u32,
2037}
2038
2039impl Iterator for SliceChars<'_> {
2040    type Item = char;
2041    #[inline]
2042    fn next(&mut self) -> Option<char> {
2043        if self.walker.offset >= self.byte_end { return None; }
2044        self.walker.next()
2045    }
2046}
2047
2048impl core::fmt::Display for SliceChars<'_> {
2049    #[inline]
2050    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2051        // Grab the current absolute byte offset from the existing walker
2052        let current_offset = self.walker.offset;
2053
2054        // Spawn a transient walker and seek it to the exact current offset
2055        let mut temp_walker = TreeWalker::<32>::new(self.walker.tree);
2056        temp_walker.seek(current_offset);
2057
2058        while temp_walker.offset < self.byte_end {
2059            let chunk = temp_walker.current_str.as_str();
2060            if chunk.is_empty() {
2061                if temp_walker.stack.is_empty() { break; }
2062                temp_walker.populate_chars();
2063                continue;
2064            }
2065
2066            // Clamp chunk to byte_end
2067            let remaining = (self.byte_end - temp_walker.offset) as usize;
2068            let chunk = &chunk[..remaining.min(chunk.len())];
2069            f.write_str(chunk)?;
2070            temp_walker.offset += chunk.len() as u32;
2071            temp_walker.current_str = "".chars();
2072            temp_walker.populate_chars();
2073        }
2074
2075        Ok(())
2076    }
2077}
2078
2079/// A zero-copy borrowed view over a byte range of the tree.
2080/// All offsets are relative to the slice start.
2081#[derive(Debug, Clone, Copy)]
2082pub struct TreeSlice<'a> {
2083    pub tree:  &'a PieceTree,
2084    pub start: u32,   // byte offset into tree (inclusive)
2085    pub end:   u32,   // byte offset into tree (exclusive)
2086}
2087
2088impl PartialEq<&str> for TreeSlice<'_> {
2089    #[inline]
2090    fn eq(&self, other: &&str) -> bool {
2091        let mut remaining = other.as_bytes();
2092
2093        for chunk in self.chunks() {
2094            let chunk_bytes = chunk.as_bytes();
2095
2096            // If the chunk is longer than the remaining string, they don't match
2097            if remaining.len() < chunk_bytes.len() {
2098                return false;
2099            }
2100
2101            // If the bytes don't match, fail fast
2102            if !remaining.starts_with(chunk_bytes) {
2103                return false;
2104            }
2105
2106            remaining = &remaining[chunk_bytes.len()..];
2107        }
2108
2109        // If we exhausted all chunks and have no remaining bytes, it's a perfect match
2110        remaining.is_empty()
2111    }
2112}
2113
2114impl PartialEq<str> for TreeSlice<'_> {
2115    #[inline]
2116    fn eq(&self, other: &str) -> bool {
2117        self == &other
2118    }
2119}
2120
2121impl core::fmt::Display for TreeSlice<'_> {
2122    #[inline(always)]
2123    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2124        for chunk in self.chunks() {
2125            f.write_str(chunk)?;
2126        }
2127        Ok(())
2128    }
2129}
2130
2131/// Slice Iterators
2132impl<'a> TreeSlice<'a> {
2133    #[inline]
2134    pub fn chunks(self) -> ChunkIter<'a> {
2135        ChunkIter::new(self.tree, self.start, self.end)
2136    }
2137
2138    #[inline]
2139    pub fn bytes(self) -> impl Iterator<Item = u8> + 'a {
2140        self.chunks().flat_map(|chunk| chunk.bytes())
2141    }
2142
2143    #[inline]
2144    pub fn chars(self) -> impl Iterator<Item = char> + 'a {
2145        self.chunks().flat_map(|chunk| chunk.chars())
2146    }
2147
2148    #[inline]
2149    pub fn chars_rev(self) -> SliceCharsRev<'a> {
2150        SliceCharsRev {
2151            walker: ReverseTreeWalker::new_at(self.tree, self.end),
2152            current_byte: self.end,
2153            start_byte: self.start,
2154        }
2155    }
2156
2157    #[inline]
2158    pub fn lines(self) -> SliceLines<'a> {
2159        // Query the tree directly for the absolute line overlap
2160        let start_line = self.tree.byte_to_line(self.start);
2161        let end_line = self.tree.byte_to_line(self.end);
2162
2163        SliceLines {
2164            slice: self,
2165            current_abs_line: start_line,
2166            end_abs_line: end_line,
2167            yielded_all: false,
2168        }
2169    }
2170
2171    #[inline]
2172    pub fn lines_at(self, line_idx: u32) -> SliceLines<'a> {
2173        let start_line = self.tree.byte_to_line(self.start);
2174        let end_line = self.tree.byte_to_line(self.end);
2175
2176        let target_line = (start_line + line_idx).min(end_line);
2177
2178        SliceLines {
2179            slice: self,
2180            current_abs_line: target_line,
2181            end_abs_line: end_line,
2182            yielded_all: false,
2183        }
2184    }
2185
2186    #[inline]
2187    pub fn chunks_at_byte(self, byte_idx: u32) -> ChunkIter<'a> {
2188        let safe_idx = byte_idx.min(self.len_bytes());
2189        ChunkIter::new(self.tree, self.start + safe_idx, self.end)
2190    }
2191
2192    #[inline]
2193    pub fn bytes_at(self, byte_idx: u32) -> impl Iterator<Item = u8> + 'a {
2194        self.chunks_at_byte(byte_idx).flat_map(|chunk| chunk.bytes())
2195    }
2196
2197    #[inline]
2198    pub fn chars_at(self, char_idx: u32) -> impl Iterator<Item = char> + 'a {
2199        let safe_idx = char_idx.min(self.len_chars());
2200        let byte_offset = self.try_char_to_byte(safe_idx).unwrap_or(self.len_bytes());
2201        self.chunks_at_byte(byte_offset).flat_map(|chunk| chunk.chars())
2202    }
2203}
2204
2205/// Tree Iterators
2206impl PieceTree {
2207    #[inline]
2208    pub fn slice_whole(&self) -> TreeSlice<'_> {
2209        self.slice(0..self.len_bytes())
2210    }
2211
2212    #[inline]
2213    pub fn chunks(&self) -> ChunkIter<'_> { self.slice_whole().chunks() }
2214
2215    #[inline]
2216    pub fn bytes(&self) -> impl Iterator<Item = u8> + '_ { self.slice_whole().bytes() }
2217
2218    #[inline]
2219    pub fn chars(&self) -> impl Iterator<Item = char> + '_ { self.slice_whole().chars() }
2220
2221    #[inline]
2222    pub fn lines(&self) -> SliceLines<'_> { self.slice_whole().lines() }
2223
2224    #[inline(always)]
2225    #[must_use]
2226    pub fn chars_rev(&self) -> ReverseTreeWalker<'_> { ReverseTreeWalker::new(self) }
2227
2228    #[inline(always)]
2229    #[must_use]
2230    pub fn chars_at_rev(&self, char_idx: u32) -> ReverseTreeWalker<'_> { ReverseTreeWalker::new_at(self, char_idx) }
2231
2232    #[inline]
2233    pub fn chunks_at_byte(&self, byte_idx: u32) -> ChunkIter<'_> {
2234        self.slice_whole().chunks_at_byte(byte_idx)
2235    }
2236
2237    #[inline]
2238    pub fn bytes_at(&self, byte_idx: u32) -> impl Iterator<Item = u8> + '_ {
2239        self.slice_whole().bytes_at(byte_idx)
2240    }
2241
2242    #[inline]
2243    pub fn chars_at(&self, char_idx: u32) -> impl Iterator<Item = char> + '_ {
2244        self.slice_whole().chars_at(char_idx)
2245    }
2246
2247    #[inline]
2248    pub fn lines_at(&self, line_idx: u32) -> SliceLines<'_> {
2249        self.slice_whole().lines_at(line_idx)
2250    }
2251}
2252
2253impl<'a> TreeSlice<'a> {
2254    #[inline(always)]
2255    #[must_use]
2256    pub fn new(tree: &'a PieceTree, start: u32, end: u32) -> Self {
2257        let end = end.min(tree.len_bytes());
2258        debug_assert!(start <= end);
2259        Self { tree, start, end }
2260    }
2261
2262    #[inline(always)]
2263    #[must_use]
2264    pub fn len_bytes(&self) -> u32  { self.end - self.start }
2265
2266    #[inline(always)]
2267    #[must_use]
2268    pub fn is_empty(&self)  -> bool { self.start == self.end }
2269
2270    #[inline(always)]
2271    #[must_use]
2272    pub fn len_chars(&self) -> u32 {
2273        let a = self.tree.try_byte_to_char(self.start).unwrap_or(0);
2274        let b = self.tree.try_byte_to_char(self.end).unwrap_or_else(|| self.tree.len_chars());
2275        b - a
2276    }
2277
2278    #[inline]
2279    #[must_use]
2280    pub fn len_lines(&self) -> u32 {
2281        let start_line = self.tree.try_byte_to_line(self.start).unwrap_or(0);
2282        let end_line = self.tree.try_byte_to_line(self.end).unwrap_or_else(|| self.tree.len_lines() - 1);
2283        end_line - start_line + 1
2284    }
2285
2286    #[must_use]
2287    pub fn chunk_at_byte(&self, offset: u32) -> &[u8] {
2288        if offset >= self.len_bytes() { return &[] }
2289
2290        let abs_byte = self.start + offset;
2291        let chunk = self.tree.chunk_at_byte(abs_byte).0;
2292
2293        // Truncate the chunk if it bleeds past the end of the slice
2294        let max_valid_len = (self.end - abs_byte) as usize;
2295        let bounded_len = chunk.len().min(max_valid_len);
2296
2297        &chunk[..bounded_len]
2298    }
2299
2300    #[inline]
2301    #[must_use]
2302    pub fn chunk_at_char(&self, char_offset: u32) -> &[u8] {
2303        if char_offset >= self.len_chars() { return &[] }
2304
2305        let Some(base_char) = self.tree.try_byte_to_char(self.start) else {
2306            return &[];
2307        };
2308        let abs_char = base_char + char_offset;
2309
2310        let chunk = self.tree.chunk_at_char(abs_char).0;
2311
2312        // Truncate the chunk if it bleeds past the end of the slice
2313        let Some(abs_byte) = self.tree.try_char_to_byte(abs_char) else {
2314            return &[];
2315        };
2316
2317        let max_valid_len = (self.end - abs_byte) as usize;
2318        let bounded_len = chunk.len().min(max_valid_len);
2319
2320        &chunk[..bounded_len]
2321    }
2322
2323    #[inline]
2324    #[must_use]
2325    pub fn chunk_at_line_break(&self, break_index: u32) -> &[u8] {
2326        let Some((base_line, _)) = self.tree.try_byte_to_line_col(self.start) else {
2327            return &[];
2328        };
2329
2330        let abs_break_index = base_line + break_index;
2331
2332        let Some(abs_byte) = self.tree.try_line_break_to_byte(abs_break_index) else {
2333            return &[];
2334        };
2335
2336        // Ensure the line break actually falls within the slice bounds
2337        if abs_byte < self.start || abs_byte >= self.end {
2338            return &[];
2339        }
2340
2341        let chunk = self.tree.chunk_at_byte(abs_byte).0;
2342
2343        // Bound the chunk so it doesn't bleed past self.end
2344        let max_valid_len = (self.end - abs_byte) as usize;
2345        let bounded_len = chunk.len().min(max_valid_len);
2346
2347        &chunk[..bounded_len]
2348    }
2349
2350    /// Byte at a slice-relative byte offset.
2351    #[inline(always)]
2352    #[must_use]
2353    pub fn byte(&self, offset: u32) -> u8 {
2354        self.try_byte(offset).unwrap()
2355    }
2356
2357    /// Byte at a slice-relative byte offset.
2358    #[inline(always)]
2359    #[must_use]
2360    pub fn try_byte(&self, offset: u32) -> Option<u8> {
2361        if offset >= self.len_bytes() { return None; }
2362        self.tree.try_byte(self.start + offset)
2363    }
2364
2365    /// Char at a slice-relative char index.
2366    #[inline(always)]
2367    #[must_use]
2368    pub fn char(&self, char_index: u32) -> char {
2369        self.try_char(char_index).unwrap()
2370    }
2371
2372    /// Char at a slice-relative char index.
2373    #[inline(always)]
2374    #[must_use]
2375    pub fn try_char(&self, char_index: u32) -> Option<char> {
2376        if char_index >= self.len_chars() { return None; }
2377        let abs_char = self.tree.try_byte_to_char(self.start)? + char_index;
2378        self.tree.try_char(abs_char)
2379    }
2380
2381    #[inline(always)]
2382    #[must_use]
2383    pub fn line(&self, line: u32) -> ChunkIter<'a> {
2384        self.try_line(line).unwrap()
2385    }
2386
2387    #[inline(always)]
2388    #[must_use]
2389    pub fn try_line(&self, line: u32) -> Option<ChunkIter<'a>> {
2390        let (abs_start, abs_end) = self.abs_line_range(line)?;
2391        // Use ChunkIter directly to enforce the absolute end boundary
2392        Some(ChunkIter::new(self.tree, abs_start, abs_end))
2393    }
2394
2395    /// Returns a sub-slice over a byte range (relative to this slice).
2396    #[inline(always)]
2397    #[must_use]
2398    pub fn slice<R: RangeBounds<u32>>(&self, range: R) -> TreeSlice<'a> {
2399        let s = match range.start_bound() {
2400            Bound::Included(&n) => n,
2401            Bound::Excluded(&n) => n + 1,
2402            Bound::Unbounded    => 0,
2403        };
2404        let e = match range.end_bound() {
2405            Bound::Included(&n) => n + 1,
2406            Bound::Excluded(&n) => n,
2407            Bound::Unbounded    => self.len_bytes(),
2408        };
2409        TreeSlice::new(self.tree, self.start + s, self.start + e)
2410    }
2411
2412    #[inline(always)]
2413    #[must_use]
2414    pub fn byte_to_char(&self, byte_offset: u32) -> u32 {
2415        self.try_byte_to_char(byte_offset).unwrap()
2416    }
2417
2418    #[inline(always)]
2419    #[must_use]
2420    pub fn try_byte_to_char(&self, byte_offset: u32) -> Option<u32> {
2421        let base = self.tree.try_byte_to_char(self.start)?;
2422        let abs  = self.tree.try_byte_to_char(self.start + byte_offset)?;
2423        Some(abs - base)
2424    }
2425
2426    #[inline(always)]
2427    #[must_use]
2428    pub fn char_to_byte(&self, char_index: u32) -> u32 {
2429        self.try_char_to_byte(char_index).unwrap()
2430    }
2431
2432    #[inline(always)]
2433    #[must_use]
2434    pub fn try_char_to_byte(&self, char_index: u32) -> Option<u32> {
2435        let base_char = self.tree.try_byte_to_char(self.start)?;
2436        let abs_byte  = self.tree.try_char_to_byte(base_char + char_index)?;
2437        Some(abs_byte - self.start)
2438    }
2439
2440    #[inline(always)]
2441    #[must_use]
2442    pub fn byte_to_line(&self, byte_offset: u32) -> u32 {
2443        self.try_byte_to_line(byte_offset).unwrap()
2444    }
2445
2446    #[inline(always)]
2447    #[must_use]
2448    pub fn try_byte_to_line(&self, byte_offset: u32) -> Option<u32> {
2449        let (abs_line, _) = self.tree.try_byte_to_line_col(self.start + byte_offset)?;
2450        let base_line     = self.tree.try_byte_to_line_col(self.start).map(|(l, _)| l)?;
2451        Some(abs_line - base_line)
2452    }
2453
2454    #[inline(always)]
2455    #[must_use]
2456    pub fn line_to_byte(&self, line: u32) -> u32 {
2457        self.try_line_to_byte(line).unwrap()
2458    }
2459
2460    #[inline(always)]
2461    #[must_use]
2462    pub fn try_line_to_byte(&self, line: u32) -> Option<u32> {
2463        let (abs_start, _) = self.abs_line_range(line)?;
2464        Some(abs_start - self.start)
2465    }
2466
2467    #[inline]
2468    #[must_use]
2469    pub fn abs_line_range(&self, rel_line: u32) -> Option<(u32, u32)> {
2470        let base_line = self.tree.try_byte_to_line_col(self.start).map(|(l, _)| l)?;
2471        let abs_line  = base_line + rel_line;
2472
2473        let line_start = self.tree.try_line_to_byte(abs_line)?;
2474        let line_end   = self.tree.try_line_to_byte(abs_line + 1)
2475            .unwrap_or_else(|| self.tree.len_bytes());
2476
2477        //
2478        // Clamp to slice bounds.
2479        //
2480        let s = line_start.max(self.start);
2481        let e = line_end.min(self.end);
2482        if s > e { return None; }
2483
2484        Some((s, e))
2485    }
2486}
2487
2488// Allow slicing directly from PieceTree.
2489impl PieceTree {
2490    #[inline]
2491    pub fn slice<R: RangeBounds<u32>>(&self, range: R) -> TreeSlice<'_> {
2492        let s = match range.start_bound() {
2493            Bound::Included(&n) => n,
2494            Bound::Excluded(&n) => n + 1,
2495            Bound::Unbounded    => 0,
2496        };
2497        let e = match range.end_bound() {
2498            Bound::Included(&n) => n + 1,
2499            Bound::Excluded(&n) => n,
2500            Bound::Unbounded    => self.len_bytes(),
2501        };
2502        TreeSlice::new(self, s, e)
2503    }
2504
2505    /// Slice by char range.
2506    #[inline]
2507    pub fn slice_chars_range<R: RangeBounds<u32>>(&self, range: R) -> TreeSlice<'_> {
2508        let sc = match range.start_bound() {
2509            Bound::Included(&n) => n,
2510            Bound::Excluded(&n) => n + 1,
2511            Bound::Unbounded    => 0,
2512        };
2513        let ec = match range.end_bound() {
2514            Bound::Included(&n) => n + 1,
2515            Bound::Excluded(&n) => n,
2516            Bound::Unbounded    => self.len_chars(),
2517        };
2518        let s = self.try_char_to_byte(sc).unwrap_or(0);
2519        let e = self.try_char_to_byte(ec).unwrap_or_else(|| self.len_bytes());
2520        TreeSlice::new(self, s, e)
2521    }
2522}
2523
2524impl PieceTree {
2525    /// Fast-path chunk reader specifically designed for the Tree-sitter C API.
2526    /// Given an absolute byte offset, it returns the largest contiguous byte
2527    /// slice available starting exactly at that offset.
2528    #[inline]
2529    #[must_use]
2530    pub fn read_largest_contigous_chunk_at_byte(&self, offset: u32) -> (&[u8], u32) {
2531        let total = self.total_length();
2532        if offset >= total {
2533            return (&[], offset);
2534        }
2535
2536        let mut current = self.root;
2537        let mut current_offset = offset;
2538        let mut doc_offset = 0u32;
2539
2540        while current != NIL {
2541            let node = self.pieces.get(current);
2542            let p = self.pieces.get_piece(current);
2543            let left_len = self.pieces.get(node.left).subtree_len;
2544            let piece_len = p.byte_length;
2545
2546            if current_offset < left_len {
2547                current = node.left;
2548
2549            } else if current_offset < left_len + piece_len {
2550                //
2551                // The requested offset falls inside this exact piece
2552                //
2553
2554                let rel_offset = current_offset - left_len;
2555                let piece_doc_start = doc_offset + left_len;
2556
2557                let text = self.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
2558
2559                return (&text.as_bytes()[rel_offset as usize..], piece_doc_start);
2560
2561            } else {
2562                doc_offset     += left_len + piece_len;
2563                current_offset -= left_len + piece_len;
2564                current = node.right;
2565            }
2566        }
2567
2568        (&[], offset)
2569    }
2570
2571    #[inline]
2572    #[must_use]
2573    pub fn chunk_at_byte(&self, offset: u32) -> (&[u8], u32) {
2574        self.read_largest_contigous_chunk_at_byte(offset)
2575    }
2576
2577    #[inline]
2578    #[must_use]
2579    pub fn chunk_at_char(&self, char_index: u32) -> (&[u8], u32) {
2580        let Some(offset) = self.try_char_to_byte(char_index) else { return (&[], 0) };
2581        self.chunk_at_byte(offset)
2582    }
2583
2584    #[inline]
2585    #[must_use]
2586    pub fn chunk_at_line_break(&self, break_index: u32) -> (&[u8], u32) {
2587        let Some(offset) = self.try_line_break_to_byte(break_index) else { return (&[], 0) };
2588        self.chunk_at_byte(offset)
2589    }
2590
2591    #[inline]
2592    #[must_use]
2593    pub fn line_break_to_byte(&self, break_index: u32) -> u32 {
2594        self.try_line_break_to_byte(break_index).unwrap()
2595    }
2596
2597    #[inline]
2598    #[must_use]
2599    pub fn try_line_break_to_byte(&self, break_index: u32) -> Option<u32> {
2600        // @Cutnpaste from line_to_byte
2601
2602        let total_newlines = self.pieces.get(self.root).subtree_newlines;
2603        if break_index >= total_newlines { return None; }
2604
2605        let mut current = self.root;
2606        let mut current_offset = 0;
2607        let mut current_newlines = 0;
2608
2609        while current != NIL {
2610            let node = self.pieces.get(current);
2611            let p = self.pieces.get_piece(current);
2612            let left_newlines = self.pieces.get(node.left).subtree_newlines;
2613            let left_len      = self.pieces.get(node.left).subtree_len;
2614            let piece_newlines = p.newline_count;
2615
2616            if break_index < current_newlines + left_newlines {
2617                //
2618                // The target line break is somewhere in the left subtree
2619                //
2620                current = node.left;
2621
2622            } else if break_index < current_newlines + left_newlines + piece_newlines {
2623                //
2624                // The target line break is inside this piece
2625                //
2626                let rel_break = break_index - (current_newlines + left_newlines);
2627                let absolute_newline_index = p.buffer_start_line + rel_break;
2628
2629                let absolute_byte_offset = if p.buffer == MOD_BUFFER {
2630                    self.buffers.modifications_newline_offsets[absolute_newline_index as usize]
2631                } else {
2632                    self.buffers.original_buffers[p.buffer].newline_offsets[absolute_newline_index as usize]
2633                };
2634
2635                //
2636                //
2637                //
2638                //
2639                //
2640                // Unlike `line_to_byte` which adds + 1 to find the start of the *next* line,
2641                // we omit it here to return the exact byte offset of the newline character itself.
2642                //
2643                //
2644                //
2645                //
2646                //
2647                let local_piece_offset = absolute_byte_offset - p.byte_offset;
2648                return Some(current_offset + left_len + local_piece_offset);
2649
2650            } else {
2651                current_newlines += left_newlines + piece_newlines;
2652                current_offset   += left_len + p.byte_length;
2653                current = node.right;
2654            }
2655        }
2656
2657        None
2658    }
2659
2660    #[inline]
2661    #[must_use]
2662    pub fn line_to_byte(&self, target_line: u32) -> u32 {
2663        self.try_line_to_byte(target_line).unwrap()
2664    }
2665
2666    #[inline]
2667    #[must_use]
2668    pub fn try_line_to_byte(&self, target_line: u32) -> Option<u32> {
2669        if target_line == 0 { return Some(0) }
2670        let mut current = self.root;
2671        let mut current_offset = 0;
2672        let mut current_line = 0;
2673        while current != NIL {
2674            let node = self.pieces.get(current);
2675            let p = self.pieces.get_piece(current);
2676            let left_newlines = self.pieces.get(node.left).subtree_newlines;
2677            let left_len      = self.pieces.get(node.left).subtree_len;
2678            let piece_newlines = p.newline_count;
2679            let piece_len      = p.byte_length;
2680
2681            if target_line <= current_line + left_newlines {
2682                //
2683                // The target line starts somewhere in the left subtree
2684                //
2685                current = node.left;
2686
2687            } else if target_line <= current_line + left_newlines + piece_newlines {
2688                //
2689                // The target line starts inside this piece
2690                //
2691
2692                current_line   += left_newlines;
2693                current_offset += left_len;
2694
2695                let rel_line = target_line - current_line; // >= 1, guaranteed
2696                let absolute_newline_index = p.buffer_start_line + rel_line - 1;
2697                let absolute_byte_offset = if p.buffer == MOD_BUFFER {
2698                    self.buffers.modifications_newline_offsets[absolute_newline_index as usize]
2699                } else {
2700                    self.buffers.original_buffers[p.buffer].newline_offsets[absolute_newline_index as usize]
2701                };
2702
2703                let local_piece_offset = absolute_byte_offset - p.byte_offset + 1;
2704                return Some(current_offset + local_piece_offset);
2705
2706            } else {
2707                current_line   += left_newlines + piece_newlines;
2708                current_offset += left_len + piece_len;
2709                current = node.right;
2710            }
2711        }
2712
2713        None
2714    }
2715
2716    #[inline]
2717    #[must_use]
2718    pub fn char_to_byte(&self, char_index: u32) -> u32 {
2719        self.try_char_to_byte(char_index).unwrap()
2720    }
2721
2722    #[inline]
2723    #[must_use]
2724    pub fn try_char_to_byte(&self, char_index: u32) -> Option<u32> {
2725        let total_chars = self.len_chars();
2726        if char_index > total_chars { return None; }
2727        if char_index == total_chars { return Some(self.len_bytes()); }
2728
2729        let mut current = self.root;
2730        let mut current_byte = 0;
2731        let mut current_char = 0;
2732
2733        while current != NIL {
2734            let node = self.pieces.get(current);
2735            let left_node = self.pieces.get(node.left);
2736
2737            let left_chars = left_node.subtree_chars;
2738            let left_bytes = left_node.subtree_len;
2739
2740            let p = self.pieces.get_piece(current);
2741            let piece_chars = p.char_count;
2742
2743            if char_index < current_char + left_chars {
2744                current = node.left;
2745
2746            } else if char_index < current_char + left_chars + piece_chars {
2747                //
2748                // Target char is inside this piece,
2749                // p.piece_start_char is the absolute char index of p.offset in its buffer,
2750                // so (p.piece_start_char + rel_char) is the absolute char target.
2751                //
2752
2753                let rel_char = char_index - (current_char + left_chars);
2754                let absolute_target_byte =
2755                    self.buffers.char_to_byte_absolute(p.buffer, p.piece_start_char + rel_char);
2756
2757                return Some(current_byte + left_bytes + (absolute_target_byte - p.byte_offset));
2758
2759            } else {
2760                current_char += left_chars + piece_chars;
2761                current_byte += left_bytes + p.byte_length;
2762                current = node.right;
2763            }
2764        }
2765
2766        None
2767    }
2768
2769    #[inline]
2770    #[must_use]
2771    pub fn byte_to_char(&self, byte_offset: u32) -> u32 {
2772        self.try_byte_to_char(byte_offset).unwrap()
2773    }
2774
2775    #[inline]
2776    #[must_use]
2777    pub fn try_byte_to_char(&self, byte_offset: u32) -> Option<u32> {
2778        let total_bytes = self.len_bytes();
2779        if byte_offset > total_bytes { return None; }
2780        if byte_offset == total_bytes { return Some(self.len_chars()); }
2781
2782        let mut current = self.root;
2783        let mut current_byte = 0;
2784        let mut current_char = 0;
2785
2786        while current != NIL {
2787            let node = self.pieces.get(current);
2788            let left_node = self.pieces.get(node.left);
2789
2790            let left_bytes = left_node.subtree_len;
2791            let left_chars = left_node.subtree_chars;
2792
2793            let p = self.pieces.get_piece(current);
2794            let piece_bytes = p.byte_length;
2795
2796            if byte_offset < current_byte + left_bytes {
2797                current = node.left;
2798
2799            } else if byte_offset < current_byte + left_bytes + piece_bytes {
2800                // Target byte is inside this piece.
2801                // byte_to_char_absolute gives the absolute char index, so subtracting
2802                // piece_start_char converts it to a piece-relative char count.
2803
2804                let rel_byte = byte_offset - (current_byte + left_bytes);
2805                let absolute_target_char =
2806                    self.buffers.byte_to_char_absolute(p.buffer, p.byte_offset + rel_byte);
2807
2808                return Some(current_char + left_chars + (absolute_target_char - p.piece_start_char));
2809
2810            } else {
2811                current_byte += left_bytes + piece_bytes;
2812                current_char += left_chars + p.char_count;
2813                current = node.right;
2814            }
2815        }
2816
2817        None
2818    }
2819
2820    #[inline]
2821    #[must_use]
2822    pub fn byte_to_line(&self, offset: u32) -> u32 {
2823        self.try_byte_to_line(offset).unwrap()
2824    }
2825
2826    #[inline]
2827    #[must_use]
2828    pub fn try_byte_to_line(&self, offset: u32) -> Option<u32> {
2829        self.try_byte_to_line_col(offset).map(|(l, _)| l)
2830    }
2831
2832    #[inline]
2833    #[must_use]
2834    pub fn byte_to_line_col(&self, offset: u32) -> (u32, u32) {
2835        self.try_byte_to_line_col(offset).unwrap()
2836    }
2837
2838    #[inline]
2839    #[must_use]
2840    pub fn try_byte_to_line_col(&self, offset: u32) -> Option<(u32, u32)> {
2841        let total_bytes = self.len_bytes();
2842        if offset > total_bytes { return None }
2843        if offset == total_bytes {
2844            //
2845            // Get offset of the last line
2846            //
2847            let line            = self.pieces.get(self.root).subtree_newlines;
2848            let line_start_byte = self.try_line_to_byte(line).unwrap_or(0);
2849            let target_char     = self.len_chars();
2850            let line_start_char = self.try_byte_to_char(line_start_byte)?;
2851            return Some((line, target_char - line_start_char));
2852        }
2853
2854        let mut current         = self.root;
2855        let mut current_line    = 0u32;
2856        let mut current_byte    = 0u32;
2857        let mut current_char    = 0u32;
2858
2859        while current != NIL {
2860            let node          = self.pieces.get(current);
2861            let left_node     = self.pieces.get(node.left);
2862            let left_bytes    = left_node.subtree_len;
2863            let left_newlines = left_node.subtree_newlines;
2864            let left_chars    = left_node.subtree_chars;
2865            let p             = self.pieces.get_piece(current);
2866            let piece_bytes   = p.byte_length;
2867
2868            if offset < current_byte + left_bytes {
2869                current = node.left;
2870
2871            } else if offset < current_byte + left_bytes + piece_bytes {
2872                current_line += left_newlines;
2873                current_char += left_chars;
2874
2875                let rel_byte = offset - (current_byte + left_bytes);
2876
2877                let (local_newlines, col) = if p.newline_count == 0 {
2878                    let target_char = self.buffers.byte_to_char_absolute(p.buffer, p.byte_offset + rel_byte);
2879                    let rel_char    = target_char - p.piece_start_char;
2880                    let abs_char    = current_char + rel_char;
2881
2882                    let line_start_byte = self.try_line_to_byte(current_line)?;
2883                    let line_start_char = self.try_byte_to_char(line_start_byte)?;
2884
2885                    (0, abs_char - line_start_char)
2886                } else {
2887                    let start_index       = p.buffer_start_line as usize;
2888                    let end_index         = start_index + p.newline_count as usize;
2889                    let absolute_target = p.byte_offset + rel_byte;
2890
2891                    let offsets = &self.buffers.get_newlines(p.buffer)[start_index..end_index];
2892
2893                    let local_nl = offsets.partition_point(|&off| off < absolute_target) as u32;
2894
2895                    let target_char = self.buffers.byte_to_char_absolute(p.buffer, p.byte_offset + rel_byte);
2896                    let col = if local_nl > 0 {
2897                        let last_nl_abs_byte = offsets[local_nl as usize - 1];
2898                        let last_nl_char     = self.buffers.byte_to_char_absolute(p.buffer, last_nl_abs_byte);
2899                        target_char - (last_nl_char + 1)
2900                    } else {
2901                        let line_start_byte = self.try_line_to_byte(current_line)?;
2902                        let line_start_char = self.try_byte_to_char(line_start_byte)?;
2903                        let rel_char        = target_char - p.piece_start_char;
2904                        (current_char + rel_char) - line_start_char
2905                    };
2906
2907                    (local_nl, col)
2908                };
2909
2910                return Some((current_line + local_newlines, col));
2911
2912            } else {
2913                current_line += left_newlines + p.newline_count;
2914                current_byte += left_bytes + piece_bytes;
2915                current_char += left_chars + p.char_count;
2916                current = node.right;
2917            }
2918        }
2919
2920        None
2921    }
2922
2923    #[inline]
2924    #[must_use]
2925    pub fn pieces(&self) -> PieceTreeIter<'_> {
2926        PieceTreeIter::new(&self.pieces, self.root)
2927    }
2928
2929    #[inline]
2930    #[must_use]
2931    pub fn get_line_range(&self, line: u32) -> (u32, u32) {
2932        self.try_get_line_range(line).unwrap()
2933    }
2934
2935    #[inline]
2936    #[must_use]
2937    pub fn try_get_line_range(&self, line: u32) -> Option<(u32, u32)> {
2938        let start = self.try_line_to_byte(line)?;
2939        let end = self.try_line_to_byte(line + 1).unwrap_or_else(|| self.total_length());
2940        Some((start, end))
2941    }
2942
2943    #[inline]
2944    #[must_use]
2945    pub fn get_line_content_allocating(&self, line: u32) -> Option<String> {
2946        let (start, end) = self.try_get_line_range(line)?;
2947
2948        let mut content = String::with_capacity((end - start) as usize);
2949        let mut walker = TreeWalker::new(self);
2950
2951        walker.seek(start);
2952        while walker.offset < end {
2953            if let Some(c) = walker.next() {
2954                content.push(c);
2955            } else {
2956                break;
2957            }
2958        }
2959
2960        Some(content)
2961    }
2962
2963    /// Byte at a given byte offset. O(log n).
2964    #[inline]
2965    #[must_use]
2966    pub fn byte(&self, offset: u32) -> u8 {
2967        self.try_byte(offset).unwrap()
2968    }
2969
2970    /// Byte at a given byte offset. O(log n).
2971    #[inline]
2972    #[must_use]
2973    pub fn try_byte(&self, offset: u32) -> Option<u8> {
2974        let (node, rel) = self.find_position(offset, false)?;
2975        let p = self.get_piece(node);
2976        let text = self.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
2977        text.as_bytes().get(rel as usize).copied()
2978    }
2979
2980    /// Char at a given char index. O(log n) to find the piece, then
2981    /// a short scan within the piece.
2982    #[inline]
2983    #[must_use]
2984    pub fn char(&self, char_index: u32) -> char {
2985        self.try_char(char_index).unwrap()
2986    }
2987
2988    /// Char at a given char index. O(log n) to find the piece, then
2989    /// a short scan within the piece.
2990    #[inline]
2991    #[must_use]
2992    pub fn try_char(&self, char_index: u32) -> Option<char> {
2993        let byte_offset = self.try_char_to_byte(char_index)?;
2994        let (node, rel) = self.find_position(byte_offset, false)?;
2995        let p = self.get_piece(node);
2996        let text = self.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
2997        text[rel as usize..].chars().next()
2998    }
2999
3000    /// Returns a non-allocating iterator of chars over the given char range.
3001    /// Backed by `TreeWalker::seek` so it reuses existing infrastructure.
3002    #[inline]
3003    #[must_use]
3004    pub fn slice_chars(&self, char_start: u32, char_end: u32) -> SliceChars<'_> {
3005        let byte_start = self.try_char_to_byte(char_start).unwrap_or(0);
3006        let byte_end   = self.try_char_to_byte(char_end).unwrap_or_else(|| self.total_length());
3007        SliceChars {
3008            walker:   { let mut w = TreeWalker::new(self); w.seek(byte_start); w },
3009            byte_end,
3010        }
3011    }
3012
3013    /// Returns a non-allocating iterator of chars over the given byte range.
3014    #[inline]
3015    #[must_use]
3016    pub fn slice_bytes(&self, byte_start: u32, byte_end: u32) -> SliceChars<'_> {
3017        SliceChars {
3018            walker:   { let mut w = TreeWalker::new(self); w.seek(byte_start); w },
3019            byte_end,
3020        }
3021    }
3022
3023    /// Non-allocating line view: returns a `ChunkIter` over the byte range of
3024    /// `line`. Line numbers are 0-based. The trailing \n is included if present.
3025    #[inline]
3026    #[must_use]
3027    pub fn line(&self, line: u32) -> TreeSlice<'_> {
3028        self.try_line(line).unwrap()
3029    }
3030
3031    #[inline]
3032    #[must_use]
3033    pub fn try_line(&self, line: u32) -> Option<TreeSlice<'_>> {
3034        let start = self.try_line_to_byte(line)?;
3035        let end = self.try_line_to_byte(line + 1).unwrap_or_else(|| self.len_bytes());
3036
3037        Some(self.slice(start..end))
3038    }
3039
3040    /// Number of lines (= newline count + 1)
3041    #[inline]
3042    #[must_use]
3043    pub fn len_lines(&self) -> u32 { self.pieces.get(self.root).subtree_newlines + 1 }
3044
3045    #[inline(always)]
3046    #[must_use]
3047    pub fn len_chars(&self) -> u32 { self.pieces.get(self.root).subtree_chars }
3048
3049    #[inline]
3050    #[must_use]
3051    pub fn len_bytes(&self) -> u32 { self.total_length() }
3052
3053    #[inline]
3054    pub fn remove<R>(&mut self, range: R) where R: RangeBounds<u32> {
3055        let start = match range.start_bound() {
3056            Bound::Included(&n) => n,
3057            Bound::Excluded(&n) => n + 1,
3058            Bound::Unbounded => 0,
3059        };
3060        let end = match range.end_bound() {
3061            Bound::Included(&n) => n + 1,
3062            Bound::Excluded(&n) => n,
3063            Bound::Unbounded => self.len_bytes(),
3064        };
3065        if end > start { self.remove_at(start, end - start) }
3066    }
3067
3068    #[inline]
3069    #[must_use]
3070    pub fn char_to_line(&self, char_index: u32) -> u32 {
3071        self.try_char_to_line(char_index).unwrap()
3072    }
3073
3074    #[inline]
3075    #[must_use]
3076    pub fn try_char_to_line(&self, char_index: u32) -> Option<u32> {
3077        let byte_index = self.try_char_to_byte(char_index)?;
3078        self.try_byte_to_line_col(byte_index).map(|(line, _)| line)
3079    }
3080
3081    #[inline]
3082    #[must_use]
3083    pub fn char_to_line_col(&self, char_index: u32) -> (u32, u32) {
3084        self.try_char_to_line_col(char_index).unwrap()
3085    }
3086
3087    #[inline]
3088    #[must_use]
3089    pub fn try_char_to_line_col(&self, char_index: u32) -> Option<(u32, u32)> {
3090        let byte_index = self.try_char_to_byte(char_index)?;
3091        self.try_byte_to_line_col(byte_index)
3092    }
3093
3094    #[inline]
3095    #[must_use]
3096    pub fn line_to_char(&self, line: u32) -> u32 {
3097        self.try_line_to_char(line).unwrap()
3098    }
3099
3100    #[inline]
3101    #[must_use]
3102    pub fn try_line_to_char(&self, line: u32) -> Option<u32> {
3103        let byte_index = self.try_line_to_byte(line)?;
3104        self.try_byte_to_char(byte_index)
3105    }
3106}
3107
3108#[derive(Debug)]
3109pub struct PieceTreeIter<'a, const MAX_INLINE_TREE_DEPTH: usize = 32> {
3110    pub arena: &'a Pieces,
3111    pub stack: SmallVec<[NodeRef; MAX_INLINE_TREE_DEPTH]>,
3112}
3113
3114impl<'a, const MAX_INLINE_TREE_DEPTH: usize> PieceTreeIter<'a, MAX_INLINE_TREE_DEPTH> {
3115    #[inline]
3116    #[must_use]
3117    pub fn new(arena: &'a Pieces, mut root: NodeRef) -> Self {
3118        let mut stack = SmallVec::new();
3119        while root != NIL {
3120            stack.push(root);
3121            root = arena.get(root).left;
3122        }
3123
3124        Self { arena, stack }
3125    }
3126}
3127
3128impl<const MAX_INLINE_TREE_DEPTH: usize> Iterator for PieceTreeIter<'_, MAX_INLINE_TREE_DEPTH> {
3129    type Item = (NodeRef, Piece);
3130
3131    #[inline]
3132    fn next(&mut self) -> Option<Self::Item> {
3133        let node_index = self.stack.pop()?;
3134        let node = self.arena.get(node_index);
3135        let p = self.arena.get_piece(node_index);
3136
3137        let mut current = node.right;
3138        while current != NIL {
3139            self.stack.push(current);
3140            current = self.arena.get(current).left;
3141        }
3142
3143        Some((node_index, p))
3144    }
3145}
3146
3147#[derive(Clone, Copy, PartialEq, Debug)]
3148pub enum Direction { Left, Center, Right }
3149
3150#[derive(Debug)]
3151pub struct TreeWalker<'a, const MAX_INLINE_TREE_DEPTH: usize = 32> {
3152    pub tree: &'a PieceTree,
3153    pub stack: SmallVec<[(NodeRef, Direction); MAX_INLINE_TREE_DEPTH]>,
3154    pub current_str: str::Chars<'a>,
3155    pub offset: u32,
3156}
3157
3158impl<'a, const MAX_INLINE_TREE_DEPTH: usize> TreeWalker<'a, MAX_INLINE_TREE_DEPTH> {
3159    #[inline]
3160    #[must_use]
3161    pub fn new(tree: &'a PieceTree) -> Self {
3162        let mut walker = Self {
3163            tree,
3164            stack: SmallVec::new(),
3165            current_str: "".chars(),
3166            offset: 0,
3167        };
3168        if tree.root != NIL {
3169            walker.stack.push((tree.root, Direction::Left));
3170        }
3171        walker.populate_chars();
3172        walker
3173    }
3174
3175    #[inline]
3176    pub fn seek(&mut self, target: u32) {
3177        self.stack.clear();
3178        self.offset = target;
3179        self.current_str = "".chars();
3180
3181        if self.tree.root == NIL { return; }
3182
3183        let mut current = self.tree.root;
3184        let mut current_offset = target;
3185
3186        while current != NIL {
3187            let node = self.tree.pieces.get(current);
3188            let p = self.tree.pieces.get_piece(current);
3189            let left_len = self.tree.pieces.get(node.left).subtree_len;
3190            let piece_len = p.byte_length;
3191
3192            if current_offset < left_len {
3193                self.stack.push((current, Direction::Center));
3194                current = node.left;
3195            } else if current_offset < left_len + piece_len {
3196                self.stack.push((current, Direction::Right));
3197                let text = self.tree.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
3198                let rel_offset = current_offset - left_len;
3199                self.current_str = text[rel_offset as usize..].chars();
3200                break;
3201            } else {
3202                current_offset -= left_len + piece_len;
3203                current = node.right;
3204            }
3205        }
3206    }
3207
3208    #[inline]
3209    pub fn populate_chars(&mut self) {
3210        while let Some((node_index, dir)) = self.stack.pop() {
3211            let node = self.tree.pieces.get(node_index);
3212            match dir {
3213                Direction::Left => {
3214                    self.stack.push((node_index, Direction::Center));
3215                    if node.left != NIL { self.stack.push((node.left, Direction::Left)); }
3216                }
3217
3218                Direction::Center => {
3219                    self.stack.push((node_index, Direction::Right));
3220                    let p = self.tree.pieces.get_piece(node_index);
3221                    let text = self.tree.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
3222                    self.current_str = text.chars();
3223                    return;
3224                }
3225
3226                Direction::Right => {
3227                    if node.right != NIL { self.stack.push((node.right, Direction::Left)); }
3228                }
3229            }
3230        }
3231    }
3232}
3233
3234impl Iterator for TreeWalker<'_> {
3235    type Item = char;
3236
3237    #[inline(always)]
3238    fn next(&mut self) -> Option<char> {
3239        loop {
3240            if let Some(c) = self.current_str.next() {
3241                self.offset += c.len_utf8() as u32;
3242                return Some(c);
3243            }
3244
3245            if self.stack.is_empty() { return None; }
3246            self.populate_chars();
3247        }
3248    }
3249}
3250
3251#[derive(Debug)]
3252pub struct ReverseTreeWalker<'a, const MAX_INLINE_TREE_DEPTH: usize = 32> {
3253    pub tree: &'a PieceTree,
3254    pub stack: SmallVec<[(NodeRef, bool); MAX_INLINE_TREE_DEPTH]>,
3255    pub current_str: str::Chars<'a>,
3256}
3257
3258impl<'a, const MAX_INLINE_TREE_DEPTH: usize> ReverseTreeWalker<'a, MAX_INLINE_TREE_DEPTH> {
3259    #[inline(always)]
3260    #[must_use]
3261    pub fn new(tree: &'a PieceTree) -> Self {
3262        let mut walker = Self {
3263            tree,
3264            stack: SmallVec::new(),
3265            current_str: "".chars(),
3266        };
3267        walker.push_rightmost(tree.root);
3268        walker
3269    }
3270
3271    #[inline]
3272    #[must_use]
3273    pub fn new_at(tree: &'a PieceTree, target_offset: u32) -> Self {
3274        let mut walker = Self {
3275            tree,
3276            stack: SmallVec::new(),
3277            current_str: "".chars(),
3278        };
3279        walker.seek(target_offset);
3280        walker
3281    }
3282
3283    #[inline(always)]
3284    pub fn push_rightmost(&mut self, mut node: NodeRef) {
3285        while node != NIL {
3286            self.stack.push((node, false));
3287            node = self.tree.pieces.get(node).right;
3288        }
3289    }
3290
3291    #[inline]
3292    pub fn seek(&mut self, mut target_offset: u32) {
3293        self.stack.clear();
3294        self.current_str = "".chars();
3295
3296        if self.tree.root == NIL { return; }
3297
3298        let total_bytes = self.tree.len_bytes();
3299
3300        if target_offset > total_bytes {
3301            target_offset = total_bytes;
3302        }
3303        if target_offset == total_bytes {
3304            self.push_rightmost(self.tree.root);
3305            return;
3306        }
3307
3308        let mut current = self.tree.root;
3309        let mut current_offset = target_offset;
3310
3311        while current != NIL {
3312            let node = self.tree.pieces.get(current);
3313            let p = self.tree.pieces.get_piece(current);
3314            let left_len = self.tree.pieces.get(node.left).subtree_len;
3315            let piece_len = p.byte_length;
3316
3317            if current_offset < left_len {
3318                self.stack.push((current, true));
3319                current = node.left;
3320
3321            } else if current_offset < left_len + piece_len {
3322                self.stack.push((current, true));
3323
3324                let text = self.tree.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
3325                let rel_offset = current_offset - left_len;
3326
3327                // Keep the subset of the string before the offset
3328                self.current_str = text[..rel_offset as usize].chars();
3329
3330                break;
3331
3332            } else {
3333                self.stack.push((current, false));
3334                current_offset -= left_len + piece_len;
3335                current = node.right;
3336            }
3337        }
3338    }
3339}
3340
3341impl Iterator for ReverseTreeWalker<'_> {
3342    type Item = char;
3343
3344    #[inline]
3345    fn next(&mut self) -> Option<Self::Item> {
3346        if let Some(c) = self.current_str.next_back() { return Some(c); }
3347
3348        while let Some((node_index, visited_right)) = self.stack.pop() {
3349            let node = self.tree.pieces.get(node_index);
3350            if visited_right {
3351                self.push_rightmost(node.left);
3352            } else {
3353                self.stack.push((node_index, true));
3354
3355                let p = self.tree.pieces.get_piece(node_index);
3356                let text_slice = self.tree.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
3357
3358                self.current_str = text_slice.chars();
3359
3360                if let Some(c) = self.current_str.next_back() { return Some(c); }
3361            }
3362        }
3363
3364        None
3365    }
3366}
3367
3368impl PieceTree {
3369    #[inline]
3370    pub fn debug(&self, f: &mut impl core::fmt::Write) -> core::fmt::Result {
3371        writeln!(f, "\n--- Tree State (Root: {:?}) ---", self.root)?;
3372        self.print_inorder(f, self.root, &mut None, 0)?;
3373        writeln!(f, "------------------------------\n")
3374    }
3375
3376    pub fn print_inorder(&self, f: &mut impl core::fmt::Write, node: NodeRef, last: &mut Option<Piece>, depth: usize) -> core::fmt::Result {
3377        if node == NIL { return Ok(()) }
3378
3379        let n = self.pieces.nodes[node];
3380
3381        self.print_inorder(f, n.left, last, depth + 1)?;
3382
3383        let cur = self.get_piece(node);
3384
3385        //
3386        // Check for the mergeable invariant
3387        //
3388        let warning = if let Some(prev) = last {
3389            if prev.buffer == cur.buffer
3390               && prev.byte_offset + prev.byte_length == cur.byte_offset
3391            {
3392                " --------- [!!! MERGEABLE NEIGHBORS NOT MERGED !!!] ---------"
3393            } else {
3394                ""
3395            }
3396        } else {
3397            ""
3398        };
3399
3400        let Piece { buffer, byte_offset: offset, byte_length: length, .. } = cur;
3401
3402        writeln!(
3403            f,
3404            "{:indent$}Node {node:?}: Buf={}, Off={offset}, Len={length}{warning}",
3405            "", buffer.as_u32(), indent = depth * 4
3406        )?;
3407
3408        *last = Some(cur);
3409
3410        self.print_inorder(f, n.right, last, depth + 1)
3411    }
3412}
3413
3414pub fn assert_state(tree: &PieceTree, expected: &str) {
3415    let tree_text = tree.to_string();
3416
3417    assert_eq!(tree_text, expected, "Text mismatch");
3418
3419    if !expected.is_empty() {
3420        let offsets = [0, expected.len() / 2, expected.len() - 1];
3421        for off in offsets {
3422            let chunk = tree.read_largest_contigous_chunk_at_byte(off as u32).0;
3423            let chunk_str = str::from_utf8(chunk).unwrap();
3424            assert!(
3425                expected[off..].starts_with(chunk_str),
3426                "Chunk mismatch at offset {}",
3427                off
3428            );
3429        }
3430    }
3431}
3432
3433pub fn assert_invariants(tree: &PieceTree) {
3434    fn check(tree: &PieceTree, node: NodeRef) -> (usize, usize) {
3435        if node == NIL { return (0, 1) }
3436
3437        let n = tree.pieces.nodes[node];
3438        let piece = tree.get_piece(node);
3439
3440        let (l_len, l_bh) = check(tree, n.left);
3441        let (r_len, r_bh) = check(tree, n.right);
3442
3443        let expected_len = l_len + piece.byte_length as usize + r_len;
3444        assert_eq!(n.subtree_len as usize, expected_len, "subtree_len mismatch");
3445
3446        if n.color() == Color::Red {
3447            assert_eq!(
3448                tree.pieces.nodes[n.left].color(),
3449                Color::Black,
3450                "red node with red left child"
3451            );
3452            assert_eq!(
3453                tree.pieces.nodes[n.right].color(),
3454                Color::Black,
3455                "red node with red right child"
3456            );
3457        }
3458
3459        assert_eq!(l_bh, r_bh, "black height mismatch");
3460
3461        let bh = l_bh + usize::from(n.color() == Color::Black);
3462        (expected_len, bh)
3463    }
3464
3465    if tree.root == NIL {
3466        assert_eq!(tree.total_length(), 0,            "Empty tree has nonzero length");
3467    } else {
3468        assert_eq!(
3469            tree.pieces.nodes[tree.root].color(),
3470            Color::Black,
3471            "root is not black"
3472        );
3473        let (len, _) = check(tree, tree.root);
3474        assert_eq!(len, tree.total_length() as usize, "Tree length and root length differ");
3475
3476    }
3477}
3478
3479pub fn assert_piece_metadata(tree: &PieceTree) {
3480    fn collect(tree: &PieceTree, node: NodeRef, out: &mut Vec<Piece>) {
3481        if node == NIL { return }
3482
3483        let n = tree.pieces.nodes[node];
3484
3485        collect(tree, n.left, out);
3486        out.push(tree.pieces.get_piece(node));
3487        collect(tree, n.right, out);
3488    }
3489
3490    let mut pieces = Vec::new();
3491    collect(tree, tree.root, &mut pieces);
3492
3493    for p in &pieces {
3494        let buf   = tree.buffers.get(p.buffer);
3495        let start = p.byte_offset as usize;
3496        let end   = start + p.byte_length as usize;
3497
3498        assert!(end <= buf.len(), "Piece points past end of buffer");
3499        assert!(p.byte_length > 0, "Zero-length piece found");
3500
3501        let slice = &buf[start..end];
3502        let (actual_chars, actual_nl) = count_chars_and_newlines(slice.as_bytes());
3503        assert_eq!(p.char_count,    actual_chars, "char_count mismatch");
3504        assert_eq!(p.newline_count, actual_nl,    "newline_count mismatch");
3505
3506        //
3507        // piece_start_char and buffer_start_line: scan only the prefix once
3508        //
3509        let prefix = &buf.as_bytes()[..p.byte_offset as usize];
3510        let actual_start_char = bytecount::num_chars(prefix) as u32;
3511        let actual_start_line = bytecount::count(prefix, b'\n') as u32;
3512
3513        assert_eq!(p.piece_start_char,  actual_start_char,
3514            "piece_start_char mismatch buffer={} offset={}", p.buffer.as_u32(), p.byte_offset);
3515
3516        assert_eq!(p.buffer_start_line, actual_start_line,
3517            "buffer_start_line mismatch buffer={} offset={}", p.buffer.as_u32(), p.byte_offset);
3518
3519        //
3520        // Verify the newline_offsets slice
3521        //
3522        let nl_offsets = &tree.buffers.get_newlines(p.buffer)
3523            [p.buffer_start_line as usize..(p.buffer_start_line + p.newline_count) as usize];
3524
3525        for (i, &abs_byte) in nl_offsets.iter().enumerate() {
3526            assert!(
3527                abs_byte >= p.byte_offset && abs_byte < p.byte_offset + p.byte_length,
3528                "newline_offsets[{}] outside piece", p.buffer_start_line as usize + i
3529            );
3530
3531            assert_eq!(
3532                buf.as_bytes()[abs_byte as usize], b'\n',
3533                "newline_offsets[{}] not a newline", p.buffer_start_line as usize + i
3534            );
3535        }
3536    }
3537}
3538
3539pub fn assert_no_mergeable_neighbors(tree: &PieceTree) {
3540    fn inorder(tree: &PieceTree, node: NodeRef, last: &mut Option<Piece>) {
3541        if node == NIL { return }
3542
3543        let n = tree.pieces.nodes[node];
3544        inorder(tree, n.left, last);
3545
3546        let cur = tree.get_piece(node);
3547
3548        if let Some(prev) = *last {
3549            if prev.buffer == cur.buffer
3550                && prev.byte_offset + prev.byte_length == cur.byte_offset
3551            {
3552                panic!("Mergeable neighboring pieces were left unmerged");
3553            }
3554        }
3555
3556        *last = Some(cur);
3557        inorder(tree, n.right, last);
3558    }
3559
3560    let mut last = None;
3561    inorder(tree, tree.root, &mut last);
3562}
3563
3564pub fn assert_coordinates(tree: &PieceTree, oracle: &str) {
3565    #[inline]
3566    fn oracle_offset_to_line_col(s: &str, byte_offset: usize) -> (usize, usize) {
3567        let slice = &s[..byte_offset];
3568        let line  = bytecount::count(slice.as_bytes(), b'\n');
3569        let last_nl = slice.rfind('\n').map_or(0, |i| i + 1);
3570        let col   = bytecount::num_chars(s[last_nl..byte_offset].as_bytes());
3571        (line, col)
3572    }
3573
3574    #[inline]
3575    fn oracle_line_to_offset(s: &str, target_line: usize) -> usize {
3576        if target_line == 0 { return 0 }
3577
3578        let mut line = 0;
3579        for (i, c) in s.char_indices() {
3580            if c == '\n' {
3581                line += 1;
3582                if line == target_line { return i + 1; }
3583            }
3584        }
3585
3586        s.len()
3587    }
3588
3589    fn check_coordinate(tree: &PieceTree, oracle: &str, byte_index: u32) {
3590        let char_index = bytecount::num_chars(oracle[..byte_index as usize].as_bytes()) as u32;
3591
3592        assert_eq!(tree.char_to_byte(char_index), byte_index,
3593                   "char_to_byte({}) wrong", char_index);
3594
3595        assert_eq!(tree.byte_to_char(byte_index), char_index,
3596                   "byte_to_char({}) wrong", byte_index);
3597
3598        let expected_lc = oracle_offset_to_line_col(oracle, byte_index as usize);
3599        assert_eq!(
3600            tree.byte_to_line_col(byte_index),
3601            (expected_lc.0 as u32, expected_lc.1 as u32),
3602            "offset_to_line_col({}) wrong", byte_index
3603        );
3604    }
3605
3606    let total_bytes = oracle.len() as u32;
3607    let total_chars = oracle.chars().count() as u32;
3608
3609    //
3610    // Always check these
3611    //
3612    check_coordinate(tree, oracle, 0);
3613    if total_bytes > 0 {
3614        check_coordinate(tree, oracle, total_bytes - 1);
3615        check_coordinate(tree, oracle, total_bytes / 2);
3616    }
3617
3618
3619    //
3620    // Sample ~8 positions spread across the document
3621    //
3622    for i in 0u32..8 {
3623        let byte =
3624            ((total_bytes as u64 * ((i as u64 * 2_654_435_761) & 0xFFFF_FFFF)) >> 32) as u32 % total_bytes.max(1);
3625
3626        // Snap to char boundary
3627        let byte = oracle.as_bytes()[..byte as usize]
3628            .iter().rposition(|&b| !(0x80..0xC0).contains(&b))
3629            .map_or(0, |i| i as u32);
3630
3631        check_coordinate(tree, oracle, byte);
3632    }
3633
3634    //
3635    // EOF and out-of-bounds always
3636    //
3637    assert_eq!(tree.char_to_byte(total_chars), total_bytes);
3638    assert_eq!(tree.byte_to_char(total_bytes), total_chars);
3639    assert_eq!(tree.try_char_to_byte(total_chars + 1), None);
3640    assert_eq!(tree.try_byte_to_char(total_bytes + 1), None);
3641    assert_eq!(tree.try_byte_to_line_col(total_bytes + 1), None);
3642
3643    //
3644    // line_to_offset: check first, last, middle line only
3645    //
3646    let total_lines = oracle.chars().filter(|&c| c == '\n').count() + 1;
3647    for line in [0, total_lines / 2, total_lines.saturating_sub(1)] {
3648        let expected = oracle_line_to_offset(oracle, line) as u32;
3649        assert_eq!(tree.try_line_to_byte(line as u32), Some(expected),
3650            "line_to_offset({}) wrong", line);
3651    }
3652
3653    assert_eq!(tree.try_line_to_byte(total_lines as u32), None);
3654}