Skip to main content

piece_tree/
lib.rs

1#![cfg_attr(not(feature = "write"), no_std)]
2
3#![cfg_attr(all(not(feature = "runtime-dispatch-simd"), feature = "generic-simd"), feature(portable_simd))]
4
5#![warn(clippy::all, clippy::pedantic, dead_code, clippy::cargo)]
6#![allow(
7    unused_assignments,
8    clippy::inline_always,
9    clippy::uninlined_format_args, // ?...
10    clippy::borrow_as_ptr,
11    clippy::negative_feature_names,
12    clippy::redundant_closure_for_method_calls,
13    clippy::sliced_string_as_bytes,
14    clippy::should_implement_trait,
15    clippy::collapsible_if,
16    clippy::new_without_default,
17    clippy::comparison_chain,
18    clippy::redundant_field_names,
19    clippy::semicolon_if_nothing_returned,
20    clippy::pub_underscore_fields,
21    clippy::struct_field_names,
22    clippy::ptr_as_ptr,
23    clippy::missing_transmute_annotations,
24    clippy::multiple_crate_versions,
25    clippy::cast_precision_loss,
26    clippy::cast_possible_truncation,
27    clippy::similar_names,
28    clippy::cast_sign_loss,
29    clippy::cast_possible_wrap,
30    clippy::used_underscore_binding,
31    clippy::nonstandard_macro_braces,
32    clippy::used_underscore_items,
33    clippy::enum_glob_use,
34    clippy::cast_lossless,
35    clippy::match_same_arms,
36    clippy::too_many_lines,
37    clippy::unnested_or_patterns,
38    clippy::blocks_in_conditions,
39    clippy::missing_errors_doc,
40    clippy::missing_panics_doc,
41)]
42
43#[cfg(feature = "write")]
44extern crate std;
45
46extern crate alloc;
47
48#[allow(unused)]
49use alloc::vec;
50use alloc::vec::Vec;
51use alloc::sync::Arc;
52use alloc::string::{String, ToString};
53
54use core::str;
55use core::cmp::Ordering;
56use core::ops::{Bound, Deref, RangeBounds};
57
58#[cfg(not(feature = "dont_vendor"))]
59mod cranelift_entity_vendor;
60#[cfg(not(feature = "dont_vendor"))]
61use cranelift_entity_vendor as cranelift_entity;
62
63#[cfg(not(feature = "dont_vendor"))]
64mod smallvec_vendor;
65#[cfg(not(feature = "dont_vendor"))]
66use smallvec_vendor as smallvec;
67
68#[cfg(not(feature = "dont_vendor"))]
69mod bytecount_vendor;
70#[cfg(not(feature = "dont_vendor"))]
71use bytecount_vendor as bytecount;
72
73use smallvec::SmallVec;
74use cranelift_entity::{EntityRef, PrimaryMap};
75#[cfg(feature = "dont_vendor")]
76use cranelift_entity::entity_impl;
77
78#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
79pub struct BufferRef(pub u32);
80entity_impl!(BufferRef);
81
82#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
83pub struct NodeRef(pub u32);
84entity_impl!(NodeRef);
85
86pub const NIL: NodeRef = NodeRef(0);
87
88pub const MOD_BUFFER: BufferRef = BufferRef(u32::MAX >> 1);
89
90pub const CHECKPOINT_INTERVAL: u32 = 64;
91
92#[derive(Clone, Copy, PartialEq, Eq, Debug)]
93pub enum Color { Black = 0, Red = 1 }
94
95#[derive(Clone, Copy, Debug)]
96pub struct HistoryEntry {
97    pub root:          NodeRef,
98    pub cursor_offset: u32,
99}
100
101#[derive(Default, Clone, Copy, Debug, PartialEq, Eq)]
102pub struct Piece {
103    pub buffer:            BufferRef,
104
105    pub byte_offset:       u32,
106    pub byte_length:       u32,
107
108    pub newline_count:     u32,
109    pub char_count:        u32,
110
111    pub buffer_start_line: u32,  // Index into that buffer's `newline_offsets` array of this piece's first '\n'
112
113    pub piece_start_char:  u32,  // Absolute char index of p.offset in its buffer
114}
115
116// For tests and other stuff
117#[derive(Clone, Copy, Debug)]
118pub enum Edit {
119    Insert { offset: u32, text: &'static str },
120    Remove { offset: u32, length: u32 },
121}
122
123impl Edit {
124    #[inline(always)]
125    #[must_use]
126    pub fn offset(&self) -> u32 {
127        match self {
128            Edit::Insert { offset, .. } => *offset,
129            Edit::Remove { offset, .. } => *offset,
130        }
131    }
132}
133
134#[derive(Clone, Copy, Debug)]
135#[repr(C)]
136pub struct Node {
137    pub left:              NodeRef, // 4 bytes
138    pub right:             NodeRef, // 4 bytes
139    pub offset:            u32,     // 4 bytes
140    pub subtree_len:       u32,     // 4 bytes
141    pub subtree_chars:     u32,     // 4 bytes
142    pub subtree_newlines:  u32,     // 4 bytes
143    pub meta:              u32,     // 4 bytes (Bit 0: Color, Bits 1..31: BufferIndex)
144    pub buffer_start_line: u32,     // 4 bytes
145    pub piece_start_char:  u32,     // 4 bytes
146}
147
148const _: () = assert!(size_of::<Node>() == 36);
149
150impl Node {
151    #[inline(always)]
152    #[must_use]
153    pub fn color(&self) -> Color {
154        if (self.meta & 1) == 1 { Color::Red } else { Color::Black }
155    }
156
157    #[inline(always)]
158    pub fn set_color(&mut self, color: Color) {
159        self.meta = (self.meta & !1) | (color as u32);
160    }
161
162    #[inline(always)]
163    #[must_use]
164    pub fn buffer_index(&self) -> BufferRef {
165        BufferRef(self.meta >> 1)
166    }
167
168    #[inline(always)]
169    pub fn set_buffer(&mut self, buffer: BufferRef) {
170        self.meta = (buffer.as_u32() << 1) | (self.meta & 1);
171    }
172}
173
174#[derive(Debug, Clone)]
175pub struct OriginalBuffer {
176    pub text:             Arc<str>,
177    pub newline_offsets:  Arc<[u32]>,
178    pub char_checkpoints: Arc<[(u32, u32)]>,
179}
180
181impl Deref for OriginalBuffer {
182    type Target = str;
183
184    #[inline(always)]
185    fn deref(&self) -> &Self::Target {
186        self.text.as_ref()
187    }
188}
189
190#[derive(Debug, Clone)]
191pub struct Buffers {
192    pub original_buffers: PrimaryMap<BufferRef, OriginalBuffer>,
193
194    pub modifications_char_count:       u32,
195    pub modifications_buffer:           String,
196    pub modifications_newline_offsets:  Vec<u32>,
197    pub modifications_char_checkpoints: Vec<(u32, u32)>,
198}
199
200impl Default for Buffers {
201    #[inline(always)]
202    fn default() -> Self {
203        Self {
204            original_buffers: PrimaryMap::new(),
205            modifications_char_count: 0,
206            modifications_newline_offsets: Vec::with_capacity(1024),
207            modifications_char_checkpoints: Vec::with_capacity(128),
208            modifications_buffer: String::with_capacity(1024 * 64),
209        }
210    }
211}
212
213#[must_use]
214#[inline(always)]
215pub fn count_chars_and_newlines(bytes: &[u8]) -> (u32, u32) {
216    let mut chars    = 0u32;
217    let mut newlines = 0u32;
218
219    for &b in bytes {
220        newlines += (b == b'\n')         as u32;
221        chars    += ((b & 0xC0) != 0x80) as u32;
222    }
223
224    (chars, newlines)
225}
226
227#[must_use]
228#[inline(always)]
229pub fn count_chars_and_newlines_with_offsets_and_checkpoints(
230    bytes: &[u8],
231    offsets: &mut Vec<u32>,
232    checkpoints: &mut Vec<(u32, u32)>,
233) -> (u32, u32) {
234    let mut chars    = 0u32;
235    let mut newlines = 0u32;
236
237    let mut next_checkpoint = CHECKPOINT_INTERVAL;
238
239    for (i, &b) in bytes.iter().enumerate() {
240        if (b as i8) >= -0x40 {
241            if chars == next_checkpoint {
242                checkpoints.push((chars, i as u32));
243                next_checkpoint += CHECKPOINT_INTERVAL;
244            }
245            chars += 1;
246        }
247
248        if b == b'\n' {
249            offsets.push(i as u32);
250            newlines += 1;
251        }
252    }
253
254    (chars, newlines)
255}
256
257impl Buffers {
258    #[inline(always)]
259    #[must_use]
260    pub fn new() -> Self { Self::default() }
261
262    #[inline(always)]
263    #[must_use]
264    pub fn get(&self, buffer: BufferRef) -> &str {
265        if buffer == MOD_BUFFER {
266            &self.modifications_buffer
267        } else {
268            &self.original_buffers[buffer]
269        }
270    }
271
272    #[inline(always)]
273    #[must_use]
274    pub fn get_slice(&self, buffer: BufferRef, offset: u32, len: u32) -> &str {
275        let buf = self.get(buffer);
276        let start = offset as usize;
277        let end = start + len as usize;
278        unsafe { str::from_utf8_unchecked(buf.as_bytes().get_unchecked(start..end)) }
279    }
280
281    #[inline]
282    #[must_use]
283    pub fn count_chars_and_newlines(&self, buffer: BufferRef, start_offset: u32, len: u32) -> (u32, u32) {
284        if len == 0 { return (0, 0) }
285
286        let end_offset = start_offset + len;
287
288        //
289        // O(log N) newlines count
290        //
291        let nl_offsets = self.get_newlines(buffer);
292        let start_nl_index = nl_offsets.binary_search(&start_offset).unwrap_or_else(|x| x);
293        let end_nl_index = nl_offsets.binary_search(&end_offset).unwrap_or_else(|x| x);
294        let newlines = (end_nl_index - start_nl_index) as u32;
295
296        //
297        // O(log N) char count
298        //
299
300        let checkpoints = self.get_checkpoints(buffer);
301        let buffer_text = self.get(buffer);
302
303        //
304        // Gets absolute char count from start of buffer to `target_offset`
305        //
306        let chars_up_to = |target_offset: u32| -> u32 {
307            if target_offset == 0 { return 0; }
308
309            //
310            // Find the index of the first checkpoint that is strictly strictly greater than our target
311            //
312            let partition_index = checkpoints.partition_point(|&(_char_cnt, byte_offset)| byte_offset <= target_offset);
313
314            let (base_chars, base_bytes) = if partition_index > 0 {
315                checkpoints[partition_index - 1]
316            } else {
317                (0, 0)
318            };
319
320            //
321            // Linearly scan only the remaining bytes from the checkpoint to the target
322            //
323            let tail_bytes = &buffer_text[base_bytes as usize .. target_offset as usize];
324            base_chars + bytecount::num_chars(tail_bytes.as_bytes()) as u32
325        };
326
327        let chars = chars_up_to(end_offset) - chars_up_to(start_offset);
328
329        (chars, newlines)
330    }
331
332    #[inline(always)]
333    #[must_use]
334    pub fn count_chars(&self, buffer: BufferRef, offset: u32, len: u32) -> u32 {
335        bytecount::num_chars(self.get_slice(buffer, offset, len).as_bytes()) as u32
336    }
337
338    #[inline(always)]
339    #[must_use]
340    pub fn count_newlines(&self, buffer: BufferRef, offset: u32, len: u32) -> u32 {
341        bytecount::count(self.get_slice(buffer, offset, len).as_bytes(), b'\n') as _
342    }
343
344    #[inline(always)]
345    fn get_newlines(&self, buffer: BufferRef) -> &[u32] {
346        if buffer == MOD_BUFFER {
347            &self.modifications_newline_offsets
348        } else {
349            &self.original_buffers[buffer].newline_offsets
350        }
351    }
352
353    #[inline(always)]
354    fn get_checkpoints(&self, buffer: BufferRef) -> &[(u32, u32)] {
355        if buffer == MOD_BUFFER {
356            &self.modifications_char_checkpoints
357        } else {
358            &self.original_buffers[buffer].char_checkpoints
359        }
360    }
361
362    /// Converts an absolute char index to an absolute byte offset for a specific buffer
363    #[must_use]
364    #[inline]
365    pub fn char_to_byte_absolute(&self, buffer: BufferRef, target_char: u32) -> u32 {
366        if target_char == 0 { return 0; }
367
368        let checkpoints = self.get_checkpoints(buffer);
369        let cp_index = checkpoints.partition_point(|&(c, _)| c <= target_char);
370
371        let (current_char, current_byte) = if cp_index == 0 {
372            (0, 0)
373        } else {
374            unsafe { *checkpoints.get_unchecked(cp_index - 1) }
375        };
376
377        let remainder_chars = target_char - current_char;
378        if remainder_chars == 0 { return current_byte; }
379
380        let text = self.get(buffer);
381        let slice = unsafe { text.get_unchecked(current_byte as usize..) };
382
383        let additional_bytes = slice
384            .char_indices()
385            .nth(remainder_chars as usize).map_or_else(|| slice.len() as u32, |(b, _)| b as u32);  // Fallback to end if out of bounds
386
387        current_byte + additional_bytes
388    }
389
390    /// Converts an absolute byte offset to an absolute char index for a specific buffer
391    #[must_use]
392    #[inline]
393    pub fn byte_to_char_absolute(&self, buffer: BufferRef, target_byte: u32) -> u32 {
394        if target_byte == 0 { return 0; }
395
396        let checkpoints = self.get_checkpoints(buffer);
397        let cp_index = checkpoints.partition_point(|&(_, b)| b <= target_byte);
398
399        let (current_char, current_byte) = if cp_index == 0 {
400            (0, 0)
401        } else {
402            unsafe { *checkpoints.get_unchecked(cp_index - 1) }
403        };
404
405        let remainder_bytes = target_byte - current_byte;
406        if remainder_bytes == 0 { return current_char; }
407
408        let text = self.get(buffer);
409        let slice = unsafe { text.get_unchecked(current_byte as usize..target_byte as usize) };
410        let additional_chars = bytecount::num_chars(slice.as_bytes()) as u32;
411
412        current_char + additional_chars
413    }
414}
415
416#[derive(Debug, Clone)]
417pub struct Pieces {
418    pub nodes: PrimaryMap<NodeRef, Node>,
419}
420
421impl Default for Pieces {
422    #[inline(always)]
423    fn default() -> Self {
424        Self::new()
425    }
426}
427
428impl Pieces {
429    #[inline(always)]
430    #[must_use]
431    pub fn new() -> Self {
432        let mut nodes = PrimaryMap::new();
433        nodes.push(Node {
434            left: NIL, right: NIL, offset: 0,
435            subtree_len: 0, subtree_chars: 0, subtree_newlines: 0,
436            meta: 0, buffer_start_line: 0,
437            piece_start_char: 0
438        });
439        Self { nodes }
440    }
441
442    #[inline(always)]
443    #[must_use]
444    pub fn len(&self) -> usize { self.nodes.len() }
445
446    #[inline(always)]
447    #[must_use]
448    pub fn is_empty(&self) -> bool { self.nodes.is_empty() }
449
450    #[inline(always)]
451    #[must_use]
452    pub fn get(&self, index: NodeRef) -> &Node { &self.nodes[index] }
453
454    #[inline(always)]
455    #[must_use]
456    pub fn get_piece(&self, index: NodeRef) -> Piece {
457        if index == NIL { return Piece::default(); }
458
459        let node = &self.nodes[index];
460        let l = &self.nodes[node.left];
461        let r = &self.nodes[node.right];
462
463        Piece {
464            buffer: node.buffer_index(),
465            byte_offset: node.offset,
466            byte_length: node.subtree_len - l.subtree_len - r.subtree_len,
467            char_count: node.subtree_chars - l.subtree_chars - r.subtree_chars,
468            newline_count: node.subtree_newlines - l.subtree_newlines - r.subtree_newlines,
469            buffer_start_line: node.buffer_start_line,
470            piece_start_char: node.piece_start_char,
471        }
472    }
473
474    #[inline(always)]
475    pub fn alloc(&mut self, color: Color, left: NodeRef, piece: Piece, right: NodeRef) -> NodeRef {
476        let l = &self.nodes[left];
477        let r = &self.nodes[right];
478
479        let mut node = Node {
480            left, right,
481            offset: piece.byte_offset,
482            subtree_len: l.subtree_len + piece.byte_length + r.subtree_len,
483            subtree_chars: l.subtree_chars + piece.char_count + r.subtree_chars,
484            subtree_newlines: l.subtree_newlines + piece.newline_count + r.subtree_newlines,
485            buffer_start_line: piece.buffer_start_line,
486            piece_start_char: piece.piece_start_char,
487            meta: 0,
488        };
489        node.set_color(color);
490        node.set_buffer(piece.buffer);
491
492        self.nodes.push(node)
493    }
494
495    /// Extends a piece without triggering Red-Black structural changes.
496    /// It creates a single new path to the root to maintain persistence.
497    pub fn extend_piece(
498        &mut self,
499        root: NodeRef,
500        piece_start_offset: u32,
501        add_len: u32,
502        add_chars: u32,
503        add_nl: u32,
504    ) -> NodeRef {
505        if root == NIL { return NIL }
506
507        let node = self.nodes[root];
508        let mut new_node = node;
509
510        let left_len = self.nodes[node.left].subtree_len;
511        let piece_len = node.subtree_len - left_len - self.nodes[node.right].subtree_len;
512
513        if piece_start_offset < left_len {
514            new_node.left  = self.extend_piece(node.left, piece_start_offset, add_len, add_chars, add_nl);
515        } else if piece_start_offset > left_len {
516            new_node.right = self.extend_piece(node.right, piece_start_offset - (left_len + piece_len), add_len, add_chars, add_nl);
517        } else {
518            // piece_start_offset == left_len
519            // We found the exact node! No further recursion needed.
520        }
521
522        new_node.subtree_len      += add_len;
523        new_node.subtree_chars    += add_chars;
524        new_node.subtree_newlines += add_nl;
525
526        self.nodes.push(new_node)
527    }
528
529    #[inline]
530    pub fn insert_node(&mut self, root: NodeRef, piece: Piece, at: u32) -> NodeRef {
531        let new_root = self.ins(root, piece, at, 0);
532        let r_node = self.nodes[new_root];
533        let p = self.get_piece(new_root);
534        self.alloc(Color::Black, r_node.left, p, r_node.right)
535    }
536
537    #[inline]
538    pub fn remove_node(&mut self, root: NodeRef, at: u32) -> NodeRef {
539        let new_root = self.rem(root, at, 0);
540        if new_root == NIL { return NIL }
541
542        let r_node = self.nodes[new_root];
543        self.alloc(Color::Black, r_node.left, self.get_piece(new_root), r_node.right)
544    }
545
546    #[inline]
547    fn rem(&mut self, root: NodeRef, at: u32, total: u32) -> NodeRef {
548        if root == NIL { return NIL }
549
550        let node = self.nodes[root];
551        let node_piece = self.get_piece(root);
552        let left_len = self.nodes[node.left].subtree_len;
553
554        match at.cmp(&(total + left_len)) {
555            Ordering::Less => self.remove_left(root, at, total),
556            Ordering::Equal => self.fuse(node.left, node.right),
557            Ordering::Greater => {
558                let next_total = total + left_len + node_piece.byte_length;
559                self.remove_right(root, at, next_total)
560            }
561        }
562    }
563
564    #[inline]
565    fn remove_left(&mut self, root: NodeRef, at: u32, total: u32) -> NodeRef {
566        let node = self.nodes[root];
567        let new_left = self.rem(node.left, at, total);
568        let new_node = self.alloc(Color::Red, new_left, self.get_piece(root), node.right);
569        if self.nodes[node.left].color() == Color::Black {
570            self.balance_left(new_node)
571        } else {
572            new_node
573        }
574    }
575
576    #[inline]
577    fn remove_right(&mut self, root: NodeRef, at: u32, total: u32) -> NodeRef {
578        let node = self.nodes[root];
579        let new_right = self.rem(node.right, at, total);
580        let new_node = self.alloc(Color::Red, node.left, self.get_piece(root), new_right);
581        if self.nodes[node.right].color() == Color::Black {
582            self.balance_right(new_node)
583        } else {
584            new_node
585        }
586    }
587
588    #[inline]
589    fn ins(&mut self, root: NodeRef, p: Piece, at: u32, total_offset: u32) -> NodeRef {
590        if root == NIL { return self.alloc(Color::Red, NIL, p, NIL); }
591
592        let node = self.nodes[root];
593        let node_piece = self.get_piece(root);
594        let left_len = self.nodes[node.left].subtree_len;
595
596        if at < total_offset + left_len + node_piece.byte_length {
597            let lft = self.ins(node.left, p, at, total_offset);
598            self.balance(node.color(), lft, node_piece, node.right)
599        } else {
600            let next_offset = total_offset + left_len + node_piece.byte_length;
601            let rgt = self.ins(node.right, p, at, next_offset);
602            self.balance(node.color(), node.left, node_piece, rgt)
603        }
604    }
605
606    #[inline]
607    fn balance(&mut self, c: Color, l_index: NodeRef, p: Piece, r_index: NodeRef) -> NodeRef {
608        let l = self.nodes[l_index];
609        let r = self.nodes[r_index];
610
611        if c == Color::Black {
612            if l.color() == Color::Red {
613                let ll = self.nodes[l.left];
614                let lr = self.nodes[l.right];
615                if ll.color() == Color::Red {
616                    let new_l = self.alloc(Color::Black, ll.left, self.get_piece(l.left), ll.right);
617                    let new_r = self.alloc(Color::Black, l.right, p, r_index);
618                    return self.alloc(Color::Red, new_l, self.get_piece(l_index), new_r);
619                } else if lr.color() == Color::Red {
620                    let new_l = self.alloc(Color::Black, l.left, self.get_piece(l_index), lr.left);
621                    let new_r = self.alloc(Color::Black, lr.right, p, r_index);
622                    return self.alloc(Color::Red, new_l, self.get_piece(l.right), new_r);
623                }
624            }
625
626            if r.color() == Color::Red {
627                let rl = self.nodes[r.left];
628                let rr = self.nodes[r.right];
629                if rl.color() == Color::Red {
630                    let new_l = self.alloc(Color::Black, l_index, p, rl.left);
631                    let new_r = self.alloc(Color::Black, rl.right, self.get_piece(r_index), r.right);
632                    return self.alloc(Color::Red, new_l, self.get_piece(r.left), new_r);
633                } else if rr.color() == Color::Red {
634                    let new_l = self.alloc(Color::Black, l_index, p, r.left);
635                    let new_r = self.alloc(Color::Black, rr.left, self.get_piece(r.right), rr.right);
636                    return self.alloc(Color::Red, new_l, self.get_piece(r_index), new_r);
637                }
638            }
639        }
640
641        self.alloc(c, l_index, p, r_index)
642    }
643
644    #[inline]
645    fn fuse(&mut self, left: NodeRef, right: NodeRef) -> NodeRef {
646        // match: (left, right)
647
648        // case: (None, r)
649        if left  == NIL { return right }
650        if right == NIL { return left }
651
652        let l_node = self.nodes[left];
653        let r_node = self.nodes[right];
654
655        // match: (left.color, right.color)
656
657        // case: (B, R)
658        if l_node.color() == Color::Black && r_node.color() == Color::Red {
659            let fused = self.fuse(left, r_node.left);
660            return self.alloc(Color::Red, fused, self.get_piece(right), r_node.right);
661        }
662
663        // case: (R, B)
664        if l_node.color() == Color::Red && r_node.color() == Color::Black {
665            let fused = self.fuse(l_node.right, right);
666            return self.alloc(Color::Red, l_node.left, self.get_piece(left), fused);
667        }
668
669        // case: (R, R)
670        if l_node.color() == Color::Red && r_node.color() == Color::Red {
671            let fused = self.fuse(l_node.right, r_node.left);
672            let f_node = self.nodes[fused];
673            if fused != NIL && f_node.color() == Color::Red {
674                let new_l = self.alloc(Color::Red, l_node.left, self.get_piece(left), f_node.left);
675                let new_r = self.alloc(Color::Red, f_node.right, self.get_piece(right), r_node.right);
676                return self.alloc(Color::Red, new_l, self.get_piece(fused), new_r);
677            }
678            let new_r = self.alloc(Color::Red, fused, self.get_piece(right), r_node.right);
679            return self.alloc(Color::Red, l_node.left, self.get_piece(left), new_r);
680        }
681
682        // case: (B, B)
683
684        let fused = self.fuse(l_node.right, r_node.left);
685        let f_node = self.nodes[fused];
686        if fused != NIL && f_node.color() == Color::Red {
687            let new_l = self.alloc(Color::Black, l_node.left, self.get_piece(left), f_node.left);
688            let new_r = self.alloc(Color::Black, f_node.right, self.get_piece(right), r_node.right);
689            return self.alloc(Color::Red, new_l, self.get_piece(fused), new_r);
690        }
691
692        let new_r = self.alloc(Color::Black, fused, self.get_piece(right), r_node.right);
693        let new_node = self.alloc(Color::Red, l_node.left, self.get_piece(left), new_r);
694        self.balance_left(new_node)
695    }
696
697    #[inline]
698    fn balance_left(&mut self, left: NodeRef) -> NodeRef {
699        let l_node = self.nodes[left];
700        let ll_node = self.nodes[l_node.left];
701        let lr_node = self.nodes[l_node.right];
702
703        // match: (color_l, color_r, color_r_l)
704
705        // case: (Some(R), ..)
706        if l_node.left != NIL && ll_node.color() == Color::Red {
707            let new_ll = self.alloc(Color::Black, ll_node.left, self.get_piece(l_node.left), ll_node.right);
708            return self.alloc(Color::Red, new_ll, self.get_piece(left), l_node.right);
709        }
710
711        // case: (_, Some(B), _)
712        if l_node.right != NIL && lr_node.color() == Color::Black {
713            let new_lr = self.alloc(Color::Red, lr_node.left, self.get_piece(l_node.right), lr_node.right);
714            let new_l = self.alloc(Color::Black, l_node.left, self.get_piece(left), new_lr);
715            let nl = self.nodes[new_l];
716            return self.balance(Color::Black, nl.left, self.get_piece(new_l), nl.right);
717        }
718
719        // case: (_, Some(R), Some(B))
720        if l_node.right != NIL && lr_node.color() == Color::Red {
721            let lrl_node = self.nodes[lr_node.left];
722            if lr_node.left != NIL && lrl_node.color() == Color::Black {
723                let lrr_node = self.nodes[lr_node.right];
724                let new_lrr = self.alloc(Color::Red, lrr_node.left, self.get_piece(lr_node.right), lrr_node.right);
725                let unbal_r = self.alloc(Color::Black, lrl_node.right, self.get_piece(l_node.right), new_lrr);
726                let ur_node = self.nodes[unbal_r];
727                let new_r = self.balance(ur_node.color(), ur_node.left, self.get_piece(unbal_r), ur_node.right);
728                let new_l = self.alloc(Color::Black, l_node.left, self.get_piece(left), lrl_node.left);
729                return self.alloc(Color::Red, new_l, self.get_piece(lr_node.left), new_r);
730            }
731        }
732
733        left
734    }
735
736    #[inline]
737    fn balance_right(&mut self, right: NodeRef) -> NodeRef {
738        let r_node = self.nodes[right];
739        let rl_node = self.nodes[r_node.left];
740        let rr_node = self.nodes[r_node.right];
741
742        // match: (color_l, color_l_r, color_r)
743
744        // case: (.., Some(R))
745        if r_node.right != NIL && rr_node.color() == Color::Red {
746            let new_rr = self.alloc(Color::Black, rr_node.left, self.get_piece(r_node.right), rr_node.right);
747            return self.alloc(Color::Red, r_node.left, self.get_piece(right), new_rr);
748        }
749
750        // case: (Some(B), ..)
751        if r_node.left != NIL && rl_node.color() == Color::Black {
752            let new_rl = self.alloc(Color::Red, rl_node.left, self.get_piece(r_node.left), rl_node.right);
753            let new_r = self.alloc(Color::Black, new_rl, self.get_piece(right), r_node.right);
754            let nr = self.nodes[new_r];
755            return self.balance(Color::Black, nr.left, self.get_piece(new_r), nr.right);
756        }
757
758        // case: (Some(R), Some(B), _)
759        if r_node.left != NIL && rl_node.color() == Color::Red {
760            let rlr_node = self.nodes[rl_node.right];
761            if rl_node.right != NIL && rlr_node.color() == Color::Black {
762                let rll_node = self.nodes[rl_node.left];
763                let new_rll = self.alloc(Color::Red, rll_node.left, self.get_piece(rl_node.left), rll_node.right);
764                let unbal_l = self.alloc(Color::Black, new_rll, self.get_piece(r_node.left), rlr_node.left);
765                let ul_node = self.nodes[unbal_l];
766                let new_l = self.balance(ul_node.color(), ul_node.left, self.get_piece(unbal_l), ul_node.right);
767                let new_r = self.alloc(Color::Black, rlr_node.right, self.get_piece(right), r_node.right);
768                return self.alloc(Color::Red, new_l, self.get_piece(rl_node.right), new_r);
769            }
770        }
771
772        right
773    }
774
775    #[inline]
776    #[must_use]
777    pub fn find_offset(
778        &self,
779        mut root: NodeRef,
780        mut target_offset: u32,
781        prefer_left: bool
782    ) -> Option<(NodeRef, u32)> {
783        while root != NIL {
784            let node = &self.nodes[root];
785            let left_len = self.nodes[node.left].subtree_len;
786            let piece_len = self.get_piece(root).byte_length;
787
788            if target_offset < left_len {
789                root = node.left;
790
791            } else if target_offset == left_len + piece_len && prefer_left {
792                // We are perfectly on the boundary and want to merge with the left piece
793                return Some((root, piece_len));
794
795            } else if target_offset < left_len + piece_len {
796                return Some((root, target_offset - left_len));
797
798            } else {
799                target_offset -= left_len + piece_len;
800                root = node.right;
801            }
802        }
803
804        None
805    }
806}
807
808impl PieceTree {
809    /// Slices a tree exactly at `offset`, returning (`LeftTree`, `RightTree`)
810    #[inline]
811    pub fn split(&mut self, root: NodeRef, offset: u32) -> (NodeRef, NodeRef) {
812        if root == NIL {
813            return (NIL, NIL);
814        }
815
816        let node = self.pieces.nodes[root];
817        let p = self.pieces.get_piece(root);
818        let left_len = self.pieces.nodes[node.left].subtree_len;
819
820        if offset < left_len {
821            let (ll, lr) = self.split(node.left, offset);
822
823            // Fast path: Use join_with_middle directly
824            let new_right = self.pieces.join_with_middle(lr, p, node.right);
825            (ll, new_right)
826
827        } else if offset > left_len + p.byte_length {
828            let (rl, rr) = self.split(node.right, offset - left_len - p.byte_length);
829
830            // Fast path: Use join_with_middle directly
831            let new_left = self.pieces.join_with_middle(node.left, p, rl);
832            (new_left, rr)
833
834        } else {
835            let rel_offset = offset - left_len;
836
837            if rel_offset == 0 {
838                (node.left, self.pieces.join_with_middle(NIL, p, node.right))
839
840            } else if rel_offset == p.byte_length {
841                (self.pieces.join_with_middle(node.left, p, NIL), node.right)
842
843            } else {
844                //
845                // We have to slice the Piece exactly in half!
846                //
847                let left_len = rel_offset;
848                let right_len = p.byte_length - rel_offset;
849
850                //
851                // Only scan the smaller half of the piece to avoid O(N) bottlenecks
852                //
853                let (left_chars, left_nl) = if left_len <= right_len {
854                    // Left side is shorter, scan normally
855                    self.buffers.count_chars_and_newlines(p.buffer, p.byte_offset, left_len)
856                } else {
857                    // Right side is shorter, scan it and subtract from the known totals
858                    let (r_chars, r_nl) = self.buffers.count_chars_and_newlines(
859                        p.buffer, p.byte_offset + left_len, right_len
860                    );
861
862                    (p.char_count - r_chars, p.newline_count - r_nl)
863                };
864
865                let left_stump = Piece {
866                    buffer: p.buffer,
867                    byte_offset: p.byte_offset,
868                    byte_length: left_len,
869                    newline_count: left_nl,
870                    char_count: left_chars,
871                    buffer_start_line: p.buffer_start_line,
872                    piece_start_char: p.piece_start_char,
873                };
874
875                let right_stump = Piece {
876                    buffer: p.buffer,
877                    byte_offset: p.byte_offset + left_len,
878                    byte_length: right_len,
879                    newline_count: p.newline_count - left_nl,
880                    char_count: p.char_count - left_chars,
881                    buffer_start_line: p.buffer_start_line + left_nl,
882                    piece_start_char: p.piece_start_char + left_chars,
883                };
884
885                let new_left = self.pieces.join_with_middle(node.left, left_stump, NIL);
886                let new_right = self.pieces.join_with_middle(NIL, right_stump, node.right);
887                (new_left, new_right)
888            }
889        }
890    }
891
892    /// Glues two arbitrary trees together by extracting the max element of the left tree
893    #[inline]
894    pub fn concat(&mut self, left: NodeRef, right: NodeRef) -> NodeRef {
895        //
896        // Group the evaluation so we can force a Black root on the way out!
897        //
898        let new_root = if left == NIL {
899            right
900        } else if right == NIL {
901            left
902        } else {
903            let mut curr = left;
904            while self.pieces.nodes[curr].right != NIL {
905                curr = self.pieces.nodes[curr].right;
906            }
907            let max_piece = self.pieces.get_piece(curr);
908            let max_offset = self.pieces.nodes[left].subtree_len - max_piece.byte_length;
909
910            let left_without_max = self.pieces.remove_node(left, max_offset);
911
912            self.pieces.join_with_middle(left_without_max, max_piece, right)
913        };
914
915        //
916        // Even if we early-returned right, we MUST ensure the final returned root is Black.
917        //
918        if new_root != NIL && self.pieces.nodes[new_root].color() == Color::Red {
919            let p = self.pieces.get_piece(new_root);
920            let r = self.pieces.nodes[new_root];
921            return self.pieces.alloc(Color::Black, r.left, p, r.right);
922        }
923
924        new_root
925    }
926}
927
928impl Pieces {
929    /// Recursively counts the black height of a given subtree
930    #[inline(always)]
931    #[must_use]
932    pub fn black_height(&self, mut node: NodeRef) -> u32 {
933        let mut h = 0;
934        while node != NIL {
935            if self.nodes[node].color() == Color::Black {
936                h += 1;
937            }
938            node = self.nodes[node].left;
939        }
940        h
941    }
942
943    /// Safely joins two arbitrary Red-Black trees using a middle element
944    #[inline(always)]
945    pub fn join_with_middle(&mut self, left: NodeRef, piece: Piece, right: NodeRef) -> NodeRef {
946        let hl = self.black_height(left);
947        let hr = self.black_height(right);
948
949        let new_root = if hl > hr {
950            self.join_right(left, piece, right, hl, hr)
951        } else if hr > hl {
952            self.join_left(left, piece, right, hl, hr)
953        } else {
954            self.alloc(Color::Red, left, piece, right)
955        };
956
957        // Enforce the Red-Black root constraint!
958        if new_root != NIL && self.nodes[new_root].color() == Color::Red {
959            let p = self.get_piece(new_root);
960            let r = self.nodes[new_root];
961            return self.alloc(Color::Black, r.left, p, r.right);
962        }
963
964        new_root
965    }
966
967    #[inline(always)]
968    fn join_right(&mut self, left: NodeRef, piece: Piece, right: NodeRef, hl: u32, hr: u32) -> NodeRef {
969        if hl == hr {
970            return self.alloc(Color::Red, left, piece, right);
971        }
972
973        let l_node = self.nodes[left];
974        let next_hl = if l_node.color() == Color::Black { hl - 1 } else { hl };
975
976        let new_right = self.join_right(l_node.right, piece, right, next_hl, hr);
977        let p = self.get_piece(left);
978
979        self.balance(l_node.color(), l_node.left, p, new_right)
980    }
981
982    #[inline(always)]
983    fn join_left(&mut self, left: NodeRef, piece: Piece, right: NodeRef, hl: u32, hr: u32) -> NodeRef {
984        if hl == hr {
985            return self.alloc(Color::Red, left, piece, right);
986        }
987
988        let r_node = self.nodes[right];
989        let next_hr = if r_node.color() == Color::Black { hr - 1 } else { hr };
990
991        let new_left = self.join_left(left, piece, r_node.left, hl, next_hr);
992        let p = self.get_piece(right);
993
994        self.balance(r_node.color(), new_left, p, r_node.right)
995    }
996}
997
998#[derive(Debug, Clone)]
999pub struct PieceTree {
1000    //
1001    // `last_insert_end`      is the mod-buffer offset one past the last inserted byte.
1002    // `last_insert_tree_end` is the document   offset one past that insertion.
1003    //
1004    // Both are u32::MAX when unset (after removes, undos, or on init).
1005    //
1006    last_mod_end:  u32,  // fredbuf's last_insert     (BufferCursor)
1007    last_tree_end: u32,  // fredbuf's end_last_insert (CharOffset)
1008
1009    pub root: NodeRef,
1010
1011    /// Tracks nested undo groups. 0 if we are outside any group.
1012    pub transaction_depth: usize,
1013
1014    pub undo_stack: Vec<HistoryEntry>,
1015    pub redo_stack: Vec<HistoryEntry>,
1016
1017    pub pieces:  Pieces,
1018    pub buffers: Buffers,
1019
1020    pub scratch_index_map: Vec<u32>,
1021}
1022
1023impl Default for PieceTree {
1024    fn default() -> Self {
1025        Self {
1026            last_mod_end:      u32::MAX,
1027            last_tree_end:     u32::MAX,
1028            root:              NIL,
1029            transaction_depth: 0,
1030            undo_stack:        Vec::new(),
1031            redo_stack:        Vec::new(),
1032            pieces:            Pieces::new(),
1033            buffers:           Buffers::new(),
1034            scratch_index_map: Vec::new(),
1035        }
1036    }
1037}
1038
1039impl core::fmt::Display for PieceTree {
1040    #[inline(always)]
1041    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1042        let walker = TreeWalker::new(self);
1043        for char in walker {
1044            write!(f, "{char}")?;
1045        }
1046
1047        Ok(())
1048    }
1049}
1050
1051impl<T> From<T> for PieceTree where T: AsRef<str> {
1052    #[inline(always)]
1053    fn from(value: T) -> Self {
1054        Self::from_str(value.as_ref())
1055    }
1056}
1057
1058impl str::FromStr for PieceTree {
1059    type Err = core::convert::Infallible;
1060
1061    #[inline(always)]
1062    fn from_str(s: &str) -> Result<Self, Self::Err> {
1063        Ok(PieceTree::from(s))
1064    }
1065}
1066
1067impl PieceTree {
1068    #[inline(always)]
1069    #[must_use]
1070    pub fn new() -> Self {
1071        Self::default()
1072    }
1073
1074    #[inline(always)]
1075    #[must_use]
1076    pub fn from_str(text: impl Into<Arc<str>>) -> Self {
1077        let text = text.into();
1078
1079        let mut tree = PieceTree::new();
1080        if text.is_empty() { return tree; }
1081
1082        let bytes = text.as_bytes();
1083        let byte_length = bytes.len() as u32;
1084
1085        let mut offsets     = Vec::with_capacity(bytes.len() / 40);
1086        let mut checkpoints = Vec::with_capacity(bytes.len() / 256);
1087
1088        let (char_count, newline_count) =
1089            count_chars_and_newlines_with_offsets_and_checkpoints(
1090                text.as_bytes(),
1091                &mut offsets,
1092                &mut checkpoints,
1093            );
1094
1095        let buffer = tree.buffers.original_buffers.push(OriginalBuffer {
1096            newline_offsets:  offsets.into(),
1097            char_checkpoints: checkpoints.into(),
1098            text:             text.into(),
1099        });
1100
1101        let piece = Piece {
1102            buffer,
1103            byte_offset:       0,
1104            byte_length,
1105            char_count,
1106            newline_count,
1107            buffer_start_line: 0,
1108            piece_start_char:  0,
1109        };
1110
1111        tree.root = tree.pieces.insert_node(NIL, piece, 0);
1112        tree
1113    }
1114
1115    /// Splits the tree at `byte_offset`, returning the right half as a new
1116    /// `PieceTree`.
1117    ///
1118    /// After this call, `self` contains `[0, byte_offset)` and
1119    /// the returned tree contains `[byte_offset, end)`.
1120    ///
1121    /// `byte_offset` must be on a UTF-8 char boundary. Panics if `byte_offset > self.len_bytes()`.
1122    ///
1123    /// Runs in O(log n).
1124    pub fn split_off(&mut self, byte_offset: u32) -> Self {
1125        assert!(
1126            byte_offset <= self.len_bytes(),
1127            "split_off: byte_offset {} out of bounds (len = {})",
1128            byte_offset, self.len_bytes()
1129        );
1130
1131        if byte_offset == 0 {
1132            //
1133            // Everything goes to the right half -> self becomes empty
1134            //
1135            let mut right = Self::default();
1136            right.pieces  = self.pieces.clone();
1137            right.buffers = self.buffers.clone();
1138            right.root    = self.root;
1139            self.root     = NIL;
1140            self.last_mod_end  = u32::MAX;
1141            self.last_tree_end = u32::MAX;
1142
1143            return right;
1144        }
1145
1146        if byte_offset == self.len_bytes() {
1147            //
1148            // Everything stays in self          -> return empty right half
1149            //
1150
1151            let mut right = Self::default();
1152            right.pieces  = self.pieces.clone();
1153            right.buffers = self.buffers.clone();
1154            // right.root stays NIL
1155
1156            return right;
1157        }
1158
1159        //
1160        // Split the tree at the byte boundary
1161        //
1162        let (left_root, right_root) = self.split(self.root, byte_offset);
1163
1164        //
1165        // Self keeps the left half
1166        //
1167        self.root          = left_root;
1168        self.last_mod_end  = u32::MAX;
1169        self.last_tree_end = u32::MAX;
1170        self.undo_stack.clear();
1171        self.redo_stack.clear();
1172
1173        //
1174        // Right half gets a new PieceTree sharing the same arena and buffers
1175        //
1176        let mut right = Self::default();
1177        right.pieces  = self.pieces.clone();
1178        right.buffers = self.buffers.clone();
1179        right.root    = right_root;
1180
1181        right
1182    }
1183
1184    #[cfg(feature = "write")]
1185    pub fn from_reader<R: std::io::Read>(mut reader: R) -> std::io::Result<Self> {
1186        const CHUNK: usize = 64 * 1024;
1187
1188        let mut buf  = [0u8; CHUNK];
1189        let mut text = String::new();
1190        let mut tail = 0usize;
1191
1192        loop {
1193            let n = reader.read(&mut buf[tail..])?;
1194
1195            if n == 0 {
1196                // EOF
1197                if tail > 0 {
1198                    return Err(std::io::Error::new(
1199                        std::io::ErrorKind::InvalidData,
1200                        "stream ended with incomplete UTF-8 sequence",
1201                    ));
1202                }
1203
1204                break;
1205            }
1206
1207            let filled = tail + n;
1208            let slice  = &buf[..filled];
1209
1210            //
1211            // Find how much is valid UTF-8
1212            //
1213            let valid_up_to = match str::from_utf8(slice) {
1214                Ok(_)  => filled,
1215                Err(e) => {
1216                    if e.error_len().is_some() {
1217                        return Err(std::io::Error::new(
1218                            std::io::ErrorKind::InvalidData,
1219                            "stream did not contain valid UTF-8",
1220                        ));
1221                    }
1222                    e.valid_up_to()
1223                }
1224            };
1225
1226            //
1227            // SAFETY: validated above
1228            //
1229            text.push_str(unsafe { std::str::from_utf8_unchecked(&slice[..valid_up_to]) });
1230
1231            //
1232            // Carry over the incomplete sequence (0-3 bytes) to next iteration
1233            //
1234            tail = filled - valid_up_to;
1235            if tail > 0 {
1236                buf.copy_within(valid_up_to..filled, 0);
1237            }
1238        }
1239
1240        Ok(Self::from_str(text))
1241    }
1242}
1243
1244impl PieceTree {
1245    /// Captures the current state of the document.
1246    #[inline]
1247    #[must_use]
1248    pub fn take_snapshot(&self, current_cursor: u32) -> HistoryEntry {
1249        HistoryEntry {
1250            root: self.root,
1251            cursor_offset: current_cursor,
1252        }
1253    }
1254
1255    /// Restores the tree to a previously saved snapshot.
1256    /// Returns the cursor offset from the snapshot.
1257    #[inline]
1258    #[must_use]
1259    pub fn snap_to(&mut self, snapshot: HistoryEntry, current_cursor: u32) -> u32 {
1260        if self.root == snapshot.root {
1261            return snapshot.cursor_offset;
1262        }
1263
1264        //
1265        // Prevent snapping while in the middle of an active transaction
1266        //
1267        assert!(self.transaction_depth == 0, "Cannot snap_to during an active undo group");
1268
1269        //
1270        // Save the current state to the undo stack so the user can undo the jump
1271        //
1272        self.undo_stack.push(HistoryEntry {
1273            root: self.root,
1274            cursor_offset: current_cursor,
1275        });
1276
1277        //
1278        // Clear the redo stack
1279        //
1280        self.redo_stack.clear();
1281
1282        //
1283        // Invalidate caches
1284        //
1285        self.last_tree_end = u32::MAX;
1286        self.last_mod_end  = u32::MAX;
1287
1288        //
1289        //  Restore the tree
1290        //
1291        self.root = snapshot.root;
1292
1293        snapshot.cursor_offset
1294    }
1295}
1296
1297impl PieceTree {
1298    /// Starts a new undo group, saves the current state if this is the outermost group.
1299    #[inline]
1300    pub fn begin_undo_group(&mut self, cursor_offset: u32) {
1301        if self.transaction_depth == 0 {
1302            self.commit_head(cursor_offset);
1303        }
1304
1305        self.transaction_depth += 1;
1306    }
1307
1308    /// Ends the current undo group
1309    #[inline]
1310    pub fn end_undo_group(&mut self) {
1311        if self.transaction_depth > 0 {
1312            self.transaction_depth -= 1;
1313        }
1314    }
1315
1316    #[inline(always)]
1317    pub fn commit_head(&mut self, cursor_offset: u32) {
1318        if let Some(last) = self.undo_stack.last() {
1319            if last.root == self.root {
1320                return;  // Root hasn't changed, skip duplication
1321            }
1322        }
1323
1324        self.undo_stack.push(HistoryEntry { root: self.root, cursor_offset });
1325        self.redo_stack.clear();
1326    }
1327
1328    #[inline(always)]
1329    pub fn try_undo(&mut self, current_cursor: u32) -> Option<u32> {
1330        if self.transaction_depth > 0 {
1331            return None;
1332        }
1333
1334        if let Some(entry) = self.undo_stack.pop() {
1335            self.last_tree_end = u32::MAX;
1336            self.last_mod_end  = u32::MAX;
1337
1338            self.redo_stack.push(HistoryEntry {
1339                root: self.root, cursor_offset: current_cursor
1340            });
1341            self.root = entry.root;
1342            return Some(entry.cursor_offset);
1343        }
1344
1345        None
1346    }
1347
1348    #[inline(always)]
1349    pub fn try_redo(&mut self, current_cursor: u32) -> Option<u32> {
1350        if let Some(entry) = self.redo_stack.pop() {
1351            self.last_tree_end = u32::MAX;
1352            self.last_mod_end  = u32::MAX;
1353
1354            self.undo_stack.push(HistoryEntry {
1355                root: self.root, cursor_offset: current_cursor
1356            });
1357            self.root = entry.root;
1358            return Some(entry.cursor_offset);
1359        }
1360
1361        None
1362    }
1363
1364    #[inline(always)]
1365    #[must_use]
1366    pub fn total_length(&self) -> u32 {
1367        self.pieces.get(self.root).subtree_len
1368    }
1369
1370    #[inline(always)]
1371    pub fn apply_edits(&mut self, primary_cursor_offset: u32, edits: &mut [Edit]) {
1372        if edits.is_empty() { return }
1373
1374        self.begin_undo_group(primary_cursor_offset);
1375
1376        edits.sort_by_key(|b| core::cmp::Reverse(b.offset()));
1377        for edit in edits {
1378            match edit {
1379                Edit::Insert { offset, text }   => self.insert_no_commit(*offset, text),
1380                Edit::Remove { offset, length } => self.remove_no_commit(*offset, *length),
1381            }
1382        }
1383
1384        self.end_undo_group();
1385    }
1386
1387    /// Inserts a single character at the specified logical byte offset.
1388    #[inline(always)]
1389    pub fn insert_char(&mut self, offset: u32, ch: char) {
1390        self.commit_head(offset);
1391        self.insert_char_no_commit(offset, ch);
1392    }
1393
1394    /// Inserts a single character at the specified logical byte offset.
1395    #[inline(always)]
1396    pub fn insert_char_no_commit(&mut self, offset: u32, ch: char) {
1397        let mut buf = [0; 4];
1398        self.insert_no_commit(offset, ch.encode_utf8(&mut buf));
1399    }
1400
1401    #[inline(always)]
1402    pub fn insert(&mut self, offset: u32, text: &str) {
1403        if text.is_empty() { return }
1404
1405        let auto_group = self.transaction_depth == 0;
1406        if auto_group {
1407            self.begin_undo_group(offset);
1408        }
1409
1410        self.insert_no_commit(offset, text);
1411
1412        if auto_group {
1413            self.end_undo_group();
1414        }
1415    }
1416
1417    #[inline(always)]
1418    pub fn remove_at(&mut self, offset: u32, length: u32) {
1419        if length == 0 || self.root == NIL { return }
1420
1421        let auto_group = self.transaction_depth == 0;
1422        if auto_group {
1423            self.begin_undo_group(offset);
1424        }
1425
1426        self.remove_no_commit(offset, length);
1427
1428        if auto_group {
1429            self.end_undo_group();
1430        }
1431    }
1432
1433    pub fn insert_no_commit(&mut self, offset: u32, text: &str) {
1434        let mod_offset = self.buffers.modifications_buffer.len() as u32;
1435        let start_line_in_buffer = self.buffers.modifications_newline_offsets.len() as u32;
1436
1437        let total_char_count_before = self.buffers.modifications_char_count;
1438        let mut total_char_count = total_char_count_before;
1439
1440        self.buffers.modifications_newline_offsets.reserve(text.len() / 20 + 1);
1441        self.buffers.modifications_char_checkpoints.reserve(text.len() / CHECKPOINT_INTERVAL as usize + 1);
1442
1443        let mut newline_count = 0;
1444        let mut char_count = 0;
1445        {
1446            let rem = total_char_count % CHECKPOINT_INTERVAL;
1447            let mut next_checkpoint = total_char_count + ((CHECKPOINT_INTERVAL - rem) % CHECKPOINT_INTERVAL);
1448
1449            if next_checkpoint == 0 {
1450                next_checkpoint = CHECKPOINT_INTERVAL;
1451            }
1452
1453            //
1454            // @Cutnpaste from count_chars_and_newlines_with_offsets_and_checkpoints
1455            //
1456            for (i, b) in text.bytes().enumerate() {
1457                if (b as i8) >= -0x40 {
1458                    if total_char_count == next_checkpoint {
1459                        self.buffers.modifications_char_checkpoints.push((total_char_count, mod_offset + i as u32));
1460                        next_checkpoint += CHECKPOINT_INTERVAL;
1461                    }
1462
1463                    total_char_count += 1;
1464                    char_count += 1;
1465                }
1466
1467                if b == b'\n' {
1468                    self.buffers.modifications_newline_offsets.push(mod_offset + i as u32);
1469                    newline_count += 1;
1470                }
1471            }
1472        }
1473
1474        self.buffers.modifications_char_count = total_char_count;
1475        self.buffers.modifications_buffer.push_str(text);
1476        let text_len = text.len() as u32;
1477
1478        let new_mod_end  = mod_offset + text_len;
1479        let new_tree_end = offset + text_len;
1480
1481        //
1482        // Fast path for sequential typing
1483        //
1484        if offset > 0
1485        && self.last_tree_end == offset
1486        && self.last_mod_end == mod_offset
1487        && let Some((prev_index, prev_rel)) = self.find_position(offset - 1, false)
1488        {
1489            let prev = self.pieces.get_piece(prev_index);
1490
1491            //
1492            // Predecessor must be a mod-buf piece ending at mod_offset.
1493            //
1494            if prev.buffer == MOD_BUFFER && prev.byte_offset + prev.byte_length == mod_offset {
1495                let prev_start = (offset - 1) - prev_rel;
1496
1497                self.root = self.pieces.extend_piece(
1498                    self.root,
1499                    prev_start,
1500                    text_len,
1501                    char_count,
1502                    newline_count
1503                );
1504
1505                self.last_mod_end  = new_mod_end;
1506                self.last_tree_end = new_tree_end;
1507                return;
1508            }
1509        }
1510
1511        let new_piece = Piece {
1512            buffer: MOD_BUFFER,
1513            byte_offset: mod_offset,
1514            byte_length: text_len,
1515            newline_count,
1516            char_count,
1517            buffer_start_line: start_line_in_buffer,
1518            piece_start_char:  total_char_count - char_count,
1519        };
1520
1521        if self.root == NIL {
1522            self.root = self.pieces.insert_node(self.root, new_piece, offset);
1523        } else {
1524            //
1525            // Snip the tree exactly at the insertion offset
1526            //
1527            let (left, right) = self.split(self.root, offset);
1528
1529            //
1530            // Sandwich the new piece directly between the left and right trees!
1531            //
1532            self.root = self.pieces.join_with_middle(left, new_piece, right);
1533
1534            //
1535            // Clean up the resulting seams
1536            //
1537
1538            // Attempt to merge the left side with new_piece
1539            self.try_merge_at(offset);
1540
1541            // Attempt to merge new_piece with the right side
1542            self.try_merge_at(offset + text_len);
1543        }
1544
1545        self.last_mod_end  = new_mod_end;
1546        self.last_tree_end = new_tree_end;
1547    }
1548
1549    pub fn remove_no_commit(&mut self, offset: u32, mut length: u32) {
1550        let total = self.total_length();
1551        if offset >= total { return; }
1552        if offset + length > total { length = total - offset; }
1553        if length == 0 { return }
1554
1555        //
1556        // Snip off the portion of the tree that comes BEFORE the deletion
1557        //
1558        let (left, remainder) = self.split(self.root, offset);
1559
1560        //
1561        // Snip the deleted portion out of the remainder
1562        //
1563        let (_deleted, right) = self.split(remainder, length);
1564
1565        //
1566        // Glue the surviving outer halves back together
1567        //
1568        self.root = self.concat(left, right);
1569
1570        //
1571        // Clean up the resulting seam
1572        //
1573        self.try_merge_at(offset);
1574    }
1575
1576    #[inline]
1577    fn try_merge_at(&mut self, pos: u32) {
1578        if pos == 0 { return }
1579
1580        let Some((right_index, right_rel)) = self.find_position(pos, false) else { return };
1581
1582        if right_rel != 0 { return }
1583
1584        let right = self.pieces.get_piece(right_index);
1585        self.try_merge_right_with_left(pos, right);
1586    }
1587
1588    // Returns Some((merged_start, merged_piece)) if merge happened, None otherwise.
1589    #[inline]
1590    fn try_merge_right_with_left(&mut self, right_start: u32, right: Piece) -> Option<(u32, Piece)> {
1591        if right_start == 0 { return None; }
1592        if right.buffer != MOD_BUFFER { return None; }
1593
1594        let (left_index, left_rel) = self.find_position(right_start - 1, false)?;
1595        let left = self.pieces.get_piece(left_index);
1596
1597        if left.buffer != MOD_BUFFER { return None; }
1598        if left.byte_offset + left.byte_length != right.byte_offset { return None; }
1599
1600        let left_start = (right_start - 1) - left_rel;
1601
1602        self.root = self.pieces.remove_node(self.root, left_start);
1603        self.root = self.pieces.remove_node(self.root, left_start);
1604
1605        let merged = Piece {
1606            buffer:  MOD_BUFFER,
1607            byte_offset:        left.byte_offset,
1608            byte_length:        left.byte_length        + right.byte_length,
1609            newline_count: left.newline_count + right.newline_count,
1610            char_count:    left.char_count    + right.char_count,
1611            buffer_start_line: left.buffer_start_line,
1612            piece_start_char: left.piece_start_char,  // right side extends, left start unchanged
1613        };
1614        self.root = self.pieces.insert_node(self.root, merged, left_start);
1615        Some((left_start, merged))
1616    }
1617
1618    #[inline(always)]
1619    #[must_use]
1620    pub fn get_piece(&self, index: NodeRef) -> Piece {
1621        self.pieces.get_piece(index)
1622    }
1623
1624    #[inline]
1625    #[must_use]
1626    pub fn find_position(&self, offset: u32, prefer_left: bool) -> Option<(NodeRef, u32)> {
1627        let total = self.total_length();
1628        if offset > total { return None; }
1629
1630        if offset == total && total > 0 {
1631            let mut current = self.root;
1632            let mut last_valid = NIL;
1633            while current != NIL {
1634                last_valid = current;
1635                current = self.pieces.get(current).right;
1636            }
1637            let len = self.pieces.get_piece(last_valid).byte_length;
1638            return Some((last_valid, len));
1639        }
1640
1641        self.pieces.find_offset(self.root, offset, prefer_left)
1642    }
1643
1644    #[cfg(feature = "write")]
1645    #[inline]
1646    pub fn write_to<W: std::io::Write>(&self, mut writer: W) -> std::io::Result<()> {
1647        if self.root == NIL {
1648            return Ok(());
1649        }
1650
1651        for (_, piece) in self.pieces() {
1652            let slice = self.buffers.get_slice(piece.buffer, piece.byte_offset, piece.byte_length);
1653            writer.write_all(slice.as_bytes())?;
1654        }
1655
1656        writer.flush()
1657    }
1658}
1659
1660impl PieceTree {
1661    #[inline]
1662    pub fn compact(&mut self) {
1663        #[inline(always)]
1664        fn copy_node(
1665            old_index: NodeRef, old_arena: &Pieces,
1666            new_arena: &mut Pieces, index_map: &mut [u32]
1667        ) -> NodeRef {
1668            if old_index == NIL { return NIL }
1669
1670            if index_map[old_index.index()] != 0 {
1671                return NodeRef::new(index_map[old_index.index()] as usize);
1672            }
1673
1674            let node = old_arena.get(old_index);
1675            let left_new = copy_node(node.left, old_arena, new_arena, index_map);
1676            let right_new = copy_node(node.right, old_arena, new_arena, index_map);
1677
1678            let new_index = NodeRef::new(new_arena.nodes.len());
1679            new_arena.nodes.push(Node {
1680                left: left_new,
1681                right: right_new,
1682                offset: node.offset,
1683                subtree_len: node.subtree_len,
1684                subtree_chars: node.subtree_chars,
1685                subtree_newlines: node.subtree_newlines,
1686                piece_start_char: node.piece_start_char,
1687                meta: node.meta,
1688                buffer_start_line: node.buffer_start_line
1689            });
1690
1691            index_map[old_index.index()] = new_index.index() as u32;
1692            new_index
1693        }
1694
1695        let mut new_pieces = Pieces::new();
1696
1697        self.scratch_index_map.clear();
1698        self.scratch_index_map.resize(self.pieces.nodes.len(), 0);
1699
1700        self.root = copy_node(self.root, &self.pieces, &mut new_pieces, &mut self.scratch_index_map);
1701        for entry in &mut self.undo_stack {
1702            entry.root = copy_node(entry.root, &self.pieces, &mut new_pieces, &mut self.scratch_index_map);
1703        }
1704        for entry in &mut self.redo_stack {
1705            entry.root = copy_node(entry.root, &self.pieces, &mut new_pieces, &mut self.scratch_index_map);
1706        }
1707
1708        self.pieces = new_pieces;
1709    }
1710
1711    #[inline]
1712    pub fn squash(&mut self) {  // @Memory
1713        if self.root == NIL { return }
1714
1715        let squashed_text = self.to_string();  // @Memory
1716        let bytes  = squashed_text.as_bytes();
1717        let length = squashed_text.len() as u32;
1718
1719        let mut offsets     = Vec::with_capacity(bytes.len() / 40);
1720        let mut checkpoints = Vec::with_capacity(bytes.len() / 256);
1721
1722        let (char_count, newline_count) =
1723            count_chars_and_newlines_with_offsets_and_checkpoints(
1724                squashed_text.as_bytes(),
1725                &mut offsets,
1726                &mut checkpoints
1727            );
1728
1729        self.buffers = Buffers::new();
1730        let buffer = self.buffers.original_buffers.push(OriginalBuffer {
1731            newline_offsets: offsets.into(),
1732            char_checkpoints: checkpoints.into(),
1733            text: squashed_text.into()
1734        });
1735        self.pieces = Pieces::new();
1736
1737        let piece = Piece {
1738            byte_length: length, newline_count, char_count,
1739            buffer, byte_offset: 0,
1740            buffer_start_line: 0, piece_start_char: 0
1741        };
1742        self.root = self.pieces.insert_node(NIL, piece, 0);
1743
1744        self.undo_stack.clear();
1745        self.redo_stack.clear();
1746    }
1747}
1748
1749#[derive(Debug, Clone, Copy)]
1750pub struct MemoryUsage {
1751    pub node_arena:       u32,
1752    pub mod_buffer:       u32,
1753    pub original_buffers: u32,
1754    pub history:          u32,
1755}
1756
1757impl MemoryUsage {
1758    #[inline(always)]
1759    #[must_use]
1760    pub const fn total(&self) -> u32 {
1761        self.node_arena + self.mod_buffer + self.original_buffers + self.history
1762    }
1763
1764    /// Overhead = everything except the actual document content in buffers.
1765    #[inline(always)]
1766    #[must_use]
1767    pub const fn overhead(&self) -> u32 {
1768        self.node_arena + self.history
1769    }
1770}
1771
1772impl PieceTree {
1773    /// Total bytes allocated for the node arena (includes NIL sentinel and
1774    /// all historical nodes retained for undo/redo).
1775    #[inline(always)]
1776    #[must_use]
1777    pub fn node_arena_bytes(&self) -> u32 {
1778        (self.pieces.nodes.len() * size_of::<Node>()) as _
1779    }
1780
1781    /// Bytes consumed by the modifications buffer (append-only, never shrinks).
1782    #[inline(always)]
1783    #[must_use]
1784    pub fn mod_buffer_bytes(&self) -> u32 {
1785        self.buffers.modifications_buffer.len() as u32 +
1786            (self.buffers.modifications_char_checkpoints.len() * size_of::<(u32, u32)>()) as u32 +
1787            (self.buffers.modifications_newline_offsets.len() * size_of::<u32>()) as u32
1788    }
1789
1790    /// Bytes consumed by all original (read) buffers.
1791    #[inline(always)]
1792    #[must_use]
1793    pub fn original_buffers_bytes(&self) -> u32 {
1794        self.buffers.original_buffers.values().map(|s| {
1795            s.len() +
1796                s.char_checkpoints.len() * size_of::<(u32, u32)>() +
1797                s.newline_offsets.len() * size_of::<u32>()
1798        }).sum::<usize>() as _
1799    }
1800
1801    /// Bytes consumed by undo + redo history entries.
1802    #[inline(always)]
1803    #[must_use]
1804    pub fn history_bytes(&self) -> u32 {
1805        ((self.undo_stack.len() + self.redo_stack.len()) * size_of::<HistoryEntry>()) as _
1806    }
1807
1808    /// Number of live nodes in the arena (includes NIL and all historical nodes).
1809    #[inline(always)]
1810    #[must_use]
1811    pub fn node_count(&self) -> u32 {
1812        self.pieces.nodes.len() as _
1813    }
1814
1815    /// Aggregate memory usage breakdown.
1816    #[inline(always)]
1817    #[must_use]
1818    pub fn memory_usage(&self) -> MemoryUsage {
1819        MemoryUsage {
1820            node_arena:       self.node_arena_bytes(),
1821            mod_buffer:       self.mod_buffer_bytes(),
1822            original_buffers: self.original_buffers_bytes(),
1823            history:          self.history_bytes(),
1824        }
1825    }
1826}
1827
1828#[derive(Debug)]
1829pub struct LinesIter<'a> {
1830    tree: &'a PieceTree,
1831    current_line: u32,
1832    total_lines: u32,
1833}
1834
1835impl Iterator for LinesIter<'_> {
1836    type Item = String;
1837
1838    fn next(&mut self) -> Option<Self::Item> {
1839        if self.current_line >= self.total_lines { return None }
1840
1841        let line = self.tree.get_line_content_allocating(self.current_line)?;
1842        self.current_line += 1;
1843        Some(line)
1844    }
1845}
1846
1847/// A lazy, non-allocating slice view over a byte range of the tree
1848#[derive(Debug)]
1849pub struct ChunkIter<'a> {
1850    tree:       &'a PieceTree,
1851    pieces:     PieceTreeIter<'a>,
1852    // Byte range we care about
1853    start:      u32,
1854    end:        u32,
1855    // Bytes consumed so far across all pieces
1856    piece_start: u32,
1857}
1858
1859impl<'a> ChunkIter<'a> {
1860    #[inline]
1861    #[must_use]
1862    pub fn new(tree: &'a PieceTree, start: u32, end: u32) -> Self {
1863        Self {
1864            tree,
1865            pieces: PieceTreeIter::new(&tree.pieces, tree.root),
1866            start,
1867            end,
1868            piece_start: 0,
1869        }
1870    }
1871
1872    /// Returns the total number of characters within this chunk view boundary.
1873    #[inline]
1874    #[must_use]
1875    pub fn len_chars(&self) -> u32 {
1876        let start_char = self.tree.try_byte_to_char(self.start).unwrap_or(0);
1877        let end_char = self.tree.try_byte_to_char(self.end).unwrap_or_else(|| self.tree.len_chars());
1878        end_char.saturating_sub(start_char)
1879    }
1880
1881    /// Returns the total number of characters within this chunk view boundary.
1882    #[inline]
1883    #[must_use]
1884    pub fn len_bytes(&self) -> u32 {
1885        self.end.saturating_sub(self.start)
1886    }
1887}
1888
1889impl<'a> Iterator for ChunkIter<'a> {
1890    type Item = &'a str;
1891
1892    #[inline]
1893    fn next(&mut self) -> Option<&'a str> {
1894        loop {
1895            let (_, p) = self.pieces.next()?;
1896            let piece_end = self.piece_start + p.byte_length;
1897
1898            // Skip pieces entirely before our window
1899            if piece_end <= self.start {
1900                self.piece_start = piece_end;
1901                continue;
1902            }
1903            // Stop once we're past our window
1904            if self.piece_start >= self.end {
1905                return None;
1906            }
1907
1908            let slice_start = self.start.saturating_sub(self.piece_start);
1909            let slice_end   = (self.end - self.piece_start).min(p.byte_length);
1910
1911            self.piece_start = piece_end;
1912
1913            let text = self.tree.buffers.get_slice(p.buffer, p.byte_offset + slice_start, slice_end - slice_start);
1914            if text.is_empty() { continue; }
1915            return Some(text);
1916        }
1917    }
1918}
1919
1920/// A zero-copy iterator that yields lines as perfectly bounded `TreeSlice`s.
1921#[derive(Debug, Clone)]
1922pub struct SliceLines<'a> {
1923    slice: TreeSlice<'a>,
1924    current_abs_line: u32,
1925    end_abs_line: u32,
1926    yielded_all: bool,
1927}
1928
1929impl<'a> Iterator for SliceLines<'a> {
1930    type Item = TreeSlice<'a>;
1931
1932    #[inline]
1933    fn next(&mut self) -> Option<Self::Item> {
1934        if self.yielded_all { return None; }
1935
1936        // Fetch the absolute start byte of the current line from the tree
1937        let line_start = self.slice.tree.try_line_to_byte(self.current_abs_line).unwrap_or(self.slice.start);
1938
1939        //
1940        // If we are on the very last line that this slice overlaps
1941        //
1942        if self.current_abs_line >= self.end_abs_line {
1943            self.yielded_all = true;
1944
1945            // Clamp the start byte between the slice start and end boundaries
1946            let start_byte = line_start.max(self.slice.start).min(self.slice.end);
1947
1948            return Some(TreeSlice {
1949                tree: self.slice.tree,
1950                start: start_byte,
1951                end: self.slice.end,
1952            });
1953        }
1954
1955        // We are on an intermediate line, find where the next line starts.
1956        let next_line_start = self.slice.tree.try_line_to_byte(self.current_abs_line + 1).unwrap_or(self.slice.end);
1957
1958        // Clamp boundaries to ensure we never bleed outside the TreeSlice
1959        let start_byte = line_start.max(self.slice.start).min(self.slice.end);
1960        let end_byte = next_line_start.min(self.slice.end);
1961
1962        self.current_abs_line += 1;
1963
1964        Some(TreeSlice {
1965            tree: self.slice.tree,
1966            start: start_byte,
1967            end: end_byte,
1968        })
1969    }
1970}
1971
1972#[derive(Debug)]
1973pub struct SliceCharsRev<'a> {
1974    walker: ReverseTreeWalker<'a>,
1975    current_byte: u32,
1976    start_byte: u32,
1977}
1978
1979impl<'a> Iterator for SliceCharsRev<'a> {
1980    type Item = char;
1981
1982    #[inline]
1983    fn next(&mut self) -> Option<char> {
1984        // If we've hit or crossed the start boundary, stop
1985        if self.current_byte <= self.start_byte {
1986            return None;
1987        }
1988
1989        let c = self.walker.next()?;
1990
1991        // Subtract the exact byte width of the character we just yielded
1992        self.current_byte = self.current_byte.saturating_sub(c.len_utf8() as u32);
1993
1994        // If yielding this character pushed us before the slice's start
1995        // boundary, we discard it and end the iterator.
1996        if self.current_byte < self.start_byte {
1997            return None;
1998        }
1999
2000        Some(c)
2001    }
2002}
2003
2004impl core::fmt::Display for SliceCharsRev<'_> {
2005    #[inline]
2006    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2007        // Spawn a transient reverse walker starting exactly where we currently are
2008        let mut temp_walker = ReverseTreeWalker::new_at(self.walker.tree, self.current_byte);
2009        let mut temp_byte = self.current_byte;
2010
2011        while temp_byte > self.start_byte {
2012            if let Some(c) = temp_walker.next() {
2013                temp_byte = temp_byte.saturating_sub(c.len_utf8() as u32);
2014                if temp_byte < self.start_byte { break; }
2015                write!(f, "{c}")?;
2016            } else {
2017                break;
2018            }
2019        }
2020        Ok(())
2021    }
2022}
2023
2024/// Non-allocating char iterator over a byte-bounded window of the tree.
2025#[derive(Debug)]
2026pub struct SliceChars<'a> {
2027    walker:   TreeWalker<'a>,
2028    byte_end: u32,
2029}
2030
2031impl Iterator for SliceChars<'_> {
2032    type Item = char;
2033    #[inline]
2034    fn next(&mut self) -> Option<char> {
2035        if self.walker.offset >= self.byte_end { return None; }
2036        self.walker.next()
2037    }
2038}
2039
2040impl core::fmt::Display for SliceChars<'_> {
2041    #[inline]
2042    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2043        // Grab the current absolute byte offset from the existing walker
2044        let current_offset = self.walker.offset;
2045
2046        // Spawn a transient walker and seek it to the exact current offset
2047        let mut temp_walker = TreeWalker::new(self.walker.tree);
2048        temp_walker.seek(current_offset);
2049
2050        while temp_walker.offset < self.byte_end {
2051            if let Some(c) = temp_walker.next() {
2052                write!(f, "{c}")?;
2053            } else {
2054                break;
2055            }
2056        }
2057
2058        Ok(())
2059    }
2060}
2061
2062/// A zero-copy borrowed view over a byte range of the tree.
2063/// All offsets are relative to the slice start.
2064#[derive(Debug, Clone, Copy)]
2065pub struct TreeSlice<'a> {
2066    tree:  &'a PieceTree,
2067    start: u32,   // byte offset into tree (inclusive)
2068    end:   u32,   // byte offset into tree (exclusive)
2069}
2070
2071impl PartialEq<&str> for TreeSlice<'_> {
2072    #[inline]
2073    fn eq(&self, other: &&str) -> bool {
2074        let mut remaining = other.as_bytes();
2075
2076        for chunk in self.chunks() {
2077            let chunk_bytes = chunk.as_bytes();
2078
2079            // If the chunk is longer than the remaining string, they don't match
2080            if remaining.len() < chunk_bytes.len() {
2081                return false;
2082            }
2083
2084            // If the bytes don't match, fail fast
2085            if !remaining.starts_with(chunk_bytes) {
2086                return false;
2087            }
2088
2089            remaining = &remaining[chunk_bytes.len()..];
2090        }
2091
2092        // If we exhausted all chunks and have no remaining bytes, it's a perfect match
2093        remaining.is_empty()
2094    }
2095}
2096
2097impl PartialEq<str> for TreeSlice<'_> {
2098    #[inline]
2099    fn eq(&self, other: &str) -> bool {
2100        self == &other
2101    }
2102}
2103
2104impl core::fmt::Display for TreeSlice<'_> {
2105    #[inline(always)]
2106    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2107        for char in self.chars() {
2108            write!(f, "{char}")?;
2109        }
2110        Ok(())
2111    }
2112}
2113
2114/// Slice Iterators
2115impl<'a> TreeSlice<'a> {
2116    #[inline]
2117    pub fn chunks(self) -> ChunkIter<'a> {
2118        ChunkIter::new(self.tree, self.start, self.end)
2119    }
2120
2121    #[inline]
2122    pub fn bytes(self) -> impl Iterator<Item = u8> + 'a {
2123        self.chunks().flat_map(|chunk| chunk.bytes())
2124    }
2125
2126    #[inline]
2127    pub fn chars(self) -> impl Iterator<Item = char> + 'a {
2128        self.chunks().flat_map(|chunk| chunk.chars())
2129    }
2130
2131    #[inline]
2132    pub fn chars_rev(self) -> SliceCharsRev<'a> {
2133        SliceCharsRev {
2134            walker: ReverseTreeWalker::new_at(self.tree, self.end),
2135            current_byte: self.end,
2136            start_byte: self.start,
2137        }
2138    }
2139
2140    #[inline]
2141    pub fn lines(self) -> SliceLines<'a> {
2142        // Query the tree directly for the absolute line overlap
2143        let start_line = self.tree.byte_to_line(self.start);
2144        let end_line = self.tree.byte_to_line(self.end);
2145
2146        SliceLines {
2147            slice: self,
2148            current_abs_line: start_line,
2149            end_abs_line: end_line,
2150            yielded_all: false,
2151        }
2152    }
2153
2154    #[inline]
2155    pub fn lines_at(self, line_idx: u32) -> SliceLines<'a> {
2156        let start_line = self.tree.byte_to_line(self.start);
2157        let end_line = self.tree.byte_to_line(self.end);
2158
2159        let target_line = (start_line + line_idx).min(end_line);
2160
2161        SliceLines {
2162            slice: self,
2163            current_abs_line: target_line,
2164            end_abs_line: end_line,
2165            yielded_all: false,
2166        }
2167    }
2168
2169    #[inline]
2170    pub fn chunks_at_byte(self, byte_idx: u32) -> ChunkIter<'a> {
2171        let safe_idx = byte_idx.min(self.len_bytes());
2172        ChunkIter::new(self.tree, self.start + safe_idx, self.end)
2173    }
2174
2175    #[inline]
2176    pub fn bytes_at(self, byte_idx: u32) -> impl Iterator<Item = u8> + 'a {
2177        self.chunks_at_byte(byte_idx).flat_map(|chunk| chunk.bytes())
2178    }
2179
2180    #[inline]
2181    pub fn chars_at(self, char_idx: u32) -> impl Iterator<Item = char> + 'a {
2182        let safe_idx = char_idx.min(self.len_chars());
2183        let byte_offset = self.try_char_to_byte(safe_idx).unwrap_or(self.len_bytes());
2184        self.chunks_at_byte(byte_offset).flat_map(|chunk| chunk.chars())
2185    }
2186}
2187
2188/// Tree Iterators
2189impl PieceTree {
2190    #[inline]
2191    pub fn slice_whole(&self) -> TreeSlice<'_> {
2192        self.slice(0..self.len_bytes())
2193    }
2194
2195    #[inline]
2196    pub fn chunks(&self) -> ChunkIter<'_> { self.slice_whole().chunks() }
2197
2198    #[inline]
2199    pub fn bytes(&self) -> impl Iterator<Item = u8> + '_ { self.slice_whole().bytes() }
2200
2201    #[inline]
2202    pub fn chars(&self) -> impl Iterator<Item = char> + '_ { self.slice_whole().chars() }
2203
2204    #[inline]
2205    pub fn lines(&self) -> SliceLines<'_> { self.slice_whole().lines() }
2206
2207    #[inline(always)]
2208    #[must_use]
2209    pub fn chars_rev(&self) -> ReverseTreeWalker<'_> { ReverseTreeWalker::new(self) }
2210
2211    #[inline(always)]
2212    #[must_use]
2213    pub fn chars_at_rev(&self, char_idx: u32) -> ReverseTreeWalker<'_> { ReverseTreeWalker::new_at(self, char_idx) }
2214
2215    #[inline]
2216    pub fn chunks_at_byte(&self, byte_idx: u32) -> ChunkIter<'_> {
2217        self.slice_whole().chunks_at_byte(byte_idx)
2218    }
2219
2220    #[inline]
2221    pub fn bytes_at(&self, byte_idx: u32) -> impl Iterator<Item = u8> + '_ {
2222        self.slice_whole().bytes_at(byte_idx)
2223    }
2224
2225    #[inline]
2226    pub fn chars_at(&self, char_idx: u32) -> impl Iterator<Item = char> + '_ {
2227        self.slice_whole().chars_at(char_idx)
2228    }
2229
2230    #[inline]
2231    pub fn lines_at(&self, line_idx: u32) -> SliceLines<'_> {
2232        self.slice_whole().lines_at(line_idx)
2233    }
2234}
2235
2236impl<'a> TreeSlice<'a> {
2237    #[inline(always)]
2238    #[must_use]
2239    pub fn new(tree: &'a PieceTree, start: u32, end: u32) -> Self {
2240        let end = end.min(tree.len_bytes());
2241        debug_assert!(start <= end);
2242        Self { tree, start, end }
2243    }
2244
2245    #[inline(always)]
2246    #[must_use]
2247    pub fn len_bytes(&self) -> u32  { self.end - self.start }
2248
2249    #[inline(always)]
2250    #[must_use]
2251    pub fn is_empty(&self)  -> bool { self.start == self.end }
2252
2253    #[inline(always)]
2254    #[must_use]
2255    pub fn len_chars(&self) -> u32 {
2256        let a = self.tree.try_byte_to_char(self.start).unwrap_or(0);
2257        let b = self.tree.try_byte_to_char(self.end).unwrap_or_else(|| self.tree.len_chars());
2258        b - a
2259    }
2260
2261    #[inline]
2262    #[must_use]
2263    pub fn len_lines(&self) -> u32 {
2264        let start_line = self.tree.try_byte_to_line(self.start).unwrap_or(0);
2265        let end_line = self.tree.try_byte_to_line(self.end).unwrap_or_else(|| self.tree.len_lines() - 1);
2266        end_line - start_line + 1
2267    }
2268
2269    #[must_use]
2270    pub fn chunk_at_byte(&self, offset: u32) -> &[u8] {
2271        if offset >= self.len_bytes() { return &[] }
2272
2273        let abs_byte = self.start + offset;
2274        let chunk = self.tree.chunk_at_byte(abs_byte).0;
2275
2276        // Truncate the chunk if it bleeds past the end of the slice
2277        let max_valid_len = (self.end - abs_byte) as usize;
2278        let bounded_len = chunk.len().min(max_valid_len);
2279
2280        &chunk[..bounded_len]
2281    }
2282
2283    #[inline]
2284    #[must_use]
2285    pub fn chunk_at_char(&self, char_offset: u32) -> &[u8] {
2286        if char_offset >= self.len_chars() { return &[] }
2287
2288        let Some(base_char) = self.tree.try_byte_to_char(self.start) else {
2289            return &[];
2290        };
2291        let abs_char = base_char + char_offset;
2292
2293        let chunk = self.tree.chunk_at_char(abs_char).0;
2294
2295        // Truncate the chunk if it bleeds past the end of the slice
2296        let Some(abs_byte) = self.tree.try_char_to_byte(abs_char) else {
2297            return &[];
2298        };
2299
2300        let max_valid_len = (self.end - abs_byte) as usize;
2301        let bounded_len = chunk.len().min(max_valid_len);
2302
2303        &chunk[..bounded_len]
2304    }
2305
2306    #[inline]
2307    #[must_use]
2308    pub fn chunk_at_line_break(&self, break_index: u32) -> &[u8] {
2309        let Some((base_line, _)) = self.tree.try_byte_to_line_col(self.start) else {
2310            return &[];
2311        };
2312
2313        let abs_break_index = base_line + break_index;
2314
2315        let Some(abs_byte) = self.tree.try_line_break_to_byte(abs_break_index) else {
2316            return &[];
2317        };
2318
2319        // Ensure the line break actually falls within the slice bounds
2320        if abs_byte < self.start || abs_byte >= self.end {
2321            return &[];
2322        }
2323
2324        let chunk = self.tree.chunk_at_byte(abs_byte).0;
2325
2326        // Bound the chunk so it doesn't bleed past self.end
2327        let max_valid_len = (self.end - abs_byte) as usize;
2328        let bounded_len = chunk.len().min(max_valid_len);
2329
2330        &chunk[..bounded_len]
2331    }
2332
2333    /// Byte at a slice-relative byte offset.
2334    #[inline(always)]
2335    #[must_use]
2336    pub fn byte(&self, offset: u32) -> u8 {
2337        self.try_byte(offset).unwrap()
2338    }
2339
2340    /// Byte at a slice-relative byte offset.
2341    #[inline(always)]
2342    #[must_use]
2343    pub fn try_byte(&self, offset: u32) -> Option<u8> {
2344        if offset >= self.len_bytes() { return None; }
2345        self.tree.try_byte(self.start + offset)
2346    }
2347
2348    /// Char at a slice-relative char index.
2349    #[inline(always)]
2350    #[must_use]
2351    pub fn char(&self, char_index: u32) -> char {
2352        self.try_char(char_index).unwrap()
2353    }
2354
2355    /// Char at a slice-relative char index.
2356    #[inline(always)]
2357    #[must_use]
2358    pub fn try_char(&self, char_index: u32) -> Option<char> {
2359        if char_index >= self.len_chars() { return None; }
2360        let abs_char = self.tree.try_byte_to_char(self.start)? + char_index;
2361        self.tree.try_char(abs_char)
2362    }
2363
2364    #[inline(always)]
2365    #[must_use]
2366    pub fn line(&self, line: u32) -> ChunkIter<'a> {
2367        self.try_line(line).unwrap()
2368    }
2369
2370    #[inline(always)]
2371    #[must_use]
2372    pub fn try_line(&self, line: u32) -> Option<ChunkIter<'a>> {
2373        let (abs_start, abs_end) = self.abs_line_range(line)?;
2374        // Use ChunkIter directly to enforce the absolute end boundary
2375        Some(ChunkIter::new(self.tree, abs_start, abs_end))
2376    }
2377
2378    /// Returns a sub-slice over a byte range (relative to this slice).
2379    #[inline(always)]
2380    #[must_use]
2381    pub fn slice<R: RangeBounds<u32>>(&self, range: R) -> TreeSlice<'a> {
2382        let s = match range.start_bound() {
2383            Bound::Included(&n) => n,
2384            Bound::Excluded(&n) => n + 1,
2385            Bound::Unbounded    => 0,
2386        };
2387        let e = match range.end_bound() {
2388            Bound::Included(&n) => n + 1,
2389            Bound::Excluded(&n) => n,
2390            Bound::Unbounded    => self.len_bytes(),
2391        };
2392        TreeSlice::new(self.tree, self.start + s, self.start + e)
2393    }
2394
2395    #[inline(always)]
2396    #[must_use]
2397    pub fn byte_to_char(&self, byte_offset: u32) -> u32 {
2398        self.try_byte_to_char(byte_offset).unwrap()
2399    }
2400
2401    #[inline(always)]
2402    #[must_use]
2403    pub fn try_byte_to_char(&self, byte_offset: u32) -> Option<u32> {
2404        let base = self.tree.try_byte_to_char(self.start)?;
2405        let abs  = self.tree.try_byte_to_char(self.start + byte_offset)?;
2406        Some(abs - base)
2407    }
2408
2409    #[inline(always)]
2410    #[must_use]
2411    pub fn char_to_byte(&self, char_index: u32) -> u32 {
2412        self.try_char_to_byte(char_index).unwrap()
2413    }
2414
2415    #[inline(always)]
2416    #[must_use]
2417    pub fn try_char_to_byte(&self, char_index: u32) -> Option<u32> {
2418        let base_char = self.tree.try_byte_to_char(self.start)?;
2419        let abs_byte  = self.tree.try_char_to_byte(base_char + char_index)?;
2420        Some(abs_byte - self.start)
2421    }
2422
2423    #[inline(always)]
2424    #[must_use]
2425    pub fn byte_to_line(&self, byte_offset: u32) -> u32 {
2426        self.try_byte_to_line(byte_offset).unwrap()
2427    }
2428
2429    #[inline(always)]
2430    #[must_use]
2431    pub fn try_byte_to_line(&self, byte_offset: u32) -> Option<u32> {
2432        let (abs_line, _) = self.tree.try_byte_to_line_col(self.start + byte_offset)?;
2433        let base_line     = self.tree.try_byte_to_line_col(self.start).map(|(l, _)| l)?;
2434        Some(abs_line - base_line)
2435    }
2436
2437    #[inline(always)]
2438    #[must_use]
2439    pub fn line_to_byte(&self, line: u32) -> u32 {
2440        self.try_line_to_byte(line).unwrap()
2441    }
2442
2443    #[inline(always)]
2444    #[must_use]
2445    pub fn try_line_to_byte(&self, line: u32) -> Option<u32> {
2446        let (abs_start, _) = self.abs_line_range(line)?;
2447        Some(abs_start - self.start)
2448    }
2449
2450    #[inline]
2451    #[must_use]
2452    pub fn abs_line_range(&self, rel_line: u32) -> Option<(u32, u32)> {
2453        let base_line = self.tree.try_byte_to_line_col(self.start).map(|(l, _)| l)?;
2454        let abs_line  = base_line + rel_line;
2455
2456        let line_start = self.tree.try_line_to_byte(abs_line)?;
2457        let line_end   = self.tree.try_line_to_byte(abs_line + 1)
2458            .unwrap_or_else(|| self.tree.len_bytes());
2459
2460        //
2461        // Clamp to slice bounds.
2462        //
2463        let s = line_start.max(self.start);
2464        let e = line_end.min(self.end);
2465        if s > e { return None; }
2466
2467        Some((s, e))
2468    }
2469}
2470
2471// Allow slicing directly from PieceTree.
2472impl PieceTree {
2473    #[inline]
2474    pub fn slice<R: RangeBounds<u32>>(&self, range: R) -> TreeSlice<'_> {
2475        let s = match range.start_bound() {
2476            Bound::Included(&n) => n,
2477            Bound::Excluded(&n) => n + 1,
2478            Bound::Unbounded    => 0,
2479        };
2480        let e = match range.end_bound() {
2481            Bound::Included(&n) => n + 1,
2482            Bound::Excluded(&n) => n,
2483            Bound::Unbounded    => self.len_bytes(),
2484        };
2485        TreeSlice::new(self, s, e)
2486    }
2487
2488    /// Slice by char range.
2489    #[inline]
2490    pub fn slice_chars_range<R: RangeBounds<u32>>(&self, range: R) -> TreeSlice<'_> {
2491        let sc = match range.start_bound() {
2492            Bound::Included(&n) => n,
2493            Bound::Excluded(&n) => n + 1,
2494            Bound::Unbounded    => 0,
2495        };
2496        let ec = match range.end_bound() {
2497            Bound::Included(&n) => n + 1,
2498            Bound::Excluded(&n) => n,
2499            Bound::Unbounded    => self.len_chars(),
2500        };
2501        let s = self.try_char_to_byte(sc).unwrap_or(0);
2502        let e = self.try_char_to_byte(ec).unwrap_or_else(|| self.len_bytes());
2503        TreeSlice::new(self, s, e)
2504    }
2505}
2506
2507impl PieceTree {
2508    /// Fast-path chunk reader specifically designed for the Tree-sitter C API.
2509    /// Given an absolute byte offset, it returns the largest contiguous byte
2510    /// slice available starting exactly at that offset.
2511    #[inline]
2512    #[must_use]
2513    pub fn read_largest_contigous_chunk_at_byte(&self, offset: u32) -> (&[u8], u32) {
2514        let total = self.total_length();
2515        if offset >= total {
2516            return (&[], offset);
2517        }
2518
2519        let mut current = self.root;
2520        let mut current_offset = offset;
2521        let mut doc_offset = 0u32;
2522
2523        while current != NIL {
2524            let node = self.pieces.get(current);
2525            let p = self.pieces.get_piece(current);
2526            let left_len = self.pieces.get(node.left).subtree_len;
2527            let piece_len = p.byte_length;
2528
2529            if current_offset < left_len {
2530                current = node.left;
2531
2532            } else if current_offset < left_len + piece_len {
2533                //
2534                // The requested offset falls inside this exact piece
2535                //
2536
2537                let rel_offset = current_offset - left_len;
2538                let piece_doc_start = doc_offset + left_len;
2539
2540                let text = self.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
2541
2542                return (&text.as_bytes()[rel_offset as usize..], piece_doc_start);
2543
2544            } else {
2545                doc_offset     += left_len + piece_len;
2546                current_offset -= left_len + piece_len;
2547                current = node.right;
2548            }
2549        }
2550
2551        (&[], offset)
2552    }
2553
2554    #[inline]
2555    #[must_use]
2556    pub fn chunk_at_byte(&self, offset: u32) -> (&[u8], u32) {
2557        self.read_largest_contigous_chunk_at_byte(offset)
2558    }
2559
2560    #[inline]
2561    #[must_use]
2562    pub fn chunk_at_char(&self, char_index: u32) -> (&[u8], u32) {
2563        let Some(offset) = self.try_char_to_byte(char_index) else { return (&[], 0) };
2564        self.chunk_at_byte(offset)
2565    }
2566
2567    #[inline]
2568    #[must_use]
2569    pub fn chunk_at_line_break(&self, break_index: u32) -> (&[u8], u32) {
2570        let Some(offset) = self.try_line_break_to_byte(break_index) else { return (&[], 0) };
2571        self.chunk_at_byte(offset)
2572    }
2573
2574    #[inline]
2575    #[must_use]
2576    pub fn line_break_to_byte(&self, break_index: u32) -> u32 {
2577        self.try_line_break_to_byte(break_index).unwrap()
2578    }
2579
2580    #[inline]
2581    #[must_use]
2582    pub fn try_line_break_to_byte(&self, break_index: u32) -> Option<u32> {
2583        // @Cutnpaste from line_to_byte
2584
2585        let total_newlines = self.pieces.get(self.root).subtree_newlines;
2586        if break_index >= total_newlines { return None; }
2587
2588        let mut current = self.root;
2589        let mut current_offset = 0;
2590        let mut current_newlines = 0;
2591
2592        while current != NIL {
2593            let node = self.pieces.get(current);
2594            let p = self.pieces.get_piece(current);
2595            let left_newlines = self.pieces.get(node.left).subtree_newlines;
2596            let left_len      = self.pieces.get(node.left).subtree_len;
2597            let piece_newlines = p.newline_count;
2598
2599            if break_index < current_newlines + left_newlines {
2600                //
2601                // The target line break is somewhere in the left subtree
2602                //
2603                current = node.left;
2604
2605            } else if break_index < current_newlines + left_newlines + piece_newlines {
2606                //
2607                // The target line break is inside this piece
2608                //
2609                let rel_break = break_index - (current_newlines + left_newlines);
2610                let absolute_newline_index = p.buffer_start_line + rel_break;
2611
2612                let absolute_byte_offset = if p.buffer == MOD_BUFFER {
2613                    self.buffers.modifications_newline_offsets[absolute_newline_index as usize]
2614                } else {
2615                    self.buffers.original_buffers[p.buffer].newline_offsets[absolute_newline_index as usize]
2616                };
2617
2618                //
2619                //
2620                //
2621                //
2622                //
2623                // Unlike `line_to_byte` which adds + 1 to find the start of the *next* line,
2624                // we omit it here to return the exact byte offset of the newline character itself.
2625                //
2626                //
2627                //
2628                //
2629                //
2630                let local_piece_offset = absolute_byte_offset - p.byte_offset;
2631                return Some(current_offset + left_len + local_piece_offset);
2632
2633            } else {
2634                current_newlines += left_newlines + piece_newlines;
2635                current_offset   += left_len + p.byte_length;
2636                current = node.right;
2637            }
2638        }
2639
2640        None
2641    }
2642
2643    #[inline]
2644    #[must_use]
2645    pub fn line_to_byte(&self, target_line: u32) -> u32 {
2646        self.try_line_to_byte(target_line).unwrap()
2647    }
2648
2649    #[inline]
2650    #[must_use]
2651    pub fn try_line_to_byte(&self, target_line: u32) -> Option<u32> {
2652        if target_line == 0 { return Some(0) }
2653        let mut current = self.root;
2654        let mut current_offset = 0;
2655        let mut current_line = 0;
2656        while current != NIL {
2657            let node = self.pieces.get(current);
2658            let p = self.pieces.get_piece(current);
2659            let left_newlines = self.pieces.get(node.left).subtree_newlines;
2660            let left_len      = self.pieces.get(node.left).subtree_len;
2661            let piece_newlines = p.newline_count;
2662            let piece_len      = p.byte_length;
2663
2664            if target_line <= current_line + left_newlines {
2665                //
2666                // The target line starts somewhere in the left subtree
2667                //
2668                current = node.left;
2669
2670            } else if target_line <= current_line + left_newlines + piece_newlines {
2671                //
2672                // The target line starts inside this piece
2673                //
2674
2675                current_line   += left_newlines;
2676                current_offset += left_len;
2677
2678                let rel_line = target_line - current_line; // >= 1, guaranteed
2679                let absolute_newline_index = p.buffer_start_line + rel_line - 1;
2680                let absolute_byte_offset = if p.buffer == MOD_BUFFER {
2681                    self.buffers.modifications_newline_offsets[absolute_newline_index as usize]
2682                } else {
2683                    self.buffers.original_buffers[p.buffer].newline_offsets[absolute_newline_index as usize]
2684                };
2685
2686                let local_piece_offset = absolute_byte_offset - p.byte_offset + 1;
2687                return Some(current_offset + local_piece_offset);
2688
2689            } else {
2690                current_line   += left_newlines + piece_newlines;
2691                current_offset += left_len + piece_len;
2692                current = node.right;
2693            }
2694        }
2695
2696        None
2697    }
2698
2699    #[inline]
2700    #[must_use]
2701    pub fn char_to_byte(&self, char_index: u32) -> u32 {
2702        self.try_char_to_byte(char_index).unwrap()
2703    }
2704
2705    #[inline]
2706    #[must_use]
2707    pub fn try_char_to_byte(&self, char_index: u32) -> Option<u32> {
2708        let total_chars = self.len_chars();
2709        if char_index > total_chars { return None; }
2710        if char_index == total_chars { return Some(self.len_bytes()); }
2711
2712        let mut current = self.root;
2713        let mut current_byte = 0;
2714        let mut current_char = 0;
2715
2716        while current != NIL {
2717            let node = self.pieces.get(current);
2718            let left_node = self.pieces.get(node.left);
2719
2720            let left_chars = left_node.subtree_chars;
2721            let left_bytes = left_node.subtree_len;
2722
2723            let p = self.pieces.get_piece(current);
2724            let piece_chars = p.char_count;
2725
2726            if char_index < current_char + left_chars {
2727                current = node.left;
2728
2729            } else if char_index < current_char + left_chars + piece_chars {
2730                //
2731                // Target char is inside this piece,
2732                // p.piece_start_char is the absolute char index of p.offset in its buffer,
2733                // so (p.piece_start_char + rel_char) is the absolute char target.
2734                //
2735
2736                let rel_char = char_index - (current_char + left_chars);
2737                let absolute_target_byte =
2738                    self.buffers.char_to_byte_absolute(p.buffer, p.piece_start_char + rel_char);
2739
2740                return Some(current_byte + left_bytes + (absolute_target_byte - p.byte_offset));
2741
2742            } else {
2743                current_char += left_chars + piece_chars;
2744                current_byte += left_bytes + p.byte_length;
2745                current = node.right;
2746            }
2747        }
2748
2749        None
2750    }
2751
2752    #[inline]
2753    #[must_use]
2754    pub fn byte_to_char(&self, byte_offset: u32) -> u32 {
2755        self.try_byte_to_char(byte_offset).unwrap()
2756    }
2757
2758    #[inline]
2759    #[must_use]
2760    pub fn try_byte_to_char(&self, byte_offset: u32) -> Option<u32> {
2761        let total_bytes = self.len_bytes();
2762        if byte_offset > total_bytes { return None; }
2763        if byte_offset == total_bytes { return Some(self.len_chars()); }
2764
2765        let mut current = self.root;
2766        let mut current_byte = 0;
2767        let mut current_char = 0;
2768
2769        while current != NIL {
2770            let node = self.pieces.get(current);
2771            let left_node = self.pieces.get(node.left);
2772
2773            let left_bytes = left_node.subtree_len;
2774            let left_chars = left_node.subtree_chars;
2775
2776            let p = self.pieces.get_piece(current);
2777            let piece_bytes = p.byte_length;
2778
2779            if byte_offset < current_byte + left_bytes {
2780                current = node.left;
2781
2782            } else if byte_offset < current_byte + left_bytes + piece_bytes {
2783                // Target byte is inside this piece.
2784                // byte_to_char_absolute gives the absolute char index, so subtracting
2785                // piece_start_char converts it to a piece-relative char count.
2786
2787                let rel_byte = byte_offset - (current_byte + left_bytes);
2788                let absolute_target_char =
2789                    self.buffers.byte_to_char_absolute(p.buffer, p.byte_offset + rel_byte);
2790
2791                return Some(current_char + left_chars + (absolute_target_char - p.piece_start_char));
2792
2793            } else {
2794                current_byte += left_bytes + piece_bytes;
2795                current_char += left_chars + p.char_count;
2796                current = node.right;
2797            }
2798        }
2799
2800        None
2801    }
2802
2803    #[inline]
2804    #[must_use]
2805    pub fn byte_to_line(&self, offset: u32) -> u32 {
2806        self.try_byte_to_line(offset).unwrap()
2807    }
2808
2809    #[inline]
2810    #[must_use]
2811    pub fn try_byte_to_line(&self, offset: u32) -> Option<u32> {
2812        self.try_byte_to_line_col(offset).map(|(l, _)| l)
2813    }
2814
2815    #[inline]
2816    #[must_use]
2817    pub fn byte_to_line_col(&self, offset: u32) -> (u32, u32) {
2818        self.try_byte_to_line_col(offset).unwrap()
2819    }
2820
2821    #[inline]
2822    #[must_use]
2823    pub fn try_byte_to_line_col(&self, offset: u32) -> Option<(u32, u32)> {
2824        let total_bytes = self.len_bytes();
2825        if offset > total_bytes { return None }
2826        if offset == total_bytes {
2827            //
2828            // Get offset of the last line
2829            //
2830            let line            = self.pieces.get(self.root).subtree_newlines;
2831            let line_start_byte = self.try_line_to_byte(line).unwrap_or(0);
2832            let target_char     = self.len_chars();
2833            let line_start_char = self.try_byte_to_char(line_start_byte)?;
2834            return Some((line, target_char - line_start_char));
2835        }
2836
2837        let mut current         = self.root;
2838        let mut current_line    = 0u32;
2839        let mut current_byte    = 0u32;
2840        let mut current_char    = 0u32;
2841
2842        while current != NIL {
2843            let node          = self.pieces.get(current);
2844            let left_node     = self.pieces.get(node.left);
2845            let left_bytes    = left_node.subtree_len;
2846            let left_newlines = left_node.subtree_newlines;
2847            let left_chars    = left_node.subtree_chars;
2848            let p             = self.pieces.get_piece(current);
2849            let piece_bytes   = p.byte_length;
2850
2851            if offset < current_byte + left_bytes {
2852                current = node.left;
2853
2854            } else if offset < current_byte + left_bytes + piece_bytes {
2855                current_line += left_newlines;
2856                current_char += left_chars;
2857
2858                let rel_byte = offset - (current_byte + left_bytes);
2859
2860                let (local_newlines, col) = if p.newline_count == 0 {
2861                    let target_char = self.buffers.byte_to_char_absolute(p.buffer, p.byte_offset + rel_byte);
2862                    let rel_char    = target_char - p.piece_start_char;
2863                    let abs_char    = current_char + rel_char;
2864
2865                    let line_start_byte = self.try_line_to_byte(current_line)?;
2866                    let line_start_char = self.try_byte_to_char(line_start_byte)?;
2867
2868                    (0, abs_char - line_start_char)
2869                } else {
2870                    let start_index       = p.buffer_start_line as usize;
2871                    let end_index         = start_index + p.newline_count as usize;
2872                    let absolute_target = p.byte_offset + rel_byte;
2873
2874                    let offsets = &self.buffers.get_newlines(p.buffer)[start_index..end_index];
2875
2876                    let local_nl = offsets.partition_point(|&off| off < absolute_target) as u32;
2877
2878                    let target_char = self.buffers.byte_to_char_absolute(p.buffer, p.byte_offset + rel_byte);
2879                    let col = if local_nl > 0 {
2880                        let last_nl_abs_byte = offsets[local_nl as usize - 1];
2881                        let last_nl_char     = self.buffers.byte_to_char_absolute(p.buffer, last_nl_abs_byte);
2882                        target_char - (last_nl_char + 1)
2883                    } else {
2884                        let line_start_byte = self.try_line_to_byte(current_line)?;
2885                        let line_start_char = self.try_byte_to_char(line_start_byte)?;
2886                        let rel_char        = target_char - p.piece_start_char;
2887                        (current_char + rel_char) - line_start_char
2888                    };
2889
2890                    (local_nl, col)
2891                };
2892
2893                return Some((current_line + local_newlines, col));
2894
2895            } else {
2896                current_line += left_newlines + p.newline_count;
2897                current_byte += left_bytes + piece_bytes;
2898                current_char += left_chars + p.char_count;
2899                current = node.right;
2900            }
2901        }
2902
2903        None
2904    }
2905
2906    #[inline]
2907    #[must_use]
2908    pub fn pieces(&self) -> PieceTreeIter<'_> {
2909        PieceTreeIter::new(&self.pieces, self.root)
2910    }
2911
2912    #[inline]
2913    #[must_use]
2914    pub fn get_line_range(&self, line: u32) -> (u32, u32) {
2915        self.try_get_line_range(line).unwrap()
2916    }
2917
2918    #[inline]
2919    #[must_use]
2920    pub fn try_get_line_range(&self, line: u32) -> Option<(u32, u32)> {
2921        let start = self.try_line_to_byte(line)?;
2922        let end = self.try_line_to_byte(line + 1).unwrap_or_else(|| self.total_length());
2923        Some((start, end))
2924    }
2925
2926    #[inline]
2927    #[must_use]
2928    pub fn get_line_content_allocating(&self, line: u32) -> Option<String> {
2929        let (start, end) = self.try_get_line_range(line)?;
2930
2931        let mut content = String::with_capacity((end - start) as usize);
2932        let mut walker = TreeWalker::new(self);
2933
2934        walker.seek(start);
2935        while walker.offset < end {
2936            if let Some(c) = walker.next() {
2937                content.push(c);
2938            } else {
2939                break;
2940            }
2941        }
2942
2943        Some(content)
2944    }
2945
2946    /// Byte at a given byte offset. O(log n).
2947    #[inline]
2948    #[must_use]
2949    pub fn byte(&self, offset: u32) -> u8 {
2950        self.try_byte(offset).unwrap()
2951    }
2952
2953    /// Byte at a given byte offset. O(log n).
2954    #[inline]
2955    #[must_use]
2956    pub fn try_byte(&self, offset: u32) -> Option<u8> {
2957        let (node, rel) = self.find_position(offset, false)?;
2958        let p = self.get_piece(node);
2959        let text = self.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
2960        text.as_bytes().get(rel as usize).copied()
2961    }
2962
2963    /// Char at a given char index. O(log n) to find the piece, then
2964    /// a short scan within the piece.
2965    #[inline]
2966    #[must_use]
2967    pub fn char(&self, char_index: u32) -> char {
2968        self.try_char(char_index).unwrap()
2969    }
2970
2971    /// Char at a given char index. O(log n) to find the piece, then
2972    /// a short scan within the piece.
2973    #[inline]
2974    #[must_use]
2975    pub fn try_char(&self, char_index: u32) -> Option<char> {
2976        let byte_offset = self.try_char_to_byte(char_index)?;
2977        let (node, rel) = self.find_position(byte_offset, false)?;
2978        let p = self.get_piece(node);
2979        let text = self.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
2980        text[rel as usize..].chars().next()
2981    }
2982
2983    /// Returns a non-allocating iterator of chars over the given char range.
2984    /// Backed by `TreeWalker::seek` so it reuses existing infrastructure.
2985    #[inline]
2986    #[must_use]
2987    pub fn slice_chars(&self, char_start: u32, char_end: u32) -> SliceChars<'_> {
2988        let byte_start = self.try_char_to_byte(char_start).unwrap_or(0);
2989        let byte_end   = self.try_char_to_byte(char_end).unwrap_or_else(|| self.total_length());
2990        SliceChars {
2991            walker:   { let mut w = TreeWalker::new(self); w.seek(byte_start); w },
2992            byte_end,
2993        }
2994    }
2995
2996    /// Returns a non-allocating iterator of chars over the given byte range.
2997    #[inline]
2998    #[must_use]
2999    pub fn slice_bytes(&self, byte_start: u32, byte_end: u32) -> SliceChars<'_> {
3000        SliceChars {
3001            walker:   { let mut w = TreeWalker::new(self); w.seek(byte_start); w },
3002            byte_end,
3003        }
3004    }
3005
3006    /// Non-allocating line view: returns a `ChunkIter` over the byte range of
3007    /// `line`. Line numbers are 0-based. The trailing \n is included if present.
3008    #[inline]
3009    #[must_use]
3010    pub fn line(&self, line: u32) -> TreeSlice<'_> {
3011        self.try_line(line).unwrap()
3012    }
3013
3014    #[inline]
3015    #[must_use]
3016    pub fn try_line(&self, line: u32) -> Option<TreeSlice<'_>> {
3017        let start = self.try_line_to_byte(line)?;
3018        let end = self.try_line_to_byte(line + 1).unwrap_or_else(|| self.len_bytes());
3019
3020        Some(self.slice(start..end))
3021    }
3022
3023    /// Number of lines (= newline count + 1)
3024    #[inline]
3025    #[must_use]
3026    pub fn len_lines(&self) -> u32 { self.pieces.get(self.root).subtree_newlines + 1 }
3027
3028    #[inline(always)]
3029    #[must_use]
3030    pub fn len_chars(&self) -> u32 { self.pieces.get(self.root).subtree_chars }
3031
3032    #[inline]
3033    #[must_use]
3034    pub fn len_bytes(&self) -> u32 { self.total_length() }
3035
3036    #[inline]
3037    pub fn remove<R>(&mut self, range: R) where R: RangeBounds<u32> {
3038        let start = match range.start_bound() {
3039            Bound::Included(&n) => n,
3040            Bound::Excluded(&n) => n + 1,
3041            Bound::Unbounded => 0,
3042        };
3043        let end = match range.end_bound() {
3044            Bound::Included(&n) => n + 1,
3045            Bound::Excluded(&n) => n,
3046            Bound::Unbounded => self.len_bytes(),
3047        };
3048        if end > start { self.remove_at(start, end - start) }
3049    }
3050
3051    #[inline]
3052    #[must_use]
3053    pub fn char_to_line(&self, char_index: u32) -> u32 {
3054        self.try_char_to_line(char_index).unwrap()
3055    }
3056
3057    #[inline]
3058    #[must_use]
3059    pub fn try_char_to_line(&self, char_index: u32) -> Option<u32> {
3060        let byte_index = self.try_char_to_byte(char_index)?;
3061        self.try_byte_to_line_col(byte_index).map(|(line, _)| line)
3062    }
3063
3064    #[inline]
3065    #[must_use]
3066    pub fn char_to_line_col(&self, char_index: u32) -> (u32, u32) {
3067        self.try_char_to_line_col(char_index).unwrap()
3068    }
3069
3070    #[inline]
3071    #[must_use]
3072    pub fn try_char_to_line_col(&self, char_index: u32) -> Option<(u32, u32)> {
3073        let byte_index = self.try_char_to_byte(char_index)?;
3074        self.try_byte_to_line_col(byte_index)
3075    }
3076
3077    #[inline]
3078    #[must_use]
3079    pub fn line_to_char(&self, line: u32) -> u32 {
3080        self.try_line_to_char(line).unwrap()
3081    }
3082
3083    #[inline]
3084    #[must_use]
3085    pub fn try_line_to_char(&self, line: u32) -> Option<u32> {
3086        let byte_index = self.try_line_to_byte(line)?;
3087        self.try_byte_to_char(byte_index)
3088    }
3089}
3090
3091#[derive(Debug)]
3092pub struct PieceTreeIter<'a, const MAX_INLINE_TREE_DEPTH: usize = 32> {
3093    arena: &'a Pieces,
3094    stack: SmallVec<[NodeRef; MAX_INLINE_TREE_DEPTH]>,
3095}
3096
3097impl<'a, const MAX_INLINE_TREE_DEPTH: usize> PieceTreeIter<'a, MAX_INLINE_TREE_DEPTH> {
3098    #[inline]
3099    #[must_use]
3100    pub fn new(arena: &'a Pieces, mut root: NodeRef) -> Self {
3101        let mut stack = SmallVec::new();
3102        while root != NIL {
3103            stack.push(root);
3104            root = arena.get(root).left;
3105        }
3106
3107        Self { arena, stack }
3108    }
3109}
3110
3111impl<const MAX_INLINE_TREE_DEPTH: usize> Iterator for PieceTreeIter<'_, MAX_INLINE_TREE_DEPTH> {
3112    type Item = (NodeRef, Piece);
3113
3114    #[inline]
3115    fn next(&mut self) -> Option<Self::Item> {
3116        let node_index = self.stack.pop()?;
3117        let node = self.arena.get(node_index);
3118        let p = self.arena.get_piece(node_index);
3119
3120        let mut current = node.right;
3121        while current != NIL {
3122            self.stack.push(current);
3123            current = self.arena.get(current).left;
3124        }
3125
3126        Some((node_index, p))
3127    }
3128}
3129
3130#[derive(Clone, Copy, PartialEq, Debug)]
3131enum Direction { Left, Center, Right }
3132
3133#[derive(Debug)]
3134pub struct TreeWalker<'a, const MAX_INLINE_TREE_DEPTH: usize = 32> {
3135    tree: &'a PieceTree,
3136    stack: SmallVec<[(NodeRef, Direction); MAX_INLINE_TREE_DEPTH]>,
3137    current_str: str::Chars<'a>,
3138    pub offset: u32,
3139}
3140
3141impl<'a, const MAX_INLINE_TREE_DEPTH: usize> TreeWalker<'a, MAX_INLINE_TREE_DEPTH> {
3142    #[inline]
3143    #[must_use]
3144    pub fn new(tree: &'a PieceTree) -> Self {
3145        let mut walker = Self {
3146            tree,
3147            stack: SmallVec::new(),
3148            current_str: "".chars(),
3149            offset: 0,
3150        };
3151        if tree.root != NIL {
3152            walker.stack.push((tree.root, Direction::Left));
3153        }
3154        walker.populate_chars();
3155        walker
3156    }
3157
3158    #[inline]
3159    pub fn seek(&mut self, target: u32) {
3160        self.stack.clear();
3161        self.offset = target;
3162        self.current_str = "".chars();
3163
3164        if self.tree.root == NIL { return; }
3165
3166        let mut current = self.tree.root;
3167        let mut current_offset = target;
3168
3169        while current != NIL {
3170            let node = self.tree.pieces.get(current);
3171            let p = self.tree.pieces.get_piece(current);
3172            let left_len = self.tree.pieces.get(node.left).subtree_len;
3173            let piece_len = p.byte_length;
3174
3175            if current_offset < left_len {
3176                self.stack.push((current, Direction::Center));
3177                current = node.left;
3178            } else if current_offset < left_len + piece_len {
3179                self.stack.push((current, Direction::Right));
3180                let text = self.tree.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
3181                let rel_offset = current_offset - left_len;
3182                self.current_str = text[rel_offset as usize..].chars();
3183                break;
3184            } else {
3185                current_offset -= left_len + piece_len;
3186                current = node.right;
3187            }
3188        }
3189    }
3190
3191    #[inline]
3192    fn populate_chars(&mut self) {
3193        while let Some((node_index, dir)) = self.stack.pop() {
3194            let node = self.tree.pieces.get(node_index);
3195            match dir {
3196                Direction::Left => {
3197                    self.stack.push((node_index, Direction::Center));
3198                    if node.left != NIL { self.stack.push((node.left, Direction::Left)); }
3199                }
3200
3201                Direction::Center => {
3202                    self.stack.push((node_index, Direction::Right));
3203                    let p = self.tree.pieces.get_piece(node_index);
3204                    let text = self.tree.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
3205                    self.current_str = text.chars();
3206                    return;
3207                }
3208
3209                Direction::Right => {
3210                    if node.right != NIL { self.stack.push((node.right, Direction::Left)); }
3211                }
3212            }
3213        }
3214    }
3215}
3216
3217impl Iterator for TreeWalker<'_> {
3218    type Item = char;
3219
3220    #[inline(always)]
3221    fn next(&mut self) -> Option<char> {
3222        loop {
3223            if let Some(c) = self.current_str.next() {
3224                self.offset += c.len_utf8() as u32;
3225                return Some(c);
3226            }
3227
3228            if self.stack.is_empty() { return None; }
3229            self.populate_chars();
3230        }
3231    }
3232}
3233
3234#[derive(Debug)]
3235pub struct ReverseTreeWalker<'a, const MAX_INLINE_TREE_DEPTH: usize = 32> {
3236    tree: &'a PieceTree,
3237    stack: SmallVec<[(NodeRef, bool); MAX_INLINE_TREE_DEPTH]>,
3238    current_str: str::Chars<'a>,
3239}
3240
3241impl<'a, const MAX_INLINE_TREE_DEPTH: usize> ReverseTreeWalker<'a, MAX_INLINE_TREE_DEPTH> {
3242    #[inline(always)]
3243    #[must_use]
3244    pub fn new(tree: &'a PieceTree) -> Self {
3245        let mut walker = Self {
3246            tree,
3247            stack: SmallVec::new(),
3248            current_str: "".chars(),
3249        };
3250        walker.push_rightmost(tree.root);
3251        walker
3252    }
3253
3254    #[inline]
3255    #[must_use]
3256    pub fn new_at(tree: &'a PieceTree, target_offset: u32) -> Self {
3257        let mut walker = Self {
3258            tree,
3259            stack: SmallVec::new(),
3260            current_str: "".chars(),
3261        };
3262        walker.seek(target_offset);
3263        walker
3264    }
3265
3266    #[inline(always)]
3267    fn push_rightmost(&mut self, mut node: NodeRef) {
3268        while node != NIL {
3269            self.stack.push((node, false));
3270            node = self.tree.pieces.get(node).right;
3271        }
3272    }
3273
3274    #[inline]
3275    pub fn seek(&mut self, mut target_offset: u32) {
3276        self.stack.clear();
3277        self.current_str = "".chars();
3278
3279        if self.tree.root == NIL { return; }
3280
3281        let total_bytes = self.tree.len_bytes();
3282
3283        if target_offset > total_bytes {
3284            target_offset = total_bytes;
3285        }
3286        if target_offset == total_bytes {
3287            self.push_rightmost(self.tree.root);
3288            return;
3289        }
3290
3291        let mut current = self.tree.root;
3292        let mut current_offset = target_offset;
3293
3294        while current != NIL {
3295            let node = self.tree.pieces.get(current);
3296            let p = self.tree.pieces.get_piece(current);
3297            let left_len = self.tree.pieces.get(node.left).subtree_len;
3298            let piece_len = p.byte_length;
3299
3300            if current_offset < left_len {
3301                self.stack.push((current, true));
3302                current = node.left;
3303
3304            } else if current_offset < left_len + piece_len {
3305                self.stack.push((current, true));
3306
3307                let text = self.tree.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
3308                let rel_offset = current_offset - left_len;
3309
3310                // Keep the subset of the string before the offset
3311                self.current_str = text[..rel_offset as usize].chars();
3312
3313                break;
3314
3315            } else {
3316                self.stack.push((current, false));
3317                current_offset -= left_len + piece_len;
3318                current = node.right;
3319            }
3320        }
3321    }
3322}
3323
3324impl Iterator for ReverseTreeWalker<'_> {
3325    type Item = char;
3326
3327    #[inline]
3328    fn next(&mut self) -> Option<Self::Item> {
3329        if let Some(c) = self.current_str.next_back() { return Some(c); }
3330
3331        while let Some((node_index, visited_right)) = self.stack.pop() {
3332            let node = self.tree.pieces.get(node_index);
3333            if visited_right {
3334                self.push_rightmost(node.left);
3335            } else {
3336                self.stack.push((node_index, true));
3337
3338                let p = self.tree.pieces.get_piece(node_index);
3339                let text_slice = self.tree.buffers.get_slice(p.buffer, p.byte_offset, p.byte_length);
3340
3341                self.current_str = text_slice.chars();
3342
3343                if let Some(c) = self.current_str.next_back() { return Some(c); }
3344            }
3345        }
3346
3347        None
3348    }
3349}
3350
3351impl PieceTree {
3352    #[inline]
3353    pub fn debug(&self, f: &mut impl core::fmt::Write) -> core::fmt::Result {
3354        writeln!(f, "\n--- Tree State (Root: {:?}) ---", self.root)?;
3355        self.print_inorder(f, self.root, &mut None, 0)?;
3356        writeln!(f, "------------------------------\n")
3357    }
3358
3359    fn print_inorder(&self, f: &mut impl core::fmt::Write, node: NodeRef, last: &mut Option<Piece>, depth: usize) -> core::fmt::Result {
3360        if node == NIL { return Ok(()) }
3361
3362        let n = self.pieces.nodes[node];
3363
3364        self.print_inorder(f, n.left, last, depth + 1)?;
3365
3366        let cur = self.get_piece(node);
3367
3368        //
3369        // Check for the mergeable invariant
3370        //
3371        let warning = if let Some(prev) = last {
3372            if prev.buffer == cur.buffer
3373               && prev.byte_offset + prev.byte_length == cur.byte_offset
3374            {
3375                " --------- [!!! MERGEABLE NEIGHBORS NOT MERGED !!!] ---------"
3376            } else {
3377                ""
3378            }
3379        } else {
3380            ""
3381        };
3382
3383        let Piece { buffer, byte_offset: offset, byte_length: length, .. } = cur;
3384
3385        writeln!(
3386            f,
3387            "{:indent$}Node {node:?}: Buf={}, Off={offset}, Len={length}{warning}",
3388            "", buffer.as_u32(), indent = depth * 4
3389        )?;
3390
3391        *last = Some(cur);
3392
3393        self.print_inorder(f, n.right, last, depth + 1)
3394    }
3395}
3396
3397pub fn assert_state(tree: &PieceTree, expected: &str) {
3398    let tree_text = tree.to_string();
3399
3400    assert_eq!(tree_text, expected, "Text mismatch");
3401
3402    if !expected.is_empty() {
3403        let offsets = [0, expected.len() / 2, expected.len() - 1];
3404        for off in offsets {
3405            let chunk = tree.read_largest_contigous_chunk_at_byte(off as u32).0;
3406            let chunk_str = str::from_utf8(chunk).unwrap();
3407            assert!(
3408                expected[off..].starts_with(chunk_str),
3409                "Chunk mismatch at offset {}",
3410                off
3411            );
3412        }
3413    }
3414}
3415
3416pub fn assert_invariants(tree: &PieceTree) {
3417    fn check(tree: &PieceTree, node: NodeRef) -> (usize, usize) {
3418        if node == NIL { return (0, 1) }
3419
3420        let n = tree.pieces.nodes[node];
3421        let piece = tree.get_piece(node);
3422
3423        let (l_len, l_bh) = check(tree, n.left);
3424        let (r_len, r_bh) = check(tree, n.right);
3425
3426        let expected_len = l_len + piece.byte_length as usize + r_len;
3427        assert_eq!(n.subtree_len as usize, expected_len, "subtree_len mismatch");
3428
3429        if n.color() == Color::Red {
3430            assert_eq!(
3431                tree.pieces.nodes[n.left].color(),
3432                Color::Black,
3433                "red node with red left child"
3434            );
3435            assert_eq!(
3436                tree.pieces.nodes[n.right].color(),
3437                Color::Black,
3438                "red node with red right child"
3439            );
3440        }
3441
3442        assert_eq!(l_bh, r_bh, "black height mismatch");
3443
3444        let bh = l_bh + usize::from(n.color() == Color::Black);
3445        (expected_len, bh)
3446    }
3447
3448    if tree.root == NIL {
3449        assert_eq!(tree.total_length(), 0,            "Empty tree has nonzero length");
3450    } else {
3451        assert_eq!(
3452            tree.pieces.nodes[tree.root].color(),
3453            Color::Black,
3454            "root is not black"
3455        );
3456        let (len, _) = check(tree, tree.root);
3457        assert_eq!(len, tree.total_length() as usize, "Tree length and root length differ");
3458
3459    }
3460}
3461
3462pub fn assert_piece_metadata(tree: &PieceTree) {
3463    fn collect(tree: &PieceTree, node: NodeRef, out: &mut Vec<Piece>) {
3464        if node == NIL { return }
3465
3466        let n = tree.pieces.nodes[node];
3467
3468        collect(tree, n.left, out);
3469        out.push(tree.pieces.get_piece(node));
3470        collect(tree, n.right, out);
3471    }
3472
3473    let mut pieces = Vec::new();
3474    collect(tree, tree.root, &mut pieces);
3475
3476    for p in &pieces {
3477        let buf   = tree.buffers.get(p.buffer);
3478        let start = p.byte_offset as usize;
3479        let end   = start + p.byte_length as usize;
3480
3481        assert!(end <= buf.len(), "Piece points past end of buffer");
3482        assert!(p.byte_length > 0, "Zero-length piece found");
3483
3484        let slice = &buf[start..end];
3485        let (actual_chars, actual_nl) = count_chars_and_newlines(slice.as_bytes());
3486        assert_eq!(p.char_count,    actual_chars, "char_count mismatch");
3487        assert_eq!(p.newline_count, actual_nl,    "newline_count mismatch");
3488
3489        //
3490        // piece_start_char and buffer_start_line: scan only the prefix once
3491        //
3492        let prefix = &buf.as_bytes()[..p.byte_offset as usize];
3493        let actual_start_char = bytecount::num_chars(prefix) as u32;
3494        let actual_start_line = bytecount::count(prefix, b'\n') as u32;
3495
3496        assert_eq!(p.piece_start_char,  actual_start_char,
3497            "piece_start_char mismatch buffer={} offset={}", p.buffer.as_u32(), p.byte_offset);
3498
3499        assert_eq!(p.buffer_start_line, actual_start_line,
3500            "buffer_start_line mismatch buffer={} offset={}", p.buffer.as_u32(), p.byte_offset);
3501
3502        //
3503        // Verify the newline_offsets slice
3504        //
3505        let nl_offsets = &tree.buffers.get_newlines(p.buffer)
3506            [p.buffer_start_line as usize..(p.buffer_start_line + p.newline_count) as usize];
3507
3508        for (i, &abs_byte) in nl_offsets.iter().enumerate() {
3509            assert!(
3510                abs_byte >= p.byte_offset && abs_byte < p.byte_offset + p.byte_length,
3511                "newline_offsets[{}] outside piece", p.buffer_start_line as usize + i
3512            );
3513
3514            assert_eq!(
3515                buf.as_bytes()[abs_byte as usize], b'\n',
3516                "newline_offsets[{}] not a newline", p.buffer_start_line as usize + i
3517            );
3518        }
3519    }
3520}
3521
3522pub fn assert_no_mergeable_neighbors(tree: &PieceTree) {
3523    fn inorder(tree: &PieceTree, node: NodeRef, last: &mut Option<Piece>) {
3524        if node == NIL { return }
3525
3526        let n = tree.pieces.nodes[node];
3527        inorder(tree, n.left, last);
3528
3529        let cur = tree.get_piece(node);
3530
3531        if let Some(prev) = *last {
3532            if prev.buffer == cur.buffer
3533                && prev.byte_offset + prev.byte_length == cur.byte_offset
3534            {
3535                panic!("Mergeable neighboring pieces were left unmerged");
3536            }
3537        }
3538
3539        *last = Some(cur);
3540        inorder(tree, n.right, last);
3541    }
3542
3543    let mut last = None;
3544    inorder(tree, tree.root, &mut last);
3545}
3546
3547pub fn assert_coordinates(tree: &PieceTree, oracle: &str) {
3548    #[inline]
3549    fn oracle_offset_to_line_col(s: &str, byte_offset: usize) -> (usize, usize) {
3550        let slice = &s[..byte_offset];
3551        let line  = bytecount::count(slice.as_bytes(), b'\n');
3552        let last_nl = slice.rfind('\n').map_or(0, |i| i + 1);
3553        let col   = bytecount::num_chars(s[last_nl..byte_offset].as_bytes());
3554        (line, col)
3555    }
3556
3557    #[inline]
3558    fn oracle_line_to_offset(s: &str, target_line: usize) -> usize {
3559        if target_line == 0 { return 0 }
3560
3561        let mut line = 0;
3562        for (i, c) in s.char_indices() {
3563            if c == '\n' {
3564                line += 1;
3565                if line == target_line { return i + 1; }
3566            }
3567        }
3568
3569        s.len()
3570    }
3571
3572    fn check_coordinate(tree: &PieceTree, oracle: &str, byte_index: u32) {
3573        let char_index = bytecount::num_chars(oracle[..byte_index as usize].as_bytes()) as u32;
3574
3575        assert_eq!(tree.char_to_byte(char_index), byte_index,
3576                   "char_to_byte({}) wrong", char_index);
3577
3578        assert_eq!(tree.byte_to_char(byte_index), char_index,
3579                   "byte_to_char({}) wrong", byte_index);
3580
3581        let expected_lc = oracle_offset_to_line_col(oracle, byte_index as usize);
3582        assert_eq!(
3583            tree.byte_to_line_col(byte_index),
3584            (expected_lc.0 as u32, expected_lc.1 as u32),
3585            "offset_to_line_col({}) wrong", byte_index
3586        );
3587    }
3588
3589    let total_bytes = oracle.len() as u32;
3590    let total_chars = oracle.chars().count() as u32;
3591
3592    //
3593    // Always check these
3594    //
3595    check_coordinate(tree, oracle, 0);
3596    if total_bytes > 0 {
3597        check_coordinate(tree, oracle, total_bytes - 1);
3598        check_coordinate(tree, oracle, total_bytes / 2);
3599    }
3600
3601
3602    //
3603    // Sample ~8 positions spread across the document
3604    //
3605    for i in 0u32..8 {
3606        let byte =
3607            ((total_bytes as u64 * ((i as u64 * 2_654_435_761) & 0xFFFF_FFFF)) >> 32) as u32 % total_bytes.max(1);
3608
3609        // Snap to char boundary
3610        let byte = oracle.as_bytes()[..byte as usize]
3611            .iter().rposition(|&b| !(0x80..0xC0).contains(&b))
3612            .map_or(0, |i| i as u32);
3613
3614        check_coordinate(tree, oracle, byte);
3615    }
3616
3617    //
3618    // EOF and out-of-bounds always
3619    //
3620    assert_eq!(tree.char_to_byte(total_chars), total_bytes);
3621    assert_eq!(tree.byte_to_char(total_bytes), total_chars);
3622    assert_eq!(tree.try_char_to_byte(total_chars + 1), None);
3623    assert_eq!(tree.try_byte_to_char(total_bytes + 1), None);
3624    assert_eq!(tree.try_byte_to_line_col(total_bytes + 1), None);
3625
3626    //
3627    // line_to_offset: check first, last, middle line only
3628    //
3629    let total_lines = oracle.chars().filter(|&c| c == '\n').count() + 1;
3630    for line in [0, total_lines / 2, total_lines.saturating_sub(1)] {
3631        let expected = oracle_line_to_offset(oracle, line) as u32;
3632        assert_eq!(tree.try_line_to_byte(line as u32), Some(expected),
3633            "line_to_offset({}) wrong", line);
3634    }
3635
3636    assert_eq!(tree.try_line_to_byte(total_lines as u32), None);
3637}