//! fresh/model/piece_tree.rs — piece tree model for text storage.

1use std::io::{self, Read, Seek, SeekFrom};
2use std::path::PathBuf;
3use std::sync::Arc;
4
/// A position in the document (line and column)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Position {
    pub line: usize,   // 0-indexed line number
    pub column: usize, // Byte offset within the line (bytes, not characters/graphemes)
}
11
/// Data storage for a buffer - either loaded in memory or unloaded (file reference)
///
/// An `Unloaded` region is turned into a `Loaded` one by `StringBuffer::load`,
/// which reads exactly `bytes` bytes starting at `file_offset`.
#[derive(Debug, Clone)]
pub enum BufferData {
    /// Loaded in memory with optional line indexing
    Loaded {
        data: Vec<u8>,
        line_starts: Option<Vec<usize>>, // None = not indexed (large file mode)
    },
    /// Not yet loaded from file
    Unloaded {
        file_path: PathBuf,
        file_offset: usize, // Where in file this buffer starts
        bytes: usize,       // Length of this region
    },
}
27
/// A string buffer containing a chunk of text data and its line metadata
/// This is the fundamental storage unit - piece tree nodes reference these buffers
/// (leaves store a `BufferLocation` whose `buffer_id` indexes into a buffer list).
#[derive(Debug, Clone)]
pub struct StringBuffer {
    /// Unique identifier for this buffer
    pub id: usize,
    /// The buffer data - either loaded or unloaded
    pub data: BufferData,
}
37
38impl StringBuffer {
39    /// Create a new string buffer with line metadata (legacy constructor)
40    /// Automatically computes line starts
41    pub fn new(id: usize, data: Vec<u8>) -> Self {
42        let line_starts = Self::compute_line_starts(&data);
43        StringBuffer {
44            id,
45            data: BufferData::Loaded {
46                data,
47                line_starts: Some(line_starts),
48            },
49        }
50    }
51
52    /// Create a loaded buffer with optional line indexing
53    pub fn new_loaded(id: usize, data: Vec<u8>, compute_lines: bool) -> Self {
54        let line_starts = if compute_lines {
55            Some(Self::compute_line_starts(&data))
56        } else {
57            None
58        };
59        StringBuffer {
60            id,
61            data: BufferData::Loaded { data, line_starts },
62        }
63    }
64
65    /// Create buffer for file region (not yet loaded)
66    pub fn new_unloaded(id: usize, file_path: PathBuf, file_offset: usize, bytes: usize) -> Self {
67        StringBuffer {
68            id,
69            data: BufferData::Unloaded {
70                file_path,
71                file_offset,
72                bytes,
73            },
74        }
75    }
76
77    /// Check if buffer is loaded
78    pub fn is_loaded(&self) -> bool {
79        matches!(self.data, BufferData::Loaded { .. })
80    }
81
82    /// Get data reference if loaded, None if unloaded
83    ///
84    /// NOTE: This is a low-level API. External code should use TextBuffer::get_text_range_mut()
85    /// which provides automatic lazy loading. This method is pub(crate) to prevent misuse.
86    pub(crate) fn get_data(&self) -> Option<&[u8]> {
87        match &self.data {
88            BufferData::Loaded { data, .. } => Some(data),
89            BufferData::Unloaded { .. } => None,
90        }
91    }
92
93    /// Get line starts if available
94    pub fn get_line_starts(&self) -> Option<&[usize]> {
95        match &self.data {
96            BufferData::Loaded { line_starts, .. } => line_starts.as_deref(),
97            BufferData::Unloaded { .. } => None,
98        }
99    }
100
101    /// Load buffer data from file (for unloaded buffers)
102    /// Returns error if buffer is not unloaded or if I/O fails
103    pub fn load(&mut self) -> io::Result<()> {
104        match &self.data {
105            BufferData::Loaded { .. } => Ok(()), // Already loaded
106            BufferData::Unloaded {
107                file_path,
108                file_offset,
109                bytes,
110            } => {
111                // Load from file
112                let mut file = std::fs::File::open(file_path)?;
113                file.seek(SeekFrom::Start(*file_offset as u64))?;
114
115                let mut buffer = vec![0u8; *bytes];
116                file.read_exact(&mut buffer)?;
117
118                // Replace with loaded data (no line indexing for lazy-loaded chunks)
119                self.data = BufferData::Loaded {
120                    data: buffer,
121                    line_starts: None,
122                };
123
124                Ok(())
125            }
126        }
127    }
128
129    /// Create a new unloaded buffer representing a chunk of this buffer
130    /// This is used for splitting large unloaded buffers into smaller chunks
131    ///
132    /// # Arguments
133    /// * `new_id` - The ID for the new buffer
134    /// * `chunk_offset` - Offset within this buffer where the chunk starts
135    /// * `chunk_bytes` - Number of bytes in the chunk
136    ///
137    /// # Returns
138    /// A new StringBuffer referencing the chunk, or None if this buffer is not unloaded
139    /// or if the chunk range is invalid
140    pub fn create_chunk_buffer(
141        &self,
142        new_id: usize,
143        chunk_offset: usize,
144        chunk_bytes: usize,
145    ) -> Option<StringBuffer> {
146        match &self.data {
147            BufferData::Unloaded {
148                file_path,
149                file_offset,
150                bytes,
151            } => {
152                // Validate chunk range
153                if chunk_offset + chunk_bytes > *bytes {
154                    return None;
155                }
156
157                Some(StringBuffer::new_unloaded(
158                    new_id,
159                    file_path.clone(),
160                    file_offset + chunk_offset,
161                    chunk_bytes,
162                ))
163            }
164            BufferData::Loaded { .. } => None, // Can't create chunk from loaded buffer
165        }
166    }
167
168    /// Compute line start offsets for a buffer
169    fn compute_line_starts(data: &[u8]) -> Vec<usize> {
170        let mut line_starts = vec![0];
171        for (i, &byte) in data.iter().enumerate() {
172            if byte == b'\n' {
173                line_starts.push(i + 1);
174            }
175        }
176        line_starts
177    }
178
179    /// Get the number of line feeds (newlines) in this buffer
180    /// Returns None if line indexing was not computed or buffer is unloaded
181    pub fn line_feed_count(&self) -> Option<usize> {
182        match &self.data {
183            BufferData::Loaded { line_starts, .. } => line_starts
184                .as_ref()
185                .map(|starts| starts.len().saturating_sub(1)),
186            BufferData::Unloaded { .. } => None,
187        }
188    }
189
190    /// Append data to this buffer and recompute line starts
191    /// Returns the offset where the appended data starts
192    /// Only works for loaded buffers with line starts
193    pub fn append(&mut self, data_to_append: &[u8]) -> usize {
194        match &mut self.data {
195            BufferData::Loaded { data, line_starts } => {
196                let start_offset = data.len();
197                data.extend_from_slice(data_to_append);
198
199                // Add new line starts if we're tracking them
200                if let Some(ref mut line_starts) = line_starts {
201                    for (i, &byte) in data_to_append.iter().enumerate() {
202                        if byte == b'\n' {
203                            line_starts.push(start_offset + i + 1);
204                        }
205                    }
206                }
207
208                start_offset
209            }
210            BufferData::Unloaded { .. } => {
211                // Can't append to unloaded buffer
212                0
213            }
214        }
215    }
216}
217
/// Identifies which buffer a piece of text comes from
///
/// Both variants carry a `buffer_id` indexing into the buffer list; the
/// variant distinguishes original content from edits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BufferLocation {
    /// Data is in the original stored/persisted buffer
    Stored(usize), // buffer_id
    /// Data is in the added/modified buffer
    Added(usize), // buffer_id
}
226
227impl BufferLocation {
228    /// Get the buffer ID
229    pub fn buffer_id(&self) -> usize {
230        match self {
231            Self::Stored(id) | Self::Added(id) => *id,
232        }
233    }
234}
235
/// A node in the piece tree with integrated line tracking
///
/// `left_bytes`/`lf_left` cache the left subtree's totals so lookups can
/// descend without visiting the left subtree.
#[derive(Debug, Clone)]
pub enum PieceTreeNode {
    /// Internal node with left and right children
    Internal {
        left_bytes: usize,      // Total bytes in left subtree
        lf_left: Option<usize>, // Total line feeds in left subtree (None if unknown)
        left: Arc<PieceTreeNode>,
        right: Arc<PieceTreeNode>,
    },
    /// Leaf node representing an actual piece
    Leaf {
        location: BufferLocation, // Where this piece's data is (includes buffer_id)
        offset: usize,            // Offset within the buffer
        bytes: usize,             // Number of bytes in this piece
        line_feed_cnt: Option<usize>, // Number of line feeds in this piece (None if unknown)
    },
}
254
/// Information about a piece at a specific location
#[derive(Debug, Clone)]
pub struct PieceInfo {
    pub location: BufferLocation,       // Which buffer (Stored or Added)
    pub offset: usize,                  // Starting offset of this piece within that buffer
    pub bytes: usize,                   // Length of this piece in bytes
    pub offset_in_piece: Option<usize>, // For queries: how far into this piece the query point is
}
263
/// Result from finding a piece by byte offset
/// (internal to the tree: pairs the piece info with its document position)
#[derive(Debug, Clone)]
struct OffsetFindResult {
    info: PieceInfo,
    bytes_before: usize, // Total bytes in all pieces before this one
}
270
/// A cursor position in the document
///
/// NOTE: `cursor_at_offset` only fills in `byte_offset`; `line`/`col` are
/// left at 0 there and computed elsewhere (see LineIndex note on that method).
#[derive(Debug, Clone)]
pub struct Cursor {
    pub byte_offset: usize, // Absolute byte offset in document
    pub line: usize,        // Line number (0-indexed)
    pub col: usize,         // Column within line (byte offset)
}
278
/// Represents the data for a leaf node in the piece tree
/// (a flat, Copy-able mirror of `PieceTreeNode::Leaf`, used when collecting
/// leaves and rebuilding balanced trees)
#[derive(Debug, Clone, Copy)]
pub struct LeafData {
    pub location: BufferLocation,
    pub offset: usize,
    pub bytes: usize,
    pub line_feed_cnt: Option<usize>,
}
287
288impl LeafData {
289    pub fn new(
290        location: BufferLocation,
291        offset: usize,
292        bytes: usize,
293        line_feed_cnt: Option<usize>,
294    ) -> Self {
295        LeafData {
296            location,
297            offset,
298            bytes,
299            line_feed_cnt,
300        }
301    }
302}
303
/// Statistics about the piece tree structure
#[derive(Debug, Clone, Copy)]
pub struct TreeStats {
    pub total_bytes: usize,
    pub depth: usize,
    pub leaf_count: usize,
    pub line_feed_count: Option<usize>, // None if any piece's newline count is unknown
}
312
313// Line iteration can be implemented by:
314// 1. Maintaining a cursor position (current piece + offset within piece)
315// 2. For next_line(): scan forward in the current piece's buffer until '\n',
316//    or move to the next piece if we reach the end
317// 3. For prev_line(): scan backward similarly
318// The iterator would need access to the actual buffer data (Stored/Added)
319// which is managed externally, so this is deferred until buffer integration.
320
impl PieceTreeNode {
    /// Find the piece containing the given byte offset
    ///
    /// Returns `None` when `offset` is at or past the end of this subtree.
    /// `bytes_before` in the result is the document offset of the found piece's
    /// first byte, accumulated on the way back up the recursion.
    fn find_by_offset(&self, offset: usize) -> Option<OffsetFindResult> {
        match self {
            Self::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                if offset < *left_bytes {
                    left.find_by_offset(offset)
                } else {
                    // Search in right subtree
                    right.find_by_offset(offset - left_bytes).map(|mut result| {
                        // Adjust bytes_before to account for left subtree
                        result.bytes_before += left_bytes;
                        result
                    })
                }
            }
            Self::Leaf {
                location,
                offset: piece_offset,
                bytes,
                ..
            } => {
                if offset < *bytes {
                    Some(OffsetFindResult {
                        info: PieceInfo {
                            location: *location,
                            offset: *piece_offset,
                            bytes: *bytes,
                            offset_in_piece: Some(offset),
                        },
                        bytes_before: 0,
                    })
                } else {
                    None
                }
            }
        }
    }

    /// Get total bytes in this node
    ///
    /// Only the right spine is traversed: left-subtree sizes are cached in
    /// `left_bytes`, so this is O(depth), not O(leaf count).
    fn total_bytes(&self) -> usize {
        match self {
            Self::Internal {
                left_bytes, right, ..
            } => left_bytes + right.total_bytes(),
            Self::Leaf { bytes, .. } => *bytes,
        }
    }

    /// Get total line feeds in this node
    /// Returns None if any piece has unknown line count
    fn total_line_feeds(&self) -> Option<usize> {
        match self {
            Self::Internal { lf_left, right, .. } => match (lf_left, right.total_line_feeds()) {
                (Some(left), Some(right)) => Some(left + right),
                _ => None,
            },
            Self::Leaf { line_feed_cnt, .. } => *line_feed_cnt,
        }
    }

    /// Get the depth of this tree (a single leaf has depth 1)
    fn depth(&self) -> usize {
        match self {
            Self::Internal { left, right, .. } => 1 + left.depth().max(right.depth()),
            Self::Leaf { .. } => 1,
        }
    }

    /// Count the number of leaf nodes
    fn count_leaves(&self) -> usize {
        match self {
            Self::Internal { left, right, .. } => left.count_leaves() + right.count_leaves(),
            Self::Leaf { .. } => 1,
        }
    }

    /// Collect all leaves in order (in-order traversal, left before right)
    fn collect_leaves(&self, leaves: &mut Vec<LeafData>) {
        match self {
            Self::Internal { left, right, .. } => {
                left.collect_leaves(leaves);
                right.collect_leaves(leaves);
            }
            Self::Leaf {
                location,
                offset,
                bytes,
                line_feed_cnt,
            } => {
                leaves.push(LeafData::new(*location, *offset, *bytes, *line_feed_cnt));
            }
        }
    }

    /// Count line feeds in a byte range [start, end)
    /// current_offset: byte offset at the start of this node
    /// Returns None if any piece in the range has unknown line count
    ///
    /// NOTE(review): the result is approximate for ranges that partially
    /// overlap a leaf (see the leaf arm below), so the count can overshoot.
    fn count_lines_in_byte_range(
        &self,
        current_offset: usize,
        start: usize,
        end: usize,
    ) -> Option<usize> {
        match self {
            Self::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                let left_end = current_offset + left_bytes;

                // NOTE(review): self.total_bytes() is recomputed here (twice on
                // this path) and is O(depth) per call.
                if end <= current_offset || start >= current_offset + self.total_bytes() {
                    Some(0) // Range is completely before or after this node
                } else if start <= current_offset && end >= current_offset + self.total_bytes() {
                    // Range completely contains this node
                    self.total_line_feeds()
                } else if end <= left_end {
                    // Range is completely in left subtree
                    left.count_lines_in_byte_range(current_offset, start, end)
                } else if start >= left_end {
                    // Range is completely in right subtree
                    right.count_lines_in_byte_range(left_end, start, end)
                } else {
                    // Range spans both subtrees
                    let left_count = left.count_lines_in_byte_range(current_offset, start, end)?;
                    let right_count = right.count_lines_in_byte_range(left_end, start, end)?;
                    Some(left_count + right_count)
                }
            }
            Self::Leaf {
                line_feed_cnt,
                bytes,
                ..
            } => {
                let node_end = current_offset + bytes;

                if end <= current_offset || start >= node_end {
                    Some(0) // No overlap
                } else if start <= current_offset && end >= node_end {
                    // Range completely contains this leaf
                    *line_feed_cnt
                } else {
                    // Partial overlap - for simplicity, return the full count
                    // (accurate counting would require scanning the buffer)
                    *line_feed_cnt
                }
            }
        }
    }

    /// Find byte offset for a given line/column position
    /// current_offset: byte offset at start of this node
    /// lines_before: number of complete lines before this node
    ///
    /// Requires known line counts on the path and a line-start index on the
    /// target leaf's buffer; returns None if either is missing.
    fn find_byte_offset_for_line(
        &self,
        current_offset: usize,
        lines_before: usize,
        target_line: usize,
        column: usize,
        buffers: &[StringBuffer],
    ) -> Option<usize> {
        match self {
            Self::Internal {
                left_bytes,
                lf_left,
                left,
                right,
            } => {
                // If line count is unknown, we can't do line-based navigation
                let lf_left = lf_left.as_ref()?;
                let lines_after_left = lines_before + lf_left;

                // When looking for line start (column == 0), we want the leftmost piece containing the line
                // So use <= instead of < to prefer going left when the line boundary is exactly at lines_after_left
                let go_left = if column == 0 {
                    target_line <= lines_after_left
                } else {
                    target_line < lines_after_left
                };

                if go_left {
                    // Target is in left subtree
                    let result = left.find_byte_offset_for_line(
                        current_offset,
                        lines_before,
                        target_line,
                        column,
                        buffers,
                    );
                    // If left returns None, try right as fallback (happens when line starts after a newline)
                    result.or_else(|| {
                        right.find_byte_offset_for_line(
                            current_offset + left_bytes,
                            lines_after_left,
                            target_line,
                            column,
                            buffers,
                        )
                    })
                } else {
                    // Target is in right subtree
                    right.find_byte_offset_for_line(
                        current_offset + left_bytes,
                        lines_after_left,
                        target_line,
                        column,
                        buffers,
                    )
                }
            }
            Self::Leaf {
                location,
                offset,
                bytes,
                line_feed_cnt,
            } => {
                // If line count is unknown, we can't do line-based navigation
                let line_feed_cnt = line_feed_cnt.as_ref()?;
                let lines_in_piece = lines_before + line_feed_cnt;

                // Special case: when looking for column==0 of line N where N == lines_in_piece,
                // the line might start in the NEXT piece if this piece ends with a newline.
                // Check if the last byte of this piece is a newline.
                // (target_line > lines_before implies line_feed_cnt > 0 here, so
                // bytes > 0 and `offset + bytes - 1` cannot underflow.)
                if column == 0 && target_line == lines_in_piece && target_line > lines_before {
                    let buffer = buffers.get(location.buffer_id())?;
                    let data = buffer.get_data()?;
                    let last_byte_offset = offset + bytes - 1;
                    let last_byte = data.get(last_byte_offset)?;

                    if *last_byte == b'\n' {
                        // Piece ends with newline, so the next line starts in the next piece
                        return None;
                    }
                    // Otherwise, line starts within this piece after a newline
                }

                if target_line < lines_before || target_line > lines_in_piece {
                    // Target line not in this piece
                    return None;
                }

                // Get the buffer for this piece
                let buffer_id = location.buffer_id();
                let buffer = buffers.get(buffer_id)?;
                let line_starts = buffer.get_line_starts()?;

                // Find the line within the piece
                let line_in_piece = target_line - lines_before;

                // Get piece range in buffer
                let piece_start_in_buffer = *offset;
                let piece_end_in_buffer = offset + bytes;

                // Special case: first line of piece (line_in_piece == 0)
                let line_start_in_buffer = if line_in_piece == 0 {
                    // First line starts at piece start
                    piece_start_in_buffer
                } else {
                    // Find the Nth newline within this piece
                    // Count line_starts that fall within [piece_start, piece_end)
                    // NOTE(review): linear scan over the buffer's full line_starts
                    // list; a binary search for piece_start would be faster.
                    let mut lines_seen = 0;
                    let mut found_line_start = None;

                    for &line_start in line_starts.iter() {
                        // Line starts are positions of newlines + 1, or beginning of buffer (0)
                        // We want line_starts that are > piece_start and < piece_end
                        if line_start > piece_start_in_buffer && line_start < piece_end_in_buffer {
                            if lines_seen == line_in_piece - 1 {
                                // This is the start of our target line
                                found_line_start = Some(line_start);
                                break;
                            }
                            lines_seen += 1;
                        }
                    }

                    found_line_start?
                };

                // Add column offset
                let target_offset_in_buffer = line_start_in_buffer + column;

                // Convert to document offset
                // (clamped to the piece, so an over-long column lands at piece end)
                let offset_in_piece = target_offset_in_buffer.saturating_sub(piece_start_in_buffer);
                Some(current_offset + offset_in_piece.min(*bytes))
            }
        }
    }
}
617
/// The main piece table structure with integrated line tracking
///
/// `total_bytes` caches the document length so end-of-document checks do not
/// need a tree traversal; it is kept in sync by the insert methods.
#[derive(Debug, Clone)]
pub struct PieceTree {
    root: Arc<PieceTreeNode>,
    total_bytes: usize,
}
624
625impl PieceTree {
626    /// Create a new piece table with a single initial piece
627    pub fn new(
628        location: BufferLocation,
629        offset: usize,
630        bytes: usize,
631        line_feed_cnt: Option<usize>,
632    ) -> Self {
633        PieceTree {
634            root: Arc::new(PieceTreeNode::Leaf {
635                location,
636                offset,
637                bytes,
638                line_feed_cnt,
639            }),
640            total_bytes: bytes,
641        }
642    }
643
644    /// Create an empty piece table
645    pub fn empty() -> Self {
646        PieceTree {
647            root: Arc::new(PieceTreeNode::Leaf {
648                location: BufferLocation::Stored(0),
649                offset: 0,
650                bytes: 0,
651                line_feed_cnt: Some(0), // Empty has zero line feeds (known)
652            }),
653            total_bytes: 0,
654        }
655    }
656
657    /// Build a balanced tree from a list of leaves
658    fn build_balanced(leaves: &[LeafData]) -> Arc<PieceTreeNode> {
659        if leaves.is_empty() {
660            return Arc::new(PieceTreeNode::Leaf {
661                location: BufferLocation::Stored(0),
662                offset: 0,
663                bytes: 0,
664                line_feed_cnt: Some(0), // Empty has zero line feeds (known)
665            });
666        }
667
668        if leaves.len() == 1 {
669            let leaf = leaves[0];
670            return Arc::new(PieceTreeNode::Leaf {
671                location: leaf.location,
672                offset: leaf.offset,
673                bytes: leaf.bytes,
674                line_feed_cnt: leaf.line_feed_cnt,
675            });
676        }
677
678        // Split in the middle
679        let mid = leaves.len() / 2;
680        let left = Self::build_balanced(&leaves[..mid]);
681        let right = Self::build_balanced(&leaves[mid..]);
682
683        let left_bytes = left.total_bytes();
684        let lf_left = left.total_line_feeds();
685
686        Arc::new(PieceTreeNode::Internal {
687            left_bytes,
688            lf_left,
689            left,
690            right,
691        })
692    }
693
694    /// Rebuild the tree to be balanced
695    fn rebalance(&mut self) {
696        let mut leaves = Vec::new();
697        self.root.collect_leaves(&mut leaves);
698        self.root = Self::build_balanced(&leaves);
699    }
700
701    /// Check if rebalancing is needed and do it
702    fn check_and_rebalance(&mut self) {
703        let count = self.root.count_leaves();
704        if count < 2 {
705            return;
706        }
707
708        let depth = self.root.depth();
709        let max_depth = 2 * (count as f64).log2().ceil() as usize;
710
711        if depth > max_depth {
712            self.rebalance();
713        }
714    }
715
716    /// Find the piece at the given byte offset
717    pub fn find_by_offset(&self, offset: usize) -> Option<PieceInfo> {
718        if offset >= self.total_bytes {
719            return None;
720        }
721        self.root.find_by_offset(offset).map(|result| result.info)
722    }
723
724    /// Create a cursor at the given byte offset
725    /// Note: line/col calculation should be done by LineIndex
726    pub fn cursor_at_offset(&self, offset: usize) -> Cursor {
727        Cursor {
728            byte_offset: offset.min(self.total_bytes),
729            line: 0,
730            col: 0,
731        }
732    }
733
    /// Insert text at the given offset
    /// Returns new cursor after the inserted text
    /// line_feed_cnt: number of line feeds in the inserted text (None if unknown/not computed)
    /// buffers: reference to the string buffers for computing line feeds during splits
    pub fn insert(
        &mut self,
        offset: usize,
        location: BufferLocation,
        buffer_offset: usize,
        bytes: usize,
        line_feed_cnt: Option<usize>,
        buffers: &[StringBuffer],
    ) -> Cursor {
        if bytes == 0 {
            return self.cursor_at_offset(offset);
        }

        // Find the piece to split
        if let Some(_result) = self.root.find_by_offset(offset) {
            // Split the piece at the insertion point
            let mut leaves = Vec::new();
            let insert_leaf = LeafData::new(location, buffer_offset, bytes, line_feed_cnt);
            self.collect_leaves_with_split(
                &self.root,
                0,
                offset,
                Some(insert_leaf),
                &mut leaves,
                buffers,
            );

            self.root = Self::build_balanced(&leaves);
            self.total_bytes += bytes;

            self.check_and_rebalance();
        } else if offset == self.total_bytes {
            // Append at end
            let mut leaves = Vec::new();
            self.root.collect_leaves(&mut leaves);
            leaves.push(LeafData::new(location, buffer_offset, bytes, line_feed_cnt));

            self.root = Self::build_balanced(&leaves);
            self.total_bytes += bytes;

            self.check_and_rebalance();
        }
        // NOTE(review): an offset strictly beyond total_bytes falls through
        // both branches — the tree is left unchanged yet a cursor is still
        // returned (clamped below). Confirm callers never pass such offsets.

        self.cursor_at_offset(offset + bytes)
    }
783
    /// Get a clone of the root node (shared via Arc)
    /// (O(1): only the reference count is bumped, the tree is not copied)
    pub fn root(&self) -> Arc<PieceTreeNode> {
        Arc::clone(&self.root)
    }
788
    /// Insert text at the given position (line, column)
    /// Returns new cursor after the inserted text
    /// This performs a SINGLE tree traversal (more efficient than position_to_offset + insert)
    #[allow(clippy::too_many_arguments)]
    pub fn insert_at_position(
        &mut self,
        line: usize,
        column: usize,
        location: BufferLocation,
        buffer_offset: usize,
        bytes: usize,
        line_feed_cnt: usize,
        buffers: &[StringBuffer],
    ) -> Cursor {
        if bytes == 0 {
            // Nothing to insert; just resolve the position to a cursor.
            let offset = self.position_to_offset(line, column, buffers);
            return self.cursor_at_offset(offset);
        }

        // Collect leaves while splitting at the position
        // (unlike `insert`, the newline count of the inserted text is required
        // here, so the leaf always carries Some(line_feed_cnt))
        let mut leaves = Vec::new();
        let insert_leaf = LeafData::new(location, buffer_offset, bytes, Some(line_feed_cnt));

        self.collect_leaves_with_split_at_position(
            &self.root,
            0,
            0,
            line,
            column,
            Some(insert_leaf),
            &mut leaves,
            buffers,
        );

        self.root = Self::build_balanced(&leaves);
        self.total_bytes += bytes;
        self.check_and_rebalance();

        // Return cursor at position after insertion
        // NOTE(review): this position_to_offset call is itself a traversal, so
        // the "SINGLE tree traversal" claim above covers only the split step.
        let offset = self.position_to_offset(line, column, buffers) + bytes;
        self.cursor_at_offset(offset)
    }
831
    /// Helper to collect leaves while splitting at a position (line, column)
    /// Similar to collect_leaves_with_split but works with positions instead of offsets
    ///
    /// Walks the subtree in order, pushing every leaf into `leaves`. The leaf that
    /// contains (`target_line`, `target_column`) is split in two, with `insert`
    /// (if any) spliced between the halves.
    ///
    /// * `current_offset` - document byte offset where this subtree begins
    /// * `lines_before` - number of line feeds in the document before this subtree
    /// * `insert` - leaf to splice at the target position; forwarded into only one
    ///   subtree per internal node, so it is emitted at most once
    ///
    /// NOTE(review): subtrees whose line metadata is `None` are skipped entirely
    /// (early return), so their leaves are dropped from the rebuilt tree — confirm
    /// callers only invoke this when the whole tree has line counts.
    #[allow(clippy::too_many_arguments)]
    fn collect_leaves_with_split_at_position(
        &self,
        node: &Arc<PieceTreeNode>,
        current_offset: usize,
        lines_before: usize,
        target_line: usize,
        target_column: usize,
        insert: Option<LeafData>,
        leaves: &mut Vec<LeafData>,
        buffers: &[StringBuffer],
    ) {
        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                lf_left,
                left,
                right,
            } => {
                // If line counts are unknown, we can't do position-based navigation
                let Some(lf_left) = lf_left else {
                    return;
                };
                let lines_after_left = lines_before + lf_left;

                // Determine if target position is in left or right subtree.
                // At column 0 a target exactly on the boundary line belongs to the
                // left subtree; with a non-zero column it must start strictly inside.
                let go_left = if target_column == 0 {
                    target_line <= lines_after_left
                } else {
                    target_line < lines_after_left
                };

                if go_left {
                    // Target is in left subtree; right subtree is copied verbatim
                    self.collect_leaves_with_split_at_position(
                        left,
                        current_offset,
                        lines_before,
                        target_line,
                        target_column,
                        insert,
                        leaves,
                        buffers,
                    );
                    self.collect_leaves_with_split_at_position(
                        right,
                        current_offset + left_bytes,
                        lines_after_left,
                        target_line,
                        target_column,
                        None,
                        leaves,
                        buffers,
                    );
                } else {
                    // Target is in right subtree; left subtree is copied verbatim
                    self.collect_leaves_with_split_at_position(
                        left,
                        current_offset,
                        lines_before,
                        target_line,
                        target_column,
                        None,
                        leaves,
                        buffers,
                    );
                    self.collect_leaves_with_split_at_position(
                        right,
                        current_offset + left_bytes,
                        lines_after_left,
                        target_line,
                        target_column,
                        insert,
                        leaves,
                        buffers,
                    );
                }
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                line_feed_cnt,
            } => {
                // If line counts are unknown, we can't do position-based navigation
                let Some(line_feed_cnt) = line_feed_cnt else {
                    return;
                };
                let lines_in_piece = lines_before + line_feed_cnt;

                // Check if this piece contains the target line
                if target_line >= lines_before && target_line <= lines_in_piece {
                    // Target line is in this piece
                    let buffer_id = location.buffer_id();
                    if let Some(buffer) = buffers.get(buffer_id) {
                        // Which line of this piece the target is on (0 = first)
                        let line_in_piece = target_line - lines_before;

                        // Find the line start within the piece
                        let line_start_in_buffer = if line_in_piece == 0 {
                            *offset
                        } else {
                            // Find the Nth newline within this piece by scanning the
                            // buffer's precomputed line starts that fall strictly
                            // inside this piece's byte range
                            let mut lines_seen = 0;
                            let mut found_line_start = *offset;

                            if let Some(line_starts) = buffer.get_line_starts() {
                                for &ls in line_starts.iter() {
                                    if ls > *offset && ls < *offset + *bytes {
                                        if lines_seen == line_in_piece - 1 {
                                            found_line_start = ls;
                                            break;
                                        }
                                        lines_seen += 1;
                                    }
                                }
                            }

                            // Falls back to the piece start when the buffer has no
                            // line index or too few in-range line starts
                            found_line_start
                        };

                        // Calculate split offset within the piece
                        // NOTE(review): the column is clamped to the piece size, not
                        // to the bytes remaining after the line start — confirm the
                        // column can never exceed the line length here
                        let column_offset = target_column.min(*bytes);
                        let split_in_buffer = line_start_in_buffer + column_offset;
                        let split_offset_in_piece =
                            split_in_buffer.saturating_sub(*offset).min(*bytes);

                        // Split the piece at this position
                        if split_offset_in_piece > 0 {
                            // First part (before split); line feeds recomputed from data
                            let lf_cnt = Self::compute_line_feeds_static(
                                buffers,
                                *location,
                                *offset,
                                split_offset_in_piece,
                            );
                            leaves.push(LeafData::new(
                                *location,
                                *offset,
                                split_offset_in_piece,
                                lf_cnt,
                            ));
                        }

                        // Inserted piece goes between the two halves
                        if let Some(insert_leaf) = insert {
                            leaves.push(insert_leaf);
                        }

                        // Second part (after split)
                        let remaining = bytes.saturating_sub(split_offset_in_piece);
                        if remaining > 0 {
                            let lf_cnt = Self::compute_line_feeds_static(
                                buffers,
                                *location,
                                offset + split_offset_in_piece,
                                remaining,
                            );
                            leaves.push(LeafData::new(
                                *location,
                                offset + split_offset_in_piece,
                                remaining,
                                lf_cnt,
                            ));
                        }
                    } else {
                        // Buffer not found, just keep the piece as-is
                        // NOTE(review): `insert` is silently dropped on this path —
                        // verify a live insert can never reach a missing buffer
                        leaves.push(LeafData::new(
                            *location,
                            *offset,
                            *bytes,
                            Some(*line_feed_cnt),
                        ));
                    }
                } else {
                    // Target line not in this piece, just keep it
                    leaves.push(LeafData::new(
                        *location,
                        *offset,
                        *bytes,
                        Some(*line_feed_cnt),
                    ));
                }
            }
        }
    }
1019
    /// Helper to collect leaves while splitting at insertion point
    ///
    /// In-order traversal that pushes every leaf into `leaves`. The leaf that
    /// contains `split_offset` is split in two, with `insert` (if any) spliced
    /// in at the split point. `current_offset` is the document byte offset
    /// where this subtree begins. `insert` is forwarded into only one subtree
    /// per internal node, so it is emitted at most once.
    ///
    /// NOTE(review): when `split_offset` equals the end of the final piece
    /// (append at document end), neither leaf case fires and `insert` is
    /// dropped — confirm callers handle end-of-document insertion separately.
    fn collect_leaves_with_split(
        &self,
        node: &Arc<PieceTreeNode>,
        current_offset: usize,
        split_offset: usize,
        insert: Option<LeafData>,
        leaves: &mut Vec<LeafData>,
        buffers: &[StringBuffer],
    ) {
        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                // Only pass `insert` to the subtree containing the split point
                if split_offset < current_offset + left_bytes {
                    // Split is in left subtree
                    self.collect_leaves_with_split(
                        left,
                        current_offset,
                        split_offset,
                        insert,
                        leaves,
                        buffers,
                    );
                    self.collect_leaves_with_split(
                        right,
                        current_offset + left_bytes,
                        split_offset,
                        None,
                        leaves,
                        buffers,
                    );
                } else {
                    // Split is in right subtree (or at boundary)
                    self.collect_leaves_with_split(
                        left,
                        current_offset,
                        split_offset,
                        None,
                        leaves,
                        buffers,
                    );
                    self.collect_leaves_with_split(
                        right,
                        current_offset + left_bytes,
                        split_offset,
                        insert,
                        leaves,
                        buffers,
                    );
                }
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                line_feed_cnt,
            } => {
                let piece_end = current_offset + bytes;

                if split_offset > current_offset && split_offset < piece_end {
                    // Split this piece - need to compute line feeds for each part
                    let offset_in_piece = split_offset - current_offset;

                    // First part (before split); line feeds recomputed from buffer data
                    if offset_in_piece > 0 {
                        let lf_cnt = Self::compute_line_feeds_static(
                            buffers,
                            *location,
                            *offset,
                            offset_in_piece,
                        );
                        leaves.push(LeafData::new(*location, *offset, offset_in_piece, lf_cnt));
                    }

                    // Inserted piece goes between the two halves
                    if let Some(insert_leaf) = insert {
                        leaves.push(insert_leaf);
                    }

                    // Second part (after split)
                    let remaining = bytes - offset_in_piece;
                    if remaining > 0 {
                        let lf_cnt = Self::compute_line_feeds_static(
                            buffers,
                            *location,
                            offset + offset_in_piece,
                            remaining,
                        );
                        leaves.push(LeafData::new(
                            *location,
                            offset + offset_in_piece,
                            remaining,
                            lf_cnt,
                        ));
                    }
                } else if split_offset == current_offset {
                    // Insert before this piece
                    if let Some(insert_leaf) = insert {
                        leaves.push(insert_leaf);
                    }
                    leaves.push(LeafData::new(*location, *offset, *bytes, *line_feed_cnt));
                } else {
                    // Don't split, just add the piece (split point lies outside it)
                    leaves.push(LeafData::new(*location, *offset, *bytes, *line_feed_cnt));
                }
            }
        }
    }
1133
1134    /// Helper to compute line feeds in a buffer range
1135    fn compute_line_feeds_static(
1136        buffers: &[StringBuffer],
1137        location: BufferLocation,
1138        offset: usize,
1139        bytes: usize,
1140    ) -> Option<usize> {
1141        let buffer_id = location.buffer_id();
1142        if let Some(buffer) = buffers.get(buffer_id) {
1143            if let Some(data) = buffer.get_data() {
1144                let end = (offset + bytes).min(data.len());
1145                Some(data[offset..end].iter().filter(|&&b| b == b'\n').count())
1146            } else {
1147                // Buffer is unloaded - return None
1148                None
1149            }
1150        } else {
1151            // Buffer not available - return None
1152            None
1153        }
1154    }
1155
1156    /// Split a piece at the given offset without inserting anything
1157    /// This is useful for isolating a chunk of a large piece for partial loading
1158    ///
1159    /// If the offset is in the middle of a piece, that piece will be split into two pieces.
1160    /// If the offset is at a piece boundary, nothing changes.
1161    /// Does nothing if offset is 0 or >= total_bytes.
1162    pub fn split_at_offset(&mut self, offset: usize, buffers: &[StringBuffer]) {
1163        if offset == 0 || offset >= self.total_bytes {
1164            return;
1165        }
1166
1167        // Check if we need to split (offset must be in middle of a piece)
1168        if let Some(_result) = self.root.find_by_offset(offset) {
1169            // Split the piece at the offset (with no insertion)
1170            let mut leaves = Vec::new();
1171            self.collect_leaves_with_split(&self.root, 0, offset, None, &mut leaves, buffers);
1172
1173            self.root = Self::build_balanced(&leaves);
1174            self.check_and_rebalance();
1175        }
1176    }
1177
1178    /// Replace buffer references in pieces
1179    /// This is used when creating chunk buffers from large unloaded buffers
1180    ///
1181    /// Finds all pieces that reference the old buffer at the specified offset/bytes
1182    /// and updates them to reference the new buffer at offset 0.
1183    pub fn replace_buffer_reference(
1184        &mut self,
1185        old_buffer_id: usize,
1186        old_buffer_offset: usize,
1187        old_buffer_bytes: usize,
1188        new_buffer_location: BufferLocation,
1189    ) {
1190        let mut leaves = Vec::new();
1191        self.root.collect_leaves(&mut leaves);
1192
1193        // Find and update matching pieces
1194        let mut modified = false;
1195        for leaf in &mut leaves {
1196            if leaf.location.buffer_id() == old_buffer_id
1197                && leaf.offset == old_buffer_offset
1198                && leaf.bytes == old_buffer_bytes
1199            {
1200                leaf.location = new_buffer_location;
1201                leaf.offset = 0; // New buffer starts at 0
1202                modified = true;
1203            }
1204        }
1205
1206        // Rebuild tree if we made changes
1207        if modified {
1208            self.root = Self::build_balanced(&leaves);
1209            self.check_and_rebalance();
1210        }
1211    }
1212
1213    /// Delete text starting at offset for the given number of bytes
1214    pub fn delete(&mut self, offset: usize, delete_bytes: usize, buffers: &[StringBuffer]) {
1215        if delete_bytes == 0 || offset >= self.total_bytes {
1216            return;
1217        }
1218
1219        let delete_bytes = delete_bytes.min(self.total_bytes - offset);
1220        let end_offset = offset + delete_bytes;
1221
1222        let mut leaves = Vec::new();
1223        self.collect_leaves_with_delete(&self.root, 0, offset, end_offset, &mut leaves, buffers);
1224
1225        self.root = Self::build_balanced(&leaves);
1226        self.total_bytes -= delete_bytes;
1227
1228        self.check_and_rebalance();
1229    }
1230
1231    /// Delete text in a range specified by positions (start_line, start_col) to (end_line, end_col)
1232    /// This performs a more efficient traversal than converting positions to offsets separately
1233    pub fn delete_position_range(
1234        &mut self,
1235        start_line: usize,
1236        start_column: usize,
1237        end_line: usize,
1238        end_column: usize,
1239        buffers: &[StringBuffer],
1240    ) {
1241        // Edge case: empty range
1242        if start_line == end_line && start_column == end_column {
1243            return;
1244        }
1245
1246        // Find both positions in a single traversal and collect leaves
1247        let mut leaves = Vec::new();
1248        let mut delete_start_offset = None;
1249        let mut delete_end_offset = None;
1250
1251        self.collect_leaves_with_position_delete(
1252            &self.root,
1253            0,
1254            0,
1255            start_line,
1256            start_column,
1257            end_line,
1258            end_column,
1259            &mut delete_start_offset,
1260            &mut delete_end_offset,
1261            &mut leaves,
1262            buffers,
1263        );
1264
1265        // Calculate how many bytes were deleted
1266        if let (Some(start), Some(end)) = (delete_start_offset, delete_end_offset) {
1267            let deleted_bytes = end.saturating_sub(start);
1268            if deleted_bytes > 0 {
1269                self.root = Self::build_balanced(&leaves);
1270                self.total_bytes = self.total_bytes.saturating_sub(deleted_bytes);
1271                self.check_and_rebalance();
1272            }
1273        }
1274    }
1275
    /// Helper to collect leaves while deleting a range specified by positions
    /// This finds both positions and performs the deletion in a single tree traversal
    ///
    /// Resolves (`start_line`, `start_column`) and (`end_line`, `end_column`) to
    /// document byte offsets (written into `delete_start_offset` /
    /// `delete_end_offset`) while pushing every surviving leaf — pieces outside
    /// the range plus the kept fragments of partially-deleted pieces — into
    /// `leaves`.
    ///
    /// NOTE(review): subtrees with unknown line metadata are skipped (early
    /// return), dropping their leaves; the caller compensates by rebuilding only
    /// when both offsets were resolved.
    #[allow(clippy::too_many_arguments)]
    fn collect_leaves_with_position_delete(
        &self,
        node: &Arc<PieceTreeNode>,
        current_offset: usize,
        lines_before: usize,
        start_line: usize,
        start_column: usize,
        end_line: usize,
        end_column: usize,
        delete_start_offset: &mut Option<usize>,
        delete_end_offset: &mut Option<usize>,
        leaves: &mut Vec<LeafData>,
        buffers: &[StringBuffer],
    ) {
        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                lf_left,
                left,
                right,
            } => {
                // If line counts are unknown, we can't do position-based navigation
                let Some(lf_left) = lf_left else {
                    return;
                };
                let lines_after_left = lines_before + lf_left;

                // Recursively process both subtrees (the range may span them, and
                // leaves on both sides must be collected either way)
                self.collect_leaves_with_position_delete(
                    left,
                    current_offset,
                    lines_before,
                    start_line,
                    start_column,
                    end_line,
                    end_column,
                    delete_start_offset,
                    delete_end_offset,
                    leaves,
                    buffers,
                );
                self.collect_leaves_with_position_delete(
                    right,
                    current_offset + left_bytes,
                    lines_after_left,
                    start_line,
                    start_column,
                    end_line,
                    end_column,
                    delete_start_offset,
                    delete_end_offset,
                    leaves,
                    buffers,
                );
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                line_feed_cnt,
            } => {
                // If line counts are unknown, we can't do position-based navigation
                let Some(line_feed_cnt) = line_feed_cnt else {
                    return;
                };
                let lines_in_piece = lines_before + line_feed_cnt;
                let piece_start = current_offset;
                let piece_end = current_offset + bytes;

                // Check if this piece contains the start position
                // (is_none() keeps only the first matching piece)
                if start_line >= lines_before
                    && start_line <= lines_in_piece
                    && delete_start_offset.is_none()
                {
                    if let Some(buffer) = buffers.get(location.buffer_id()) {
                        let offset_in_piece = self.find_position_in_leaf(
                            lines_before,
                            start_line,
                            start_column,
                            *offset,
                            *bytes,
                            buffer,
                        );
                        *delete_start_offset = Some(piece_start + offset_in_piece);
                    }
                }

                // Check if this piece contains the end position
                if end_line >= lines_before
                    && end_line <= lines_in_piece
                    && delete_end_offset.is_none()
                {
                    if let Some(buffer) = buffers.get(location.buffer_id()) {
                        let offset_in_piece = self.find_position_in_leaf(
                            lines_before,
                            end_line,
                            end_column,
                            *offset,
                            *bytes,
                            buffer,
                        );
                        *delete_end_offset = Some(piece_start + offset_in_piece);
                    }
                }

                // Now determine what to keep. The sentinels make the comparisons
                // conservative: an unresolved start (usize::MAX) classifies pieces
                // as "before the range"; an unresolved end (0) never classifies a
                // piece as "after the range".
                let del_start = delete_start_offset.unwrap_or(usize::MAX);
                let del_end = delete_end_offset.unwrap_or(0);

                // Piece completely before delete range
                if piece_end <= del_start {
                    leaves.push(LeafData::new(
                        *location,
                        *offset,
                        *bytes,
                        Some(*line_feed_cnt),
                    ));
                    return;
                }

                // Piece completely after delete range (only if we've found end)
                if delete_end_offset.is_some() && piece_start >= del_end {
                    leaves.push(LeafData::new(
                        *location,
                        *offset,
                        *bytes,
                        Some(*line_feed_cnt),
                    ));
                    return;
                }

                // Piece overlaps with delete range
                // Keep part before delete start; line feeds recomputed from data
                if piece_start < del_start && del_start < piece_end {
                    let keep_bytes = del_start - piece_start;
                    let lf_cnt =
                        Self::compute_line_feeds_static(buffers, *location, *offset, keep_bytes);
                    leaves.push(LeafData::new(*location, *offset, keep_bytes, lf_cnt));
                }

                // Keep part after delete end (if we know where end is)
                if delete_end_offset.is_some() && del_end > piece_start && del_end < piece_end {
                    let skip_bytes = del_end - piece_start;
                    let keep_bytes = piece_end - del_end;
                    let lf_cnt = Self::compute_line_feeds_static(
                        buffers,
                        *location,
                        offset + skip_bytes,
                        keep_bytes,
                    );
                    leaves.push(LeafData::new(
                        *location,
                        offset + skip_bytes,
                        keep_bytes,
                        lf_cnt,
                    ));
                }
            }
        }
    }
1439
1440    /// Helper to find a position within a leaf piece
1441    /// Returns the offset within the piece (not the document offset)
1442    fn find_position_in_leaf(
1443        &self,
1444        lines_before: usize,
1445        target_line: usize,
1446        target_column: usize,
1447        piece_offset: usize,
1448        piece_bytes: usize,
1449        buffer: &StringBuffer,
1450    ) -> usize {
1451        let line_in_piece = target_line - lines_before;
1452
1453        // Find the line start within the piece
1454        let line_start_in_buffer = if line_in_piece == 0 {
1455            piece_offset
1456        } else {
1457            // Find the Nth newline within this piece
1458            let mut lines_seen = 0;
1459            let mut found_line_start = piece_offset;
1460
1461            if let Some(line_starts) = buffer.get_line_starts() {
1462                for &ls in line_starts.iter() {
1463                    if ls > piece_offset && ls < piece_offset + piece_bytes {
1464                        if lines_seen == line_in_piece - 1 {
1465                            found_line_start = ls;
1466                            break;
1467                        }
1468                        lines_seen += 1;
1469                    }
1470                }
1471            }
1472
1473            found_line_start
1474        };
1475
1476        // Calculate offset within the piece
1477        let column_offset = target_column.min(piece_bytes);
1478        let target_in_buffer = line_start_in_buffer + column_offset;
1479        target_in_buffer
1480            .saturating_sub(piece_offset)
1481            .min(piece_bytes)
1482    }
1483
    /// Helper to collect leaves while deleting a range
    ///
    /// In-order traversal that pushes every surviving leaf into `leaves`,
    /// dropping the bytes in `[delete_start, delete_end)`. Pieces overlapping
    /// the range are trimmed, and the trimmed fragments get their line-feed
    /// counts recomputed from buffer data. `current_offset` is the document
    /// byte offset where this subtree begins.
    fn collect_leaves_with_delete(
        &self,
        node: &Arc<PieceTreeNode>,
        current_offset: usize,
        delete_start: usize,
        delete_end: usize,
        leaves: &mut Vec<LeafData>,
        buffers: &[StringBuffer],
    ) {
        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                // Visit both subtrees; each leaf decides its own overlap case
                self.collect_leaves_with_delete(
                    left,
                    current_offset,
                    delete_start,
                    delete_end,
                    leaves,
                    buffers,
                );
                self.collect_leaves_with_delete(
                    right,
                    current_offset + left_bytes,
                    delete_start,
                    delete_end,
                    leaves,
                    buffers,
                );
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                line_feed_cnt,
            } => {
                let piece_start = current_offset;
                let piece_end = current_offset + bytes;

                // Piece completely before delete range
                if piece_end <= delete_start {
                    leaves.push(LeafData::new(*location, *offset, *bytes, *line_feed_cnt));
                    return;
                }

                // Piece completely after delete range
                if piece_start >= delete_end {
                    leaves.push(LeafData::new(*location, *offset, *bytes, *line_feed_cnt));
                    return;
                }

                // Piece partially or fully overlaps delete range
                // (fully-covered pieces match neither branch and are dropped)
                // Keep part before delete range
                if piece_start < delete_start {
                    let keep_bytes = delete_start - piece_start;
                    let lf_cnt =
                        Self::compute_line_feeds_static(buffers, *location, *offset, keep_bytes);
                    leaves.push(LeafData::new(*location, *offset, keep_bytes, lf_cnt));
                }

                // Keep part after delete range
                if piece_end > delete_end {
                    let skip_bytes = delete_end - piece_start;
                    let keep_bytes = piece_end - delete_end;
                    let lf_cnt = Self::compute_line_feeds_static(
                        buffers,
                        *location,
                        offset + skip_bytes,
                        keep_bytes,
                    );
                    leaves.push(LeafData::new(
                        *location,
                        offset + skip_bytes,
                        keep_bytes,
                        lf_cnt,
                    ));
                }
            }
        }
    }
1568
    /// Get the total number of bytes in the document.
    /// O(1): the count is maintained incrementally by insert/delete rather
    /// than recomputed from the tree.
    pub fn total_bytes(&self) -> usize {
        self.total_bytes
    }
1573
    /// Get the total number of lines in the document.
    /// Line count = line feeds + 1.
    /// Returns None if any piece has an unknown line count
    /// (e.g. unindexed large-file mode).
    pub fn line_count(&self) -> Option<usize> {
        self.root.total_line_feeds().map(|lf| lf + 1)
    }
1580
    /// Get tree statistics for debugging: total size, tree depth, leaf count,
    /// and total line feeds (None when any piece's line count is unknown).
    pub fn stats(&self) -> TreeStats {
        TreeStats {
            total_bytes: self.total_bytes,
            depth: self.root.depth(),
            leaf_count: self.root.count_leaves(),
            line_feed_count: self.root.total_line_feeds(),
        }
    }
1590
    /// Get all leaves in document order (for debugging).
    /// Returns owned copies of the leaf metadata, not references into the tree.
    pub fn get_leaves(&self) -> Vec<LeafData> {
        let mut leaves = Vec::new();
        self.root.collect_leaves(&mut leaves);
        leaves
    }
1597
    /// Convert byte offset to line/column position using tree's line metadata
    ///
    /// Returns `Some((line, column))` — both 0-indexed, column measured in
    /// bytes — or `None` when line metadata is unavailable (pieces without
    /// line-feed counts, or a buffer without a line-start index, as in
    /// large-file mode).
    pub fn offset_to_position(
        &self,
        offset: usize,
        buffers: &[StringBuffer],
    ) -> Option<(usize, usize)> {
        // The document origin is always line 0, column 0.
        if offset == 0 {
            return Some((0, 0));
        }

        // Clamp so offsets past the end resolve at the document's last byte.
        let offset = offset.min(self.total_bytes);

        // Find the piece containing this offset
        if let Some(result) = self.root.find_by_offset(offset) {
            let piece_info = result.info;
            let bytes_before = result.bytes_before;

            // Count lines before this piece
            // If line count is unknown, return None - we can't reliably compute position
            let lines_before = match self.count_lines_before_offset(bytes_before) {
                Some(count) => count,
                None => {
                    // No line metadata available - cannot compute position reliably
                    return None;
                }
            };

            // Get the buffer for this piece
            let buffer_id = piece_info.location.buffer_id();
            if let Some(buffer) = buffers.get(buffer_id) {
                // Check if we have line starts available
                if let Some(line_starts) = buffer.get_line_starts() {
                    // Find position within the piece
                    let offset_in_piece = piece_info.offset_in_piece.unwrap_or(0);
                    let byte_offset_in_buffer = piece_info.offset + offset_in_piece;

                    // Find which line within the buffer
                    // (a binary_search miss maps back to the previous line start)
                    let line_in_buffer = line_starts
                        .binary_search(&byte_offset_in_buffer)
                        .unwrap_or_else(|i| i.saturating_sub(1));

                    // Find which line the piece starts at in the buffer
                    let piece_start_line = line_starts
                        .binary_search(&piece_info.offset)
                        .unwrap_or_else(|i| i.saturating_sub(1));

                    // Calculate line relative to piece start (not buffer start)
                    let line_in_piece = line_in_buffer - piece_start_line;

                    // Calculate the document line number
                    let doc_line = lines_before + line_in_piece;

                    // Calculate column
                    let column = if line_in_piece == 0 && bytes_before == 0 {
                        // Fast path: piece is at document start, so column is just offset within piece
                        offset_in_piece
                    } else if line_in_piece == 0 {
                        // We're on the first line of this piece, but the document line may have
                        // started before this piece (after modifications). Find the actual line start.
                        let line_start = self.position_to_offset(doc_line, 0, buffers);
                        offset.saturating_sub(line_start)
                    } else {
                        // Line starts within this piece
                        // Find where the line starts within the piece
                        // (linear scan over the buffer's line starts, counting
                        // only starts that fall strictly inside the piece)
                        let mut count = 0;
                        let mut line_start_in_buf = piece_info.offset;
                        for &ls in line_starts.iter() {
                            if ls > piece_info.offset && ls < piece_info.offset + piece_info.bytes {
                                count += 1;
                                if count == line_in_piece {
                                    line_start_in_buf = ls;
                                    break;
                                }
                            }
                        }
                        let line_start_offset_in_piece = line_start_in_buf - piece_info.offset;
                        offset_in_piece - line_start_offset_in_piece
                    };

                    return Some((doc_line, column));
                }
                // No line starts available - return None
            }
        }

        // Fallback: end of document
        // Only if we have line metadata
        match self.line_count() {
            Some(line_count) => {
                let last_line = line_count.saturating_sub(1);
                let line_start = self.position_to_offset(last_line, 0, buffers);
                let column = self.total_bytes.saturating_sub(line_start);
                Some((last_line, column))
            }
            None => {
                // No line metadata - cannot compute position
                None
            }
        }
    }
1698
1699    /// Convert line/column position to byte offset using tree's line metadata
1700    pub fn position_to_offset(
1701        &self,
1702        line: usize,
1703        column: usize,
1704        buffers: &[StringBuffer],
1705    ) -> usize {
1706        if line == 0 && column == 0 {
1707            return 0;
1708        }
1709
1710        // Traverse tree to find the piece containing the target line
1711        self.find_offset_for_line(line, column, buffers)
1712            .unwrap_or(self.total_bytes)
1713    }
1714
    /// Helper: count line feeds before a given byte offset
    /// Returns None if any piece has unknown line count
    /// (equivalent to counting over the half-open range `[0, byte_offset)`)
    fn count_lines_before_offset(&self, byte_offset: usize) -> Option<usize> {
        self.count_lines_in_range(0, byte_offset)
    }
1720
1721    /// Helper: count line feeds in a byte range
1722    /// Returns None if any piece has unknown line count
1723    fn count_lines_in_range(&self, start: usize, end: usize) -> Option<usize> {
1724        if start >= end {
1725            return Some(0);
1726        }
1727
1728        self.root.count_lines_in_byte_range(0, start, end)
1729    }
1730
    /// Helper: find byte offset for a given line/column
    ///
    /// Delegates to the node-level search starting at the root with zero
    /// accumulated bytes and lines. Returns `None` when the line cannot be
    /// located (e.g. missing line metadata or line out of range).
    fn find_offset_for_line(
        &self,
        target_line: usize,
        column: usize,
        buffers: &[StringBuffer],
    ) -> Option<usize> {
        self.root
            .find_byte_offset_for_line(0, 0, target_line, column, buffers)
    }
1741
1742    /// Get the byte range for a specific line
1743    pub fn line_range(
1744        &self,
1745        line: usize,
1746        buffers: &[StringBuffer],
1747    ) -> Option<(usize, Option<usize>)> {
1748        // Check if line exists
1749        let line_count = self.line_count()?;
1750        if line >= line_count {
1751            return None;
1752        }
1753
1754        let start = self.position_to_offset(line, 0, buffers);
1755        let end = if line + 1 < line_count {
1756            Some(self.position_to_offset(line + 1, 0, buffers))
1757        } else {
1758            None
1759        };
1760        Some((start, end))
1761    }
1762
    /// Iterate through pieces overlapping a byte range
    /// Does ONE O(log n) tree traversal, then iterates sequentially
    ///
    /// The range is half-open: `[start, end)`.
    pub fn iter_pieces_in_range(&self, start: usize, end: usize) -> PieceRangeIter {
        PieceRangeIter::new(&self.root, start, end)
    }
1768
    /// Apply multiple edits in a single tree traversal + rebuild
    ///
    /// # Arguments
    /// * `edits` - Vec of (position, delete_len, insert_text), MUST be sorted descending by position
    /// * `buffers` - Reference to string buffers (for line feed computation)
    /// * `add_text_fn` - Function to add text to buffer, returns
    ///   (BufferLocation, offset, bytes, line_feed_count)
    ///
    /// # Complexity
    /// O(pieces + edits) instead of O(pieces × edits)
    ///
    /// NOTE(review): step 4 below scans all of `edit_ranges` for every leaf,
    /// so the worst case is still O(pieces × edits); the stated bound holds
    /// when the edit count is small — confirm before relying on it for huge
    /// batches.
    ///
    /// # Returns
    /// The net change in total bytes
    pub fn apply_bulk_edits<F>(
        &mut self,
        edits: &[(usize, usize, &str)],
        buffers: &[StringBuffer],
        mut add_text_fn: F,
    ) -> isize
    where
        F: FnMut(&str) -> (BufferLocation, usize, usize, Option<usize>),
    {
        if edits.is_empty() {
            return 0;
        }

        // 1. Collect all split points (both start and end of each edit range)
        // so every edit boundary falls exactly on a leaf boundary.
        let mut split_points: Vec<usize> = Vec::with_capacity(edits.len() * 2);
        for (pos, del_len, _) in edits {
            split_points.push(*pos);
            if *del_len > 0 {
                let end = pos.saturating_add(*del_len).min(self.total_bytes);
                if end > *pos {
                    split_points.push(end);
                }
            }
        }
        split_points.sort_unstable();
        split_points.dedup();

        // 2. Collect all leaves, splitting at all required points
        let mut leaves = Vec::new();
        self.collect_leaves_with_multi_split(
            &self.root.clone(),
            0,
            &split_points,
            &mut leaves,
            buffers,
        );

        // 3. Build edit ranges for quick lookup (sorted descending by position)
        // Each edit: (start, end, insert_leaf)
        // Inserted text is materialized into a leaf up front via the callback.
        let mut edit_ranges: Vec<(usize, usize, Option<LeafData>)> =
            Vec::with_capacity(edits.len());
        for (pos, del_len, text) in edits {
            let del_end = pos.saturating_add(*del_len).min(self.total_bytes);
            let insert_leaf = if !text.is_empty() {
                let (location, offset, bytes, lf_cnt) = add_text_fn(text);
                Some(LeafData::new(location, offset, bytes, lf_cnt))
            } else {
                None
            };
            edit_ranges.push((*pos, del_end, insert_leaf));
        }

        // 4. Apply edits to leaves
        // Edits are sorted descending by position, so:
        //   edit_ranges[len-1] has smallest position, edit_ranges[0] has largest
        // We iterate leaves ascending, and for each leaf we:
        //   1. First add any inserts that belong BEFORE this leaf
        //   2. Then add the leaf (if not deleted)
        let mut new_leaves: Vec<LeafData> = Vec::with_capacity(leaves.len() + edits.len());
        let mut current_offset = 0;
        let mut edit_idx = edit_ranges.len(); // Points past the end; we access [edit_idx-1]

        for leaf in leaves {
            let leaf_start = current_offset;
            let leaf_end = current_offset + leaf.bytes;

            // First, add any inserts whose position is <= leaf_start
            // These inserts should appear BEFORE this leaf's content
            while edit_idx > 0 {
                let (edit_start, _edit_end, ref insert_leaf) = edit_ranges[edit_idx - 1];

                // If this edit's position is after where we are, stop
                if edit_start > leaf_start {
                    break;
                }

                // Insert belongs at or before leaf_start
                if let Some(insert) = insert_leaf {
                    new_leaves.push(*insert);
                }
                edit_idx -= 1;
            }

            // Check if this leaf overlaps with ANY edit's delete range
            // We check ALL edits, not just remaining ones, because edits
            // processed in the insert loop above may still have deletions
            let mut keep_leaf = true;

            for (edit_start, edit_end, _) in &edit_ranges {
                // If edit's delete range is entirely after this leaf, skip
                if *edit_start >= leaf_end {
                    continue;
                }

                // If edit has no deletion (edit_start == edit_end), skip
                if *edit_start == *edit_end {
                    continue;
                }

                // If edit's delete range is entirely before this leaf, skip
                if *edit_end <= leaf_start {
                    continue;
                }

                // Leaf overlaps with this edit's delete range - filter it out
                // (step 2 split leaves at every edit boundary, so an
                // overlapping leaf is always fully contained in the range)
                if leaf_start >= *edit_start && leaf_end <= *edit_end {
                    keep_leaf = false;
                    break;
                }
            }

            if keep_leaf {
                new_leaves.push(leaf);
            }

            current_offset = leaf_end;
        }

        // Handle any remaining inserts at the end of document
        while edit_idx > 0 {
            if let Some(insert) = &edit_ranges[edit_idx - 1].2 {
                new_leaves.push(*insert);
            }
            edit_idx -= 1;
        }

        // 5. Calculate byte delta
        let old_bytes = self.total_bytes;
        let mut new_bytes: usize = 0;
        for leaf in &new_leaves {
            new_bytes += leaf.bytes;
        }
        let delta = new_bytes as isize - old_bytes as isize;

        // 6. Single balanced tree rebuild
        self.root = Self::build_balanced(&new_leaves);
        self.total_bytes = new_bytes;

        delta
    }
1921
    /// Collect leaves, splitting at multiple points in one traversal
    ///
    /// `split_points` are absolute document offsets (sorted ascending and
    /// deduplicated by the caller). Any leaf straddling a split point is
    /// emitted as several chunks; each chunk's line-feed count is recomputed
    /// from the backing buffer.
    fn collect_leaves_with_multi_split(
        &self,
        node: &Arc<PieceTreeNode>,
        current_offset: usize,
        split_points: &[usize],
        leaves: &mut Vec<LeafData>,
        buffers: &[StringBuffer],
    ) {
        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                // Recurse into both subtrees (left first keeps document order)
                self.collect_leaves_with_multi_split(
                    left,
                    current_offset,
                    split_points,
                    leaves,
                    buffers,
                );
                self.collect_leaves_with_multi_split(
                    right,
                    current_offset + left_bytes,
                    split_points,
                    leaves,
                    buffers,
                );
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                line_feed_cnt,
            } => {
                // Empty leaves carry no content and are dropped outright.
                if *bytes == 0 {
                    return;
                }

                let piece_start = current_offset;
                let piece_end = current_offset + bytes;

                // Find split points strictly inside this piece, converted to
                // piece-relative offsets (boundary hits need no split).
                let mut split_offsets: Vec<usize> = Vec::new();
                for &sp in split_points {
                    if sp > piece_start && sp < piece_end {
                        split_offsets.push(sp - piece_start);
                    }
                }

                if split_offsets.is_empty() {
                    // No splits needed, add entire piece
                    leaves.push(LeafData::new(*location, *offset, *bytes, *line_feed_cnt));
                } else {
                    // Split the piece at each point
                    // (defensive: offsets were collected in ascending order
                    // from already-sorted input, so this is a no-op safeguard)
                    split_offsets.sort_unstable();
                    split_offsets.dedup();

                    let mut prev_offset = 0;
                    for split_offset in split_offsets {
                        if split_offset > prev_offset {
                            let chunk_bytes = split_offset - prev_offset;
                            let lf_cnt = Self::compute_line_feeds_static(
                                buffers,
                                *location,
                                offset + prev_offset,
                                chunk_bytes,
                            );
                            leaves.push(LeafData::new(
                                *location,
                                offset + prev_offset,
                                chunk_bytes,
                                lf_cnt,
                            ));
                        }
                        prev_offset = split_offset;
                    }

                    // Add remaining part after last split
                    if prev_offset < *bytes {
                        let remaining = bytes - prev_offset;
                        let lf_cnt = Self::compute_line_feeds_static(
                            buffers,
                            *location,
                            offset + prev_offset,
                            remaining,
                        );
                        leaves.push(LeafData::new(
                            *location,
                            offset + prev_offset,
                            remaining,
                            lf_cnt,
                        ));
                    }
                }
            }
        }
    }
2023}
2024
/// A view into a piece's data within the document
///
/// Produced by [`PieceRangeIter`]; pairs a piece's buffer coordinates with
/// its absolute byte position in the document.
#[derive(Debug, Clone)]
pub struct PieceView {
    /// The location of this piece (which buffer it references)
    pub location: BufferLocation,
    /// Offset within the source buffer where this piece starts
    pub buffer_offset: usize,
    /// Number of bytes in this piece
    pub bytes: usize,
    /// Byte offset where this piece starts in the document
    pub doc_offset: usize,
    /// Number of line feeds in this piece (None if unknown for large files)
    pub line_feed_cnt: Option<usize>,
}
2039
/// Iterator over pieces in a byte range
/// Performs ONE O(log n) traversal to collect pieces, then iterates in O(1) per piece
pub struct PieceRangeIter {
    /// Pieces overlapping the requested range, in document order
    pieces: Vec<PieceView>,
    /// Index of the next piece to yield
    current_index: usize,
}
2046
2047impl PieceRangeIter {
2048    fn new(root: &Arc<PieceTreeNode>, start: usize, end: usize) -> Self {
2049        let mut pieces = Vec::new();
2050        Self::collect_pieces(root, 0, start, end, &mut pieces);
2051        PieceRangeIter {
2052            pieces,
2053            current_index: 0,
2054        }
2055    }
2056
2057    /// Recursively collect all pieces that overlap [start, end)
2058    fn collect_pieces(
2059        node: &Arc<PieceTreeNode>,
2060        doc_offset: usize,
2061        range_start: usize,
2062        range_end: usize,
2063        pieces: &mut Vec<PieceView>,
2064    ) {
2065        match node.as_ref() {
2066            PieceTreeNode::Internal {
2067                left_bytes,
2068                left,
2069                right,
2070                ..
2071            } => {
2072                let left_end = doc_offset + left_bytes;
2073
2074                // Check if left subtree overlaps with range
2075                if range_start < left_end {
2076                    Self::collect_pieces(left, doc_offset, range_start, range_end, pieces);
2077                }
2078
2079                // Check if right subtree overlaps with range
2080                if range_end > left_end {
2081                    Self::collect_pieces(right, left_end, range_start, range_end, pieces);
2082                }
2083            }
2084            PieceTreeNode::Leaf {
2085                location,
2086                offset,
2087                bytes,
2088                line_feed_cnt,
2089            } => {
2090                let piece_end = doc_offset + bytes;
2091
2092                // Check if this piece overlaps with the range
2093                if doc_offset < range_end && piece_end > range_start {
2094                    pieces.push(PieceView {
2095                        location: *location,
2096                        buffer_offset: *offset,
2097                        bytes: *bytes,
2098                        doc_offset,
2099                        line_feed_cnt: *line_feed_cnt,
2100                    });
2101                }
2102            }
2103        }
2104    }
2105}
2106
2107impl Iterator for PieceRangeIter {
2108    type Item = PieceView;
2109
2110    fn next(&mut self) -> Option<Self::Item> {
2111        if self.current_index < self.pieces.len() {
2112            let piece = self.pieces[self.current_index].clone();
2113            self.current_index += 1;
2114            Some(piece)
2115        } else {
2116            None
2117        }
2118    }
2119}
2120
2121#[cfg(test)]
2122mod tests {
2123    use super::*;
2124
2125    // Helper to create test buffers
2126    fn test_buffers() -> Vec<StringBuffer> {
2127        vec![
2128            StringBuffer::new(0, vec![b'a'; 100]), // Buffer 0: 100 'a's
2129            StringBuffer::new(1, vec![b'b'; 50]),  // Buffer 1: 50 'b's
2130            StringBuffer::new(2, vec![b'c'; 25]),  // Buffer 2: 25 'c's
2131        ]
2132    }
2133
2134    #[test]
2135    fn test_create_empty() {
2136        let tree = PieceTree::empty();
2137        assert_eq!(tree.total_bytes(), 0);
2138    }
2139
2140    #[test]
2141    fn test_create_with_initial_piece() {
2142        let tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2143        assert_eq!(tree.total_bytes(), 100);
2144    }
2145
2146    #[test]
2147    fn test_insert_at_end() {
2148        let buffers = test_buffers();
2149        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2150        tree.insert(100, BufferLocation::Added(1), 0, 50, Some(0), &buffers);
2151        assert_eq!(tree.total_bytes(), 150);
2152    }
2153
2154    #[test]
2155    fn test_insert_in_middle() {
2156        let buffers = test_buffers();
2157        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2158        tree.insert(50, BufferLocation::Added(2), 0, 25, Some(0), &buffers);
2159        assert_eq!(tree.total_bytes(), 125);
2160        let stats = tree.stats();
2161        assert_eq!(stats.leaf_count, 3); // Original piece split + new piece
2162    }
2163
2164    #[test]
2165    fn test_delete() {
2166        let buffers = test_buffers();
2167        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2168        tree.delete(25, 50, &buffers);
2169        assert_eq!(tree.total_bytes(), 50);
2170    }
2171
2172    #[test]
2173    fn test_delete_at_boundaries() {
2174        let buffers = test_buffers();
2175        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2176
2177        // Delete from start
2178        tree.delete(0, 10, &buffers);
2179        assert_eq!(tree.total_bytes(), 90);
2180
2181        // Delete from end
2182        tree.delete(80, 10, &buffers);
2183        assert_eq!(tree.total_bytes(), 80);
2184    }
2185
2186    #[test]
2187    fn test_multiple_inserts_and_deletes() {
2188        let buffers = test_buffers();
2189        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2190
2191        tree.insert(50, BufferLocation::Added(1), 0, 20, Some(0), &buffers);
2192        assert_eq!(tree.total_bytes(), 120);
2193
2194        tree.delete(40, 30, &buffers);
2195        assert_eq!(tree.total_bytes(), 90);
2196
2197        tree.insert(0, BufferLocation::Added(1), 20, 10, Some(0), &buffers);
2198        assert_eq!(tree.total_bytes(), 100);
2199    }
2200
2201    #[test]
2202    fn test_rebalancing_many_inserts() {
2203        let buffers = test_buffers();
2204        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2205
2206        // Insert many times, which could create unbalanced tree
2207        for i in 0..20 {
2208            tree.insert(i * 5, BufferLocation::Added(1), i, 1, Some(0), &buffers);
2209        }
2210
2211        let stats = tree.stats();
2212        assert_eq!(stats.total_bytes, 120);
2213        // Each insert splits pieces, so we expect many leaves
2214        // Exact count depends on implementation details, but should be > 20
2215        assert!(stats.leaf_count > 20);
2216        assert!(stats.leaf_count < 50); // Reasonable upper bound
2217
2218        // Depth should be reasonable due to rebalancing
2219        let max_expected_depth = 2 * (stats.leaf_count as f64).log2().ceil() as usize;
2220        assert!(
2221            stats.depth <= max_expected_depth + 2,
2222            "Tree depth {} exceeds max {} for {} leaves",
2223            stats.depth,
2224            max_expected_depth,
2225            stats.leaf_count
2226        );
2227    }
2228
2229    #[test]
2230    fn test_find_by_offset() {
2231        let tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2232
2233        let info = tree.find_by_offset(50).unwrap();
2234        assert_eq!(info.location, BufferLocation::Stored(0));
2235        assert_eq!(info.offset_in_piece, Some(50));
2236
2237        // Out of bounds
2238        assert!(tree.find_by_offset(100).is_none());
2239    }
2240
2241    #[test]
2242    fn test_find_after_inserts() {
2243        let buffers = test_buffers();
2244        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2245        tree.insert(50, BufferLocation::Added(1), 0, 25, Some(0), &buffers);
2246
2247        // Should find in added section
2248        let info = tree.find_by_offset(50).unwrap();
2249        assert_eq!(info.location, BufferLocation::Added(1));
2250    }
2251
    #[test]
    fn test_offset_to_position_column_after_modification() {
        // This test reproduces a bug where offset_to_position returns incorrect
        // column values after buffer modifications.
        //
        // Initial content: "fn foo(val: i32) {\n    val + 1\n}\n"
        // After deleting "val" and inserting "value" twice:
        // Buffer becomes: "fn foo(value: i32) {\n    value + 1\n}\n"
        //
        // Position 25 should be line 1, column 4 (the 'v' in second "value")
        // But the bug causes it to return column 0.

        // Create buffer with initial content
        let initial = b"fn foo(val: i32) {\n    val + 1\n}\n";
        let buffer = StringBuffer::new(0, initial.to_vec());
        let buffers = vec![buffer.clone()];

        // Seed the tree with the whole buffer and its real line-feed count
        let mut tree = PieceTree::new(
            BufferLocation::Stored(0),
            0,
            initial.len(),
            Some(initial.iter().filter(|&&b| b == b'\n').count()),
        );

        // Verify initial position works correctly
        // Position 23 = 'v' of second "val" on line 1 (after newline at pos 18)
        let pos = tree.offset_to_position(23, &buffers);
        assert_eq!(
            pos,
            Some((1, 4)),
            "Initial: position 23 should be line 1, column 4"
        );

        // Now simulate LSP rename operations:
        // 1. Delete "val" at position 23 (3 bytes)
        // 2. Insert "value" at position 23 (5 bytes)
        // 3. Delete "val" at position 7 (3 bytes)
        // 4. Insert "value" at position 7 (5 bytes)

        // First modification: delete "val" at position 23
        tree.delete(23, 3, &buffers);

        // Insert "value" - need a new buffer
        // (inserted text has no newlines, hence line-feed count Some(0))
        let value_buf = StringBuffer::new(1, b"value".to_vec());
        let buffers = vec![buffer.clone(), value_buf.clone()];
        tree.insert(23, BufferLocation::Added(1), 0, 5, Some(0), &buffers);

        // Second modification: delete "val" at position 7
        tree.delete(7, 3, &buffers);

        // Insert "value" - use another buffer
        let value_buf2 = StringBuffer::new(2, b"value".to_vec());
        let buffers = vec![buffer.clone(), value_buf.clone(), value_buf2];
        tree.insert(7, BufferLocation::Added(2), 0, 5, Some(0), &buffers);

        // Buffer is now: "fn foo(value: i32) {\n    value + 1\n}\n"
        // Line 0: "fn foo(value: i32) {\n" = 21 bytes (positions 0-20)
        // Line 1: "    value + 1\n" starts at position 21
        // Position 25 = 21 + 4 = line 1, column 4

        // This is where the bug manifests
        let pos = tree.offset_to_position(25, &buffers);
        assert_eq!(
            pos,
            Some((1, 4)),
            "After modification: position 25 should be line 1, column 4"
        );

        // Also test position 21 (start of line 1)
        let pos = tree.offset_to_position(21, &buffers);
        assert_eq!(pos, Some((1, 0)), "Position 21 should be line 1, column 0");
    }
2324
2325    // ============== Tests for apply_bulk_edits ==============
2326
2327    // Helper to pre-allocate buffers for bulk edit tests
2328    fn prepare_bulk_edit_buffers(
2329        buffers: &mut Vec<StringBuffer>,
2330        texts: &[&str],
2331    ) -> Vec<(BufferLocation, usize, usize, Option<usize>)> {
2332        let mut infos = Vec::new();
2333        for (i, text) in texts.iter().enumerate() {
2334            let id = buffers.len();
2335            let bytes = text.as_bytes().to_vec();
2336            let lf = bytes.iter().filter(|&&b| b == b'\n').count();
2337            let len = bytes.len();
2338            buffers.push(StringBuffer::new(id, bytes));
2339            infos.push((BufferLocation::Added(id), 0, len, Some(lf)));
2340            let _ = i; // suppress warning
2341        }
2342        infos
2343    }
2344
2345    #[test]
2346    fn test_bulk_edit_single_insert() {
2347        let mut buffers = vec![StringBuffer::new(0, b"hello world".to_vec())];
2348        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, Some(0));
2349
2350        // Pre-allocate buffer for the insert
2351        let infos = prepare_bulk_edit_buffers(&mut buffers, &["!"]);
2352
2353        // Insert "!" at position 11 (end)
2354        let edits: Vec<(usize, usize, &str)> = vec![(11, 0, "!")];
2355        let mut idx = 0;
2356
2357        let delta = tree.apply_bulk_edits(&edits, &buffers, |_text| {
2358            let info = infos[idx];
2359            idx += 1;
2360            info
2361        });
2362
2363        assert_eq!(delta, 1);
2364        assert_eq!(tree.total_bytes(), 12);
2365    }
2366
2367    #[test]
2368    fn test_bulk_edit_single_delete() {
2369        let buffers = vec![StringBuffer::new(0, b"hello world".to_vec())];
2370        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, Some(0));
2371
2372        // Delete "world" (positions 6-11) - no insert, so no buffer needed
2373        let edits: Vec<(usize, usize, &str)> = vec![(6, 5, "")];
2374
2375        let delta = tree.apply_bulk_edits(&edits, &buffers, |_text| {
2376            (BufferLocation::Added(1), 0, 0, Some(0))
2377        });
2378
2379        assert_eq!(delta, -5);
2380        assert_eq!(tree.total_bytes(), 6);
2381    }
2382
2383    #[test]
2384    fn test_bulk_edit_single_replace() {
2385        let mut buffers = vec![StringBuffer::new(0, b"hello world".to_vec())];
2386        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, Some(0));
2387
2388        // Pre-allocate buffer for the replacement
2389        let infos = prepare_bulk_edit_buffers(&mut buffers, &["rust"]);
2390
2391        // Replace "world" with "rust"
2392        let edits: Vec<(usize, usize, &str)> = vec![(6, 5, "rust")];
2393        let mut idx = 0;
2394
2395        let delta = tree.apply_bulk_edits(&edits, &buffers, |_text| {
2396            let info = infos[idx];
2397            idx += 1;
2398            info
2399        });
2400
2401        assert_eq!(delta, -1); // "world" (5) -> "rust" (4)
2402        assert_eq!(tree.total_bytes(), 10);
2403    }
2404
2405    #[test]
2406    fn test_bulk_edit_multiple_inserts_descending() {
2407        // Edits must be sorted descending by position
2408        let mut buffers = vec![StringBuffer::new(0, b"abc".to_vec())];
2409        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 3, Some(0));
2410
2411        // Pre-allocate buffers for 4 inserts
2412        let infos = prepare_bulk_edit_buffers(&mut buffers, &["X", "X", "X", "X"]);
2413
2414        // Insert "X" at positions 3, 2, 1, 0 (descending order)
2415        let edits: Vec<(usize, usize, &str)> = vec![
2416            (3, 0, "X"), // Insert at end
2417            (2, 0, "X"), // Insert before 'c'
2418            (1, 0, "X"), // Insert before 'b'
2419            (0, 0, "X"), // Insert at start
2420        ];
2421        let mut idx = 0;
2422
2423        let delta = tree.apply_bulk_edits(&edits, &buffers, |_text| {
2424            let info = infos[idx];
2425            idx += 1;
2426            info
2427        });
2428
2429        assert_eq!(delta, 4);
2430        assert_eq!(tree.total_bytes(), 7); // "XaXbXcX"
2431    }
2432
2433    #[test]
2434    fn test_bulk_edit_multiple_deletes_descending() {
2435        let buffers = vec![StringBuffer::new(0, b"abcdefgh".to_vec())];
2436        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 8, Some(0));
2437
2438        // Delete chars at positions 6, 4, 2, 0 (descending order) - no inserts
2439        let edits: Vec<(usize, usize, &str)> = vec![
2440            (6, 1, ""), // Delete 'g'
2441            (4, 1, ""), // Delete 'e'
2442            (2, 1, ""), // Delete 'c'
2443            (0, 1, ""), // Delete 'a'
2444        ];
2445
2446        let delta = tree.apply_bulk_edits(&edits, &buffers, |_| {
2447            (BufferLocation::Added(1), 0, 0, Some(0))
2448        });
2449
2450        assert_eq!(delta, -4);
2451        assert_eq!(tree.total_bytes(), 4); // "bdfh"
2452    }
2453
2454    #[test]
2455    fn test_bulk_edit_empty_edits() {
2456        let buffers = vec![StringBuffer::new(0, b"hello".to_vec())];
2457        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 5, Some(0));
2458
2459        let edits: Vec<(usize, usize, &str)> = vec![];
2460
2461        let delta = tree.apply_bulk_edits(&edits, &buffers, |_| {
2462            (BufferLocation::Added(1), 0, 0, Some(0))
2463        });
2464
2465        assert_eq!(delta, 0);
2466        assert_eq!(tree.total_bytes(), 5);
2467    }
2468
2469    #[test]
2470    fn test_bulk_edit_consistency_check() {
2471        // Test that piece sum equals total_bytes after bulk edit
2472        let mut buffers = vec![StringBuffer::new(0, b"0123456789".to_vec())];
2473        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 10, Some(0));
2474
2475        // Pre-allocate buffers for the inserts
2476        let infos = prepare_bulk_edit_buffers(&mut buffers, &["XX", "Y", "ZZZ"]);
2477
2478        // Multiple mixed operations
2479        let edits: Vec<(usize, usize, &str)> = vec![
2480            (8, 1, "XX"),  // Replace '8' with 'XX'
2481            (5, 2, "Y"),   // Replace '56' with 'Y'
2482            (2, 0, "ZZZ"), // Insert 'ZZZ' at position 2
2483        ];
2484        let mut idx = 0;
2485
2486        tree.apply_bulk_edits(&edits, &buffers, |_text| {
2487            let info = infos[idx];
2488            idx += 1;
2489            info
2490        });
2491
2492        // Verify consistency
2493        let leaves = tree.get_leaves();
2494        let sum: usize = leaves.iter().map(|l| l.bytes).sum();
2495        assert_eq!(
2496            sum,
2497            tree.total_bytes(),
2498            "Piece sum {} != total_bytes {}",
2499            sum,
2500            tree.total_bytes()
2501        );
2502    }
2503
2504    #[test]
2505    fn test_bulk_edit_vs_sequential_equivalence() {
2506        // Verify that bulk edit produces same result as sequential edits
2507        let original_content = b"The quick brown fox";
2508
2509        // Setup for bulk edit
2510        let mut buffers1 = vec![StringBuffer::new(0, original_content.to_vec())];
2511        let mut tree1 = PieceTree::new(BufferLocation::Stored(0), 0, 19, Some(0));
2512
2513        // Pre-allocate buffers for the replacements
2514        let infos = prepare_bulk_edit_buffers(&mut buffers1, &["red", "slow"]);
2515
2516        // Setup for sequential edit
2517        let mut buffers2 = vec![StringBuffer::new(0, original_content.to_vec())];
2518        let mut tree2 = PieceTree::new(BufferLocation::Stored(0), 0, 19, Some(0));
2519        let mut next_id2 = 1;
2520
2521        // Edits: Replace "quick" with "slow", "brown" with "red"
2522        // Positions in original: quick=4-9, brown=10-15
2523        // Must be sorted descending
2524        let edits: Vec<(usize, usize, &str)> = vec![
2525            (10, 5, "red"), // Replace "brown" at 10
2526            (4, 5, "slow"), // Replace "quick" at 4
2527        ];
2528        let mut idx = 0;
2529
2530        // Apply bulk edit
2531        tree1.apply_bulk_edits(&edits, &buffers1, |_text| {
2532            let info = infos[idx];
2533            idx += 1;
2534            info
2535        });
2536
2537        // Apply sequential edits (in descending order to match)
2538        // First: replace "brown" at 10
2539        tree2.delete(10, 5, &buffers2);
2540        buffers2.push(StringBuffer::new(next_id2, b"red".to_vec()));
2541        tree2.insert(
2542            10,
2543            BufferLocation::Added(next_id2),
2544            0,
2545            3,
2546            Some(0),
2547            &buffers2,
2548        );
2549        next_id2 += 1;
2550
2551        // Second: replace "quick" at 4
2552        tree2.delete(4, 5, &buffers2);
2553        buffers2.push(StringBuffer::new(next_id2, b"slow".to_vec()));
2554        tree2.insert(4, BufferLocation::Added(next_id2), 0, 4, Some(0), &buffers2);
2555
2556        assert_eq!(
2557            tree1.total_bytes(),
2558            tree2.total_bytes(),
2559            "Bulk edit total_bytes {} != sequential {}",
2560            tree1.total_bytes(),
2561            tree2.total_bytes()
2562        );
2563    }
2564}
2565
2566#[cfg(test)]
2567mod property_tests {
2568    use super::*;
2569    use proptest::prelude::*;
2570
2571    // Helper to create test buffers - using larger buffers for property tests
2572    fn test_buffers_large() -> Vec<StringBuffer> {
2573        vec![
2574            StringBuffer::new(0, vec![b'a'; 10000]), // Large buffer
2575            StringBuffer::new(1, vec![b'b'; 10000]),
2576        ]
2577    }
2578
    // A single tree mutation generated by the operation strategies below.
    #[derive(Debug, Clone)]
    enum Operation {
        Insert { offset: usize, bytes: usize }, // insert `bytes` bytes at `offset`
        Delete { offset: usize, bytes: usize }, // delete `bytes` bytes starting at `offset`
    }
2585
2586    // Generate a sequence of operations
2587    fn operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
2588        prop::collection::vec(
2589            prop_oneof![
2590                (0usize..200, 1usize..50)
2591                    .prop_map(|(offset, bytes)| { Operation::Insert { offset, bytes } }),
2592                (0usize..200, 1usize..50)
2593                    .prop_map(|(offset, bytes)| { Operation::Delete { offset, bytes } }),
2594            ],
2595            0..50,
2596        )
2597    }
2598
2599    // More aggressive operation strategy that creates more internal nodes
2600    fn aggressive_operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
2601        prop::collection::vec(
2602            prop_oneof![
2603                // More inserts, smaller chunks to create more splits
2604                3 => (0usize..100, 1usize..20).prop_map(|(offset, bytes)| {
2605                    Operation::Insert { offset, bytes }
2606                }),
2607                // Some deletes
2608                1 => (0usize..100, 1usize..30).prop_map(|(offset, bytes)| {
2609                    Operation::Delete { offset, bytes }
2610                }),
2611            ],
2612            10..30, // More operations to force tree growth
2613        )
2614    }
2615
    proptest! {
        /// Property: total_bytes must track the exact running sum of applied
        /// inserts and (clamped) deletes.
        #[test]
        fn prop_total_bytes_consistency(operations in operation_strategy()) {
            let buffers = test_buffers_large();
            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
            let mut expected_bytes = 100;

            for op in operations {
                match op {
                    Operation::Insert { offset, bytes } => {
                        // Clamp offset and length so every insert is valid.
                        let offset = offset.min(tree.total_bytes());
                        let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
                        let bytes = bytes.min(buffer_len);
                        tree.insert(offset, BufferLocation::Added(1), 0, bytes, Some(0), &buffers);
                        expected_bytes += bytes;
                    }
                    Operation::Delete { offset, bytes } => {
                        if offset < tree.total_bytes() {
                            // The tree clamps over-long deletes; mirror that
                            // clamping in the expected value.
                            let actual_delete = bytes.min(tree.total_bytes() - offset);
                            tree.delete(offset, bytes, &buffers);
                            expected_bytes -= actual_delete;
                        }
                    }
                }
            }

            prop_assert_eq!(tree.total_bytes(), expected_bytes);
        }

        /// Property: byte count never underflows. A usize underflow would wrap
        /// to a huge value, which the bound below would catch.
        #[test]
        fn prop_tree_never_negative_bytes(operations in operation_strategy()) {
            let buffers = test_buffers_large();
            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));

            for op in operations {
                match op {
                    Operation::Insert { offset, bytes } => {
                        let offset = offset.min(tree.total_bytes());
                        let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
                        let bytes = bytes.min(buffer_len);
                        tree.insert(offset, BufferLocation::Added(1), 0, bytes, Some(0), &buffers);
                    }
                    Operation::Delete { offset, bytes } => {
                        // Deliberately unclamped: the tree must handle
                        // out-of-range deletes itself.
                        tree.delete(offset, bytes, &buffers);
                    }
                }

                // Tree should never have negative bytes (underflow would wrap to large number)
                prop_assert!(tree.total_bytes() < 10_000_000);
            }
        }

        /// Property: the tree stays roughly balanced — depth bounded by
        /// ~2*log2(leaf_count) plus slack.
        #[test]
        fn prop_balanced_after_operations(operations in operation_strategy()) {
            let buffers = test_buffers_large();
            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));

            for op in operations {
                match op {
                    Operation::Insert { offset, bytes } => {
                        let offset = offset.min(tree.total_bytes());
                        let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
                        let bytes = bytes.min(buffer_len);
                        tree.insert(offset, BufferLocation::Added(1), 0, bytes, Some(0), &buffers);
                    }
                    Operation::Delete { offset, bytes } => {
                        tree.delete(offset, bytes, &buffers);
                    }
                }
            }

            let stats = tree.stats();
            if stats.leaf_count > 1 {
                // Allow +2 slack over the theoretical 2*log2(n) bound.
                let max_depth = 2 * (stats.leaf_count as f64).log2().ceil() as usize;
                prop_assert!(stats.depth <= max_depth + 2, "Tree depth {} exceeds expected max {} for {} leaves", stats.depth, max_depth, stats.leaf_count);
            }
        }

        /// Property: inserting then deleting the same range restores the
        /// original byte count.
        #[test]
        fn prop_insert_then_delete_equals_original(
            insert_offset in 0usize..100,
            insert_bytes in 1usize..50
        ) {
            let buffers = test_buffers_large();
            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
            let original_bytes = tree.total_bytes();

            let insert_offset = insert_offset.min(tree.total_bytes());
            let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
            let insert_bytes = insert_bytes.min(buffer_len);
            tree.insert(insert_offset, BufferLocation::Added(1), 0, insert_bytes, Some(0), &buffers);

            // Delete what we just inserted
            tree.delete(insert_offset, insert_bytes, &buffers);

            prop_assert_eq!(tree.total_bytes(), original_bytes);
        }

        /// Property: every offset inside the document resolves to a piece.
        #[test]
        fn prop_find_offset_in_bounds(
            offset in 0usize..100
        ) {
            let tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));

            let result = tree.find_by_offset(offset);
            prop_assert!(result.is_some());
        }

        /// Property: offsets at or past the end resolve to None.
        #[test]
        fn prop_find_offset_out_of_bounds(
            offset in 100usize..1000
        ) {
            let tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));

            let result = tree.find_by_offset(offset);
            prop_assert!(result.is_none());
        }

        /// Property: repeated appends grow total_bytes by exactly the sum of
        /// inserted lengths.
        #[test]
        fn prop_sequential_inserts_maintain_order(
            count in 1usize..20,
            insert_size in 1usize..10
        ) {
            let buffers = test_buffers_large();
            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 10, Some(0));

            for _i in 0..count {
                let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
                let insert_size = insert_size.min(buffer_len);
                tree.insert(tree.total_bytes(), BufferLocation::Added(1), 0, insert_size, Some(0), &buffers);
            }

            let expected_bytes = 10 + (count * insert_size);
            prop_assert_eq!(tree.total_bytes(), expected_bytes);
        }

        /// Property: deleting from the front in fixed-size chunks always
        /// terminates at exactly zero bytes.
        #[test]
        fn prop_delete_all_reaches_zero(
            delete_size in 1usize..10
        ) {
            let buffers = test_buffers_large();
            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));

            while tree.total_bytes() > 0 {
                let to_delete = delete_size.min(tree.total_bytes());
                tree.delete(0, to_delete, &buffers);
            }

            prop_assert_eq!(tree.total_bytes(), 0);
        }
    }
2767
2768    #[test]
2769    fn test_empty_delete() {
2770        let buffers = test_buffers_large();
2771        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2772        tree.delete(50, 0, &buffers);
2773        assert_eq!(tree.total_bytes(), 100);
2774    }
2775
2776    // ============== Property tests for apply_bulk_edits ==============
2777
    // One randomly generated bulk edit: delete `delete_len` bytes at
    // `position`, then insert `insert_text` there.
    #[derive(Debug, Clone)]
    struct BulkEditOp {
        position: usize,     // byte offset of the edit
        delete_len: usize,   // bytes removed at `position` (0 = pure insert)
        insert_text: String, // replacement text ("" = pure delete)
    }
2785
2786    fn bulk_edit_strategy() -> impl Strategy<Value = Vec<BulkEditOp>> {
2787        prop::collection::vec(
2788            (0usize..100, 0usize..20, "[a-zA-Z0-9]{0,10}").prop_map(
2789                |(position, delete_len, insert_text)| BulkEditOp {
2790                    position,
2791                    delete_len,
2792                    insert_text,
2793                },
2794            ),
2795            1..20,
2796        )
2797    }
2798
2799    // Helper to pre-allocate buffers for property tests
2800    fn preallocate_buffers(
2801        buffers: &mut Vec<StringBuffer>,
2802        texts: &[String],
2803    ) -> Vec<(BufferLocation, usize, usize, Option<usize>)> {
2804        let mut infos = Vec::new();
2805        for text in texts {
2806            let id = buffers.len();
2807            let bytes = text.as_bytes().to_vec();
2808            let lf = bytes.iter().filter(|&&b| b == b'\n').count();
2809            let len = bytes.len();
2810            buffers.push(StringBuffer::new(id, bytes));
2811            infos.push((BufferLocation::Added(id), 0, len, Some(lf)));
2812        }
2813        infos
2814    }
2815
2816    proptest! {
2817        /// Property: apply_bulk_edits maintains tree consistency
2818        /// Sum of piece lengths must equal total_bytes after bulk edit
2819        #[test]
2820        fn prop_bulk_edit_tree_consistency(ops in bulk_edit_strategy()) {
2821            let mut buffers = vec![StringBuffer::new(0, b"x".repeat(100).to_vec())];
2822            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2823
2824            // Sort edits by position descending (required by apply_bulk_edits)
2825            let mut ops = ops;
2826            ops.sort_by(|a, b| b.position.cmp(&a.position));
2827
2828            // Pre-allocate all buffers
2829            let texts: Vec<String> = ops.iter().map(|op| op.insert_text.clone()).collect();
2830            let infos = preallocate_buffers(&mut buffers, &texts);
2831
2832            // Clamp positions and delete lengths to valid ranges
2833            let edits: Vec<(usize, usize, &str)> = ops.iter()
2834                .map(|op| {
2835                    let pos = op.position.min(tree.total_bytes());
2836                    let del = op.delete_len.min(tree.total_bytes().saturating_sub(pos));
2837                    (pos, del, op.insert_text.as_str())
2838                })
2839                .collect();
2840
2841            let mut idx = 0;
2842            tree.apply_bulk_edits(&edits, &buffers, |_text| {
2843                let info = infos[idx];
2844                idx += 1;
2845                info
2846            });
2847
2848            // INVARIANT: Sum of all piece lengths must equal total_bytes
2849            let leaves = tree.get_leaves();
2850            let sum_of_pieces: usize = leaves.iter().map(|leaf| leaf.bytes).sum();
2851            prop_assert_eq!(
2852                sum_of_pieces,
2853                tree.total_bytes(),
2854                "After bulk edit: sum of pieces ({}) != total_bytes ({})",
2855                sum_of_pieces,
2856                tree.total_bytes()
2857            );
2858        }
2859
2860        /// Property: apply_bulk_edits returns correct delta
2861        #[test]
2862        fn prop_bulk_edit_correct_delta(ops in bulk_edit_strategy()) {
2863            let mut buffers = vec![StringBuffer::new(0, b"x".repeat(100).to_vec())];
2864            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2865
2866            let original_bytes = tree.total_bytes();
2867
2868            // Sort edits by position descending
2869            let mut ops = ops;
2870            ops.sort_by(|a, b| b.position.cmp(&a.position));
2871
2872            // Pre-allocate all buffers
2873            let texts: Vec<String> = ops.iter().map(|op| op.insert_text.clone()).collect();
2874            let infos = preallocate_buffers(&mut buffers, &texts);
2875
2876            let edits: Vec<(usize, usize, &str)> = ops.iter()
2877                .map(|op| {
2878                    let pos = op.position.min(tree.total_bytes());
2879                    let del = op.delete_len.min(tree.total_bytes().saturating_sub(pos));
2880                    (pos, del, op.insert_text.as_str())
2881                })
2882                .collect();
2883
2884            let mut idx = 0;
2885            let delta = tree.apply_bulk_edits(&edits, &buffers, |_text| {
2886                let info = infos[idx];
2887                idx += 1;
2888                info
2889            });
2890
2891            let actual_change = tree.total_bytes() as isize - original_bytes as isize;
2892            prop_assert_eq!(
2893                delta,
2894                actual_change,
2895                "Returned delta ({}) != actual change ({})",
2896                delta,
2897                actual_change
2898            );
2899        }
2900
2901        /// Property: bulk edit with only inserts increases size correctly
2902        #[test]
2903        fn prop_bulk_edit_inserts_only(
2904            positions in prop::collection::vec(0usize..50, 1..10),
2905            insert_len in 1usize..10
2906        ) {
2907            let mut buffers = vec![StringBuffer::new(0, b"x".repeat(50).to_vec())];
2908            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 50, Some(0));
2909
2910            let insert_text = "a".repeat(insert_len);
2911            let original_bytes = tree.total_bytes();
2912
2913            // Sort positions descending
2914            let mut positions = positions;
2915            positions.sort_by(|a, b| b.cmp(a));
2916            positions.dedup();
2917
2918            // Pre-allocate all buffers
2919            let texts: Vec<String> = positions.iter().map(|_| insert_text.clone()).collect();
2920            let infos = preallocate_buffers(&mut buffers, &texts);
2921
2922            let edits: Vec<(usize, usize, &str)> = positions
2923                .iter()
2924                .map(|&pos| (pos.min(tree.total_bytes()), 0, insert_text.as_str()))
2925                .collect();
2926
2927            let mut idx = 0;
2928            tree.apply_bulk_edits(&edits, &buffers, |_text| {
2929                let info = infos[idx];
2930                idx += 1;
2931                info
2932            });
2933
2934            let expected_bytes = original_bytes + edits.len() * insert_len;
2935            prop_assert_eq!(
2936                tree.total_bytes(),
2937                expected_bytes,
2938                "After {} inserts of {} bytes each: expected {} bytes, got {}",
2939                edits.len(),
2940                insert_len,
2941                expected_bytes,
2942                tree.total_bytes()
2943            );
2944        }
2945
2946        /// Property: bulk edit with only deletes decreases size correctly
2947        #[test]
2948        fn prop_bulk_edit_deletes_only(
2949            ops in prop::collection::vec((0usize..80, 1usize..5), 1..10)
2950        ) {
2951            let buffers = vec![StringBuffer::new(0, b"x".repeat(100).to_vec())];
2952            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2953
2954            // Sort by position descending
2955            let mut ops = ops;
2956            ops.sort_by(|a, b| b.0.cmp(&a.0));
2957
2958            // Remove overlapping deletes
2959            let mut edits: Vec<(usize, usize, &str)> = Vec::new();
2960            let mut last_affected_pos = tree.total_bytes();
2961            for (pos, del_len) in ops {
2962                if pos < last_affected_pos {
2963                    let actual_del = del_len.min(last_affected_pos - pos);
2964                    if actual_del > 0 {
2965                        edits.push((pos, actual_del, ""));
2966                        last_affected_pos = pos;
2967                    }
2968                }
2969            }
2970
2971            let expected_delete: usize = edits.iter().map(|(_, d, _)| d).sum();
2972
2973            tree.apply_bulk_edits(&edits, &buffers, |_| {
2974                (BufferLocation::Added(1), 0, 0, Some(0))
2975            });
2976
2977            let expected_bytes = 100 - expected_delete;
2978            prop_assert_eq!(
2979                tree.total_bytes(),
2980                expected_bytes,
2981                "After deleting {} bytes: expected {} bytes, got {}",
2982                expected_delete,
2983                expected_bytes,
2984                tree.total_bytes()
2985            );
2986        }
2987    }
2988
    proptest! {
        /// Property: Sum of all piece lengths must equal total_bytes
        /// This catches bugs like duplicate piece insertion
        #[test]
        fn prop_tree_consistency_piece_sum(operations in operation_strategy()) {
            let buffers = test_buffers_large();
            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));

            for op in operations {
                match op {
                    Operation::Insert { offset, bytes } => {
                        // Clamp offset/length so every insert is valid.
                        let offset = offset.min(tree.total_bytes());
                        let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
                        let bytes = bytes.min(buffer_len);
                        tree.insert(offset, BufferLocation::Added(1), 0, bytes, Some(0), &buffers);
                    }
                    Operation::Delete { offset, bytes } => {
                        tree.delete(offset, bytes, &buffers);
                    }
                }

                // INVARIANT: Sum of all piece lengths must equal total_bytes
                // (checked after every operation, not just at the end).
                let leaves = tree.get_leaves();
                let sum_of_pieces: usize = leaves.iter().map(|leaf| leaf.bytes).sum();
                prop_assert_eq!(
                    sum_of_pieces,
                    tree.total_bytes(),
                    "Tree inconsistency: sum of piece lengths ({}) != total_bytes ({})",
                    sum_of_pieces,
                    tree.total_bytes()
                );
            }
        }

        /// Property: Line feed count consistency
        /// Sum of all piece line_feed_cnt must equal tree's total line feeds
        #[test]
        fn prop_tree_consistency_line_feeds(operations in operation_strategy()) {
            let buffers = test_buffers_large();
            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));

            for op in operations {
                match op {
                    Operation::Insert { offset, bytes } => {
                        let offset = offset.min(tree.total_bytes());
                        let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
                        let bytes = bytes.min(buffer_len);
                        tree.insert(offset, BufferLocation::Added(1), 0, bytes, Some(0), &buffers);
                    }
                    Operation::Delete { offset, bytes } => {
                        tree.delete(offset, bytes, &buffers);
                    }
                }

                // INVARIANT: Sum of all piece line feeds must equal tree's total.
                // try_fold yields None if any piece lacks a line-feed count, which
                // must then match a None total on the tree side as well.
                let leaves = tree.get_leaves();
                let sum_of_line_feeds: Option<usize> = leaves.iter()
                    .try_fold(0, |acc, leaf| leaf.line_feed_cnt.map(|cnt| acc + cnt));
                let stats = tree.stats();
                prop_assert_eq!(
                    sum_of_line_feeds,
                    stats.line_feed_count,
                    "Line feed inconsistency: sum of piece line feeds ({:?}) != tree total ({:?})",
                    sum_of_line_feeds,
                    stats.line_feed_count
                );
            }
        }

        /// Aggressive consistency test designed to catch the duplicate piece insertion bug
        /// Uses more operations with smaller inserts to force internal node creation and splits
        #[test]
        fn prop_tree_consistency_aggressive(operations in aggressive_operation_strategy()) {
            let buffers = test_buffers_large();
            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));

            // Prime the tree with several inserts to create internal nodes first
            // This increases the likelihood of hitting the bug scenario
            for i in 0..5 {
                let offset = (i * 17) % (tree.total_bytes().max(1));
                tree.insert(offset, BufferLocation::Added(1), i * 100, 10, Some(0), &buffers);
            }

            // Verify we have internal nodes
            prop_assert!(tree.stats().depth > 1, "Priming should create internal nodes");

            for (i, op) in operations.iter().enumerate() {
                match *op {
                    Operation::Insert { offset, bytes } => {
                        let offset = offset.min(tree.total_bytes());
                        let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
                        let bytes = bytes.min(buffer_len);
                        tree.insert(offset, BufferLocation::Added(1), 0, bytes, Some(0), &buffers);
                    }
                    Operation::Delete { offset, bytes } => {
                        tree.delete(offset, bytes, &buffers);
                    }
                }

                // CRITICAL INVARIANT: Sum of all piece lengths must equal total_bytes
                // This catches the duplicate piece insertion bug
                let leaves = tree.get_leaves();
                let sum_of_pieces: usize = leaves.iter().map(|leaf| leaf.bytes).sum();
                prop_assert_eq!(
                    sum_of_pieces,
                    tree.total_bytes(),
                    "Operation {}: Tree inconsistency after {:?}.\n\
                     Sum of piece lengths ({}) != total_bytes ({}).\n\
                     Tree depth: {}, leaves: {}.\n\
                     Pieces: {:?}",
                    i, op, sum_of_pieces, tree.total_bytes(),
                    tree.stats().depth, tree.stats().leaf_count,
                    leaves
                );
            }
        }
    }
3106
3107    #[test]
3108    fn test_delete_beyond_end() {
3109        let buffers = test_buffers_large();
3110        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3111        tree.delete(50, 100, &buffers); // Try to delete 100 bytes from offset 50
3112        assert_eq!(tree.total_bytes(), 50); // Should only delete 50 bytes
3113    }
3114
3115    #[test]
3116    fn test_insert_zero_bytes() {
3117        let buffers = test_buffers_large();
3118        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3119        tree.insert(50, BufferLocation::Added(1), 0, 0, Some(0), &buffers);
3120        assert_eq!(tree.total_bytes(), 100);
3121    }
3122
3123    #[test]
3124    fn test_tree_consistency_after_insert() {
3125        // Regression test: verify tree consistency after each operation
3126        // This test creates enough inserts to force internal nodes, which is where the bug manifests
3127        let buffers = test_buffers_large();
3128        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3129
3130        // Do several inserts to create internal nodes and splits
3131        for i in 0..10 {
3132            let offset = (i * 13) % (tree.total_bytes().max(1)); // Varying offsets
3133            tree.insert(
3134                offset,
3135                BufferLocation::Added(1),
3136                i * 10,
3137                5,
3138                Some(0),
3139                &buffers,
3140            );
3141
3142            // INVARIANT: sum of piece lengths must equal total_bytes
3143            let leaves = tree.get_leaves();
3144            let sum: usize = leaves.iter().map(|l| l.bytes).sum();
3145            assert_eq!(
3146                sum,
3147                tree.total_bytes(),
3148                "After insert {}: sum of pieces ({}) != total_bytes ({}).\nLeaves: {:?}",
3149                i,
3150                sum,
3151                tree.total_bytes(),
3152                leaves
3153            );
3154        }
3155
3156        // Verify we actually created internal nodes
3157        let stats = tree.stats();
3158        assert!(
3159            stats.depth > 1,
3160            "Test should create internal nodes, but depth is {}",
3161            stats.depth
3162        );
3163    }
3164
3165    #[test]
3166    fn test_duplicate_piece_bug_exact_scenario() {
3167        // This replicates the exact scenario that exposed the duplicate insertion bug
3168        let mut buffers = vec![StringBuffer::new(0, b"initial\ntext".to_vec())];
3169        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 12, Some(1));
3170
3171        // Delete all - creates an empty piece
3172        tree.delete(0, 12, &buffers);
3173
3174        // Check tree consistency after delete
3175        let leaves = tree.get_leaves();
3176        let sum: usize = leaves.iter().map(|l| l.bytes).sum();
3177        assert_eq!(
3178            sum,
3179            tree.total_bytes(),
3180            "After delete: sum={}, total={}",
3181            sum,
3182            tree.total_bytes()
3183        );
3184
3185        // Insert 'a' at position 0
3186        buffers.push(StringBuffer::new(1, b"a".to_vec()));
3187        tree.insert(0, BufferLocation::Added(1), 0, 1, Some(0), &buffers);
3188
3189        // Check consistency
3190        let leaves = tree.get_leaves();
3191        let sum: usize = leaves.iter().map(|l| l.bytes).sum();
3192        assert_eq!(
3193            sum,
3194            tree.total_bytes(),
3195            "After first insert: sum={}, total={}. Leaves: {:?}",
3196            sum,
3197            tree.total_bytes(),
3198            leaves
3199        );
3200
3201        // Insert 'b' at position 0 - this should trigger the bug with buggy code
3202        buffers.push(StringBuffer::new(2, b"b".to_vec()));
3203        tree.insert(0, BufferLocation::Added(2), 0, 1, Some(0), &buffers);
3204
3205        // Check consistency - this will fail with the bug
3206        let leaves = tree.get_leaves();
3207        let sum: usize = leaves.iter().map(|l| l.bytes).sum();
3208        assert_eq!(
3209            sum,
3210            tree.total_bytes(),
3211            "After second insert: sum={}, total={}. Leaves: {:?}",
3212            sum,
3213            tree.total_bytes(),
3214            leaves
3215        );
3216    }
3217
3218    // Property tests for PieceRangeIter
3219    proptest! {
3220        #[test]
3221        fn test_piece_iter_covers_exact_range(
3222            ops in aggressive_operation_strategy(),
3223            start in 0usize..100,
3224            len in 1usize..50
3225        ) {
3226            let mut buffers = vec![StringBuffer::new(0, b"x".repeat(100).to_vec())];
3227            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3228
3229            // Apply operations to build up tree
3230            for op in ops.iter() {
3231                match op {
3232                    Operation::Insert { offset, bytes } => {
3233                        let offset = (*offset).min(tree.total_bytes());
3234                        buffers.push(StringBuffer::new(buffers.len(), b"a".repeat(*bytes).to_vec()));
3235                        tree.insert(offset, BufferLocation::Added(buffers.len() - 1), 0, *bytes, Some(0), &buffers);
3236                    }
3237                    Operation::Delete { offset, bytes } => {
3238                        let offset = (*offset).min(tree.total_bytes());
3239                        let bytes = (*bytes).min(tree.total_bytes().saturating_sub(offset));
3240                        if bytes > 0 {
3241                            tree.delete(offset, bytes, &buffers);
3242                        }
3243                    }
3244                }
3245            }
3246
3247            let total_bytes = tree.total_bytes();
3248            if total_bytes == 0 {
3249                return Ok(());
3250            }
3251
3252            let start = start.min(total_bytes.saturating_sub(1));
3253            let end = (start + len).min(total_bytes);
3254
3255            // Collect pieces using iterator
3256            let pieces: Vec<_> = tree.iter_pieces_in_range(start, end).collect();
3257
3258            // Verify coverage: pieces should cover [start, end)
3259            if !pieces.is_empty() {
3260                let first_piece_start = pieces[0].doc_offset;
3261                let last_piece = &pieces[pieces.len() - 1];
3262                let last_piece_end = last_piece.doc_offset + last_piece.bytes;
3263
3264                // First piece should start at or before requested start
3265                prop_assert!(first_piece_start <= start,
3266                    "First piece starts at {}, but requested start is {}", first_piece_start, start);
3267
3268                // Last piece should end at or after requested end
3269                prop_assert!(last_piece_end >= end,
3270                    "Last piece ends at {}, but requested end is {}", last_piece_end, end);
3271            }
3272        }
3273
3274        #[test]
3275        fn test_piece_iter_no_gaps(ops in aggressive_operation_strategy()) {
3276            let mut buffers = vec![StringBuffer::new(0, b"x".repeat(100).to_vec())];
3277            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3278
3279            for op in ops {
3280                match op {
3281                    Operation::Insert { offset, bytes } => {
3282                        let offset = offset.min(tree.total_bytes());
3283                        buffers.push(StringBuffer::new(buffers.len(), b"a".repeat(bytes).to_vec()));
3284                        tree.insert(offset, BufferLocation::Added(buffers.len() - 1), 0, bytes, Some(0), &buffers);
3285                    }
3286                    Operation::Delete { offset, bytes } => {
3287                        let offset = offset.min(tree.total_bytes());
3288                        let bytes = bytes.min(tree.total_bytes().saturating_sub(offset));
3289                        if bytes > 0 {
3290                            tree.delete(offset, bytes, &buffers);
3291                        }
3292                    }
3293                }
3294            }
3295
3296            let total_bytes = tree.total_bytes();
3297            if total_bytes == 0 {
3298                return Ok(());
3299            }
3300
3301            // Iterate over entire document
3302            let pieces: Vec<_> = tree.iter_pieces_in_range(0, total_bytes).collect();
3303
3304            // Verify no gaps: each piece should start where previous one ended
3305            for i in 1..pieces.len() {
3306                let prev_end = pieces[i - 1].doc_offset + pieces[i - 1].bytes;
3307                let curr_start = pieces[i].doc_offset;
3308                prop_assert_eq!(prev_end, curr_start,
3309                    "Gap between piece {} (ends at {}) and piece {} (starts at {})",
3310                    i - 1, prev_end, i, curr_start);
3311            }
3312        }
3313
3314        #[test]
3315        fn test_piece_iter_total_bytes_matches(ops in aggressive_operation_strategy()) {
3316            let mut buffers = vec![StringBuffer::new(0, b"x".repeat(100).to_vec())];
3317            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3318
3319            for op in ops {
3320                match op {
3321                    Operation::Insert { offset, bytes } => {
3322                        let offset = offset.min(tree.total_bytes());
3323                        buffers.push(StringBuffer::new(buffers.len(), b"a".repeat(bytes).to_vec()));
3324                        tree.insert(offset, BufferLocation::Added(buffers.len() - 1), 0, bytes, Some(0), &buffers);
3325                    }
3326                    Operation::Delete { offset, bytes } => {
3327                        let offset = offset.min(tree.total_bytes());
3328                        let bytes = bytes.min(tree.total_bytes().saturating_sub(offset));
3329                        if bytes > 0 {
3330                            tree.delete(offset, bytes, &buffers);
3331                        }
3332                    }
3333                }
3334            }
3335
3336            let total_bytes = tree.total_bytes();
3337            if total_bytes == 0 {
3338                return Ok(());
3339            }
3340
3341            // Sum of piece bytes should equal total bytes
3342            let pieces: Vec<_> = tree.iter_pieces_in_range(0, total_bytes).collect();
3343            let sum_bytes: usize = pieces.iter().map(|p| p.bytes).sum();
3344            prop_assert_eq!(sum_bytes, total_bytes,
3345                "Sum of piece bytes ({}) doesn't match total_bytes ({})", sum_bytes, total_bytes);
3346        }
3347
3348        /// Property test that verifies offset_to_position returns correct line/column
3349        /// after buffer modifications. This catches the bug where column calculation
3350        /// was incorrect after insertions/deletions.
3351        #[test]
3352        fn prop_offset_to_position_correct_after_modifications(
3353            ops in prop::collection::vec(
3354                prop_oneof![
3355                    // Insert with newlines
3356                    (0usize..50, prop::collection::vec(
3357                        prop_oneof![
3358                            Just(b'a'),
3359                            Just(b'\n'),
3360                        ],
3361                        1..20
3362                    )).prop_map(|(offset, bytes)| (offset, bytes, true)),
3363                    // Delete
3364                    (0usize..50, 1usize..10).prop_map(|(offset, _bytes)| (offset, vec![], false)),
3365                ],
3366                5..20
3367            ),
3368            test_offsets in prop::collection::vec(0usize..100, 3..10)
3369        ) {
3370            // Start with content that has newlines
3371            let initial = b"Hello\nWorld\nTest\n";
3372            let mut content = initial.to_vec();
3373
3374            let mut buffers = vec![StringBuffer::new(0, initial.to_vec())];
3375            let newline_count = initial.iter().filter(|&&b| b == b'\n').count();
3376            let mut tree = PieceTree::new(
3377                BufferLocation::Stored(0),
3378                0,
3379                initial.len(),
3380                Some(newline_count),
3381            );
3382
3383            // Apply operations, tracking actual content
3384            for (offset, bytes, is_insert) in ops {
3385                if is_insert && !bytes.is_empty() {
3386                    let offset = offset.min(content.len());
3387                    let newlines = bytes.iter().filter(|&&b| b == b'\n').count();
3388
3389                    // Add buffer and insert into tree
3390                    buffers.push(StringBuffer::new(buffers.len(), bytes.clone()));
3391                    tree.insert(
3392                        offset,
3393                        BufferLocation::Added(buffers.len() - 1),
3394                        0,
3395                        bytes.len(),
3396                        Some(newlines),
3397                        &buffers,
3398                    );
3399
3400                    // Update actual content
3401                    content.splice(offset..offset, bytes);
3402                } else if !is_insert {
3403                    // Delete operation - offset is first element, bytes length is implied
3404                    let offset = offset.min(content.len());
3405                    let delete_len = 5.min(content.len().saturating_sub(offset)); // Use fixed small delete
3406                    if delete_len > 0 {
3407                        tree.delete(offset, delete_len, &buffers);
3408                        content.drain(offset..offset + delete_len);
3409                    }
3410                }
3411            }
3412
3413            // Helper to compute ground truth line/column from content
3414            let compute_position = |content: &[u8], offset: usize| -> (usize, usize) {
3415                let offset = offset.min(content.len());
3416                let mut line = 0;
3417                let mut col = 0;
3418                for (i, &byte) in content.iter().enumerate() {
3419                    if i == offset {
3420                        break;
3421                    }
3422                    if byte == b'\n' {
3423                        line += 1;
3424                        col = 0;
3425                    } else {
3426                        col += 1;
3427                    }
3428                }
3429                (line, col)
3430            };
3431
3432            // Test various offsets
3433            for offset in test_offsets {
3434                let offset = offset.min(content.len());
3435                if offset == 0 {
3436                    continue; // Skip 0, it's a special case that always works
3437                }
3438
3439                let expected = compute_position(&content, offset);
3440                let actual = tree.offset_to_position(offset, &buffers);
3441
3442                prop_assert_eq!(
3443                    actual,
3444                    Some(expected),
3445                    "offset_to_position({}) returned {:?}, expected {:?}. Content len: {}",
3446                    offset,
3447                    actual,
3448                    expected,
3449                    content.len()
3450                );
3451            }
3452        }
3453    }
3454}