Skip to main content

fresh/model/
piece_tree.rs

1use std::io;
2use std::path::PathBuf;
3use std::sync::Arc;
4
5/// A position in the document (line and column)
6#[derive(Debug, Clone, Copy, PartialEq, Eq)]
7pub struct Position {
8    pub line: usize,   // 0-indexed line number
9    pub column: usize, // Byte offset within the line
10}
11
12/// Data storage for a buffer - either loaded in memory or unloaded (file reference)
13#[derive(Debug, Clone)]
14pub enum BufferData {
15    /// Loaded in memory with optional line indexing
16    Loaded {
17        data: Vec<u8>,
18        line_starts: Option<Vec<usize>>, // None = not indexed (large file mode)
19    },
20    /// Not yet loaded from file
21    Unloaded {
22        file_path: PathBuf,
23        file_offset: usize, // Where in file this buffer starts
24        bytes: usize,       // Length of this region
25    },
26}
27
28/// A string buffer containing a chunk of text data and its line metadata
29/// This is the fundamental storage unit - piece tree nodes reference these buffers
30#[derive(Debug, Clone)]
31pub struct StringBuffer {
32    /// Unique identifier for this buffer
33    pub id: usize,
34    /// The buffer data - either loaded or unloaded
35    pub data: BufferData,
36}
37
38impl StringBuffer {
39    /// Create a new string buffer with line metadata (legacy constructor)
40    /// Automatically computes line starts
41    pub fn new(id: usize, data: Vec<u8>) -> Self {
42        let line_starts = Self::compute_line_starts(&data);
43        StringBuffer {
44            id,
45            data: BufferData::Loaded {
46                data,
47                line_starts: Some(line_starts),
48            },
49        }
50    }
51
52    /// Create a loaded buffer with optional line indexing
53    pub fn new_loaded(id: usize, data: Vec<u8>, compute_lines: bool) -> Self {
54        let line_starts = if compute_lines {
55            Some(Self::compute_line_starts(&data))
56        } else {
57            None
58        };
59        StringBuffer {
60            id,
61            data: BufferData::Loaded { data, line_starts },
62        }
63    }
64
65    /// Create buffer for file region (not yet loaded)
66    pub fn new_unloaded(id: usize, file_path: PathBuf, file_offset: usize, bytes: usize) -> Self {
67        StringBuffer {
68            id,
69            data: BufferData::Unloaded {
70                file_path,
71                file_offset,
72                bytes,
73            },
74        }
75    }
76
77    /// Check if buffer is loaded
78    pub fn is_loaded(&self) -> bool {
79        matches!(self.data, BufferData::Loaded { .. })
80    }
81
82    /// Get data reference if loaded, None if unloaded
83    ///
84    /// NOTE: This is a low-level API. External code should use TextBuffer::get_text_range_mut()
85    /// which provides automatic lazy loading. This method is pub(crate) to prevent misuse.
86    pub(crate) fn get_data(&self) -> Option<&[u8]> {
87        match &self.data {
88            BufferData::Loaded { data, .. } => Some(data),
89            BufferData::Unloaded { .. } => None,
90        }
91    }
92
93    /// Get line starts if available
94    pub fn get_line_starts(&self) -> Option<&[usize]> {
95        match &self.data {
96            BufferData::Loaded { line_starts, .. } => line_starts.as_deref(),
97            BufferData::Unloaded { .. } => None,
98        }
99    }
100
101    /// Load buffer data from file using a FileSystem (for unloaded buffers)
102    /// Returns error if buffer is not unloaded or if I/O fails
103    pub fn load(&mut self, fs: &dyn crate::model::filesystem::FileSystem) -> io::Result<()> {
104        match &self.data {
105            BufferData::Loaded { .. } => Ok(()), // Already loaded
106            BufferData::Unloaded {
107                file_path,
108                file_offset,
109                bytes,
110            } => {
111                // Load from file using the FileSystem trait
112                let buffer = fs.read_range(file_path, *file_offset as u64, *bytes)?;
113
114                // Replace with loaded data (no line indexing for lazy-loaded chunks)
115                self.data = BufferData::Loaded {
116                    data: buffer,
117                    line_starts: None,
118                };
119
120                Ok(())
121            }
122        }
123    }
124
125    /// Create a new unloaded buffer representing a chunk of this buffer
126    /// This is used for splitting large unloaded buffers into smaller chunks
127    ///
128    /// # Arguments
129    /// * `new_id` - The ID for the new buffer
130    /// * `chunk_offset` - Offset within this buffer where the chunk starts
131    /// * `chunk_bytes` - Number of bytes in the chunk
132    ///
133    /// # Returns
134    /// A new StringBuffer referencing the chunk, or None if this buffer is not unloaded
135    /// or if the chunk range is invalid
136    pub fn create_chunk_buffer(
137        &self,
138        new_id: usize,
139        chunk_offset: usize,
140        chunk_bytes: usize,
141    ) -> Option<StringBuffer> {
142        match &self.data {
143            BufferData::Unloaded {
144                file_path,
145                file_offset,
146                bytes,
147            } => {
148                // Validate chunk range
149                if chunk_offset + chunk_bytes > *bytes {
150                    return None;
151                }
152
153                Some(StringBuffer::new_unloaded(
154                    new_id,
155                    file_path.clone(),
156                    file_offset + chunk_offset,
157                    chunk_bytes,
158                ))
159            }
160            BufferData::Loaded { .. } => None, // Can't create chunk from loaded buffer
161        }
162    }
163
164    /// Compute line start offsets for a buffer
165    fn compute_line_starts(data: &[u8]) -> Vec<usize> {
166        let mut line_starts = vec![0];
167        for (i, &byte) in data.iter().enumerate() {
168            if byte == b'\n' {
169                line_starts.push(i + 1);
170            }
171        }
172        line_starts
173    }
174
175    /// Get the number of line feeds (newlines) in this buffer
176    /// Returns None if line indexing was not computed or buffer is unloaded
177    pub fn line_feed_count(&self) -> Option<usize> {
178        match &self.data {
179            BufferData::Loaded { line_starts, .. } => line_starts
180                .as_ref()
181                .map(|starts| starts.len().saturating_sub(1)),
182            BufferData::Unloaded { .. } => None,
183        }
184    }
185
186    /// Append data to this buffer and recompute line starts
187    /// Returns the offset where the appended data starts
188    /// Only works for loaded buffers with line starts
189    pub fn append(&mut self, data_to_append: &[u8]) -> usize {
190        match &mut self.data {
191            BufferData::Loaded { data, line_starts } => {
192                let start_offset = data.len();
193                data.extend_from_slice(data_to_append);
194
195                // Add new line starts if we're tracking them
196                if let Some(ref mut line_starts) = line_starts {
197                    for (i, &byte) in data_to_append.iter().enumerate() {
198                        if byte == b'\n' {
199                            line_starts.push(start_offset + i + 1);
200                        }
201                    }
202                }
203
204                start_offset
205            }
206            BufferData::Unloaded { .. } => {
207                // Can't append to unloaded buffer
208                0
209            }
210        }
211    }
212}
213
214/// Identifies which buffer a piece of text comes from
215#[derive(Debug, Clone, Copy, PartialEq, Eq)]
216pub enum BufferLocation {
217    /// Data is in the original stored/persisted buffer
218    Stored(usize), // buffer_id
219    /// Data is in the added/modified buffer
220    Added(usize), // buffer_id
221}
222
223impl BufferLocation {
224    /// Get the buffer ID
225    pub fn buffer_id(&self) -> usize {
226        match self {
227            Self::Stored(id) | Self::Added(id) => *id,
228        }
229    }
230}
231
232/// A node in the piece tree with integrated line tracking
233#[derive(Debug, Clone)]
234pub enum PieceTreeNode {
235    /// Internal node with left and right children
236    Internal {
237        left_bytes: usize,      // Total bytes in left subtree
238        lf_left: Option<usize>, // Total line feeds in left subtree (None if unknown)
239        left: Arc<PieceTreeNode>,
240        right: Arc<PieceTreeNode>,
241    },
242    /// Leaf node representing an actual piece
243    Leaf {
244        location: BufferLocation, // Where this piece's data is (includes buffer_id)
245        offset: usize,            // Offset within the buffer
246        bytes: usize,             // Number of bytes in this piece
247        line_feed_cnt: Option<usize>, // Number of line feeds in this piece (None if unknown)
248    },
249}
250
251/// Information about a piece at a specific location
252#[derive(Debug, Clone)]
253pub struct PieceInfo {
254    pub location: BufferLocation,       // Which buffer (Stored or Added)
255    pub offset: usize,                  // Starting offset of this piece within that buffer
256    pub bytes: usize,                   // Length of this piece in bytes
257    pub offset_in_piece: Option<usize>, // For queries: how far into this piece the query point is
258}
259
260/// Result from finding a piece by byte offset
261#[derive(Debug, Clone)]
262struct OffsetFindResult {
263    info: PieceInfo,
264    bytes_before: usize, // Total bytes in all pieces before this one
265}
266
267/// A cursor position in the document
268#[derive(Debug, Clone)]
269pub struct Cursor {
270    pub byte_offset: usize, // Absolute byte offset in document
271    pub line: usize,        // Line number (0-indexed)
272    pub col: usize,         // Column within line (byte offset)
273}
274
275/// Represents the data for a leaf node in the piece tree
276#[derive(Debug, Clone, Copy)]
277pub struct LeafData {
278    pub location: BufferLocation,
279    pub offset: usize,
280    pub bytes: usize,
281    pub line_feed_cnt: Option<usize>,
282}
283
284impl LeafData {
285    pub fn new(
286        location: BufferLocation,
287        offset: usize,
288        bytes: usize,
289        line_feed_cnt: Option<usize>,
290    ) -> Self {
291        LeafData {
292            location,
293            offset,
294            bytes,
295            line_feed_cnt,
296        }
297    }
298}
299
300/// Statistics about the piece tree structure
301#[derive(Debug, Clone, Copy)]
302pub struct TreeStats {
303    pub total_bytes: usize,
304    pub depth: usize,
305    pub leaf_count: usize,
306    pub line_feed_count: Option<usize>,
307}
308
309// Line iteration can be implemented by:
310// 1. Maintaining a cursor position (current piece + offset within piece)
311// 2. For next_line(): scan forward in the current piece's buffer until '\n',
312//    or move to the next piece if we reach the end
313// 3. For prev_line(): scan backward similarly
314// The iterator would need access to the actual buffer data (Stored/Added)
315// which is managed externally, so this is deferred until buffer integration.
316
317impl PieceTreeNode {
318    /// Find the piece containing the given byte offset
319    fn find_by_offset(&self, offset: usize) -> Option<OffsetFindResult> {
320        match self {
321            Self::Internal {
322                left_bytes,
323                left,
324                right,
325                ..
326            } => {
327                if offset < *left_bytes {
328                    left.find_by_offset(offset)
329                } else {
330                    // Search in right subtree
331                    right.find_by_offset(offset - left_bytes).map(|mut result| {
332                        // Adjust bytes_before to account for left subtree
333                        result.bytes_before += left_bytes;
334                        result
335                    })
336                }
337            }
338            Self::Leaf {
339                location,
340                offset: piece_offset,
341                bytes,
342                ..
343            } => {
344                if offset < *bytes {
345                    Some(OffsetFindResult {
346                        info: PieceInfo {
347                            location: *location,
348                            offset: *piece_offset,
349                            bytes: *bytes,
350                            offset_in_piece: Some(offset),
351                        },
352                        bytes_before: 0,
353                    })
354                } else {
355                    None
356                }
357            }
358        }
359    }
360
361    /// Get total bytes in this node
362    fn total_bytes(&self) -> usize {
363        match self {
364            Self::Internal {
365                left_bytes, right, ..
366            } => left_bytes + right.total_bytes(),
367            Self::Leaf { bytes, .. } => *bytes,
368        }
369    }
370
371    /// Get total line feeds in this node
372    /// Returns None if any piece has unknown line count
373    fn total_line_feeds(&self) -> Option<usize> {
374        match self {
375            Self::Internal { lf_left, right, .. } => match (lf_left, right.total_line_feeds()) {
376                (Some(left), Some(right)) => Some(left + right),
377                _ => None,
378            },
379            Self::Leaf { line_feed_cnt, .. } => *line_feed_cnt,
380        }
381    }
382
383    /// Get the depth of this tree
384    fn depth(&self) -> usize {
385        match self {
386            Self::Internal { left, right, .. } => 1 + left.depth().max(right.depth()),
387            Self::Leaf { .. } => 1,
388        }
389    }
390
391    /// Count the number of leaf nodes
392    fn count_leaves(&self) -> usize {
393        match self {
394            Self::Internal { left, right, .. } => left.count_leaves() + right.count_leaves(),
395            Self::Leaf { .. } => 1,
396        }
397    }
398
399    /// Collect all leaves in order
400    fn collect_leaves(&self, leaves: &mut Vec<LeafData>) {
401        match self {
402            Self::Internal { left, right, .. } => {
403                left.collect_leaves(leaves);
404                right.collect_leaves(leaves);
405            }
406            Self::Leaf {
407                location,
408                offset,
409                bytes,
410                line_feed_cnt,
411            } => {
412                leaves.push(LeafData::new(*location, *offset, *bytes, *line_feed_cnt));
413            }
414        }
415    }
416
417    /// Count line feeds in a byte range [start, end)
418    /// current_offset: byte offset at the start of this node
419    /// Returns None if any piece in the range has unknown line count
420    fn count_lines_in_byte_range(
421        &self,
422        current_offset: usize,
423        start: usize,
424        end: usize,
425    ) -> Option<usize> {
426        match self {
427            Self::Internal {
428                left_bytes,
429                left,
430                right,
431                ..
432            } => {
433                let left_end = current_offset + left_bytes;
434
435                if end <= current_offset || start >= current_offset + self.total_bytes() {
436                    Some(0) // Range is completely before or after this node
437                } else if start <= current_offset && end >= current_offset + self.total_bytes() {
438                    // Range completely contains this node
439                    self.total_line_feeds()
440                } else if end <= left_end {
441                    // Range is completely in left subtree
442                    left.count_lines_in_byte_range(current_offset, start, end)
443                } else if start >= left_end {
444                    // Range is completely in right subtree
445                    right.count_lines_in_byte_range(left_end, start, end)
446                } else {
447                    // Range spans both subtrees
448                    let left_count = left.count_lines_in_byte_range(current_offset, start, end)?;
449                    let right_count = right.count_lines_in_byte_range(left_end, start, end)?;
450                    Some(left_count + right_count)
451                }
452            }
453            Self::Leaf {
454                line_feed_cnt,
455                bytes,
456                ..
457            } => {
458                let node_end = current_offset + bytes;
459
460                if end <= current_offset || start >= node_end {
461                    Some(0) // No overlap
462                } else if start <= current_offset && end >= node_end {
463                    // Range completely contains this leaf
464                    *line_feed_cnt
465                } else {
466                    // Partial overlap - for simplicity, return the full count
467                    // (accurate counting would require scanning the buffer)
468                    *line_feed_cnt
469                }
470            }
471        }
472    }
473
474    /// Find byte offset for a given line/column position
475    /// current_offset: byte offset at start of this node
476    /// lines_before: number of complete lines before this node
477    fn find_byte_offset_for_line(
478        &self,
479        current_offset: usize,
480        lines_before: usize,
481        target_line: usize,
482        column: usize,
483        buffers: &[StringBuffer],
484    ) -> Option<usize> {
485        match self {
486            Self::Internal {
487                left_bytes,
488                lf_left,
489                left,
490                right,
491            } => {
492                // If line count is unknown, we can't do line-based navigation
493                let lf_left = lf_left.as_ref()?;
494                let lines_after_left = lines_before + lf_left;
495
496                // When looking for line start (column == 0), we want the leftmost piece containing the line
497                // So use <= instead of < to prefer going left when the line boundary is exactly at lines_after_left
498                let go_left = if column == 0 {
499                    target_line <= lines_after_left
500                } else {
501                    target_line < lines_after_left
502                };
503
504                if go_left {
505                    // Target is in left subtree
506                    let result = left.find_byte_offset_for_line(
507                        current_offset,
508                        lines_before,
509                        target_line,
510                        column,
511                        buffers,
512                    );
513                    // If left returns None, try right as fallback (happens when line starts after a newline)
514                    result.or_else(|| {
515                        right.find_byte_offset_for_line(
516                            current_offset + left_bytes,
517                            lines_after_left,
518                            target_line,
519                            column,
520                            buffers,
521                        )
522                    })
523                } else {
524                    // Target is in right subtree
525                    right.find_byte_offset_for_line(
526                        current_offset + left_bytes,
527                        lines_after_left,
528                        target_line,
529                        column,
530                        buffers,
531                    )
532                }
533            }
534            Self::Leaf {
535                location,
536                offset,
537                bytes,
538                line_feed_cnt,
539            } => {
540                // If line count is unknown, we can't do line-based navigation
541                let line_feed_cnt = line_feed_cnt.as_ref()?;
542                let lines_in_piece = lines_before + line_feed_cnt;
543
544                // Special case: when looking for column==0 of line N where N == lines_in_piece,
545                // the line might start in the NEXT piece if this piece ends with a newline.
546                // Check if the last byte of this piece is a newline.
547                if column == 0 && target_line == lines_in_piece && target_line > lines_before {
548                    let buffer = buffers.get(location.buffer_id())?;
549                    let data = buffer.get_data()?;
550                    let last_byte_offset = offset + bytes - 1;
551                    let last_byte = data.get(last_byte_offset)?;
552
553                    if *last_byte == b'\n' {
554                        // Piece ends with newline, so the next line starts in the next piece
555                        return None;
556                    }
557                    // Otherwise, line starts within this piece after a newline
558                }
559
560                if target_line < lines_before || target_line > lines_in_piece {
561                    // Target line not in this piece
562                    return None;
563                }
564
565                // Get the buffer for this piece
566                let buffer_id = location.buffer_id();
567                let buffer = buffers.get(buffer_id)?;
568                let line_starts = buffer.get_line_starts()?;
569
570                // Find the line within the piece
571                let line_in_piece = target_line - lines_before;
572
573                // Get piece range in buffer
574                let piece_start_in_buffer = *offset;
575                let piece_end_in_buffer = offset + bytes;
576
577                // Special case: first line of piece (line_in_piece == 0)
578                let line_start_in_buffer = if line_in_piece == 0 {
579                    // First line starts at piece start
580                    piece_start_in_buffer
581                } else {
582                    // Find the Nth newline within this piece
583                    // Count line_starts that fall within [piece_start, piece_end)
584                    let mut lines_seen = 0;
585                    let mut found_line_start = None;
586
587                    for &line_start in line_starts.iter() {
588                        // Line starts are positions of newlines + 1, or beginning of buffer (0)
589                        // We want line_starts that are > piece_start and < piece_end
590                        if line_start > piece_start_in_buffer && line_start < piece_end_in_buffer {
591                            if lines_seen == line_in_piece - 1 {
592                                // This is the start of our target line
593                                found_line_start = Some(line_start);
594                                break;
595                            }
596                            lines_seen += 1;
597                        }
598                    }
599
600                    found_line_start?
601                };
602
603                // Add column offset
604                let target_offset_in_buffer = line_start_in_buffer + column;
605
606                // Convert to document offset
607                let offset_in_piece = target_offset_in_buffer.saturating_sub(piece_start_in_buffer);
608                Some(current_offset + offset_in_piece.min(*bytes))
609            }
610        }
611    }
612}
613
614/// The main piece table structure with integrated line tracking
615#[derive(Debug, Clone)]
616pub struct PieceTree {
617    root: Arc<PieceTreeNode>,
618    total_bytes: usize,
619}
620
621impl PieceTree {
622    /// Create a new piece table with a single initial piece
623    pub fn new(
624        location: BufferLocation,
625        offset: usize,
626        bytes: usize,
627        line_feed_cnt: Option<usize>,
628    ) -> Self {
629        PieceTree {
630            root: Arc::new(PieceTreeNode::Leaf {
631                location,
632                offset,
633                bytes,
634                line_feed_cnt,
635            }),
636            total_bytes: bytes,
637        }
638    }
639
640    /// Create an empty piece table
641    pub fn empty() -> Self {
642        PieceTree {
643            root: Arc::new(PieceTreeNode::Leaf {
644                location: BufferLocation::Stored(0),
645                offset: 0,
646                bytes: 0,
647                line_feed_cnt: Some(0), // Empty has zero line feeds (known)
648            }),
649            total_bytes: 0,
650        }
651    }
652
653    /// Build a balanced tree from a list of leaves
654    fn build_balanced(leaves: &[LeafData]) -> Arc<PieceTreeNode> {
655        if leaves.is_empty() {
656            return Arc::new(PieceTreeNode::Leaf {
657                location: BufferLocation::Stored(0),
658                offset: 0,
659                bytes: 0,
660                line_feed_cnt: Some(0), // Empty has zero line feeds (known)
661            });
662        }
663
664        if leaves.len() == 1 {
665            let leaf = leaves[0];
666            return Arc::new(PieceTreeNode::Leaf {
667                location: leaf.location,
668                offset: leaf.offset,
669                bytes: leaf.bytes,
670                line_feed_cnt: leaf.line_feed_cnt,
671            });
672        }
673
674        // Split in the middle
675        let mid = leaves.len() / 2;
676        let left = Self::build_balanced(&leaves[..mid]);
677        let right = Self::build_balanced(&leaves[mid..]);
678
679        let left_bytes = left.total_bytes();
680        let lf_left = left.total_line_feeds();
681
682        Arc::new(PieceTreeNode::Internal {
683            left_bytes,
684            lf_left,
685            left,
686            right,
687        })
688    }
689
690    /// Rebuild the tree to be balanced
691    fn rebalance(&mut self) {
692        let mut leaves = Vec::new();
693        self.root.collect_leaves(&mut leaves);
694        self.root = Self::build_balanced(&leaves);
695    }
696
697    /// Check if rebalancing is needed and do it
698    fn check_and_rebalance(&mut self) {
699        let count = self.root.count_leaves();
700        if count < 2 {
701            return;
702        }
703
704        let depth = self.root.depth();
705        let max_depth = 2 * (count as f64).log2().ceil() as usize;
706
707        if depth > max_depth {
708            self.rebalance();
709        }
710    }
711
712    /// Find the piece at the given byte offset
713    pub fn find_by_offset(&self, offset: usize) -> Option<PieceInfo> {
714        if offset >= self.total_bytes {
715            return None;
716        }
717        self.root.find_by_offset(offset).map(|result| result.info)
718    }
719
720    /// Create a cursor at the given byte offset
721    /// Note: line/col calculation should be done by LineIndex
722    pub fn cursor_at_offset(&self, offset: usize) -> Cursor {
723        Cursor {
724            byte_offset: offset.min(self.total_bytes),
725            line: 0,
726            col: 0,
727        }
728    }
729
730    /// Insert text at the given offset
731    /// Returns new cursor after the inserted text
732    /// line_feed_cnt: number of line feeds in the inserted text (None if unknown/not computed)
733    /// buffers: reference to the string buffers for computing line feeds during splits
734    pub fn insert(
735        &mut self,
736        offset: usize,
737        location: BufferLocation,
738        buffer_offset: usize,
739        bytes: usize,
740        line_feed_cnt: Option<usize>,
741        buffers: &[StringBuffer],
742    ) -> Cursor {
743        if bytes == 0 {
744            return self.cursor_at_offset(offset);
745        }
746
747        // Find the piece to split
748        if let Some(_result) = self.root.find_by_offset(offset) {
749            // Split the piece at the insertion point
750            let mut leaves = Vec::new();
751            let insert_leaf = LeafData::new(location, buffer_offset, bytes, line_feed_cnt);
752            self.collect_leaves_with_split(
753                &self.root,
754                0,
755                offset,
756                Some(insert_leaf),
757                &mut leaves,
758                buffers,
759            );
760
761            self.root = Self::build_balanced(&leaves);
762            self.total_bytes += bytes;
763
764            self.check_and_rebalance();
765        } else if offset == self.total_bytes {
766            // Append at end
767            let mut leaves = Vec::new();
768            self.root.collect_leaves(&mut leaves);
769            leaves.push(LeafData::new(location, buffer_offset, bytes, line_feed_cnt));
770
771            self.root = Self::build_balanced(&leaves);
772            self.total_bytes += bytes;
773
774            self.check_and_rebalance();
775        }
776
777        self.cursor_at_offset(offset + bytes)
778    }
779
780    /// Get a clone of the root node (shared via Arc)
781    pub fn root(&self) -> Arc<PieceTreeNode> {
782        Arc::clone(&self.root)
783    }
784
785    /// Insert text at the given position (line, column)
786    /// Returns new cursor after the inserted text
787    /// This performs a SINGLE tree traversal (more efficient than position_to_offset + insert)
788    #[allow(clippy::too_many_arguments)]
789    pub fn insert_at_position(
790        &mut self,
791        line: usize,
792        column: usize,
793        location: BufferLocation,
794        buffer_offset: usize,
795        bytes: usize,
796        line_feed_cnt: usize,
797        buffers: &[StringBuffer],
798    ) -> Cursor {
799        if bytes == 0 {
800            let offset = self.position_to_offset(line, column, buffers);
801            return self.cursor_at_offset(offset);
802        }
803
804        // Collect leaves while splitting at the position
805        let mut leaves = Vec::new();
806        let insert_leaf = LeafData::new(location, buffer_offset, bytes, Some(line_feed_cnt));
807
808        self.collect_leaves_with_split_at_position(
809            &self.root,
810            0,
811            0,
812            line,
813            column,
814            Some(insert_leaf),
815            &mut leaves,
816            buffers,
817        );
818
819        self.root = Self::build_balanced(&leaves);
820        self.total_bytes += bytes;
821        self.check_and_rebalance();
822
823        // Return cursor at position after insertion
824        let offset = self.position_to_offset(line, column, buffers) + bytes;
825        self.cursor_at_offset(offset)
826    }
827
828    /// Helper to collect leaves while splitting at a position (line, column)
829    /// Similar to collect_leaves_with_split but works with positions instead of offsets
830    #[allow(clippy::too_many_arguments)]
831    fn collect_leaves_with_split_at_position(
832        &self,
833        node: &Arc<PieceTreeNode>,
834        current_offset: usize,
835        lines_before: usize,
836        target_line: usize,
837        target_column: usize,
838        insert: Option<LeafData>,
839        leaves: &mut Vec<LeafData>,
840        buffers: &[StringBuffer],
841    ) {
842        match node.as_ref() {
843            PieceTreeNode::Internal {
844                left_bytes,
845                lf_left,
846                left,
847                right,
848            } => {
849                // If line counts are unknown, we can't do position-based navigation
850                let Some(lf_left) = lf_left else {
851                    return;
852                };
853                let lines_after_left = lines_before + lf_left;
854
855                // Determine if target position is in left or right subtree
856                let go_left = if target_column == 0 {
857                    target_line <= lines_after_left
858                } else {
859                    target_line < lines_after_left
860                };
861
862                if go_left {
863                    // Target is in left subtree
864                    self.collect_leaves_with_split_at_position(
865                        left,
866                        current_offset,
867                        lines_before,
868                        target_line,
869                        target_column,
870                        insert,
871                        leaves,
872                        buffers,
873                    );
874                    self.collect_leaves_with_split_at_position(
875                        right,
876                        current_offset + left_bytes,
877                        lines_after_left,
878                        target_line,
879                        target_column,
880                        None,
881                        leaves,
882                        buffers,
883                    );
884                } else {
885                    // Target is in right subtree
886                    self.collect_leaves_with_split_at_position(
887                        left,
888                        current_offset,
889                        lines_before,
890                        target_line,
891                        target_column,
892                        None,
893                        leaves,
894                        buffers,
895                    );
896                    self.collect_leaves_with_split_at_position(
897                        right,
898                        current_offset + left_bytes,
899                        lines_after_left,
900                        target_line,
901                        target_column,
902                        insert,
903                        leaves,
904                        buffers,
905                    );
906                }
907            }
908            PieceTreeNode::Leaf {
909                location,
910                offset,
911                bytes,
912                line_feed_cnt,
913            } => {
914                // If line counts are unknown, we can't do position-based navigation
915                let Some(line_feed_cnt) = line_feed_cnt else {
916                    return;
917                };
918                let lines_in_piece = lines_before + line_feed_cnt;
919
920                // Check if this piece contains the target line
921                if target_line >= lines_before && target_line <= lines_in_piece {
922                    // Target line is in this piece
923                    let buffer_id = location.buffer_id();
924                    if let Some(buffer) = buffers.get(buffer_id) {
925                        let line_in_piece = target_line - lines_before;
926
927                        // Find the line start within the piece
928                        let line_start_in_buffer = if line_in_piece == 0 {
929                            *offset
930                        } else {
931                            // Find the Nth newline within this piece
932                            let mut lines_seen = 0;
933                            let mut found_line_start = *offset;
934
935                            if let Some(line_starts) = buffer.get_line_starts() {
936                                for &ls in line_starts.iter() {
937                                    if ls > *offset && ls < *offset + *bytes {
938                                        if lines_seen == line_in_piece - 1 {
939                                            found_line_start = ls;
940                                            break;
941                                        }
942                                        lines_seen += 1;
943                                    }
944                                }
945                            }
946
947                            found_line_start
948                        };
949
950                        // Calculate split offset within the piece
951                        let column_offset = target_column.min(*bytes);
952                        let split_in_buffer = line_start_in_buffer + column_offset;
953                        let split_offset_in_piece =
954                            split_in_buffer.saturating_sub(*offset).min(*bytes);
955
956                        // Split the piece at this position
957                        if split_offset_in_piece > 0 {
958                            // First part (before split)
959                            let lf_cnt = Self::compute_line_feeds_static(
960                                buffers,
961                                *location,
962                                *offset,
963                                split_offset_in_piece,
964                            );
965                            leaves.push(LeafData::new(
966                                *location,
967                                *offset,
968                                split_offset_in_piece,
969                                lf_cnt,
970                            ));
971                        }
972
973                        // Inserted piece
974                        if let Some(insert_leaf) = insert {
975                            leaves.push(insert_leaf);
976                        }
977
978                        // Second part (after split)
979                        let remaining = bytes.saturating_sub(split_offset_in_piece);
980                        if remaining > 0 {
981                            let lf_cnt = Self::compute_line_feeds_static(
982                                buffers,
983                                *location,
984                                offset + split_offset_in_piece,
985                                remaining,
986                            );
987                            leaves.push(LeafData::new(
988                                *location,
989                                offset + split_offset_in_piece,
990                                remaining,
991                                lf_cnt,
992                            ));
993                        }
994                    } else {
995                        // Buffer not found, just keep the piece as-is
996                        leaves.push(LeafData::new(
997                            *location,
998                            *offset,
999                            *bytes,
1000                            Some(*line_feed_cnt),
1001                        ));
1002                    }
1003                } else {
1004                    // Target line not in this piece, just keep it
1005                    leaves.push(LeafData::new(
1006                        *location,
1007                        *offset,
1008                        *bytes,
1009                        Some(*line_feed_cnt),
1010                    ));
1011                }
1012            }
1013        }
1014    }
1015
1016    /// Helper to collect leaves while splitting at insertion point
1017    fn collect_leaves_with_split(
1018        &self,
1019        node: &Arc<PieceTreeNode>,
1020        current_offset: usize,
1021        split_offset: usize,
1022        insert: Option<LeafData>,
1023        leaves: &mut Vec<LeafData>,
1024        buffers: &[StringBuffer],
1025    ) {
1026        match node.as_ref() {
1027            PieceTreeNode::Internal {
1028                left_bytes,
1029                left,
1030                right,
1031                ..
1032            } => {
1033                // Only pass `insert` to the subtree containing the split point
1034                if split_offset < current_offset + left_bytes {
1035                    // Split is in left subtree
1036                    self.collect_leaves_with_split(
1037                        left,
1038                        current_offset,
1039                        split_offset,
1040                        insert,
1041                        leaves,
1042                        buffers,
1043                    );
1044                    self.collect_leaves_with_split(
1045                        right,
1046                        current_offset + left_bytes,
1047                        split_offset,
1048                        None,
1049                        leaves,
1050                        buffers,
1051                    );
1052                } else {
1053                    // Split is in right subtree (or at boundary)
1054                    self.collect_leaves_with_split(
1055                        left,
1056                        current_offset,
1057                        split_offset,
1058                        None,
1059                        leaves,
1060                        buffers,
1061                    );
1062                    self.collect_leaves_with_split(
1063                        right,
1064                        current_offset + left_bytes,
1065                        split_offset,
1066                        insert,
1067                        leaves,
1068                        buffers,
1069                    );
1070                }
1071            }
1072            PieceTreeNode::Leaf {
1073                location,
1074                offset,
1075                bytes,
1076                line_feed_cnt,
1077            } => {
1078                let piece_end = current_offset + bytes;
1079
1080                if split_offset > current_offset && split_offset < piece_end {
1081                    // Split this piece - need to compute line feeds for each part
1082                    let offset_in_piece = split_offset - current_offset;
1083
1084                    // First part (before split)
1085                    if offset_in_piece > 0 {
1086                        let lf_cnt = Self::compute_line_feeds_static(
1087                            buffers,
1088                            *location,
1089                            *offset,
1090                            offset_in_piece,
1091                        );
1092                        leaves.push(LeafData::new(*location, *offset, offset_in_piece, lf_cnt));
1093                    }
1094
1095                    // Inserted piece
1096                    if let Some(insert_leaf) = insert {
1097                        leaves.push(insert_leaf);
1098                    }
1099
1100                    // Second part (after split)
1101                    let remaining = bytes - offset_in_piece;
1102                    if remaining > 0 {
1103                        let lf_cnt = Self::compute_line_feeds_static(
1104                            buffers,
1105                            *location,
1106                            offset + offset_in_piece,
1107                            remaining,
1108                        );
1109                        leaves.push(LeafData::new(
1110                            *location,
1111                            offset + offset_in_piece,
1112                            remaining,
1113                            lf_cnt,
1114                        ));
1115                    }
1116                } else if split_offset == current_offset {
1117                    // Insert before this piece
1118                    if let Some(insert_leaf) = insert {
1119                        leaves.push(insert_leaf);
1120                    }
1121                    leaves.push(LeafData::new(*location, *offset, *bytes, *line_feed_cnt));
1122                } else {
1123                    // Don't split, just add the piece
1124                    leaves.push(LeafData::new(*location, *offset, *bytes, *line_feed_cnt));
1125                }
1126            }
1127        }
1128    }
1129
1130    /// Helper to compute line feeds in a buffer range
1131    fn compute_line_feeds_static(
1132        buffers: &[StringBuffer],
1133        location: BufferLocation,
1134        offset: usize,
1135        bytes: usize,
1136    ) -> Option<usize> {
1137        let buffer_id = location.buffer_id();
1138        if let Some(buffer) = buffers.get(buffer_id) {
1139            if let Some(data) = buffer.get_data() {
1140                let end = (offset + bytes).min(data.len());
1141                Some(data[offset..end].iter().filter(|&&b| b == b'\n').count())
1142            } else {
1143                // Buffer is unloaded - return None
1144                None
1145            }
1146        } else {
1147            // Buffer not available - return None
1148            None
1149        }
1150    }
1151
1152    /// Split a piece at the given offset without inserting anything
1153    /// This is useful for isolating a chunk of a large piece for partial loading
1154    ///
1155    /// If the offset is in the middle of a piece, that piece will be split into two pieces.
1156    /// If the offset is at a piece boundary, nothing changes.
1157    /// Does nothing if offset is 0 or >= total_bytes.
1158    pub fn split_at_offset(&mut self, offset: usize, buffers: &[StringBuffer]) {
1159        if offset == 0 || offset >= self.total_bytes {
1160            return;
1161        }
1162
1163        // Check if we need to split (offset must be in middle of a piece)
1164        if let Some(_result) = self.root.find_by_offset(offset) {
1165            // Split the piece at the offset (with no insertion)
1166            let mut leaves = Vec::new();
1167            self.collect_leaves_with_split(&self.root, 0, offset, None, &mut leaves, buffers);
1168
1169            self.root = Self::build_balanced(&leaves);
1170            self.check_and_rebalance();
1171        }
1172    }
1173
1174    /// Replace buffer references in pieces
1175    /// This is used when creating chunk buffers from large unloaded buffers
1176    ///
1177    /// Finds all pieces that reference the old buffer at the specified offset/bytes
1178    /// and updates them to reference the new buffer at offset 0.
1179    pub fn replace_buffer_reference(
1180        &mut self,
1181        old_buffer_id: usize,
1182        old_buffer_offset: usize,
1183        old_buffer_bytes: usize,
1184        new_buffer_location: BufferLocation,
1185    ) {
1186        let mut leaves = Vec::new();
1187        self.root.collect_leaves(&mut leaves);
1188
1189        // Find and update matching pieces
1190        let mut modified = false;
1191        for leaf in &mut leaves {
1192            if leaf.location.buffer_id() == old_buffer_id
1193                && leaf.offset == old_buffer_offset
1194                && leaf.bytes == old_buffer_bytes
1195            {
1196                leaf.location = new_buffer_location;
1197                leaf.offset = 0; // New buffer starts at 0
1198                modified = true;
1199            }
1200        }
1201
1202        // Rebuild tree if we made changes
1203        if modified {
1204            self.root = Self::build_balanced(&leaves);
1205            self.check_and_rebalance();
1206        }
1207    }
1208
1209    /// Delete text starting at offset for the given number of bytes
1210    pub fn delete(&mut self, offset: usize, delete_bytes: usize, buffers: &[StringBuffer]) {
1211        if delete_bytes == 0 || offset >= self.total_bytes {
1212            return;
1213        }
1214
1215        let delete_bytes = delete_bytes.min(self.total_bytes - offset);
1216        let end_offset = offset + delete_bytes;
1217
1218        let mut leaves = Vec::new();
1219        self.collect_leaves_with_delete(&self.root, 0, offset, end_offset, &mut leaves, buffers);
1220
1221        self.root = Self::build_balanced(&leaves);
1222        self.total_bytes -= delete_bytes;
1223
1224        self.check_and_rebalance();
1225    }
1226
1227    /// Delete text in a range specified by positions (start_line, start_col) to (end_line, end_col)
1228    /// This performs a more efficient traversal than converting positions to offsets separately
1229    pub fn delete_position_range(
1230        &mut self,
1231        start_line: usize,
1232        start_column: usize,
1233        end_line: usize,
1234        end_column: usize,
1235        buffers: &[StringBuffer],
1236    ) {
1237        // Edge case: empty range
1238        if start_line == end_line && start_column == end_column {
1239            return;
1240        }
1241
1242        // Find both positions in a single traversal and collect leaves
1243        let mut leaves = Vec::new();
1244        let mut delete_start_offset = None;
1245        let mut delete_end_offset = None;
1246
1247        self.collect_leaves_with_position_delete(
1248            &self.root,
1249            0,
1250            0,
1251            start_line,
1252            start_column,
1253            end_line,
1254            end_column,
1255            &mut delete_start_offset,
1256            &mut delete_end_offset,
1257            &mut leaves,
1258            buffers,
1259        );
1260
1261        // Calculate how many bytes were deleted
1262        if let (Some(start), Some(end)) = (delete_start_offset, delete_end_offset) {
1263            let deleted_bytes = end.saturating_sub(start);
1264            if deleted_bytes > 0 {
1265                self.root = Self::build_balanced(&leaves);
1266                self.total_bytes = self.total_bytes.saturating_sub(deleted_bytes);
1267                self.check_and_rebalance();
1268            }
1269        }
1270    }
1271
1272    /// Helper to collect leaves while deleting a range specified by positions
1273    /// This finds both positions and performs the deletion in a single tree traversal
1274    #[allow(clippy::too_many_arguments)]
1275    fn collect_leaves_with_position_delete(
1276        &self,
1277        node: &Arc<PieceTreeNode>,
1278        current_offset: usize,
1279        lines_before: usize,
1280        start_line: usize,
1281        start_column: usize,
1282        end_line: usize,
1283        end_column: usize,
1284        delete_start_offset: &mut Option<usize>,
1285        delete_end_offset: &mut Option<usize>,
1286        leaves: &mut Vec<LeafData>,
1287        buffers: &[StringBuffer],
1288    ) {
1289        match node.as_ref() {
1290            PieceTreeNode::Internal {
1291                left_bytes,
1292                lf_left,
1293                left,
1294                right,
1295            } => {
1296                // If line counts are unknown, we can't do position-based navigation
1297                let Some(lf_left) = lf_left else {
1298                    return;
1299                };
1300                let lines_after_left = lines_before + lf_left;
1301
1302                // Recursively process both subtrees
1303                self.collect_leaves_with_position_delete(
1304                    left,
1305                    current_offset,
1306                    lines_before,
1307                    start_line,
1308                    start_column,
1309                    end_line,
1310                    end_column,
1311                    delete_start_offset,
1312                    delete_end_offset,
1313                    leaves,
1314                    buffers,
1315                );
1316                self.collect_leaves_with_position_delete(
1317                    right,
1318                    current_offset + left_bytes,
1319                    lines_after_left,
1320                    start_line,
1321                    start_column,
1322                    end_line,
1323                    end_column,
1324                    delete_start_offset,
1325                    delete_end_offset,
1326                    leaves,
1327                    buffers,
1328                );
1329            }
1330            PieceTreeNode::Leaf {
1331                location,
1332                offset,
1333                bytes,
1334                line_feed_cnt,
1335            } => {
1336                // If line counts are unknown, we can't do position-based navigation
1337                let Some(line_feed_cnt) = line_feed_cnt else {
1338                    return;
1339                };
1340                let lines_in_piece = lines_before + line_feed_cnt;
1341                let piece_start = current_offset;
1342                let piece_end = current_offset + bytes;
1343
1344                // Check if this piece contains the start position
1345                if start_line >= lines_before
1346                    && start_line <= lines_in_piece
1347                    && delete_start_offset.is_none()
1348                {
1349                    if let Some(buffer) = buffers.get(location.buffer_id()) {
1350                        let offset_in_piece = self.find_position_in_leaf(
1351                            lines_before,
1352                            start_line,
1353                            start_column,
1354                            *offset,
1355                            *bytes,
1356                            buffer,
1357                        );
1358                        *delete_start_offset = Some(piece_start + offset_in_piece);
1359                    }
1360                }
1361
1362                // Check if this piece contains the end position
1363                if end_line >= lines_before
1364                    && end_line <= lines_in_piece
1365                    && delete_end_offset.is_none()
1366                {
1367                    if let Some(buffer) = buffers.get(location.buffer_id()) {
1368                        let offset_in_piece = self.find_position_in_leaf(
1369                            lines_before,
1370                            end_line,
1371                            end_column,
1372                            *offset,
1373                            *bytes,
1374                            buffer,
1375                        );
1376                        *delete_end_offset = Some(piece_start + offset_in_piece);
1377                    }
1378                }
1379
1380                // Now determine what to keep
1381                let del_start = delete_start_offset.unwrap_or(usize::MAX);
1382                let del_end = delete_end_offset.unwrap_or(0);
1383
1384                // Piece completely before delete range
1385                if piece_end <= del_start {
1386                    leaves.push(LeafData::new(
1387                        *location,
1388                        *offset,
1389                        *bytes,
1390                        Some(*line_feed_cnt),
1391                    ));
1392                    return;
1393                }
1394
1395                // Piece completely after delete range (only if we've found end)
1396                if delete_end_offset.is_some() && piece_start >= del_end {
1397                    leaves.push(LeafData::new(
1398                        *location,
1399                        *offset,
1400                        *bytes,
1401                        Some(*line_feed_cnt),
1402                    ));
1403                    return;
1404                }
1405
1406                // Piece overlaps with delete range
1407                // Keep part before delete start
1408                if piece_start < del_start && del_start < piece_end {
1409                    let keep_bytes = del_start - piece_start;
1410                    let lf_cnt =
1411                        Self::compute_line_feeds_static(buffers, *location, *offset, keep_bytes);
1412                    leaves.push(LeafData::new(*location, *offset, keep_bytes, lf_cnt));
1413                }
1414
1415                // Keep part after delete end (if we know where end is)
1416                if delete_end_offset.is_some() && del_end > piece_start && del_end < piece_end {
1417                    let skip_bytes = del_end - piece_start;
1418                    let keep_bytes = piece_end - del_end;
1419                    let lf_cnt = Self::compute_line_feeds_static(
1420                        buffers,
1421                        *location,
1422                        offset + skip_bytes,
1423                        keep_bytes,
1424                    );
1425                    leaves.push(LeafData::new(
1426                        *location,
1427                        offset + skip_bytes,
1428                        keep_bytes,
1429                        lf_cnt,
1430                    ));
1431                }
1432            }
1433        }
1434    }
1435
1436    /// Helper to find a position within a leaf piece
1437    /// Returns the offset within the piece (not the document offset)
1438    fn find_position_in_leaf(
1439        &self,
1440        lines_before: usize,
1441        target_line: usize,
1442        target_column: usize,
1443        piece_offset: usize,
1444        piece_bytes: usize,
1445        buffer: &StringBuffer,
1446    ) -> usize {
1447        let line_in_piece = target_line - lines_before;
1448
1449        // Find the line start within the piece
1450        let line_start_in_buffer = if line_in_piece == 0 {
1451            piece_offset
1452        } else {
1453            // Find the Nth newline within this piece
1454            let mut lines_seen = 0;
1455            let mut found_line_start = piece_offset;
1456
1457            if let Some(line_starts) = buffer.get_line_starts() {
1458                for &ls in line_starts.iter() {
1459                    if ls > piece_offset && ls < piece_offset + piece_bytes {
1460                        if lines_seen == line_in_piece - 1 {
1461                            found_line_start = ls;
1462                            break;
1463                        }
1464                        lines_seen += 1;
1465                    }
1466                }
1467            }
1468
1469            found_line_start
1470        };
1471
1472        // Calculate offset within the piece
1473        let column_offset = target_column.min(piece_bytes);
1474        let target_in_buffer = line_start_in_buffer + column_offset;
1475        target_in_buffer
1476            .saturating_sub(piece_offset)
1477            .min(piece_bytes)
1478    }
1479
1480    /// Helper to collect leaves while deleting a range
1481    fn collect_leaves_with_delete(
1482        &self,
1483        node: &Arc<PieceTreeNode>,
1484        current_offset: usize,
1485        delete_start: usize,
1486        delete_end: usize,
1487        leaves: &mut Vec<LeafData>,
1488        buffers: &[StringBuffer],
1489    ) {
1490        match node.as_ref() {
1491            PieceTreeNode::Internal {
1492                left_bytes,
1493                left,
1494                right,
1495                ..
1496            } => {
1497                self.collect_leaves_with_delete(
1498                    left,
1499                    current_offset,
1500                    delete_start,
1501                    delete_end,
1502                    leaves,
1503                    buffers,
1504                );
1505                self.collect_leaves_with_delete(
1506                    right,
1507                    current_offset + left_bytes,
1508                    delete_start,
1509                    delete_end,
1510                    leaves,
1511                    buffers,
1512                );
1513            }
1514            PieceTreeNode::Leaf {
1515                location,
1516                offset,
1517                bytes,
1518                line_feed_cnt,
1519            } => {
1520                let piece_start = current_offset;
1521                let piece_end = current_offset + bytes;
1522
1523                // Piece completely before delete range
1524                if piece_end <= delete_start {
1525                    leaves.push(LeafData::new(*location, *offset, *bytes, *line_feed_cnt));
1526                    return;
1527                }
1528
1529                // Piece completely after delete range
1530                if piece_start >= delete_end {
1531                    leaves.push(LeafData::new(*location, *offset, *bytes, *line_feed_cnt));
1532                    return;
1533                }
1534
1535                // Piece partially or fully overlaps delete range
1536                // Keep part before delete range
1537                if piece_start < delete_start {
1538                    let keep_bytes = delete_start - piece_start;
1539                    let lf_cnt =
1540                        Self::compute_line_feeds_static(buffers, *location, *offset, keep_bytes);
1541                    leaves.push(LeafData::new(*location, *offset, keep_bytes, lf_cnt));
1542                }
1543
1544                // Keep part after delete range
1545                if piece_end > delete_end {
1546                    let skip_bytes = delete_end - piece_start;
1547                    let keep_bytes = piece_end - delete_end;
1548                    let lf_cnt = Self::compute_line_feeds_static(
1549                        buffers,
1550                        *location,
1551                        offset + skip_bytes,
1552                        keep_bytes,
1553                    );
1554                    leaves.push(LeafData::new(
1555                        *location,
1556                        offset + skip_bytes,
1557                        keep_bytes,
1558                        lf_cnt,
1559                    ));
1560                }
1561            }
1562        }
1563    }
1564
1565    /// Get the total number of bytes in the document
1566    pub fn total_bytes(&self) -> usize {
1567        self.total_bytes
1568    }
1569
1570    /// Get the total number of lines in the document
1571    /// Line count = line feeds + 1
1572    /// Returns None if any piece has unknown line count
1573    pub fn line_count(&self) -> Option<usize> {
1574        self.root.total_line_feeds().map(|lf| lf + 1)
1575    }
1576
1577    /// Get tree statistics for debugging
1578    pub fn stats(&self) -> TreeStats {
1579        TreeStats {
1580            total_bytes: self.total_bytes,
1581            depth: self.root.depth(),
1582            leaf_count: self.root.count_leaves(),
1583            line_feed_count: self.root.total_line_feeds(),
1584        }
1585    }
1586
1587    /// Get all leaves in order (for debugging)
1588    pub fn get_leaves(&self) -> Vec<LeafData> {
1589        let mut leaves = Vec::new();
1590        self.root.collect_leaves(&mut leaves);
1591        leaves
1592    }
1593
1594    /// Convert byte offset to line/column position using tree's line metadata
1595    pub fn offset_to_position(
1596        &self,
1597        offset: usize,
1598        buffers: &[StringBuffer],
1599    ) -> Option<(usize, usize)> {
1600        if offset == 0 {
1601            return Some((0, 0));
1602        }
1603
1604        let offset = offset.min(self.total_bytes);
1605
1606        // Find the piece containing this offset
1607        if let Some(result) = self.root.find_by_offset(offset) {
1608            let piece_info = result.info;
1609            let bytes_before = result.bytes_before;
1610
1611            // Count lines before this piece
1612            // If line count is unknown, return None - we can't reliably compute position
1613            let lines_before = match self.count_lines_before_offset(bytes_before) {
1614                Some(count) => count,
1615                None => {
1616                    // No line metadata available - cannot compute position reliably
1617                    return None;
1618                }
1619            };
1620
1621            // Get the buffer for this piece
1622            let buffer_id = piece_info.location.buffer_id();
1623            if let Some(buffer) = buffers.get(buffer_id) {
1624                // Check if we have line starts available
1625                if let Some(line_starts) = buffer.get_line_starts() {
1626                    // Find position within the piece
1627                    let offset_in_piece = piece_info.offset_in_piece.unwrap_or(0);
1628                    let byte_offset_in_buffer = piece_info.offset + offset_in_piece;
1629
1630                    // Find which line within the buffer
1631                    let line_in_buffer = line_starts
1632                        .binary_search(&byte_offset_in_buffer)
1633                        .unwrap_or_else(|i| i.saturating_sub(1));
1634
1635                    // Find which line the piece starts at in the buffer
1636                    let piece_start_line = line_starts
1637                        .binary_search(&piece_info.offset)
1638                        .unwrap_or_else(|i| i.saturating_sub(1));
1639
1640                    // Calculate line relative to piece start (not buffer start)
1641                    let line_in_piece = line_in_buffer - piece_start_line;
1642
1643                    // Calculate the document line number
1644                    let doc_line = lines_before + line_in_piece;
1645
1646                    // Calculate column
1647                    let column = if line_in_piece == 0 && bytes_before == 0 {
1648                        // Fast path: piece is at document start, so column is just offset within piece
1649                        offset_in_piece
1650                    } else if line_in_piece == 0 {
1651                        // We're on the first line of this piece, but the document line may have
1652                        // started before this piece (after modifications). Find the actual line start.
1653                        let line_start = self.position_to_offset(doc_line, 0, buffers);
1654                        offset.saturating_sub(line_start)
1655                    } else {
1656                        // Line starts within this piece
1657                        // Find where the line starts within the piece
1658                        let mut count = 0;
1659                        let mut line_start_in_buf = piece_info.offset;
1660                        for &ls in line_starts.iter() {
1661                            if ls > piece_info.offset && ls < piece_info.offset + piece_info.bytes {
1662                                count += 1;
1663                                if count == line_in_piece {
1664                                    line_start_in_buf = ls;
1665                                    break;
1666                                }
1667                            }
1668                        }
1669                        let line_start_offset_in_piece = line_start_in_buf - piece_info.offset;
1670                        offset_in_piece - line_start_offset_in_piece
1671                    };
1672
1673                    return Some((doc_line, column));
1674                }
1675                // No line starts available - return None
1676            }
1677        }
1678
1679        // Fallback: end of document
1680        // Only if we have line metadata
1681        match self.line_count() {
1682            Some(line_count) => {
1683                let last_line = line_count.saturating_sub(1);
1684                let line_start = self.position_to_offset(last_line, 0, buffers);
1685                let column = self.total_bytes.saturating_sub(line_start);
1686                Some((last_line, column))
1687            }
1688            None => {
1689                // No line metadata - cannot compute position
1690                None
1691            }
1692        }
1693    }
1694
1695    /// Convert line/column position to byte offset using tree's line metadata
1696    pub fn position_to_offset(
1697        &self,
1698        line: usize,
1699        column: usize,
1700        buffers: &[StringBuffer],
1701    ) -> usize {
1702        if line == 0 && column == 0 {
1703            return 0;
1704        }
1705
1706        // Traverse tree to find the piece containing the target line
1707        self.find_offset_for_line(line, column, buffers)
1708            .unwrap_or(self.total_bytes)
1709    }
1710
1711    /// Helper: count line feeds before a given byte offset
1712    /// Returns None if any piece has unknown line count
1713    fn count_lines_before_offset(&self, byte_offset: usize) -> Option<usize> {
1714        self.count_lines_in_range(0, byte_offset)
1715    }
1716
1717    /// Helper: count line feeds in a byte range
1718    /// Returns None if any piece has unknown line count
1719    fn count_lines_in_range(&self, start: usize, end: usize) -> Option<usize> {
1720        if start >= end {
1721            return Some(0);
1722        }
1723
1724        self.root.count_lines_in_byte_range(0, start, end)
1725    }
1726
1727    /// Helper: find byte offset for a given line/column
1728    fn find_offset_for_line(
1729        &self,
1730        target_line: usize,
1731        column: usize,
1732        buffers: &[StringBuffer],
1733    ) -> Option<usize> {
1734        self.root
1735            .find_byte_offset_for_line(0, 0, target_line, column, buffers)
1736    }
1737
1738    /// Get the byte range for a specific line
1739    pub fn line_range(
1740        &self,
1741        line: usize,
1742        buffers: &[StringBuffer],
1743    ) -> Option<(usize, Option<usize>)> {
1744        // Check if line exists
1745        let line_count = self.line_count()?;
1746        if line >= line_count {
1747            return None;
1748        }
1749
1750        let start = self.position_to_offset(line, 0, buffers);
1751        let end = if line + 1 < line_count {
1752            Some(self.position_to_offset(line + 1, 0, buffers))
1753        } else {
1754            None
1755        };
1756        Some((start, end))
1757    }
1758
1759    /// Iterate through pieces overlapping a byte range
1760    /// Does ONE O(log n) tree traversal, then iterates sequentially
1761    pub fn iter_pieces_in_range(&self, start: usize, end: usize) -> PieceRangeIter {
1762        PieceRangeIter::new(&self.root, start, end)
1763    }
1764
1765    /// Apply multiple edits in a single tree traversal + rebuild
1766    ///
1767    /// # Arguments
1768    /// * `edits` - Vec of (position, delete_len, insert_text), MUST be sorted descending by position
1769    /// * `buffers` - Reference to string buffers (for line feed computation)
1770    /// * `add_text_fn` - Function to add text to buffer, returns (BufferLocation, offset, bytes)
1771    ///
1772    /// # Complexity
1773    /// O(pieces + edits) instead of O(pieces × edits)
1774    ///
1775    /// # Returns
1776    /// The net change in total bytes
1777    pub fn apply_bulk_edits<F>(
1778        &mut self,
1779        edits: &[(usize, usize, &str)],
1780        buffers: &[StringBuffer],
1781        mut add_text_fn: F,
1782    ) -> isize
1783    where
1784        F: FnMut(&str) -> (BufferLocation, usize, usize, Option<usize>),
1785    {
1786        if edits.is_empty() {
1787            return 0;
1788        }
1789
1790        // 1. Collect all split points (both start and end of each edit range)
1791        let mut split_points: Vec<usize> = Vec::with_capacity(edits.len() * 2);
1792        for (pos, del_len, _) in edits {
1793            split_points.push(*pos);
1794            if *del_len > 0 {
1795                let end = pos.saturating_add(*del_len).min(self.total_bytes);
1796                if end > *pos {
1797                    split_points.push(end);
1798                }
1799            }
1800        }
1801        split_points.sort_unstable();
1802        split_points.dedup();
1803
1804        // 2. Collect all leaves, splitting at all required points
1805        let mut leaves = Vec::new();
1806        self.collect_leaves_with_multi_split(
1807            &self.root.clone(),
1808            0,
1809            &split_points,
1810            &mut leaves,
1811            buffers,
1812        );
1813
1814        // 3. Build edit ranges for quick lookup (sorted descending by position)
1815        // Each edit: (start, end, insert_leaf)
1816        let mut edit_ranges: Vec<(usize, usize, Option<LeafData>)> =
1817            Vec::with_capacity(edits.len());
1818        for (pos, del_len, text) in edits {
1819            let del_end = pos.saturating_add(*del_len).min(self.total_bytes);
1820            let insert_leaf = if !text.is_empty() {
1821                let (location, offset, bytes, lf_cnt) = add_text_fn(text);
1822                Some(LeafData::new(location, offset, bytes, lf_cnt))
1823            } else {
1824                None
1825            };
1826            edit_ranges.push((*pos, del_end, insert_leaf));
1827        }
1828
1829        // 4. Apply edits to leaves
1830        // Edits are sorted descending by position, so:
1831        //   edit_ranges[len-1] has smallest position, edit_ranges[0] has largest
1832        // We iterate leaves ascending, and for each leaf we:
1833        //   1. First add any inserts that belong BEFORE this leaf
1834        //   2. Then add the leaf (if not deleted)
1835        let mut new_leaves: Vec<LeafData> = Vec::with_capacity(leaves.len() + edits.len());
1836        let mut current_offset = 0;
1837        let mut edit_idx = edit_ranges.len(); // Points past the end; we access [edit_idx-1]
1838
1839        for leaf in leaves {
1840            let leaf_start = current_offset;
1841            let leaf_end = current_offset + leaf.bytes;
1842
1843            // First, add any inserts whose position is <= leaf_start
1844            // These inserts should appear BEFORE this leaf's content
1845            while edit_idx > 0 {
1846                let (edit_start, _edit_end, ref insert_leaf) = edit_ranges[edit_idx - 1];
1847
1848                // If this edit's position is after where we are, stop
1849                if edit_start > leaf_start {
1850                    break;
1851                }
1852
1853                // Insert belongs at or before leaf_start
1854                if let Some(insert) = insert_leaf {
1855                    new_leaves.push(*insert);
1856                }
1857                edit_idx -= 1;
1858            }
1859
1860            // Check if this leaf overlaps with ANY edit's delete range
1861            // We check ALL edits, not just remaining ones, because edits
1862            // processed in the insert loop above may still have deletions
1863            let mut keep_leaf = true;
1864
1865            for (edit_start, edit_end, _) in &edit_ranges {
1866                // If edit's delete range is entirely after this leaf, skip
1867                if *edit_start >= leaf_end {
1868                    continue;
1869                }
1870
1871                // If edit has no deletion (edit_start == edit_end), skip
1872                if *edit_start == *edit_end {
1873                    continue;
1874                }
1875
1876                // If edit's delete range is entirely before this leaf, skip
1877                if *edit_end <= leaf_start {
1878                    continue;
1879                }
1880
1881                // Leaf overlaps with this edit's delete range - filter it out
1882                if leaf_start >= *edit_start && leaf_end <= *edit_end {
1883                    keep_leaf = false;
1884                    break;
1885                }
1886            }
1887
1888            if keep_leaf {
1889                new_leaves.push(leaf);
1890            }
1891
1892            current_offset = leaf_end;
1893        }
1894
1895        // Handle any remaining inserts at the end of document
1896        while edit_idx > 0 {
1897            if let Some(insert) = &edit_ranges[edit_idx - 1].2 {
1898                new_leaves.push(*insert);
1899            }
1900            edit_idx -= 1;
1901        }
1902
1903        // 5. Calculate byte delta
1904        let old_bytes = self.total_bytes;
1905        let mut new_bytes: usize = 0;
1906        for leaf in &new_leaves {
1907            new_bytes += leaf.bytes;
1908        }
1909        let delta = new_bytes as isize - old_bytes as isize;
1910
1911        // 6. Single balanced tree rebuild
1912        self.root = Self::build_balanced(&new_leaves);
1913        self.total_bytes = new_bytes;
1914
1915        delta
1916    }
1917
1918    /// Collect leaves, splitting at multiple points in one traversal
1919    fn collect_leaves_with_multi_split(
1920        &self,
1921        node: &Arc<PieceTreeNode>,
1922        current_offset: usize,
1923        split_points: &[usize],
1924        leaves: &mut Vec<LeafData>,
1925        buffers: &[StringBuffer],
1926    ) {
1927        match node.as_ref() {
1928            PieceTreeNode::Internal {
1929                left_bytes,
1930                left,
1931                right,
1932                ..
1933            } => {
1934                // Recurse into both subtrees
1935                self.collect_leaves_with_multi_split(
1936                    left,
1937                    current_offset,
1938                    split_points,
1939                    leaves,
1940                    buffers,
1941                );
1942                self.collect_leaves_with_multi_split(
1943                    right,
1944                    current_offset + left_bytes,
1945                    split_points,
1946                    leaves,
1947                    buffers,
1948                );
1949            }
1950            PieceTreeNode::Leaf {
1951                location,
1952                offset,
1953                bytes,
1954                line_feed_cnt,
1955            } => {
1956                if *bytes == 0 {
1957                    return;
1958                }
1959
1960                let piece_start = current_offset;
1961                let piece_end = current_offset + bytes;
1962
1963                // Find split points within this piece
1964                let mut split_offsets: Vec<usize> = Vec::new();
1965                for &sp in split_points {
1966                    if sp > piece_start && sp < piece_end {
1967                        split_offsets.push(sp - piece_start);
1968                    }
1969                }
1970
1971                if split_offsets.is_empty() {
1972                    // No splits needed, add entire piece
1973                    leaves.push(LeafData::new(*location, *offset, *bytes, *line_feed_cnt));
1974                } else {
1975                    // Split the piece at each point
1976                    split_offsets.sort_unstable();
1977                    split_offsets.dedup();
1978
1979                    let mut prev_offset = 0;
1980                    for split_offset in split_offsets {
1981                        if split_offset > prev_offset {
1982                            let chunk_bytes = split_offset - prev_offset;
1983                            let lf_cnt = Self::compute_line_feeds_static(
1984                                buffers,
1985                                *location,
1986                                offset + prev_offset,
1987                                chunk_bytes,
1988                            );
1989                            leaves.push(LeafData::new(
1990                                *location,
1991                                offset + prev_offset,
1992                                chunk_bytes,
1993                                lf_cnt,
1994                            ));
1995                        }
1996                        prev_offset = split_offset;
1997                    }
1998
1999                    // Add remaining part after last split
2000                    if prev_offset < *bytes {
2001                        let remaining = bytes - prev_offset;
2002                        let lf_cnt = Self::compute_line_feeds_static(
2003                            buffers,
2004                            *location,
2005                            offset + prev_offset,
2006                            remaining,
2007                        );
2008                        leaves.push(LeafData::new(
2009                            *location,
2010                            offset + prev_offset,
2011                            remaining,
2012                            lf_cnt,
2013                        ));
2014                    }
2015                }
2016            }
2017        }
2018    }
2019}
2020
2021/// A view into a piece's data within the document
2022#[derive(Debug, Clone)]
2023pub struct PieceView {
2024    /// The location of this piece (which buffer it references)
2025    pub location: BufferLocation,
2026    /// Offset within the source buffer where this piece starts
2027    pub buffer_offset: usize,
2028    /// Number of bytes in this piece
2029    pub bytes: usize,
2030    /// Byte offset where this piece starts in the document
2031    pub doc_offset: usize,
2032    /// Number of line feeds in this piece (None if unknown for large files)
2033    pub line_feed_cnt: Option<usize>,
2034}
2035
2036/// Iterator over pieces in a byte range
2037/// Performs ONE O(log n) traversal to collect pieces, then iterates in O(1) per piece
2038pub struct PieceRangeIter {
2039    pieces: Vec<PieceView>,
2040    current_index: usize,
2041}
2042
2043impl PieceRangeIter {
2044    fn new(root: &Arc<PieceTreeNode>, start: usize, end: usize) -> Self {
2045        let mut pieces = Vec::new();
2046        Self::collect_pieces(root, 0, start, end, &mut pieces);
2047        PieceRangeIter {
2048            pieces,
2049            current_index: 0,
2050        }
2051    }
2052
2053    /// Recursively collect all pieces that overlap [start, end)
2054    fn collect_pieces(
2055        node: &Arc<PieceTreeNode>,
2056        doc_offset: usize,
2057        range_start: usize,
2058        range_end: usize,
2059        pieces: &mut Vec<PieceView>,
2060    ) {
2061        match node.as_ref() {
2062            PieceTreeNode::Internal {
2063                left_bytes,
2064                left,
2065                right,
2066                ..
2067            } => {
2068                let left_end = doc_offset + left_bytes;
2069
2070                // Check if left subtree overlaps with range
2071                if range_start < left_end {
2072                    Self::collect_pieces(left, doc_offset, range_start, range_end, pieces);
2073                }
2074
2075                // Check if right subtree overlaps with range
2076                if range_end > left_end {
2077                    Self::collect_pieces(right, left_end, range_start, range_end, pieces);
2078                }
2079            }
2080            PieceTreeNode::Leaf {
2081                location,
2082                offset,
2083                bytes,
2084                line_feed_cnt,
2085            } => {
2086                let piece_end = doc_offset + bytes;
2087
2088                // Check if this piece overlaps with the range
2089                if doc_offset < range_end && piece_end > range_start {
2090                    pieces.push(PieceView {
2091                        location: *location,
2092                        buffer_offset: *offset,
2093                        bytes: *bytes,
2094                        doc_offset,
2095                        line_feed_cnt: *line_feed_cnt,
2096                    });
2097                }
2098            }
2099        }
2100    }
2101}
2102
2103impl Iterator for PieceRangeIter {
2104    type Item = PieceView;
2105
2106    fn next(&mut self) -> Option<Self::Item> {
2107        if self.current_index < self.pieces.len() {
2108            let piece = self.pieces[self.current_index].clone();
2109            self.current_index += 1;
2110            Some(piece)
2111        } else {
2112            None
2113        }
2114    }
2115}
2116
2117#[cfg(test)]
2118mod tests {
2119    use super::*;
2120
2121    // Helper to create test buffers
2122    fn test_buffers() -> Vec<StringBuffer> {
2123        vec![
2124            StringBuffer::new(0, vec![b'a'; 100]), // Buffer 0: 100 'a's
2125            StringBuffer::new(1, vec![b'b'; 50]),  // Buffer 1: 50 'b's
2126            StringBuffer::new(2, vec![b'c'; 25]),  // Buffer 2: 25 'c's
2127        ]
2128    }
2129
2130    #[test]
2131    fn test_create_empty() {
2132        let tree = PieceTree::empty();
2133        assert_eq!(tree.total_bytes(), 0);
2134    }
2135
2136    #[test]
2137    fn test_create_with_initial_piece() {
2138        let tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2139        assert_eq!(tree.total_bytes(), 100);
2140    }
2141
2142    #[test]
2143    fn test_insert_at_end() {
2144        let buffers = test_buffers();
2145        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2146        tree.insert(100, BufferLocation::Added(1), 0, 50, Some(0), &buffers);
2147        assert_eq!(tree.total_bytes(), 150);
2148    }
2149
2150    #[test]
2151    fn test_insert_in_middle() {
2152        let buffers = test_buffers();
2153        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2154        tree.insert(50, BufferLocation::Added(2), 0, 25, Some(0), &buffers);
2155        assert_eq!(tree.total_bytes(), 125);
2156        let stats = tree.stats();
2157        assert_eq!(stats.leaf_count, 3); // Original piece split + new piece
2158    }
2159
2160    #[test]
2161    fn test_delete() {
2162        let buffers = test_buffers();
2163        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2164        tree.delete(25, 50, &buffers);
2165        assert_eq!(tree.total_bytes(), 50);
2166    }
2167
2168    #[test]
2169    fn test_delete_at_boundaries() {
2170        let buffers = test_buffers();
2171        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2172
2173        // Delete from start
2174        tree.delete(0, 10, &buffers);
2175        assert_eq!(tree.total_bytes(), 90);
2176
2177        // Delete from end
2178        tree.delete(80, 10, &buffers);
2179        assert_eq!(tree.total_bytes(), 80);
2180    }
2181
2182    #[test]
2183    fn test_multiple_inserts_and_deletes() {
2184        let buffers = test_buffers();
2185        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2186
2187        tree.insert(50, BufferLocation::Added(1), 0, 20, Some(0), &buffers);
2188        assert_eq!(tree.total_bytes(), 120);
2189
2190        tree.delete(40, 30, &buffers);
2191        assert_eq!(tree.total_bytes(), 90);
2192
2193        tree.insert(0, BufferLocation::Added(1), 20, 10, Some(0), &buffers);
2194        assert_eq!(tree.total_bytes(), 100);
2195    }
2196
2197    #[test]
2198    fn test_rebalancing_many_inserts() {
2199        let buffers = test_buffers();
2200        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2201
2202        // Insert many times, which could create unbalanced tree
2203        for i in 0..20 {
2204            tree.insert(i * 5, BufferLocation::Added(1), i, 1, Some(0), &buffers);
2205        }
2206
2207        let stats = tree.stats();
2208        assert_eq!(stats.total_bytes, 120);
2209        // Each insert splits pieces, so we expect many leaves
2210        // Exact count depends on implementation details, but should be > 20
2211        assert!(stats.leaf_count > 20);
2212        assert!(stats.leaf_count < 50); // Reasonable upper bound
2213
2214        // Depth should be reasonable due to rebalancing
2215        let max_expected_depth = 2 * (stats.leaf_count as f64).log2().ceil() as usize;
2216        assert!(
2217            stats.depth <= max_expected_depth + 2,
2218            "Tree depth {} exceeds max {} for {} leaves",
2219            stats.depth,
2220            max_expected_depth,
2221            stats.leaf_count
2222        );
2223    }
2224
2225    #[test]
2226    fn test_find_by_offset() {
2227        let tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2228
2229        let info = tree.find_by_offset(50).unwrap();
2230        assert_eq!(info.location, BufferLocation::Stored(0));
2231        assert_eq!(info.offset_in_piece, Some(50));
2232
2233        // Out of bounds
2234        assert!(tree.find_by_offset(100).is_none());
2235    }
2236
2237    #[test]
2238    fn test_find_after_inserts() {
2239        let buffers = test_buffers();
2240        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2241        tree.insert(50, BufferLocation::Added(1), 0, 25, Some(0), &buffers);
2242
2243        // Should find in added section
2244        let info = tree.find_by_offset(50).unwrap();
2245        assert_eq!(info.location, BufferLocation::Added(1));
2246    }
2247
2248    #[test]
2249    fn test_offset_to_position_column_after_modification() {
2250        // This test reproduces a bug where offset_to_position returns incorrect
2251        // column values after buffer modifications.
2252        //
2253        // Initial content: "fn foo(val: i32) {\n    val + 1\n}\n"
2254        // After deleting "val" and inserting "value" twice:
2255        // Buffer becomes: "fn foo(value: i32) {\n    value + 1\n}\n"
2256        //
2257        // Position 25 should be line 1, column 4 (the 'v' in second "value")
2258        // But the bug causes it to return column 0.
2259
2260        // Create buffer with initial content
2261        let initial = b"fn foo(val: i32) {\n    val + 1\n}\n";
2262        let buffer = StringBuffer::new(0, initial.to_vec());
2263        let buffers = vec![buffer.clone()];
2264
2265        let mut tree = PieceTree::new(
2266            BufferLocation::Stored(0),
2267            0,
2268            initial.len(),
2269            Some(initial.iter().filter(|&&b| b == b'\n').count()),
2270        );
2271
2272        // Verify initial position works correctly
2273        // Position 23 = 'v' of second "val" on line 1 (after newline at pos 18)
2274        let pos = tree.offset_to_position(23, &buffers);
2275        assert_eq!(
2276            pos,
2277            Some((1, 4)),
2278            "Initial: position 23 should be line 1, column 4"
2279        );
2280
2281        // Now simulate LSP rename operations:
2282        // 1. Delete "val" at position 23 (3 bytes)
2283        // 2. Insert "value" at position 23 (5 bytes)
2284        // 3. Delete "val" at position 7 (3 bytes)
2285        // 4. Insert "value" at position 7 (5 bytes)
2286
2287        // First modification: delete "val" at position 23
2288        tree.delete(23, 3, &buffers);
2289
2290        // Insert "value" - need a new buffer
2291        let value_buf = StringBuffer::new(1, b"value".to_vec());
2292        let buffers = vec![buffer.clone(), value_buf.clone()];
2293        tree.insert(23, BufferLocation::Added(1), 0, 5, Some(0), &buffers);
2294
2295        // Second modification: delete "val" at position 7
2296        tree.delete(7, 3, &buffers);
2297
2298        // Insert "value" - use another buffer
2299        let value_buf2 = StringBuffer::new(2, b"value".to_vec());
2300        let buffers = vec![buffer.clone(), value_buf.clone(), value_buf2];
2301        tree.insert(7, BufferLocation::Added(2), 0, 5, Some(0), &buffers);
2302
2303        // Buffer is now: "fn foo(value: i32) {\n    value + 1\n}\n"
2304        // Line 0: "fn foo(value: i32) {\n" = 21 bytes (positions 0-20)
2305        // Line 1: "    value + 1\n" starts at position 21
2306        // Position 25 = 21 + 4 = line 1, column 4
2307
2308        // This is where the bug manifests
2309        let pos = tree.offset_to_position(25, &buffers);
2310        assert_eq!(
2311            pos,
2312            Some((1, 4)),
2313            "After modification: position 25 should be line 1, column 4"
2314        );
2315
2316        // Also test position 21 (start of line 1)
2317        let pos = tree.offset_to_position(21, &buffers);
2318        assert_eq!(pos, Some((1, 0)), "Position 21 should be line 1, column 0");
2319    }
2320
2321    // ============== Tests for apply_bulk_edits ==============
2322
2323    // Helper to pre-allocate buffers for bulk edit tests
2324    fn prepare_bulk_edit_buffers(
2325        buffers: &mut Vec<StringBuffer>,
2326        texts: &[&str],
2327    ) -> Vec<(BufferLocation, usize, usize, Option<usize>)> {
2328        let mut infos = Vec::new();
2329        for (i, text) in texts.iter().enumerate() {
2330            let id = buffers.len();
2331            let bytes = text.as_bytes().to_vec();
2332            let lf = bytes.iter().filter(|&&b| b == b'\n').count();
2333            let len = bytes.len();
2334            buffers.push(StringBuffer::new(id, bytes));
2335            infos.push((BufferLocation::Added(id), 0, len, Some(lf)));
2336            let _ = i; // suppress warning
2337        }
2338        infos
2339    }
2340
2341    #[test]
2342    fn test_bulk_edit_single_insert() {
2343        let mut buffers = vec![StringBuffer::new(0, b"hello world".to_vec())];
2344        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, Some(0));
2345
2346        // Pre-allocate buffer for the insert
2347        let infos = prepare_bulk_edit_buffers(&mut buffers, &["!"]);
2348
2349        // Insert "!" at position 11 (end)
2350        let edits: Vec<(usize, usize, &str)> = vec![(11, 0, "!")];
2351        let mut idx = 0;
2352
2353        let delta = tree.apply_bulk_edits(&edits, &buffers, |_text| {
2354            let info = infos[idx];
2355            idx += 1;
2356            info
2357        });
2358
2359        assert_eq!(delta, 1);
2360        assert_eq!(tree.total_bytes(), 12);
2361    }
2362
2363    #[test]
2364    fn test_bulk_edit_single_delete() {
2365        let buffers = vec![StringBuffer::new(0, b"hello world".to_vec())];
2366        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, Some(0));
2367
2368        // Delete "world" (positions 6-11) - no insert, so no buffer needed
2369        let edits: Vec<(usize, usize, &str)> = vec![(6, 5, "")];
2370
2371        let delta = tree.apply_bulk_edits(&edits, &buffers, |_text| {
2372            (BufferLocation::Added(1), 0, 0, Some(0))
2373        });
2374
2375        assert_eq!(delta, -5);
2376        assert_eq!(tree.total_bytes(), 6);
2377    }
2378
2379    #[test]
2380    fn test_bulk_edit_single_replace() {
2381        let mut buffers = vec![StringBuffer::new(0, b"hello world".to_vec())];
2382        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, Some(0));
2383
2384        // Pre-allocate buffer for the replacement
2385        let infos = prepare_bulk_edit_buffers(&mut buffers, &["rust"]);
2386
2387        // Replace "world" with "rust"
2388        let edits: Vec<(usize, usize, &str)> = vec![(6, 5, "rust")];
2389        let mut idx = 0;
2390
2391        let delta = tree.apply_bulk_edits(&edits, &buffers, |_text| {
2392            let info = infos[idx];
2393            idx += 1;
2394            info
2395        });
2396
2397        assert_eq!(delta, -1); // "world" (5) -> "rust" (4)
2398        assert_eq!(tree.total_bytes(), 10);
2399    }
2400
2401    #[test]
2402    fn test_bulk_edit_multiple_inserts_descending() {
2403        // Edits must be sorted descending by position
2404        let mut buffers = vec![StringBuffer::new(0, b"abc".to_vec())];
2405        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 3, Some(0));
2406
2407        // Pre-allocate buffers for 4 inserts
2408        let infos = prepare_bulk_edit_buffers(&mut buffers, &["X", "X", "X", "X"]);
2409
2410        // Insert "X" at positions 3, 2, 1, 0 (descending order)
2411        let edits: Vec<(usize, usize, &str)> = vec![
2412            (3, 0, "X"), // Insert at end
2413            (2, 0, "X"), // Insert before 'c'
2414            (1, 0, "X"), // Insert before 'b'
2415            (0, 0, "X"), // Insert at start
2416        ];
2417        let mut idx = 0;
2418
2419        let delta = tree.apply_bulk_edits(&edits, &buffers, |_text| {
2420            let info = infos[idx];
2421            idx += 1;
2422            info
2423        });
2424
2425        assert_eq!(delta, 4);
2426        assert_eq!(tree.total_bytes(), 7); // "XaXbXcX"
2427    }
2428
2429    #[test]
2430    fn test_bulk_edit_multiple_deletes_descending() {
2431        let buffers = vec![StringBuffer::new(0, b"abcdefgh".to_vec())];
2432        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 8, Some(0));
2433
2434        // Delete chars at positions 6, 4, 2, 0 (descending order) - no inserts
2435        let edits: Vec<(usize, usize, &str)> = vec![
2436            (6, 1, ""), // Delete 'g'
2437            (4, 1, ""), // Delete 'e'
2438            (2, 1, ""), // Delete 'c'
2439            (0, 1, ""), // Delete 'a'
2440        ];
2441
2442        let delta = tree.apply_bulk_edits(&edits, &buffers, |_| {
2443            (BufferLocation::Added(1), 0, 0, Some(0))
2444        });
2445
2446        assert_eq!(delta, -4);
2447        assert_eq!(tree.total_bytes(), 4); // "bdfh"
2448    }
2449
2450    #[test]
2451    fn test_bulk_edit_empty_edits() {
2452        let buffers = vec![StringBuffer::new(0, b"hello".to_vec())];
2453        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 5, Some(0));
2454
2455        let edits: Vec<(usize, usize, &str)> = vec![];
2456
2457        let delta = tree.apply_bulk_edits(&edits, &buffers, |_| {
2458            (BufferLocation::Added(1), 0, 0, Some(0))
2459        });
2460
2461        assert_eq!(delta, 0);
2462        assert_eq!(tree.total_bytes(), 5);
2463    }
2464
2465    #[test]
2466    fn test_bulk_edit_consistency_check() {
2467        // Test that piece sum equals total_bytes after bulk edit
2468        let mut buffers = vec![StringBuffer::new(0, b"0123456789".to_vec())];
2469        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 10, Some(0));
2470
2471        // Pre-allocate buffers for the inserts
2472        let infos = prepare_bulk_edit_buffers(&mut buffers, &["XX", "Y", "ZZZ"]);
2473
2474        // Multiple mixed operations
2475        let edits: Vec<(usize, usize, &str)> = vec![
2476            (8, 1, "XX"),  // Replace '8' with 'XX'
2477            (5, 2, "Y"),   // Replace '56' with 'Y'
2478            (2, 0, "ZZZ"), // Insert 'ZZZ' at position 2
2479        ];
2480        let mut idx = 0;
2481
2482        tree.apply_bulk_edits(&edits, &buffers, |_text| {
2483            let info = infos[idx];
2484            idx += 1;
2485            info
2486        });
2487
2488        // Verify consistency
2489        let leaves = tree.get_leaves();
2490        let sum: usize = leaves.iter().map(|l| l.bytes).sum();
2491        assert_eq!(
2492            sum,
2493            tree.total_bytes(),
2494            "Piece sum {} != total_bytes {}",
2495            sum,
2496            tree.total_bytes()
2497        );
2498    }
2499
2500    #[test]
2501    fn test_bulk_edit_vs_sequential_equivalence() {
2502        // Verify that bulk edit produces same result as sequential edits
2503        let original_content = b"The quick brown fox";
2504
2505        // Setup for bulk edit
2506        let mut buffers1 = vec![StringBuffer::new(0, original_content.to_vec())];
2507        let mut tree1 = PieceTree::new(BufferLocation::Stored(0), 0, 19, Some(0));
2508
2509        // Pre-allocate buffers for the replacements
2510        let infos = prepare_bulk_edit_buffers(&mut buffers1, &["red", "slow"]);
2511
2512        // Setup for sequential edit
2513        let mut buffers2 = vec![StringBuffer::new(0, original_content.to_vec())];
2514        let mut tree2 = PieceTree::new(BufferLocation::Stored(0), 0, 19, Some(0));
2515        let mut next_id2 = 1;
2516
2517        // Edits: Replace "quick" with "slow", "brown" with "red"
2518        // Positions in original: quick=4-9, brown=10-15
2519        // Must be sorted descending
2520        let edits: Vec<(usize, usize, &str)> = vec![
2521            (10, 5, "red"), // Replace "brown" at 10
2522            (4, 5, "slow"), // Replace "quick" at 4
2523        ];
2524        let mut idx = 0;
2525
2526        // Apply bulk edit
2527        tree1.apply_bulk_edits(&edits, &buffers1, |_text| {
2528            let info = infos[idx];
2529            idx += 1;
2530            info
2531        });
2532
2533        // Apply sequential edits (in descending order to match)
2534        // First: replace "brown" at 10
2535        tree2.delete(10, 5, &buffers2);
2536        buffers2.push(StringBuffer::new(next_id2, b"red".to_vec()));
2537        tree2.insert(
2538            10,
2539            BufferLocation::Added(next_id2),
2540            0,
2541            3,
2542            Some(0),
2543            &buffers2,
2544        );
2545        next_id2 += 1;
2546
2547        // Second: replace "quick" at 4
2548        tree2.delete(4, 5, &buffers2);
2549        buffers2.push(StringBuffer::new(next_id2, b"slow".to_vec()));
2550        tree2.insert(4, BufferLocation::Added(next_id2), 0, 4, Some(0), &buffers2);
2551
2552        assert_eq!(
2553            tree1.total_bytes(),
2554            tree2.total_bytes(),
2555            "Bulk edit total_bytes {} != sequential {}",
2556            tree1.total_bytes(),
2557            tree2.total_bytes()
2558        );
2559    }
2560}
2561
2562#[cfg(test)]
2563mod property_tests {
2564    use super::*;
2565    use proptest::prelude::*;
2566
2567    // Helper to create test buffers - using larger buffers for property tests
2568    fn test_buffers_large() -> Vec<StringBuffer> {
2569        vec![
2570            StringBuffer::new(0, vec![b'a'; 10000]), // Large buffer
2571            StringBuffer::new(1, vec![b'b'; 10000]),
2572        ]
2573    }
2574
2575    // Strategy to generate operations
2576    #[derive(Debug, Clone)]
2577    enum Operation {
2578        Insert { offset: usize, bytes: usize },
2579        Delete { offset: usize, bytes: usize },
2580    }
2581
2582    // Generate a sequence of operations
2583    fn operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
2584        prop::collection::vec(
2585            prop_oneof![
2586                (0usize..200, 1usize..50)
2587                    .prop_map(|(offset, bytes)| { Operation::Insert { offset, bytes } }),
2588                (0usize..200, 1usize..50)
2589                    .prop_map(|(offset, bytes)| { Operation::Delete { offset, bytes } }),
2590            ],
2591            0..50,
2592        )
2593    }
2594
2595    // More aggressive operation strategy that creates more internal nodes
2596    fn aggressive_operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
2597        prop::collection::vec(
2598            prop_oneof![
2599                // More inserts, smaller chunks to create more splits
2600                3 => (0usize..100, 1usize..20).prop_map(|(offset, bytes)| {
2601                    Operation::Insert { offset, bytes }
2602                }),
2603                // Some deletes
2604                1 => (0usize..100, 1usize..30).prop_map(|(offset, bytes)| {
2605                    Operation::Delete { offset, bytes }
2606                }),
2607            ],
2608            10..30, // More operations to force tree growth
2609        )
2610    }
2611
2612    proptest! {
2613        #[test]
2614        fn prop_total_bytes_consistency(operations in operation_strategy()) {
2615            let buffers = test_buffers_large();
2616            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2617            let mut expected_bytes = 100;
2618
2619            for op in operations {
2620                match op {
2621                    Operation::Insert { offset, bytes } => {
2622                        let offset = offset.min(tree.total_bytes());
2623                        let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
2624                        let bytes = bytes.min(buffer_len);
2625                        tree.insert(offset, BufferLocation::Added(1), 0, bytes, Some(0), &buffers);
2626                        expected_bytes += bytes;
2627                    }
2628                    Operation::Delete { offset, bytes } => {
2629                        if offset < tree.total_bytes() {
2630                            let actual_delete = bytes.min(tree.total_bytes() - offset);
2631                            tree.delete(offset, bytes, &buffers);
2632                            expected_bytes -= actual_delete;
2633                        }
2634                    }
2635                }
2636            }
2637
2638            prop_assert_eq!(tree.total_bytes(), expected_bytes);
2639        }
2640
2641        #[test]
2642        fn prop_tree_never_negative_bytes(operations in operation_strategy()) {
2643            let buffers = test_buffers_large();
2644            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2645
2646            for op in operations {
2647                match op {
2648                    Operation::Insert { offset, bytes } => {
2649                        let offset = offset.min(tree.total_bytes());
2650                        let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
2651                        let bytes = bytes.min(buffer_len);
2652                        tree.insert(offset, BufferLocation::Added(1), 0, bytes, Some(0), &buffers);
2653                    }
2654                    Operation::Delete { offset, bytes } => {
2655                        tree.delete(offset, bytes, &buffers);
2656                    }
2657                }
2658
2659                // Tree should never have negative bytes (underflow would wrap to large number)
2660                prop_assert!(tree.total_bytes() < 10_000_000);
2661            }
2662        }
2663
2664        #[test]
2665        fn prop_balanced_after_operations(operations in operation_strategy()) {
2666            let buffers = test_buffers_large();
2667            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2668
2669            for op in operations {
2670                match op {
2671                    Operation::Insert { offset, bytes } => {
2672                        let offset = offset.min(tree.total_bytes());
2673                        let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
2674                        let bytes = bytes.min(buffer_len);
2675                        tree.insert(offset, BufferLocation::Added(1), 0, bytes, Some(0), &buffers);
2676                    }
2677                    Operation::Delete { offset, bytes } => {
2678                        tree.delete(offset, bytes, &buffers);
2679                    }
2680                }
2681            }
2682
2683            let stats = tree.stats();
2684            if stats.leaf_count > 1 {
2685                let max_depth = 2 * (stats.leaf_count as f64).log2().ceil() as usize;
2686                prop_assert!(stats.depth <= max_depth + 2, "Tree depth {} exceeds expected max {} for {} leaves", stats.depth, max_depth, stats.leaf_count);
2687            }
2688        }
2689
2690        #[test]
2691        fn prop_insert_then_delete_equals_original(
2692            insert_offset in 0usize..100,
2693            insert_bytes in 1usize..50
2694        ) {
2695            let buffers = test_buffers_large();
2696            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2697            let original_bytes = tree.total_bytes();
2698
2699            let insert_offset = insert_offset.min(tree.total_bytes());
2700            let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
2701            let insert_bytes = insert_bytes.min(buffer_len);
2702            tree.insert(insert_offset, BufferLocation::Added(1), 0, insert_bytes, Some(0), &buffers);
2703
2704            // Delete what we just inserted
2705            tree.delete(insert_offset, insert_bytes, &buffers);
2706
2707            prop_assert_eq!(tree.total_bytes(), original_bytes);
2708        }
2709
2710        #[test]
2711        fn prop_find_offset_in_bounds(
2712            offset in 0usize..100
2713        ) {
2714            let tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2715
2716            let result = tree.find_by_offset(offset);
2717            prop_assert!(result.is_some());
2718        }
2719
2720        #[test]
2721        fn prop_find_offset_out_of_bounds(
2722            offset in 100usize..1000
2723        ) {
2724            let tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2725
2726            let result = tree.find_by_offset(offset);
2727            prop_assert!(result.is_none());
2728        }
2729
2730        #[test]
2731        fn prop_sequential_inserts_maintain_order(
2732            count in 1usize..20,
2733            insert_size in 1usize..10
2734        ) {
2735            let buffers = test_buffers_large();
2736            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 10, Some(0));
2737
2738            for _i in 0..count {
2739                let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
2740                let insert_size = insert_size.min(buffer_len);
2741                tree.insert(tree.total_bytes(), BufferLocation::Added(1), 0, insert_size, Some(0), &buffers);
2742            }
2743
2744            let expected_bytes = 10 + (count * insert_size);
2745            prop_assert_eq!(tree.total_bytes(), expected_bytes);
2746        }
2747
2748        #[test]
2749        fn prop_delete_all_reaches_zero(
2750            delete_size in 1usize..10
2751        ) {
2752            let buffers = test_buffers_large();
2753            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2754
2755            while tree.total_bytes() > 0 {
2756                let to_delete = delete_size.min(tree.total_bytes());
2757                tree.delete(0, to_delete, &buffers);
2758            }
2759
2760            prop_assert_eq!(tree.total_bytes(), 0);
2761        }
2762    }
2763
2764    #[test]
2765    fn test_empty_delete() {
2766        let buffers = test_buffers_large();
2767        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2768        tree.delete(50, 0, &buffers);
2769        assert_eq!(tree.total_bytes(), 100);
2770    }
2771
2772    // ============== Property tests for apply_bulk_edits ==============
2773
2774    // Strategy to generate bulk edit operations
2775    #[derive(Debug, Clone)]
2776    struct BulkEditOp {
2777        position: usize,
2778        delete_len: usize,
2779        insert_text: String,
2780    }
2781
2782    fn bulk_edit_strategy() -> impl Strategy<Value = Vec<BulkEditOp>> {
2783        prop::collection::vec(
2784            (0usize..100, 0usize..20, "[a-zA-Z0-9]{0,10}").prop_map(
2785                |(position, delete_len, insert_text)| BulkEditOp {
2786                    position,
2787                    delete_len,
2788                    insert_text,
2789                },
2790            ),
2791            1..20,
2792        )
2793    }
2794
2795    // Helper to pre-allocate buffers for property tests
2796    fn preallocate_buffers(
2797        buffers: &mut Vec<StringBuffer>,
2798        texts: &[String],
2799    ) -> Vec<(BufferLocation, usize, usize, Option<usize>)> {
2800        let mut infos = Vec::new();
2801        for text in texts {
2802            let id = buffers.len();
2803            let bytes = text.as_bytes().to_vec();
2804            let lf = bytes.iter().filter(|&&b| b == b'\n').count();
2805            let len = bytes.len();
2806            buffers.push(StringBuffer::new(id, bytes));
2807            infos.push((BufferLocation::Added(id), 0, len, Some(lf)));
2808        }
2809        infos
2810    }
2811
2812    proptest! {
2813        /// Property: apply_bulk_edits maintains tree consistency
2814        /// Sum of piece lengths must equal total_bytes after bulk edit
2815        #[test]
2816        fn prop_bulk_edit_tree_consistency(ops in bulk_edit_strategy()) {
2817            let mut buffers = vec![StringBuffer::new(0, b"x".repeat(100).to_vec())];
2818            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2819
2820            // Sort edits by position descending (required by apply_bulk_edits)
2821            let mut ops = ops;
2822            ops.sort_by(|a, b| b.position.cmp(&a.position));
2823
2824            // Pre-allocate all buffers
2825            let texts: Vec<String> = ops.iter().map(|op| op.insert_text.clone()).collect();
2826            let infos = preallocate_buffers(&mut buffers, &texts);
2827
2828            // Clamp positions and delete lengths to valid ranges
2829            let edits: Vec<(usize, usize, &str)> = ops.iter()
2830                .map(|op| {
2831                    let pos = op.position.min(tree.total_bytes());
2832                    let del = op.delete_len.min(tree.total_bytes().saturating_sub(pos));
2833                    (pos, del, op.insert_text.as_str())
2834                })
2835                .collect();
2836
2837            let mut idx = 0;
2838            tree.apply_bulk_edits(&edits, &buffers, |_text| {
2839                let info = infos[idx];
2840                idx += 1;
2841                info
2842            });
2843
2844            // INVARIANT: Sum of all piece lengths must equal total_bytes
2845            let leaves = tree.get_leaves();
2846            let sum_of_pieces: usize = leaves.iter().map(|leaf| leaf.bytes).sum();
2847            prop_assert_eq!(
2848                sum_of_pieces,
2849                tree.total_bytes(),
2850                "After bulk edit: sum of pieces ({}) != total_bytes ({})",
2851                sum_of_pieces,
2852                tree.total_bytes()
2853            );
2854        }
2855
2856        /// Property: apply_bulk_edits returns correct delta
2857        #[test]
2858        fn prop_bulk_edit_correct_delta(ops in bulk_edit_strategy()) {
2859            let mut buffers = vec![StringBuffer::new(0, b"x".repeat(100).to_vec())];
2860            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2861
2862            let original_bytes = tree.total_bytes();
2863
2864            // Sort edits by position descending
2865            let mut ops = ops;
2866            ops.sort_by(|a, b| b.position.cmp(&a.position));
2867
2868            // Pre-allocate all buffers
2869            let texts: Vec<String> = ops.iter().map(|op| op.insert_text.clone()).collect();
2870            let infos = preallocate_buffers(&mut buffers, &texts);
2871
2872            let edits: Vec<(usize, usize, &str)> = ops.iter()
2873                .map(|op| {
2874                    let pos = op.position.min(tree.total_bytes());
2875                    let del = op.delete_len.min(tree.total_bytes().saturating_sub(pos));
2876                    (pos, del, op.insert_text.as_str())
2877                })
2878                .collect();
2879
2880            let mut idx = 0;
2881            let delta = tree.apply_bulk_edits(&edits, &buffers, |_text| {
2882                let info = infos[idx];
2883                idx += 1;
2884                info
2885            });
2886
2887            let actual_change = tree.total_bytes() as isize - original_bytes as isize;
2888            prop_assert_eq!(
2889                delta,
2890                actual_change,
2891                "Returned delta ({}) != actual change ({})",
2892                delta,
2893                actual_change
2894            );
2895        }
2896
2897        /// Property: bulk edit with only inserts increases size correctly
2898        #[test]
2899        fn prop_bulk_edit_inserts_only(
2900            positions in prop::collection::vec(0usize..50, 1..10),
2901            insert_len in 1usize..10
2902        ) {
2903            let mut buffers = vec![StringBuffer::new(0, b"x".repeat(50).to_vec())];
2904            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 50, Some(0));
2905
2906            let insert_text = "a".repeat(insert_len);
2907            let original_bytes = tree.total_bytes();
2908
2909            // Sort positions descending
2910            let mut positions = positions;
2911            positions.sort_by(|a, b| b.cmp(a));
2912            positions.dedup();
2913
2914            // Pre-allocate all buffers
2915            let texts: Vec<String> = positions.iter().map(|_| insert_text.clone()).collect();
2916            let infos = preallocate_buffers(&mut buffers, &texts);
2917
2918            let edits: Vec<(usize, usize, &str)> = positions
2919                .iter()
2920                .map(|&pos| (pos.min(tree.total_bytes()), 0, insert_text.as_str()))
2921                .collect();
2922
2923            let mut idx = 0;
2924            tree.apply_bulk_edits(&edits, &buffers, |_text| {
2925                let info = infos[idx];
2926                idx += 1;
2927                info
2928            });
2929
2930            let expected_bytes = original_bytes + edits.len() * insert_len;
2931            prop_assert_eq!(
2932                tree.total_bytes(),
2933                expected_bytes,
2934                "After {} inserts of {} bytes each: expected {} bytes, got {}",
2935                edits.len(),
2936                insert_len,
2937                expected_bytes,
2938                tree.total_bytes()
2939            );
2940        }
2941
2942        /// Property: bulk edit with only deletes decreases size correctly
2943        #[test]
2944        fn prop_bulk_edit_deletes_only(
2945            ops in prop::collection::vec((0usize..80, 1usize..5), 1..10)
2946        ) {
2947            let buffers = vec![StringBuffer::new(0, b"x".repeat(100).to_vec())];
2948            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2949
2950            // Sort by position descending
2951            let mut ops = ops;
2952            ops.sort_by(|a, b| b.0.cmp(&a.0));
2953
2954            // Remove overlapping deletes
2955            let mut edits: Vec<(usize, usize, &str)> = Vec::new();
2956            let mut last_affected_pos = tree.total_bytes();
2957            for (pos, del_len) in ops {
2958                if pos < last_affected_pos {
2959                    let actual_del = del_len.min(last_affected_pos - pos);
2960                    if actual_del > 0 {
2961                        edits.push((pos, actual_del, ""));
2962                        last_affected_pos = pos;
2963                    }
2964                }
2965            }
2966
2967            let expected_delete: usize = edits.iter().map(|(_, d, _)| d).sum();
2968
2969            tree.apply_bulk_edits(&edits, &buffers, |_| {
2970                (BufferLocation::Added(1), 0, 0, Some(0))
2971            });
2972
2973            let expected_bytes = 100 - expected_delete;
2974            prop_assert_eq!(
2975                tree.total_bytes(),
2976                expected_bytes,
2977                "After deleting {} bytes: expected {} bytes, got {}",
2978                expected_delete,
2979                expected_bytes,
2980                tree.total_bytes()
2981            );
2982        }
2983    }
2984
2985    proptest! {
2986        /// Property: Sum of all piece lengths must equal total_bytes
2987        /// This catches bugs like duplicate piece insertion
2988        #[test]
2989        fn prop_tree_consistency_piece_sum(operations in operation_strategy()) {
2990            let buffers = test_buffers_large();
2991            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
2992
2993            for op in operations {
2994                match op {
2995                    Operation::Insert { offset, bytes } => {
2996                        let offset = offset.min(tree.total_bytes());
2997                        let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
2998                        let bytes = bytes.min(buffer_len);
2999                        tree.insert(offset, BufferLocation::Added(1), 0, bytes, Some(0), &buffers);
3000                    }
3001                    Operation::Delete { offset, bytes } => {
3002                        tree.delete(offset, bytes, &buffers);
3003                    }
3004                }
3005
3006                // INVARIANT: Sum of all piece lengths must equal total_bytes
3007                let leaves = tree.get_leaves();
3008                let sum_of_pieces: usize = leaves.iter().map(|leaf| leaf.bytes).sum();
3009                prop_assert_eq!(
3010                    sum_of_pieces,
3011                    tree.total_bytes(),
3012                    "Tree inconsistency: sum of piece lengths ({}) != total_bytes ({})",
3013                    sum_of_pieces,
3014                    tree.total_bytes()
3015                );
3016            }
3017        }
3018
3019        /// Property: Line feed count consistency
3020        /// Sum of all piece line_feed_cnt must equal tree's total line feeds
3021        #[test]
3022        fn prop_tree_consistency_line_feeds(operations in operation_strategy()) {
3023            let buffers = test_buffers_large();
3024            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3025
3026            for op in operations {
3027                match op {
3028                    Operation::Insert { offset, bytes } => {
3029                        let offset = offset.min(tree.total_bytes());
3030                        let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
3031                        let bytes = bytes.min(buffer_len);
3032                        tree.insert(offset, BufferLocation::Added(1), 0, bytes, Some(0), &buffers);
3033                    }
3034                    Operation::Delete { offset, bytes } => {
3035                        tree.delete(offset, bytes, &buffers);
3036                    }
3037                }
3038
3039                // INVARIANT: Sum of all piece line feeds must equal tree's total
3040                let leaves = tree.get_leaves();
3041                let sum_of_line_feeds: Option<usize> = leaves.iter()
3042                    .try_fold(0, |acc, leaf| leaf.line_feed_cnt.map(|cnt| acc + cnt));
3043                let stats = tree.stats();
3044                prop_assert_eq!(
3045                    sum_of_line_feeds,
3046                    stats.line_feed_count,
3047                    "Line feed inconsistency: sum of piece line feeds ({:?}) != tree total ({:?})",
3048                    sum_of_line_feeds,
3049                    stats.line_feed_count
3050                );
3051            }
3052        }
3053
3054        /// Aggressive consistency test designed to catch the duplicate piece insertion bug
3055        /// Uses more operations with smaller inserts to force internal node creation and splits
3056        #[test]
3057        fn prop_tree_consistency_aggressive(operations in aggressive_operation_strategy()) {
3058            let buffers = test_buffers_large();
3059            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3060
3061            // Prime the tree with several inserts to create internal nodes first
3062            // This increases the likelihood of hitting the bug scenario
3063            for i in 0..5 {
3064                let offset = (i * 17) % (tree.total_bytes().max(1));
3065                tree.insert(offset, BufferLocation::Added(1), i * 100, 10, Some(0), &buffers);
3066            }
3067
3068            // Verify we have internal nodes
3069            prop_assert!(tree.stats().depth > 1, "Priming should create internal nodes");
3070
3071            for (i, op) in operations.iter().enumerate() {
3072                match *op {
3073                    Operation::Insert { offset, bytes } => {
3074                        let offset = offset.min(tree.total_bytes());
3075                        let buffer_len = buffers[1].get_data().map(|d| d.len()).unwrap_or(0);
3076                        let bytes = bytes.min(buffer_len);
3077                        tree.insert(offset, BufferLocation::Added(1), 0, bytes, Some(0), &buffers);
3078                    }
3079                    Operation::Delete { offset, bytes } => {
3080                        tree.delete(offset, bytes, &buffers);
3081                    }
3082                }
3083
3084                // CRITICAL INVARIANT: Sum of all piece lengths must equal total_bytes
3085                // This catches the duplicate piece insertion bug
3086                let leaves = tree.get_leaves();
3087                let sum_of_pieces: usize = leaves.iter().map(|leaf| leaf.bytes).sum();
3088                prop_assert_eq!(
3089                    sum_of_pieces,
3090                    tree.total_bytes(),
3091                    "Operation {}: Tree inconsistency after {:?}.\n\
3092                     Sum of piece lengths ({}) != total_bytes ({}).\n\
3093                     Tree depth: {}, leaves: {}.\n\
3094                     Pieces: {:?}",
3095                    i, op, sum_of_pieces, tree.total_bytes(),
3096                    tree.stats().depth, tree.stats().leaf_count,
3097                    leaves
3098                );
3099            }
3100        }
3101    }
3102
3103    #[test]
3104    fn test_delete_beyond_end() {
3105        let buffers = test_buffers_large();
3106        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3107        tree.delete(50, 100, &buffers); // Try to delete 100 bytes from offset 50
3108        assert_eq!(tree.total_bytes(), 50); // Should only delete 50 bytes
3109    }
3110
3111    #[test]
3112    fn test_insert_zero_bytes() {
3113        let buffers = test_buffers_large();
3114        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3115        tree.insert(50, BufferLocation::Added(1), 0, 0, Some(0), &buffers);
3116        assert_eq!(tree.total_bytes(), 100);
3117    }
3118
3119    #[test]
3120    fn test_tree_consistency_after_insert() {
3121        // Regression test: verify tree consistency after each operation
3122        // This test creates enough inserts to force internal nodes, which is where the bug manifests
3123        let buffers = test_buffers_large();
3124        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3125
3126        // Do several inserts to create internal nodes and splits
3127        for i in 0..10 {
3128            let offset = (i * 13) % (tree.total_bytes().max(1)); // Varying offsets
3129            tree.insert(
3130                offset,
3131                BufferLocation::Added(1),
3132                i * 10,
3133                5,
3134                Some(0),
3135                &buffers,
3136            );
3137
3138            // INVARIANT: sum of piece lengths must equal total_bytes
3139            let leaves = tree.get_leaves();
3140            let sum: usize = leaves.iter().map(|l| l.bytes).sum();
3141            assert_eq!(
3142                sum,
3143                tree.total_bytes(),
3144                "After insert {}: sum of pieces ({}) != total_bytes ({}).\nLeaves: {:?}",
3145                i,
3146                sum,
3147                tree.total_bytes(),
3148                leaves
3149            );
3150        }
3151
3152        // Verify we actually created internal nodes
3153        let stats = tree.stats();
3154        assert!(
3155            stats.depth > 1,
3156            "Test should create internal nodes, but depth is {}",
3157            stats.depth
3158        );
3159    }
3160
3161    #[test]
3162    fn test_duplicate_piece_bug_exact_scenario() {
3163        // This replicates the exact scenario that exposed the duplicate insertion bug
3164        let mut buffers = vec![StringBuffer::new(0, b"initial\ntext".to_vec())];
3165        let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 12, Some(1));
3166
3167        // Delete all - creates an empty piece
3168        tree.delete(0, 12, &buffers);
3169
3170        // Check tree consistency after delete
3171        let leaves = tree.get_leaves();
3172        let sum: usize = leaves.iter().map(|l| l.bytes).sum();
3173        assert_eq!(
3174            sum,
3175            tree.total_bytes(),
3176            "After delete: sum={}, total={}",
3177            sum,
3178            tree.total_bytes()
3179        );
3180
3181        // Insert 'a' at position 0
3182        buffers.push(StringBuffer::new(1, b"a".to_vec()));
3183        tree.insert(0, BufferLocation::Added(1), 0, 1, Some(0), &buffers);
3184
3185        // Check consistency
3186        let leaves = tree.get_leaves();
3187        let sum: usize = leaves.iter().map(|l| l.bytes).sum();
3188        assert_eq!(
3189            sum,
3190            tree.total_bytes(),
3191            "After first insert: sum={}, total={}. Leaves: {:?}",
3192            sum,
3193            tree.total_bytes(),
3194            leaves
3195        );
3196
3197        // Insert 'b' at position 0 - this should trigger the bug with buggy code
3198        buffers.push(StringBuffer::new(2, b"b".to_vec()));
3199        tree.insert(0, BufferLocation::Added(2), 0, 1, Some(0), &buffers);
3200
3201        // Check consistency - this will fail with the bug
3202        let leaves = tree.get_leaves();
3203        let sum: usize = leaves.iter().map(|l| l.bytes).sum();
3204        assert_eq!(
3205            sum,
3206            tree.total_bytes(),
3207            "After second insert: sum={}, total={}. Leaves: {:?}",
3208            sum,
3209            tree.total_bytes(),
3210            leaves
3211        );
3212    }
3213
3214    // Property tests for PieceRangeIter
3215    proptest! {
3216        #[test]
3217        fn test_piece_iter_covers_exact_range(
3218            ops in aggressive_operation_strategy(),
3219            start in 0usize..100,
3220            len in 1usize..50
3221        ) {
3222            let mut buffers = vec![StringBuffer::new(0, b"x".repeat(100).to_vec())];
3223            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3224
3225            // Apply operations to build up tree
3226            for op in ops.iter() {
3227                match op {
3228                    Operation::Insert { offset, bytes } => {
3229                        let offset = (*offset).min(tree.total_bytes());
3230                        buffers.push(StringBuffer::new(buffers.len(), b"a".repeat(*bytes).to_vec()));
3231                        tree.insert(offset, BufferLocation::Added(buffers.len() - 1), 0, *bytes, Some(0), &buffers);
3232                    }
3233                    Operation::Delete { offset, bytes } => {
3234                        let offset = (*offset).min(tree.total_bytes());
3235                        let bytes = (*bytes).min(tree.total_bytes().saturating_sub(offset));
3236                        if bytes > 0 {
3237                            tree.delete(offset, bytes, &buffers);
3238                        }
3239                    }
3240                }
3241            }
3242
3243            let total_bytes = tree.total_bytes();
3244            if total_bytes == 0 {
3245                return Ok(());
3246            }
3247
3248            let start = start.min(total_bytes.saturating_sub(1));
3249            let end = (start + len).min(total_bytes);
3250
3251            // Collect pieces using iterator
3252            let pieces: Vec<_> = tree.iter_pieces_in_range(start, end).collect();
3253
3254            // Verify coverage: pieces should cover [start, end)
3255            if !pieces.is_empty() {
3256                let first_piece_start = pieces[0].doc_offset;
3257                let last_piece = &pieces[pieces.len() - 1];
3258                let last_piece_end = last_piece.doc_offset + last_piece.bytes;
3259
3260                // First piece should start at or before requested start
3261                prop_assert!(first_piece_start <= start,
3262                    "First piece starts at {}, but requested start is {}", first_piece_start, start);
3263
3264                // Last piece should end at or after requested end
3265                prop_assert!(last_piece_end >= end,
3266                    "Last piece ends at {}, but requested end is {}", last_piece_end, end);
3267            }
3268        }
3269
3270        #[test]
3271        fn test_piece_iter_no_gaps(ops in aggressive_operation_strategy()) {
3272            let mut buffers = vec![StringBuffer::new(0, b"x".repeat(100).to_vec())];
3273            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3274
3275            for op in ops {
3276                match op {
3277                    Operation::Insert { offset, bytes } => {
3278                        let offset = offset.min(tree.total_bytes());
3279                        buffers.push(StringBuffer::new(buffers.len(), b"a".repeat(bytes).to_vec()));
3280                        tree.insert(offset, BufferLocation::Added(buffers.len() - 1), 0, bytes, Some(0), &buffers);
3281                    }
3282                    Operation::Delete { offset, bytes } => {
3283                        let offset = offset.min(tree.total_bytes());
3284                        let bytes = bytes.min(tree.total_bytes().saturating_sub(offset));
3285                        if bytes > 0 {
3286                            tree.delete(offset, bytes, &buffers);
3287                        }
3288                    }
3289                }
3290            }
3291
3292            let total_bytes = tree.total_bytes();
3293            if total_bytes == 0 {
3294                return Ok(());
3295            }
3296
3297            // Iterate over entire document
3298            let pieces: Vec<_> = tree.iter_pieces_in_range(0, total_bytes).collect();
3299
3300            // Verify no gaps: each piece should start where previous one ended
3301            for i in 1..pieces.len() {
3302                let prev_end = pieces[i - 1].doc_offset + pieces[i - 1].bytes;
3303                let curr_start = pieces[i].doc_offset;
3304                prop_assert_eq!(prev_end, curr_start,
3305                    "Gap between piece {} (ends at {}) and piece {} (starts at {})",
3306                    i - 1, prev_end, i, curr_start);
3307            }
3308        }
3309
3310        #[test]
3311        fn test_piece_iter_total_bytes_matches(ops in aggressive_operation_strategy()) {
3312            let mut buffers = vec![StringBuffer::new(0, b"x".repeat(100).to_vec())];
3313            let mut tree = PieceTree::new(BufferLocation::Stored(0), 0, 100, Some(0));
3314
3315            for op in ops {
3316                match op {
3317                    Operation::Insert { offset, bytes } => {
3318                        let offset = offset.min(tree.total_bytes());
3319                        buffers.push(StringBuffer::new(buffers.len(), b"a".repeat(bytes).to_vec()));
3320                        tree.insert(offset, BufferLocation::Added(buffers.len() - 1), 0, bytes, Some(0), &buffers);
3321                    }
3322                    Operation::Delete { offset, bytes } => {
3323                        let offset = offset.min(tree.total_bytes());
3324                        let bytes = bytes.min(tree.total_bytes().saturating_sub(offset));
3325                        if bytes > 0 {
3326                            tree.delete(offset, bytes, &buffers);
3327                        }
3328                    }
3329                }
3330            }
3331
3332            let total_bytes = tree.total_bytes();
3333            if total_bytes == 0 {
3334                return Ok(());
3335            }
3336
3337            // Sum of piece bytes should equal total bytes
3338            let pieces: Vec<_> = tree.iter_pieces_in_range(0, total_bytes).collect();
3339            let sum_bytes: usize = pieces.iter().map(|p| p.bytes).sum();
3340            prop_assert_eq!(sum_bytes, total_bytes,
3341                "Sum of piece bytes ({}) doesn't match total_bytes ({})", sum_bytes, total_bytes);
3342        }
3343
3344        /// Property test that verifies offset_to_position returns correct line/column
3345        /// after buffer modifications. This catches the bug where column calculation
3346        /// was incorrect after insertions/deletions.
3347        #[test]
3348        fn prop_offset_to_position_correct_after_modifications(
3349            ops in prop::collection::vec(
3350                prop_oneof![
3351                    // Insert with newlines
3352                    (0usize..50, prop::collection::vec(
3353                        prop_oneof![
3354                            Just(b'a'),
3355                            Just(b'\n'),
3356                        ],
3357                        1..20
3358                    )).prop_map(|(offset, bytes)| (offset, bytes, true)),
3359                    // Delete
3360                    (0usize..50, 1usize..10).prop_map(|(offset, _bytes)| (offset, vec![], false)),
3361                ],
3362                5..20
3363            ),
3364            test_offsets in prop::collection::vec(0usize..100, 3..10)
3365        ) {
3366            // Start with content that has newlines
3367            let initial = b"Hello\nWorld\nTest\n";
3368            let mut content = initial.to_vec();
3369
3370            let mut buffers = vec![StringBuffer::new(0, initial.to_vec())];
3371            let newline_count = initial.iter().filter(|&&b| b == b'\n').count();
3372            let mut tree = PieceTree::new(
3373                BufferLocation::Stored(0),
3374                0,
3375                initial.len(),
3376                Some(newline_count),
3377            );
3378
3379            // Apply operations, tracking actual content
3380            for (offset, bytes, is_insert) in ops {
3381                if is_insert && !bytes.is_empty() {
3382                    let offset = offset.min(content.len());
3383                    let newlines = bytes.iter().filter(|&&b| b == b'\n').count();
3384
3385                    // Add buffer and insert into tree
3386                    buffers.push(StringBuffer::new(buffers.len(), bytes.clone()));
3387                    tree.insert(
3388                        offset,
3389                        BufferLocation::Added(buffers.len() - 1),
3390                        0,
3391                        bytes.len(),
3392                        Some(newlines),
3393                        &buffers,
3394                    );
3395
3396                    // Update actual content
3397                    content.splice(offset..offset, bytes);
3398                } else if !is_insert {
3399                    // Delete operation - offset is first element, bytes length is implied
3400                    let offset = offset.min(content.len());
3401                    let delete_len = 5.min(content.len().saturating_sub(offset)); // Use fixed small delete
3402                    if delete_len > 0 {
3403                        tree.delete(offset, delete_len, &buffers);
3404                        content.drain(offset..offset + delete_len);
3405                    }
3406                }
3407            }
3408
3409            // Helper to compute ground truth line/column from content
3410            let compute_position = |content: &[u8], offset: usize| -> (usize, usize) {
3411                let offset = offset.min(content.len());
3412                let mut line = 0;
3413                let mut col = 0;
3414                for (i, &byte) in content.iter().enumerate() {
3415                    if i == offset {
3416                        break;
3417                    }
3418                    if byte == b'\n' {
3419                        line += 1;
3420                        col = 0;
3421                    } else {
3422                        col += 1;
3423                    }
3424                }
3425                (line, col)
3426            };
3427
3428            // Test various offsets
3429            for offset in test_offsets {
3430                let offset = offset.min(content.len());
3431                if offset == 0 {
3432                    continue; // Skip 0, it's a special case that always works
3433                }
3434
3435                let expected = compute_position(&content, offset);
3436                let actual = tree.offset_to_position(offset, &buffers);
3437
3438                prop_assert_eq!(
3439                    actual,
3440                    Some(expected),
3441                    "offset_to_position({}) returned {:?}, expected {:?}. Content len: {}",
3442                    offset,
3443                    actual,
3444                    expected,
3445                    content.len()
3446                );
3447            }
3448        }
3449    }
3450}