// fresh/model/buffer.rs
//! Text buffer that uses PieceTree with integrated line tracking.
//! Architecture where the tree is the single source of truth for text and line information.
3use crate::model::filesystem::{FileMetadata, FileSystem, WriteOp};
4use crate::model::piece_tree::{
5    BufferData, BufferLocation, Cursor, PieceInfo, PieceRangeIter, PieceTree, Position,
6    StringBuffer, TreeStats,
7};
8use crate::model::piece_tree_diff::PieceTreeDiff;
9use crate::primitives::grapheme;
10use anyhow::{Context, Result};
11use regex::bytes::Regex;
12use std::io::{self, Write};
13use std::ops::Range;
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16
/// Error returned when a file save operation requires elevated privileges.
///
/// Carries everything needed to finish the save via sudo in a single
/// operation while preserving the original file's ownership and permissions.
#[derive(Debug, Clone, PartialEq)]
pub struct SudoSaveRequired {
    /// Path to the temporary file containing the new content
    pub temp_path: PathBuf,
    /// Destination path where the file should be saved
    pub dest_path: PathBuf,
    /// Original file owner (UID)
    pub uid: u32,
    /// Original file group (GID)
    pub gid: u32,
    /// Original file permissions (mode)
    pub mode: u32,
}

impl std::fmt::Display for SudoSaveRequired {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let dest = self.dest_path.display();
        write!(
            f,
            "Permission denied saving to {}. Use sudo to complete the operation.",
            dest
        )
    }
}

impl std::error::Error for SudoSaveRequired {}
46
// Large file support configuration
/// Default threshold for considering a file "large" (100 MB).
/// Files at or above this size are opened lazily, without line indexing.
pub const DEFAULT_LARGE_FILE_THRESHOLD: usize = 100 * 1024 * 1024;

/// Chunk size to load when lazy loading (1 MB)
pub const LOAD_CHUNK_SIZE: usize = 1024 * 1024;

/// Chunk alignment for lazy loading (64 KB)
pub const CHUNK_ALIGNMENT: usize = 64 * 1024;
56
/// Line ending format used in the file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineEnding {
    /// Unix/Linux/Mac format (\n)
    #[default]
    LF,
    /// Windows format (\r\n)
    CRLF,
    /// Old Mac format (\r) - rare but supported
    CR,
}

impl LineEnding {
    /// The byte sequence this line ending inserts into the buffer.
    pub fn as_str(&self) -> &'static str {
        match *self {
            LineEnding::CRLF => "\r\n",
            LineEnding::CR => "\r",
            LineEnding::LF => "\n",
        }
    }

    /// Short label for the status bar.
    pub fn display_name(&self) -> &'static str {
        match *self {
            LineEnding::CRLF => "CRLF",
            LineEnding::CR => "CR",
            LineEnding::LF => "LF",
        }
    }
}
88
/// A write recipe built from the piece tree for saving.
///
/// Describes how to reconstruct the buffer's content as a sequence of
/// actions: Copy actions reference unchanged byte ranges in `src_path`,
/// Insert actions carry new/modified bytes from `insert_data`.
struct WriteRecipe {
    /// The source file path for Copy operations (if any)
    src_path: Option<PathBuf>,
    /// Data chunks for Insert operations (owned to avoid lifetime issues)
    insert_data: Vec<Vec<u8>>,
    /// Sequence of actions to build the output file, in document order
    actions: Vec<RecipeAction>,
}
98
/// An action in a write recipe.
#[derive(Debug, Clone, Copy)]
enum RecipeAction {
    /// Copy `len` bytes from the recipe's source file starting at `offset`
    Copy { offset: u64, len: u64 },
    /// Insert the bytes stored at `WriteRecipe::insert_data[index]`
    Insert { index: usize },
}
107
108impl WriteRecipe {
109    /// Convert the recipe to WriteOp slice for use with filesystem write_patched
110    fn to_write_ops(&self) -> Vec<WriteOp<'_>> {
111        self.actions
112            .iter()
113            .map(|action| match action {
114                RecipeAction::Copy { offset, len } => WriteOp::Copy {
115                    offset: *offset,
116                    len: *len,
117                },
118                RecipeAction::Insert { index } => WriteOp::Insert {
119                    data: &self.insert_data[*index],
120                },
121            })
122            .collect()
123    }
124
125    /// Check if this recipe has any Copy operations
126    fn has_copy_ops(&self) -> bool {
127        self.actions
128            .iter()
129            .any(|a| matches!(a, RecipeAction::Copy { .. }))
130    }
131
132    /// Flatten all Insert operations into a single buffer.
133    /// Only valid when has_copy_ops() returns false.
134    fn flatten_inserts(&self) -> Vec<u8> {
135        let mut result = Vec::new();
136        for action in &self.actions {
137            if let RecipeAction::Insert { index } = action {
138                result.extend_from_slice(&self.insert_data[*index]);
139            }
140        }
141        result
142    }
143}
144
/// Represents a line number (simplified for new implementation).
/// Legacy enum kept for backwards compatibility - always Absolute now.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineNumber {
    /// Absolute line number - this is the actual line number in the file
    Absolute(usize),
    /// Relative line number (deprecated - now same as Absolute)
    Relative {
        line: usize,
        from_cached_line: usize,
    },
}

impl LineNumber {
    /// Raw zero-based line value, regardless of variant.
    pub fn value(&self) -> usize {
        match *self {
            LineNumber::Absolute(line) => line,
            LineNumber::Relative { line, .. } => line,
        }
    }

    /// True for the `Absolute` variant.
    pub fn is_absolute(&self) -> bool {
        matches!(self, Self::Absolute(_))
    }

    /// True for the (deprecated) `Relative` variant.
    pub fn is_relative(&self) -> bool {
        matches!(self, Self::Relative { .. })
    }

    /// One-based display string; relative numbers carry a `~` prefix.
    pub fn format(&self) -> String {
        let shown = self.value() + 1;
        match self {
            Self::Absolute(_) => shown.to_string(),
            Self::Relative { .. } => format!("~{}", shown),
        }
    }
}
184
/// A text buffer that manages document content using a piece table
/// with integrated line tracking.
///
/// The piece tree is the single source of truth for text and line
/// information; `buffers` holds the backing byte storage that tree pieces
/// reference by buffer id.
pub struct TextBuffer {
    /// Filesystem abstraction for file I/O operations.
    /// Stored internally so methods can access it without threading through call chains.
    fs: Arc<dyn FileSystem + Send + Sync>,

    /// The piece tree for efficient text manipulation with integrated line tracking
    piece_tree: PieceTree,

    /// Snapshot of the piece tree root at last save (shared via Arc)
    saved_root: Arc<crate::model::piece_tree::PieceTreeNode>,

    /// List of string buffers containing chunks of text data.
    /// Index 0 is typically the original/stored buffer;
    /// additional buffers are added for modifications.
    buffers: Vec<StringBuffer>,

    /// Next buffer ID to assign
    next_buffer_id: usize,

    /// Optional file path for persistence
    file_path: Option<PathBuf>,

    /// Has the buffer been modified since last save?
    modified: bool,

    /// Does the buffer have unsaved changes for recovery auto-save?
    /// This is separate from `modified` because recovery auto-save doesn't
    /// clear `modified` (buffer still differs from on-disk file).
    recovery_pending: bool,

    /// Is this a large file (no line indexing, lazy loading enabled)?
    large_file: bool,

    /// Is this a binary file? Binary files are opened read-only and render
    /// unprintable characters as code points.
    is_binary: bool,

    /// Line ending format detected from the file (or default for new files)
    line_ending: LineEnding,

    /// Original line ending format when file was loaded (used for conversion on save).
    /// This tracks what the file had when loaded, so we can detect if the user
    /// changed the line ending format and needs conversion on save.
    original_line_ending: LineEnding,

    /// The file size on disk after the last save.
    /// Used for chunked recovery to know the original file size for reconstruction.
    /// Updated when loading from file or after saving.
    saved_file_size: Option<usize>,

    /// Monotonic version counter for change tracking (wraps on overflow).
    version: u64,
}
240
241impl TextBuffer {
242    /// Create a new text buffer with the given filesystem implementation.
243    /// Note: large_file_threshold is ignored in the new implementation
244    pub fn new(_large_file_threshold: usize, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
245        let piece_tree = PieceTree::empty();
246        let line_ending = LineEnding::default();
247        TextBuffer {
248            fs,
249            saved_root: piece_tree.root(),
250            piece_tree,
251            buffers: vec![StringBuffer::new(0, Vec::new())],
252            next_buffer_id: 1,
253            file_path: None,
254            modified: false,
255            recovery_pending: false,
256            large_file: false,
257            is_binary: false,
258            line_ending,
259            original_line_ending: line_ending,
260            saved_file_size: None,
261            version: 0,
262        }
263    }
264
265    /// Current buffer version (monotonic, wraps on overflow)
266    pub fn version(&self) -> u64 {
267        self.version
268    }
269
270    /// Get a reference to the filesystem implementation used by this buffer.
271    pub fn filesystem(&self) -> &Arc<dyn FileSystem + Send + Sync> {
272        &self.fs
273    }
274
275    /// Set the filesystem implementation for this buffer.
276    pub fn set_filesystem(&mut self, fs: Arc<dyn FileSystem + Send + Sync>) {
277        self.fs = fs;
278    }
279
280    #[inline]
281    fn bump_version(&mut self) {
282        self.version = self.version.wrapping_add(1);
283    }
284
285    #[inline]
286    fn mark_content_modified(&mut self) {
287        self.modified = true;
288        self.recovery_pending = true;
289        self.bump_version();
290    }
291
292    /// Create a text buffer from initial content with the given filesystem.
293    pub fn from_bytes(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
294        let bytes = content.len();
295
296        // Auto-detect line ending format from content
297        let line_ending = Self::detect_line_ending(&content);
298
299        // Create initial StringBuffer with ID 0
300        let buffer = StringBuffer::new(0, content);
301        let line_feed_cnt = buffer.line_feed_count();
302
303        let piece_tree = if bytes > 0 {
304            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
305        } else {
306            PieceTree::empty()
307        };
308
309        let saved_root = piece_tree.root();
310
311        TextBuffer {
312            fs,
313            line_ending,
314            original_line_ending: line_ending,
315            piece_tree,
316            saved_root,
317            buffers: vec![buffer],
318            next_buffer_id: 1,
319            file_path: None,
320            modified: false,
321            recovery_pending: false,
322            large_file: false,
323            is_binary: false,
324            saved_file_size: Some(bytes), // Treat initial content as "saved" state
325            version: 0,
326        }
327    }
328
329    /// Create a text buffer from a string with the given filesystem.
330    pub fn from_str(
331        s: &str,
332        _large_file_threshold: usize,
333        fs: Arc<dyn FileSystem + Send + Sync>,
334    ) -> Self {
335        Self::from_bytes(s.as_bytes().to_vec(), fs)
336    }
337
338    /// Create an empty text buffer with the given filesystem.
339    pub fn empty(fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
340        let piece_tree = PieceTree::empty();
341        let saved_root = piece_tree.root();
342        let line_ending = LineEnding::default();
343        TextBuffer {
344            fs,
345            piece_tree,
346            saved_root,
347            buffers: vec![StringBuffer::new(0, Vec::new())],
348            next_buffer_id: 1,
349            file_path: None,
350            modified: false,
351            recovery_pending: false,
352            large_file: false,
353            is_binary: false,
354            line_ending,
355            original_line_ending: line_ending,
356            saved_file_size: None,
357            version: 0,
358        }
359    }
360
361    /// Load a text buffer from a file using the given filesystem.
362    pub fn load_from_file<P: AsRef<Path>>(
363        path: P,
364        large_file_threshold: usize,
365        fs: Arc<dyn FileSystem + Send + Sync>,
366    ) -> anyhow::Result<Self> {
367        let path = path.as_ref();
368
369        // Get file size to determine loading strategy
370        let metadata = fs.metadata(path)?;
371        let file_size = metadata.size as usize;
372
373        // Use threshold parameter or default
374        let threshold = if large_file_threshold > 0 {
375            large_file_threshold
376        } else {
377            DEFAULT_LARGE_FILE_THRESHOLD
378        };
379
380        // Choose loading strategy based on file size
381        if file_size >= threshold {
382            Self::load_large_file(path, file_size, fs)
383        } else {
384            Self::load_small_file(path, fs)
385        }
386    }
387
388    /// Load a small file with full eager loading and line indexing
389    fn load_small_file(path: &Path, fs: Arc<dyn FileSystem + Send + Sync>) -> anyhow::Result<Self> {
390        let contents = fs.read_file(path)?;
391
392        // Detect if this is a binary file
393        let is_binary = Self::detect_binary(&contents);
394
395        // Detect line ending format (CRLF/LF/CR) - used for Enter key insertion
396        let line_ending = Self::detect_line_ending(&contents);
397
398        // Keep original line endings - the view layer handles CRLF display
399        let mut buffer = Self::from_bytes(contents, fs);
400        buffer.file_path = Some(path.to_path_buf());
401        buffer.modified = false;
402        buffer.large_file = false;
403        buffer.is_binary = is_binary;
404        buffer.line_ending = line_ending;
405        buffer.original_line_ending = line_ending;
406        Ok(buffer)
407    }
408
    /// Load a large file with unloaded buffer (no line indexing, lazy loading).
    ///
    /// Only a small prefix of the file is read up front (for binary and
    /// line-ending detection); the rest stays on disk behind an
    /// `Unloaded` buffer and is fetched on demand.
    fn load_large_file(
        path: &Path,
        file_size: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
    ) -> anyhow::Result<Self> {
        use crate::model::piece_tree::{BufferData, BufferLocation};

        // Read a sample of the file to detect if it's binary and line ending format.
        // We read the first 8KB for both binary and line ending detection.
        let sample_size = file_size.min(8 * 1024);
        let sample = fs.read_range(path, 0, sample_size)?;
        let is_binary = Self::detect_binary(&sample);
        let line_ending = Self::detect_line_ending(&sample);

        // Create an unloaded buffer that references the entire file.
        let buffer = StringBuffer {
            id: 0,
            data: BufferData::Unloaded {
                file_path: path.to_path_buf(),
                file_offset: 0,
                bytes: file_size,
            },
        };

        // Create piece tree with a single piece covering the whole file.
        // No line feed count (None) since we're not computing line indexing.
        let piece_tree = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
        } else {
            PieceTree::empty()
        };
        let saved_root = piece_tree.root();

        tracing::debug!(
            "Buffer::load_from_file: loaded {} bytes, saved_file_size={}",
            file_size,
            file_size
        );

        Ok(TextBuffer {
            fs,
            piece_tree,
            saved_root,
            buffers: vec![buffer],
            next_buffer_id: 1,
            file_path: Some(path.to_path_buf()),
            modified: false,
            recovery_pending: false,
            large_file: true,
            is_binary,
            line_ending,
            original_line_ending: line_ending,
            saved_file_size: Some(file_size),
            version: 0,
        })
    }
466
467    /// Save the buffer to its associated file
468    pub fn save(&mut self) -> anyhow::Result<()> {
469        if let Some(path) = &self.file_path {
470            self.save_to_file(path.clone())
471        } else {
472            anyhow::bail!(io::Error::new(
473                io::ErrorKind::NotFound,
474                "No file path associated with buffer",
475            ))
476        }
477    }
478
479    /// Check if we should use in-place writing to preserve file ownership.
480    /// Returns true if the file exists and is owned by a different user.
481    /// On Unix, only root or the file owner can change file ownership with chown.
482    /// When the current user is not the file owner, using atomic write (temp file + rename)
483    /// would change the file's ownership to the current user. To preserve ownership,
484    /// we must write directly to the existing file instead.
485    fn should_use_inplace_write(&self, dest_path: &Path) -> bool {
486        !self.fs.is_owner(dest_path)
487    }
488
    /// Build a write recipe from the piece tree for saving.
    ///
    /// This creates a recipe of Copy and Insert operations that can reconstruct
    /// the buffer content. Copy operations reference unchanged regions in the
    /// source file, while Insert operations contain new/modified data.
    ///
    /// # Returns
    /// A WriteRecipe with the source path, insert data, and sequence of actions.
    ///
    /// # Errors
    /// Fails if a piece references a buffer id that doesn't exist, or if
    /// reading an unloaded region from disk fails.
    fn build_write_recipe(&self) -> io::Result<WriteRecipe> {
        let total = self.total_bytes();

        // Determine the source file for Copy operations (if any).
        // We can only use Copy if:
        // 1. We have a source file path
        // 2. The source file exists
        // 3. No line ending conversion is needed
        let needs_conversion = self.line_ending != self.original_line_ending;
        let src_path_for_copy: Option<&Path> = if needs_conversion {
            None
        } else {
            self.file_path.as_deref().filter(|p| self.fs.exists(p))
        };
        let target_ending = self.line_ending;

        let mut insert_data: Vec<Vec<u8>> = Vec::new();
        let mut actions: Vec<RecipeAction> = Vec::new();

        // Walk every piece in document order; each piece yields exactly one action.
        for piece_view in self.piece_tree.iter_pieces_in_range(0, total) {
            let buffer_id = piece_view.location.buffer_id();
            let buffer = self.buffers.get(buffer_id).ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!("Buffer {} not found", buffer_id),
                )
            })?;

            match &buffer.data {
                // Unloaded buffer: can use Copy if same source file, else load and send
                BufferData::Unloaded {
                    file_path,
                    file_offset,
                    ..
                } => {
                    // Can only use Copy if:
                    // - This is a Stored piece (original file content)
                    // - We have a valid source for copying
                    // - This buffer is from that source
                    // - No line ending conversion
                    let can_copy = matches!(piece_view.location, BufferLocation::Stored(_))
                        && src_path_for_copy.is_some_and(|src| file_path == src);

                    if can_copy {
                        let src_offset = (*file_offset + piece_view.buffer_offset) as u64;
                        actions.push(RecipeAction::Copy {
                            offset: src_offset,
                            len: piece_view.bytes as u64,
                        });
                        continue;
                    }

                    // Need to load and send this unloaded region.
                    // This happens when: different source file, or line ending conversion.
                    let data = self.fs.read_range(
                        file_path,
                        (*file_offset + piece_view.buffer_offset) as u64,
                        piece_view.bytes,
                    )?;

                    let data = if needs_conversion {
                        Self::convert_line_endings_to(&data, target_ending)
                    } else {
                        data
                    };

                    let index = insert_data.len();
                    insert_data.push(data);
                    actions.push(RecipeAction::Insert { index });
                }

                // Loaded data: send as Insert
                BufferData::Loaded { data, .. } => {
                    let start = piece_view.buffer_offset;
                    let end = start + piece_view.bytes;
                    let chunk = &data[start..end];

                    let chunk = if needs_conversion {
                        Self::convert_line_endings_to(chunk, target_ending)
                    } else {
                        chunk.to_vec()
                    };

                    let index = insert_data.len();
                    insert_data.push(chunk);
                    actions.push(RecipeAction::Insert { index });
                }
            }
        }

        Ok(WriteRecipe {
            src_path: src_path_for_copy.map(|p| p.to_path_buf()),
            insert_data,
            actions,
        })
    }
593
594    /// Create a temporary file for saving.
595    ///
596    /// Tries to create the file in the same directory as the destination file first
597    /// to allow for an atomic rename. If that fails (e.g., due to directory permissions),
598    /// falls back to the system temporary directory.
599    fn create_temp_file(
600        &self,
601        dest_path: &Path,
602    ) -> io::Result<(PathBuf, Box<dyn crate::model::filesystem::FileWriter>)> {
603        // Try creating in same directory first
604        let same_dir_temp = self.fs.temp_path_for(dest_path);
605        match self.fs.create_file(&same_dir_temp) {
606            Ok(file) => Ok((same_dir_temp, file)),
607            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
608                // Fallback to system temp directory
609                let temp_path = self.fs.unique_temp_path(dest_path);
610                let file = self.fs.create_file(&temp_path)?;
611                Ok((temp_path, file))
612            }
613            Err(e) => Err(e),
614        }
615    }
616
    /// Save the buffer to a specific file.
    ///
    /// Uses the write recipe approach for both local and remote filesystems:
    /// - Copy ops reference unchanged regions in the source file
    /// - Insert ops contain new/modified data
    ///
    /// For remote filesystems, the recipe is sent to the agent which reconstructs
    /// the file server-side, avoiding transfer of unchanged content.
    ///
    /// For local filesystems with ownership concerns (file owned by another user),
    /// uses in-place writing to preserve ownership. Otherwise uses atomic writes.
    ///
    /// If the line ending format has been changed (via set_line_ending), all content
    /// will be converted to the new format during save.
    ///
    /// # Errors
    /// On a local PermissionDenied, stages the content in a temp file and
    /// returns a `SudoSaveRequired` (wrapped in `anyhow::Error`) describing
    /// how to complete the save with elevated privileges.
    pub fn save_to_file<P: AsRef<Path>>(&mut self, path: P) -> anyhow::Result<()> {
        let dest_path = path.as_ref();
        let total = self.total_bytes();

        // Handle empty files: nothing to patch, just truncate/write.
        if total == 0 {
            self.fs.write_file(dest_path, &[])?;
            self.finalize_save(dest_path)?;
            return Ok(());
        }

        // Build the write recipe (unified for all filesystem types)
        let recipe = self.build_write_recipe()?;
        let ops = recipe.to_write_ops();

        // Check if we need in-place writing to preserve file ownership (local only).
        // Remote filesystems handle this differently.
        let is_local = self.fs.remote_connection_info().is_none();
        let use_inplace = is_local && self.should_use_inplace_write(dest_path);

        if use_inplace {
            // In-place write: write directly to preserve ownership
            self.save_with_inplace_write(dest_path, &recipe)?;
        } else if !recipe.has_copy_ops() && !is_local {
            // Remote with no Copy ops: use write_file directly (more efficient)
            let data = recipe.flatten_inserts();
            self.fs.write_file(dest_path, &data)?;
        } else if is_local {
            // Local: use write_file or write_patched with sudo fallback
            let write_result = if !recipe.has_copy_ops() {
                let data = recipe.flatten_inserts();
                self.fs.write_file(dest_path, &data)
            } else {
                let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
                self.fs.write_patched(src_for_patch, dest_path, &ops)
            };

            if let Err(e) = write_result {
                if e.kind() == io::ErrorKind::PermissionDenied {
                    // Create temp file and return sudo error; metadata is
                    // captured first so the sudo path can restore ownership.
                    let original_metadata = self.fs.metadata_if_exists(dest_path);
                    let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
                    self.write_recipe_to_file(&mut temp_file, &recipe)?;
                    temp_file.sync_all()?;
                    drop(temp_file);
                    return Err(self.make_sudo_error(temp_path, dest_path, original_metadata));
                }
                return Err(e.into());
            }
        } else {
            // Remote with Copy ops: use write_patched
            let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
            self.fs.write_patched(src_for_patch, dest_path, &ops)?;
        }

        self.finalize_save(dest_path)?;
        Ok(())
    }
689
    /// Write using in-place mode to preserve file ownership.
    ///
    /// This is used when the file is owned by a different user and we need
    /// to write directly to the existing file to preserve its ownership.
    ///
    /// Fallback chain on PermissionDenied: direct open-for-write →
    /// `write_patched` → stage a temp file and return `SudoSaveRequired`.
    fn save_with_inplace_write(
        &self,
        dest_path: &Path,
        recipe: &WriteRecipe,
    ) -> anyhow::Result<()> {
        // Captured before any write so the sudo path can restore ownership/mode.
        let original_metadata = self.fs.metadata_if_exists(dest_path);

        match self.fs.open_file_for_write(dest_path) {
            Ok(mut out_file) => {
                // Write recipe content directly to file
                self.write_recipe_to_file(&mut out_file, recipe)?;
                out_file.sync_all()?;
                Ok(())
            }
            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                // Fall back to atomic write, which will likely also fail
                // and trigger sudo fallback
                let ops = recipe.to_write_ops();
                let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);

                match self.fs.write_patched(src_for_patch, dest_path, &ops) {
                    Ok(()) => Ok(()),
                    Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                        // Create temp file for sudo fallback
                        let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
                        self.write_recipe_to_file(&mut temp_file, recipe)?;
                        temp_file.sync_all()?;
                        drop(temp_file);
                        Err(self.make_sudo_error(temp_path, dest_path, original_metadata))
                    }
                    Err(e) => Err(e.into()),
                }
            }
            Err(e) => Err(e.into()),
        }
    }
730
731    /// Write the recipe content to a file writer.
732    fn write_recipe_to_file(
733        &self,
734        out_file: &mut Box<dyn crate::model::filesystem::FileWriter>,
735        recipe: &WriteRecipe,
736    ) -> io::Result<()> {
737        for action in &recipe.actions {
738            match action {
739                RecipeAction::Copy { offset, len } => {
740                    // Read from source and write to output
741                    let src_path = recipe.src_path.as_ref().ok_or_else(|| {
742                        io::Error::new(io::ErrorKind::InvalidData, "Copy action without source")
743                    })?;
744                    let data = self.fs.read_range(src_path, *offset, *len as usize)?;
745                    out_file.write_all(&data)?;
746                }
747                RecipeAction::Insert { index } => {
748                    out_file.write_all(&recipe.insert_data[*index])?;
749                }
750            }
751        }
752        Ok(())
753    }
754
    /// Finalize save state after successful write.
    ///
    /// Records the new on-disk size, points the buffer at `dest_path`,
    /// re-bases large-file pieces onto the freshly written file, and
    /// snapshots the tree as the new "saved" state.
    fn finalize_save(&mut self, dest_path: &Path) -> anyhow::Result<()> {
        let new_size = self.fs.metadata(dest_path)?.size as usize;
        tracing::debug!(
            "Buffer::save: updating saved_file_size from {:?} to {}",
            self.saved_file_size,
            new_size
        );
        self.saved_file_size = Some(new_size);
        self.file_path = Some(dest_path.to_path_buf());

        // For large files, consolidate the piece tree to synchronize with new disk offsets.
        // Without this, pieces referencing "original" file would use old offsets on new content.
        if self.large_file {
            self.consolidate_large_file(dest_path, new_size);
        }

        self.mark_saved_snapshot();
        self.original_line_ending = self.line_ending;
        Ok(())
    }
776
777    /// Finalize buffer state after an external save operation (e.g., via sudo).
778    ///
779    /// This updates the saved snapshot and file size to match the new state on disk.
780    pub fn finalize_external_save(&mut self, dest_path: PathBuf) -> anyhow::Result<()> {
781        let new_size = self.fs.metadata(&dest_path)?.size as usize;
782        self.saved_file_size = Some(new_size);
783        self.file_path = Some(dest_path.clone());
784
785        // For large files, consolidate the piece tree to synchronize with new disk offsets.
786        if self.large_file {
787            self.consolidate_large_file(&dest_path, new_size);
788        }
789
790        self.mark_saved_snapshot();
791        self.original_line_ending = self.line_ending;
792        Ok(())
793    }
794
795    /// Consolidate large file piece tree into a single piece pointing to the new file.
796    /// This ensures that subsequent operations correctly reference the new content and offsets.
797    fn consolidate_large_file(&mut self, path: &Path, file_size: usize) {
798        let buffer = StringBuffer {
799            id: 0,
800            data: BufferData::Unloaded {
801                file_path: path.to_path_buf(),
802                file_offset: 0,
803                bytes: file_size,
804            },
805        };
806
807        self.piece_tree = if file_size > 0 {
808            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
809        } else {
810            PieceTree::empty()
811        };
812
813        self.buffers = vec![buffer];
814        self.next_buffer_id = 1;
815
816        tracing::debug!(
817            "Buffer::consolidate_large_file: consolidated into single piece of {} bytes",
818            file_size
819        );
820    }
821
822    /// Internal helper to create a SudoSaveRequired error.
823    fn make_sudo_error(
824        &self,
825        temp_path: PathBuf,
826        dest_path: &Path,
827        original_metadata: Option<FileMetadata>,
828    ) -> anyhow::Error {
829        #[cfg(unix)]
830        let (uid, gid, mode) = if let Some(ref meta) = original_metadata {
831            (
832                meta.uid.unwrap_or(0),
833                meta.gid.unwrap_or(0),
834                meta.permissions
835                    .as_ref()
836                    .map(|p| p.mode() & 0o7777)
837                    .unwrap_or(0),
838            )
839        } else {
840            (0, 0, 0)
841        };
842        #[cfg(not(unix))]
843        let (uid, gid, mode) = (0u32, 0u32, 0u32);
844
845        let _ = original_metadata; // suppress unused warning on non-Unix
846
847        anyhow::anyhow!(SudoSaveRequired {
848            temp_path,
849            dest_path: dest_path.to_path_buf(),
850            uid,
851            gid,
852            mode,
853        })
854    }
855
856    /// Get the total number of bytes in the document
857    pub fn total_bytes(&self) -> usize {
858        self.piece_tree.total_bytes()
859    }
860
861    /// Get the total number of lines in the document
862    /// Uses the piece tree's integrated line tracking
863    /// Returns None if line count is unknown (e.g., for large files without line indexing)
864    pub fn line_count(&self) -> Option<usize> {
865        self.piece_tree.line_count()
866    }
867
868    /// Snapshot the current tree as the saved baseline
869    pub fn mark_saved_snapshot(&mut self) {
870        self.saved_root = self.piece_tree.root();
871        self.modified = false;
872    }
873
874    /// Diff the current piece tree against the last saved snapshot.
875    ///
876    /// This compares actual byte content, not just tree structure. This means
877    /// that if you delete text and then paste it back, the diff will correctly
878    /// show no changes (even though the tree structure differs).
879    ///
880    /// Uses a two-phase algorithm for efficiency:
881    /// - Phase 1: Fast structure-based diff to find changed byte ranges (O(num_leaves))
882    /// - Phase 2: Only compare actual content within changed ranges (O(edit_size))
883    ///
884    /// This is O(edit_size) instead of O(file_size) for small edits in large files.
885    pub fn diff_since_saved(&self) -> PieceTreeDiff {
886        // First, quick check: if tree roots are identical (Arc pointer equality),
887        // the content is definitely the same.
888        if Arc::ptr_eq(&self.saved_root, &self.piece_tree.root()) {
889            return PieceTreeDiff {
890                equal: true,
891                byte_ranges: Vec::new(),
892                line_ranges: Some(Vec::new()),
893            };
894        }
895
896        // Phase 1: Fast structure-based diff to find which byte ranges differ
897        // This is O(number of leaves) - very fast even for large files
898        let structure_diff = self.diff_trees_by_structure();
899
900        // If structure says trees are equal (same pieces in same order), we're done
901        if structure_diff.equal {
902            return structure_diff;
903        }
904
905        // Phase 2: For small changed regions, verify with actual content comparison
906        // This handles the case where different pieces contain identical content
907        // (e.g., delete text then paste it back)
908        let total_changed_bytes: usize = structure_diff
909            .byte_ranges
910            .iter()
911            .map(|r| r.end.saturating_sub(r.start))
912            .sum();
913
914        // Only do content verification if the changed region is reasonably small
915        // For large changes, trust the structure-based diff
916        const MAX_VERIFY_BYTES: usize = 64 * 1024; // 64KB threshold for verification
917
918        if total_changed_bytes <= MAX_VERIFY_BYTES && !structure_diff.byte_ranges.is_empty() {
919            // Check if content in the changed ranges is actually different
920            if self.verify_content_differs_in_ranges(&structure_diff.byte_ranges) {
921                // Content actually differs - return the structure diff result
922                return structure_diff;
923            } else {
924                // Content is the same despite structure differences (rare case: undo/redo)
925                return PieceTreeDiff {
926                    equal: true,
927                    byte_ranges: Vec::new(),
928                    line_ranges: Some(Vec::new()),
929                };
930            }
931        }
932
933        // For large changes or when we can't verify, trust the structure diff
934        structure_diff
935    }
936
937    /// Check if the actual byte content differs in the given ranges.
938    /// Returns true if content differs, false if content is identical.
939    fn verify_content_differs_in_ranges(&self, byte_ranges: &[std::ops::Range<usize>]) -> bool {
940        let saved_bytes = self.tree_total_bytes(&self.saved_root);
941        let current_bytes = self.piece_tree.total_bytes();
942
943        // Different total sizes means content definitely differs
944        if saved_bytes != current_bytes {
945            return true;
946        }
947
948        // For each changed range, compare the actual bytes
949        for range in byte_ranges {
950            if range.start >= range.end {
951                continue;
952            }
953
954            // Extract bytes from saved tree for this range
955            let saved_slice =
956                self.extract_range_from_tree(&self.saved_root, range.start, range.end);
957            // Extract bytes from current tree for this range
958            let current_slice = self.get_text_range(range.start, range.end);
959
960            match (saved_slice, current_slice) {
961                (Some(saved), Some(current)) => {
962                    if saved != current {
963                        return true; // Content differs
964                    }
965                }
966                _ => {
967                    // Couldn't read content, assume it differs to be safe
968                    return true;
969                }
970            }
971        }
972
973        // All ranges have identical content
974        false
975    }
976
977    /// Extract a byte range from a saved tree root
978    fn extract_range_from_tree(
979        &self,
980        root: &Arc<crate::model::piece_tree::PieceTreeNode>,
981        start: usize,
982        end: usize,
983    ) -> Option<Vec<u8>> {
984        let mut result = Vec::with_capacity(end.saturating_sub(start));
985        self.collect_range_from_node(root, start, end, 0, &mut result)?;
986        Some(result)
987    }
988
    /// Recursively collect bytes in `[range_start, range_end)` from the
    /// subtree rooted at `node`, appending them to `result` in document order.
    ///
    /// `node_offset` is the document offset of this subtree's first byte.
    /// Returns `None` (aborting the whole collection) if a referenced buffer
    /// is missing/unloaded or a computed slice is out of bounds.
    fn collect_range_from_node(
        &self,
        node: &Arc<crate::model::piece_tree::PieceTreeNode>,
        range_start: usize,
        range_end: usize,
        node_offset: usize,
        result: &mut Vec<u8>,
    ) -> Option<()> {
        use crate::model::piece_tree::PieceTreeNode;

        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                // Document offset where the right subtree begins.
                let left_end = node_offset + left_bytes;

                // Check if range overlaps with left subtree
                if range_start < left_end {
                    self.collect_range_from_node(
                        left,
                        range_start,
                        range_end,
                        node_offset,
                        result,
                    )?;
                }

                // Check if range overlaps with right subtree
                if range_end > left_end {
                    self.collect_range_from_node(right, range_start, range_end, left_end, result)?;
                }
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                ..
            } => {
                let node_end = node_offset + bytes;

                // Check if this leaf overlaps with our range
                if range_start < node_end && range_end > node_offset {
                    // `?` propagates None if the buffer is missing or unloaded.
                    let buf = self.buffers.get(location.buffer_id())?;
                    let data = buf.get_data()?;

                    // Clip the requested range to this leaf's extent,
                    // expressed relative to the leaf's start.
                    let leaf_start = range_start.saturating_sub(node_offset);
                    let leaf_end = (range_end - node_offset).min(*bytes);

                    if leaf_start < leaf_end {
                        // `offset` is the piece's start within its buffer.
                        let slice = data.get(*offset + leaf_start..*offset + leaf_end)?;
                        result.extend_from_slice(slice);
                    }
                }
            }
        }
        Some(())
    }
1051
1052    /// Helper to get total bytes from a tree root
1053    fn tree_total_bytes(&self, root: &Arc<crate::model::piece_tree::PieceTreeNode>) -> usize {
1054        use crate::model::piece_tree::PieceTreeNode;
1055        match root.as_ref() {
1056            PieceTreeNode::Internal {
1057                left_bytes, right, ..
1058            } => left_bytes + self.tree_total_bytes(right),
1059            PieceTreeNode::Leaf { bytes, .. } => *bytes,
1060        }
1061    }
1062
1063    /// Structure-based diff comparing piece tree leaves
1064    fn diff_trees_by_structure(&self) -> PieceTreeDiff {
1065        crate::model::piece_tree_diff::diff_piece_trees(
1066            &self.saved_root,
1067            &self.piece_tree.root(),
1068            &|leaf, start, len| {
1069                if len == 0 {
1070                    return Some(0);
1071                }
1072                let buf = self.buffers.get(leaf.location.buffer_id())?;
1073                let data = buf.get_data()?;
1074                let start = leaf.offset + start;
1075                let end = start + len;
1076                let slice = data.get(start..end)?;
1077                let line_feeds = slice.iter().filter(|&&b| b == b'\n').count();
1078                Some(line_feeds)
1079            },
1080        )
1081    }
1082
1083    /// Convert a byte offset to a line/column position
1084    pub fn offset_to_position(&self, offset: usize) -> Option<Position> {
1085        self.piece_tree
1086            .offset_to_position(offset, &self.buffers)
1087            .map(|(line, column)| Position { line, column })
1088    }
1089
1090    /// Convert a line/column position to a byte offset
1091    pub fn position_to_offset(&self, position: Position) -> usize {
1092        self.piece_tree
1093            .position_to_offset(position.line, position.column, &self.buffers)
1094    }
1095
1096    /// Insert text at the given byte offset
1097    pub fn insert_bytes(&mut self, offset: usize, text: Vec<u8>) -> Cursor {
1098        if text.is_empty() {
1099            return self.piece_tree.cursor_at_offset(offset);
1100        }
1101
1102        // Mark as modified (updates version)
1103        self.mark_content_modified();
1104
1105        // Count line feeds in the text to insert
1106        let line_feed_cnt = Some(text.iter().filter(|&&b| b == b'\n').count());
1107
1108        // Optimization: try to append to existing buffer if insertion is at piece boundary
1109        let (buffer_location, buffer_offset, text_len) =
1110            if let Some(append_info) = self.try_append_to_existing_buffer(offset, &text) {
1111                append_info
1112            } else {
1113                // Create a new StringBuffer for this insertion
1114                let buffer_id = self.next_buffer_id;
1115                self.next_buffer_id += 1;
1116                let buffer = StringBuffer::new(buffer_id, text.clone());
1117                self.buffers.push(buffer);
1118                (BufferLocation::Added(buffer_id), 0, text.len())
1119            };
1120
1121        // Update piece tree (need to pass buffers reference)
1122        self.piece_tree.insert(
1123            offset,
1124            buffer_location,
1125            buffer_offset,
1126            text_len,
1127            line_feed_cnt,
1128            &self.buffers,
1129        )
1130    }
1131
1132    /// Try to append to an existing buffer if insertion point aligns with buffer end
1133    /// Returns (BufferLocation, buffer_offset, text_len) if append succeeds, None otherwise
1134    fn try_append_to_existing_buffer(
1135        &mut self,
1136        offset: usize,
1137        text: &[u8],
1138    ) -> Option<(BufferLocation, usize, usize)> {
1139        // Only optimize for non-empty insertions after existing content
1140        if text.is_empty() || offset == 0 {
1141            return None;
1142        }
1143
1144        // Find the piece containing the byte just before the insertion point
1145        // This avoids the saturating_sub issue
1146        let piece_info = self.piece_tree.find_by_offset(offset - 1)?;
1147
1148        // Check if insertion is exactly at the end of this piece
1149        // offset_in_piece tells us where (offset-1) is within the piece
1150        // For insertion to be at piece end, (offset-1) must be the last byte
1151        let offset_in_piece = piece_info.offset_in_piece?;
1152        if offset_in_piece + 1 != piece_info.bytes {
1153            return None; // Not at the end of the piece
1154        }
1155
1156        // Only append to "Added" buffers (not original Stored buffers)
1157        if !matches!(piece_info.location, BufferLocation::Added(_)) {
1158            return None;
1159        }
1160
1161        let buffer_id = piece_info.location.buffer_id();
1162        let buffer = self.buffers.get_mut(buffer_id)?;
1163
1164        // Check if buffer is loaded
1165        let buffer_len = buffer.get_data()?.len();
1166
1167        // Check if this piece ends exactly at the end of its buffer
1168        if piece_info.offset + piece_info.bytes != buffer_len {
1169            return None;
1170        }
1171
1172        // Perfect! Append to this buffer
1173        let append_offset = buffer.append(text);
1174
1175        Some((piece_info.location, append_offset, text.len()))
1176    }
1177
1178    /// Insert text (from &str) at the given byte offset
1179    pub fn insert(&mut self, offset: usize, text: &str) {
1180        self.insert_bytes(offset, text.as_bytes().to_vec());
1181    }
1182
1183    /// Insert text at a line/column position
1184    /// This now uses the optimized piece_tree.insert_at_position() for a single traversal
1185    pub fn insert_at_position(&mut self, position: Position, text: Vec<u8>) -> Cursor {
1186        if text.is_empty() {
1187            let offset = self.position_to_offset(position);
1188            return self.piece_tree.cursor_at_offset(offset);
1189        }
1190
1191        self.mark_content_modified();
1192
1193        // Count line feeds in the text to insert
1194        let line_feed_cnt = text.iter().filter(|&&b| b == b'\n').count();
1195
1196        // Create a new StringBuffer for this insertion
1197        let buffer_id = self.next_buffer_id;
1198        self.next_buffer_id += 1;
1199        let buffer = StringBuffer::new(buffer_id, text.clone());
1200        self.buffers.push(buffer);
1201
1202        // Use the optimized position-based insertion (single traversal)
1203        self.piece_tree.insert_at_position(
1204            position.line,
1205            position.column,
1206            BufferLocation::Added(buffer_id),
1207            0,
1208            text.len(),
1209            line_feed_cnt,
1210            &self.buffers,
1211        )
1212    }
1213
1214    /// Delete text starting at the given byte offset
1215    pub fn delete_bytes(&mut self, offset: usize, bytes: usize) {
1216        if bytes == 0 || offset >= self.total_bytes() {
1217            return;
1218        }
1219
1220        // Update piece tree
1221        self.piece_tree.delete(offset, bytes, &self.buffers);
1222
1223        self.mark_content_modified();
1224    }
1225
1226    /// Delete text in a range
1227    pub fn delete(&mut self, range: Range<usize>) {
1228        if range.end > range.start {
1229            self.delete_bytes(range.start, range.end - range.start);
1230        }
1231    }
1232
1233    /// Delete text in a line/column range
1234    /// This now uses the optimized piece_tree.delete_position_range() for a single traversal
1235    pub fn delete_range(&mut self, start: Position, end: Position) {
1236        // Use the optimized position-based deletion
1237        self.piece_tree.delete_position_range(
1238            start.line,
1239            start.column,
1240            end.line,
1241            end.column,
1242            &self.buffers,
1243        );
1244        self.mark_content_modified();
1245    }
1246
1247    /// Replace the entire buffer content with new content
1248    /// This is an O(n) operation that rebuilds the piece tree in a single pass,
1249    /// avoiding the O(n²) complexity of applying individual edits.
1250    ///
1251    /// This is used for bulk operations like "replace all" where applying
1252    /// individual edits would be prohibitively slow.
1253    pub fn replace_content(&mut self, new_content: &str) {
1254        let bytes = new_content.len();
1255        let content_bytes = new_content.as_bytes().to_vec();
1256
1257        // Count line feeds in the new content
1258        let line_feed_cnt = content_bytes.iter().filter(|&&b| b == b'\n').count();
1259
1260        // Create a new StringBuffer for the new content
1261        let buffer_id = self.next_buffer_id;
1262        self.next_buffer_id += 1;
1263        let buffer = StringBuffer::new(buffer_id, content_bytes);
1264        self.buffers.push(buffer);
1265
1266        // Rebuild the piece tree with a single piece containing all the new content
1267        if bytes > 0 {
1268            self.piece_tree = PieceTree::new(
1269                BufferLocation::Added(buffer_id),
1270                0,
1271                bytes,
1272                Some(line_feed_cnt),
1273            );
1274        } else {
1275            self.piece_tree = PieceTree::empty();
1276        }
1277
1278        self.mark_content_modified();
1279    }
1280
1281    /// Restore a previously saved piece tree (for undo of BulkEdit)
1282    /// This is O(1) because PieceTree uses Arc internally
1283    pub fn restore_piece_tree(&mut self, tree: &Arc<PieceTree>) {
1284        self.piece_tree = (**tree).clone();
1285        self.mark_content_modified();
1286    }
1287
1288    /// Get the current piece tree as an Arc (for saving before BulkEdit)
1289    /// This is O(1) - creates an Arc wrapper around a clone of the tree
1290    pub fn snapshot_piece_tree(&self) -> Arc<PieceTree> {
1291        Arc::new(self.piece_tree.clone())
1292    }
1293
1294    /// Apply bulk edits efficiently in a single pass
1295    /// Returns the net change in bytes
1296    pub fn apply_bulk_edits(&mut self, edits: &[(usize, usize, &str)]) -> isize {
1297        // Pre-allocate buffers for all insert texts (only non-empty texts)
1298        // This avoids the borrow conflict in the closure
1299        // IMPORTANT: Only add entries for non-empty texts because the closure
1300        // is only called for edits with non-empty insert text
1301        let mut buffer_info: Vec<(BufferLocation, usize, usize, Option<usize>)> = Vec::new();
1302
1303        for (_, _, text) in edits {
1304            if !text.is_empty() {
1305                let buffer_id = self.next_buffer_id;
1306                self.next_buffer_id += 1;
1307                let content = text.as_bytes().to_vec();
1308                let lf_cnt = content.iter().filter(|&&b| b == b'\n').count();
1309                let bytes = content.len();
1310                let buffer = StringBuffer::new(buffer_id, content);
1311                self.buffers.push(buffer);
1312                buffer_info.push((BufferLocation::Added(buffer_id), 0, bytes, Some(lf_cnt)));
1313            }
1314            // No placeholder for empty texts - the closure is only called for non-empty texts
1315        }
1316
1317        // Now call apply_bulk_edits with a simple index-based closure
1318        let mut idx = 0;
1319        let delta = self
1320            .piece_tree
1321            .apply_bulk_edits(edits, &self.buffers, |_text| {
1322                let info = buffer_info[idx];
1323                idx += 1;
1324                info
1325            });
1326
1327        self.mark_content_modified();
1328        delta
1329    }
1330
    /// Get text from a byte offset range (read-only).
    ///
    /// `offset` is the document byte offset to start at and `bytes` is the
    /// number of bytes to read (a COUNT, not an end offset).
    ///
    /// Returns `None` if any buffer in the range is unloaded (lazy loading).
    /// PRIVATE: external code should use `get_text_range_mut()`, which
    /// handles lazy loading on demand.
    fn get_text_range(&self, offset: usize, bytes: usize) -> Option<Vec<u8>> {
        if bytes == 0 {
            return Some(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        let end_offset = offset + bytes;
        let mut collected = 0;

        // Use the efficient piece iterator (single O(log n) traversal + O(N) iteration)
        for piece_view in self.piece_tree.iter_pieces_in_range(offset, end_offset) {
            let buffer_id = piece_view.location.buffer_id();
            if let Some(buffer) = self.buffers.get(buffer_id) {
                // Calculate the range to read from this piece
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clip to the requested range
                let read_start = offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    // Return None if buffer is unloaded (type-safe)
                    let data = buffer.get_data()?;

                    // NOTE(review): an out-of-bounds piece range is silently
                    // skipped here (no None, no error), so the result can be
                    // shorter than requested in that case — confirm intended.
                    if buffer_end <= data.len() {
                        result.extend_from_slice(&data[buffer_start..buffer_end]);
                        collected += bytes_to_read;

                        // Stop early once the requested amount is gathered.
                        if collected >= bytes {
                            break;
                        }
                    }
                }
            }
        }

        Some(result)
    }
1381
    /// Get text from a byte offset range with lazy loading.
    ///
    /// `offset` is the document byte offset and `bytes` a byte count; reads
    /// past EOF are clamped to the document length. Unloaded chunks are
    /// loaded on demand: small pieces load their whole backing buffer, while
    /// pieces larger than `LOAD_CHUNK_SIZE` are split and only an aligned
    /// chunk is loaded (the tree is mutated, then iteration restarts).
    ///
    /// Returns an error if loading fails or if data cannot be read for any
    /// reason (no silent truncation, unlike the read-only variant).
    pub fn get_text_range_mut(&mut self, offset: usize, bytes: usize) -> Result<Vec<u8>> {
        if bytes == 0 {
            return Ok(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        // Clamp end_offset to buffer length to handle reads beyond EOF
        let end_offset = (offset + bytes).min(self.len());
        let mut current_offset = offset;

        // Keep iterating until we've collected all requested bytes
        while current_offset < end_offset {
            let mut made_progress = false;
            let mut restarted_iteration = false;

            // Use the efficient piece iterator (single O(log n) traversal + O(N) iteration)
            for piece_view in self
                .piece_tree
                .iter_pieces_in_range(current_offset, end_offset)
            {
                let buffer_id = piece_view.location.buffer_id();

                // Check if buffer needs loading
                let needs_loading = self
                    .buffers
                    .get(buffer_id)
                    .map(|b| !b.is_loaded())
                    .unwrap_or(false);

                if needs_loading {
                    // Check if piece is too large for full loading
                    if piece_view.bytes > LOAD_CHUNK_SIZE {
                        // Split large piece into chunks
                        let piece_start_in_doc = piece_view.doc_offset;
                        let offset_in_piece = current_offset.saturating_sub(piece_start_in_doc);

                        // Calculate chunk boundaries aligned to CHUNK_ALIGNMENT
                        let chunk_start_in_buffer = (piece_view.buffer_offset + offset_in_piece)
                            / CHUNK_ALIGNMENT
                            * CHUNK_ALIGNMENT;
                        let chunk_bytes = LOAD_CHUNK_SIZE.min(
                            (piece_view.buffer_offset + piece_view.bytes)
                                .saturating_sub(chunk_start_in_buffer),
                        );

                        // Calculate document offsets for splitting
                        let chunk_start_offset_in_piece =
                            chunk_start_in_buffer.saturating_sub(piece_view.buffer_offset);
                        let split_start_in_doc = piece_start_in_doc + chunk_start_offset_in_piece;
                        let split_end_in_doc = split_start_in_doc + chunk_bytes;

                        // Split the piece to isolate the chunk (only where the
                        // chunk does not already start/end on a piece boundary)
                        if chunk_start_offset_in_piece > 0 {
                            self.piece_tree
                                .split_at_offset(split_start_in_doc, &self.buffers);
                        }
                        if split_end_in_doc < piece_start_in_doc + piece_view.bytes {
                            self.piece_tree
                                .split_at_offset(split_end_in_doc, &self.buffers);
                        }

                        // Create a new buffer for this chunk
                        let chunk_buffer = self
                            .buffers
                            .get(buffer_id)
                            .context("Buffer not found")?
                            .create_chunk_buffer(
                                self.next_buffer_id,
                                chunk_start_in_buffer,
                                chunk_bytes,
                            )
                            .context("Failed to create chunk buffer")?;

                        self.next_buffer_id += 1;
                        let new_buffer_id = chunk_buffer.id;
                        self.buffers.push(chunk_buffer);

                        // Update the piece to reference the new chunk buffer
                        self.piece_tree.replace_buffer_reference(
                            buffer_id,
                            piece_view.buffer_offset + chunk_start_offset_in_piece,
                            chunk_bytes,
                            BufferLocation::Added(new_buffer_id),
                        );

                        // Load the chunk buffer using the FileSystem trait
                        self.buffers
                            .get_mut(new_buffer_id)
                            .context("Chunk buffer not found")?
                            .load(&*self.fs)
                            .context("Failed to load chunk")?;

                        // The tree was mutated; the current iterator's piece
                        // views are stale, so restart the outer while loop.
                        restarted_iteration = true;
                        break;
                    } else {
                        // Piece is small enough, load the entire buffer
                        self.buffers
                            .get_mut(buffer_id)
                            .context("Buffer not found")?
                            .load(&*self.fs)
                            .context("Failed to load buffer")?;
                    }
                }

                // Calculate the range to read from this piece
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clip to the requested range
                let read_start = current_offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    // Buffer should be loaded now
                    let buffer = self.buffers.get(buffer_id).context("Buffer not found")?;
                    let data = buffer
                        .get_data()
                        .context("Buffer data unavailable after load")?;

                    anyhow::ensure!(
                        buffer_end <= data.len(),
                        "Buffer range out of bounds: requested {}..{}, buffer size {}",
                        buffer_start,
                        buffer_end,
                        data.len()
                    );

                    result.extend_from_slice(&data[buffer_start..buffer_end]);
                    current_offset = read_end;
                    made_progress = true;
                }
            }

            // If we didn't make progress and didn't restart iteration, this is an error
            // (otherwise the while loop would spin forever).
            if !made_progress && !restarted_iteration {
                tracing::error!(
                    "get_text_range_mut: No progress at offset {} (requested range: {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
                tracing::error!(
                    "Piece tree stats: {} total bytes",
                    self.piece_tree.stats().total_bytes
                );
                anyhow::bail!(
                    "Failed to read data at offset {}: no progress made (requested {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
            }
        }

        Ok(result)
    }
1554
1555    /// Prepare a viewport for rendering
1556    ///
1557    /// This is called before rendering with &mut access to pre-load all data
1558    /// that will be needed for the viewport. It estimates the number of bytes
1559    /// needed based on the line count and pre-loads them.
1560    ///
1561    /// # Arguments
1562    /// * `start_offset` - The byte offset where the viewport starts
1563    /// * `line_count` - The number of lines to prepare (estimate)
1564    ///
1565    /// # Returns
1566    /// Ok(()) if preparation succeeded, Err if loading failed
1567    pub fn prepare_viewport(&mut self, start_offset: usize, line_count: usize) -> Result<()> {
1568        // Estimate how many bytes we need (pessimistic assumption)
1569        // Average line length is typically 80-100 bytes, but we use 200 to be safe
1570        let estimated_bytes = line_count.saturating_mul(200);
1571
1572        // Cap the estimate at the remaining bytes in the document
1573        let remaining_bytes = self.total_bytes().saturating_sub(start_offset);
1574        let bytes_to_load = estimated_bytes.min(remaining_bytes);
1575
1576        // Pre-load with full chunk-splitting support
1577        // This may load more than we need, but ensures all data is available
1578        self.get_text_range_mut(start_offset, bytes_to_load)?;
1579
1580        Ok(())
1581    }
1582
1583    /// Get all text as a single Vec<u8>
1584    /// Returns None if any buffers are unloaded (lazy loading)
1585    /// CRATE-PRIVATE: External code should use get_text_range_mut() or DocumentModel methods
1586    pub(crate) fn get_all_text(&self) -> Option<Vec<u8>> {
1587        self.get_text_range(0, self.total_bytes())
1588    }
1589
1590    /// Get all text as a String
1591    /// Returns None if any buffers are unloaded (lazy loading)
1592    /// CRATE-PRIVATE: External code should use get_text_range_mut() or DocumentModel methods
1593    pub(crate) fn get_all_text_string(&self) -> Option<String> {
1594        self.get_all_text()
1595            .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
1596    }
1597
1598    /// Get text from a byte range as bytes
1599    /// CRATE-PRIVATE: Returns empty vector if any buffers are unloaded (silently fails!)
1600    /// Only use this when you KNOW the data is loaded (e.g., for syntax highlighting small regions)
1601    /// External code should use get_text_range_mut() or DocumentModel methods
1602    pub(crate) fn slice_bytes(&self, range: Range<usize>) -> Vec<u8> {
1603        self.get_text_range(range.start, range.end.saturating_sub(range.start))
1604            .unwrap_or_default()
1605    }
1606
    /// Get all text as a String
    /// Returns None if any buffers are unloaded (lazy loading)
    ///
    /// NOTE(review): this inherent `to_string` returning `Option<String>`
    /// shadows the `Display`-based `ToString::to_string` convention; renaming
    /// would be clearer but would break existing callers.
    pub fn to_string(&self) -> Option<String> {
        self.get_all_text_string()
    }
1612
    /// Get the total number of bytes in the document
    ///
    /// Delegates to the piece tree's byte total, so unloaded (lazy) chunks
    /// are counted even though their data is not resident.
    pub fn len(&self) -> usize {
        self.total_bytes()
    }
1617
1618    /// Check if the buffer is empty
1619    pub fn is_empty(&self) -> bool {
1620        self.total_bytes() == 0
1621    }
1622
    /// Get the file path associated with this buffer
    ///
    /// Returns `None` for an unnamed (never-saved) buffer.
    pub fn file_path(&self) -> Option<&Path> {
        self.file_path.as_deref()
    }
1627
    /// Set the file path for this buffer
    ///
    /// Does not touch the modified flag; callers decide whether a rename
    /// counts as a modification.
    pub fn set_file_path(&mut self, path: PathBuf) {
        self.file_path = Some(path);
    }
1632
    /// Clear the file path (make buffer unnamed)
    /// Note: This does NOT affect Unloaded chunk file_paths used for lazy loading.
    /// Those still point to the original source file for chunk loading.
    pub fn clear_file_path(&mut self) {
        self.file_path = None;
    }
1639
1640    /// Extend buffer to include more bytes from a streaming source file.
1641    /// Used for stdin streaming where the temp file grows over time.
1642    /// Appends a new Unloaded chunk for the new bytes.
1643    pub fn extend_streaming(&mut self, source_path: &Path, new_size: usize) {
1644        let old_size = self.total_bytes();
1645        if new_size <= old_size {
1646            return;
1647        }
1648
1649        let additional_bytes = new_size - old_size;
1650
1651        // Create new Unloaded buffer for the appended region
1652        let buffer_id = self.next_buffer_id;
1653        self.next_buffer_id += 1;
1654
1655        let new_buffer = StringBuffer::new_unloaded(
1656            buffer_id,
1657            source_path.to_path_buf(),
1658            old_size,         // file_offset - where this chunk starts in the file
1659            additional_bytes, // bytes - size of this chunk
1660        );
1661        self.buffers.push(new_buffer);
1662
1663        // Append piece at end of document (insert at offset == total_bytes)
1664        self.piece_tree.insert(
1665            old_size,
1666            BufferLocation::Stored(buffer_id),
1667            0,
1668            additional_bytes,
1669            None, // line_feed_cnt unknown for unloaded chunk
1670            &self.buffers,
1671        );
1672    }
1673
    /// Check if the buffer has been modified since last save
    ///
    /// The flag is cleared via `clear_modified` after a successful save.
    pub fn is_modified(&self) -> bool {
        self.modified
    }
1678
    /// Clear the modified flag (after save)
    pub fn clear_modified(&mut self) {
        self.modified = false;
    }
1683
    /// Set the modified flag explicitly
    /// Used by undo/redo to restore the correct modified state
    pub fn set_modified(&mut self, modified: bool) {
        self.modified = modified;
    }
1689
    /// Check if buffer has pending changes for recovery auto-save
    ///
    /// Set via `set_recovery_pending` after edits; independent of `modified`.
    pub fn is_recovery_pending(&self) -> bool {
        self.recovery_pending
    }
1694
    /// Mark buffer as needing recovery auto-save (call after edits)
    pub fn set_recovery_pending(&mut self, pending: bool) {
        self.recovery_pending = pending;
    }
1699
    /// Check if this is a large file with lazy loading enabled
    pub fn is_large_file(&self) -> bool {
        self.large_file
    }
1704
    /// Get the saved file size (size of the file on disk after last load/save)
    /// For large files, this is used during recovery to know the expected original file size.
    /// Returns None for new unsaved buffers.
    pub fn original_file_size(&self) -> Option<usize> {
        // Return the tracked saved file size - this is updated when the file is
        // loaded or saved, so it always reflects the current file on disk.
        self.saved_file_size
    }
1713
1714    /// Get recovery chunks for this buffer (only modified portions)
1715    ///
1716    /// For large files, this returns only the pieces that come from Added buffers
1717    /// (i.e., the modifications), not the original file content. This allows
1718    /// efficient incremental recovery without reading/writing the entire file.
1719    ///
1720    /// Returns: Vec of (original_file_offset, data) for each modified chunk
1721    /// The offset is the position in the ORIGINAL file where this chunk should be inserted.
1722    pub fn get_recovery_chunks(&self) -> Vec<(usize, Vec<u8>)> {
1723        use crate::model::piece_tree::BufferLocation;
1724
1725        let mut chunks = Vec::new();
1726        let total = self.total_bytes();
1727
1728        // Track cumulative bytes from Stored pieces as we iterate.
1729        // This gives us the original file offset for Added pieces.
1730        // The key insight: Added pieces should be inserted at the position
1731        // corresponding to where they appear relative to Stored content,
1732        // not their position in the current document.
1733        let mut stored_bytes_before = 0;
1734
1735        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
1736            match piece.location {
1737                BufferLocation::Stored(_) => {
1738                    // Accumulate stored bytes to track position in original file
1739                    stored_bytes_before += piece.bytes;
1740                }
1741                BufferLocation::Added(buffer_id) => {
1742                    if let Some(buffer) = self.buffers.iter().find(|b| b.id == buffer_id) {
1743                        // Get the data from the buffer if loaded
1744                        if let Some(data) = buffer.get_data() {
1745                            // Extract just the portion this piece references
1746                            let start = piece.buffer_offset;
1747                            let end = start + piece.bytes;
1748                            if end <= data.len() {
1749                                // Use stored_bytes_before as the original file offset.
1750                                // This is where this insertion should go relative to
1751                                // the original file content.
1752                                chunks.push((stored_bytes_before, data[start..end].to_vec()));
1753                            }
1754                        }
1755                    }
1756                }
1757            }
1758        }
1759
1760        chunks
1761    }
1762
    /// Check if this buffer contains binary content
    ///
    /// The flag is computed with `detect_binary` when content is loaded.
    pub fn is_binary(&self) -> bool {
        self.is_binary
    }
1767
    /// Get the line ending format for this buffer
    pub fn line_ending(&self) -> LineEnding {
        self.line_ending
    }
1772
    /// Set the line ending format for this buffer
    ///
    /// This marks the buffer as modified since the line ending format has changed.
    /// On save, the buffer content will be converted to the new format.
    pub fn set_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        // A format change must dirty the buffer so the conversion gets saved.
        self.mark_content_modified();
    }
1781
    /// Set the default line ending format for a new/empty buffer
    ///
    /// Unlike `set_line_ending`, this does NOT mark the buffer as modified.
    /// This should be used when initializing a new buffer with a configured default.
    pub fn set_default_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        // Keep the "original" in sync so a later save doesn't see a phantom change.
        self.original_line_ending = line_ending;
    }
1790
1791    /// Detect if the given bytes contain binary content.
1792    ///
1793    /// Binary content is detected by looking for:
1794    /// - Null bytes (0x00)
1795    /// - Non-printable control characters (except common ones like tab, newline, CR)
1796    ///
1797    /// ANSI escape sequences (ESC [ ...) are treated as text, not binary.
1798    pub fn detect_binary(bytes: &[u8]) -> bool {
1799        // Only check the first 8KB for binary detection
1800        let check_len = bytes.len().min(8 * 1024);
1801        let sample = &bytes[..check_len];
1802
1803        let mut i = 0;
1804        while i < sample.len() {
1805            let byte = sample[i];
1806
1807            // Check for ANSI escape sequence (ESC [ or ESC ])
1808            // These are common in text files and should not trigger binary detection
1809            if byte == 0x1B && i + 1 < sample.len() {
1810                let next = sample[i + 1];
1811                if next == b'[' || next == b']' {
1812                    // Skip the escape sequence - find the terminator
1813                    i += 2;
1814                    while i < sample.len() {
1815                        let c = sample[i];
1816                        // ANSI sequences end with a letter (0x40-0x7E for CSI)
1817                        if (0x40..=0x7E).contains(&c) {
1818                            break;
1819                        }
1820                        i += 1;
1821                    }
1822                    i += 1;
1823                    continue;
1824                }
1825            }
1826
1827            // Null byte is a strong indicator of binary content
1828            if byte == 0x00 {
1829                return true;
1830            }
1831
1832            // Check for non-printable control characters
1833            // Allow: tab (0x09), newline (0x0A), carriage return (0x0D)
1834            // Also allow: form feed (0x0C), vertical tab (0x0B) - sometimes used in text
1835            // ESC (0x1B) is handled above for ANSI sequences
1836            if byte < 0x20
1837                && byte != 0x09
1838                && byte != 0x0A
1839                && byte != 0x0D
1840                && byte != 0x0C
1841                && byte != 0x0B
1842                && byte != 0x1B
1843            {
1844                return true;
1845            }
1846
1847            // DEL character (0x7F) is also a control character
1848            if byte == 0x7F {
1849                return true;
1850            }
1851
1852            i += 1;
1853        }
1854
1855        false
1856    }
1857
1858    /// Detect the line ending format from a sample of bytes
1859    ///
1860    /// Uses majority voting: counts CRLF, LF-only, and CR-only occurrences
1861    /// and returns the most common format.
1862    pub fn detect_line_ending(bytes: &[u8]) -> LineEnding {
1863        // Only check the first 8KB for line ending detection (same as binary detection)
1864        let check_len = bytes.len().min(8 * 1024);
1865        let sample = &bytes[..check_len];
1866
1867        let mut crlf_count = 0;
1868        let mut lf_only_count = 0;
1869        let mut cr_only_count = 0;
1870
1871        let mut i = 0;
1872        while i < sample.len() {
1873            if sample[i] == b'\r' {
1874                // Check if this is CRLF
1875                if i + 1 < sample.len() && sample[i + 1] == b'\n' {
1876                    crlf_count += 1;
1877                    i += 2; // Skip both \r and \n
1878                    continue;
1879                } else {
1880                    // CR only (old Mac format)
1881                    cr_only_count += 1;
1882                }
1883            } else if sample[i] == b'\n' {
1884                // LF only (Unix format)
1885                lf_only_count += 1;
1886            }
1887            i += 1;
1888        }
1889
1890        // Use majority voting to determine line ending
1891        if crlf_count > lf_only_count && crlf_count > cr_only_count {
1892            LineEnding::CRLF
1893        } else if cr_only_count > lf_only_count && cr_only_count > crlf_count {
1894            LineEnding::CR
1895        } else {
1896            // Default to LF if no clear winner or if LF wins
1897            LineEnding::LF
1898        }
1899    }
1900
1901    /// Normalize line endings in the given bytes to LF only
1902    ///
1903    /// Converts CRLF (\r\n) and CR (\r) to LF (\n) for internal representation.
1904    /// This makes editing and cursor movement simpler while preserving the
1905    /// original format for saving.
1906    #[allow(dead_code)] // Kept for tests and potential future use
1907    pub fn normalize_line_endings(bytes: Vec<u8>) -> Vec<u8> {
1908        let mut normalized = Vec::with_capacity(bytes.len());
1909        let mut i = 0;
1910
1911        while i < bytes.len() {
1912            if bytes[i] == b'\r' {
1913                // Check if this is CRLF
1914                if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
1915                    // CRLF -> LF
1916                    normalized.push(b'\n');
1917                    i += 2; // Skip both \r and \n
1918                    continue;
1919                } else {
1920                    // CR only -> LF
1921                    normalized.push(b'\n');
1922                }
1923            } else {
1924                // Copy byte as-is
1925                normalized.push(bytes[i]);
1926            }
1927            i += 1;
1928        }
1929
1930        normalized
1931    }
1932
1933    /// Convert line endings from any source format to any target format
1934    ///
1935    /// This first normalizes all line endings to LF, then converts to the target format.
1936    /// Used when saving files after the user has changed the line ending format.
1937    fn convert_line_endings_to(bytes: &[u8], target_ending: LineEnding) -> Vec<u8> {
1938        // First pass: normalize everything to LF
1939        let mut normalized = Vec::with_capacity(bytes.len());
1940        let mut i = 0;
1941        while i < bytes.len() {
1942            if bytes[i] == b'\r' {
1943                // Check if this is CRLF
1944                if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
1945                    // CRLF -> LF
1946                    normalized.push(b'\n');
1947                    i += 2;
1948                    continue;
1949                } else {
1950                    // CR only -> LF
1951                    normalized.push(b'\n');
1952                }
1953            } else {
1954                normalized.push(bytes[i]);
1955            }
1956            i += 1;
1957        }
1958
1959        // If target is LF, we're done
1960        if target_ending == LineEnding::LF {
1961            return normalized;
1962        }
1963
1964        // Second pass: convert LF to target format
1965        let replacement = target_ending.as_str().as_bytes();
1966        let mut result = Vec::with_capacity(normalized.len() + normalized.len() / 10);
1967
1968        for byte in normalized {
1969            if byte == b'\n' {
1970                result.extend_from_slice(replacement);
1971            } else {
1972                result.push(byte);
1973            }
1974        }
1975
1976        result
1977    }
1978
1979    /// Get text for a specific line
1980    pub fn get_line(&self, line: usize) -> Option<Vec<u8>> {
1981        let (start, end) = self.piece_tree.line_range(line, &self.buffers)?;
1982
1983        let bytes = if let Some(end_offset) = end {
1984            end_offset.saturating_sub(start)
1985        } else {
1986            self.total_bytes().saturating_sub(start)
1987        };
1988
1989        self.get_text_range(start, bytes)
1990    }
1991
1992    /// Get the byte offset where a line starts
1993    pub fn line_start_offset(&self, line: usize) -> Option<usize> {
1994        let (start, _) = self.piece_tree.line_range(line, &self.buffers)?;
1995        Some(start)
1996    }
1997
    /// Get piece information at a byte offset
    ///
    /// Returns `None` when no piece covers the offset.
    pub fn piece_info_at_offset(&self, offset: usize) -> Option<PieceInfo> {
        self.piece_tree.find_by_offset(offset)
    }
2002
    /// Get tree statistics for debugging
    pub fn stats(&self) -> TreeStats {
        self.piece_tree.stats()
    }
2007
2008    // Search and Replace Operations
2009
2010    /// Find the next occurrence of a pattern, with wrap-around
2011    pub fn find_next(&self, pattern: &str, start_pos: usize) -> Option<usize> {
2012        if pattern.is_empty() {
2013            return None;
2014        }
2015
2016        let pattern_bytes = pattern.as_bytes();
2017        let buffer_len = self.len();
2018
2019        // Search from start_pos to end
2020        if start_pos < buffer_len {
2021            if let Some(offset) = self.find_pattern(start_pos, buffer_len, pattern_bytes) {
2022                return Some(offset);
2023            }
2024        }
2025
2026        // Wrap around: search from beginning to start_pos
2027        if start_pos > 0 {
2028            if let Some(offset) = self.find_pattern(0, start_pos, pattern_bytes) {
2029                return Some(offset);
2030            }
2031        }
2032
2033        None
2034    }
2035
2036    /// Find the next occurrence of a pattern within an optional range
2037    /// If range is None, searches the entire buffer with wrap-around (same as find_next)
2038    /// If range is Some, searches only within that range without wrap-around
2039    pub fn find_next_in_range(
2040        &self,
2041        pattern: &str,
2042        start_pos: usize,
2043        range: Option<Range<usize>>,
2044    ) -> Option<usize> {
2045        if pattern.is_empty() {
2046            return None;
2047        }
2048
2049        if let Some(search_range) = range {
2050            // Search within range only, no wrap-around
2051            let pattern_bytes = pattern.as_bytes();
2052            let search_start = start_pos.max(search_range.start);
2053            let search_end = search_range.end.min(self.len());
2054
2055            if search_start < search_end {
2056                self.find_pattern(search_start, search_end, pattern_bytes)
2057            } else {
2058                None
2059            }
2060        } else {
2061            // No range specified, use normal find_next with wrap-around
2062            self.find_next(pattern, start_pos)
2063        }
2064    }
2065
    /// Find pattern in a byte range using overlapping chunks
    ///
    /// Streams the document in 64 KB chunks overlapping by `pattern.len() - 1`
    /// bytes so a match straddling a chunk boundary is still seen whole.
    /// Returns the absolute byte offset of the first match, or `None`.
    fn find_pattern(&self, start: usize, end: usize, pattern: &[u8]) -> Option<usize> {
        if pattern.is_empty() || start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 65536; // 64KB chunks
        let overlap = pattern.len().saturating_sub(1).max(1);

        // Use the overlapping chunks iterator for efficient streaming search
        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, overlap);

        for chunk in chunks {
            // Search the entire chunk buffer
            if let Some(pos) = Self::find_in_bytes(&chunk.buffer, pattern) {
                let match_end = pos + pattern.len();
                // Only report if match ENDS in or after the valid zone
                // This ensures patterns spanning boundaries are found exactly once
                // (a match ending before valid_start lies wholly inside the
                // overlap region the previous chunk already searched).
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + pos;
                    // Verify the match doesn't extend beyond our search range
                    if absolute_pos + pattern.len() <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
2096
2097    /// Simple byte pattern search using naive algorithm
2098    fn find_in_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
2099        if needle.is_empty() || needle.len() > haystack.len() {
2100            return None;
2101        }
2102
2103        (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
2104    }
2105
2106    /// Find the next occurrence of a regex pattern, with wrap-around
2107    pub fn find_next_regex(&self, regex: &Regex, start_pos: usize) -> Option<usize> {
2108        let buffer_len = self.len();
2109
2110        // Search from start_pos to end
2111        if start_pos < buffer_len {
2112            if let Some(offset) = self.find_regex(start_pos, buffer_len, regex) {
2113                return Some(offset);
2114            }
2115        }
2116
2117        // Wrap around: search from beginning to start_pos
2118        if start_pos > 0 {
2119            if let Some(offset) = self.find_regex(0, start_pos, regex) {
2120                return Some(offset);
2121            }
2122        }
2123
2124        None
2125    }
2126
2127    /// Find the next occurrence of a regex pattern within an optional range
2128    pub fn find_next_regex_in_range(
2129        &self,
2130        regex: &Regex,
2131        start_pos: usize,
2132        range: Option<Range<usize>>,
2133    ) -> Option<usize> {
2134        if let Some(search_range) = range {
2135            let search_start = start_pos.max(search_range.start);
2136            let search_end = search_range.end.min(self.len());
2137
2138            if search_start < search_end {
2139                self.find_regex(search_start, search_end, regex)
2140            } else {
2141                None
2142            }
2143        } else {
2144            self.find_next_regex(regex, start_pos)
2145        }
2146    }
2147
    /// Find regex pattern in a byte range using overlapping chunks
    ///
    /// Streams the document in 1 MB chunks with a fixed 4 KB overlap so that
    /// matches spanning a chunk boundary are still visible in one buffer.
    /// Returns the absolute start offset of the first match, or `None`.
    ///
    /// NOTE(review): a single match longer than the 4 KB overlap could still be
    /// split across chunks and missed — confirm whether very long matches matter.
    fn find_regex(&self, start: usize, end: usize, regex: &Regex) -> Option<usize> {
        if start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 1048576; // 1MB chunks
        const OVERLAP: usize = 4096; // 4KB overlap for regex

        // Use the overlapping chunks iterator for efficient streaming search
        // This fixes the critical bug where regex patterns spanning chunk boundaries were missed
        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, OVERLAP);

        for chunk in chunks {
            // Search the entire chunk buffer
            if let Some(mat) = regex.find(&chunk.buffer) {
                let match_end = mat.end();
                // Only report if match ENDS in or after the valid zone
                // This ensures patterns spanning boundaries are found exactly once
                // (a match ending before valid_start lies wholly inside the
                // overlap region the previous chunk already searched).
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + mat.start();
                    // Verify the match doesn't extend beyond our search range
                    let match_len = mat.end() - mat.start();
                    if absolute_pos + match_len <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
2180
2181    /// Replace a range with replacement text
2182    pub fn replace_range(&mut self, range: Range<usize>, replacement: &str) -> bool {
2183        if range.start >= self.len() {
2184            return false;
2185        }
2186
2187        let end = range.end.min(self.len());
2188        if end > range.start {
2189            self.delete_bytes(range.start, end - range.start);
2190        }
2191
2192        if !replacement.is_empty() {
2193            self.insert(range.start, replacement);
2194        }
2195
2196        true
2197    }
2198
2199    /// Find and replace the next occurrence of a pattern
2200    pub fn replace_next(
2201        &mut self,
2202        pattern: &str,
2203        replacement: &str,
2204        start_pos: usize,
2205        range: Option<Range<usize>>,
2206    ) -> Option<usize> {
2207        if let Some(pos) = self.find_next_in_range(pattern, start_pos, range.clone()) {
2208            self.replace_range(pos..pos + pattern.len(), replacement);
2209            Some(pos)
2210        } else {
2211            None
2212        }
2213    }
2214
2215    /// Replace all occurrences of a pattern with replacement text
2216    pub fn replace_all(&mut self, pattern: &str, replacement: &str) -> usize {
2217        if pattern.is_empty() {
2218            return 0;
2219        }
2220
2221        let mut count = 0;
2222        let mut pos = 0;
2223
2224        // Keep searching and replacing
2225        // Note: we search forward from last replacement to handle growth/shrinkage
2226        // Find next occurrence (no wrap-around for replace_all)
2227        while let Some(found_pos) = self.find_next_in_range(pattern, pos, Some(0..self.len())) {
2228            self.replace_range(found_pos..found_pos + pattern.len(), replacement);
2229            count += 1;
2230
2231            // Move past the replacement
2232            pos = found_pos + replacement.len();
2233
2234            // If we're at or past the end, stop
2235            if pos >= self.len() {
2236                break;
2237            }
2238        }
2239
2240        count
2241    }
2242
2243    /// Replace all occurrences of a regex pattern with replacement text
2244    pub fn replace_all_regex(&mut self, regex: &Regex, replacement: &str) -> Result<usize> {
2245        let mut count = 0;
2246        let mut pos = 0;
2247
2248        while let Some(found_pos) = self.find_next_regex_in_range(regex, pos, Some(0..self.len())) {
2249            // Get the match to find its length
2250            let text = self
2251                .get_text_range_mut(found_pos, self.len() - found_pos)
2252                .context("Failed to read text for regex match")?;
2253
2254            if let Some(mat) = regex.find(&text) {
2255                self.replace_range(found_pos..found_pos + mat.len(), replacement);
2256                count += 1;
2257                pos = found_pos + replacement.len();
2258
2259                if pos >= self.len() {
2260                    break;
2261                }
2262            } else {
2263                break;
2264            }
2265        }
2266
2267        Ok(count)
2268    }
2269
2270    // LSP Support (UTF-16 conversions)
2271
2272    /// Convert byte position to (line, column) in bytes
2273    pub fn position_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
2274        self.offset_to_position(byte_pos)
2275            .map(|pos| (pos.line, pos.column))
2276            .unwrap_or_else(|| (byte_pos / 80, 0)) // Estimate if metadata unavailable
2277    }
2278
2279    /// Convert (line, character) to byte position - 0-indexed
2280    /// character is in BYTES, not UTF-16 code units
2281    /// Optimized to use single line_range() call instead of two
2282    pub fn line_col_to_position(&self, line: usize, character: usize) -> usize {
2283        if let Some((start, end)) = self.piece_tree.line_range(line, &self.buffers) {
2284            // Calculate line length from the range
2285            let line_len = if let Some(end_offset) = end {
2286                end_offset.saturating_sub(start)
2287            } else {
2288                self.total_bytes().saturating_sub(start)
2289            };
2290            let byte_offset = character.min(line_len);
2291            start + byte_offset
2292        } else {
2293            // Line doesn't exist, return end of buffer
2294            self.len()
2295        }
2296    }
2297
2298    /// Convert byte position to LSP position (line, UTF-16 code units)
2299    /// LSP protocol uses UTF-16 code units for character offsets
2300    pub fn position_to_lsp_position(&self, byte_pos: usize) -> (usize, usize) {
2301        let (line, column_bytes) = self
2302            .offset_to_position(byte_pos)
2303            .map(|pos| (pos.line, pos.column))
2304            .unwrap_or_else(|| (byte_pos / 80, 0)); // Estimate if metadata unavailable
2305
2306        // Get the line content
2307        if let Some(line_bytes) = self.get_line(line) {
2308            // Convert byte offset to UTF-16 code units
2309            let text_before = &line_bytes[..column_bytes.min(line_bytes.len())];
2310            let text_str = String::from_utf8_lossy(text_before);
2311            let utf16_offset = text_str.encode_utf16().count();
2312            (line, utf16_offset)
2313        } else {
2314            (line, 0)
2315        }
2316    }
2317
    /// Convert LSP position (line, UTF-16 code units) to byte position
    /// LSP uses UTF-16 code units for character offsets, not bytes
    /// Optimized to use single line_range() call instead of two
    ///
    /// Clamping: an offset past the end of the line yields the line's end; a
    /// nonexistent `line` yields the end of the buffer. If `utf16_offset`
    /// lands inside a multi-unit character (a surrogate pair), the position
    /// AFTER that character is returned, since the loop only stops once the
    /// accumulated count reaches the offset.
    pub fn lsp_position_to_byte(&self, line: usize, utf16_offset: usize) -> usize {
        if let Some((line_start, end)) = self.piece_tree.line_range(line, &self.buffers) {
            // Calculate line length and get line content
            // (an open-ended range means the last line: runs to end of buffer)
            let line_len = if let Some(end_offset) = end {
                end_offset.saturating_sub(line_start)
            } else {
                self.total_bytes().saturating_sub(line_start)
            };

            if line_len > 0 {
                // If data is unloaded, return line_start as fallback
                let Some(line_bytes) = self.get_text_range(line_start, line_len) else {
                    return line_start;
                };
                // NOTE: lossy decoding — invalid UTF-8 becomes U+FFFD, so byte
                // offsets can drift on non-UTF-8 lines.
                let line_str = String::from_utf8_lossy(&line_bytes);

                // Convert UTF-16 offset to byte offset by walking characters
                // and accumulating UTF-16 and UTF-8 widths in lockstep
                let mut utf16_count = 0;
                let mut byte_offset = 0;

                for ch in line_str.chars() {
                    if utf16_count >= utf16_offset {
                        break;
                    }
                    utf16_count += ch.len_utf16();
                    byte_offset += ch.len_utf8();
                }

                line_start + byte_offset
            } else {
                line_start
            }
        } else {
            // Line doesn't exist, return end of buffer
            self.len()
        }
    }
2358
2359    // Navigation helpers
2360
2361    /// Find the previous character boundary (UTF-8 aware)
2362    pub fn prev_char_boundary(&self, pos: usize) -> usize {
2363        if pos == 0 {
2364            return 0;
2365        }
2366
2367        // Get a few bytes before pos to find the character boundary
2368        let start = pos.saturating_sub(4);
2369        let Some(bytes) = self.get_text_range(start, pos - start) else {
2370            // Data unloaded, return pos as fallback
2371            return pos;
2372        };
2373
2374        // Walk backwards to find a UTF-8 leading byte
2375        for i in (0..bytes.len()).rev() {
2376            let byte = bytes[i];
2377            // Check if this is a UTF-8 leading byte (not a continuation byte)
2378            if (byte & 0b1100_0000) != 0b1000_0000 {
2379                return start + i;
2380            }
2381        }
2382
2383        // Fallback
2384        pos.saturating_sub(1)
2385    }
2386
2387    /// Find the next character boundary (UTF-8 aware)
2388    pub fn next_char_boundary(&self, pos: usize) -> usize {
2389        let len = self.len();
2390        if pos >= len {
2391            return len;
2392        }
2393
2394        // Get a few bytes after pos to find the character boundary
2395        let end = (pos + 5).min(len);
2396        let Some(bytes) = self.get_text_range(pos, end - pos) else {
2397            // Data unloaded, return pos as fallback
2398            return pos;
2399        };
2400
2401        // Start from index 1 (we want the NEXT boundary)
2402        for (i, &byte) in bytes.iter().enumerate().skip(1) {
2403            // Check if this is a UTF-8 leading byte (not a continuation byte)
2404            if (byte & 0b1100_0000) != 0b1000_0000 {
2405                return pos + i;
2406            }
2407        }
2408
2409        // If we got here, we're at the end or found no boundary in the range
2410        end
2411    }
2412
2413    /// Check if a byte is a UTF-8 continuation byte (not at a char boundary)
2414    /// UTF-8 continuation bytes have the pattern 10xxxxxx (0x80-0xBF)
2415    /// This is the same check that str::is_char_boundary uses internally.
2416    #[inline]
2417    fn is_utf8_continuation_byte(byte: u8) -> bool {
2418        (byte & 0b1100_0000) == 0b1000_0000
2419    }
2420
2421    /// Snap position to a valid UTF-8 character boundary
2422    /// If already at a boundary, returns the same position.
2423    /// Otherwise, moves to the previous valid boundary.
2424    pub fn snap_to_char_boundary(&self, pos: usize) -> usize {
2425        let len = self.len();
2426        if pos == 0 || pos >= len {
2427            return pos.min(len);
2428        }
2429
2430        // Get the byte at pos to check if we're at a character boundary
2431        let Some(bytes) = self.get_text_range(pos, 1) else {
2432            // Data unloaded, return pos as fallback
2433            return pos;
2434        };
2435
2436        // A position is at a char boundary if the byte there is NOT a continuation byte
2437        if !Self::is_utf8_continuation_byte(bytes[0]) {
2438            // Already at a character boundary
2439            return pos;
2440        }
2441
2442        // Not at a boundary, find the previous one
2443        self.prev_char_boundary(pos)
2444    }
2445
    /// Find the previous grapheme cluster boundary (for proper cursor movement with combining characters)
    ///
    /// This handles complex scripts like Thai where multiple Unicode code points
    /// form a single visual character (grapheme cluster). For example, Thai "ที่"
    /// is 3 code points but 1 grapheme cluster.
    ///
    /// Falls back to `prev_char_boundary` when the data is unloaded or the
    /// context window cannot be decoded as UTF-8.
    pub fn prev_grapheme_boundary(&self, pos: usize) -> usize {
        if pos == 0 {
            return 0;
        }

        // Get enough context before pos to find grapheme boundaries.
        // Thai combining characters can have multiple marks, so get up to 32 bytes.
        // IMPORTANT: align the window start to a valid character boundary,
        // otherwise get_text_range could start mid-character and yield
        // invalid UTF-8.
        let raw_start = pos.saturating_sub(32);
        let start = if raw_start == 0 {
            0
        } else {
            // prev_char_boundary(raw_start + 1) returns the boundary at or
            // before raw_start.
            self.prev_char_boundary(raw_start + 1)
        };

        let Some(bytes) = self.get_text_range(start, pos - start) else {
            // Data unloaded, fall back to char boundary
            return self.prev_char_boundary(pos);
        };

        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // Still got invalid UTF-8 (shouldn't happen after alignment).
                // Retry with just the valid prefix.
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.prev_char_boundary(pos),
                }
            }
        };

        // Delegate to the shared grapheme utility, working in offsets
        // relative to the window start.
        let rel_pos = pos - start;
        let new_rel_pos = grapheme::prev_grapheme_boundary(text, rel_pos);

        // Landing at the very start of the window with more text before it
        // means the boundary may lie further back: recurse from `start`.
        // Recursion depth is bounded since each step moves back ≥1 window.
        if new_rel_pos == 0 && start > 0 {
            return self.prev_grapheme_boundary(start);
        }

        start + new_rel_pos
    }
2498
2499    /// Find the next grapheme cluster boundary (for proper cursor movement with combining characters)
2500    ///
2501    /// This handles complex scripts like Thai where multiple Unicode code points
2502    /// form a single visual character (grapheme cluster). For example, Thai "ที่"
2503    /// is 3 code points but 1 grapheme cluster.
2504    pub fn next_grapheme_boundary(&self, pos: usize) -> usize {
2505        let len = self.len();
2506        if pos >= len {
2507            return len;
2508        }
2509
2510        // Get enough context after pos to find grapheme boundaries
2511        // Thai combining characters can have multiple marks, so get up to 32 bytes
2512        let end = (pos + 32).min(len);
2513        let Some(bytes) = self.get_text_range(pos, end - pos) else {
2514            // Data unloaded, fall back to char boundary
2515            return self.next_char_boundary(pos);
2516        };
2517
2518        // Convert to UTF-8 string, handling the case where we might have
2519        // grabbed bytes that end mid-character (truncate to valid UTF-8)
2520        let text = match std::str::from_utf8(&bytes) {
2521            Ok(s) => s,
2522            Err(e) => {
2523                // The bytes end in an incomplete UTF-8 sequence
2524                // Use only the valid portion (which includes at least the first grapheme)
2525                let valid_bytes = &bytes[..e.valid_up_to()];
2526                match std::str::from_utf8(valid_bytes) {
2527                    Ok(s) if !s.is_empty() => s,
2528                    _ => return self.next_char_boundary(pos),
2529                }
2530            }
2531        };
2532
2533        // Use shared grapheme utility
2534        let new_rel_pos = grapheme::next_grapheme_boundary(text, 0);
2535        pos + new_rel_pos
2536    }
2537
2538    /// Find the previous word boundary
2539    pub fn prev_word_boundary(&self, pos: usize) -> usize {
2540        if pos == 0 {
2541            return 0;
2542        }
2543
2544        // Get some text before pos
2545        let start = pos.saturating_sub(256).max(0);
2546        let Some(bytes) = self.get_text_range(start, pos - start) else {
2547            // Data unloaded, return pos as fallback
2548            return pos;
2549        };
2550        let text = String::from_utf8_lossy(&bytes);
2551
2552        let mut found_word_char = false;
2553        let chars: Vec<char> = text.chars().collect();
2554
2555        for i in (0..chars.len()).rev() {
2556            let ch = chars[i];
2557            let is_word_char = ch.is_alphanumeric() || ch == '_';
2558
2559            if found_word_char && !is_word_char {
2560                // We've transitioned from word to non-word
2561                // Calculate the byte position
2562                let byte_offset: usize = chars[0..=i].iter().map(|c| c.len_utf8()).sum();
2563                return start + byte_offset;
2564            }
2565
2566            if is_word_char {
2567                found_word_char = true;
2568            }
2569        }
2570
2571        0
2572    }
2573
2574    /// Find the next word boundary
2575    pub fn next_word_boundary(&self, pos: usize) -> usize {
2576        let len = self.len();
2577        if pos >= len {
2578            return len;
2579        }
2580
2581        // Get some text after pos
2582        let end = (pos + 256).min(len);
2583        let Some(bytes) = self.get_text_range(pos, end - pos) else {
2584            // Data unloaded, return pos as fallback
2585            return pos;
2586        };
2587        let text = String::from_utf8_lossy(&bytes);
2588
2589        let mut found_word_char = false;
2590        let mut byte_offset = 0;
2591
2592        for ch in text.chars() {
2593            let is_word_char = ch.is_alphanumeric() || ch == '_';
2594
2595            if found_word_char && !is_word_char {
2596                // We've transitioned from word to non-word
2597                return pos + byte_offset;
2598            }
2599
2600            if is_word_char {
2601                found_word_char = true;
2602            }
2603
2604            byte_offset += ch.len_utf8();
2605        }
2606
2607        len
2608    }
2609
    /// Create a line iterator starting at the given byte position
    ///
    /// This iterator lazily loads chunks as needed, never scanning the entire file.
    /// For large files with unloaded buffers, chunks are loaded on-demand (1MB at a time).
    ///
    /// `estimated_line_length` is forwarded to `LineIterator::new`;
    /// presumably a sizing hint for its internal buffering — see that type.
    pub fn line_iterator(
        &mut self,
        byte_pos: usize,
        estimated_line_length: usize,
    ) -> LineIterator<'_> {
        LineIterator::new(self, byte_pos, estimated_line_length)
    }
2621
    /// Iterate over lines starting from a given byte offset, with line numbers
    ///
    /// This is a more efficient alternative to using line_iterator() + offset_to_position()
    /// because it calculates line numbers incrementally during iteration by accumulating
    /// line_feed_cnt from pieces (which is already tracked in the piece tree).
    ///
    /// Yields `(byte_offset, content, line_number: Option<usize>)`:
    /// - line_number is Some(n) for small files with line metadata
    /// - line_number is None for large files without line metadata
    ///
    /// `max_lines` caps how many lines the iterator produces.
    ///
    /// # Performance
    /// - O(1) per line for line number calculation (vs O(log n) per line with offset_to_position)
    /// - Uses single source of truth: the piece tree's existing line_feed_cnt metadata
    pub fn iter_lines_from(
        &mut self,
        byte_pos: usize,
        max_lines: usize,
    ) -> Result<TextBufferLineIterator> {
        TextBufferLineIterator::new(self, byte_pos, max_lines)
    }
2642
2643    // Legacy API methods for backwards compatibility
2644
2645    /// Get the line number for a given byte offset
2646    ///
2647    /// Returns exact line number if metadata available, otherwise estimates based on bytes.
2648    ///
2649    /// # Behavior by File Size:
2650    /// - **Small files (< 1MB)**: Returns exact line number from piece tree's `line_starts` metadata
2651    /// - **Large files (≥ 1MB)**: Returns estimated line number using `byte_offset / 80`
2652    ///
2653    /// Large files don't maintain line metadata for performance reasons. The estimation
2654    /// assumes ~80 bytes per line on average, which works reasonably well for most text files.
2655    pub fn get_line_number(&self, byte_offset: usize) -> usize {
2656        self.offset_to_position(byte_offset)
2657            .map(|pos| pos.line)
2658            .unwrap_or_else(|| {
2659                // Estimate line number based on average line length of ~80 bytes
2660                byte_offset / 80
2661            })
2662    }
2663
    /// Return the line number at `start_byte` (used for viewport rendering).
    ///
    /// ## Why this method still exists
    /// Historically it populated a separate line cache. Line tracking now
    /// lives inside the piece tree (`BufferData::Loaded { line_starts, .. }`),
    /// so the "population" part is a no-op — but the renderer still needs the
    /// line number to display in the margin at the top of the viewport, and
    /// this method returns it, handling both file modes transparently:
    ///
    /// - **Small files**: `line_starts = Some(vec)` → exact line number
    /// - **Large files**: `line_starts = None` → estimate (`start_byte / 80`),
    ///   via `get_line_number`
    ///
    /// ## Legacy line-cache methods
    /// These are now no-ops and can be removed in a future cleanup:
    /// `invalidate_line_cache_from()`, `handle_line_cache_insertion()`,
    /// `handle_line_cache_deletion()`, `clear_line_cache()` — the piece tree
    /// updates its own metadata on every edit.
    ///
    /// ## Bug fix (2025-11)
    /// Previously this always returned `0`, so margin line numbers showed
    /// 1, 2, 3... regardless of scroll position. It now returns the actual
    /// line number at `start_byte`.
    pub fn populate_line_cache(&mut self, start_byte: usize, _line_count: usize) -> usize {
        // No cache to populate — the piece tree maintains line starts
        // automatically. Just report the line number the renderer needs.
        self.get_line_number(start_byte)
    }
2702
    /// Get cached byte offset for line (compatibility shim).
    ///
    /// There is no separate cache anymore; this delegates to
    /// `line_start_offset`, which reads the piece tree's line metadata.
    pub fn get_cached_byte_offset_for_line(&self, line_number: usize) -> Option<usize> {
        self.line_start_offset(line_number)
    }
2707
    /// Invalidate line cache from offset (no-op: the piece tree keeps its
    /// line metadata up to date on every edit; kept for API compatibility).
    pub fn invalidate_line_cache_from(&mut self, _byte_offset: usize) {
        // No-op: LineIndex updates automatically
    }
2712
    /// Handle line cache insertion (no-op: the piece tree updates its line
    /// metadata during insert; kept for API compatibility).
    pub fn handle_line_cache_insertion(&mut self, _byte_offset: usize, _bytes_inserted: usize) {
        // No-op: LineIndex updates automatically during insert
    }
2717
    /// Handle line cache deletion (no-op: the piece tree updates its line
    /// metadata during delete; kept for API compatibility).
    pub fn handle_line_cache_deletion(&mut self, _byte_offset: usize, _bytes_deleted: usize) {
        // No-op: LineIndex updates automatically during delete
    }
2722
    /// Clear line cache (no-op: there is no separate cache; the piece tree's
    /// metadata cannot be cleared independently; kept for API compatibility).
    pub fn clear_line_cache(&mut self) {
        // No-op: LineIndex can't be cleared
    }
2727
2728    // Test helper methods
2729
2730    /// Create a buffer from a string for testing
2731    #[cfg(test)]
2732    pub fn from_str_test(s: &str) -> Self {
2733        Self::from_bytes(
2734            s.as_bytes().to_vec(),
2735            std::sync::Arc::new(crate::model::filesystem::StdFileSystem),
2736        )
2737    }
2738
2739    /// Create a new empty buffer for testing
2740    #[cfg(test)]
2741    pub fn new_test() -> Self {
2742        Self::empty(std::sync::Arc::new(crate::model::filesystem::StdFileSystem))
2743    }
2744}
2745
/// Type alias kept so older call sites can keep referring to `Buffer`.
pub type Buffer = TextBuffer;
2748
2749// Re-export LineIterator from the line_iterator module
2750pub use crate::primitives::line_iterator::LineIterator;
2751
2752// ============================================================================
2753// Overlapping Chunks Iterator for Efficient Search
2754// ============================================================================
2755
/// Information about a chunk of data yielded to pattern-matching code.
#[derive(Debug)]
pub struct ChunkInfo {
    /// The buffer containing this chunk's data (includes overlap carried
    /// over from the previous chunk)
    pub buffer: Vec<u8>,

    /// Absolute position in the document where this buffer starts
    pub absolute_pos: usize,

    /// Offset within buffer where "new" data starts (valid match zone).
    /// Matches starting before this offset were already checked in the
    /// previous chunk and must not be reported again.
    pub valid_start: usize,
}
2769
/// Iterator that yields overlapping chunks for pattern matching
///
/// This iterator implements the VSCode/Sublime approach: pull overlapping chunks
/// from the underlying piece tree and use standard search algorithms on them.
///
/// # Algorithm
///
/// ```text
/// Chunk 1: [------------ valid -----------]
/// Chunk 2:      [overlap][---- valid ----]
/// Chunk 3:                   [overlap][-- valid --]
///
/// Only matches starting in the "valid" zone are reported to avoid duplicates.
/// ```
///
/// # Example
///
/// ```ignore
/// let chunks = OverlappingChunks::new(&text_buffer, start, end, 4096, pattern.len()-1);
/// for chunk in chunks {
///     // Search only starting from chunk.valid_start
///     if let Some(pos) = search(&chunk.buffer[chunk.valid_start..]) {
///         let absolute_pos = chunk.absolute_pos + chunk.valid_start + pos;
///         return Some(absolute_pos);
///     }
/// }
/// ```
pub struct OverlappingChunks<'a> {
    // Pieces covering [current_pos, end_pos), consumed sequentially
    piece_iter: PieceRangeIter,
    buffers: &'a [StringBuffer],

    // Reusable chunk buffer that we fill from pieces; its first byte sits at
    // buffer_absolute_pos in the document
    buffer: Vec<u8>,
    buffer_absolute_pos: usize,

    // Read cursor (next byte to pull) and exclusive end of the search range
    current_pos: usize,
    end_pos: usize,

    // Configuration: target bytes of NEW data per chunk, and how many bytes
    // to carry over between consecutive chunks
    chunk_size: usize,
    overlap: usize,

    // Track first chunk special case (no overlap prefix to preserve)
    first_chunk: bool,

    // Cached copy of the current piece's bytes, drained via read_byte()
    current_piece_data: Option<Vec<u8>>,
    current_piece_offset: usize,
}
2820
impl<'a> OverlappingChunks<'a> {
    /// Create a new overlapping chunks iterator
    ///
    /// # Arguments
    ///
    /// * `text_buffer` - The text buffer to iterate over
    /// * `start` - Start position in the document
    /// * `end` - End position in the document (exclusive)
    /// * `chunk_size` - Target size for each chunk (excluding overlap)
    /// * `overlap` - Number of bytes to overlap between chunks
    ///
    /// # Recommendations
    ///
    /// * For literal string search: `chunk_size=65536, overlap=pattern.len()-1`
    /// * For regex search: `chunk_size=1048576, overlap=4096`
    pub fn new(
        text_buffer: &'a TextBuffer,
        start: usize,
        end: usize,
        chunk_size: usize,
        overlap: usize,
    ) -> Self {
        let piece_iter = text_buffer.piece_tree.iter_pieces_in_range(start, end);

        Self {
            piece_iter,
            buffers: &text_buffer.buffers,
            // Reserve room for one full chunk plus its overlap prefix so the
            // buffer does not reallocate during normal operation.
            buffer: Vec::with_capacity(chunk_size + overlap),
            buffer_absolute_pos: start,
            current_pos: start,
            end_pos: end,
            chunk_size,
            overlap,
            first_chunk: true,
            current_piece_data: None,
            current_piece_offset: 0,
        }
    }

    /// Read one byte from the piece iterator
    ///
    /// Advances `current_pos` on success. Pieces are materialized one at a
    /// time into `current_piece_data` and drained byte-by-byte.
    ///
    /// Returns `None` when the pieces are exhausted — but also when a piece's
    /// backing buffer has no loaded data or its offsets fall outside the
    /// loaded range. NOTE(review): that stops iteration early rather than
    /// skipping the unreadable piece; confirm this best-effort cutoff on
    /// unloaded large-file data is intended.
    fn read_byte(&mut self) -> Option<u8> {
        loop {
            // If we have cached piece data, read from it
            if let Some(ref data) = self.current_piece_data {
                if self.current_piece_offset < data.len() {
                    let byte = data[self.current_piece_offset];
                    self.current_piece_offset += 1;
                    self.current_pos += 1;
                    return Some(byte);
                } else {
                    // Exhausted current piece, move to next
                    self.current_piece_data = None;
                    self.current_piece_offset = 0;
                }
            }

            // Get next piece
            if let Some(piece_view) = self.piece_iter.next() {
                let buffer_id = piece_view.location.buffer_id();
                if let Some(buffer) = self.buffers.get(buffer_id) {
                    // Extract the relevant slice from this piece
                    let piece_start_in_doc = piece_view.doc_offset;
                    let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                    // Clip to our search range: a piece may straddle either
                    // end of [current_pos, end_pos)
                    let read_start = self.current_pos.max(piece_start_in_doc);
                    let read_end = self.end_pos.min(piece_end_in_doc);

                    if read_end > read_start {
                        let offset_in_piece = read_start - piece_start_in_doc;
                        let bytes_to_read = read_end - read_start;

                        let buffer_start = piece_view.buffer_offset + offset_in_piece;
                        let buffer_end = buffer_start + bytes_to_read;

                        if let Some(data) = buffer.get_data() {
                            if buffer_end <= data.len() {
                                // Cache this piece's data and loop back to
                                // serve bytes from it
                                self.current_piece_data =
                                    Some(data[buffer_start..buffer_end].to_vec());
                                self.current_piece_offset = 0;
                                continue;
                            }
                        }
                    }
                }
            }

            // No more data (or the current piece could not be read)
            return None;
        }
    }

    /// Fill the buffer with the next chunk of data
    ///
    /// First call: fills up to `chunk_size` bytes. Subsequent calls: keep the
    /// trailing `overlap` bytes (advancing `buffer_absolute_pos` past the
    /// drained prefix) and append up to `chunk_size` NEW bytes after them.
    ///
    /// Returns `true` when the buffer holds data worth yielding (non-empty on
    /// the first call; grew by at least one byte afterwards).
    fn fill_next_chunk(&mut self) -> bool {
        if self.first_chunk {
            // First chunk: fill up to chunk_size
            self.first_chunk = false;
            while self.buffer.len() < self.chunk_size && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }
            !self.buffer.is_empty()
        } else {
            // Subsequent chunks: keep overlap, fill chunk_size NEW bytes
            if self.current_pos >= self.end_pos {
                return false;
            }

            // Keep overlap bytes at the end; everything before them has been
            // fully searched already
            if self.buffer.len() > self.overlap {
                let drain_amount = self.buffer.len() - self.overlap;
                self.buffer.drain(0..drain_amount);
                self.buffer_absolute_pos += drain_amount;
            }

            // Fill chunk_size NEW bytes (in addition to overlap)
            let before_len = self.buffer.len();
            let target_len = self.overlap + self.chunk_size;
            while self.buffer.len() < target_len && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }

            // Return true if we added new data
            self.buffer.len() > before_len
        }
    }
}
2956
2957impl<'a> Iterator for OverlappingChunks<'a> {
2958    type Item = ChunkInfo;
2959
2960    fn next(&mut self) -> Option<Self::Item> {
2961        // Track if this is the first chunk before filling
2962        let is_first = self.buffer_absolute_pos == self.current_pos;
2963
2964        if !self.fill_next_chunk() {
2965            return None;
2966        }
2967
2968        // First chunk: all data is valid (no overlap from previous)
2969        // Subsequent chunks: overlap bytes are not valid (already checked)
2970        let valid_start = if is_first {
2971            0
2972        } else {
2973            self.overlap.min(self.buffer.len())
2974        };
2975
2976        Some(ChunkInfo {
2977            buffer: self.buffer.clone(),
2978            absolute_pos: self.buffer_absolute_pos,
2979            valid_start,
2980        })
2981    }
2982}
2983
2984#[cfg(test)]
2985mod tests {
2986    use crate::model::filesystem::StdFileSystem;
2987    use std::sync::Arc;
2988
    /// Shared filesystem handle used by the tests below.
    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }
2992    use super::*;
2993
2994    #[test]
2995    fn test_empty_buffer() {
2996        let buffer = TextBuffer::empty(test_fs());
2997        assert_eq!(buffer.total_bytes(), 0);
2998        assert_eq!(buffer.line_count(), Some(1)); // Empty doc has 1 line
2999    }
3000
3001    #[test]
3002    fn test_line_positions_multiline() {
3003        let buffer = TextBuffer::from_bytes(b"Hello\nNew Line\nWorld!".to_vec(), test_fs());
3004
3005        // Check line count
3006        assert_eq!(buffer.line_count(), Some(3));
3007
3008        // Check line starts
3009        assert_eq!(buffer.line_start_offset(0), Some(0)); // "Hello\n" starts at 0
3010        assert_eq!(buffer.line_start_offset(1), Some(6)); // "New Line\n" starts at 6
3011        assert_eq!(buffer.line_start_offset(2), Some(15)); // "World!" starts at 15
3012
3013        // Check offset_to_position
3014        assert_eq!(buffer.offset_to_position(0).unwrap().line, 0); // Start of "Hello"
3015        assert_eq!(buffer.offset_to_position(5).unwrap().line, 0); // End of "Hello" (before \n)
3016        assert_eq!(buffer.offset_to_position(6).unwrap().line, 1); // Start of "New Line"
3017        assert_eq!(buffer.offset_to_position(14).unwrap().line, 1); // End of "New Line" (before \n)
3018        assert_eq!(buffer.offset_to_position(15).unwrap().line, 2); // Start of "World!"
3019
3020        // Check line_col_to_position
3021        assert_eq!(buffer.line_col_to_position(0, 5), 5); // End of line 0
3022        assert_eq!(buffer.line_col_to_position(1, 0), 6); // Start of line 1
3023        assert_eq!(buffer.line_col_to_position(1, 8), 14); // End of line 1
3024        assert_eq!(buffer.line_col_to_position(2, 0), 15); // Start of line 2
3025    }
3026
3027    #[test]
3028    fn test_new_from_content() {
3029        let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
3030        assert_eq!(buffer.total_bytes(), 11);
3031        assert_eq!(buffer.line_count(), Some(2));
3032    }
3033
3034    #[test]
3035    fn test_get_all_text() {
3036        let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
3037        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld");
3038    }
3039
3040    #[test]
3041    fn test_insert_at_start() {
3042        let mut buffer = TextBuffer::from_bytes(b"world".to_vec(), test_fs());
3043        buffer.insert_bytes(0, b"hello ".to_vec());
3044
3045        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
3046        assert_eq!(buffer.total_bytes(), 11);
3047    }
3048
3049    #[test]
3050    fn test_insert_in_middle() {
3051        let mut buffer = TextBuffer::from_bytes(b"helloworld".to_vec(), test_fs());
3052        buffer.insert_bytes(5, b" ".to_vec());
3053
3054        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
3055        assert_eq!(buffer.total_bytes(), 11);
3056    }
3057
3058    #[test]
3059    fn test_insert_at_end() {
3060        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
3061        buffer.insert_bytes(5, b" world".to_vec());
3062
3063        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
3064        assert_eq!(buffer.total_bytes(), 11);
3065    }
3066
3067    #[test]
3068    fn test_insert_with_newlines() {
3069        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
3070        buffer.insert_bytes(5, b"\nworld\ntest".to_vec());
3071
3072        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld\ntest");
3073        assert_eq!(buffer.line_count(), Some(3));
3074    }
3075
3076    #[test]
3077    fn test_delete_from_start() {
3078        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
3079        buffer.delete_bytes(0, 6);
3080
3081        assert_eq!(buffer.get_all_text().unwrap(), b"world");
3082        assert_eq!(buffer.total_bytes(), 5);
3083    }
3084
3085    #[test]
3086    fn test_delete_from_middle() {
3087        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
3088        buffer.delete_bytes(5, 1);
3089
3090        assert_eq!(buffer.get_all_text().unwrap(), b"helloworld");
3091        assert_eq!(buffer.total_bytes(), 10);
3092    }
3093
3094    #[test]
3095    fn test_delete_from_end() {
3096        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
3097        buffer.delete_bytes(6, 5);
3098
3099        assert_eq!(buffer.get_all_text().unwrap(), b"hello ");
3100        assert_eq!(buffer.total_bytes(), 6);
3101    }
3102
3103    #[test]
3104    fn test_delete_with_newlines() {
3105        let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
3106        buffer.delete_bytes(5, 7); // Delete "\nworld\n"
3107
3108        assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
3109        assert_eq!(buffer.line_count(), Some(1));
3110    }
3111
3112    #[test]
3113    fn test_offset_position_conversions() {
3114        let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
3115
3116        let pos = buffer.offset_to_position(0);
3117        assert_eq!(pos, Some(Position { line: 0, column: 0 }));
3118
3119        let pos = buffer.offset_to_position(6);
3120        assert_eq!(pos, Some(Position { line: 1, column: 0 }));
3121
3122        let offset = buffer.position_to_offset(Position { line: 1, column: 0 });
3123        assert_eq!(offset, 6);
3124    }
3125
3126    #[test]
3127    fn test_insert_at_position() {
3128        let mut buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
3129        buffer.insert_at_position(Position { line: 1, column: 0 }, b"beautiful ".to_vec());
3130
3131        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nbeautiful world");
3132    }
3133
3134    #[test]
3135    fn test_delete_range() {
3136        let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
3137
3138        let start = Position { line: 0, column: 5 };
3139        let end = Position { line: 2, column: 0 };
3140        buffer.delete_range(start, end);
3141
3142        assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
3143    }
3144
3145    #[test]
3146    fn test_get_line() {
3147        let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
3148
3149        assert_eq!(buffer.get_line(0), Some(b"hello\n".to_vec()));
3150        assert_eq!(buffer.get_line(1), Some(b"world\n".to_vec()));
3151        assert_eq!(buffer.get_line(2), Some(b"test".to_vec()));
3152        assert_eq!(buffer.get_line(3), None);
3153    }
3154
3155    #[test]
3156    fn test_multiple_operations() {
3157        let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());
3158
3159        buffer.insert_bytes(0, b"start\n".to_vec());
3160        assert_eq!(buffer.line_count(), Some(4));
3161
3162        buffer.delete_bytes(6, 6); // Delete "line1\n"
3163        assert_eq!(buffer.line_count(), Some(3));
3164
3165        buffer.insert_bytes(6, b"new\n".to_vec());
3166        assert_eq!(buffer.line_count(), Some(4));
3167
3168        let text = buffer.get_all_text().unwrap();
3169        assert_eq!(text, b"start\nnew\nline2\nline3");
3170    }
3171
3172    #[test]
3173    fn test_get_text_range() {
3174        let buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
3175
3176        assert_eq!(buffer.get_text_range(0, 5), Some(b"hello".to_vec()));
3177        assert_eq!(buffer.get_text_range(6, 5), Some(b"world".to_vec()));
3178        assert_eq!(buffer.get_text_range(0, 11), Some(b"hello world".to_vec()));
3179    }
3180
3181    #[test]
3182    fn test_empty_operations() {
3183        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
3184
3185        buffer.insert_bytes(2, Vec::new());
3186        assert_eq!(buffer.get_all_text().unwrap(), b"hello");
3187
3188        buffer.delete_bytes(2, 0);
3189        assert_eq!(buffer.get_all_text().unwrap(), b"hello");
3190    }
3191
3192    #[test]
3193    fn test_sequential_inserts_at_beginning() {
3194        // Regression test for piece tree duplicate insertion bug
3195        let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
3196
3197        // Delete all
3198        buffer.delete_bytes(0, 12);
3199        assert_eq!(buffer.get_all_text().unwrap(), b"");
3200
3201        // Insert 'a' at 0
3202        buffer.insert_bytes(0, vec![b'a']);
3203        assert_eq!(buffer.get_all_text().unwrap(), b"a");
3204
3205        // Insert 'b' at 0 (should give "ba")
3206        buffer.insert_bytes(0, vec![b'b']);
3207        assert_eq!(buffer.get_all_text().unwrap(), b"ba");
3208    }
3209
3210    // ===== Phase 1-3: Large File Support Tests =====
3211
    /// Tests for large-file (lazy loading) support, covering three phases:
    /// Phase 1 — `Option<usize>` line counts (`None` when line indexing is
    /// unavailable), Phase 2 — the loaded/unloaded `BufferData` states of
    /// `StringBuffer`, and Phase 3 — threshold-driven large-file detection
    /// with chunked on-demand loading, plus save regressions.
    mod large_file_support {
        use super::*;
        use crate::model::piece_tree::StringBuffer;
        use std::fs::File;
        use std::io::Write;
        use tempfile::TempDir;

        // Phase 1: Option<usize> Type Safety Tests

        #[test]
        fn test_line_feed_count_is_some_for_loaded_buffer() {
            // An eagerly-constructed buffer indexes its newlines (2 here).
            let buffer = StringBuffer::new(0, b"hello\nworld\ntest".to_vec());
            assert_eq!(buffer.line_feed_count(), Some(2));
        }

        #[test]
        fn test_line_feed_count_is_none_for_unloaded_buffer() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            // An unloaded buffer cannot know its newline count yet.
            let buffer = StringBuffer::new_unloaded(0, file_path, 0, 100);
            assert_eq!(buffer.line_feed_count(), None);
        }

        #[test]
        fn test_line_count_is_some_for_small_buffer() {
            // Small in-memory buffers always carry line metadata.
            let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
            assert_eq!(buffer.line_count(), Some(3));
        }

        #[test]
        fn test_piece_tree_works_with_none_line_count() {
            // Create a buffer with no line count information
            let buffer = StringBuffer::new_loaded(0, b"hello\nworld".to_vec(), false);
            assert_eq!(buffer.line_feed_count(), None);

            // Create piece tree without line feed count
            use crate::model::piece_tree::{BufferLocation, PieceTree};
            let tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, None);

            // line_count should return None
            assert_eq!(tree.line_count(), None);
        }

        // Phase 2: BufferData Enum Tests

        #[test]
        fn test_buffer_data_loaded_variant() {
            // compute_lines=true: data and line starts are both available.
            let data = b"hello world".to_vec();
            let buffer = StringBuffer::new_loaded(0, data.clone(), true);

            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&data[..]));
            assert!(buffer.get_line_starts().is_some());
        }

        #[test]
        fn test_buffer_data_loaded_without_line_starts() {
            // compute_lines=false: data is present but line starts are skipped.
            let data = b"hello\nworld".to_vec();
            let buffer = StringBuffer::new_loaded(0, data.clone(), false);

            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&data[..]));
            assert_eq!(buffer.get_line_starts(), None); // No line indexing
        }

        #[test]
        fn test_buffer_data_unloaded_variant() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            let buffer = StringBuffer::new_unloaded(0, file_path.clone(), 0, 100);

            // Nothing is available until the buffer is loaded.
            assert!(!buffer.is_loaded());
            assert_eq!(buffer.get_data(), None);
            assert_eq!(buffer.get_line_starts(), None);
        }

        #[test]
        fn test_buffer_load_method() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            // Create test file
            let test_data = b"hello world";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Create unloaded buffer
            let mut buffer = StringBuffer::new_unloaded(0, file_path, 0, test_data.len());
            assert!(!buffer.is_loaded());

            // Load the buffer using local filesystem
            let fs = crate::model::filesystem::StdFileSystem;
            buffer.load(&fs).unwrap();

            // Now it should be loaded
            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&test_data[..]));
        }

        #[test]
        fn test_string_buffer_new_vs_new_loaded() {
            let data = b"hello\nworld".to_vec();

            // StringBuffer::new should compute line starts
            let buf1 = StringBuffer::new(0, data.clone());
            assert!(buf1.is_loaded());
            assert!(buf1.get_line_starts().is_some());
            assert_eq!(buf1.line_feed_count(), Some(1));

            // StringBuffer::new_loaded with compute_lines=false should not
            let buf2 = StringBuffer::new_loaded(0, data.clone(), false);
            assert!(buf2.is_loaded());
            assert_eq!(buf2.get_line_starts(), None);
            assert_eq!(buf2.line_feed_count(), None);
        }

        // Phase 3: Large File Detection Tests

        #[test]
        fn test_load_small_file_eager_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("small.txt");

            // Create a small file (10 bytes < 100MB threshold)
            let test_data = b"hello\ntest";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Load with default threshold
            let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();

            // Should be eagerly loaded (not large_file mode)
            assert!(!buffer.large_file);
            assert_eq!(buffer.total_bytes(), test_data.len());
            assert_eq!(buffer.line_count(), Some(2)); // Has line indexing
            assert_eq!(buffer.get_all_text().unwrap(), test_data);

            // The buffer should be loaded
            assert!(buffer.buffers[0].is_loaded());
        }

        #[test]
        fn test_load_large_file_lazy_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large.txt");

            // Create a "large" file by using a small threshold
            let test_data = b"hello\nworld\ntest";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Load with threshold of 10 bytes (file is 17 bytes, so it's "large")
            let buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();

            // Should be in large_file mode
            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), test_data.len());

            // Should NOT have line indexing
            assert_eq!(buffer.line_count(), None);

            // The buffer should be unloaded
            assert!(!buffer.buffers[0].is_loaded());
            assert_eq!(buffer.buffers[0].get_data(), None);
        }

        /// Test that reproduces issue #657: Search on large plain text files
        ///
        /// The bug: When a large file is opened with lazy loading, buffer.to_string()
        /// returns None because some buffers are unloaded. This causes search to fail
        /// with "Buffer not fully loaded" error.
        ///
        /// The fix: Use get_text_range_mut() which loads the buffer on demand.
        #[test]
        fn test_issue_657_search_on_large_file_unloaded_buffer() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_search_test.txt");

            // Create test content with a searchable string
            let test_data = b"line1\nline2\nSEARCH_TARGET\nline4\nline5";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Load with small threshold to force lazy loading
            let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();

            // Verify we're in large file mode with unloaded buffer
            assert!(buffer.large_file, "Buffer should be in large file mode");
            assert!(
                !buffer.buffers[0].is_loaded(),
                "Buffer should be unloaded initially"
            );

            // REPRODUCE THE BUG: to_string() returns None for unloaded buffers
            // This is what the old perform_search() code did, causing the error
            assert!(
                buffer.to_string().is_none(),
                "BUG REPRODUCED: to_string() returns None for unloaded buffer"
            );

            // THE FIX: get_text_range_mut() loads the buffer on demand
            let total_bytes = buffer.len();
            let content = buffer.get_text_range_mut(0, total_bytes).unwrap();
            let content_str = String::from_utf8_lossy(&content);

            // Verify the content is now available and contains our search target
            assert!(
                content_str.contains("SEARCH_TARGET"),
                "FIX WORKS: get_text_range_mut() loaded the buffer and found the search target"
            );

            // After loading, to_string() should also work
            assert!(
                buffer.to_string().is_some(),
                "After get_text_range_mut(), to_string() should work"
            );
        }

        #[test]
        fn test_large_file_threshold_boundary() {
            let temp_dir = TempDir::new().unwrap();

            // Test exactly at threshold
            let file_path = temp_dir.path().join("at_threshold.txt");
            let test_data = vec![b'x'; 100];
            File::create(&file_path)
                .unwrap()
                .write_all(&test_data)
                .unwrap();

            // Load with threshold of 100 bytes - should be large file (>= threshold)
            let buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
            assert!(buffer.large_file);

            // Test just below threshold
            let file_path2 = temp_dir.path().join("below_threshold.txt");
            let test_data2 = vec![b'x'; 99];
            File::create(&file_path2)
                .unwrap()
                .write_all(&test_data2)
                .unwrap();

            // Load with threshold of 100 bytes - should be small file (< threshold)
            let buffer2 = TextBuffer::load_from_file(&file_path2, 100, test_fs()).unwrap();
            assert!(!buffer2.large_file);
        }

        #[test]
        fn test_large_file_default_threshold() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            // Create a small file
            File::create(&file_path)
                .unwrap()
                .write_all(b"hello")
                .unwrap();

            // Load with threshold 0 - should use DEFAULT_LARGE_FILE_THRESHOLD
            let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();

            // 5 bytes < 100MB, so should not be large file
            assert!(!buffer.large_file);
        }

        #[test]
        fn test_large_file_has_correct_piece_tree_structure() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large.txt");

            let test_data = b"hello world";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Load as large file
            let buffer = TextBuffer::load_from_file(&file_path, 5, test_fs()).unwrap();

            // Should have correct total bytes
            assert_eq!(buffer.total_bytes(), test_data.len());

            // Should have 1 buffer
            assert_eq!(buffer.buffers.len(), 1);

            // Buffer should be unloaded
            assert!(!buffer.buffers[0].is_loaded());
        }

        #[test]
        fn test_empty_large_file() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("empty.txt");

            // Create an empty file
            File::create(&file_path).unwrap();

            // Load as large file
            let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();

            // Empty file is handled gracefully
            assert_eq!(buffer.total_bytes(), 0);
            assert!(buffer.is_empty());
        }

        #[test]
        fn test_large_file_basic_api_operations() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_test.txt");

            // Create a test file with known content
            let test_data = b"line1\nline2\nline3\nline4\n";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Load as large file (use small threshold to trigger large file mode)
            let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();

            // Verify it's in large file mode
            assert!(buffer.large_file);
            assert_eq!(buffer.line_count(), None); // No line indexing

            // Test basic access functions
            assert_eq!(buffer.total_bytes(), test_data.len());
            assert!(!buffer.is_empty());
            assert_eq!(buffer.len(), test_data.len());

            // Test reading operations using get_text_range_mut (lazy loads on demand)
            let range_result = buffer.get_text_range_mut(0, 5).unwrap();
            assert_eq!(range_result, b"line1");

            let range_result2 = buffer.get_text_range_mut(6, 5).unwrap();
            assert_eq!(range_result2, b"line2");

            // Test get_all_text (via get_text_range after lazy loading)
            let all_text = buffer.get_all_text().unwrap();
            assert_eq!(all_text, test_data);

            // Test slice_bytes method
            assert_eq!(buffer.slice_bytes(0..5), b"line1");

            // Test basic editing operations
            // Insert at offset 0
            buffer.insert_bytes(0, b"prefix_".to_vec());
            assert_eq!(buffer.total_bytes(), test_data.len() + 7);
            assert!(buffer.is_modified());

            // Verify the insertion worked
            let text_after_insert = buffer.get_all_text().unwrap();
            assert_eq!(&text_after_insert[0..7], b"prefix_");
            assert_eq!(&text_after_insert[7..12], b"line1");

            // Delete some bytes
            buffer.delete_bytes(0, 7);
            assert_eq!(buffer.total_bytes(), test_data.len());

            // Verify deletion worked - should be back to original
            let text_after_delete = buffer.get_all_text().unwrap();
            assert_eq!(text_after_delete, test_data);

            // Insert at end
            let end_offset = buffer.total_bytes();
            buffer.insert_bytes(end_offset, b"suffix".to_vec());
            assert_eq!(buffer.total_bytes(), test_data.len() + 6);

            // Verify end insertion
            let final_text = buffer.get_all_text().unwrap();
            assert!(final_text.ends_with(b"suffix"));
            assert_eq!(&final_text[0..test_data.len()], test_data);

            // Test offset_to_position
            // Note: Without line indexing, position tracking is limited
            // but byte-level operations still work
            let pos = buffer.offset_to_position(0).unwrap();
            assert_eq!(pos.column, 0);

            // Test position_to_offset
            let offset = buffer.position_to_offset(Position { line: 0, column: 0 });
            assert_eq!(offset, 0);

            // Test replace operations
            let replace_result = buffer.replace_range(0..5, "START");
            assert!(replace_result);

            let text_after_replace = buffer.get_all_text().unwrap();
            assert!(text_after_replace.starts_with(b"START"));
        }

        #[test]
        fn test_large_file_chunk_based_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("huge.txt");

            // Create a file larger than LOAD_CHUNK_SIZE (1MB)
            // We'll create a 3MB file with a repeating pattern so we can verify chunks
            let chunk_size = LOAD_CHUNK_SIZE; // 1MB
            let file_size = chunk_size * 3; // 3MB

            // Pattern: "AAAA...AAAA" (1MB of A's), "BBBB...BBBB" (1MB of B's), "CCCC...CCCC" (1MB of C's)
            let mut file = File::create(&file_path).unwrap();
            file.write_all(&vec![b'A'; chunk_size]).unwrap();
            file.write_all(&vec![b'B'; chunk_size]).unwrap();
            file.write_all(&vec![b'C'; chunk_size]).unwrap();
            file.flush().unwrap();

            // Load as large file (use threshold of 1 byte to ensure large file mode)
            let mut buffer = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();

            // Verify it's in large file mode
            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), file_size);

            // Buffer should be unloaded initially
            assert!(!buffer.buffers[0].is_loaded());

            // Read from the first chunk (should load only first 1MB)
            let first_chunk_data = buffer.get_text_range_mut(0, 1024).unwrap();
            assert_eq!(first_chunk_data.len(), 1024);
            assert!(first_chunk_data.iter().all(|&b| b == b'A'));

            // Read from the middle chunk (offset = 1MB, should load second 1MB)
            let second_chunk_data = buffer.get_text_range_mut(chunk_size, 1024).unwrap();
            assert_eq!(second_chunk_data.len(), 1024);
            assert!(second_chunk_data.iter().all(|&b| b == b'B'));

            // Read from the last chunk (offset = 2MB, should load third 1MB)
            let third_chunk_data = buffer.get_text_range_mut(chunk_size * 2, 1024).unwrap();
            assert_eq!(third_chunk_data.len(), 1024);
            assert!(third_chunk_data.iter().all(|&b| b == b'C'));

            // Verify we can read across chunk boundaries
            // Read from middle of first chunk to middle of second chunk
            let cross_chunk_offset = chunk_size - 512;
            let cross_chunk_data = buffer.get_text_range_mut(cross_chunk_offset, 1024).unwrap();
            assert_eq!(cross_chunk_data.len(), 1024);
            // First 512 bytes should be 'A', next 512 bytes should be 'B'
            assert!(cross_chunk_data[..512].iter().all(|&b| b == b'A'));
            assert!(cross_chunk_data[512..].iter().all(|&b| b == b'B'));

            // After chunk-based loading, verify the piece tree has been split
            // The number of buffers should be greater than 1 (original + chunks)
            assert!(
                buffer.buffers.len() > 1,
                "Expected multiple buffers after chunk-based loading, got {}",
                buffer.buffers.len()
            );

            // Test that editing still works after chunk-based loading
            buffer.insert_bytes(0, b"PREFIX".to_vec());
            assert_eq!(buffer.total_bytes(), file_size + 6);

            let after_insert = buffer.get_text_range_mut(0, 6).unwrap();
            assert_eq!(after_insert, b"PREFIX");

            // Verify the original data is still there after the prefix
            let after_prefix = buffer.get_text_range_mut(6, 10).unwrap();
            assert!(after_prefix.iter().all(|&b| b == b'A'));

            // Most importantly: validate the entire buffer content matches the original file
            // Create a fresh buffer to read the original file
            let mut buffer2 = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();

            // Read the entire file in chunks and verify each chunk
            let chunk_read_size = 64 * 1024; // Read in 64KB chunks for efficiency
            let mut offset = 0;
            while offset < file_size {
                let bytes_to_read = chunk_read_size.min(file_size - offset);
                let chunk_data = buffer2.get_text_range_mut(offset, bytes_to_read).unwrap();

                // Determine which section of the file we're reading
                let first_mb_end = chunk_size;
                let second_mb_end = chunk_size * 2;

                // Validate the data based on which MB section we're in
                for (i, &byte) in chunk_data.iter().enumerate() {
                    let file_offset = offset + i;
                    let expected = if file_offset < first_mb_end {
                        b'A'
                    } else if file_offset < second_mb_end {
                        b'B'
                    } else {
                        b'C'
                    };
                    assert_eq!(
                        byte, expected,
                        "Mismatch at file offset {}: expected {}, got {}",
                        file_offset, expected as char, byte as char
                    );
                }

                offset += bytes_to_read;
            }
        }

        /// Test that save_to_file works correctly with partially loaded large files
        /// This is a regression test for a bug where saving would silently produce
        /// an empty file if any buffer regions were still unloaded.
        #[test]
        fn test_large_file_incremental_save() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_save_test.txt");

            // Create a small file but use tiny threshold to trigger large file mode
            let chunk_size = 1000; // 1KB chunks
            let file_size = chunk_size * 2; // 2KB total

            let mut file = File::create(&file_path).unwrap();
            // First half: 'A' repeated
            file.write_all(&vec![b'A'; chunk_size]).unwrap();
            // Second half: 'B' repeated
            file.write_all(&vec![b'B'; chunk_size]).unwrap();
            file.flush().unwrap();

            // Load as large file (threshold of 100 bytes)
            let mut buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), file_size);

            // Only read from the beginning - this loads only a small region
            let first_bytes = buffer.get_text_range_mut(0, 50).unwrap();
            assert!(first_bytes.iter().all(|&b| b == b'A'));

            // Make an edit at the beginning
            buffer.insert_bytes(0, b"PREFIX_".to_vec());

            // Save to a new file (to avoid issues with reading while writing same file)
            let save_path = temp_dir.path().join("saved.txt");
            buffer.save_to_file(&save_path).unwrap();

            // Verify the saved file
            let saved_content = std::fs::read(&save_path).unwrap();

            // Check total size: original + "PREFIX_" (7 bytes)
            assert_eq!(
                saved_content.len(),
                file_size + 7,
                "Saved file should be {} bytes, got {}",
                file_size + 7,
                saved_content.len()
            );

            // Check prefix
            assert_eq!(&saved_content[..7], b"PREFIX_", "Should start with PREFIX_");

            // Check that first chunk (after prefix) contains A's
            assert!(
                saved_content[7..100].iter().all(|&b| b == b'A'),
                "First chunk after prefix should be A's"
            );

            // Check that second chunk contains B's (this was unloaded!)
            let second_chunk_start = 7 + chunk_size;
            assert!(
                saved_content[second_chunk_start..second_chunk_start + 100]
                    .iter()
                    .all(|&b| b == b'B'),
                "Second chunk should be B's (was unloaded, should be preserved)"
            );
        }

        /// Test that save_to_file handles edits at multiple positions
        #[test]
        fn test_large_file_save_with_multiple_edits() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("multi_edit.txt");

            // Create a ~5KB file with numbered lines for easier verification
            let mut content = Vec::new();
            for i in 0..100 {
                content.extend_from_slice(
                    format!("Line {:04}: padding to make it longer\n", i).as_bytes(),
                );
            }
            let original_len = content.len();
            std::fs::write(&file_path, &content).unwrap();

            // Load as large file (threshold of 500 bytes)
            let mut buffer = TextBuffer::load_from_file(&file_path, 500, test_fs()).unwrap();
            assert!(
                buffer.line_count().is_none(),
                "Should be in large file mode"
            );

            // Edit at the beginning
            buffer.insert_bytes(0, b"[START]".to_vec());

            // Edit somewhere in the middle (load that region first)
            let mid_offset = original_len / 2;
            let _mid_bytes = buffer.get_text_range_mut(mid_offset + 7, 10).unwrap(); // +7 for our insert
            buffer.insert_bytes(mid_offset + 7, b"[MIDDLE]".to_vec());

            // Save
            let save_path = temp_dir.path().join("multi_edit_saved.txt");
            buffer.save_to_file(&save_path).unwrap();

            // Verify
            let saved = std::fs::read_to_string(&save_path).unwrap();

            assert!(
                saved.starts_with("[START]Line 0000"),
                "Should start with our edit"
            );
            assert!(saved.contains("[MIDDLE]"), "Should contain middle edit");
            assert!(saved.contains("Line 0099"), "Should preserve end of file");

            // Verify total length
            let expected_len = original_len + 7 + 8; // [START] + [MIDDLE]
            assert_eq!(
                saved.len(),
                expected_len,
                "Length should be original + edits"
            );
        }
    }
3838
3839    // ===== Offset to Position Tests =====
3840    // These tests focus on the offset_to_position correctness
3841
3842    #[test]
3843    fn test_offset_to_position_simple() {
3844        // Create a buffer with known line structure
3845        // Line 0: "a\n" (bytes 0-1, newline at 1)
3846        // Line 1: "b\n" (bytes 2-3, newline at 3)
3847        // Line 2: "c\n" (bytes 4-5, newline at 5)
3848        // Line 3: "d" (bytes 6, no newline)
3849        let content = b"a\nb\nc\nd";
3850        let buffer = TextBuffer::from_bytes(content.to_vec(), test_fs());
3851
3852        // Verify specific positions
3853        let pos = buffer
3854            .offset_to_position(0)
3855            .expect("small buffer should have line metadata");
3856        assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
3857        assert_eq!(pos.column, 0);
3858
3859        let pos = buffer
3860            .offset_to_position(1)
3861            .expect("small buffer should have line metadata");
3862        assert_eq!(pos.line, 0, "Byte 1 (newline) should be on line 0");
3863        assert_eq!(pos.column, 1);
3864
3865        let pos = buffer
3866            .offset_to_position(2)
3867            .expect("small buffer should have line metadata");
3868        assert_eq!(pos.line, 1, "Byte 2 should be on line 1");
3869        assert_eq!(pos.column, 0);
3870
3871        let pos = buffer
3872            .offset_to_position(3)
3873            .expect("small buffer should have line metadata");
3874        assert_eq!(pos.line, 1, "Byte 3 (newline) should be on line 1");
3875        assert_eq!(pos.column, 1);
3876
3877        let pos = buffer
3878            .offset_to_position(4)
3879            .expect("small buffer should have line metadata");
3880        assert_eq!(pos.line, 2, "Byte 4 should be on line 2");
3881        assert_eq!(pos.column, 0);
3882
3883        let pos = buffer
3884            .offset_to_position(6)
3885            .expect("small buffer should have line metadata");
3886        assert_eq!(pos.line, 3, "Byte 6 should be on line 3");
3887        assert_eq!(pos.column, 0);
3888    }
3889
3890    #[test]
3891    fn test_offset_to_position_after_insert() {
3892        // Start with simple content
3893        let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());
3894
3895        // Insert at position 2 (start of line 1)
3896        buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
3897
3898        // After insert, buffer should be: "a\nx\nb\n"
3899        // Line 0: "a\n" (bytes 0-1)
3900        // Line 1: "x\n" (bytes 2-3)
3901        // Line 2: "b\n" (bytes 4-5)
3902
3903        let pos = buffer
3904            .offset_to_position(0)
3905            .expect("small buffer should have line metadata");
3906        assert_eq!(pos.line, 0, "Byte 0 should still be on line 0");
3907
3908        let pos = buffer
3909            .offset_to_position(2)
3910            .expect("small buffer should have line metadata");
3911        assert_eq!(
3912            pos.line, 1,
3913            "Byte 2 (start of inserted line) should be on line 1"
3914        );
3915
3916        let pos = buffer
3917            .offset_to_position(4)
3918            .expect("small buffer should have line metadata");
3919        assert_eq!(
3920            pos.line, 2,
3921            "Byte 4 (start of 'b') should be on line 2 after insert"
3922        );
3923    }
3924
3925    #[test]
3926    fn test_offset_to_position_empty_lines() {
3927        // Test with empty lines: "\n\n\n"
3928        let buffer = TextBuffer::from_bytes(b"\n\n\n".to_vec(), test_fs());
3929
3930        // Line 0: "\n" (byte 0)
3931        // Line 1: "\n" (byte 1)
3932        // Line 2: "\n" (byte 2)
3933        // Line 3: "" (empty, after last newline)
3934
3935        let pos = buffer
3936            .offset_to_position(0)
3937            .expect("small buffer should have line metadata");
3938        assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
3939
3940        let pos = buffer
3941            .offset_to_position(1)
3942            .expect("small buffer should have line metadata");
3943        assert_eq!(pos.line, 1, "Byte 1 should be on line 1");
3944
3945        let pos = buffer
3946            .offset_to_position(2)
3947            .expect("small buffer should have line metadata");
3948        assert_eq!(pos.line, 2, "Byte 2 should be on line 2");
3949
3950        let pos = buffer
3951            .offset_to_position(3)
3952            .expect("small buffer should have line metadata");
3953        assert_eq!(pos.line, 3, "Byte 3 (EOF) should be on line 3");
3954    }
3955
3956    #[test]
3957    fn test_offset_to_position_long_lines() {
3958        // Test with long lines to ensure it's not just line counting
3959        let mut content = Vec::new();
3960        content.extend_from_slice(b"aaaaaaaaaa\n"); // Line 0: 11 bytes (10 'a's + newline)
3961        content.extend_from_slice(b"bbbbbbbbbb\n"); // Line 1: 11 bytes
3962        content.extend_from_slice(b"cccccccccc"); // Line 2: 10 bytes (no newline)
3963
3964        let buffer = TextBuffer::from_bytes(content.clone(), test_fs());
3965
3966        // Test positions at start of each line
3967        let pos = buffer
3968            .offset_to_position(0)
3969            .expect("small buffer should have line metadata");
3970        assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
3971        assert_eq!(pos.column, 0);
3972
3973        let pos = buffer
3974            .offset_to_position(11)
3975            .expect("small buffer should have line metadata");
3976        assert_eq!(pos.line, 1, "Byte 11 (start of line 1) should be on line 1");
3977        assert_eq!(pos.column, 0);
3978
3979        let pos = buffer
3980            .offset_to_position(22)
3981            .expect("small buffer should have line metadata");
3982        assert_eq!(pos.line, 2, "Byte 22 (start of line 2) should be on line 2");
3983        assert_eq!(pos.column, 0);
3984
3985        // Test mid-line positions
3986        let pos = buffer
3987            .offset_to_position(5)
3988            .expect("small buffer should have line metadata");
3989        assert_eq!(pos.line, 0, "Byte 5 should be on line 0");
3990        assert_eq!(pos.column, 5);
3991
3992        let pos = buffer
3993            .offset_to_position(16)
3994            .expect("small buffer should have line metadata");
3995        assert_eq!(pos.line, 1, "Byte 16 should be on line 1");
3996        assert_eq!(pos.column, 5);
3997    }
3998
3999    #[test]
4000    fn test_line_iterator_with_offset_to_position() {
4001        // This combines line iterator with offset_to_position to find issues
4002        let mut buffer = TextBuffer::from_bytes(b"line0\nline1\nline2\n".to_vec(), test_fs());
4003
4004        // Test creating line iterator at various positions
4005        for byte_pos in 0..=buffer.len() {
4006            let iter = buffer.line_iterator(byte_pos, 80);
4007            let iter_pos = iter.current_position();
4008            let expected_line = buffer
4009                .offset_to_position(byte_pos)
4010                .expect("small buffer should have line metadata")
4011                .line;
4012            let expected_line_start = buffer.position_to_offset(Position {
4013                line: expected_line,
4014                column: 0,
4015            });
4016
4017            assert_eq!(
4018                iter_pos, expected_line_start,
4019                "LineIterator at byte {} should position at line start {} but got {}",
4020                byte_pos, expected_line_start, iter_pos
4021            );
4022        }
4023    }
4024
4025    #[test]
4026    fn test_piece_tree_line_count_after_insert() {
4027        // Debug the piece tree structure after insert
4028        let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());
4029
4030        // Insert at line 1, column 0
4031        buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
4032
4033        // Manually verify line counts
4034        let content = buffer.slice_bytes(0..buffer.len());
4035        let newline_count = content.iter().filter(|&&b| b == b'\n').count();
4036        let expected_line_count = newline_count + 1;
4037        let actual_line_count = buffer.line_count();
4038
4039        assert_eq!(
4040            actual_line_count,
4041            Some(expected_line_count),
4042            "Line count mismatch after insert"
4043        );
4044    }
4045
    #[test]
    fn test_position_to_lsp_position_after_modification() {
        // This test demonstrates a bug in the piece tree's offset_to_position
        // where column calculation is incorrect after buffer modifications.
        // The position_to_lsp_position function works around this by using
        // line_start_offset to calculate the column correctly.
        //
        // NOTE(review): the steps below are order-sensitive — every hard-coded
        // byte offset (23, 7, 25, 21) is only valid at its exact point in the
        // edit sequence, so do not reorder these statements.

        // Initial content: "fn foo(val: i32) {\n    val + 1\n}\n"
        let initial = b"fn foo(val: i32) {\n    val + 1\n}\n";
        let mut buffer = TextBuffer::from_bytes(initial.to_vec(), test_fs());

        // Verify initial positions work correctly
        // Position 23 is 'v' of second "val" on line 1
        let (line, char) = buffer.position_to_lsp_position(23);
        assert_eq!(line, 1, "Initial: position 23 should be on line 1");
        assert_eq!(char, 4, "Initial: position 23 should be at char 4");

        // Simulate rename: delete "val" at position 23 (line 1, char 4) and insert "value"
        // Position 23 = line 1, char 4; Position 26 = line 1, char 7
        buffer.delete_range(
            Position { line: 1, column: 4 },
            Position { line: 1, column: 7 },
        );
        buffer.insert_bytes(23, b"value".to_vec()); // Insert "value"

        // Also rename the first occurrence
        // Position 7 = line 0, char 7; Position 10 = line 0, char 10
        buffer.delete_range(
            Position { line: 0, column: 7 },
            Position {
                line: 0,
                column: 10,
            },
        );
        buffer.insert_bytes(7, b"value".to_vec()); // Insert "value"

        // Buffer is now: "fn foo(value: i32) {\n    value + 1\n}\n"
        let content = String::from_utf8_lossy(&buffer.get_all_text().unwrap()).to_string();
        assert_eq!(content, "fn foo(value: i32) {\n    value + 1\n}\n");

        // Position 25 is now 'v' of second "value" on line 1
        // Line 0: "fn foo(value: i32) {\n" = 21 chars (positions 0-20)
        // Line 1: "    value + 1\n" starts at position 21
        // Position 25 = 21 + 4 = line 1, char 4

        // The workaround in position_to_lsp_position should give correct result
        let (line, char) = buffer.position_to_lsp_position(25);
        assert_eq!(
            line, 1,
            "After modification: position 25 should be on line 1"
        );
        assert_eq!(
            char, 4,
            "After modification: position 25 should be at char 4"
        );

        // Also verify position 21 (start of line 1) works
        let (line, char) = buffer.position_to_lsp_position(21);
        assert_eq!(line, 1, "Position 21 should be on line 1");
        assert_eq!(char, 0, "Position 21 should be at char 0 (start of line)");
    }
4107
4108    #[test]
4109    fn test_detect_crlf() {
4110        assert_eq!(
4111            TextBuffer::detect_line_ending(b"hello\r\nworld\r\n"),
4112            LineEnding::CRLF
4113        );
4114    }
4115
4116    #[test]
4117    fn test_detect_lf() {
4118        assert_eq!(
4119            TextBuffer::detect_line_ending(b"hello\nworld\n"),
4120            LineEnding::LF
4121        );
4122    }
4123
4124    #[test]
4125    fn test_normalize_crlf() {
4126        let input = b"hello\r\nworld\r\n".to_vec();
4127        let output = TextBuffer::normalize_line_endings(input);
4128        assert_eq!(output, b"hello\nworld\n");
4129    }
4130
4131    #[test]
4132    fn test_normalize_empty() {
4133        let input = Vec::new();
4134        let output = TextBuffer::normalize_line_endings(input);
4135        assert_eq!(output, Vec::<u8>::new());
4136    }
4137
4138    /// Regression test: get_all_text() returns empty for large files with unloaded regions
4139    ///
4140    /// This was the root cause of a bug where recovery auto-save would save 0 bytes
4141    /// for large files, causing data loss on crash recovery.
4142    ///
4143    /// The fix is to use get_text_range_mut() which handles lazy loading.
4144    #[test]
4145    fn test_get_all_text_returns_empty_for_unloaded_buffers() {
4146        use tempfile::TempDir;
4147        let temp_dir = TempDir::new().unwrap();
4148        let file_path = temp_dir.path().join("large_test.txt");
4149
4150        // Create a 50KB file
4151        let original_content = "X".repeat(50_000);
4152        std::fs::write(&file_path, &original_content).unwrap();
4153
4154        // Load with small threshold to trigger large file mode
4155        let mut buffer = TextBuffer::load_from_file(&file_path, 1024, test_fs()).unwrap();
4156        assert!(buffer.large_file, "Should be in large file mode");
4157        assert!(!buffer.buffers[0].is_loaded(), "Buffer should be unloaded");
4158
4159        // Make a small edit
4160        buffer.insert_bytes(0, b"EDITED: ".to_vec());
4161
4162        // get_all_text() now returns None for unloaded buffers instead of empty
4163        // This is the correct behavior - it signals that content is not available
4164        let content_immutable = buffer.get_all_text();
4165
4166        // get_all_text() returns None because it uses get_text_range() which
4167        // returns None for unloaded regions
4168        assert!(
4169            content_immutable.is_none(),
4170            "get_all_text() should return None for large files with unloaded regions. \
4171             Got Some({} bytes) instead of None.",
4172            content_immutable.as_ref().map(|c| c.len()).unwrap_or(0)
4173        );
4174
4175        // CORRECT BEHAVIOR: get_text_range_mut() handles lazy loading
4176        let total = buffer.total_bytes();
4177        let content_lazy = buffer.get_text_range_mut(0, total).unwrap();
4178        assert_eq!(
4179            content_lazy.len(),
4180            50_000 + 8,
4181            "get_text_range_mut() should return all content with lazy loading"
4182        );
4183        assert!(
4184            String::from_utf8_lossy(&content_lazy).starts_with("EDITED: "),
4185            "Content should start with our edit"
4186        );
4187    }
4188
4189    // ===== Line Ending Conversion Tests =====
4190
4191    mod line_ending_conversion {
4192        use super::*;
4193
4194        #[test]
4195        fn test_convert_lf_to_crlf() {
4196            let input = b"Line 1\nLine 2\nLine 3\n";
4197            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
4198            assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
4199        }
4200
4201        #[test]
4202        fn test_convert_crlf_to_lf() {
4203            let input = b"Line 1\r\nLine 2\r\nLine 3\r\n";
4204            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
4205            assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
4206        }
4207
4208        #[test]
4209        fn test_convert_cr_to_lf() {
4210            let input = b"Line 1\rLine 2\rLine 3\r";
4211            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
4212            assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
4213        }
4214
4215        #[test]
4216        fn test_convert_mixed_to_crlf() {
4217            // Mixed line endings: LF, CRLF, CR
4218            let input = b"Line 1\nLine 2\r\nLine 3\r";
4219            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
4220            assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
4221        }
4222
4223        #[test]
4224        fn test_convert_lf_to_lf_is_noop() {
4225            let input = b"Line 1\nLine 2\nLine 3\n";
4226            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
4227            assert_eq!(result, input.to_vec());
4228        }
4229
4230        #[test]
4231        fn test_convert_empty_content() {
4232            let input = b"";
4233            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
4234            assert_eq!(result, b"".to_vec());
4235        }
4236
4237        #[test]
4238        fn test_convert_no_line_endings() {
4239            let input = b"No line endings here";
4240            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
4241            assert_eq!(result, b"No line endings here".to_vec());
4242        }
4243
4244        #[test]
4245        fn test_set_line_ending_marks_modified() {
4246            let mut buffer = TextBuffer::from_bytes(b"Hello\nWorld\n".to_vec(), test_fs());
4247            assert!(!buffer.is_modified());
4248
4249            buffer.set_line_ending(LineEnding::CRLF);
4250            assert!(buffer.is_modified());
4251        }
4252
4253        #[test]
4254        fn test_set_default_line_ending_does_not_mark_modified() {
4255            let mut buffer = TextBuffer::empty(test_fs());
4256            assert!(!buffer.is_modified());
4257
4258            buffer.set_default_line_ending(LineEnding::CRLF);
4259            assert!(!buffer.is_modified());
4260            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
4261        }
4262
4263        #[test]
4264        fn test_save_to_file_converts_lf_to_crlf() {
4265            use tempfile::TempDir;
4266
4267            let temp_dir = TempDir::new().unwrap();
4268            let file_path = temp_dir.path().join("test_lf_to_crlf.txt");
4269
4270            // Create a file with LF line endings
4271            let original_content = b"Line 1\nLine 2\nLine 3\n";
4272            std::fs::write(&file_path, original_content).unwrap();
4273
4274            // Load the file
4275            let mut buffer =
4276                TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs())
4277                    .unwrap();
4278            assert_eq!(buffer.line_ending(), LineEnding::LF);
4279
4280            // Change line ending to CRLF
4281            buffer.set_line_ending(LineEnding::CRLF);
4282            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
4283            assert!(buffer.is_modified());
4284
4285            // Save the file
4286            buffer.save_to_file(&file_path).unwrap();
4287
4288            // Read back and verify CRLF
4289            let saved_bytes = std::fs::read(&file_path).unwrap();
4290            assert_eq!(&saved_bytes, b"Line 1\r\nLine 2\r\nLine 3\r\n");
4291        }
4292
4293        #[test]
4294        fn test_save_to_file_converts_crlf_to_lf() {
4295            use tempfile::TempDir;
4296
4297            let temp_dir = TempDir::new().unwrap();
4298            let file_path = temp_dir.path().join("test_crlf_to_lf.txt");
4299
4300            // Create a file with CRLF line endings
4301            let original_content = b"Line 1\r\nLine 2\r\nLine 3\r\n";
4302            std::fs::write(&file_path, original_content).unwrap();
4303
4304            // Load the file
4305            let mut buffer =
4306                TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs())
4307                    .unwrap();
4308            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
4309
4310            // Change line ending to LF
4311            buffer.set_line_ending(LineEnding::LF);
4312            assert_eq!(buffer.line_ending(), LineEnding::LF);
4313            assert!(buffer.is_modified());
4314
4315            // Save the file
4316            buffer.save_to_file(&file_path).unwrap();
4317
4318            // Read back and verify LF (no CRLF)
4319            let saved_bytes = std::fs::read(&file_path).unwrap();
4320            assert_eq!(&saved_bytes, b"Line 1\nLine 2\nLine 3\n");
4321        }
4322
4323        #[test]
4324        #[cfg(unix)]
4325        fn test_save_to_unwritable_file() -> anyhow::Result<()> {
4326            use std::fs::Permissions;
4327            use std::os::unix::fs::PermissionsExt;
4328            use tempfile::TempDir;
4329
4330            let temp_dir = TempDir::new().unwrap();
4331            let unwritable_dir = temp_dir.path().join("unwritable_dir");
4332            std::fs::create_dir(&unwritable_dir)?;
4333
4334            let file_path = unwritable_dir.join("unwritable.txt");
4335            std::fs::write(&file_path, "original content")?;
4336
4337            // Make directory unwritable to prevent rename/temp file creation
4338            std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;
4339
4340            let mut buffer = TextBuffer::from_bytes(b"new content".to_vec(), test_fs());
4341            let result = buffer.save_to_file(&file_path);
4342
4343            // Verify that it returns SudoSaveRequired
4344            match result {
4345                Err(e) => {
4346                    if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
4347                        assert_eq!(sudo_err.dest_path, file_path);
4348                        assert!(sudo_err.temp_path.exists());
4349                        // Cleanup temp file
4350                        let _ = std::fs::remove_file(&sudo_err.temp_path);
4351                    } else {
4352                        panic!("Expected SudoSaveRequired error, got: {:?}", e);
4353                    }
4354                }
4355                Ok(_) => panic!("Expected error, but save succeeded"),
4356            }
4357
4358            Ok(())
4359        }
4360
4361        #[test]
4362        #[cfg(unix)]
4363        fn test_save_to_unwritable_directory() -> anyhow::Result<()> {
4364            use std::fs::Permissions;
4365            use std::os::unix::fs::PermissionsExt;
4366            use tempfile::TempDir;
4367
4368            let temp_dir = TempDir::new().unwrap();
4369            let unwritable_dir = temp_dir.path().join("unwritable_dir");
4370            std::fs::create_dir(&unwritable_dir)?;
4371
4372            let file_path = unwritable_dir.join("test.txt");
4373
4374            // Make directory unwritable (no write allowed)
4375            std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;
4376
4377            let mut buffer = TextBuffer::from_bytes(b"content".to_vec(), test_fs());
4378            let result = buffer.save_to_file(&file_path);
4379
4380            match result {
4381                Err(e) => {
4382                    if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
4383                        assert_eq!(sudo_err.dest_path, file_path);
4384                        assert!(sudo_err.temp_path.exists());
4385                        // It should be in /tmp because the directory was not writable
4386                        assert!(sudo_err.temp_path.starts_with(std::env::temp_dir()));
4387                        // Cleanup
4388                        let _ = std::fs::remove_file(&sudo_err.temp_path);
4389                    } else {
4390                        panic!("Expected SudoSaveRequired error, got: {:?}", e);
4391                    }
4392                }
4393                Ok(_) => panic!("Expected error, but save succeeded"),
4394            }
4395
4396            Ok(())
4397        }
4398    }
4399}
4400
4401#[cfg(test)]
4402mod property_tests {
4403    use crate::model::filesystem::StdFileSystem;
4404    use std::sync::Arc;
4405
    /// Filesystem handle used by the property tests; backed by the real
    /// standard-library filesystem.
    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }
4409    use super::*;
4410    use proptest::prelude::*;
4411
4412    // Generate text with some newlines
4413    fn text_with_newlines() -> impl Strategy<Value = Vec<u8>> {
4414        prop::collection::vec(
4415            prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n'),],
4416            0..100,
4417        )
4418    }
4419
    /// A single random edit applied to the buffer under test.
    /// Offsets are generated unclamped; each property test clamps them to the
    /// current buffer length before applying the operation.
    #[derive(Debug, Clone)]
    enum Operation {
        /// Insert `text` at byte `offset`.
        Insert { offset: usize, text: Vec<u8> },
        /// Delete up to `bytes` bytes starting at byte `offset`.
        Delete { offset: usize, bytes: usize },
    }
4426
4427    fn operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
4428        prop::collection::vec(
4429            prop_oneof![
4430                (0usize..200, text_with_newlines())
4431                    .prop_map(|(offset, text)| { Operation::Insert { offset, text } }),
4432                (0usize..200, 1usize..50)
4433                    .prop_map(|(offset, bytes)| { Operation::Delete { offset, bytes } }),
4434            ],
4435            0..50,
4436        )
4437    }
4438
4439    proptest! {
4440        #[test]
4441        fn prop_line_count_consistent(text in text_with_newlines()) {
4442            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
4443
4444            let newline_count = text.iter().filter(|&&b| b == b'\n').count();
4445            prop_assert_eq!(buffer.line_count(), Some(newline_count + 1));
4446        }
4447
4448        #[test]
4449        fn prop_get_all_text_matches_original(text in text_with_newlines()) {
4450            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
4451            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
4452        }
4453
4454        #[test]
4455        fn prop_insert_increases_size(
4456            text in text_with_newlines(),
4457            offset in 0usize..100,
4458            insert_text in text_with_newlines()
4459        ) {
4460            let mut buffer = TextBuffer::from_bytes(text, test_fs());
4461            let initial_bytes = buffer.total_bytes();
4462
4463            let offset = offset.min(buffer.total_bytes());
4464            buffer.insert_bytes(offset, insert_text.clone());
4465
4466            prop_assert_eq!(buffer.total_bytes(), initial_bytes + insert_text.len());
4467        }
4468
4469        #[test]
4470        fn prop_delete_decreases_size(
4471            text in text_with_newlines(),
4472            offset in 0usize..100,
4473            delete_bytes in 1usize..50
4474        ) {
4475            if text.is_empty() {
4476                return Ok(());
4477            }
4478
4479            let mut buffer = TextBuffer::from_bytes(text, test_fs());
4480            let initial_bytes = buffer.total_bytes();
4481
4482            let offset = offset.min(buffer.total_bytes());
4483            let delete_bytes = delete_bytes.min(buffer.total_bytes() - offset);
4484
4485            if delete_bytes == 0 {
4486                return Ok(());
4487            }
4488
4489            buffer.delete_bytes(offset, delete_bytes);
4490
4491            prop_assert_eq!(buffer.total_bytes(), initial_bytes - delete_bytes);
4492        }
4493
4494        #[test]
4495        fn prop_insert_then_delete_restores_original(
4496            text in text_with_newlines(),
4497            offset in 0usize..100,
4498            insert_text in text_with_newlines()
4499        ) {
4500            let mut buffer = TextBuffer::from_bytes(text.clone(), test_fs());
4501
4502            let offset = offset.min(buffer.total_bytes());
4503            buffer.insert_bytes(offset, insert_text.clone());
4504            buffer.delete_bytes(offset, insert_text.len());
4505
4506            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
4507        }
4508
4509        #[test]
4510        fn prop_offset_position_roundtrip(text in text_with_newlines()) {
4511            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
4512
4513            for offset in 0..text.len() {
4514                let pos = buffer.offset_to_position(offset).expect("offset_to_position should succeed for valid offset");
4515                let back = buffer.position_to_offset(pos);
4516                prop_assert_eq!(back, offset, "Failed roundtrip for offset {}", offset);
4517            }
4518        }
4519
4520        #[test]
4521        fn prop_get_text_range_valid(
4522            text in text_with_newlines(),
4523            offset in 0usize..100,
4524            length in 1usize..50
4525        ) {
4526            if text.is_empty() {
4527                return Ok(());
4528            }
4529
4530            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
4531            let offset = offset.min(buffer.total_bytes());
4532            let length = length.min(buffer.total_bytes() - offset);
4533
4534            if length == 0 {
4535                return Ok(());
4536            }
4537
4538            let result = buffer.get_text_range(offset, length);
4539            prop_assert_eq!(result, Some(text[offset..offset + length].to_vec()));
4540        }
4541
4542        #[test]
4543        fn prop_operations_maintain_consistency(operations in operation_strategy()) {
4544            let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
4545            let mut expected_text = b"initial\ntext".to_vec();
4546
4547            for op in operations {
4548                match op {
4549                    Operation::Insert { offset, text } => {
4550                        let offset = offset.min(buffer.total_bytes());
4551                        buffer.insert_bytes(offset, text.clone());
4552
4553                        // Update expected
4554                        let offset = offset.min(expected_text.len());
4555                        expected_text.splice(offset..offset, text);
4556                    }
4557                    Operation::Delete { offset, bytes } => {
4558                        if offset < buffer.total_bytes() {
4559                            let bytes = bytes.min(buffer.total_bytes() - offset);
4560                            buffer.delete_bytes(offset, bytes);
4561
4562                            // Update expected
4563                            if offset < expected_text.len() {
4564                                let bytes = bytes.min(expected_text.len() - offset);
4565                                expected_text.drain(offset..offset + bytes);
4566                            }
4567                        }
4568                    }
4569                }
4570            }
4571
4572            prop_assert_eq!(buffer.get_all_text().unwrap(), expected_text);
4573        }
4574
4575        #[test]
4576        fn prop_line_count_never_zero(operations in operation_strategy()) {
4577            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());
4578
4579            for op in operations {
4580                match op {
4581                    Operation::Insert { offset, text } => {
4582                        let offset = offset.min(buffer.total_bytes());
4583                        buffer.insert_bytes(offset, text);
4584                    }
4585                    Operation::Delete { offset, bytes } => {
4586                        buffer.delete_bytes(offset, bytes);
4587                    }
4588                }
4589
4590                // Document always has at least 1 line
4591                prop_assert!(buffer.line_count().unwrap_or(1) >= 1);
4592            }
4593        }
4594
4595        #[test]
4596        fn prop_total_bytes_never_negative(operations in operation_strategy()) {
4597            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());
4598
4599            for op in operations {
4600                match op {
4601                    Operation::Insert { offset, text } => {
4602                        let offset = offset.min(buffer.total_bytes());
4603                        buffer.insert_bytes(offset, text);
4604                    }
4605                    Operation::Delete { offset, bytes } => {
4606                        buffer.delete_bytes(offset, bytes);
4607                    }
4608                }
4609
4610                // Bytes should never overflow
4611                prop_assert!(buffer.total_bytes() < 10_000_000);
4612            }
4613        }
4614
4615        #[test]
4616        fn prop_piece_tree_and_line_index_stay_synced(operations in operation_strategy()) {
4617            let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());
4618
4619            for op in operations {
4620                match op {
4621                    Operation::Insert { offset, text } => {
4622                        let offset = offset.min(buffer.total_bytes());
4623                        buffer.insert_bytes(offset, text);
4624                    }
4625                    Operation::Delete { offset, bytes } => {
4626                        buffer.delete_bytes(offset, bytes);
4627                    }
4628                }
4629
4630                // Verify we can still convert between offsets and positions
4631                if buffer.total_bytes() > 0 {
4632                    let mid_offset = buffer.total_bytes() / 2;
4633                    if let Some(pos) = buffer.offset_to_position(mid_offset) {
4634                        let back = buffer.position_to_offset(pos);
4635
4636                        // Should be able to roundtrip
4637                        prop_assert!(back <= buffer.total_bytes());
4638                    }
4639                }
4640            }
4641        }
4642
4643        #[test]
4644        fn prop_write_recipe_matches_content(text in text_with_newlines()) {
4645            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
4646            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");
4647
4648            // Apply the recipe to get the output
4649            let output = apply_recipe(&buffer, &recipe);
4650            prop_assert_eq!(output, text, "Recipe output should match original content");
4651        }
4652
4653        #[test]
4654        fn prop_write_recipe_after_edits(
4655            initial_text in text_with_newlines(),
4656            operations in operation_strategy()
4657        ) {
4658            let mut buffer = TextBuffer::from_bytes(initial_text, test_fs());
4659
4660            // Apply random operations
4661            for op in operations {
4662                match op {
4663                    Operation::Insert { offset, text } => {
4664                        let offset = offset.min(buffer.total_bytes());
4665                        buffer.insert_bytes(offset, text);
4666                    }
4667                    Operation::Delete { offset, bytes } => {
4668                        if offset < buffer.total_bytes() {
4669                            let bytes = bytes.min(buffer.total_bytes() - offset);
4670                            if bytes > 0 {
4671                                buffer.delete_bytes(offset, bytes);
4672                            }
4673                        }
4674                    }
4675                }
4676            }
4677
4678            // Build recipe and verify it matches buffer content
4679            let expected = buffer.get_all_text().unwrap();
4680            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");
4681            let output = apply_recipe(&buffer, &recipe);
4682
4683            prop_assert_eq!(output, expected, "Recipe output should match buffer content after edits");
4684        }
4685
4686        #[test]
4687        fn prop_write_recipe_copy_ops_valid(
4688            text in prop::collection::vec(prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n')], 10..200),
4689            edit_offset in 0usize..100,
4690            edit_text in text_with_newlines()
4691        ) {
4692            use tempfile::TempDir;
4693
4694            // Create a temp file with initial content
4695            let temp_dir = TempDir::new().unwrap();
4696            let file_path = temp_dir.path().join("test.txt");
4697            std::fs::write(&file_path, &text).unwrap();
4698
4699            // Load the file (creates unloaded buffer regions)
4700            let mut buffer = TextBuffer::load_from_file(&file_path, 1024 * 1024, test_fs()).unwrap();
4701
4702            // Make an edit in the middle
4703            let edit_offset = edit_offset.min(buffer.total_bytes());
4704            buffer.insert_bytes(edit_offset, edit_text.clone());
4705
4706            // Build recipe - should have Copy ops for unmodified regions
4707            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");
4708
4709            // Verify recipe produces correct output
4710            let expected = buffer.get_all_text().unwrap();
4711            let output = apply_recipe(&buffer, &recipe);
4712            prop_assert_eq!(output, expected, "Recipe with Copy ops should match buffer content");
4713
4714            // Verify we have at least some Copy ops if the file was large enough
4715            // (Copy ops reference unloaded regions from the original file)
4716            if text.len() > 100 && edit_offset > 10 {
4717                let has_copy = recipe.actions.iter().any(|a| matches!(a, RecipeAction::Copy { .. }));
4718                // Note: We don't assert this because line ending conversion or other factors
4719                // might cause all Insert ops, which is valid behavior
4720                let _ = has_copy;
4721            }
4722        }
4723    }
4724
4725    /// Helper to apply a WriteRecipe and return the resulting bytes
4726    fn apply_recipe(buffer: &TextBuffer, recipe: &WriteRecipe) -> Vec<u8> {
4727        let mut output = Vec::new();
4728        for action in &recipe.actions {
4729            match action {
4730                RecipeAction::Copy { offset, len } => {
4731                    if let Some(src_path) = &recipe.src_path {
4732                        let data = buffer
4733                            .fs
4734                            .read_range(src_path, *offset, *len as usize)
4735                            .expect("read_range should succeed for Copy op");
4736                        output.extend_from_slice(&data);
4737                    } else {
4738                        panic!("Copy action without source path");
4739                    }
4740                }
4741                RecipeAction::Insert { index } => {
4742                    output.extend_from_slice(&recipe.insert_data[*index]);
4743                }
4744            }
4745        }
4746        output
4747    }
4748
4749    #[test]
4750    fn test_detect_binary_text_files() {
4751        // Plain text should not be detected as binary
4752        assert!(!TextBuffer::detect_binary(b"Hello, world!"));
4753        assert!(!TextBuffer::detect_binary(b"Line 1\nLine 2\nLine 3"));
4754        assert!(!TextBuffer::detect_binary(b"Tabs\tand\tnewlines\n"));
4755        assert!(!TextBuffer::detect_binary(b"Carriage return\r\n"));
4756
4757        // Empty content is not binary
4758        assert!(!TextBuffer::detect_binary(b""));
4759
4760        // ANSI CSI escape sequences should be treated as text
4761        assert!(!TextBuffer::detect_binary(b"\x1b[31mRed text\x1b[0m"));
4762    }
4763
4764    #[test]
4765    fn test_detect_binary_binary_files() {
4766        // Null bytes indicate binary
4767        assert!(TextBuffer::detect_binary(b"Hello\x00World"));
4768        assert!(TextBuffer::detect_binary(b"\x00"));
4769
4770        // Non-printable control characters (except tab, newline, CR, form feed, vertical tab)
4771        assert!(TextBuffer::detect_binary(b"Text with \x01 control char"));
4772        assert!(TextBuffer::detect_binary(b"\x02\x03\x04"));
4773
4774        // DEL character (0x7F)
4775        assert!(TextBuffer::detect_binary(b"Text with DEL\x7F"));
4776    }
4777
4778    #[test]
4779    fn test_detect_binary_png_file() {
4780        // PNG file signature: 89 50 4E 47 0D 0A 1A 0A
4781        // The 0x1A byte (substitute character) is a control character that triggers binary detection
4782        let png_header: &[u8] = &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
4783        assert!(TextBuffer::detect_binary(png_header));
4784
4785        // Simulate a PNG file with more data after header
4786        let mut png_data = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
4787        png_data.extend_from_slice(b"\x00\x00\x00\x0DIHDR"); // IHDR chunk with null bytes
4788        assert!(TextBuffer::detect_binary(&png_data));
4789    }
4790
4791    #[test]
4792    fn test_detect_binary_other_image_formats() {
4793        // JPEG signature: FF D8 FF
4794        let jpeg_header: &[u8] = &[0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10];
4795        assert!(TextBuffer::detect_binary(jpeg_header));
4796
4797        // GIF signature: GIF89a or GIF87a - contains valid ASCII but typically followed by binary
4798        // GIF header is ASCII but the LSD (Logical Screen Descriptor) contains binary
4799        let gif_data: &[u8] = &[
4800            0x47, 0x49, 0x46, 0x38, 0x39, 0x61, // GIF89a
4801            0x01, 0x00, 0x01, 0x00, // Width=1, Height=1 (little endian)
4802            0x00, // Packed byte
4803            0x00, // Background color index
4804            0x00, // Pixel aspect ratio
4805        ];
4806        // The null bytes in the dimensions trigger binary detection
4807        assert!(TextBuffer::detect_binary(gif_data));
4808
4809        // BMP signature: BM followed by file size (usually contains null bytes)
4810        let bmp_header: &[u8] = &[0x42, 0x4D, 0x00, 0x00, 0x00, 0x00];
4811        assert!(TextBuffer::detect_binary(bmp_header));
4812    }
4813
4814    #[test]
4815    fn test_detect_binary_executable_formats() {
4816        // ELF signature (Linux executables)
4817        let elf_header: &[u8] = &[0x7F, 0x45, 0x4C, 0x46, 0x02, 0x01, 0x01, 0x00];
4818        assert!(TextBuffer::detect_binary(elf_header));
4819
4820        // Mach-O signature (macOS executables) - magic + cpu type/subtype contain null bytes
4821        let macho_header: &[u8] = &[0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x01];
4822        assert!(TextBuffer::detect_binary(macho_header));
4823
4824        // PE/COFF (Windows executables) - MZ header
4825        let pe_header: &[u8] = &[0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00];
4826        assert!(TextBuffer::detect_binary(pe_header));
4827    }
4828}
4829
/// Line data with optional line number.
///
/// Produced by [`TextBufferLineIterator`]; `content` holds the line's bytes
/// lossily decoded as UTF-8, with any trailing newline stripped.
#[derive(Debug, Clone)]
pub struct LineData {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Line content (without trailing newline)
    pub content: String,
    /// Whether this line ends with a newline
    pub has_newline: bool,
    /// Line number (None for large files without line metadata)
    pub line_number: Option<usize>,
}
4842
/// Iterator over lines in a TextBuffer that efficiently tracks line numbers
/// using piece tree metadata (single source of truth).
///
/// All lines are collected up front in `new`, because scanning may need
/// `&mut TextBuffer` to lazily load chunks; iteration itself is then a
/// simple index walk over the cached lines.
pub struct TextBufferLineIterator {
    /// Collected lines (we collect all at once since we need mutable access to load chunks)
    lines: Vec<LineData>,
    /// Current index in the lines vector
    current_index: usize,
    /// Whether there are more lines after these
    pub has_more: bool,
}
4853
4854impl TextBufferLineIterator {
4855    pub(crate) fn new(buffer: &mut TextBuffer, byte_pos: usize, max_lines: usize) -> Result<Self> {
4856        let buffer_len = buffer.len();
4857        if byte_pos >= buffer_len {
4858            return Ok(Self {
4859                lines: Vec::new(),
4860                current_index: 0,
4861                has_more: false,
4862            });
4863        }
4864
4865        // Check if buffer has line metadata (None for large files > 1MB)
4866        let has_line_metadata = buffer.line_count().is_some();
4867
4868        // Determine starting line number by querying piece tree once
4869        // (only if we have line metadata)
4870        let mut current_line = if has_line_metadata {
4871            buffer.offset_to_position(byte_pos).map(|pos| pos.line)
4872        } else {
4873            None
4874        };
4875
4876        let mut lines = Vec::with_capacity(max_lines);
4877        let mut current_offset = byte_pos;
4878        let estimated_line_length = 80; // Use default estimate
4879
4880        // Collect lines by scanning forward
4881        for _ in 0..max_lines {
4882            if current_offset >= buffer_len {
4883                break;
4884            }
4885
4886            let line_start = current_offset;
4887            let line_number = current_line;
4888
4889            // Estimate how many bytes to load for this line
4890            let estimated_max_line_length = estimated_line_length * 3;
4891            let bytes_to_scan = estimated_max_line_length.min(buffer_len - current_offset);
4892
4893            // Load chunk (this handles lazy loading)
4894            let chunk = buffer.get_text_range_mut(current_offset, bytes_to_scan)?;
4895
4896            // Scan for newline
4897            let mut line_len = 0;
4898            let mut found_newline = false;
4899            for &byte in chunk.iter() {
4900                line_len += 1;
4901                if byte == b'\n' {
4902                    found_newline = true;
4903                    break;
4904                }
4905            }
4906
4907            // Handle long lines (rare case)
4908            if !found_newline && current_offset + line_len < buffer_len {
4909                // Line is longer than expected, load more data
4910                let remaining = buffer_len - current_offset - line_len;
4911                let additional_bytes = estimated_max_line_length.min(remaining);
4912                let more_chunk =
4913                    buffer.get_text_range_mut(current_offset + line_len, additional_bytes)?;
4914
4915                let mut extended_chunk = chunk;
4916                extended_chunk.extend_from_slice(&more_chunk);
4917
4918                for &byte in more_chunk.iter() {
4919                    line_len += 1;
4920                    if byte == b'\n' {
4921                        found_newline = true;
4922                        break;
4923                    }
4924                }
4925
4926                let line_string = String::from_utf8_lossy(&extended_chunk[..line_len]).into_owned();
4927                let has_newline = line_string.ends_with('\n');
4928                let content = if has_newline {
4929                    line_string[..line_string.len() - 1].to_string()
4930                } else {
4931                    line_string
4932                };
4933
4934                lines.push(LineData {
4935                    byte_offset: line_start,
4936                    content,
4937                    has_newline,
4938                    line_number,
4939                });
4940
4941                current_offset += line_len;
4942                if has_line_metadata && found_newline {
4943                    current_line = current_line.map(|n| n + 1);
4944                }
4945                continue;
4946            }
4947
4948            // Normal case
4949            let line_string = String::from_utf8_lossy(&chunk[..line_len]).into_owned();
4950            let has_newline = line_string.ends_with('\n');
4951            let content = if has_newline {
4952                line_string[..line_string.len() - 1].to_string()
4953            } else {
4954                line_string
4955            };
4956
4957            lines.push(LineData {
4958                byte_offset: line_start,
4959                content,
4960                has_newline,
4961                line_number,
4962            });
4963
4964            current_offset += line_len;
4965            // Increment line number if we have metadata and found a newline
4966            if has_line_metadata && found_newline {
4967                current_line = current_line.map(|n| n + 1);
4968            }
4969        }
4970
4971        // Check if there are more lines
4972        let has_more = current_offset < buffer_len;
4973
4974        Ok(Self {
4975            lines,
4976            current_index: 0,
4977            has_more,
4978        })
4979    }
4980}
4981
4982impl Iterator for TextBufferLineIterator {
4983    type Item = LineData;
4984
4985    fn next(&mut self) -> Option<Self::Item> {
4986        if self.current_index < self.lines.len() {
4987            let line = self.lines[self.current_index].clone();
4988            self.current_index += 1;
4989            Some(line)
4990        } else {
4991            None
4992        }
4993    }
4994}