fresh/model/
buffer.rs

//! Text buffer that uses PieceTree with integrated line tracking
//! Architecture where the tree is the single source of truth for text and line information
3use crate::model::piece_tree::{
4    BufferData, BufferLocation, Cursor, PieceInfo, PieceRangeIter, PieceTree, Position,
5    StringBuffer, TreeStats,
6};
7use crate::model::piece_tree_diff::PieceTreeDiff;
8use crate::primitives::grapheme;
9use anyhow::{Context, Result};
10use regex::bytes::Regex;
11use std::io::{self, Read, Seek, SeekFrom, Write};
12use std::ops::Range;
13use std::path::{Path, PathBuf};
14use std::sync::Arc;
15
16#[cfg(unix)]
17use std::os::unix::fs::MetadataExt;
18
/// Error returned when a file save operation requires elevated privileges.
///
/// Carries everything needed to complete the save through sudo in a single
/// step, so the original file's ownership and permissions are preserved.
#[derive(Debug, Clone, PartialEq)]
pub struct SudoSaveRequired {
    /// Path to the temporary file containing the new content
    pub temp_path: PathBuf,
    /// Destination path where the file should be saved
    pub dest_path: PathBuf,
    /// Original file owner (UID)
    pub uid: u32,
    /// Original file group (GID)
    pub gid: u32,
    /// Original file permissions (mode)
    pub mode: u32,
}

impl std::fmt::Display for SudoSaveRequired {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let dest = self.dest_path.display();
        write!(
            f,
            "Permission denied saving to {}. Use sudo to complete the operation.",
            dest
        )
    }
}

impl std::error::Error for SudoSaveRequired {}
48
// Large file support configuration
/// Default threshold for considering a file "large" (100 MB).
/// Files at or above this size are opened without line indexing and with
/// lazy loading (see `load_from_file` / `load_large_file`).
pub const DEFAULT_LARGE_FILE_THRESHOLD: usize = 100 * 1024 * 1024;

/// Chunk size to load when lazy loading (1 MB)
/// NOTE(review): consumers of this constant are not visible in this part of
/// the file — confirm it is still referenced by the lazy-loading path.
pub const LOAD_CHUNK_SIZE: usize = 1024 * 1024;

/// Chunk alignment for lazy loading (64 KB)
pub const CHUNK_ALIGNMENT: usize = 64 * 1024;
58
/// Line ending format used in the file
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineEnding {
    /// Unix/Linux/Mac format (\n)
    #[default]
    LF,
    /// Windows format (\r\n)
    CRLF,
    /// Old Mac format (\r) - rare but supported
    CR,
}

impl LineEnding {
    /// The literal byte sequence this line ending writes into a file.
    pub fn as_str(&self) -> &'static str {
        match *self {
            LineEnding::LF => "\n",
            LineEnding::CRLF => "\r\n",
            LineEnding::CR => "\r",
        }
    }

    /// Short label used for the status bar display.
    pub fn display_name(&self) -> &'static str {
        match *self {
            LineEnding::LF => "LF",
            LineEnding::CRLF => "CRLF",
            LineEnding::CR => "CR",
        }
    }
}
90
/// Represents a line number (simplified for new implementation)
/// Legacy enum kept for backwards compatibility - always Absolute now
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineNumber {
    /// Absolute line number - this is the actual line number in the file
    Absolute(usize),
    /// Relative line number (deprecated - now same as Absolute)
    Relative {
        line: usize,
        from_cached_line: usize,
    },
}

impl LineNumber {
    /// Get the underlying zero-based line number, regardless of variant.
    pub fn value(&self) -> usize {
        match self {
            Self::Absolute(line) | Self::Relative { line, .. } => *line,
        }
    }

    /// Check if this is an absolute line number
    pub fn is_absolute(&self) -> bool {
        matches!(self, LineNumber::Absolute(_))
    }

    /// Check if this is a relative line number
    pub fn is_relative(&self) -> bool {
        matches!(self, LineNumber::Relative { .. })
    }

    /// Format the 1-based line number for display; relative numbers get a
    /// leading '~'.
    pub fn format(&self) -> String {
        match self {
            // `to_string` is the idiomatic (and slightly cheaper) form of
            // `format!("{}", ...)` when stringifying a single value.
            Self::Absolute(line) => (line + 1).to_string(),
            Self::Relative { line, .. } => format!("~{}", line + 1),
        }
    }
}
130
/// A text buffer that manages document content using a piece table
/// with integrated line tracking
pub struct TextBuffer {
    /// The piece tree for efficient text manipulation with integrated line tracking
    piece_tree: PieceTree,

    /// Snapshot of the piece tree root at last save (shared via Arc).
    /// Compared against the live root by Arc pointer equality in
    /// `diff_since_saved` as a cheap "unchanged since save" check.
    saved_root: Arc<crate::model::piece_tree::PieceTreeNode>,

    /// List of string buffers containing chunks of text data
    /// Index 0 is typically the original/stored buffer
    /// Additional buffers are added for modifications
    buffers: Vec<StringBuffer>,

    /// Next buffer ID to assign
    next_buffer_id: usize,

    /// Optional file path for persistence
    file_path: Option<PathBuf>,

    /// Has the buffer been modified since last save?
    modified: bool,

    /// Does the buffer have unsaved changes for recovery auto-save?
    /// This is separate from `modified` because recovery auto-save doesn't
    /// clear `modified` (buffer still differs from on-disk file).
    recovery_pending: bool,

    /// Is this a large file (no line indexing, lazy loading enabled)?
    large_file: bool,

    /// Is this a binary file? Binary files are opened read-only and render
    /// unprintable characters as code points.
    is_binary: bool,

    /// Line ending format detected from the file (or default for new files)
    line_ending: LineEnding,

    /// Original line ending format when file was loaded (used for conversion on save)
    /// This tracks what the file had when loaded, so we can detect if the user
    /// changed the line ending format and needs conversion on save.
    original_line_ending: LineEnding,

    /// The file size on disk after the last save.
    /// Used for chunked recovery to know the original file size for reconstruction.
    /// Updated when loading from file or after saving.
    saved_file_size: Option<usize>,

    /// Monotonic version counter for change tracking.
    /// Incremented (with wraparound) on every content modification.
    version: u64,
}
182
183impl TextBuffer {
184    /// Create a new text buffer (with large_file_threshold for backwards compatibility)
185    /// Note: large_file_threshold is ignored in the new implementation
186    pub fn new(_large_file_threshold: usize) -> Self {
187        let piece_tree = PieceTree::empty();
188        let line_ending = LineEnding::default();
189        TextBuffer {
190            saved_root: piece_tree.root(),
191            piece_tree,
192            buffers: vec![StringBuffer::new(0, Vec::new())],
193            next_buffer_id: 1,
194            file_path: None,
195            modified: false,
196            recovery_pending: false,
197            large_file: false,
198            is_binary: false,
199            line_ending,
200            original_line_ending: line_ending,
201            saved_file_size: None,
202            version: 0,
203        }
204    }
205
    /// Current buffer version (monotonic, wraps on overflow)
    pub fn version(&self) -> u64 {
        self.version
    }

    /// Advance the version counter; `wrapping_add` avoids a panic on u64
    /// overflow in debug builds.
    #[inline]
    fn bump_version(&mut self) {
        self.version = self.version.wrapping_add(1);
    }

    /// Mark the buffer as having unsaved changes (both against the on-disk
    /// file and for recovery auto-save) and bump the version counter.
    #[inline]
    fn mark_content_modified(&mut self) {
        self.modified = true;
        self.recovery_pending = true;
        self.bump_version();
    }
222
223    /// Create a text buffer from initial content
224    pub fn from_bytes(content: Vec<u8>) -> Self {
225        let bytes = content.len();
226
227        // Auto-detect line ending format from content
228        let line_ending = Self::detect_line_ending(&content);
229
230        // Create initial StringBuffer with ID 0
231        let buffer = StringBuffer::new(0, content);
232        let line_feed_cnt = buffer.line_feed_count();
233
234        let piece_tree = if bytes > 0 {
235            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
236        } else {
237            PieceTree::empty()
238        };
239
240        let saved_root = piece_tree.root();
241
242        TextBuffer {
243            line_ending,
244            original_line_ending: line_ending,
245            piece_tree,
246            saved_root,
247            buffers: vec![buffer],
248            next_buffer_id: 1,
249            file_path: None,
250            modified: false,
251            recovery_pending: false,
252            large_file: false,
253            is_binary: false,
254            saved_file_size: Some(bytes), // Treat initial content as "saved" state
255            version: 0,
256        }
257    }
258
    /// Create a text buffer from a string.
    /// (`_large_file_threshold` is accepted for backwards compatibility and ignored.)
    pub fn from_str(s: &str, _large_file_threshold: usize) -> Self {
        Self::from_bytes(s.as_bytes().to_vec())
    }
263
264    /// Create an empty text buffer
265    pub fn empty() -> Self {
266        let piece_tree = PieceTree::empty();
267        let saved_root = piece_tree.root();
268        let line_ending = LineEnding::default();
269        TextBuffer {
270            piece_tree,
271            saved_root,
272            buffers: vec![StringBuffer::new(0, Vec::new())],
273            next_buffer_id: 1,
274            file_path: None,
275            modified: false,
276            recovery_pending: false,
277            large_file: false,
278            is_binary: false,
279            line_ending,
280            original_line_ending: line_ending,
281            saved_file_size: None,
282            version: 0,
283        }
284    }
285
286    /// Load a text buffer from a file
287    pub fn load_from_file<P: AsRef<Path>>(
288        path: P,
289        large_file_threshold: usize,
290    ) -> anyhow::Result<Self> {
291        let path = path.as_ref();
292
293        // Get file size to determine loading strategy
294        let metadata = std::fs::metadata(path)?;
295        let file_size = metadata.len() as usize;
296
297        // Use threshold parameter or default
298        let threshold = if large_file_threshold > 0 {
299            large_file_threshold
300        } else {
301            DEFAULT_LARGE_FILE_THRESHOLD
302        };
303
304        // Choose loading strategy based on file size
305        if file_size >= threshold {
306            Self::load_large_file(path, file_size)
307        } else {
308            Self::load_small_file(path)
309        }
310    }
311
312    /// Load a small file with full eager loading and line indexing
313    fn load_small_file<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
314        let path = path.as_ref();
315        let mut file = std::fs::File::open(path)?;
316        let mut contents = Vec::new();
317        file.read_to_end(&mut contents)?;
318
319        // Detect if this is a binary file
320        let is_binary = Self::detect_binary(&contents);
321
322        // Detect line ending format (CRLF/LF/CR) - used for Enter key insertion
323        let line_ending = Self::detect_line_ending(&contents);
324
325        // Keep original line endings - the view layer handles CRLF display
326        let mut buffer = Self::from_bytes(contents);
327        buffer.file_path = Some(path.to_path_buf());
328        buffer.modified = false;
329        buffer.large_file = false;
330        buffer.is_binary = is_binary;
331        buffer.line_ending = line_ending;
332        buffer.original_line_ending = line_ending;
333        Ok(buffer)
334    }
335
336    /// Load a large file with unloaded buffer (no line indexing, lazy loading)
337    fn load_large_file<P: AsRef<Path>>(path: P, file_size: usize) -> anyhow::Result<Self> {
338        use crate::model::piece_tree::{BufferData, BufferLocation};
339
340        let path = path.as_ref();
341
342        // Read a sample of the file to detect if it's binary and line ending format
343        // We read the first 8KB for both binary and line ending detection
344        let (is_binary, line_ending) = {
345            let mut file = std::fs::File::open(path)?;
346            let sample_size = file_size.min(8 * 1024);
347            let mut sample = vec![0u8; sample_size];
348            file.read_exact(&mut sample)?;
349            let is_binary = Self::detect_binary(&sample);
350            let line_ending = Self::detect_line_ending(&sample);
351            (is_binary, line_ending)
352        };
353
354        // Create an unloaded buffer that references the entire file
355        let buffer = StringBuffer {
356            id: 0,
357            data: BufferData::Unloaded {
358                file_path: path.to_path_buf(),
359                file_offset: 0,
360                bytes: file_size,
361            },
362        };
363
364        // Create piece tree with a single piece covering the whole file
365        // No line feed count (None) since we're not computing line indexing
366        let piece_tree = if file_size > 0 {
367            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
368        } else {
369            PieceTree::empty()
370        };
371        let saved_root = piece_tree.root();
372
373        tracing::debug!(
374            "Buffer::load_from_file: loaded {} bytes, saved_file_size={}",
375            file_size,
376            file_size
377        );
378
379        Ok(TextBuffer {
380            piece_tree,
381            saved_root,
382            buffers: vec![buffer],
383            next_buffer_id: 1,
384            file_path: Some(path.to_path_buf()),
385            modified: false,
386            recovery_pending: false,
387            large_file: true,
388            is_binary,
389            line_ending,
390            original_line_ending: line_ending,
391            saved_file_size: Some(file_size),
392            version: 0,
393        })
394    }
395
396    /// Save the buffer to its associated file
397    pub fn save(&mut self) -> anyhow::Result<()> {
398        if let Some(path) = &self.file_path {
399            self.save_to_file(path.clone())
400        } else {
401            anyhow::bail!(io::Error::new(
402                io::ErrorKind::NotFound,
403                "No file path associated with buffer",
404            ))
405        }
406    }
407
    /// Check if we should use in-place writing to preserve file ownership.
    /// Returns true if the file exists and is owned by a different user.
    ///
    /// On Unix, only root or the file owner can change file ownership with chown.
    /// When the current user is not the file owner, using atomic write (temp file + rename)
    /// would change the file's ownership to the current user. To preserve ownership,
    /// we must write directly to the existing file instead.
    ///
    /// (The paragraph about temp-file creation that used to sit here belongs
    /// to `create_temp_file` below.)
    #[cfg(unix)]
    fn should_use_inplace_write(dest_path: &Path) -> bool {
        if let Ok(meta) = std::fs::metadata(dest_path) {
            let file_uid = meta.uid();
            // SAFETY: getuid() has no preconditions and cannot fail.
            let current_uid = unsafe { libc::getuid() };
            // If file is owned by a different user, we should write in-place
            // to preserve ownership (since we can't chown to another user)
            file_uid != current_uid
        } else {
            // File doesn't exist, use normal atomic write
            false
        }
    }
432
    /// Non-Unix fallback: Unix ownership semantics don't apply, so the
    /// atomic temp-file-and-rename path is always used.
    #[cfg(not(unix))]
    fn should_use_inplace_write(_dest_path: &Path) -> bool {
        // On non-Unix platforms, always use atomic write
        false
    }
438
439    fn create_temp_file(dest_path: &Path) -> io::Result<(PathBuf, std::fs::File)> {
440        // Try creating in same directory first
441        let same_dir_temp = dest_path.with_extension("tmp");
442        match std::fs::File::create(&same_dir_temp) {
443            Ok(file) => Ok((same_dir_temp, file)),
444            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
445                // Fallback to system temp directory
446                let temp_dir = std::env::temp_dir();
447                let file_name = dest_path
448                    .file_name()
449                    .unwrap_or_else(|| std::ffi::OsStr::new("fresh-save"));
450                let timestamp = std::time::SystemTime::now()
451                    .duration_since(std::time::UNIX_EPOCH)
452                    .map(|d| d.as_nanos())
453                    .unwrap_or(0);
454                let temp_path = temp_dir.join(format!(
455                    "{}-{}-{}.tmp",
456                    file_name.to_string_lossy(),
457                    std::process::id(),
458                    timestamp
459                ));
460                let file = std::fs::File::create(&temp_path)?;
461                Ok((temp_path, file))
462            }
463            Err(e) => Err(e),
464        }
465    }
466
    /// Save the buffer to a specific file
    ///
    /// This uses incremental saving for large files: instead of loading the entire
    /// file into memory, it streams unmodified regions directly from the source file
    /// and only keeps edited regions in memory.
    ///
    /// If the line ending format has been changed (via set_line_ending), all content
    /// will be converted to the new format during save.
    ///
    /// # Errors
    /// May return a `SudoSaveRequired` (wrapped in `anyhow::Error`) when the
    /// write requires elevated privileges, so the caller can retry via sudo.
    pub fn save_to_file<P: AsRef<Path>>(&mut self, path: P) -> anyhow::Result<()> {
        let dest_path = path.as_ref();
        let total = self.total_bytes();

        // Get original file metadata (permissions, owner, etc.) before writing
        // so we can preserve it after creating/renaming the temp file
        let original_metadata = std::fs::metadata(dest_path).ok();

        // Check if we need to convert line endings
        let needs_conversion = self.line_ending != self.original_line_ending;
        let target_ending = self.line_ending;

        // Determine whether to use in-place writing to preserve file ownership.
        // When the file is owned by a different user (e.g., editing with group write
        // permissions), we must write directly to the file to preserve ownership,
        // since non-root users cannot chown files to other users.
        let use_inplace = Self::should_use_inplace_write(dest_path);

        // Stage A: Create output file (either temp file or open existing for in-place write)
        // NOTE(review): the in-place path truncates the destination before
        // writing, so a crash mid-write loses the old content (no atomicity
        // on this path) — this is the accepted trade-off for keeping ownership.
        let (temp_path, mut out_file) = if use_inplace {
            // In-place write: open existing file with truncate to preserve ownership
            match std::fs::OpenOptions::new()
                .write(true)
                .truncate(true)
                .open(dest_path)
            {
                Ok(file) => (None, file),
                Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                    // Permission denied on in-place write: fall back to atomic write
                    // with temp file. The rename will also fail, triggering SudoSaveRequired.
                    let (path, file) = Self::create_temp_file(dest_path)?;
                    (Some(path), file)
                }
                Err(e) => return Err(e.into()),
            }
        } else {
            // Atomic write: create temp file, will rename later
            let (path, file) = Self::create_temp_file(dest_path)?;
            (Some(path), file)
        };

        if total > 0 {
            // Cache for open source files (for streaming unloaded regions)
            let mut source_file_cache: Option<(PathBuf, std::fs::File)> = None;

            // Iterate through all pieces and write them
            for piece_view in self.piece_tree.iter_pieces_in_range(0, total) {
                let buffer_id = piece_view.location.buffer_id();
                let buffer = self.buffers.get(buffer_id).ok_or_else(|| {
                    io::Error::new(
                        io::ErrorKind::InvalidData,
                        format!("Buffer {} not found", buffer_id),
                    )
                })?;

                match &buffer.data {
                    BufferData::Loaded { data, .. } => {
                        let start = piece_view.buffer_offset;
                        let end = start + piece_view.bytes;
                        let chunk = &data[start..end];

                        if needs_conversion {
                            // Convert line endings before writing.
                            // NOTE(review): conversion is applied per piece; if a
                            // CRLF pair can straddle a piece boundary it would be
                            // seen as a lone CR followed by a lone LF — confirm
                            // `convert_line_endings_to` is boundary-safe or that
                            // pieces never split a CRLF.
                            let converted = Self::convert_line_endings_to(chunk, target_ending);
                            out_file.write_all(&converted)?;
                        } else {
                            // Write directly without conversion
                            out_file.write_all(chunk)?;
                        }
                    }
                    BufferData::Unloaded {
                        file_path,
                        file_offset,
                        ..
                    } => {
                        // Stream from source file, reusing the open handle when
                        // consecutive pieces come from the same file.
                        let source_file = match &mut source_file_cache {
                            Some((cached_path, file)) if cached_path == file_path => file,
                            _ => {
                                let file = std::fs::File::open(file_path)?;
                                source_file_cache = Some((file_path.clone(), file));
                                &mut source_file_cache.as_mut().unwrap().1
                            }
                        };

                        // Seek to the right position in source file
                        let read_offset = *file_offset + piece_view.buffer_offset;
                        source_file.seek(SeekFrom::Start(read_offset as u64))?;

                        // Stream in chunks
                        const STREAM_CHUNK_SIZE: usize = 64 * 1024; // 64KB chunks
                        let mut remaining = piece_view.bytes;
                        let mut chunk_buf = vec![0u8; STREAM_CHUNK_SIZE.min(remaining)];

                        while remaining > 0 {
                            let to_read = remaining.min(chunk_buf.len());
                            source_file.read_exact(&mut chunk_buf[..to_read])?;

                            if needs_conversion {
                                // Convert line endings before writing.
                                // NOTE(review): same CRLF-straddling caveat as
                                // above, here at 64KB chunk boundaries.
                                let converted = Self::convert_line_endings_to(
                                    &chunk_buf[..to_read],
                                    target_ending,
                                );
                                out_file.write_all(&converted)?;
                            } else {
                                // Write directly without conversion
                                out_file.write_all(&chunk_buf[..to_read])?;
                            }
                            remaining -= to_read;
                        }
                    }
                }
            }
        }

        // Ensure all data is written
        out_file.sync_all()?;
        drop(out_file);

        // Stage B & C: Only needed for atomic write (not in-place write)
        if let Some(temp_path) = temp_path {
            // Restore original file permissions/owner before renaming
            if let Some(ref meta) = original_metadata {
                // Best effort restore
                let _ = Self::restore_file_metadata(&temp_path, meta);
            }

            // Stage C: Atomic Replacement or Sudo Fallback
            if let Err(e) = std::fs::rename(&temp_path, dest_path) {
                let is_permission_denied = e.kind() == io::ErrorKind::PermissionDenied;
                // 18 is EXDEV ("cross-device link") on Linux: the temp file
                // landed on a different filesystem, so rename cannot work.
                let is_cross_device = cfg!(unix) && e.raw_os_error() == Some(18);

                if is_cross_device {
                    #[cfg(unix)]
                    {
                        // Cross-filesystem: fall back to copy + delete.
                        match std::fs::copy(&temp_path, dest_path) {
                            Ok(_) => {
                                let _ = std::fs::remove_file(&temp_path);
                            }
                            Err(copy_err) if copy_err.kind() == io::ErrorKind::PermissionDenied => {
                                return Err(self.make_sudo_error(
                                    temp_path,
                                    dest_path,
                                    original_metadata,
                                ));
                            }
                            Err(copy_err) => return Err(copy_err.into()),
                        }
                    }
                } else if is_permission_denied {
                    return Err(self.make_sudo_error(temp_path, dest_path, original_metadata));
                } else {
                    return Err(e.into());
                }
            }
        }
        // For in-place write, we already wrote directly to dest_path,
        // preserving ownership since we modified the existing inode

        // Update saved file size to match the file on disk
        let new_size = std::fs::metadata(dest_path)?.len() as usize;
        tracing::debug!(
            "Buffer::save: updating saved_file_size from {:?} to {}",
            self.saved_file_size,
            new_size
        );
        self.saved_file_size = Some(new_size);

        self.file_path = Some(dest_path.to_path_buf());
        self.mark_saved_snapshot();

        // Update original_line_ending to match what we just saved
        // This prevents repeated conversions on subsequent saves
        self.original_line_ending = self.line_ending;

        Ok(())
    }
653
    /// Finalize buffer state after an external save operation (e.g., via sudo).
    ///
    /// This updates the saved snapshot and file size to match the new state on disk.
    /// Performs the same bookkeeping as the tail of `save_to_file`.
    pub fn finalize_external_save(&mut self, dest_path: PathBuf) -> anyhow::Result<()> {
        let new_size = std::fs::metadata(&dest_path)?.len() as usize;
        self.saved_file_size = Some(new_size);
        self.file_path = Some(dest_path);
        // Re-baseline the diff snapshot and clear `modified`.
        self.mark_saved_snapshot();
        // Avoid re-converting line endings on the next save.
        self.original_line_ending = self.line_ending;
        Ok(())
    }
665
    /// Internal helper to create a SudoSaveRequired error.
    ///
    /// On Unix the original owner/group/permission bits are captured so the
    /// sudo path can restore them; on other platforms, or when the file did
    /// not previously exist, zeros are used as placeholders.
    fn make_sudo_error(
        &self,
        temp_path: PathBuf,
        dest_path: &Path,
        original_metadata: Option<std::fs::Metadata>,
    ) -> anyhow::Error {
        let (uid, gid, mode) = if let Some(meta) = original_metadata {
            #[cfg(unix)]
            {
                // 0o7777 keeps all permission bits, including setuid/setgid/sticky.
                (meta.uid(), meta.gid(), meta.mode() & 0o7777)
            }
            #[cfg(not(unix))]
            (0, 0, 0)
        } else {
            (0, 0, 0)
        };

        anyhow::anyhow!(SudoSaveRequired {
            temp_path,
            dest_path: dest_path.to_path_buf(),
            uid,
            gid,
            mode,
        })
    }
692
    /// Restore file metadata (permissions, owner/group) from original file
    ///
    /// Permission-restore failures propagate; ownership restore is best-effort
    /// (the `chown` return value is deliberately ignored).
    fn restore_file_metadata(path: &Path, original_meta: &std::fs::Metadata) -> anyhow::Result<()> {
        // Restore permissions (works cross-platform)
        std::fs::set_permissions(path, original_meta.permissions())?;

        // On Unix, also restore owner and group
        #[cfg(unix)]
        {
            let uid = original_meta.uid();
            let gid = original_meta.gid();
            // Use libc to set owner/group - ignore errors since we may not have permission
            // (e.g., only root can chown to a different user)
            // SAFETY: `c_path` is a valid NUL-terminated C string that lives
            // for the duration of the `chown` call; `chown` has no other
            // preconditions.
            unsafe {
                use std::os::unix::ffi::OsStrExt;
                let c_path = std::ffi::CString::new(path.as_os_str().as_bytes())
                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
                libc::chown(c_path.as_ptr(), uid, gid);
            }
        }

        Ok(())
    }
715
    /// Get the total number of bytes in the document
    pub fn total_bytes(&self) -> usize {
        self.piece_tree.total_bytes()
    }

    /// Get the total number of lines in the document
    /// Uses the piece tree's integrated line tracking
    /// Returns None if line count is unknown (e.g., for large files without line indexing)
    pub fn line_count(&self) -> Option<usize> {
        self.piece_tree.line_count()
    }

    /// Snapshot the current tree as the saved baseline
    /// Note: clears `modified`, but does not touch `recovery_pending`.
    pub fn mark_saved_snapshot(&mut self) {
        self.saved_root = self.piece_tree.root();
        self.modified = false;
    }
733
    /// Diff the current piece tree against the last saved snapshot.
    ///
    /// This compares actual byte content, not just tree structure. This means
    /// that if you delete text and then paste it back, the diff will correctly
    /// show no changes (even though the tree structure differs).
    ///
    /// Uses a two-phase algorithm for efficiency:
    /// - Phase 1: Fast structure-based diff to find changed byte ranges (O(num_leaves))
    /// - Phase 2: Only compare actual content within changed ranges (O(edit_size))
    ///
    /// This is O(edit_size) instead of O(file_size) for small edits in large files.
    pub fn diff_since_saved(&self) -> PieceTreeDiff {
        // First, quick check: if tree roots are identical (Arc pointer equality),
        // the content is definitely the same.
        // (Constructors and `mark_saved_snapshot` store `piece_tree.root()` into
        // `saved_root`, so pointer equality means no change since the snapshot.)
        if Arc::ptr_eq(&self.saved_root, &self.piece_tree.root()) {
            return PieceTreeDiff {
                equal: true,
                byte_ranges: Vec::new(),
                line_ranges: Some(Vec::new()),
            };
        }

        // Phase 1: Fast structure-based diff to find which byte ranges differ
        // This is O(number of leaves) - very fast even for large files
        let structure_diff = self.diff_trees_by_structure();

        // If structure says trees are equal (same pieces in same order), we're done
        if structure_diff.equal {
            return structure_diff;
        }

        // Phase 2: For small changed regions, verify with actual content comparison
        // This handles the case where different pieces contain identical content
        // (e.g., delete text then paste it back)
        let total_changed_bytes: usize = structure_diff
            .byte_ranges
            .iter()
            .map(|r| r.end.saturating_sub(r.start))
            .sum();

        // Only do content verification if the changed region is reasonably small
        // For large changes, trust the structure-based diff
        const MAX_VERIFY_BYTES: usize = 64 * 1024; // 64KB threshold for verification

        if total_changed_bytes <= MAX_VERIFY_BYTES && !structure_diff.byte_ranges.is_empty() {
            // Check if content in the changed ranges is actually different
            if self.verify_content_differs_in_ranges(&structure_diff.byte_ranges) {
                // Content actually differs - return the structure diff result
                return structure_diff;
            } else {
                // Content is the same despite structure differences (rare case: undo/redo)
                return PieceTreeDiff {
                    equal: true,
                    byte_ranges: Vec::new(),
                    line_ranges: Some(Vec::new()),
                };
            }
        }

        // For large changes or when we can't verify, trust the structure diff
        // (a false "differs" here only costs an unnecessary re-save/re-diff,
        // never a missed change).
        structure_diff
    }
796
797    /// Check if the actual byte content differs in the given ranges.
798    /// Returns true if content differs, false if content is identical.
799    fn verify_content_differs_in_ranges(&self, byte_ranges: &[std::ops::Range<usize>]) -> bool {
800        let saved_bytes = self.tree_total_bytes(&self.saved_root);
801        let current_bytes = self.piece_tree.total_bytes();
802
803        // Different total sizes means content definitely differs
804        if saved_bytes != current_bytes {
805            return true;
806        }
807
808        // For each changed range, compare the actual bytes
809        for range in byte_ranges {
810            if range.start >= range.end {
811                continue;
812            }
813
814            // Extract bytes from saved tree for this range
815            let saved_slice =
816                self.extract_range_from_tree(&self.saved_root, range.start, range.end);
817            // Extract bytes from current tree for this range
818            let current_slice = self.get_text_range(range.start, range.end);
819
820            match (saved_slice, current_slice) {
821                (Some(saved), Some(current)) => {
822                    if saved != current {
823                        return true; // Content differs
824                    }
825                }
826                _ => {
827                    // Couldn't read content, assume it differs to be safe
828                    return true;
829                }
830            }
831        }
832
833        // All ranges have identical content
834        false
835    }
836
837    /// Extract a byte range from a saved tree root
838    fn extract_range_from_tree(
839        &self,
840        root: &Arc<crate::model::piece_tree::PieceTreeNode>,
841        start: usize,
842        end: usize,
843    ) -> Option<Vec<u8>> {
844        let mut result = Vec::with_capacity(end.saturating_sub(start));
845        self.collect_range_from_node(root, start, end, 0, &mut result)?;
846        Some(result)
847    }
848
    /// Recursively collect bytes from a range within a tree node.
    ///
    /// `range_start..range_end` are absolute document offsets; `node_offset`
    /// is the absolute document offset of the first byte covered by `node`.
    /// Bytes are appended to `result` in document order.
    ///
    /// Returns `None` if any backing buffer (or its data) is unavailable,
    /// which aborts the whole collection via `?` in the caller.
    fn collect_range_from_node(
        &self,
        node: &Arc<crate::model::piece_tree::PieceTreeNode>,
        range_start: usize,
        range_end: usize,
        node_offset: usize,
        result: &mut Vec<u8>,
    ) -> Option<()> {
        use crate::model::piece_tree::PieceTreeNode;

        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                // Absolute document offset of the first byte in the right subtree.
                let left_end = node_offset + left_bytes;

                // Check if range overlaps with left subtree
                if range_start < left_end {
                    self.collect_range_from_node(
                        left,
                        range_start,
                        range_end,
                        node_offset,
                        result,
                    )?;
                }

                // Check if range overlaps with right subtree
                // (right subtree starts at left_end in document coordinates)
                if range_end > left_end {
                    self.collect_range_from_node(right, range_start, range_end, left_end, result)?;
                }
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                ..
            } => {
                let node_end = node_offset + bytes;

                // Check if this leaf overlaps with our range
                if range_start < node_end && range_end > node_offset {
                    let buf = self.buffers.get(location.buffer_id())?;
                    let data = buf.get_data()?;

                    // Calculate the slice within this leaf, clamped to the
                    // leaf's extent. saturating_sub handles a range that
                    // begins before this leaf; min clamps one that ends after.
                    let leaf_start = range_start.saturating_sub(node_offset);
                    let leaf_end = (range_end - node_offset).min(*bytes);

                    if leaf_start < leaf_end {
                        // `offset` is the piece's start within its buffer.
                        let slice = data.get(*offset + leaf_start..*offset + leaf_end)?;
                        result.extend_from_slice(slice);
                    }
                }
            }
        }
        Some(())
    }
911
912    /// Helper to get total bytes from a tree root
913    fn tree_total_bytes(&self, root: &Arc<crate::model::piece_tree::PieceTreeNode>) -> usize {
914        use crate::model::piece_tree::PieceTreeNode;
915        match root.as_ref() {
916            PieceTreeNode::Internal {
917                left_bytes, right, ..
918            } => left_bytes + self.tree_total_bytes(right),
919            PieceTreeNode::Leaf { bytes, .. } => *bytes,
920        }
921    }
922
923    /// Structure-based diff comparing piece tree leaves
924    fn diff_trees_by_structure(&self) -> PieceTreeDiff {
925        crate::model::piece_tree_diff::diff_piece_trees(
926            &self.saved_root,
927            &self.piece_tree.root(),
928            &|leaf, start, len| {
929                if len == 0 {
930                    return Some(0);
931                }
932                let buf = self.buffers.get(leaf.location.buffer_id())?;
933                let data = buf.get_data()?;
934                let start = leaf.offset + start;
935                let end = start + len;
936                let slice = data.get(start..end)?;
937                let line_feeds = slice.iter().filter(|&&b| b == b'\n').count();
938                Some(line_feeds)
939            },
940        )
941    }
942
943    /// Convert a byte offset to a line/column position
944    pub fn offset_to_position(&self, offset: usize) -> Option<Position> {
945        self.piece_tree
946            .offset_to_position(offset, &self.buffers)
947            .map(|(line, column)| Position { line, column })
948    }
949
950    /// Convert a line/column position to a byte offset
951    pub fn position_to_offset(&self, position: Position) -> usize {
952        self.piece_tree
953            .position_to_offset(position.line, position.column, &self.buffers)
954    }
955
956    /// Insert text at the given byte offset
957    pub fn insert_bytes(&mut self, offset: usize, text: Vec<u8>) -> Cursor {
958        if text.is_empty() {
959            return self.piece_tree.cursor_at_offset(offset);
960        }
961
962        // Mark as modified (updates version)
963        self.mark_content_modified();
964
965        // Count line feeds in the text to insert
966        let line_feed_cnt = Some(text.iter().filter(|&&b| b == b'\n').count());
967
968        // Optimization: try to append to existing buffer if insertion is at piece boundary
969        let (buffer_location, buffer_offset, text_len) =
970            if let Some(append_info) = self.try_append_to_existing_buffer(offset, &text) {
971                append_info
972            } else {
973                // Create a new StringBuffer for this insertion
974                let buffer_id = self.next_buffer_id;
975                self.next_buffer_id += 1;
976                let buffer = StringBuffer::new(buffer_id, text.clone());
977                self.buffers.push(buffer);
978                (BufferLocation::Added(buffer_id), 0, text.len())
979            };
980
981        // Update piece tree (need to pass buffers reference)
982        self.piece_tree.insert(
983            offset,
984            buffer_location,
985            buffer_offset,
986            text_len,
987            line_feed_cnt,
988            &self.buffers,
989        )
990    }
991
992    /// Try to append to an existing buffer if insertion point aligns with buffer end
993    /// Returns (BufferLocation, buffer_offset, text_len) if append succeeds, None otherwise
994    fn try_append_to_existing_buffer(
995        &mut self,
996        offset: usize,
997        text: &[u8],
998    ) -> Option<(BufferLocation, usize, usize)> {
999        // Only optimize for non-empty insertions after existing content
1000        if text.is_empty() || offset == 0 {
1001            return None;
1002        }
1003
1004        // Find the piece containing the byte just before the insertion point
1005        // This avoids the saturating_sub issue
1006        let piece_info = self.piece_tree.find_by_offset(offset - 1)?;
1007
1008        // Check if insertion is exactly at the end of this piece
1009        // offset_in_piece tells us where (offset-1) is within the piece
1010        // For insertion to be at piece end, (offset-1) must be the last byte
1011        let offset_in_piece = piece_info.offset_in_piece?;
1012        if offset_in_piece + 1 != piece_info.bytes {
1013            return None; // Not at the end of the piece
1014        }
1015
1016        // Only append to "Added" buffers (not original Stored buffers)
1017        if !matches!(piece_info.location, BufferLocation::Added(_)) {
1018            return None;
1019        }
1020
1021        let buffer_id = piece_info.location.buffer_id();
1022        let buffer = self.buffers.get_mut(buffer_id)?;
1023
1024        // Check if buffer is loaded
1025        let buffer_len = buffer.get_data()?.len();
1026
1027        // Check if this piece ends exactly at the end of its buffer
1028        if piece_info.offset + piece_info.bytes != buffer_len {
1029            return None;
1030        }
1031
1032        // Perfect! Append to this buffer
1033        let append_offset = buffer.append(text);
1034
1035        Some((piece_info.location, append_offset, text.len()))
1036    }
1037
1038    /// Insert text (from &str) at the given byte offset
1039    pub fn insert(&mut self, offset: usize, text: &str) {
1040        self.insert_bytes(offset, text.as_bytes().to_vec());
1041    }
1042
1043    /// Insert text at a line/column position
1044    /// This now uses the optimized piece_tree.insert_at_position() for a single traversal
1045    pub fn insert_at_position(&mut self, position: Position, text: Vec<u8>) -> Cursor {
1046        if text.is_empty() {
1047            let offset = self.position_to_offset(position);
1048            return self.piece_tree.cursor_at_offset(offset);
1049        }
1050
1051        self.mark_content_modified();
1052
1053        // Count line feeds in the text to insert
1054        let line_feed_cnt = text.iter().filter(|&&b| b == b'\n').count();
1055
1056        // Create a new StringBuffer for this insertion
1057        let buffer_id = self.next_buffer_id;
1058        self.next_buffer_id += 1;
1059        let buffer = StringBuffer::new(buffer_id, text.clone());
1060        self.buffers.push(buffer);
1061
1062        // Use the optimized position-based insertion (single traversal)
1063        self.piece_tree.insert_at_position(
1064            position.line,
1065            position.column,
1066            BufferLocation::Added(buffer_id),
1067            0,
1068            text.len(),
1069            line_feed_cnt,
1070            &self.buffers,
1071        )
1072    }
1073
1074    /// Delete text starting at the given byte offset
1075    pub fn delete_bytes(&mut self, offset: usize, bytes: usize) {
1076        if bytes == 0 || offset >= self.total_bytes() {
1077            return;
1078        }
1079
1080        // Update piece tree
1081        self.piece_tree.delete(offset, bytes, &self.buffers);
1082
1083        self.mark_content_modified();
1084    }
1085
1086    /// Delete text in a range
1087    pub fn delete(&mut self, range: Range<usize>) {
1088        if range.end > range.start {
1089            self.delete_bytes(range.start, range.end - range.start);
1090        }
1091    }
1092
1093    /// Delete text in a line/column range
1094    /// This now uses the optimized piece_tree.delete_position_range() for a single traversal
1095    pub fn delete_range(&mut self, start: Position, end: Position) {
1096        // Use the optimized position-based deletion
1097        self.piece_tree.delete_position_range(
1098            start.line,
1099            start.column,
1100            end.line,
1101            end.column,
1102            &self.buffers,
1103        );
1104        self.mark_content_modified();
1105    }
1106
1107    /// Replace the entire buffer content with new content
1108    /// This is an O(n) operation that rebuilds the piece tree in a single pass,
1109    /// avoiding the O(n²) complexity of applying individual edits.
1110    ///
1111    /// This is used for bulk operations like "replace all" where applying
1112    /// individual edits would be prohibitively slow.
1113    pub fn replace_content(&mut self, new_content: &str) {
1114        let bytes = new_content.len();
1115        let content_bytes = new_content.as_bytes().to_vec();
1116
1117        // Count line feeds in the new content
1118        let line_feed_cnt = content_bytes.iter().filter(|&&b| b == b'\n').count();
1119
1120        // Create a new StringBuffer for the new content
1121        let buffer_id = self.next_buffer_id;
1122        self.next_buffer_id += 1;
1123        let buffer = StringBuffer::new(buffer_id, content_bytes);
1124        self.buffers.push(buffer);
1125
1126        // Rebuild the piece tree with a single piece containing all the new content
1127        if bytes > 0 {
1128            self.piece_tree = PieceTree::new(
1129                BufferLocation::Added(buffer_id),
1130                0,
1131                bytes,
1132                Some(line_feed_cnt),
1133            );
1134        } else {
1135            self.piece_tree = PieceTree::empty();
1136        }
1137
1138        self.mark_content_modified();
1139    }
1140
1141    /// Restore a previously saved piece tree (for undo of BulkEdit)
1142    /// This is O(1) because PieceTree uses Arc internally
1143    pub fn restore_piece_tree(&mut self, tree: &Arc<PieceTree>) {
1144        self.piece_tree = (**tree).clone();
1145        self.mark_content_modified();
1146    }
1147
1148    /// Get the current piece tree as an Arc (for saving before BulkEdit)
1149    /// This is O(1) - creates an Arc wrapper around a clone of the tree
1150    pub fn snapshot_piece_tree(&self) -> Arc<PieceTree> {
1151        Arc::new(self.piece_tree.clone())
1152    }
1153
1154    /// Apply bulk edits efficiently in a single pass
1155    /// Returns the net change in bytes
1156    pub fn apply_bulk_edits(&mut self, edits: &[(usize, usize, &str)]) -> isize {
1157        // Pre-allocate buffers for all insert texts (only non-empty texts)
1158        // This avoids the borrow conflict in the closure
1159        // IMPORTANT: Only add entries for non-empty texts because the closure
1160        // is only called for edits with non-empty insert text
1161        let mut buffer_info: Vec<(BufferLocation, usize, usize, Option<usize>)> = Vec::new();
1162
1163        for (_, _, text) in edits {
1164            if !text.is_empty() {
1165                let buffer_id = self.next_buffer_id;
1166                self.next_buffer_id += 1;
1167                let content = text.as_bytes().to_vec();
1168                let lf_cnt = content.iter().filter(|&&b| b == b'\n').count();
1169                let bytes = content.len();
1170                let buffer = StringBuffer::new(buffer_id, content);
1171                self.buffers.push(buffer);
1172                buffer_info.push((BufferLocation::Added(buffer_id), 0, bytes, Some(lf_cnt)));
1173            }
1174            // No placeholder for empty texts - the closure is only called for non-empty texts
1175        }
1176
1177        // Now call apply_bulk_edits with a simple index-based closure
1178        let mut idx = 0;
1179        let delta = self
1180            .piece_tree
1181            .apply_bulk_edits(edits, &self.buffers, |_text| {
1182                let info = buffer_info[idx];
1183                idx += 1;
1184                info
1185            });
1186
1187        self.mark_content_modified();
1188        delta
1189    }
1190
    /// Get text from a byte offset range (read-only).
    ///
    /// Uses piece_tree.iter_pieces_in_range() — a single O(log n) descent
    /// plus linear iteration over the overlapping pieces.
    ///
    /// Returns None if any buffer in the range is unloaded.
    /// NOTE(review): a piece whose buffer id is not found, or whose data is
    /// shorter than expected, is silently skipped rather than treated as an
    /// error, so the result may contain fewer than `bytes` bytes — callers
    /// should not assume a full read.
    /// PRIVATE: External code should use get_text_range_mut() which handles lazy loading
    fn get_text_range(&self, offset: usize, bytes: usize) -> Option<Vec<u8>> {
        if bytes == 0 {
            return Some(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        let end_offset = offset + bytes;
        let mut collected = 0;

        // Use the efficient piece iterator (single O(log n) traversal + O(N) iteration)
        for piece_view in self.piece_tree.iter_pieces_in_range(offset, end_offset) {
            let buffer_id = piece_view.location.buffer_id();
            if let Some(buffer) = self.buffers.get(buffer_id) {
                // Calculate the range to read from this piece
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clip to the requested range
                let read_start = offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    // Return None if buffer is unloaded (type-safe)
                    let data = buffer.get_data()?;

                    if buffer_end <= data.len() {
                        result.extend_from_slice(&data[buffer_start..buffer_end]);
                        collected += bytes_to_read;

                        // Stop early once the requested amount is gathered.
                        if collected >= bytes {
                            break;
                        }
                    }
                }
            }
        }

        Some(result)
    }
1241
    /// Get text from a byte offset range with lazy loading.
    /// This will load unloaded chunks on-demand and always returns complete data.
    ///
    /// Returns an error if loading fails or if data cannot be read for any reason.
    ///
    /// The outer `while` loop exists because loading a chunk mutates the
    /// piece tree (splits pieces, swaps buffer references); when that
    /// happens the piece iterator is invalidated, so iteration restarts
    /// from `current_offset` with the updated tree.
    ///
    /// NOTE: Currently loads entire buffers on-demand. Future optimization would split
    /// large pieces and load only LOAD_CHUNK_SIZE chunks at a time.
    pub fn get_text_range_mut(&mut self, offset: usize, bytes: usize) -> Result<Vec<u8>> {
        if bytes == 0 {
            return Ok(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        // Clamp end_offset to buffer length to handle reads beyond EOF
        let end_offset = (offset + bytes).min(self.len());
        let mut current_offset = offset;

        // Keep iterating until we've collected all requested bytes
        while current_offset < end_offset {
            let mut made_progress = false;
            let mut restarted_iteration = false;

            // Use the efficient piece iterator (single O(log n) traversal + O(N) iteration)
            for piece_view in self
                .piece_tree
                .iter_pieces_in_range(current_offset, end_offset)
            {
                let buffer_id = piece_view.location.buffer_id();

                // Check if buffer needs loading
                let needs_loading = self
                    .buffers
                    .get(buffer_id)
                    .map(|b| !b.is_loaded())
                    .unwrap_or(false);

                if needs_loading {
                    // Check if piece is too large for full loading
                    if piece_view.bytes > LOAD_CHUNK_SIZE {
                        // Split large piece into chunks
                        let piece_start_in_doc = piece_view.doc_offset;
                        let offset_in_piece = current_offset.saturating_sub(piece_start_in_doc);

                        // Calculate chunk boundaries aligned to CHUNK_ALIGNMENT
                        // (integer division then multiplication rounds DOWN to
                        // the nearest alignment boundary)
                        let chunk_start_in_buffer = (piece_view.buffer_offset + offset_in_piece)
                            / CHUNK_ALIGNMENT
                            * CHUNK_ALIGNMENT;
                        let chunk_bytes = LOAD_CHUNK_SIZE.min(
                            (piece_view.buffer_offset + piece_view.bytes)
                                .saturating_sub(chunk_start_in_buffer),
                        );

                        // Calculate document offsets for splitting
                        let chunk_start_offset_in_piece =
                            chunk_start_in_buffer.saturating_sub(piece_view.buffer_offset);
                        let split_start_in_doc = piece_start_in_doc + chunk_start_offset_in_piece;
                        let split_end_in_doc = split_start_in_doc + chunk_bytes;

                        // Split the piece to isolate the chunk (skip splits that
                        // would fall on an existing piece boundary)
                        if chunk_start_offset_in_piece > 0 {
                            self.piece_tree
                                .split_at_offset(split_start_in_doc, &self.buffers);
                        }
                        if split_end_in_doc < piece_start_in_doc + piece_view.bytes {
                            self.piece_tree
                                .split_at_offset(split_end_in_doc, &self.buffers);
                        }

                        // Create a new buffer for this chunk
                        let chunk_buffer = self
                            .buffers
                            .get(buffer_id)
                            .context("Buffer not found")?
                            .create_chunk_buffer(
                                self.next_buffer_id,
                                chunk_start_in_buffer,
                                chunk_bytes,
                            )
                            .context("Failed to create chunk buffer")?;

                        self.next_buffer_id += 1;
                        let new_buffer_id = chunk_buffer.id;
                        self.buffers.push(chunk_buffer);

                        // Update the piece to reference the new chunk buffer
                        self.piece_tree.replace_buffer_reference(
                            buffer_id,
                            piece_view.buffer_offset + chunk_start_offset_in_piece,
                            chunk_bytes,
                            BufferLocation::Added(new_buffer_id),
                        );

                        // Load the chunk buffer
                        self.buffers
                            .get_mut(new_buffer_id)
                            .context("Chunk buffer not found")?
                            .load()
                            .context("Failed to load chunk")?;

                        // Restart iteration with the modified tree
                        // (the current iterator no longer matches the tree)
                        restarted_iteration = true;
                        break;
                    } else {
                        // Piece is small enough, load the entire buffer
                        self.buffers
                            .get_mut(buffer_id)
                            .context("Buffer not found")?
                            .load()
                            .context("Failed to load buffer")?;
                    }
                }

                // Calculate the range to read from this piece
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clip to the requested range
                let read_start = current_offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    // Buffer should be loaded now
                    let buffer = self.buffers.get(buffer_id).context("Buffer not found")?;
                    let data = buffer
                        .get_data()
                        .context("Buffer data unavailable after load")?;

                    anyhow::ensure!(
                        buffer_end <= data.len(),
                        "Buffer range out of bounds: requested {}..{}, buffer size {}",
                        buffer_start,
                        buffer_end,
                        data.len()
                    );

                    result.extend_from_slice(&data[buffer_start..buffer_end]);
                    current_offset = read_end;
                    made_progress = true;
                }
            }

            // If we didn't make progress and didn't restart iteration, this is an error
            // (otherwise the while loop would spin forever on the same offset)
            if !made_progress && !restarted_iteration {
                tracing::error!(
                    "get_text_range_mut: No progress at offset {} (requested range: {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
                tracing::error!(
                    "Piece tree stats: {} total bytes",
                    self.piece_tree.stats().total_bytes
                );
                anyhow::bail!(
                    "Failed to read data at offset {}: no progress made (requested {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
            }
        }

        Ok(result)
    }
1414
1415    /// Prepare a viewport for rendering
1416    ///
1417    /// This is called before rendering with &mut access to pre-load all data
1418    /// that will be needed for the viewport. It estimates the number of bytes
1419    /// needed based on the line count and pre-loads them.
1420    ///
1421    /// # Arguments
1422    /// * `start_offset` - The byte offset where the viewport starts
1423    /// * `line_count` - The number of lines to prepare (estimate)
1424    ///
1425    /// # Returns
1426    /// Ok(()) if preparation succeeded, Err if loading failed
1427    pub fn prepare_viewport(&mut self, start_offset: usize, line_count: usize) -> Result<()> {
1428        // Estimate how many bytes we need (pessimistic assumption)
1429        // Average line length is typically 80-100 bytes, but we use 200 to be safe
1430        let estimated_bytes = line_count.saturating_mul(200);
1431
1432        // Cap the estimate at the remaining bytes in the document
1433        let remaining_bytes = self.total_bytes().saturating_sub(start_offset);
1434        let bytes_to_load = estimated_bytes.min(remaining_bytes);
1435
1436        // Pre-load with full chunk-splitting support
1437        // This may load more than we need, but ensures all data is available
1438        self.get_text_range_mut(start_offset, bytes_to_load)?;
1439
1440        Ok(())
1441    }
1442
1443    /// Get all text as a single Vec<u8>
1444    /// Returns None if any buffers are unloaded (lazy loading)
1445    /// CRATE-PRIVATE: External code should use get_text_range_mut() or DocumentModel methods
1446    pub(crate) fn get_all_text(&self) -> Option<Vec<u8>> {
1447        self.get_text_range(0, self.total_bytes())
1448    }
1449
1450    /// Get all text as a String
1451    /// Returns None if any buffers are unloaded (lazy loading)
1452    /// CRATE-PRIVATE: External code should use get_text_range_mut() or DocumentModel methods
1453    pub(crate) fn get_all_text_string(&self) -> Option<String> {
1454        self.get_all_text()
1455            .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
1456    }
1457
1458    /// Get text from a byte range as bytes
1459    /// CRATE-PRIVATE: Returns empty vector if any buffers are unloaded (silently fails!)
1460    /// Only use this when you KNOW the data is loaded (e.g., for syntax highlighting small regions)
1461    /// External code should use get_text_range_mut() or DocumentModel methods
1462    pub(crate) fn slice_bytes(&self, range: Range<usize>) -> Vec<u8> {
1463        self.get_text_range(range.start, range.end.saturating_sub(range.start))
1464            .unwrap_or_default()
1465    }
1466
    /// Get all text as a String.
    /// Returns None if any buffers are unloaded (lazy loading).
    ///
    /// NOTE(review): this inherent `to_string` deliberately shadows the
    /// `Display`-derived signature — it returns Option<String>, not String.
    pub fn to_string(&self) -> Option<String> {
        self.get_all_text_string()
    }
1472
    /// Get the total number of bytes (alias for `total_bytes`).
    pub fn len(&self) -> usize {
        self.total_bytes()
    }
1477
1478    /// Check if the buffer is empty
1479    pub fn is_empty(&self) -> bool {
1480        self.total_bytes() == 0
1481    }
1482
1483    /// Get the file path associated with this buffer
1484    pub fn file_path(&self) -> Option<&Path> {
1485        self.file_path.as_deref()
1486    }
1487
1488    /// Set the file path for this buffer
1489    pub fn set_file_path(&mut self, path: PathBuf) {
1490        self.file_path = Some(path);
1491    }
1492
1493    /// Clear the file path (make buffer unnamed)
1494    /// Note: This does NOT affect Unloaded chunk file_paths used for lazy loading.
1495    /// Those still point to the original source file for chunk loading.
1496    pub fn clear_file_path(&mut self) {
1497        self.file_path = None;
1498    }
1499
    /// Extend buffer to include more bytes from a streaming source file.
    /// Used for stdin streaming where the temp file grows over time.
    /// Appends a new Unloaded chunk for the new bytes.
    ///
    /// `source_path` is the file backing the new chunk; `new_size` is the
    /// current total size of that file. No-op when `new_size` does not
    /// exceed the current document size.
    ///
    /// NOTE(review): the new chunk's file_offset is the current document
    /// size, which assumes the document so far mirrors the source file
    /// byte-for-byte (true for streaming append) — confirm for other callers.
    pub fn extend_streaming(&mut self, source_path: &Path, new_size: usize) {
        let old_size = self.total_bytes();
        // Nothing to do if the file shrank or is unchanged.
        if new_size <= old_size {
            return;
        }

        let additional_bytes = new_size - old_size;

        // Create new Unloaded buffer for the appended region
        let buffer_id = self.next_buffer_id;
        self.next_buffer_id += 1;

        let new_buffer = StringBuffer::new_unloaded(
            buffer_id,
            source_path.to_path_buf(),
            old_size,         // file_offset - where this chunk starts in the file
            additional_bytes, // bytes - size of this chunk
        );
        self.buffers.push(new_buffer);

        // Append piece at end of document (insert at offset == total_bytes)
        self.piece_tree.insert(
            old_size,
            BufferLocation::Stored(buffer_id),
            0,
            additional_bytes,
            None, // line_feed_cnt unknown for unloaded chunk
            &self.buffers,
        );
    }
1533
    /// Check if the buffer has been modified since the last save.
    pub fn is_modified(&self) -> bool {
        self.modified
    }
1538
    /// Clear the modified flag (call after a successful save).
    pub fn clear_modified(&mut self) {
        self.modified = false;
    }
1543
    /// Set the modified flag explicitly.
    /// Used by undo/redo to restore the correct modified state.
    pub fn set_modified(&mut self, modified: bool) {
        self.modified = modified;
    }
1549
    /// Check if the buffer has pending changes for recovery auto-save.
    pub fn is_recovery_pending(&self) -> bool {
        self.recovery_pending
    }
1554
    /// Mark the buffer as needing recovery auto-save (call after edits),
    /// or clear the flag after a recovery snapshot has been written.
    pub fn set_recovery_pending(&mut self, pending: bool) {
        self.recovery_pending = pending;
    }
1559
    /// Check if this is a large file with lazy loading enabled.
    pub fn is_large_file(&self) -> bool {
        self.large_file
    }
1564
    /// Get the saved file size (size of the file on disk after last load/save).
    /// For large files, this is used during recovery to know the expected original file size.
    /// Returns None for new unsaved buffers.
    pub fn original_file_size(&self) -> Option<usize> {
        // Return the tracked saved file size - this is updated when the file is
        // loaded or saved, so it always reflects the current file on disk.
        self.saved_file_size
    }
1573
    /// Get recovery chunks for this buffer (only modified portions)
    ///
    /// For large files, this returns only the pieces that come from Added buffers
    /// (i.e., the modifications), not the original file content. This allows
    /// efficient incremental recovery without reading/writing the entire file.
    ///
    /// Returns: Vec of (original_file_offset, data) for each modified chunk.
    /// The offset is the position in the ORIGINAL file where this chunk should
    /// be inserted. Chunks come out in document order, so multiple insertions
    /// mapping to the same stored offset keep their document order.
    pub fn get_recovery_chunks(&self) -> Vec<(usize, Vec<u8>)> {
        use crate::model::piece_tree::BufferLocation;

        let mut chunks = Vec::new();
        let total = self.total_bytes();

        // Track cumulative bytes from Stored pieces as we iterate.
        // This gives us the original file offset for Added pieces.
        // The key insight: Added pieces should be inserted at the position
        // corresponding to where they appear relative to Stored content,
        // not their position in the current document.
        let mut stored_bytes_before = 0;

        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
            match piece.location {
                BufferLocation::Stored(_) => {
                    // Accumulate stored bytes to track position in original file
                    stored_bytes_before += piece.bytes;
                }
                BufferLocation::Added(buffer_id) => {
                    if let Some(buffer) = self.buffers.iter().find(|b| b.id == buffer_id) {
                        // Get the data from the buffer if loaded
                        if let Some(data) = buffer.get_data() {
                            // Extract just the portion this piece references
                            let start = piece.buffer_offset;
                            let end = start + piece.bytes;
                            // NOTE(review): a piece that points past the end of
                            // its buffer is silently skipped — presumably an
                            // invariant violation; confirm whether it should log.
                            if end <= data.len() {
                                // Use stored_bytes_before as the original file offset.
                                // This is where this insertion should go relative to
                                // the original file content.
                                chunks.push((stored_bytes_before, data[start..end].to_vec()));
                            }
                        }
                    }
                }
            }
        }

        chunks
    }
1622
    /// Check if this buffer was detected as containing binary content.
    pub fn is_binary(&self) -> bool {
        self.is_binary
    }
1627
    /// Get the line ending format currently configured for this buffer.
    pub fn line_ending(&self) -> LineEnding {
        self.line_ending
    }
1632
    /// Set the line ending format for this buffer
    ///
    /// This marks the buffer as modified since the line ending format has changed.
    /// On save, the buffer content will be converted to the new format.
    pub fn set_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        self.mark_content_modified();
    }
1641
    /// Set the default line ending format for a new/empty buffer
    ///
    /// Unlike `set_line_ending`, this does NOT mark the buffer as modified.
    /// This should be used when initializing a new buffer with a configured
    /// default; it also records the value as the "original" format so later
    /// comparisons don't see a spurious change.
    pub fn set_default_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        self.original_line_ending = line_ending;
    }
1650
1651    /// Detect if the given bytes contain binary content.
1652    ///
1653    /// Binary content is detected by looking for:
1654    /// - Null bytes (0x00)
1655    /// - Non-printable control characters (except common ones like tab, newline, CR)
1656    ///
1657    /// ANSI escape sequences (ESC [ ...) are treated as text, not binary.
1658    pub fn detect_binary(bytes: &[u8]) -> bool {
1659        // Only check the first 8KB for binary detection
1660        let check_len = bytes.len().min(8 * 1024);
1661        let sample = &bytes[..check_len];
1662
1663        let mut i = 0;
1664        while i < sample.len() {
1665            let byte = sample[i];
1666
1667            // Check for ANSI escape sequence (ESC [ or ESC ])
1668            // These are common in text files and should not trigger binary detection
1669            if byte == 0x1B && i + 1 < sample.len() {
1670                let next = sample[i + 1];
1671                if next == b'[' || next == b']' {
1672                    // Skip the escape sequence - find the terminator
1673                    i += 2;
1674                    while i < sample.len() {
1675                        let c = sample[i];
1676                        // ANSI sequences end with a letter (0x40-0x7E for CSI)
1677                        if (0x40..=0x7E).contains(&c) {
1678                            break;
1679                        }
1680                        i += 1;
1681                    }
1682                    i += 1;
1683                    continue;
1684                }
1685            }
1686
1687            // Null byte is a strong indicator of binary content
1688            if byte == 0x00 {
1689                return true;
1690            }
1691
1692            // Check for non-printable control characters
1693            // Allow: tab (0x09), newline (0x0A), carriage return (0x0D)
1694            // Also allow: form feed (0x0C), vertical tab (0x0B) - sometimes used in text
1695            // ESC (0x1B) is handled above for ANSI sequences
1696            if byte < 0x20
1697                && byte != 0x09
1698                && byte != 0x0A
1699                && byte != 0x0D
1700                && byte != 0x0C
1701                && byte != 0x0B
1702                && byte != 0x1B
1703            {
1704                return true;
1705            }
1706
1707            // DEL character (0x7F) is also a control character
1708            if byte == 0x7F {
1709                return true;
1710            }
1711
1712            i += 1;
1713        }
1714
1715        false
1716    }
1717
1718    /// Detect the line ending format from a sample of bytes
1719    ///
1720    /// Uses majority voting: counts CRLF, LF-only, and CR-only occurrences
1721    /// and returns the most common format.
1722    pub fn detect_line_ending(bytes: &[u8]) -> LineEnding {
1723        // Only check the first 8KB for line ending detection (same as binary detection)
1724        let check_len = bytes.len().min(8 * 1024);
1725        let sample = &bytes[..check_len];
1726
1727        let mut crlf_count = 0;
1728        let mut lf_only_count = 0;
1729        let mut cr_only_count = 0;
1730
1731        let mut i = 0;
1732        while i < sample.len() {
1733            if sample[i] == b'\r' {
1734                // Check if this is CRLF
1735                if i + 1 < sample.len() && sample[i + 1] == b'\n' {
1736                    crlf_count += 1;
1737                    i += 2; // Skip both \r and \n
1738                    continue;
1739                } else {
1740                    // CR only (old Mac format)
1741                    cr_only_count += 1;
1742                }
1743            } else if sample[i] == b'\n' {
1744                // LF only (Unix format)
1745                lf_only_count += 1;
1746            }
1747            i += 1;
1748        }
1749
1750        // Use majority voting to determine line ending
1751        if crlf_count > lf_only_count && crlf_count > cr_only_count {
1752            LineEnding::CRLF
1753        } else if cr_only_count > lf_only_count && cr_only_count > crlf_count {
1754            LineEnding::CR
1755        } else {
1756            // Default to LF if no clear winner or if LF wins
1757            LineEnding::LF
1758        }
1759    }
1760
1761    /// Normalize line endings in the given bytes to LF only
1762    ///
1763    /// Converts CRLF (\r\n) and CR (\r) to LF (\n) for internal representation.
1764    /// This makes editing and cursor movement simpler while preserving the
1765    /// original format for saving.
1766    #[allow(dead_code)] // Kept for tests and potential future use
1767    pub fn normalize_line_endings(bytes: Vec<u8>) -> Vec<u8> {
1768        let mut normalized = Vec::with_capacity(bytes.len());
1769        let mut i = 0;
1770
1771        while i < bytes.len() {
1772            if bytes[i] == b'\r' {
1773                // Check if this is CRLF
1774                if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
1775                    // CRLF -> LF
1776                    normalized.push(b'\n');
1777                    i += 2; // Skip both \r and \n
1778                    continue;
1779                } else {
1780                    // CR only -> LF
1781                    normalized.push(b'\n');
1782                }
1783            } else {
1784                // Copy byte as-is
1785                normalized.push(bytes[i]);
1786            }
1787            i += 1;
1788        }
1789
1790        normalized
1791    }
1792
1793    /// Convert line endings from any source format to any target format
1794    ///
1795    /// This first normalizes all line endings to LF, then converts to the target format.
1796    /// Used when saving files after the user has changed the line ending format.
1797    fn convert_line_endings_to(bytes: &[u8], target_ending: LineEnding) -> Vec<u8> {
1798        // First pass: normalize everything to LF
1799        let mut normalized = Vec::with_capacity(bytes.len());
1800        let mut i = 0;
1801        while i < bytes.len() {
1802            if bytes[i] == b'\r' {
1803                // Check if this is CRLF
1804                if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
1805                    // CRLF -> LF
1806                    normalized.push(b'\n');
1807                    i += 2;
1808                    continue;
1809                } else {
1810                    // CR only -> LF
1811                    normalized.push(b'\n');
1812                }
1813            } else {
1814                normalized.push(bytes[i]);
1815            }
1816            i += 1;
1817        }
1818
1819        // If target is LF, we're done
1820        if target_ending == LineEnding::LF {
1821            return normalized;
1822        }
1823
1824        // Second pass: convert LF to target format
1825        let replacement = target_ending.as_str().as_bytes();
1826        let mut result = Vec::with_capacity(normalized.len() + normalized.len() / 10);
1827
1828        for byte in normalized {
1829            if byte == b'\n' {
1830                result.extend_from_slice(replacement);
1831            } else {
1832                result.push(byte);
1833            }
1834        }
1835
1836        result
1837    }
1838
1839    /// Get text for a specific line
1840    pub fn get_line(&self, line: usize) -> Option<Vec<u8>> {
1841        let (start, end) = self.piece_tree.line_range(line, &self.buffers)?;
1842
1843        let bytes = if let Some(end_offset) = end {
1844            end_offset.saturating_sub(start)
1845        } else {
1846            self.total_bytes().saturating_sub(start)
1847        };
1848
1849        self.get_text_range(start, bytes)
1850    }
1851
1852    /// Get the byte offset where a line starts
1853    pub fn line_start_offset(&self, line: usize) -> Option<usize> {
1854        let (start, _) = self.piece_tree.line_range(line, &self.buffers)?;
1855        Some(start)
1856    }
1857
    /// Get piece information at a byte offset (delegates to the piece tree).
    pub fn piece_info_at_offset(&self, offset: usize) -> Option<PieceInfo> {
        self.piece_tree.find_by_offset(offset)
    }
1862
    /// Get tree statistics for debugging (delegates to the piece tree).
    pub fn stats(&self) -> TreeStats {
        self.piece_tree.stats()
    }
1867
1868    // Search and Replace Operations
1869
1870    /// Find the next occurrence of a pattern, with wrap-around
1871    pub fn find_next(&self, pattern: &str, start_pos: usize) -> Option<usize> {
1872        if pattern.is_empty() {
1873            return None;
1874        }
1875
1876        let pattern_bytes = pattern.as_bytes();
1877        let buffer_len = self.len();
1878
1879        // Search from start_pos to end
1880        if start_pos < buffer_len {
1881            if let Some(offset) = self.find_pattern(start_pos, buffer_len, pattern_bytes) {
1882                return Some(offset);
1883            }
1884        }
1885
1886        // Wrap around: search from beginning to start_pos
1887        if start_pos > 0 {
1888            if let Some(offset) = self.find_pattern(0, start_pos, pattern_bytes) {
1889                return Some(offset);
1890            }
1891        }
1892
1893        None
1894    }
1895
1896    /// Find the next occurrence of a pattern within an optional range
1897    /// If range is None, searches the entire buffer with wrap-around (same as find_next)
1898    /// If range is Some, searches only within that range without wrap-around
1899    pub fn find_next_in_range(
1900        &self,
1901        pattern: &str,
1902        start_pos: usize,
1903        range: Option<Range<usize>>,
1904    ) -> Option<usize> {
1905        if pattern.is_empty() {
1906            return None;
1907        }
1908
1909        if let Some(search_range) = range {
1910            // Search within range only, no wrap-around
1911            let pattern_bytes = pattern.as_bytes();
1912            let search_start = start_pos.max(search_range.start);
1913            let search_end = search_range.end.min(self.len());
1914
1915            if search_start < search_end {
1916                self.find_pattern(search_start, search_end, pattern_bytes)
1917            } else {
1918                None
1919            }
1920        } else {
1921            // No range specified, use normal find_next with wrap-around
1922            self.find_next(pattern, start_pos)
1923        }
1924    }
1925
    /// Find pattern in a byte range using overlapping chunks
    ///
    /// Streams the buffer in 64 KB chunks overlapping by `pattern.len() - 1`
    /// bytes so a match spanning a chunk boundary is still visible in full.
    /// A match is only reported when it ENDS past the chunk's `valid_start`,
    /// which ensures a boundary-spanning match is found exactly once rather
    /// than once per chunk that can see it.
    fn find_pattern(&self, start: usize, end: usize, pattern: &[u8]) -> Option<usize> {
        if pattern.is_empty() || start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 65536; // 64KB chunks
        // Overlap of len-1 guarantees any match crossing a boundary fits in
        // the next chunk; max(1) keeps the iterator valid for 1-byte patterns.
        let overlap = pattern.len().saturating_sub(1).max(1);

        // Use the overlapping chunks iterator for efficient streaming search
        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, overlap);

        for chunk in chunks {
            // Search the entire chunk buffer
            if let Some(pos) = Self::find_in_bytes(&chunk.buffer, pattern) {
                let match_end = pos + pattern.len();
                // Only report if match ENDS in or after the valid zone
                // This ensures patterns spanning boundaries are found exactly once
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + pos;
                    // Verify the match doesn't extend beyond our search range
                    if absolute_pos + pattern.len() <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
1956
1957    /// Simple byte pattern search using naive algorithm
1958    fn find_in_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
1959        if needle.is_empty() || needle.len() > haystack.len() {
1960            return None;
1961        }
1962
1963        (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
1964    }
1965
1966    /// Find the next occurrence of a regex pattern, with wrap-around
1967    pub fn find_next_regex(&self, regex: &Regex, start_pos: usize) -> Option<usize> {
1968        let buffer_len = self.len();
1969
1970        // Search from start_pos to end
1971        if start_pos < buffer_len {
1972            if let Some(offset) = self.find_regex(start_pos, buffer_len, regex) {
1973                return Some(offset);
1974            }
1975        }
1976
1977        // Wrap around: search from beginning to start_pos
1978        if start_pos > 0 {
1979            if let Some(offset) = self.find_regex(0, start_pos, regex) {
1980                return Some(offset);
1981            }
1982        }
1983
1984        None
1985    }
1986
1987    /// Find the next occurrence of a regex pattern within an optional range
1988    pub fn find_next_regex_in_range(
1989        &self,
1990        regex: &Regex,
1991        start_pos: usize,
1992        range: Option<Range<usize>>,
1993    ) -> Option<usize> {
1994        if let Some(search_range) = range {
1995            let search_start = start_pos.max(search_range.start);
1996            let search_end = search_range.end.min(self.len());
1997
1998            if search_start < search_end {
1999                self.find_regex(search_start, search_end, regex)
2000            } else {
2001                None
2002            }
2003        } else {
2004            self.find_next_regex(regex, start_pos)
2005        }
2006    }
2007
    /// Find regex pattern in a byte range using overlapping chunks
    ///
    /// Streams the buffer in 1 MB chunks with a fixed 4 KB overlap (a regex
    /// match has no known maximum length, so the overlap is a heuristic
    /// bound — matches longer than 4 KB spanning a boundary could still be
    /// missed). A match is only reported when it ENDS past the chunk's
    /// `valid_start`, so a boundary-spanning match is found exactly once.
    fn find_regex(&self, start: usize, end: usize, regex: &Regex) -> Option<usize> {
        if start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 1048576; // 1MB chunks
        const OVERLAP: usize = 4096; // 4KB overlap for regex

        // Use the overlapping chunks iterator for efficient streaming search
        // This fixes the critical bug where regex patterns spanning chunk boundaries were missed
        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, OVERLAP);

        for chunk in chunks {
            // Search the entire chunk buffer
            if let Some(mat) = regex.find(&chunk.buffer) {
                let match_end = mat.end();
                // Only report if match ENDS in or after the valid zone
                // This ensures patterns spanning boundaries are found exactly once
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + mat.start();
                    // Verify the match doesn't extend beyond our search range
                    let match_len = mat.end() - mat.start();
                    if absolute_pos + match_len <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
2040
2041    /// Replace a range with replacement text
2042    pub fn replace_range(&mut self, range: Range<usize>, replacement: &str) -> bool {
2043        if range.start >= self.len() {
2044            return false;
2045        }
2046
2047        let end = range.end.min(self.len());
2048        if end > range.start {
2049            self.delete_bytes(range.start, end - range.start);
2050        }
2051
2052        if !replacement.is_empty() {
2053            self.insert(range.start, replacement);
2054        }
2055
2056        true
2057    }
2058
2059    /// Find and replace the next occurrence of a pattern
2060    pub fn replace_next(
2061        &mut self,
2062        pattern: &str,
2063        replacement: &str,
2064        start_pos: usize,
2065        range: Option<Range<usize>>,
2066    ) -> Option<usize> {
2067        if let Some(pos) = self.find_next_in_range(pattern, start_pos, range.clone()) {
2068            self.replace_range(pos..pos + pattern.len(), replacement);
2069            Some(pos)
2070        } else {
2071            None
2072        }
2073    }
2074
2075    /// Replace all occurrences of a pattern with replacement text
2076    pub fn replace_all(&mut self, pattern: &str, replacement: &str) -> usize {
2077        if pattern.is_empty() {
2078            return 0;
2079        }
2080
2081        let mut count = 0;
2082        let mut pos = 0;
2083
2084        // Keep searching and replacing
2085        // Note: we search forward from last replacement to handle growth/shrinkage
2086        // Find next occurrence (no wrap-around for replace_all)
2087        while let Some(found_pos) = self.find_next_in_range(pattern, pos, Some(0..self.len())) {
2088            self.replace_range(found_pos..found_pos + pattern.len(), replacement);
2089            count += 1;
2090
2091            // Move past the replacement
2092            pos = found_pos + replacement.len();
2093
2094            // If we're at or past the end, stop
2095            if pos >= self.len() {
2096                break;
2097            }
2098        }
2099
2100        count
2101    }
2102
2103    /// Replace all occurrences of a regex pattern with replacement text
2104    pub fn replace_all_regex(&mut self, regex: &Regex, replacement: &str) -> Result<usize> {
2105        let mut count = 0;
2106        let mut pos = 0;
2107
2108        while let Some(found_pos) = self.find_next_regex_in_range(regex, pos, Some(0..self.len())) {
2109            // Get the match to find its length
2110            let text = self
2111                .get_text_range_mut(found_pos, self.len() - found_pos)
2112                .context("Failed to read text for regex match")?;
2113
2114            if let Some(mat) = regex.find(&text) {
2115                self.replace_range(found_pos..found_pos + mat.len(), replacement);
2116                count += 1;
2117                pos = found_pos + replacement.len();
2118
2119                if pos >= self.len() {
2120                    break;
2121                }
2122            } else {
2123                break;
2124            }
2125        }
2126
2127        Ok(count)
2128    }
2129
2130    // LSP Support (UTF-16 conversions)
2131
2132    /// Convert byte position to (line, column) in bytes
2133    pub fn position_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
2134        self.offset_to_position(byte_pos)
2135            .map(|pos| (pos.line, pos.column))
2136            .unwrap_or_else(|| (byte_pos / 80, 0)) // Estimate if metadata unavailable
2137    }
2138
2139    /// Convert (line, character) to byte position - 0-indexed
2140    /// character is in BYTES, not UTF-16 code units
2141    /// Optimized to use single line_range() call instead of two
2142    pub fn line_col_to_position(&self, line: usize, character: usize) -> usize {
2143        if let Some((start, end)) = self.piece_tree.line_range(line, &self.buffers) {
2144            // Calculate line length from the range
2145            let line_len = if let Some(end_offset) = end {
2146                end_offset.saturating_sub(start)
2147            } else {
2148                self.total_bytes().saturating_sub(start)
2149            };
2150            let byte_offset = character.min(line_len);
2151            start + byte_offset
2152        } else {
2153            // Line doesn't exist, return end of buffer
2154            self.len()
2155        }
2156    }
2157
2158    /// Convert byte position to LSP position (line, UTF-16 code units)
2159    /// LSP protocol uses UTF-16 code units for character offsets
2160    pub fn position_to_lsp_position(&self, byte_pos: usize) -> (usize, usize) {
2161        let (line, column_bytes) = self
2162            .offset_to_position(byte_pos)
2163            .map(|pos| (pos.line, pos.column))
2164            .unwrap_or_else(|| (byte_pos / 80, 0)); // Estimate if metadata unavailable
2165
2166        // Get the line content
2167        if let Some(line_bytes) = self.get_line(line) {
2168            // Convert byte offset to UTF-16 code units
2169            let text_before = &line_bytes[..column_bytes.min(line_bytes.len())];
2170            let text_str = String::from_utf8_lossy(text_before);
2171            let utf16_offset = text_str.encode_utf16().count();
2172            (line, utf16_offset)
2173        } else {
2174            (line, 0)
2175        }
2176    }
2177
    /// Convert LSP position (line, UTF-16 code units) to byte position
    /// LSP uses UTF-16 code units for character offsets, not bytes
    /// Optimized to use single line_range() call instead of two
    ///
    /// Falls back to the line start when the line's data is unloaded, and to
    /// the end of the buffer when the line does not exist. A `utf16_offset`
    /// past the end of the line resolves to the end of the line.
    pub fn lsp_position_to_byte(&self, line: usize, utf16_offset: usize) -> usize {
        if let Some((line_start, end)) = self.piece_tree.line_range(line, &self.buffers) {
            // Calculate line length and get line content
            // (an open end means the line runs to the end of the document)
            let line_len = if let Some(end_offset) = end {
                end_offset.saturating_sub(line_start)
            } else {
                self.total_bytes().saturating_sub(line_start)
            };

            if line_len > 0 {
                // If data is unloaded, return line_start as fallback
                let Some(line_bytes) = self.get_text_range(line_start, line_len) else {
                    return line_start;
                };
                // Lossy decode: invalid UTF-8 becomes U+FFFD, which still
                // advances the counters consistently below.
                let line_str = String::from_utf8_lossy(&line_bytes);

                // Convert UTF-16 offset to byte offset by walking chars and
                // accumulating both widths in lockstep.
                let mut utf16_count = 0;
                let mut byte_offset = 0;

                for ch in line_str.chars() {
                    if utf16_count >= utf16_offset {
                        break;
                    }
                    utf16_count += ch.len_utf16();
                    byte_offset += ch.len_utf8();
                }

                line_start + byte_offset
            } else {
                line_start
            }
        } else {
            // Line doesn't exist, return end of buffer
            self.len()
        }
    }
2218
2219    // Navigation helpers
2220
2221    /// Find the previous character boundary (UTF-8 aware)
2222    pub fn prev_char_boundary(&self, pos: usize) -> usize {
2223        if pos == 0 {
2224            return 0;
2225        }
2226
2227        // Get a few bytes before pos to find the character boundary
2228        let start = pos.saturating_sub(4);
2229        let Some(bytes) = self.get_text_range(start, pos - start) else {
2230            // Data unloaded, return pos as fallback
2231            return pos;
2232        };
2233
2234        // Walk backwards to find a UTF-8 leading byte
2235        for i in (0..bytes.len()).rev() {
2236            let byte = bytes[i];
2237            // Check if this is a UTF-8 leading byte (not a continuation byte)
2238            if (byte & 0b1100_0000) != 0b1000_0000 {
2239                return start + i;
2240            }
2241        }
2242
2243        // Fallback
2244        pos.saturating_sub(1)
2245    }
2246
2247    /// Find the next character boundary (UTF-8 aware)
2248    pub fn next_char_boundary(&self, pos: usize) -> usize {
2249        let len = self.len();
2250        if pos >= len {
2251            return len;
2252        }
2253
2254        // Get a few bytes after pos to find the character boundary
2255        let end = (pos + 5).min(len);
2256        let Some(bytes) = self.get_text_range(pos, end - pos) else {
2257            // Data unloaded, return pos as fallback
2258            return pos;
2259        };
2260
2261        // Start from index 1 (we want the NEXT boundary)
2262        for (i, &byte) in bytes.iter().enumerate().skip(1) {
2263            // Check if this is a UTF-8 leading byte (not a continuation byte)
2264            if (byte & 0b1100_0000) != 0b1000_0000 {
2265                return pos + i;
2266            }
2267        }
2268
2269        // If we got here, we're at the end or found no boundary in the range
2270        end
2271    }
2272
2273    /// Check if a byte is a UTF-8 continuation byte (not at a char boundary)
2274    /// UTF-8 continuation bytes have the pattern 10xxxxxx (0x80-0xBF)
2275    /// This is the same check that str::is_char_boundary uses internally.
2276    #[inline]
2277    fn is_utf8_continuation_byte(byte: u8) -> bool {
2278        (byte & 0b1100_0000) == 0b1000_0000
2279    }
2280
2281    /// Snap position to a valid UTF-8 character boundary
2282    /// If already at a boundary, returns the same position.
2283    /// Otherwise, moves to the previous valid boundary.
2284    pub fn snap_to_char_boundary(&self, pos: usize) -> usize {
2285        let len = self.len();
2286        if pos == 0 || pos >= len {
2287            return pos.min(len);
2288        }
2289
2290        // Get the byte at pos to check if we're at a character boundary
2291        let Some(bytes) = self.get_text_range(pos, 1) else {
2292            // Data unloaded, return pos as fallback
2293            return pos;
2294        };
2295
2296        // A position is at a char boundary if the byte there is NOT a continuation byte
2297        if !Self::is_utf8_continuation_byte(bytes[0]) {
2298            // Already at a character boundary
2299            return pos;
2300        }
2301
2302        // Not at a boundary, find the previous one
2303        self.prev_char_boundary(pos)
2304    }
2305
    /// Find the previous grapheme cluster boundary (for proper cursor movement with combining characters)
    ///
    /// This handles complex scripts like Thai where multiple Unicode code points
    /// form a single visual character (grapheme cluster). For example, Thai "ที่"
    /// is 3 code points but 1 grapheme cluster.
    ///
    /// Falls back to `prev_char_boundary` when the data is unloaded or the
    /// context window cannot be decoded as UTF-8.
    pub fn prev_grapheme_boundary(&self, pos: usize) -> usize {
        if pos == 0 {
            return 0;
        }

        // Get enough context before pos to find grapheme boundaries
        // Thai combining characters can have multiple marks, so get up to 32 bytes
        // IMPORTANT: Align start to a valid character boundary to avoid invalid UTF-8
        // when get_text_range starts mid-character
        let raw_start = pos.saturating_sub(32);
        let start = if raw_start == 0 {
            0
        } else {
            // Find the character boundary at or before raw_start
            // (+1 so a leading byte exactly at raw_start is kept).
            self.prev_char_boundary(raw_start + 1)
        };

        let Some(bytes) = self.get_text_range(start, pos - start) else {
            // Data unloaded, fall back to char boundary
            return self.prev_char_boundary(pos);
        };

        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // Still got invalid UTF-8 (shouldn't happen after alignment)
                // Try using just the valid portion
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.prev_char_boundary(pos),
                }
            }
        };

        // Use shared grapheme utility with relative position
        let rel_pos = pos - start;
        let new_rel_pos = grapheme::prev_grapheme_boundary(text, rel_pos);

        // If we landed at the start of this chunk and there's more before,
        // we might need to look further back.
        // Recursion terminates because `start < pos`, so each call strictly
        // decreases the position being examined.
        if new_rel_pos == 0 && start > 0 {
            return self.prev_grapheme_boundary(start);
        }

        start + new_rel_pos
    }
2358
    /// Find the next grapheme cluster boundary (for proper cursor movement with combining characters)
    ///
    /// This handles complex scripts like Thai where multiple Unicode code points
    /// form a single visual character (grapheme cluster). For example, Thai "ที่"
    /// is 3 code points but 1 grapheme cluster.
    ///
    /// Returns a byte offset `> pos` (clamped to `len`). Falls back to
    /// `next_char_boundary` when the data is unloaded or not valid UTF-8.
    pub fn next_grapheme_boundary(&self, pos: usize) -> usize {
        let len = self.len();
        if pos >= len {
            return len;
        }

        // Get enough context after pos to find grapheme boundaries
        // Thai combining characters can have multiple marks, so get up to 32 bytes
        // NOTE(review): a cluster longer than this 32-byte window would be split
        // at the window edge — confirm 32 bytes suffices for supported scripts.
        let end = (pos + 32).min(len);
        let Some(bytes) = self.get_text_range(pos, end - pos) else {
            // Data unloaded, fall back to char boundary
            return self.next_char_boundary(pos);
        };

        // Convert to UTF-8 string, handling the case where we might have
        // grabbed bytes that end mid-character (truncate to valid UTF-8)
        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // The bytes end in an incomplete UTF-8 sequence
                // Use only the valid portion (which includes at least the first grapheme)
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    // Nothing decodable — degrade to plain char-boundary logic
                    _ => return self.next_char_boundary(pos),
                }
            }
        };

        // Use shared grapheme utility; 0 is the relative position of `pos`
        // within the window, so this yields the end of the first cluster.
        let new_rel_pos = grapheme::next_grapheme_boundary(text, 0);
        pos + new_rel_pos
    }
2397
2398    /// Find the previous word boundary
2399    pub fn prev_word_boundary(&self, pos: usize) -> usize {
2400        if pos == 0 {
2401            return 0;
2402        }
2403
2404        // Get some text before pos
2405        let start = pos.saturating_sub(256).max(0);
2406        let Some(bytes) = self.get_text_range(start, pos - start) else {
2407            // Data unloaded, return pos as fallback
2408            return pos;
2409        };
2410        let text = String::from_utf8_lossy(&bytes);
2411
2412        let mut found_word_char = false;
2413        let chars: Vec<char> = text.chars().collect();
2414
2415        for i in (0..chars.len()).rev() {
2416            let ch = chars[i];
2417            let is_word_char = ch.is_alphanumeric() || ch == '_';
2418
2419            if found_word_char && !is_word_char {
2420                // We've transitioned from word to non-word
2421                // Calculate the byte position
2422                let byte_offset: usize = chars[0..=i].iter().map(|c| c.len_utf8()).sum();
2423                return start + byte_offset;
2424            }
2425
2426            if is_word_char {
2427                found_word_char = true;
2428            }
2429        }
2430
2431        0
2432    }
2433
2434    /// Find the next word boundary
2435    pub fn next_word_boundary(&self, pos: usize) -> usize {
2436        let len = self.len();
2437        if pos >= len {
2438            return len;
2439        }
2440
2441        // Get some text after pos
2442        let end = (pos + 256).min(len);
2443        let Some(bytes) = self.get_text_range(pos, end - pos) else {
2444            // Data unloaded, return pos as fallback
2445            return pos;
2446        };
2447        let text = String::from_utf8_lossy(&bytes);
2448
2449        let mut found_word_char = false;
2450        let mut byte_offset = 0;
2451
2452        for ch in text.chars() {
2453            let is_word_char = ch.is_alphanumeric() || ch == '_';
2454
2455            if found_word_char && !is_word_char {
2456                // We've transitioned from word to non-word
2457                return pos + byte_offset;
2458            }
2459
2460            if is_word_char {
2461                found_word_char = true;
2462            }
2463
2464            byte_offset += ch.len_utf8();
2465        }
2466
2467        len
2468    }
2469
    /// Create a line iterator starting at the given byte position
    ///
    /// This iterator lazily loads chunks as needed, never scanning the entire file.
    /// For large files with unloaded buffers, chunks are loaded on-demand (1MB at a time).
    ///
    /// # Arguments
    /// * `byte_pos` - Byte offset at which iteration begins
    /// * `estimated_line_length` - Sizing hint forwarded to `LineIterator::new`
    pub fn line_iterator(
        &mut self,
        byte_pos: usize,
        estimated_line_length: usize,
    ) -> LineIterator<'_> {
        LineIterator::new(self, byte_pos, estimated_line_length)
    }
2481
    /// Iterate over lines starting from a given byte offset, with line numbers
    ///
    /// This is a more efficient alternative to using line_iterator() + offset_to_position()
    /// because it calculates line numbers incrementally during iteration by accumulating
    /// line_feed_cnt from pieces (which is already tracked in the piece tree).
    ///
    /// # Arguments
    /// * `byte_pos` - Byte offset to start iterating from
    /// * `max_lines` - Upper bound on lines, forwarded to `TextBufferLineIterator::new`
    ///
    /// Returns: Iterator yielding (byte_offset, content, line_number: Option<usize>)
    /// - line_number is Some(n) for small files with line metadata
    /// - line_number is None for large files without line metadata
    ///
    /// # Performance
    /// - O(1) per line for line number calculation (vs O(log n) per line with offset_to_position)
    /// - Uses single source of truth: piece tree's existing line_feed_cnt metadata
    pub fn iter_lines_from(
        &mut self,
        byte_pos: usize,
        max_lines: usize,
    ) -> Result<TextBufferLineIterator> {
        TextBufferLineIterator::new(self, byte_pos, max_lines)
    }
2502
2503    // Legacy API methods for backwards compatibility
2504
2505    /// Get the line number for a given byte offset
2506    ///
2507    /// Returns exact line number if metadata available, otherwise estimates based on bytes.
2508    ///
2509    /// # Behavior by File Size:
2510    /// - **Small files (< 1MB)**: Returns exact line number from piece tree's `line_starts` metadata
2511    /// - **Large files (≥ 1MB)**: Returns estimated line number using `byte_offset / 80`
2512    ///
2513    /// Large files don't maintain line metadata for performance reasons. The estimation
2514    /// assumes ~80 bytes per line on average, which works reasonably well for most text files.
2515    pub fn get_line_number(&self, byte_offset: usize) -> usize {
2516        self.offset_to_position(byte_offset)
2517            .map(|pos| pos.line)
2518            .unwrap_or_else(|| {
2519                // Estimate line number based on average line length of ~80 bytes
2520                byte_offset / 80
2521            })
2522    }
2523
    /// Get the starting line number at a byte offset (used for viewport rendering)
    ///
    /// # Line Cache Architecture (Post-Refactoring):
    ///
    /// The concept of a separate "line cache" is **now obsolete**. After the refactoring,
    /// line tracking is integrated directly into the piece tree via:
    /// ```text
    /// BufferData::Loaded {
    ///     data: Vec<u8>,
    ///     line_starts: Option<Vec<usize>>  // None = large file mode (no line metadata)
    /// }
    /// ```
    /// (fenced as `text`, not `rust`: the fragment is illustrative and must not
    /// be compiled as a doctest)
    ///
    /// ## Why This Method Still Exists:
    /// The rendering code needs to know what line number to display in the margin at the
    /// top of the viewport. This method returns that line number, handling both small
    /// and large file modes transparently.
    ///
    /// ## Small vs Large File Modes:
    /// - **Small files**: `line_starts = Some(vec)` → returns exact line number from metadata
    /// - **Large files**: `line_starts = None` → returns estimated line number (byte_offset / 80)
    ///
    /// ## Legacy Line Cache Methods:
    /// These methods are now no-ops and can be removed in a future cleanup:
    /// - `invalidate_line_cache_from()` - No-op (piece tree updates automatically)
    /// - `handle_line_cache_insertion()` - No-op (piece tree updates automatically)
    /// - `handle_line_cache_deletion()` - No-op (piece tree updates automatically)
    /// - `clear_line_cache()` - No-op (can't clear piece tree metadata)
    ///
    /// ## Bug Fix (2025-11):
    /// Previously this method always returned `0`, causing line numbers in the margin
    /// to always show 1, 2, 3... regardless of scroll position. Now it correctly returns
    /// the actual line number at `start_byte`.
    pub fn populate_line_cache(&mut self, start_byte: usize, _line_count: usize) -> usize {
        // No-op for cache population: LineIndex maintains all line starts automatically
        // But we need to return the actual line number at start_byte for rendering
        self.get_line_number(start_byte)
    }
2562
    /// Get cached byte offset for line (compatibility method)
    ///
    /// Thin wrapper over `line_start_offset`, kept for callers written against
    /// the pre-refactor line-cache API.
    pub fn get_cached_byte_offset_for_line(&self, line_number: usize) -> Option<usize> {
        self.line_start_offset(line_number)
    }
2567
    /// Invalidate line cache from offset (no-op in new implementation)
    ///
    /// Retained only for API compatibility with the old line-cache design.
    pub fn invalidate_line_cache_from(&mut self, _byte_offset: usize) {
        // No-op: LineIndex updates automatically
    }
2572
    /// Handle line cache insertion (no-op in new implementation)
    ///
    /// Retained only for API compatibility with the old line-cache design.
    pub fn handle_line_cache_insertion(&mut self, _byte_offset: usize, _bytes_inserted: usize) {
        // No-op: LineIndex updates automatically during insert
    }
2577
    /// Handle line cache deletion (no-op in new implementation)
    ///
    /// Retained only for API compatibility with the old line-cache design.
    pub fn handle_line_cache_deletion(&mut self, _byte_offset: usize, _bytes_deleted: usize) {
        // No-op: LineIndex updates automatically during delete
    }
2582
    /// Clear line cache (no-op in new implementation)
    ///
    /// Retained only for API compatibility with the old line-cache design.
    pub fn clear_line_cache(&mut self) {
        // No-op: LineIndex can't be cleared
    }
2587
2588    // Test helper methods
2589
    /// Create a buffer from a string for testing
    ///
    /// Convenience wrapper over `from_bytes` for UTF-8 string literals.
    #[cfg(test)]
    pub fn from_str_test(s: &str) -> Self {
        Self::from_bytes(s.as_bytes().to_vec())
    }
2595
    /// Create a new empty buffer for testing
    ///
    /// Thin alias for `Self::empty()` used by test code.
    #[cfg(test)]
    pub fn new_test() -> Self {
        Self::empty()
    }
2601}
2602
/// Type alias for backwards compatibility
///
/// Lets existing code keep referring to the buffer type as `Buffer`.
pub type Buffer = TextBuffer;

// Re-export LineIterator so callers can import it from this module
// instead of reaching into `primitives::line_iterator`.
pub use crate::primitives::line_iterator::LineIterator;
2608
2609// ============================================================================
2610// Overlapping Chunks Iterator for Efficient Search
2611// ============================================================================
2612
/// Information about a chunk of data for pattern matching
///
/// Produced by [`OverlappingChunks`]; see that type's docs for how the
/// overlap / valid-zone scheme avoids reporting duplicate matches.
#[derive(Debug)]
pub struct ChunkInfo {
    /// The buffer containing this chunk's data (includes overlap from previous chunk)
    pub buffer: Vec<u8>,

    /// Absolute position in the document where this buffer starts
    pub absolute_pos: usize,

    /// Offset within buffer where "new" data starts (valid match zone)
    /// Matches starting before this offset were already checked in the previous chunk
    pub valid_start: usize,
}
2626
/// Iterator that yields overlapping chunks for pattern matching
///
/// This iterator implements the VSCode/Sublime approach: pull overlapping chunks
/// from the underlying piece tree and use standard search algorithms on them.
///
/// # Algorithm
///
/// ```text
/// Chunk 1: [------------ valid -----------]
/// Chunk 2:      [overlap][---- valid ----]
/// Chunk 3:                   [overlap][-- valid --]
///
/// Only matches starting in the "valid" zone are reported to avoid duplicates.
/// ```
///
/// # Example
///
/// ```ignore
/// let chunks = OverlappingChunks::new(&text_buffer, start, end, 4096, pattern.len()-1);
/// for chunk in chunks {
///     // Search only starting from chunk.valid_start
///     if let Some(pos) = search(&chunk.buffer[chunk.valid_start..]) {
///         let absolute_pos = chunk.absolute_pos + chunk.valid_start + pos;
///         return Some(absolute_pos);
///     }
/// }
/// ```
pub struct OverlappingChunks<'a> {
    // Pieces covering the search range, resolved once in new()
    piece_iter: PieceRangeIter,
    // Backing buffers the pieces index into
    buffers: &'a [StringBuffer],

    // Reusable chunk buffer that we fill from pieces
    buffer: Vec<u8>,
    // Document offset of buffer[0]
    buffer_absolute_pos: usize,

    // Current state: next document offset to read, and exclusive end
    current_pos: usize,
    end_pos: usize,

    // Configuration: target bytes of NEW data per chunk, and bytes carried
    // over from the previous chunk
    chunk_size: usize,
    overlap: usize,

    // Track first chunk special case (first chunk has no overlap to keep)
    first_chunk: bool,

    // Cached piece data for incremental reading (copied slice of the
    // current piece, consumed byte-by-byte via current_piece_offset)
    current_piece_data: Option<Vec<u8>>,
    current_piece_offset: usize,
}
2677
impl<'a> OverlappingChunks<'a> {
    /// Create a new overlapping chunks iterator
    ///
    /// # Arguments
    ///
    /// * `text_buffer` - The text buffer to iterate over
    /// * `start` - Start position in the document
    /// * `end` - End position in the document (exclusive)
    /// * `chunk_size` - Target size for each chunk (excluding overlap)
    /// * `overlap` - Number of bytes to overlap between chunks
    ///
    /// # Recommendations
    ///
    /// * For literal string search: `chunk_size=65536, overlap=pattern.len()-1`
    /// * For regex search: `chunk_size=1048576, overlap=4096`
    pub fn new(
        text_buffer: &'a TextBuffer,
        start: usize,
        end: usize,
        chunk_size: usize,
        overlap: usize,
    ) -> Self {
        // Resolve the pieces covering [start, end) once; bytes are pulled
        // from them lazily in read_byte().
        let piece_iter = text_buffer.piece_tree.iter_pieces_in_range(start, end);

        Self {
            piece_iter,
            buffers: &text_buffer.buffers,
            // Sized so a full chunk plus its overlap never reallocates
            buffer: Vec::with_capacity(chunk_size + overlap),
            buffer_absolute_pos: start,
            current_pos: start,
            end_pos: end,
            chunk_size,
            overlap,
            first_chunk: true,
            current_piece_data: None,
            current_piece_offset: 0,
        }
    }

    /// Read one byte from the piece iterator
    ///
    /// Advances `current_pos` on success. Returns `None` once the range is
    /// exhausted — or when the next piece's bytes cannot be obtained (see
    /// review note at the bottom).
    fn read_byte(&mut self) -> Option<u8> {
        loop {
            // If we have cached piece data, read from it
            if let Some(ref data) = self.current_piece_data {
                if self.current_piece_offset < data.len() {
                    let byte = data[self.current_piece_offset];
                    self.current_piece_offset += 1;
                    self.current_pos += 1;
                    return Some(byte);
                } else {
                    // Exhausted current piece, move to next
                    self.current_piece_data = None;
                    self.current_piece_offset = 0;
                }
            }

            // Get next piece
            if let Some(piece_view) = self.piece_iter.next() {
                let buffer_id = piece_view.location.buffer_id();
                if let Some(buffer) = self.buffers.get(buffer_id) {
                    // Extract the relevant slice from this piece
                    let piece_start_in_doc = piece_view.doc_offset;
                    let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                    // Clip to our search range
                    let read_start = self.current_pos.max(piece_start_in_doc);
                    let read_end = self.end_pos.min(piece_end_in_doc);

                    if read_end > read_start {
                        let offset_in_piece = read_start - piece_start_in_doc;
                        let bytes_to_read = read_end - read_start;

                        let buffer_start = piece_view.buffer_offset + offset_in_piece;
                        let buffer_end = buffer_start + bytes_to_read;

                        if let Some(data) = buffer.get_data() {
                            if buffer_end <= data.len() {
                                // Cache this piece's data (copied so we don't
                                // hold a borrow of the backing buffer)
                                self.current_piece_data =
                                    Some(data[buffer_start..buffer_end].to_vec());
                                self.current_piece_offset = 0;
                                continue;
                            }
                        }
                    }
                }
            }

            // No more data
            // NOTE(review): reaching here while the piece iterator still has
            // pieces (unloaded buffer, out-of-range slice, or a piece clipped
            // to zero bytes) ends iteration instead of advancing to the next
            // piece — confirm this early stop is intended for lazy-loaded files.
            return None;
        }
    }

    /// Fill the buffer with the next chunk of data
    ///
    /// Returns `true` when the buffer holds data worth yielding: non-empty for
    /// the first chunk, at least one NEW byte for subsequent chunks.
    fn fill_next_chunk(&mut self) -> bool {
        if self.first_chunk {
            // First chunk: fill up to chunk_size
            self.first_chunk = false;
            while self.buffer.len() < self.chunk_size && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }
            !self.buffer.is_empty()
        } else {
            // Subsequent chunks: keep overlap, fill chunk_size NEW bytes
            if self.current_pos >= self.end_pos {
                return false;
            }

            // Keep overlap bytes at the end; everything before them has been
            // fully searched, so drop it and advance the chunk's base offset
            if self.buffer.len() > self.overlap {
                let drain_amount = self.buffer.len() - self.overlap;
                self.buffer.drain(0..drain_amount);
                self.buffer_absolute_pos += drain_amount;
            }

            // Fill chunk_size NEW bytes (in addition to overlap)
            let before_len = self.buffer.len();
            let target_len = self.overlap + self.chunk_size;
            while self.buffer.len() < target_len && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }

            // Return true if we added new data
            self.buffer.len() > before_len
        }
    }
}
2813
impl<'a> Iterator for OverlappingChunks<'a> {
    type Item = ChunkInfo;

    /// Yield the next chunk, or `None` once the range is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        // Track if this is the first chunk before filling
        // NOTE(review): both fields start equal to `start` and diverge once a
        // byte is consumed, so this mirrors `self.first_chunk`; confirm the
        // two signals cannot disagree (e.g. with chunk_size == 0).
        let is_first = self.buffer_absolute_pos == self.current_pos;

        if !self.fill_next_chunk() {
            return None;
        }

        // First chunk: all data is valid (no overlap from previous)
        // Subsequent chunks: overlap bytes are not valid (already checked)
        let valid_start = if is_first {
            0
        } else {
            self.overlap.min(self.buffer.len())
        };

        // ChunkInfo owns its bytes, so the internal buffer is cloned on every
        // yield — one copy of chunk_size + overlap bytes per chunk.
        Some(ChunkInfo {
            buffer: self.buffer.clone(),
            absolute_pos: self.buffer_absolute_pos,
            valid_start,
        })
    }
}
2840
2841#[cfg(test)]
2842mod tests {
2843    use super::*;
2844
    // An empty buffer has zero bytes but still counts as one (empty) line.
    #[test]
    fn test_empty_buffer() {
        let buffer = TextBuffer::empty();
        assert_eq!(buffer.total_bytes(), 0);
        assert_eq!(buffer.line_count(), Some(1)); // Empty doc has 1 line
    }

    // Exercises line metadata on a three-line buffer: line count, line start
    // offsets, offset->position, and (line, col)->offset conversions.
    #[test]
    fn test_line_positions_multiline() {
        let buffer = TextBuffer::from_bytes(b"Hello\nNew Line\nWorld!".to_vec());

        // Check line count
        assert_eq!(buffer.line_count(), Some(3));

        // Check line starts
        assert_eq!(buffer.line_start_offset(0), Some(0)); // "Hello\n" starts at 0
        assert_eq!(buffer.line_start_offset(1), Some(6)); // "New Line\n" starts at 6
        assert_eq!(buffer.line_start_offset(2), Some(15)); // "World!" starts at 15

        // Check offset_to_position
        assert_eq!(buffer.offset_to_position(0).unwrap().line, 0); // Start of "Hello"
        assert_eq!(buffer.offset_to_position(5).unwrap().line, 0); // End of "Hello" (before \n)
        assert_eq!(buffer.offset_to_position(6).unwrap().line, 1); // Start of "New Line"
        assert_eq!(buffer.offset_to_position(14).unwrap().line, 1); // End of "New Line" (before \n)
        assert_eq!(buffer.offset_to_position(15).unwrap().line, 2); // Start of "World!"

        // Check line_col_to_position
        assert_eq!(buffer.line_col_to_position(0, 5), 5); // End of line 0
        assert_eq!(buffer.line_col_to_position(1, 0), 6); // Start of line 1
        assert_eq!(buffer.line_col_to_position(1, 8), 14); // End of line 1
        assert_eq!(buffer.line_col_to_position(2, 0), 15); // Start of line 2
    }

    // Construction from bytes records both total size and line count.
    #[test]
    fn test_new_from_content() {
        let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec());
        assert_eq!(buffer.total_bytes(), 11);
        assert_eq!(buffer.line_count(), Some(2));
    }

    // get_all_text returns the buffer contents verbatim.
    #[test]
    fn test_get_all_text() {
        let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec());
        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld");
    }
2890
    // Insertion at offset 0 prepends text.
    #[test]
    fn test_insert_at_start() {
        let mut buffer = TextBuffer::from_bytes(b"world".to_vec());
        buffer.insert_bytes(0, b"hello ".to_vec());

        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
        assert_eq!(buffer.total_bytes(), 11);
    }

    // Insertion in the middle splits existing content correctly.
    #[test]
    fn test_insert_in_middle() {
        let mut buffer = TextBuffer::from_bytes(b"helloworld".to_vec());
        buffer.insert_bytes(5, b" ".to_vec());

        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
        assert_eq!(buffer.total_bytes(), 11);
    }

    // Insertion at the end appends text.
    #[test]
    fn test_insert_at_end() {
        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec());
        buffer.insert_bytes(5, b" world".to_vec());

        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
        assert_eq!(buffer.total_bytes(), 11);
    }

    // Newlines in inserted text are reflected in the line count.
    #[test]
    fn test_insert_with_newlines() {
        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec());
        buffer.insert_bytes(5, b"\nworld\ntest".to_vec());

        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld\ntest");
        assert_eq!(buffer.line_count(), Some(3));
    }
2926
    // Deletion starting at offset 0 removes a prefix.
    #[test]
    fn test_delete_from_start() {
        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec());
        buffer.delete_bytes(0, 6);

        assert_eq!(buffer.get_all_text().unwrap(), b"world");
        assert_eq!(buffer.total_bytes(), 5);
    }

    // Deletion in the middle joins the surrounding content.
    #[test]
    fn test_delete_from_middle() {
        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec());
        buffer.delete_bytes(5, 1);

        assert_eq!(buffer.get_all_text().unwrap(), b"helloworld");
        assert_eq!(buffer.total_bytes(), 10);
    }

    // Deletion reaching the end removes a suffix.
    #[test]
    fn test_delete_from_end() {
        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec());
        buffer.delete_bytes(6, 5);

        assert_eq!(buffer.get_all_text().unwrap(), b"hello ");
        assert_eq!(buffer.total_bytes(), 6);
    }

    // Deleting newlines updates the line count accordingly.
    #[test]
    fn test_delete_with_newlines() {
        let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec());
        buffer.delete_bytes(5, 7); // Delete "\nworld\n"

        assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
        assert_eq!(buffer.line_count(), Some(1));
    }
2962
    // offset_to_position and position_to_offset agree with each other.
    #[test]
    fn test_offset_position_conversions() {
        let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec());

        let pos = buffer.offset_to_position(0);
        assert_eq!(pos, Some(Position { line: 0, column: 0 }));

        let pos = buffer.offset_to_position(6);
        assert_eq!(pos, Some(Position { line: 1, column: 0 }));

        let offset = buffer.position_to_offset(Position { line: 1, column: 0 });
        assert_eq!(offset, 6);
    }

    // Insertion addressed by (line, column) lands at the right byte offset.
    #[test]
    fn test_insert_at_position() {
        let mut buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec());
        buffer.insert_at_position(Position { line: 1, column: 0 }, b"beautiful ".to_vec());

        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nbeautiful world");
    }

    // delete_range removes everything between two positions (end exclusive).
    #[test]
    fn test_delete_range() {
        let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec());

        let start = Position { line: 0, column: 5 };
        let end = Position { line: 2, column: 0 };
        buffer.delete_range(start, end);

        assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
    }

    // get_line returns each line including its trailing \n (last line bare),
    // and None past the end.
    #[test]
    fn test_get_line() {
        let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec());

        assert_eq!(buffer.get_line(0), Some(b"hello\n".to_vec()));
        assert_eq!(buffer.get_line(1), Some(b"world\n".to_vec()));
        assert_eq!(buffer.get_line(2), Some(b"test".to_vec()));
        assert_eq!(buffer.get_line(3), None);
    }
3005
    // Interleaved inserts and deletes keep content and line count consistent.
    #[test]
    fn test_multiple_operations() {
        let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec());

        buffer.insert_bytes(0, b"start\n".to_vec());
        assert_eq!(buffer.line_count(), Some(4));

        buffer.delete_bytes(6, 6); // Delete "line1\n"
        assert_eq!(buffer.line_count(), Some(3));

        buffer.insert_bytes(6, b"new\n".to_vec());
        assert_eq!(buffer.line_count(), Some(4));

        let text = buffer.get_all_text().unwrap();
        assert_eq!(text, b"start\nnew\nline2\nline3");
    }

    // get_text_range returns exact sub-slices of the document.
    #[test]
    fn test_get_text_range() {
        let buffer = TextBuffer::from_bytes(b"hello world".to_vec());

        assert_eq!(buffer.get_text_range(0, 5), Some(b"hello".to_vec()));
        assert_eq!(buffer.get_text_range(6, 5), Some(b"world".to_vec()));
        assert_eq!(buffer.get_text_range(0, 11), Some(b"hello world".to_vec()));
    }

    // Zero-length insert and delete are no-ops.
    #[test]
    fn test_empty_operations() {
        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec());

        buffer.insert_bytes(2, Vec::new());
        assert_eq!(buffer.get_all_text().unwrap(), b"hello");

        buffer.delete_bytes(2, 0);
        assert_eq!(buffer.get_all_text().unwrap(), b"hello");
    }

    #[test]
    fn test_sequential_inserts_at_beginning() {
        // Regression test for piece tree duplicate insertion bug
        let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec());

        // Delete all
        buffer.delete_bytes(0, 12);
        assert_eq!(buffer.get_all_text().unwrap(), b"");

        // Insert 'a' at 0
        buffer.insert_bytes(0, vec![b'a']);
        assert_eq!(buffer.get_all_text().unwrap(), b"a");

        // Insert 'b' at 0 (should give "ba")
        buffer.insert_bytes(0, vec![b'b']);
        assert_eq!(buffer.get_all_text().unwrap(), b"ba");
    }
3060
3061    // ===== Phase 1-3: Large File Support Tests =====
3062
3063    mod large_file_support {
3064        use super::*;
3065        use crate::model::piece_tree::StringBuffer;
3066        use std::fs::File;
3067        use std::io::Write;
3068        use tempfile::TempDir;
3069
3070        // Phase 1: Option<usize> Type Safety Tests
3071
        // Loaded buffers count their line feeds eagerly.
        #[test]
        fn test_line_feed_count_is_some_for_loaded_buffer() {
            let buffer = StringBuffer::new(0, b"hello\nworld\ntest".to_vec());
            assert_eq!(buffer.line_feed_count(), Some(2));
        }

        // Unloaded buffers cannot know their line feed count yet.
        #[test]
        fn test_line_feed_count_is_none_for_unloaded_buffer() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            let buffer = StringBuffer::new_unloaded(0, file_path, 0, 100);
            assert_eq!(buffer.line_feed_count(), None);
        }

        // Small in-memory buffers expose an exact line count.
        #[test]
        fn test_line_count_is_some_for_small_buffer() {
            let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec());
            assert_eq!(buffer.line_count(), Some(3));
        }

        // A piece tree built without line-feed metadata reports None,
        // rather than a wrong count.
        #[test]
        fn test_piece_tree_works_with_none_line_count() {
            // Create a buffer with no line count information
            let buffer = StringBuffer::new_loaded(0, b"hello\nworld".to_vec(), false);
            assert_eq!(buffer.line_feed_count(), None);

            // Create piece tree without line feed count
            use crate::model::piece_tree::{BufferLocation, PieceTree};
            let tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, None);

            // line_count should return None
            assert_eq!(tree.line_count(), None);
        }
3106
3107        // Phase 2: BufferData Enum Tests
3108
        // Loaded variant with line indexing: data and line starts available.
        #[test]
        fn test_buffer_data_loaded_variant() {
            let data = b"hello world".to_vec();
            let buffer = StringBuffer::new_loaded(0, data.clone(), true);

            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&data[..]));
            assert!(buffer.get_line_starts().is_some());
        }

        // Loaded variant without line indexing (large-file mode).
        #[test]
        fn test_buffer_data_loaded_without_line_starts() {
            let data = b"hello\nworld".to_vec();
            let buffer = StringBuffer::new_loaded(0, data.clone(), false);

            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&data[..]));
            assert_eq!(buffer.get_line_starts(), None); // No line indexing
        }

        // Unloaded variant exposes neither data nor line starts.
        #[test]
        fn test_buffer_data_unloaded_variant() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            let buffer = StringBuffer::new_unloaded(0, file_path.clone(), 0, 100);

            assert!(!buffer.is_loaded());
            assert_eq!(buffer.get_data(), None);
            assert_eq!(buffer.get_line_starts(), None);
        }

        // load() transitions an unloaded buffer to loaded and reads the
        // backing file's bytes.
        #[test]
        fn test_buffer_load_method() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            // Create test file
            let test_data = b"hello world";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Create unloaded buffer
            let mut buffer = StringBuffer::new_unloaded(0, file_path, 0, test_data.len());
            assert!(!buffer.is_loaded());

            // Load the buffer
            buffer.load().unwrap();

            // Now it should be loaded
            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&test_data[..]));
        }
3164
3165        #[test]
3166        fn test_string_buffer_new_vs_new_loaded() {
3167            let data = b"hello\nworld".to_vec();
3168
3169            // StringBuffer::new should compute line starts
3170            let buf1 = StringBuffer::new(0, data.clone());
3171            assert!(buf1.is_loaded());
3172            assert!(buf1.get_line_starts().is_some());
3173            assert_eq!(buf1.line_feed_count(), Some(1));
3174
3175            // StringBuffer::new_loaded with compute_lines=false should not
3176            let buf2 = StringBuffer::new_loaded(0, data.clone(), false);
3177            assert!(buf2.is_loaded());
3178            assert_eq!(buf2.get_line_starts(), None);
3179            assert_eq!(buf2.line_feed_count(), None);
3180        }
3181
3182        // Phase 3: Large File Detection Tests
3183
3184        #[test]
3185        fn test_load_small_file_eager_loading() {
3186            let temp_dir = TempDir::new().unwrap();
3187            let file_path = temp_dir.path().join("small.txt");
3188
3189            // Create a small file (10 bytes < 100MB threshold)
3190            let test_data = b"hello\ntest";
3191            File::create(&file_path)
3192                .unwrap()
3193                .write_all(test_data)
3194                .unwrap();
3195
3196            // Load with default threshold
3197            let buffer = TextBuffer::load_from_file(&file_path, 0).unwrap();
3198
3199            // Should be eagerly loaded (not large_file mode)
3200            assert!(!buffer.large_file);
3201            assert_eq!(buffer.total_bytes(), test_data.len());
3202            assert_eq!(buffer.line_count(), Some(2)); // Has line indexing
3203            assert_eq!(buffer.get_all_text().unwrap(), test_data);
3204
3205            // The buffer should be loaded
3206            assert!(buffer.buffers[0].is_loaded());
3207        }
3208
        #[test]
        fn test_load_large_file_lazy_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large.txt");

            // Create a "large" file by using a small threshold
            let test_data = b"hello\nworld\ntest";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Load with threshold of 10 bytes (file is 16 bytes, so it's "large")
            let buffer = TextBuffer::load_from_file(&file_path, 10).unwrap();

            // Should be in large_file mode
            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), test_data.len());

            // Should NOT have line indexing (large-file mode skips it)
            assert_eq!(buffer.line_count(), None);

            // The buffer should stay unloaded until a read demands it
            assert!(!buffer.buffers[0].is_loaded());
            assert_eq!(buffer.buffers[0].get_data(), None);
        }
3235
        /// Test that reproduces issue #657: Search on large plain text files
        ///
        /// The bug: When a large file is opened with lazy loading, buffer.to_string()
        /// returns None because some buffers are unloaded. This causes search to fail
        /// with "Buffer not fully loaded" error.
        ///
        /// The fix: Use get_text_range_mut() which loads the buffer on demand.
        #[test]
        fn test_issue_657_search_on_large_file_unloaded_buffer() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_search_test.txt");

            // Create test content with a searchable string
            let test_data = b"line1\nline2\nSEARCH_TARGET\nline4\nline5";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Load with small threshold to force lazy loading
            let mut buffer = TextBuffer::load_from_file(&file_path, 10).unwrap();

            // Verify we're in large file mode with unloaded buffer
            assert!(buffer.large_file, "Buffer should be in large file mode");
            assert!(
                !buffer.buffers[0].is_loaded(),
                "Buffer should be unloaded initially"
            );

            // REPRODUCE THE BUG: to_string() returns None for unloaded buffers
            // This is what the old perform_search() code did, causing the error
            assert!(
                buffer.to_string().is_none(),
                "BUG REPRODUCED: to_string() returns None for unloaded buffer"
            );

            // THE FIX: get_text_range_mut() loads the buffer on demand
            let total_bytes = buffer.len();
            let content = buffer.get_text_range_mut(0, total_bytes).unwrap();
            let content_str = String::from_utf8_lossy(&content);

            // Verify the content is now available and contains our search target
            assert!(
                content_str.contains("SEARCH_TARGET"),
                "FIX WORKS: get_text_range_mut() loaded the buffer and found the search target"
            );

            // After loading, to_string() should also work (side effect of the
            // on-demand load above: all buffers are now resident)
            assert!(
                buffer.to_string().is_some(),
                "After get_text_range_mut(), to_string() should work"
            );
        }
3289
3290        #[test]
3291        fn test_large_file_threshold_boundary() {
3292            let temp_dir = TempDir::new().unwrap();
3293
3294            // Test exactly at threshold
3295            let file_path = temp_dir.path().join("at_threshold.txt");
3296            let test_data = vec![b'x'; 100];
3297            File::create(&file_path)
3298                .unwrap()
3299                .write_all(&test_data)
3300                .unwrap();
3301
3302            // Load with threshold of 100 bytes - should be large file (>= threshold)
3303            let buffer = TextBuffer::load_from_file(&file_path, 100).unwrap();
3304            assert!(buffer.large_file);
3305
3306            // Test just below threshold
3307            let file_path2 = temp_dir.path().join("below_threshold.txt");
3308            let test_data2 = vec![b'x'; 99];
3309            File::create(&file_path2)
3310                .unwrap()
3311                .write_all(&test_data2)
3312                .unwrap();
3313
3314            // Load with threshold of 100 bytes - should be small file (< threshold)
3315            let buffer2 = TextBuffer::load_from_file(&file_path2, 100).unwrap();
3316            assert!(!buffer2.large_file);
3317        }
3318
3319        #[test]
3320        fn test_large_file_default_threshold() {
3321            let temp_dir = TempDir::new().unwrap();
3322            let file_path = temp_dir.path().join("test.txt");
3323
3324            // Create a small file
3325            File::create(&file_path)
3326                .unwrap()
3327                .write_all(b"hello")
3328                .unwrap();
3329
3330            // Load with threshold 0 - should use DEFAULT_LARGE_FILE_THRESHOLD
3331            let buffer = TextBuffer::load_from_file(&file_path, 0).unwrap();
3332
3333            // 5 bytes < 100MB, so should not be large file
3334            assert!(!buffer.large_file);
3335        }
3336
3337        #[test]
3338        fn test_large_file_has_correct_piece_tree_structure() {
3339            let temp_dir = TempDir::new().unwrap();
3340            let file_path = temp_dir.path().join("large.txt");
3341
3342            let test_data = b"hello world";
3343            File::create(&file_path)
3344                .unwrap()
3345                .write_all(test_data)
3346                .unwrap();
3347
3348            // Load as large file
3349            let buffer = TextBuffer::load_from_file(&file_path, 5).unwrap();
3350
3351            // Should have correct total bytes
3352            assert_eq!(buffer.total_bytes(), test_data.len());
3353
3354            // Should have 1 buffer
3355            assert_eq!(buffer.buffers.len(), 1);
3356
3357            // Buffer should be unloaded
3358            assert!(!buffer.buffers[0].is_loaded());
3359        }
3360
3361        #[test]
3362        fn test_empty_large_file() {
3363            let temp_dir = TempDir::new().unwrap();
3364            let file_path = temp_dir.path().join("empty.txt");
3365
3366            // Create an empty file
3367            File::create(&file_path).unwrap();
3368
3369            // Load as large file
3370            let buffer = TextBuffer::load_from_file(&file_path, 0).unwrap();
3371
3372            // Empty file is handled gracefully
3373            assert_eq!(buffer.total_bytes(), 0);
3374            assert!(buffer.is_empty());
3375        }
3376
        #[test]
        fn test_large_file_basic_api_operations() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_test.txt");

            // Create a test file with known content (24 bytes, 4 lines)
            let test_data = b"line1\nline2\nline3\nline4\n";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Load as large file (use small threshold to trigger large file mode)
            let mut buffer = TextBuffer::load_from_file(&file_path, 10).unwrap();

            // Verify it's in large file mode
            assert!(buffer.large_file);
            assert_eq!(buffer.line_count(), None); // No line indexing

            // Test basic access functions
            assert_eq!(buffer.total_bytes(), test_data.len());
            assert!(!buffer.is_empty());
            assert_eq!(buffer.len(), test_data.len());

            // Test reading operations using get_text_range_mut (lazy loads on demand)
            let range_result = buffer.get_text_range_mut(0, 5).unwrap();
            assert_eq!(range_result, b"line1");

            let range_result2 = buffer.get_text_range_mut(6, 5).unwrap();
            assert_eq!(range_result2, b"line2");

            // Test get_all_text (via get_text_range after lazy loading)
            let all_text = buffer.get_all_text().unwrap();
            assert_eq!(all_text, test_data);

            // Test slice_bytes method
            assert_eq!(buffer.slice_bytes(0..5), b"line1");

            // Test basic editing operations
            // Insert at offset 0 ("prefix_" is 7 bytes)
            buffer.insert_bytes(0, b"prefix_".to_vec());
            assert_eq!(buffer.total_bytes(), test_data.len() + 7);
            assert!(buffer.is_modified());

            // Verify the insertion worked
            let text_after_insert = buffer.get_all_text().unwrap();
            assert_eq!(&text_after_insert[0..7], b"prefix_");
            assert_eq!(&text_after_insert[7..12], b"line1");

            // Delete some bytes (removes the 7-byte prefix just inserted)
            buffer.delete_bytes(0, 7);
            assert_eq!(buffer.total_bytes(), test_data.len());

            // Verify deletion worked - should be back to original
            let text_after_delete = buffer.get_all_text().unwrap();
            assert_eq!(text_after_delete, test_data);

            // Insert at end ("suffix" is 6 bytes)
            let end_offset = buffer.total_bytes();
            buffer.insert_bytes(end_offset, b"suffix".to_vec());
            assert_eq!(buffer.total_bytes(), test_data.len() + 6);

            // Verify end insertion
            let final_text = buffer.get_all_text().unwrap();
            assert!(final_text.ends_with(b"suffix"));
            assert_eq!(&final_text[0..test_data.len()], test_data);

            // Test offset_to_position
            // Note: Without line indexing, position tracking is limited
            // but byte-level operations still work
            let pos = buffer.offset_to_position(0).unwrap();
            assert_eq!(pos.column, 0);

            // Test position_to_offset
            let offset = buffer.position_to_offset(Position { line: 0, column: 0 });
            assert_eq!(offset, 0);

            // Test replace operations (replaces the leading "line1" with "START")
            let replace_result = buffer.replace_range(0..5, "START");
            assert!(replace_result);

            let text_after_replace = buffer.get_all_text().unwrap();
            assert!(text_after_replace.starts_with(b"START"));
        }
3461
        #[test]
        fn test_large_file_chunk_based_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("huge.txt");

            // Create a file larger than LOAD_CHUNK_SIZE (1MB)
            // We'll create a 3MB file with a repeating pattern so we can verify chunks
            let chunk_size = LOAD_CHUNK_SIZE; // 1MB
            let file_size = chunk_size * 3; // 3MB

            // Pattern: "AAAA...AAAA" (1MB of A's), "BBBB...BBBB" (1MB of B's), "CCCC...CCCC" (1MB of C's)
            let mut file = File::create(&file_path).unwrap();
            file.write_all(&vec![b'A'; chunk_size]).unwrap();
            file.write_all(&vec![b'B'; chunk_size]).unwrap();
            file.write_all(&vec![b'C'; chunk_size]).unwrap();
            file.flush().unwrap();

            // Load as large file (use threshold of 1 byte to ensure large file mode)
            let mut buffer = TextBuffer::load_from_file(&file_path, 1).unwrap();

            // Verify it's in large file mode
            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), file_size);

            // Buffer should be unloaded initially
            assert!(!buffer.buffers[0].is_loaded());

            // Read from the first chunk (should load only first 1MB)
            let first_chunk_data = buffer.get_text_range_mut(0, 1024).unwrap();
            assert_eq!(first_chunk_data.len(), 1024);
            assert!(first_chunk_data.iter().all(|&b| b == b'A'));

            // Read from the middle chunk (offset = 1MB, should load second 1MB)
            let second_chunk_data = buffer.get_text_range_mut(chunk_size, 1024).unwrap();
            assert_eq!(second_chunk_data.len(), 1024);
            assert!(second_chunk_data.iter().all(|&b| b == b'B'));

            // Read from the last chunk (offset = 2MB, should load third 1MB)
            let third_chunk_data = buffer.get_text_range_mut(chunk_size * 2, 1024).unwrap();
            assert_eq!(third_chunk_data.len(), 1024);
            assert!(third_chunk_data.iter().all(|&b| b == b'C'));

            // Verify we can read across chunk boundaries
            // Read from middle of first chunk to middle of second chunk
            let cross_chunk_offset = chunk_size - 512;
            let cross_chunk_data = buffer.get_text_range_mut(cross_chunk_offset, 1024).unwrap();
            assert_eq!(cross_chunk_data.len(), 1024);
            // First 512 bytes should be 'A', next 512 bytes should be 'B'
            assert!(cross_chunk_data[..512].iter().all(|&b| b == b'A'));
            assert!(cross_chunk_data[512..].iter().all(|&b| b == b'B'));

            // After chunk-based loading, verify the piece tree has been split
            // The number of buffers should be greater than 1 (original + chunks)
            assert!(
                buffer.buffers.len() > 1,
                "Expected multiple buffers after chunk-based loading, got {}",
                buffer.buffers.len()
            );

            // Test that editing still works after chunk-based loading
            // ("PREFIX" is 6 bytes)
            buffer.insert_bytes(0, b"PREFIX".to_vec());
            assert_eq!(buffer.total_bytes(), file_size + 6);

            let after_insert = buffer.get_text_range_mut(0, 6).unwrap();
            assert_eq!(after_insert, b"PREFIX");

            // Verify the original data is still there after the prefix
            let after_prefix = buffer.get_text_range_mut(6, 10).unwrap();
            assert!(after_prefix.iter().all(|&b| b == b'A'));

            // Most importantly: validate the entire buffer content matches the original file
            // Create a fresh buffer to read the original file
            let mut buffer2 = TextBuffer::load_from_file(&file_path, 1).unwrap();

            // Read the entire file in chunks and verify each chunk
            let chunk_read_size = 64 * 1024; // Read in 64KB chunks for efficiency
            let mut offset = 0;
            while offset < file_size {
                let bytes_to_read = chunk_read_size.min(file_size - offset);
                let chunk_data = buffer2.get_text_range_mut(offset, bytes_to_read).unwrap();

                // Determine which section of the file we're reading
                let first_mb_end = chunk_size;
                let second_mb_end = chunk_size * 2;

                // Validate the data based on which MB section we're in
                for (i, &byte) in chunk_data.iter().enumerate() {
                    let file_offset = offset + i;
                    let expected = if file_offset < first_mb_end {
                        b'A'
                    } else if file_offset < second_mb_end {
                        b'B'
                    } else {
                        b'C'
                    };
                    assert_eq!(
                        byte, expected,
                        "Mismatch at file offset {}: expected {}, got {}",
                        file_offset, expected as char, byte as char
                    );
                }

                offset += bytes_to_read;
            }
        }
3567
        /// Test that save_to_file works correctly with partially loaded large files
        /// This is a regression test for a bug where saving would silently produce
        /// an empty file if any buffer regions were still unloaded.
        #[test]
        fn test_large_file_incremental_save() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_save_test.txt");

            // Create a small file but use tiny threshold to trigger large file mode
            let chunk_size = 1000; // 1KB chunks
            let file_size = chunk_size * 2; // 2KB total

            let mut file = File::create(&file_path).unwrap();
            // First half: 'A' repeated
            file.write_all(&vec![b'A'; chunk_size]).unwrap();
            // Second half: 'B' repeated
            file.write_all(&vec![b'B'; chunk_size]).unwrap();
            file.flush().unwrap();

            // Load as large file (threshold of 100 bytes)
            let mut buffer = TextBuffer::load_from_file(&file_path, 100).unwrap();
            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), file_size);

            // Only read from the beginning - this loads only a small region,
            // deliberately leaving the 'B' half of the file unloaded
            let first_bytes = buffer.get_text_range_mut(0, 50).unwrap();
            assert!(first_bytes.iter().all(|&b| b == b'A'));

            // Make an edit at the beginning ("PREFIX_" is 7 bytes)
            buffer.insert_bytes(0, b"PREFIX_".to_vec());

            // Save to a new file (to avoid issues with reading while writing same file)
            let save_path = temp_dir.path().join("saved.txt");
            buffer.save_to_file(&save_path).unwrap();

            // Verify the saved file
            let saved_content = std::fs::read(&save_path).unwrap();

            // Check total size: original + "PREFIX_" (7 bytes)
            assert_eq!(
                saved_content.len(),
                file_size + 7,
                "Saved file should be {} bytes, got {}",
                file_size + 7,
                saved_content.len()
            );

            // Check prefix
            assert_eq!(&saved_content[..7], b"PREFIX_", "Should start with PREFIX_");

            // Check that first chunk (after prefix) contains A's
            assert!(
                saved_content[7..100].iter().all(|&b| b == b'A'),
                "First chunk after prefix should be A's"
            );

            // Check that second chunk contains B's (this was unloaded!)
            let second_chunk_start = 7 + chunk_size;
            assert!(
                saved_content[second_chunk_start..second_chunk_start + 100]
                    .iter()
                    .all(|&b| b == b'B'),
                "Second chunk should be B's (was unloaded, should be preserved)"
            );
        }
3633
        /// Test that save_to_file handles edits at multiple positions
        #[test]
        fn test_large_file_save_with_multiple_edits() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("multi_edit.txt");

            // Create a ~5KB file with numbered lines for easier verification
            let mut content = Vec::new();
            for i in 0..100 {
                content.extend_from_slice(
                    format!("Line {:04}: padding to make it longer\n", i).as_bytes(),
                );
            }
            let original_len = content.len();
            std::fs::write(&file_path, &content).unwrap();

            // Load as large file (threshold of 500 bytes)
            let mut buffer = TextBuffer::load_from_file(&file_path, 500).unwrap();
            assert!(
                buffer.line_count().is_none(),
                "Should be in large file mode"
            );

            // Edit at the beginning ("[START]" is 7 bytes)
            buffer.insert_bytes(0, b"[START]".to_vec());

            // Edit somewhere in the middle (load that region first)
            let mid_offset = original_len / 2;
            let _mid_bytes = buffer.get_text_range_mut(mid_offset + 7, 10).unwrap(); // +7 for our insert
            // "[MIDDLE]" is 8 bytes
            buffer.insert_bytes(mid_offset + 7, b"[MIDDLE]".to_vec());

            // Save
            let save_path = temp_dir.path().join("multi_edit_saved.txt");
            buffer.save_to_file(&save_path).unwrap();

            // Verify
            let saved = std::fs::read_to_string(&save_path).unwrap();

            assert!(
                saved.starts_with("[START]Line 0000"),
                "Should start with our edit"
            );
            assert!(saved.contains("[MIDDLE]"), "Should contain middle edit");
            assert!(saved.contains("Line 0099"), "Should preserve end of file");

            // Verify total length
            let expected_len = original_len + 7 + 8; // [START] + [MIDDLE]
            assert_eq!(
                saved.len(),
                expected_len,
                "Length should be original + edits"
            );
        }
3687    }
3688
3689    // ===== Offset to Position Tests =====
3690    // These tests focus on the offset_to_position correctness
3691
3692    #[test]
3693    fn test_offset_to_position_simple() {
3694        // Create a buffer with known line structure
3695        // Line 0: "a\n" (bytes 0-1, newline at 1)
3696        // Line 1: "b\n" (bytes 2-3, newline at 3)
3697        // Line 2: "c\n" (bytes 4-5, newline at 5)
3698        // Line 3: "d" (bytes 6, no newline)
3699        let content = b"a\nb\nc\nd";
3700        let buffer = TextBuffer::from_bytes(content.to_vec());
3701
3702        // Verify specific positions
3703        let pos = buffer
3704            .offset_to_position(0)
3705            .expect("small buffer should have line metadata");
3706        assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
3707        assert_eq!(pos.column, 0);
3708
3709        let pos = buffer
3710            .offset_to_position(1)
3711            .expect("small buffer should have line metadata");
3712        assert_eq!(pos.line, 0, "Byte 1 (newline) should be on line 0");
3713        assert_eq!(pos.column, 1);
3714
3715        let pos = buffer
3716            .offset_to_position(2)
3717            .expect("small buffer should have line metadata");
3718        assert_eq!(pos.line, 1, "Byte 2 should be on line 1");
3719        assert_eq!(pos.column, 0);
3720
3721        let pos = buffer
3722            .offset_to_position(3)
3723            .expect("small buffer should have line metadata");
3724        assert_eq!(pos.line, 1, "Byte 3 (newline) should be on line 1");
3725        assert_eq!(pos.column, 1);
3726
3727        let pos = buffer
3728            .offset_to_position(4)
3729            .expect("small buffer should have line metadata");
3730        assert_eq!(pos.line, 2, "Byte 4 should be on line 2");
3731        assert_eq!(pos.column, 0);
3732
3733        let pos = buffer
3734            .offset_to_position(6)
3735            .expect("small buffer should have line metadata");
3736        assert_eq!(pos.line, 3, "Byte 6 should be on line 3");
3737        assert_eq!(pos.column, 0);
3738    }
3739
3740    #[test]
3741    fn test_offset_to_position_after_insert() {
3742        // Start with simple content
3743        let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec());
3744
3745        // Insert at position 2 (start of line 1)
3746        buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
3747
3748        // After insert, buffer should be: "a\nx\nb\n"
3749        // Line 0: "a\n" (bytes 0-1)
3750        // Line 1: "x\n" (bytes 2-3)
3751        // Line 2: "b\n" (bytes 4-5)
3752
3753        let pos = buffer
3754            .offset_to_position(0)
3755            .expect("small buffer should have line metadata");
3756        assert_eq!(pos.line, 0, "Byte 0 should still be on line 0");
3757
3758        let pos = buffer
3759            .offset_to_position(2)
3760            .expect("small buffer should have line metadata");
3761        assert_eq!(
3762            pos.line, 1,
3763            "Byte 2 (start of inserted line) should be on line 1"
3764        );
3765
3766        let pos = buffer
3767            .offset_to_position(4)
3768            .expect("small buffer should have line metadata");
3769        assert_eq!(
3770            pos.line, 2,
3771            "Byte 4 (start of 'b') should be on line 2 after insert"
3772        );
3773    }
3774
3775    #[test]
3776    fn test_offset_to_position_empty_lines() {
3777        // Test with empty lines: "\n\n\n"
3778        let buffer = TextBuffer::from_bytes(b"\n\n\n".to_vec());
3779
3780        // Line 0: "\n" (byte 0)
3781        // Line 1: "\n" (byte 1)
3782        // Line 2: "\n" (byte 2)
3783        // Line 3: "" (empty, after last newline)
3784
3785        let pos = buffer
3786            .offset_to_position(0)
3787            .expect("small buffer should have line metadata");
3788        assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
3789
3790        let pos = buffer
3791            .offset_to_position(1)
3792            .expect("small buffer should have line metadata");
3793        assert_eq!(pos.line, 1, "Byte 1 should be on line 1");
3794
3795        let pos = buffer
3796            .offset_to_position(2)
3797            .expect("small buffer should have line metadata");
3798        assert_eq!(pos.line, 2, "Byte 2 should be on line 2");
3799
3800        let pos = buffer
3801            .offset_to_position(3)
3802            .expect("small buffer should have line metadata");
3803        assert_eq!(pos.line, 3, "Byte 3 (EOF) should be on line 3");
3804    }
3805
3806    #[test]
3807    fn test_offset_to_position_long_lines() {
3808        // Test with long lines to ensure it's not just line counting
3809        let mut content = Vec::new();
3810        content.extend_from_slice(b"aaaaaaaaaa\n"); // Line 0: 11 bytes (10 'a's + newline)
3811        content.extend_from_slice(b"bbbbbbbbbb\n"); // Line 1: 11 bytes
3812        content.extend_from_slice(b"cccccccccc"); // Line 2: 10 bytes (no newline)
3813
3814        let buffer = TextBuffer::from_bytes(content.clone());
3815
3816        // Test positions at start of each line
3817        let pos = buffer
3818            .offset_to_position(0)
3819            .expect("small buffer should have line metadata");
3820        assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
3821        assert_eq!(pos.column, 0);
3822
3823        let pos = buffer
3824            .offset_to_position(11)
3825            .expect("small buffer should have line metadata");
3826        assert_eq!(pos.line, 1, "Byte 11 (start of line 1) should be on line 1");
3827        assert_eq!(pos.column, 0);
3828
3829        let pos = buffer
3830            .offset_to_position(22)
3831            .expect("small buffer should have line metadata");
3832        assert_eq!(pos.line, 2, "Byte 22 (start of line 2) should be on line 2");
3833        assert_eq!(pos.column, 0);
3834
3835        // Test mid-line positions
3836        let pos = buffer
3837            .offset_to_position(5)
3838            .expect("small buffer should have line metadata");
3839        assert_eq!(pos.line, 0, "Byte 5 should be on line 0");
3840        assert_eq!(pos.column, 5);
3841
3842        let pos = buffer
3843            .offset_to_position(16)
3844            .expect("small buffer should have line metadata");
3845        assert_eq!(pos.line, 1, "Byte 16 should be on line 1");
3846        assert_eq!(pos.column, 5);
3847    }
3848
3849    #[test]
3850    fn test_line_iterator_with_offset_to_position() {
3851        // This combines line iterator with offset_to_position to find issues
3852        let mut buffer = TextBuffer::from_bytes(b"line0\nline1\nline2\n".to_vec());
3853
3854        // Test creating line iterator at various positions
3855        for byte_pos in 0..=buffer.len() {
3856            let iter = buffer.line_iterator(byte_pos, 80);
3857            let iter_pos = iter.current_position();
3858            let expected_line = buffer
3859                .offset_to_position(byte_pos)
3860                .expect("small buffer should have line metadata")
3861                .line;
3862            let expected_line_start = buffer.position_to_offset(Position {
3863                line: expected_line,
3864                column: 0,
3865            });
3866
3867            assert_eq!(
3868                iter_pos, expected_line_start,
3869                "LineIterator at byte {} should position at line start {} but got {}",
3870                byte_pos, expected_line_start, iter_pos
3871            );
3872        }
3873    }
3874
3875    #[test]
3876    fn test_piece_tree_line_count_after_insert() {
3877        // Debug the piece tree structure after insert
3878        let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec());
3879
3880        // Insert at line 1, column 0
3881        buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
3882
3883        // Manually verify line counts
3884        let content = buffer.slice_bytes(0..buffer.len());
3885        let newline_count = content.iter().filter(|&&b| b == b'\n').count();
3886        let expected_line_count = newline_count + 1;
3887        let actual_line_count = buffer.line_count();
3888
3889        assert_eq!(
3890            actual_line_count,
3891            Some(expected_line_count),
3892            "Line count mismatch after insert"
3893        );
3894    }
3895
3896    #[test]
3897    fn test_position_to_lsp_position_after_modification() {
3898        // This test demonstrates a bug in the piece tree's offset_to_position
3899        // where column calculation is incorrect after buffer modifications.
3900        // The position_to_lsp_position function works around this by using
3901        // line_start_offset to calculate the column correctly.
3902
3903        // Initial content: "fn foo(val: i32) {\n    val + 1\n}\n"
3904        let initial = b"fn foo(val: i32) {\n    val + 1\n}\n";
3905        let mut buffer = TextBuffer::from_bytes(initial.to_vec());
3906
3907        // Verify initial positions work correctly
3908        // Position 23 is 'v' of second "val" on line 1
3909        let (line, char) = buffer.position_to_lsp_position(23);
3910        assert_eq!(line, 1, "Initial: position 23 should be on line 1");
3911        assert_eq!(char, 4, "Initial: position 23 should be at char 4");
3912
3913        // Simulate rename: delete "val" at position 23 (line 1, char 4) and insert "value"
3914        // Position 23 = line 1, char 4; Position 26 = line 1, char 7
3915        buffer.delete_range(
3916            Position { line: 1, column: 4 },
3917            Position { line: 1, column: 7 },
3918        );
3919        buffer.insert_bytes(23, b"value".to_vec()); // Insert "value"
3920
3921        // Also rename the first occurrence
3922        // Position 7 = line 0, char 7; Position 10 = line 0, char 10
3923        buffer.delete_range(
3924            Position { line: 0, column: 7 },
3925            Position {
3926                line: 0,
3927                column: 10,
3928            },
3929        );
3930        buffer.insert_bytes(7, b"value".to_vec()); // Insert "value"
3931
3932        // Buffer is now: "fn foo(value: i32) {\n    value + 1\n}\n"
3933        let content = String::from_utf8_lossy(&buffer.get_all_text().unwrap()).to_string();
3934        assert_eq!(content, "fn foo(value: i32) {\n    value + 1\n}\n");
3935
3936        // Position 25 is now 'v' of second "value" on line 1
3937        // Line 0: "fn foo(value: i32) {\n" = 21 chars (positions 0-20)
3938        // Line 1: "    value + 1\n" starts at position 21
3939        // Position 25 = 21 + 4 = line 1, char 4
3940
3941        // The workaround in position_to_lsp_position should give correct result
3942        let (line, char) = buffer.position_to_lsp_position(25);
3943        assert_eq!(
3944            line, 1,
3945            "After modification: position 25 should be on line 1"
3946        );
3947        assert_eq!(
3948            char, 4,
3949            "After modification: position 25 should be at char 4"
3950        );
3951
3952        // Also verify position 21 (start of line 1) works
3953        let (line, char) = buffer.position_to_lsp_position(21);
3954        assert_eq!(line, 1, "Position 21 should be on line 1");
3955        assert_eq!(char, 0, "Position 21 should be at char 0 (start of line)");
3956    }
3957
3958    #[test]
3959    fn test_detect_crlf() {
3960        assert_eq!(
3961            TextBuffer::detect_line_ending(b"hello\r\nworld\r\n"),
3962            LineEnding::CRLF
3963        );
3964    }
3965
3966    #[test]
3967    fn test_detect_lf() {
3968        assert_eq!(
3969            TextBuffer::detect_line_ending(b"hello\nworld\n"),
3970            LineEnding::LF
3971        );
3972    }
3973
3974    #[test]
3975    fn test_normalize_crlf() {
3976        let input = b"hello\r\nworld\r\n".to_vec();
3977        let output = TextBuffer::normalize_line_endings(input);
3978        assert_eq!(output, b"hello\nworld\n");
3979    }
3980
3981    #[test]
3982    fn test_normalize_empty() {
3983        let input = Vec::new();
3984        let output = TextBuffer::normalize_line_endings(input);
3985        assert_eq!(output, Vec::<u8>::new());
3986    }
3987
3988    /// Regression test: get_all_text() returns empty for large files with unloaded regions
3989    ///
3990    /// This was the root cause of a bug where recovery auto-save would save 0 bytes
3991    /// for large files, causing data loss on crash recovery.
3992    ///
3993    /// The fix is to use get_text_range_mut() which handles lazy loading.
3994    #[test]
3995    fn test_get_all_text_returns_empty_for_unloaded_buffers() {
3996        use tempfile::TempDir;
3997        let temp_dir = TempDir::new().unwrap();
3998        let file_path = temp_dir.path().join("large_test.txt");
3999
4000        // Create a 50KB file
4001        let original_content = "X".repeat(50_000);
4002        std::fs::write(&file_path, &original_content).unwrap();
4003
4004        // Load with small threshold to trigger large file mode
4005        let mut buffer = TextBuffer::load_from_file(&file_path, 1024).unwrap();
4006        assert!(buffer.large_file, "Should be in large file mode");
4007        assert!(!buffer.buffers[0].is_loaded(), "Buffer should be unloaded");
4008
4009        // Make a small edit
4010        buffer.insert_bytes(0, b"EDITED: ".to_vec());
4011
4012        // get_all_text() now returns None for unloaded buffers instead of empty
4013        // This is the correct behavior - it signals that content is not available
4014        let content_immutable = buffer.get_all_text();
4015
4016        // get_all_text() returns None because it uses get_text_range() which
4017        // returns None for unloaded regions
4018        assert!(
4019            content_immutable.is_none(),
4020            "get_all_text() should return None for large files with unloaded regions. \
4021             Got Some({} bytes) instead of None.",
4022            content_immutable.as_ref().map(|c| c.len()).unwrap_or(0)
4023        );
4024
4025        // CORRECT BEHAVIOR: get_text_range_mut() handles lazy loading
4026        let total = buffer.total_bytes();
4027        let content_lazy = buffer.get_text_range_mut(0, total).unwrap();
4028        assert_eq!(
4029            content_lazy.len(),
4030            50_000 + 8,
4031            "get_text_range_mut() should return all content with lazy loading"
4032        );
4033        assert!(
4034            String::from_utf8_lossy(&content_lazy).starts_with("EDITED: "),
4035            "Content should start with our edit"
4036        );
4037    }
4038
4039    // ===== Line Ending Conversion Tests =====
4040
4041    mod line_ending_conversion {
4042        use super::*;
4043
4044        #[test]
4045        fn test_convert_lf_to_crlf() {
4046            let input = b"Line 1\nLine 2\nLine 3\n";
4047            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
4048            assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
4049        }
4050
4051        #[test]
4052        fn test_convert_crlf_to_lf() {
4053            let input = b"Line 1\r\nLine 2\r\nLine 3\r\n";
4054            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
4055            assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
4056        }
4057
4058        #[test]
4059        fn test_convert_cr_to_lf() {
4060            let input = b"Line 1\rLine 2\rLine 3\r";
4061            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
4062            assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
4063        }
4064
4065        #[test]
4066        fn test_convert_mixed_to_crlf() {
4067            // Mixed line endings: LF, CRLF, CR
4068            let input = b"Line 1\nLine 2\r\nLine 3\r";
4069            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
4070            assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
4071        }
4072
4073        #[test]
4074        fn test_convert_lf_to_lf_is_noop() {
4075            let input = b"Line 1\nLine 2\nLine 3\n";
4076            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
4077            assert_eq!(result, input.to_vec());
4078        }
4079
4080        #[test]
4081        fn test_convert_empty_content() {
4082            let input = b"";
4083            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
4084            assert_eq!(result, b"".to_vec());
4085        }
4086
4087        #[test]
4088        fn test_convert_no_line_endings() {
4089            let input = b"No line endings here";
4090            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
4091            assert_eq!(result, b"No line endings here".to_vec());
4092        }
4093
4094        #[test]
4095        fn test_set_line_ending_marks_modified() {
4096            let mut buffer = TextBuffer::from_bytes(b"Hello\nWorld\n".to_vec());
4097            assert!(!buffer.is_modified());
4098
4099            buffer.set_line_ending(LineEnding::CRLF);
4100            assert!(buffer.is_modified());
4101        }
4102
4103        #[test]
4104        fn test_set_default_line_ending_does_not_mark_modified() {
4105            let mut buffer = TextBuffer::empty();
4106            assert!(!buffer.is_modified());
4107
4108            buffer.set_default_line_ending(LineEnding::CRLF);
4109            assert!(!buffer.is_modified());
4110            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
4111        }
4112
4113        #[test]
4114        fn test_save_to_file_converts_lf_to_crlf() {
4115            use tempfile::TempDir;
4116
4117            let temp_dir = TempDir::new().unwrap();
4118            let file_path = temp_dir.path().join("test_lf_to_crlf.txt");
4119
4120            // Create a file with LF line endings
4121            let original_content = b"Line 1\nLine 2\nLine 3\n";
4122            std::fs::write(&file_path, original_content).unwrap();
4123
4124            // Load the file
4125            let mut buffer =
4126                TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD).unwrap();
4127            assert_eq!(buffer.line_ending(), LineEnding::LF);
4128
4129            // Change line ending to CRLF
4130            buffer.set_line_ending(LineEnding::CRLF);
4131            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
4132            assert!(buffer.is_modified());
4133
4134            // Save the file
4135            buffer.save_to_file(&file_path).unwrap();
4136
4137            // Read back and verify CRLF
4138            let saved_bytes = std::fs::read(&file_path).unwrap();
4139            assert_eq!(&saved_bytes, b"Line 1\r\nLine 2\r\nLine 3\r\n");
4140        }
4141
4142        #[test]
4143        fn test_save_to_file_converts_crlf_to_lf() {
4144            use tempfile::TempDir;
4145
4146            let temp_dir = TempDir::new().unwrap();
4147            let file_path = temp_dir.path().join("test_crlf_to_lf.txt");
4148
4149            // Create a file with CRLF line endings
4150            let original_content = b"Line 1\r\nLine 2\r\nLine 3\r\n";
4151            std::fs::write(&file_path, original_content).unwrap();
4152
4153            // Load the file
4154            let mut buffer =
4155                TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD).unwrap();
4156            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
4157
4158            // Change line ending to LF
4159            buffer.set_line_ending(LineEnding::LF);
4160            assert_eq!(buffer.line_ending(), LineEnding::LF);
4161            assert!(buffer.is_modified());
4162
4163            // Save the file
4164            buffer.save_to_file(&file_path).unwrap();
4165
4166            // Read back and verify LF (no CRLF)
4167            let saved_bytes = std::fs::read(&file_path).unwrap();
4168            assert_eq!(&saved_bytes, b"Line 1\nLine 2\nLine 3\n");
4169        }
4170
4171        #[test]
4172        #[cfg(unix)]
4173        fn test_save_to_unwritable_file() -> anyhow::Result<()> {
4174            use std::fs::Permissions;
4175            use std::os::unix::fs::PermissionsExt;
4176            use tempfile::TempDir;
4177
4178            let temp_dir = TempDir::new().unwrap();
4179            let unwritable_dir = temp_dir.path().join("unwritable_dir");
4180            std::fs::create_dir(&unwritable_dir)?;
4181
4182            let file_path = unwritable_dir.join("unwritable.txt");
4183            std::fs::write(&file_path, "original content")?;
4184
4185            // Make directory unwritable to prevent rename/temp file creation
4186            std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;
4187
4188            let mut buffer = TextBuffer::from_bytes(b"new content".to_vec());
4189            let result = buffer.save_to_file(&file_path);
4190
4191            // Verify that it returns SudoSaveRequired
4192            match result {
4193                Err(e) => {
4194                    if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
4195                        assert_eq!(sudo_err.dest_path, file_path);
4196                        assert!(sudo_err.temp_path.exists());
4197                        // Cleanup temp file
4198                        let _ = std::fs::remove_file(&sudo_err.temp_path);
4199                    } else {
4200                        panic!("Expected SudoSaveRequired error, got: {:?}", e);
4201                    }
4202                }
4203                Ok(_) => panic!("Expected error, but save succeeded"),
4204            }
4205
4206            Ok(())
4207        }
4208
4209        #[test]
4210        #[cfg(unix)]
4211        fn test_save_to_unwritable_directory() -> anyhow::Result<()> {
4212            use std::fs::Permissions;
4213            use std::os::unix::fs::PermissionsExt;
4214            use tempfile::TempDir;
4215
4216            let temp_dir = TempDir::new().unwrap();
4217            let unwritable_dir = temp_dir.path().join("unwritable_dir");
4218            std::fs::create_dir(&unwritable_dir)?;
4219
4220            let file_path = unwritable_dir.join("test.txt");
4221
4222            // Make directory unwritable (no write allowed)
4223            std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;
4224
4225            let mut buffer = TextBuffer::from_bytes(b"content".to_vec());
4226            let result = buffer.save_to_file(&file_path);
4227
4228            match result {
4229                Err(e) => {
4230                    if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
4231                        assert_eq!(sudo_err.dest_path, file_path);
4232                        assert!(sudo_err.temp_path.exists());
4233                        // It should be in /tmp because the directory was not writable
4234                        assert!(sudo_err.temp_path.starts_with(std::env::temp_dir()));
4235                        // Cleanup
4236                        let _ = std::fs::remove_file(&sudo_err.temp_path);
4237                    } else {
4238                        panic!("Expected SudoSaveRequired error, got: {:?}", e);
4239                    }
4240                }
4241                Ok(_) => panic!("Expected error, but save succeeded"),
4242            }
4243
4244            Ok(())
4245        }
4246    }
4247}
4248
#[cfg(test)]
mod property_tests {
    use super::*;
    use proptest::prelude::*;

    // Generate text with some newlines: byte vectors of length 0..100 drawn
    // from 'a'..='z' plus '\n', so generated cases vary in line structure.
    fn text_with_newlines() -> impl Strategy<Value = Vec<u8>> {
        prop::collection::vec(
            prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n'),],
            0..100,
        )
    }

    // Strategy to generate operations
    // A single random edit to replay against the buffer under test.
    #[derive(Debug, Clone)]
    enum Operation {
        // Insert `text` at `offset` (tests clamp offset to the buffer length).
        Insert { offset: usize, text: Vec<u8> },
        // Delete `bytes` bytes starting at `offset`.
        Delete { offset: usize, bytes: usize },
    }

    // Sequences of 0..50 random insert/delete operations; offsets may exceed
    // the buffer length, so each test clamps them before applying.
    fn operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
        prop::collection::vec(
            prop_oneof![
                (0usize..200, text_with_newlines())
                    .prop_map(|(offset, text)| { Operation::Insert { offset, text } }),
                (0usize..200, 1usize..50)
                    .prop_map(|(offset, bytes)| { Operation::Delete { offset, bytes } }),
            ],
            0..50,
        )
    }

    proptest! {
        // line_count() must equal the number of '\n' bytes plus one.
        #[test]
        fn prop_line_count_consistent(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone());

            let newline_count = text.iter().filter(|&&b| b == b'\n').count();
            prop_assert_eq!(buffer.line_count(), Some(newline_count + 1));
        }

        // Round-trip: bytes in equals bytes out for freshly built buffers.
        #[test]
        fn prop_get_all_text_matches_original(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone());
            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }

        // Inserting n bytes grows total_bytes() by exactly n.
        #[test]
        fn prop_insert_increases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text);
            let initial_bytes = buffer.total_bytes();

            let offset = offset.min(buffer.total_bytes());
            buffer.insert_bytes(offset, insert_text.clone());

            prop_assert_eq!(buffer.total_bytes(), initial_bytes + insert_text.len());
        }

        // Deleting n bytes shrinks total_bytes() by exactly n.
        #[test]
        fn prop_delete_decreases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            delete_bytes in 1usize..50
        ) {
            if text.is_empty() {
                return Ok(());
            }

            let mut buffer = TextBuffer::from_bytes(text);
            let initial_bytes = buffer.total_bytes();

            // Clamp the random offset/length to stay inside the buffer.
            let offset = offset.min(buffer.total_bytes());
            let delete_bytes = delete_bytes.min(buffer.total_bytes() - offset);

            if delete_bytes == 0 {
                return Ok(());
            }

            buffer.delete_bytes(offset, delete_bytes);

            prop_assert_eq!(buffer.total_bytes(), initial_bytes - delete_bytes);
        }

        // Insert followed by delete of the same span is a no-op.
        #[test]
        fn prop_insert_then_delete_restores_original(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text.clone());

            let offset = offset.min(buffer.total_bytes());
            buffer.insert_bytes(offset, insert_text.clone());
            buffer.delete_bytes(offset, insert_text.len());

            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }

        // offset -> position -> offset must round-trip for every valid offset.
        #[test]
        fn prop_offset_position_roundtrip(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone());

            for offset in 0..text.len() {
                let pos = buffer.offset_to_position(offset).expect("offset_to_position should succeed for valid offset");
                let back = buffer.position_to_offset(pos);
                prop_assert_eq!(back, offset, "Failed roundtrip for offset {}", offset);
            }
        }

        // get_text_range() must return exactly the requested slice.
        #[test]
        fn prop_get_text_range_valid(
            text in text_with_newlines(),
            offset in 0usize..100,
            length in 1usize..50
        ) {
            if text.is_empty() {
                return Ok(());
            }

            let buffer = TextBuffer::from_bytes(text.clone());
            let offset = offset.min(buffer.total_bytes());
            let length = length.min(buffer.total_bytes() - offset);

            if length == 0 {
                return Ok(());
            }

            let result = buffer.get_text_range(offset, length);
            prop_assert_eq!(result, Some(text[offset..offset + length].to_vec()));
        }

        // Replay random edits against both the buffer and a plain Vec<u8>
        // model; the two must end up with identical content.
        #[test]
        fn prop_operations_maintain_consistency(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec());
            let mut expected_text = b"initial\ntext".to_vec();

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text.clone());

                        // Update expected
                        let offset = offset.min(expected_text.len());
                        expected_text.splice(offset..offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        if offset < buffer.total_bytes() {
                            let bytes = bytes.min(buffer.total_bytes() - offset);
                            buffer.delete_bytes(offset, bytes);

                            // Update expected
                            if offset < expected_text.len() {
                                let bytes = bytes.min(expected_text.len() - offset);
                                expected_text.drain(offset..offset + bytes);
                            }
                        }
                    }
                }
            }

            prop_assert_eq!(buffer.get_all_text().unwrap(), expected_text);
        }

        // Invariant: line_count() never drops below 1, whatever the edits.
        #[test]
        fn prop_line_count_never_zero(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"test".to_vec());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                // Document always has at least 1 line
                prop_assert!(buffer.line_count().unwrap_or(1) >= 1);
            }
        }

        // Invariant: total_bytes() stays bounded (no underflow wrap-around).
        #[test]
        fn prop_total_bytes_never_negative(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"test".to_vec());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                // Bytes should never overflow
                prop_assert!(buffer.total_bytes() < 10_000_000);
            }
        }

        // After arbitrary edits, offset<->position conversion must keep
        // working (piece tree and line metadata stay in sync).
        #[test]
        fn prop_piece_tree_and_line_index_stay_synced(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                // Verify we can still convert between offsets and positions
                if buffer.total_bytes() > 0 {
                    let mid_offset = buffer.total_bytes() / 2;
                    if let Some(pos) = buffer.offset_to_position(mid_offset) {
                        let back = buffer.position_to_offset(pos);

                        // Should be able to roundtrip
                        prop_assert!(back <= buffer.total_bytes());
                    }
                }
            }
        }
    }

    // Text content (including tabs, CRLF, and ANSI escapes) must not be
    // classified as binary.
    #[test]
    fn test_detect_binary_text_files() {
        // Plain text should not be detected as binary
        assert!(!TextBuffer::detect_binary(b"Hello, world!"));
        assert!(!TextBuffer::detect_binary(b"Line 1\nLine 2\nLine 3"));
        assert!(!TextBuffer::detect_binary(b"Tabs\tand\tnewlines\n"));
        assert!(!TextBuffer::detect_binary(b"Carriage return\r\n"));

        // Empty content is not binary
        assert!(!TextBuffer::detect_binary(b""));

        // ANSI CSI escape sequences should be treated as text
        assert!(!TextBuffer::detect_binary(b"\x1b[31mRed text\x1b[0m"));
    }

    // Null bytes, low control characters, and DEL must trigger binary
    // detection.
    #[test]
    fn test_detect_binary_binary_files() {
        // Null bytes indicate binary
        assert!(TextBuffer::detect_binary(b"Hello\x00World"));
        assert!(TextBuffer::detect_binary(b"\x00"));

        // Non-printable control characters (except tab, newline, CR, form feed, vertical tab)
        assert!(TextBuffer::detect_binary(b"Text with \x01 control char"));
        assert!(TextBuffer::detect_binary(b"\x02\x03\x04"));

        // DEL character (0x7F)
        assert!(TextBuffer::detect_binary(b"Text with DEL\x7F"));
    }

    #[test]
    fn test_detect_binary_png_file() {
        // PNG file signature: 89 50 4E 47 0D 0A 1A 0A
        // The 0x1A byte (substitute character) is a control character that triggers binary detection
        let png_header: &[u8] = &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        assert!(TextBuffer::detect_binary(png_header));

        // Simulate a PNG file with more data after header
        let mut png_data = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        png_data.extend_from_slice(b"\x00\x00\x00\x0DIHDR"); // IHDR chunk with null bytes
        assert!(TextBuffer::detect_binary(&png_data));
    }

    #[test]
    fn test_detect_binary_other_image_formats() {
        // JPEG signature: FF D8 FF
        let jpeg_header: &[u8] = &[0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10];
        assert!(TextBuffer::detect_binary(jpeg_header));

        // GIF signature: GIF89a or GIF87a - contains valid ASCII but typically followed by binary
        // GIF header is ASCII but the LSD (Logical Screen Descriptor) contains binary
        let gif_data: &[u8] = &[
            0x47, 0x49, 0x46, 0x38, 0x39, 0x61, // GIF89a
            0x01, 0x00, 0x01, 0x00, // Width=1, Height=1 (little endian)
            0x00, // Packed byte
            0x00, // Background color index
            0x00, // Pixel aspect ratio
        ];
        // The null bytes in the dimensions trigger binary detection
        assert!(TextBuffer::detect_binary(gif_data));

        // BMP signature: BM followed by file size (usually contains null bytes)
        let bmp_header: &[u8] = &[0x42, 0x4D, 0x00, 0x00, 0x00, 0x00];
        assert!(TextBuffer::detect_binary(bmp_header));
    }

    #[test]
    fn test_detect_binary_executable_formats() {
        // ELF signature (Linux executables)
        let elf_header: &[u8] = &[0x7F, 0x45, 0x4C, 0x46, 0x02, 0x01, 0x01, 0x00];
        assert!(TextBuffer::detect_binary(elf_header));

        // Mach-O signature (macOS executables) - magic + cpu type/subtype contain null bytes
        let macho_header: &[u8] = &[0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x01];
        assert!(TextBuffer::detect_binary(macho_header));

        // PE/COFF (Windows executables) - MZ header
        let pe_header: &[u8] = &[0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00];
        assert!(TextBuffer::detect_binary(pe_header));
    }
}
4566
/// Line data with optional line number.
///
/// One instance describes a single line collected by [`TextBufferLineIterator`].
#[derive(Debug, Clone)]
pub struct LineData {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Line content (without trailing newline)
    pub content: String,
    /// Whether this line ends with a newline
    pub has_newline: bool,
    /// Line number (None for large files without line metadata)
    pub line_number: Option<usize>,
}
4579
/// Iterator over lines in a TextBuffer that efficiently tracks line numbers
/// using piece tree metadata (single source of truth).
///
/// Lines are collected eagerly at construction time rather than lazily,
/// because collecting may need `&mut TextBuffer` to load unloaded chunks.
pub struct TextBufferLineIterator {
    /// Collected lines (we collect all at once since we need mutable access to load chunks)
    lines: Vec<LineData>,
    /// Current index in the lines vector
    current_index: usize,
    /// Whether there are more lines after these
    pub has_more: bool,
}
4590
impl TextBufferLineIterator {
    /// Collects up to `max_lines` lines starting at byte offset `byte_pos`.
    ///
    /// Takes `&mut TextBuffer` because reading text may need to load
    /// not-yet-resident chunks into the buffer (`get_text_range_mut`).
    /// All lines are gathered eagerly here; `next()` then just hands them out.
    ///
    /// Line numbers are attached only when the buffer exposes line metadata
    /// (`line_count()` returns `Some`); otherwise every `line_number` is `None`.
    ///
    /// NOTE(review): a line longer than two scan chunks (2 * 240 bytes with the
    /// current estimate) is emitted as several `LineData` entries that share
    /// the same `line_number`; such a split can also cut a multi-byte UTF-8
    /// sequence, which `from_utf8_lossy` renders as U+FFFD.
    ///
    /// # Errors
    /// Propagates any error from `get_text_range_mut` while loading text.
    pub(crate) fn new(buffer: &mut TextBuffer, byte_pos: usize, max_lines: usize) -> Result<Self> {
        let buffer_len = buffer.len();
        // Starting at or past EOF yields an empty, already-exhausted iterator.
        if byte_pos >= buffer_len {
            return Ok(Self {
                lines: Vec::new(),
                current_index: 0,
                has_more: false,
            });
        }

        // Check if buffer has line metadata (None for large files > 1MB)
        let has_line_metadata = buffer.line_count().is_some();

        // Determine starting line number by querying piece tree once
        // (only if we have line metadata); subsequent numbers are derived by
        // counting newlines locally instead of re-querying the tree per line.
        let mut current_line = if has_line_metadata {
            buffer.offset_to_position(byte_pos).map(|pos| pos.line)
        } else {
            None
        };

        let mut lines = Vec::with_capacity(max_lines);
        let mut current_offset = byte_pos;
        let estimated_line_length = 80; // Use default estimate

        // Collect lines by scanning forward
        for _ in 0..max_lines {
            if current_offset >= buffer_len {
                break;
            }

            let line_start = current_offset;
            let line_number = current_line;

            // Estimate how many bytes to load for this line (3x a typical
            // line so a single load is the common case).
            let estimated_max_line_length = estimated_line_length * 3;
            let bytes_to_scan = estimated_max_line_length.min(buffer_len - current_offset);

            // Load chunk (this handles lazy loading)
            let chunk = buffer.get_text_range_mut(current_offset, bytes_to_scan)?;

            // Scan for newline; `line_len` counts bytes INCLUDING the '\n'.
            let mut line_len = 0;
            let mut found_newline = false;
            for &byte in chunk.iter() {
                line_len += 1;
                if byte == b'\n' {
                    found_newline = true;
                    break;
                }
            }

            // Handle long lines (rare case): no newline in the first chunk and
            // we have not yet reached end of buffer.
            if !found_newline && current_offset + line_len < buffer_len {
                // Line is longer than expected, load one more chunk of data
                let remaining = buffer_len - current_offset - line_len;
                let additional_bytes = estimated_max_line_length.min(remaining);
                let more_chunk =
                    buffer.get_text_range_mut(current_offset + line_len, additional_bytes)?;

                let mut extended_chunk = chunk;
                extended_chunk.extend_from_slice(&more_chunk);

                // Continue the newline scan over the freshly loaded bytes.
                for &byte in more_chunk.iter() {
                    line_len += 1;
                    if byte == b'\n' {
                        found_newline = true;
                        break;
                    }
                }

                // If still no newline, the remainder of this very long line is
                // emitted as further entries on later loop iterations.
                let line_string = String::from_utf8_lossy(&extended_chunk[..line_len]).into_owned();
                let has_newline = line_string.ends_with('\n');
                let content = if has_newline {
                    // Safe byte slice: '\n' is a single-byte char boundary.
                    line_string[..line_string.len() - 1].to_string()
                } else {
                    line_string
                };

                lines.push(LineData {
                    byte_offset: line_start,
                    content,
                    has_newline,
                    line_number,
                });

                current_offset += line_len;
                // Only a completed line advances the line counter.
                if has_line_metadata && found_newline {
                    current_line = current_line.map(|n| n + 1);
                }
                continue;
            }

            // Normal case: newline found in the first chunk, or EOF reached.
            let line_string = String::from_utf8_lossy(&chunk[..line_len]).into_owned();
            let has_newline = line_string.ends_with('\n');
            let content = if has_newline {
                // Safe byte slice: '\n' is a single-byte char boundary.
                line_string[..line_string.len() - 1].to_string()
            } else {
                line_string
            };

            lines.push(LineData {
                byte_offset: line_start,
                content,
                has_newline,
                line_number,
            });

            current_offset += line_len;
            // Increment line number if we have metadata and found a newline
            if has_line_metadata && found_newline {
                current_line = current_line.map(|n| n + 1);
            }
        }

        // Check if there are more lines (unread bytes remain past the last
        // collected line).
        let has_more = current_offset < buffer_len;

        Ok(Self {
            lines,
            current_index: 0,
            has_more,
        })
    }
}
4718
4719impl Iterator for TextBufferLineIterator {
4720    type Item = LineData;
4721
4722    fn next(&mut self) -> Option<Self::Item> {
4723        if self.current_index < self.lines.len() {
4724            let line = self.lines[self.current_index].clone();
4725            self.current_index += 1;
4726            Some(line)
4727        } else {
4728            None
4729        }
4730    }
4731}