// fresh/model/buffer.rs
1/// Text buffer that uses PieceTree with integrated line tracking
2/// Architecture where the tree is the single source of truth for text and line information
3use crate::model::encoding;
4use crate::model::filesystem::{FileMetadata, FileSystem, WriteOp};
5use crate::model::piece_tree::{
6    BufferData, BufferLocation, Cursor, PieceInfo, PieceRangeIter, PieceTree, PieceView, Position,
7    StringBuffer, TreeStats,
8};
9use crate::model::piece_tree_diff::PieceTreeDiff;
10use crate::primitives::grapheme;
11use anyhow::{Context, Result};
12use regex::bytes::Regex;
13use std::io::{self, Write};
14use std::ops::Range;
15use std::path::{Path, PathBuf};
16use std::sync::Arc;
17
18// Re-export Encoding for backward compatibility
19pub use encoding::Encoding;
20
/// Error returned when a file save operation requires elevated privileges.
///
/// This error contains all the information needed to perform the save via sudo
/// in a single operation, preserving original file ownership and permissions.
#[derive(Debug, Clone, PartialEq)]
pub struct SudoSaveRequired {
    /// Path to the temporary file containing the new content
    pub temp_path: PathBuf,
    /// Destination path where the file should be saved
    pub dest_path: PathBuf,
    /// Original file owner (UID)
    pub uid: u32,
    /// Original file group (GID)
    pub gid: u32,
    /// Original file permissions (mode)
    pub mode: u32,
}

impl std::fmt::Display for SudoSaveRequired {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // User-facing message; the exact wording is part of the UI contract.
        let dest = self.dest_path.display();
        write!(
            f,
            "Permission denied saving to {dest}. Use sudo to complete the operation."
        )
    }
}

impl std::error::Error for SudoSaveRequired {}
50
51/// Error returned when a large file has a non-resynchronizable encoding
52/// and requires user confirmation before loading the entire file into memory.
53///
54/// Non-resynchronizable encodings (like Shift-JIS, GB18030, GBK, EUC-KR) cannot
55/// determine character boundaries when jumping into the middle of a file.
56/// This means the entire file must be loaded and decoded sequentially.
57#[derive(Debug, Clone, PartialEq)]
58pub struct LargeFileEncodingConfirmation {
59    /// Path to the file
60    pub path: PathBuf,
61    /// Size of the file in bytes
62    pub file_size: usize,
63    /// The detected encoding that requires full loading
64    pub encoding: Encoding,
65}
66
67impl std::fmt::Display for LargeFileEncodingConfirmation {
68    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69        let size_mb = self.file_size as f64 / (1024.0 * 1024.0);
70        write!(
71            f,
72            "{} ({:.0} MB) requires full load. (l)oad, (e)ncoding, (C)ancel? ",
73            self.encoding.display_name(),
74            size_mb
75        )
76    }
77}
78
79impl std::error::Error for LargeFileEncodingConfirmation {}
80
/// A work item for incremental line-feed scanning (one per leaf).
///
/// Produced when scanning a large file's piece tree so newline counts can be
/// filled in leaf-by-leaf instead of in one blocking pass.
#[derive(Debug, Clone)]
pub struct LineScanChunk {
    /// Index of the leaf in the piece tree's leaf array.
    pub leaf_index: usize,
    /// Number of bytes in this leaf.
    pub byte_len: usize,
    /// True if the leaf already had a known line_feed_cnt (no I/O needed).
    pub already_known: bool,
}
91
// Large file support configuration
/// Default threshold for considering a file "large" (100 MB).
/// Files at or above this size skip eager loading and line indexing.
pub const DEFAULT_LARGE_FILE_THRESHOLD: usize = 100 * 1024 * 1024;

/// Chunk size to load when lazy loading (1 MB)
pub const LOAD_CHUNK_SIZE: usize = 1024 * 1024;

/// Chunk alignment for lazy loading (64 KB)
pub const CHUNK_ALIGNMENT: usize = 64 * 1024;
101
/// Configuration passed to TextBuffer constructors.
#[derive(Debug, Clone)]
pub struct BufferConfig {
    /// Estimated average line length in bytes. Used for approximate line number
    /// display in large files and for goto-line byte offset estimation.
    pub estimated_line_length: usize,
}

impl Default for BufferConfig {
    /// 80 bytes per line — the classic terminal width, used as a reasonable
    /// prior when a file's real average line length is unknown.
    fn default() -> Self {
        BufferConfig {
            estimated_line_length: 80,
        }
    }
}
117
/// Line ending format used in the file
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineEnding {
    /// Unix/Linux/Mac format (\n)
    #[default]
    LF,
    /// Windows format (\r\n)
    CRLF,
    /// Old Mac format (\r) - rare but supported
    CR,
}

impl LineEnding {
    /// The literal byte sequence written between lines for this format.
    pub fn as_str(&self) -> &'static str {
        match *self {
            LineEnding::LF => "\n",
            LineEnding::CRLF => "\r\n",
            LineEnding::CR => "\r",
        }
    }

    /// Short label for this format, suitable for the status bar.
    pub fn display_name(&self) -> &'static str {
        match *self {
            LineEnding::LF => "LF",
            LineEnding::CRLF => "CRLF",
            LineEnding::CR => "CR",
        }
    }
}
149
150/// A write recipe built from the piece tree for saving
151struct WriteRecipe {
152    /// The source file path for Copy operations (if any)
153    src_path: Option<PathBuf>,
154    /// Data chunks for Insert operations (owned to avoid lifetime issues)
155    insert_data: Vec<Vec<u8>>,
156    /// Sequence of actions to build the output file
157    actions: Vec<RecipeAction>,
158}
159
160/// An action in a write recipe
161#[derive(Debug, Clone, Copy)]
162enum RecipeAction {
163    /// Copy bytes from source file at offset
164    Copy { offset: u64, len: u64 },
165    /// Insert data from insert_data[index]
166    Insert { index: usize },
167}
168
169impl WriteRecipe {
170    /// Convert the recipe to WriteOp slice for use with filesystem write_patched
171    fn to_write_ops(&self) -> Vec<WriteOp<'_>> {
172        self.actions
173            .iter()
174            .map(|action| match action {
175                RecipeAction::Copy { offset, len } => WriteOp::Copy {
176                    offset: *offset,
177                    len: *len,
178                },
179                RecipeAction::Insert { index } => WriteOp::Insert {
180                    data: &self.insert_data[*index],
181                },
182            })
183            .collect()
184    }
185
186    /// Check if this recipe has any Copy operations
187    fn has_copy_ops(&self) -> bool {
188        self.actions
189            .iter()
190            .any(|a| matches!(a, RecipeAction::Copy { .. }))
191    }
192
193    /// Flatten all Insert operations into a single buffer.
194    /// Only valid when has_copy_ops() returns false.
195    fn flatten_inserts(&self) -> Vec<u8> {
196        let mut result = Vec::new();
197        for action in &self.actions {
198            if let RecipeAction::Insert { index } = action {
199                result.extend_from_slice(&self.insert_data[*index]);
200            }
201        }
202        result
203    }
204}
205
/// Represents a line number (simplified for new implementation)
/// Legacy enum kept for backwards compatibility - always Absolute now
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineNumber {
    /// Absolute line number - this is the actual line number in the file
    Absolute(usize),
    /// Relative line number (deprecated - now same as Absolute)
    Relative {
        line: usize,
        from_cached_line: usize,
    },
}

impl LineNumber {
    /// The underlying 0-based line value, regardless of variant.
    pub fn value(&self) -> usize {
        match *self {
            LineNumber::Absolute(line) => line,
            LineNumber::Relative { line, .. } => line,
        }
    }

    /// True for the `Absolute` variant.
    pub fn is_absolute(&self) -> bool {
        match self {
            LineNumber::Absolute(_) => true,
            LineNumber::Relative { .. } => false,
        }
    }

    /// True for the (deprecated) `Relative` variant.
    pub fn is_relative(&self) -> bool {
        !self.is_absolute()
    }

    /// Render as 1-based display text; relative values get a `~` prefix
    /// to flag them as approximate.
    pub fn format(&self) -> String {
        match self {
            Self::Absolute(line) => format!("{}", line + 1),
            Self::Relative { line, .. } => format!("~{}", line + 1),
        }
    }
}
245
/// A text buffer that manages document content using a piece table
/// with integrated line tracking
pub struct TextBuffer {
    /// Filesystem abstraction for file I/O operations.
    /// Stored internally so methods can access it without threading through call chains.
    fs: Arc<dyn FileSystem + Send + Sync>,

    /// The piece tree for efficient text manipulation with integrated line tracking
    piece_tree: PieceTree,

    /// Snapshot of the piece tree root at last save (shared via Arc)
    saved_root: Arc<crate::model::piece_tree::PieceTreeNode>,

    /// List of string buffers containing chunks of text data
    /// Index 0 is typically the original/stored buffer
    /// Additional buffers are added for modifications
    buffers: Vec<StringBuffer>,

    /// Next buffer ID to assign
    next_buffer_id: usize,

    /// Optional file path for persistence
    file_path: Option<PathBuf>,

    /// Has the buffer been modified since last save?
    modified: bool,

    /// Does the buffer have unsaved changes for recovery auto-save?
    /// This is separate from `modified` because recovery auto-save doesn't
    /// clear `modified` (buffer still differs from on-disk file).
    recovery_pending: bool,

    /// Is this a large file (no line indexing, lazy loading enabled)?
    large_file: bool,

    /// Has a line feed scan been performed on this large file?
    /// When true, piece tree leaves have accurate `line_feed_cnt` values,
    /// and edits will ensure the relevant chunk is loaded before splitting
    /// so that `compute_line_feeds_static` can recount accurately.
    line_feeds_scanned: bool,

    /// Is this a binary file? Binary files are opened read-only and render
    /// unprintable characters as code points.
    is_binary: bool,

    /// Line ending format detected from the file (or default for new files)
    line_ending: LineEnding,

    /// Original line ending format when file was loaded (used for conversion on save)
    /// This tracks what the file had when loaded, so we can detect if the user
    /// changed the line ending format and needs conversion on save.
    original_line_ending: LineEnding,

    /// Text encoding format detected from the file (or default for new files)
    encoding: Encoding,

    /// Original encoding when file was loaded (used for conversion on save)
    /// Similar to original_line_ending, tracks what the file had when loaded.
    original_encoding: Encoding,

    /// The file size on disk after the last save.
    /// Used for chunked recovery to know the original file size for reconstruction.
    /// Updated when loading from file or after saving.
    saved_file_size: Option<usize>,

    /// Monotonic version counter for change tracking.
    /// Incremented (with wraparound) on every content modification.
    version: u64,

    /// Buffer configuration (estimated line length, etc.)
    config: BufferConfig,
}
317
/// Snapshot of a TextBuffer's piece tree and associated string buffers.
///
/// Used by BulkEdit undo/redo to capture the complete buffer state.
/// Without this, consolidate_after_save() would destroy the string buffers
/// that a BulkEdit's piece tree snapshot references, causing corruption on undo.
#[derive(Debug, Clone)]
pub struct BufferSnapshot {
    // The tree and buffers are captured together: pieces in the tree index
    // into `buffers`, so they are only meaningful as a pair.
    pub piece_tree: PieceTree,
    pub buffers: Vec<StringBuffer>,
    pub next_buffer_id: usize,
}
329
330impl TextBuffer {
331    /// Create a new text buffer with the given filesystem implementation.
332    /// Note: large_file_threshold is ignored in the new implementation
333    pub fn new(_large_file_threshold: usize, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
334        let piece_tree = PieceTree::empty();
335        let line_ending = LineEnding::default();
336        let encoding = Encoding::default();
337        TextBuffer {
338            fs,
339            saved_root: piece_tree.root(),
340            piece_tree,
341            buffers: vec![StringBuffer::new(0, Vec::new())],
342            next_buffer_id: 1,
343            file_path: None,
344            modified: false,
345            recovery_pending: false,
346            large_file: false,
347            line_feeds_scanned: false,
348            is_binary: false,
349            line_ending,
350            original_line_ending: line_ending,
351            encoding,
352            original_encoding: encoding,
353            saved_file_size: None,
354            version: 0,
355            config: BufferConfig::default(),
356        }
357    }
358
359    /// Create an empty buffer associated with a file path.
360    /// Used for files that don't exist yet — the path is set so saving will create the file.
361    pub fn new_with_path(
362        large_file_threshold: usize,
363        fs: Arc<dyn FileSystem + Send + Sync>,
364        path: PathBuf,
365    ) -> Self {
366        let mut buffer = Self::new(large_file_threshold, fs);
367        buffer.file_path = Some(path);
368        buffer
369    }
370
    /// Current buffer version (monotonic, wraps on overflow)
    pub fn version(&self) -> u64 {
        self.version
    }

    /// Get a reference to the filesystem implementation used by this buffer.
    pub fn filesystem(&self) -> &Arc<dyn FileSystem + Send + Sync> {
        &self.fs
    }

    /// Set the filesystem implementation for this buffer.
    pub fn set_filesystem(&mut self, fs: Arc<dyn FileSystem + Send + Sync>) {
        self.fs = fs;
    }

    /// Advance the version counter. `wrapping_add` avoids an overflow panic
    /// in debug builds after u64::MAX edits (practically unreachable).
    #[inline]
    fn bump_version(&mut self) {
        self.version = self.version.wrapping_add(1);
    }

    /// Record a content change: set both dirty flags and bump the version.
    /// `recovery_pending` is tracked separately from `modified` — see the
    /// field documentation on `TextBuffer` for why.
    #[inline]
    fn mark_content_modified(&mut self) {
        self.modified = true;
        self.recovery_pending = true;
        self.bump_version();
    }
397
398    /// Create a text buffer from raw bytes WITHOUT encoding conversion.
399    /// Used for binary files where we want to preserve the exact bytes.
400    fn from_bytes_raw(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
401        let bytes = content.len();
402
403        // For binary files, detect line ending but don't convert encoding
404        let line_ending = Self::detect_line_ending(&content);
405
406        // Create initial StringBuffer with ID 0
407        let buffer = StringBuffer::new(0, content);
408        let line_feed_cnt = buffer.line_feed_count();
409
410        let piece_tree = if bytes > 0 {
411            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
412        } else {
413            PieceTree::empty()
414        };
415
416        let saved_root = piece_tree.root();
417
418        TextBuffer {
419            fs,
420            line_ending,
421            original_line_ending: line_ending,
422            encoding: Encoding::Utf8, // Binary files treated as raw bytes (no conversion)
423            original_encoding: Encoding::Utf8,
424            piece_tree,
425            saved_root,
426            buffers: vec![buffer],
427            next_buffer_id: 1,
428            file_path: None,
429            modified: false,
430            recovery_pending: false,
431            large_file: false,
432            line_feeds_scanned: false,
433            is_binary: true,
434            saved_file_size: Some(bytes),
435            version: 0,
436            config: BufferConfig::default(),
437        }
438    }
439
440    /// Create a text buffer from initial content with the given filesystem.
441    pub fn from_bytes(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
442        // Auto-detect encoding and convert to UTF-8 if needed
443        let (encoding, utf8_content) = Self::detect_and_convert_encoding(&content);
444
445        let bytes = utf8_content.len();
446
447        // Auto-detect line ending format from content
448        let line_ending = Self::detect_line_ending(&utf8_content);
449
450        // Create initial StringBuffer with ID 0
451        let buffer = StringBuffer::new(0, utf8_content);
452        let line_feed_cnt = buffer.line_feed_count();
453
454        let piece_tree = if bytes > 0 {
455            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
456        } else {
457            PieceTree::empty()
458        };
459
460        let saved_root = piece_tree.root();
461
462        TextBuffer {
463            fs,
464            line_ending,
465            original_line_ending: line_ending,
466            encoding,
467            original_encoding: encoding,
468            piece_tree,
469            saved_root,
470            buffers: vec![buffer],
471            next_buffer_id: 1,
472            file_path: None,
473            modified: false,
474            recovery_pending: false,
475            large_file: false,
476            line_feeds_scanned: false,
477            is_binary: false,
478            saved_file_size: Some(bytes), // Treat initial content as "saved" state
479            version: 0,
480            config: BufferConfig::default(),
481        }
482    }
483
484    /// Create a text buffer from bytes with a specific encoding (no auto-detection).
485    pub fn from_bytes_with_encoding(
486        content: Vec<u8>,
487        encoding: Encoding,
488        fs: Arc<dyn FileSystem + Send + Sync>,
489    ) -> Self {
490        // Convert from specified encoding to UTF-8
491        let utf8_content = encoding::convert_to_utf8(&content, encoding);
492
493        let bytes = utf8_content.len();
494
495        // Auto-detect line ending format from content
496        let line_ending = Self::detect_line_ending(&utf8_content);
497
498        // Create initial StringBuffer with ID 0
499        let buffer = StringBuffer::new(0, utf8_content);
500        let line_feed_cnt = buffer.line_feed_count();
501
502        let piece_tree = if bytes > 0 {
503            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
504        } else {
505            PieceTree::empty()
506        };
507
508        let saved_root = piece_tree.root();
509
510        TextBuffer {
511            fs,
512            line_ending,
513            original_line_ending: line_ending,
514            encoding,
515            original_encoding: encoding,
516            piece_tree,
517            saved_root,
518            buffers: vec![buffer],
519            next_buffer_id: 1,
520            file_path: None,
521            modified: false,
522            recovery_pending: false,
523            large_file: false,
524            line_feeds_scanned: false,
525            is_binary: false,
526            saved_file_size: Some(bytes),
527            version: 0,
528            config: BufferConfig::default(),
529        }
530    }
531
532    /// Create a text buffer from a string with the given filesystem.
533    pub fn from_str(
534        s: &str,
535        _large_file_threshold: usize,
536        fs: Arc<dyn FileSystem + Send + Sync>,
537    ) -> Self {
538        Self::from_bytes(s.as_bytes().to_vec(), fs)
539    }
540
541    /// Create an empty text buffer with the given filesystem.
542    pub fn empty(fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
543        let piece_tree = PieceTree::empty();
544        let saved_root = piece_tree.root();
545        let line_ending = LineEnding::default();
546        let encoding = Encoding::default();
547        TextBuffer {
548            fs,
549            piece_tree,
550            saved_root,
551            buffers: vec![StringBuffer::new(0, Vec::new())],
552            next_buffer_id: 1,
553            file_path: None,
554            modified: false,
555            recovery_pending: false,
556            large_file: false,
557            line_feeds_scanned: false,
558            is_binary: false,
559            line_ending,
560            original_line_ending: line_ending,
561            encoding,
562            original_encoding: encoding,
563            saved_file_size: None,
564            version: 0,
565            config: BufferConfig::default(),
566        }
567    }
568
569    /// Load a text buffer from a file using the given filesystem.
570    pub fn load_from_file<P: AsRef<Path>>(
571        path: P,
572        large_file_threshold: usize,
573        fs: Arc<dyn FileSystem + Send + Sync>,
574    ) -> anyhow::Result<Self> {
575        let path = path.as_ref();
576
577        // Get file size to determine loading strategy
578        let metadata = fs.metadata(path)?;
579        let file_size = metadata.size as usize;
580
581        // Use threshold parameter or default
582        let threshold = if large_file_threshold > 0 {
583            large_file_threshold
584        } else {
585            DEFAULT_LARGE_FILE_THRESHOLD
586        };
587
588        // Choose loading strategy based on file size
589        if file_size >= threshold {
590            Self::load_large_file(path, file_size, fs)
591        } else {
592            Self::load_small_file(path, fs)
593        }
594    }
595
596    /// Load a text buffer from a file with a specific encoding (no auto-detection).
597    pub fn load_from_file_with_encoding<P: AsRef<Path>>(
598        path: P,
599        encoding: Encoding,
600        fs: Arc<dyn FileSystem + Send + Sync>,
601        config: BufferConfig,
602    ) -> anyhow::Result<Self> {
603        let path = path.as_ref();
604        let contents = fs.read_file(path)?;
605
606        let mut buffer = Self::from_bytes_with_encoding(contents, encoding, fs);
607        buffer.file_path = Some(path.to_path_buf());
608        buffer.modified = false;
609        buffer.config = config;
610        Ok(buffer)
611    }
612
613    /// Load a small file with full eager loading and line indexing
614    fn load_small_file(path: &Path, fs: Arc<dyn FileSystem + Send + Sync>) -> anyhow::Result<Self> {
615        let contents = fs.read_file(path)?;
616
617        // Use unified encoding/binary detection
618        let (encoding, is_binary) = Self::detect_encoding_or_binary(&contents);
619
620        // For binary files, skip encoding conversion to preserve raw bytes
621        let mut buffer = if is_binary {
622            Self::from_bytes_raw(contents, fs)
623        } else {
624            // from_bytes handles encoding detection/conversion and line ending detection
625            Self::from_bytes(contents, fs)
626        };
627        buffer.file_path = Some(path.to_path_buf());
628        buffer.modified = false;
629        buffer.large_file = false;
630        buffer.is_binary = is_binary;
631        // For binary files, ensure encoding matches detection
632        if is_binary {
633            buffer.encoding = encoding;
634            buffer.original_encoding = encoding;
635        }
636        // Note: line_ending and encoding are already set by from_bytes/from_bytes_raw
637        Ok(buffer)
638    }
639
640    /// Check if loading a large file requires user confirmation due to encoding.
641    ///
642    /// Some encodings (like Shift-JIS, GB18030, GBK, EUC-KR) cannot be "resynchronized" -
643    /// meaning you cannot determine character boundaries when jumping into the middle
644    /// of a file. These encodings require loading the entire file into memory.
645    ///
646    /// Returns `Some(confirmation)` if user confirmation is needed, `None` if the file
647    /// can be loaded with lazy/streaming loading.
648    pub fn check_large_file_encoding(
649        path: impl AsRef<Path>,
650        fs: Arc<dyn FileSystem + Send + Sync>,
651    ) -> anyhow::Result<Option<LargeFileEncodingConfirmation>> {
652        let path = path.as_ref();
653        let metadata = fs.metadata(path)?;
654        let file_size = metadata.size as usize;
655
656        // Only check for large files
657        if file_size < DEFAULT_LARGE_FILE_THRESHOLD {
658            return Ok(None);
659        }
660
661        // Read a sample to detect encoding
662        let sample_size = file_size.min(8 * 1024);
663        let sample = fs.read_range(path, 0, sample_size)?;
664        let (encoding, is_binary) = Self::detect_encoding_or_binary(&sample);
665
666        // Binary files don't need confirmation (loaded as-is)
667        if is_binary {
668            return Ok(None);
669        }
670
671        // Check if the encoding requires full file loading
672        if encoding.requires_full_file_load() {
673            return Ok(Some(LargeFileEncodingConfirmation {
674                path: path.to_path_buf(),
675                file_size,
676                encoding,
677            }));
678        }
679
680        Ok(None)
681    }
682
    /// Load a large file with unloaded buffer (no line indexing, lazy loading).
    ///
    /// Never forces a full load: when the detected encoding is
    /// non-resynchronizable, the internal loader bails with
    /// `LargeFileEncodingConfirmation`; the caller should then use
    /// `load_large_file_confirmed` once the user approves.
    fn load_large_file(
        path: &Path,
        file_size: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
    ) -> anyhow::Result<Self> {
        Self::load_large_file_internal(path, file_size, fs, false)
    }
694
695    /// Load a large file, optionally forcing full load for non-resynchronizable encodings.
696    ///
697    /// Called with `force_full_load=true` after user confirms the warning about
698    /// non-resynchronizable encodings requiring full file loading.
699    pub fn load_large_file_confirmed(
700        path: impl AsRef<Path>,
701        fs: Arc<dyn FileSystem + Send + Sync>,
702    ) -> anyhow::Result<Self> {
703        let path = path.as_ref();
704        let metadata = fs.metadata(path)?;
705        let file_size = metadata.size as usize;
706        Self::load_large_file_internal(path, file_size, fs, true)
707    }
708
    /// Internal implementation for loading large files.
    ///
    /// Decision ladder, in order:
    /// 1. Binary content: full read, raw bytes preserved.
    /// 2. Non-resynchronizable encoding without `force_full_load`: bail with
    ///    `LargeFileEncodingConfirmation` so the UI can ask the user.
    /// 3. Any other non-UTF-8/ASCII encoding: full read plus conversion.
    /// 4. UTF-8/ASCII: lazy loading via a single `Unloaded` buffer.
    fn load_large_file_internal(
        path: &Path,
        file_size: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
        force_full_load: bool,
    ) -> anyhow::Result<Self> {
        use crate::model::piece_tree::{BufferData, BufferLocation};

        // Read a sample of the file to detect encoding and whether it's binary
        // We read the first 8KB for detection
        let sample_size = file_size.min(8 * 1024);
        let sample = fs.read_range(path, 0, sample_size)?;

        // Use unified encoding/binary detection
        let (encoding, is_binary) = Self::detect_encoding_or_binary(&sample);

        // Binary files skip encoding conversion to preserve raw bytes
        if is_binary {
            tracing::info!("Large binary file detected, loading without encoding conversion");
            let contents = fs.read_file(path)?;
            let mut buffer = Self::from_bytes_raw(contents, fs);
            buffer.file_path = Some(path.to_path_buf());
            buffer.modified = false;
            buffer.large_file = true;
            // from_bytes_raw records UTF-8; overwrite with the detected encoding.
            buffer.encoding = encoding;
            buffer.original_encoding = encoding;
            return Ok(buffer);
        }

        // Check if encoding requires full file loading
        let requires_full_load = encoding.requires_full_file_load();

        // For non-resynchronizable encodings, require confirmation unless forced
        if requires_full_load && !force_full_load {
            anyhow::bail!(LargeFileEncodingConfirmation {
                path: path.to_path_buf(),
                file_size,
                encoding,
            });
        }

        // For encodings that require full load (non-resynchronizable or non-UTF-8),
        // load the entire file and convert
        if !matches!(encoding, Encoding::Utf8 | Encoding::Ascii) {
            tracing::info!(
                "Large file with non-UTF-8 encoding ({:?}), loading fully for conversion",
                encoding
            );
            let contents = fs.read_file(path)?;
            let mut buffer = Self::from_bytes(contents, fs);
            buffer.file_path = Some(path.to_path_buf());
            buffer.modified = false;
            buffer.large_file = true; // Still mark as large file for UI purposes
            buffer.is_binary = is_binary;
            return Ok(buffer);
        }

        // UTF-8/ASCII files can use lazy loading.
        // Line ending detection sees only the 8KB sample, not the whole file.
        let line_ending = Self::detect_line_ending(&sample);

        // Create an unloaded buffer that references the entire file
        let buffer = StringBuffer {
            id: 0,
            data: BufferData::Unloaded {
                file_path: path.to_path_buf(),
                file_offset: 0,
                bytes: file_size,
            },
            stored_file_offset: None,
        };

        // Create piece tree with a single piece covering the whole file
        // No line feed count (None) since we're not computing line indexing
        let piece_tree = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
        } else {
            PieceTree::empty()
        };
        let saved_root = piece_tree.root();

        tracing::debug!(
            "Buffer::load_from_file: loaded {} bytes, saved_file_size={}",
            file_size,
            file_size
        );

        Ok(TextBuffer {
            fs,
            piece_tree,
            saved_root,
            buffers: vec![buffer],
            next_buffer_id: 1,
            file_path: Some(path.to_path_buf()),
            modified: false,
            recovery_pending: false,
            large_file: true,
            line_feeds_scanned: false,
            is_binary,
            line_ending,
            original_line_ending: line_ending,
            encoding,
            original_encoding: encoding,
            saved_file_size: Some(file_size),
            version: 0,
            config: BufferConfig::default(),
        })
    }
817
818    /// Save the buffer to its associated file
819    pub fn save(&mut self) -> anyhow::Result<()> {
820        if let Some(path) = &self.file_path {
821            self.save_to_file(path.clone())
822        } else {
823            anyhow::bail!(io::Error::new(
824                io::ErrorKind::NotFound,
825                "No file path associated with buffer",
826            ))
827        }
828    }
829
    /// Check if we should use in-place writing to preserve file ownership.
    /// Returns true if the file exists and is owned by a different user.
    /// On Unix, only root or the file owner can change file ownership with chown.
    /// When the current user is not the file owner, using atomic write (temp file + rename)
    /// would change the file's ownership to the current user. To preserve ownership,
    /// we must write directly to the existing file instead.
    fn should_use_inplace_write(&self, dest_path: &Path) -> bool {
        // Ownership determination is delegated to the filesystem abstraction.
        !self.fs.is_owner(dest_path)
    }
839
    /// Build a write recipe from the piece tree for saving.
    ///
    /// This creates a recipe of Copy and Insert operations that can reconstruct
    /// the buffer content. Copy operations reference unchanged regions in the
    /// source file, while Insert operations contain new/modified data.
    ///
    /// Any conversion (line endings or encoding) disables Copy entirely: once
    /// a single byte may differ from the on-disk original, file offsets no
    /// longer line up, so every piece must be materialized as an Insert.
    ///
    /// # Returns
    /// A WriteRecipe with the source path, insert data, and sequence of actions.
    ///
    /// # Errors
    /// Fails if a piece references a buffer id that does not exist, or if
    /// reading an unloaded region back from disk fails.
    fn build_write_recipe(&self) -> io::Result<WriteRecipe> {
        let total = self.total_bytes();

        // Determine the source file for Copy operations (if any)
        // We can only use Copy if:
        // 1. We have a source file path
        // 2. The source file exists
        // 3. No line ending conversion is needed
        // 4. No encoding conversion is needed
        let needs_line_ending_conversion = self.line_ending != self.original_line_ending;
        // We need encoding conversion if:
        // - NOT a binary file (binary files preserve raw bytes), AND
        // - Either the encoding changed from the original, OR
        // - The target encoding isn't plain UTF-8/ASCII (since internal storage is UTF-8)
        // For example: UTF-8 BOM files are stored as UTF-8, so we need to add BOM on save
        let needs_encoding_conversion = !self.is_binary
            && (self.encoding != self.original_encoding
                || !matches!(self.encoding, Encoding::Utf8 | Encoding::Ascii));
        let needs_conversion = needs_line_ending_conversion || needs_encoding_conversion;

        let src_path_for_copy: Option<&Path> = if needs_conversion {
            None
        } else {
            self.file_path.as_deref().filter(|p| self.fs.exists(p))
        };
        let target_ending = self.line_ending;
        let target_encoding = self.encoding;

        let mut insert_data: Vec<Vec<u8>> = Vec::new();
        let mut actions: Vec<RecipeAction> = Vec::new();

        // Add BOM as the first piece if the target encoding has one
        if let Some(bom) = target_encoding.bom_bytes() {
            insert_data.push(bom.to_vec());
            actions.push(RecipeAction::Insert { index: 0 });
        }

        // Walk every piece in document order, translating each into either a
        // Copy (cheap, references the source file) or an Insert (carries data).
        for piece_view in self.piece_tree.iter_pieces_in_range(0, total) {
            let buffer_id = piece_view.location.buffer_id();
            let buffer = self.buffers.get(buffer_id).ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!("Buffer {} not found", buffer_id),
                )
            })?;

            match &buffer.data {
                // Unloaded buffer: can use Copy if same source file, else load and send
                BufferData::Unloaded {
                    file_path,
                    file_offset,
                    ..
                } => {
                    // Can only use Copy if:
                    // - This is a Stored piece (original file content)
                    // - We have a valid source for copying
                    // - This buffer is from that source
                    // - No line ending or encoding conversion needed
                    let can_copy = matches!(piece_view.location, BufferLocation::Stored(_))
                        && src_path_for_copy.is_some_and(|src| file_path == src);

                    if can_copy {
                        let src_offset = (*file_offset + piece_view.buffer_offset) as u64;
                        actions.push(RecipeAction::Copy {
                            offset: src_offset,
                            len: piece_view.bytes as u64,
                        });
                        continue;
                    }

                    // Need to load and send this unloaded region
                    // This happens when: different source file, or conversion needed
                    let data = self.fs.read_range(
                        file_path,
                        (*file_offset + piece_view.buffer_offset) as u64,
                        piece_view.bytes,
                    )?;

                    let data = if needs_line_ending_conversion {
                        Self::convert_line_endings_to(&data, target_ending)
                    } else {
                        data
                    };

                    // Convert encoding if needed
                    let data = if needs_encoding_conversion {
                        Self::convert_to_encoding(&data, target_encoding)
                    } else {
                        data
                    };

                    let index = insert_data.len();
                    insert_data.push(data);
                    actions.push(RecipeAction::Insert { index });
                }

                // Loaded data: send as Insert
                BufferData::Loaded { data, .. } => {
                    let start = piece_view.buffer_offset;
                    let end = start + piece_view.bytes;
                    let chunk = &data[start..end];

                    let chunk = if needs_line_ending_conversion {
                        Self::convert_line_endings_to(chunk, target_ending)
                    } else {
                        chunk.to_vec()
                    };

                    // Convert encoding if needed
                    let chunk = if needs_encoding_conversion {
                        Self::convert_to_encoding(&chunk, target_encoding)
                    } else {
                        chunk
                    };

                    let index = insert_data.len();
                    insert_data.push(chunk);
                    actions.push(RecipeAction::Insert { index });
                }
            }
        }

        Ok(WriteRecipe {
            src_path: src_path_for_copy.map(|p| p.to_path_buf()),
            insert_data,
            actions,
        })
    }
976
977    /// Create a temporary file for saving.
978    ///
979    /// Tries to create the file in the same directory as the destination file first
980    /// to allow for an atomic rename. If that fails (e.g., due to directory permissions),
981    /// falls back to the system temporary directory.
982    fn create_temp_file(
983        &self,
984        dest_path: &Path,
985    ) -> io::Result<(PathBuf, Box<dyn crate::model::filesystem::FileWriter>)> {
986        // Try creating in same directory first
987        let same_dir_temp = self.fs.temp_path_for(dest_path);
988        match self.fs.create_file(&same_dir_temp) {
989            Ok(file) => Ok((same_dir_temp, file)),
990            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
991                // Fallback to system temp directory
992                let temp_path = self.fs.unique_temp_path(dest_path);
993                let file = self.fs.create_file(&temp_path)?;
994                Ok((temp_path, file))
995            }
996            Err(e) => Err(e),
997        }
998    }
999
1000    /// Create a temporary file in the recovery directory for in-place writes.
1001    /// This allows recovery if a crash occurs during the in-place write operation.
1002    fn create_recovery_temp_file(
1003        &self,
1004        dest_path: &Path,
1005    ) -> io::Result<(PathBuf, Box<dyn crate::model::filesystem::FileWriter>)> {
1006        // Get recovery directory: $XDG_DATA_HOME/fresh/recovery or ~/.local/share/fresh/recovery
1007        let recovery_dir = crate::input::input_history::get_data_dir()
1008            .map(|d| d.join("recovery"))
1009            .unwrap_or_else(|_| std::env::temp_dir());
1010
1011        // Ensure directory exists
1012        self.fs.create_dir_all(&recovery_dir)?;
1013
1014        // Create unique filename based on destination file and timestamp
1015        let file_name = dest_path
1016            .file_name()
1017            .unwrap_or_else(|| std::ffi::OsStr::new("fresh-save"));
1018        let timestamp = std::time::SystemTime::now()
1019            .duration_since(std::time::UNIX_EPOCH)
1020            .map(|d| d.as_nanos())
1021            .unwrap_or(0);
1022        let pid = std::process::id();
1023
1024        let temp_name = format!(
1025            ".inplace-{}-{}-{}.tmp",
1026            file_name.to_string_lossy(),
1027            pid,
1028            timestamp
1029        );
1030        let temp_path = recovery_dir.join(temp_name);
1031
1032        let file = self.fs.create_file(&temp_path)?;
1033        Ok((temp_path, file))
1034    }
1035
1036    /// Get the path for in-place write recovery metadata.
1037    /// Uses the same recovery directory as temp files.
1038    fn inplace_recovery_meta_path(&self, dest_path: &Path) -> PathBuf {
1039        let recovery_dir = crate::input::input_history::get_data_dir()
1040            .map(|d| d.join("recovery"))
1041            .unwrap_or_else(|_| std::env::temp_dir());
1042
1043        let hash = crate::services::recovery::path_hash(dest_path);
1044        recovery_dir.join(format!("{}.inplace.json", hash))
1045    }
1046
    /// Write in-place recovery metadata using self.fs.
    /// This is called before the dangerous streaming step so we can recover on crash.
    ///
    /// The metadata records the destination path, the temp file holding the
    /// complete new content, and the original owner/permissions so a recovery
    /// pass can finish the interrupted write and restore file attributes.
    ///
    /// # Errors
    /// Fails if the recovery record cannot be serialized to JSON or written
    /// through `self.fs`.
    fn write_inplace_recovery_meta(
        &self,
        meta_path: &Path,
        dest_path: &Path,
        temp_path: &Path,
        original_metadata: &Option<FileMetadata>,
    ) -> io::Result<()> {
        // Extract ownership/permissions from the original file's metadata,
        // defaulting to uid/gid 0 with mode 0o644 when unavailable.
        #[cfg(unix)]
        let (uid, gid, mode) = original_metadata
            .as_ref()
            .map(|m| {
                (
                    m.uid.unwrap_or(0),
                    m.gid.unwrap_or(0),
                    m.permissions.as_ref().map(|p| p.mode()).unwrap_or(0o644),
                )
            })
            .unwrap_or((0, 0, 0o644));
        // Non-Unix platforms have no uid/gid; record placeholder values.
        #[cfg(not(unix))]
        let (uid, gid, mode) = (0u32, 0u32, 0o644u32);

        let recovery = crate::services::recovery::InplaceWriteRecovery::new(
            dest_path.to_path_buf(),
            temp_path.to_path_buf(),
            uid,
            gid,
            mode,
        );

        // Map serialization failure to InvalidData so callers see a plain io::Error.
        let json = serde_json::to_string_pretty(&recovery)
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        self.fs.write_file(meta_path, json.as_bytes())
    }
1083
    /// Save the buffer to a specific file
    ///
    /// Uses the write recipe approach for both local and remote filesystems:
    /// - Copy ops reference unchanged regions in the source file
    /// - Insert ops contain new/modified data
    ///
    /// For remote filesystems, the recipe is sent to the agent which reconstructs
    /// the file server-side, avoiding transfer of unchanged content.
    ///
    /// For local filesystems with ownership concerns (file owned by another user),
    /// uses in-place writing to preserve ownership. Otherwise uses atomic writes.
    ///
    /// If the line ending format has been changed (via set_line_ending), all content
    /// will be converted to the new format during save.
    ///
    /// # Errors
    /// Propagates filesystem errors; a local `PermissionDenied` is upgraded to
    /// a `SudoSaveRequired` error carrying a staged temp file (see
    /// `make_sudo_error`).
    pub fn save_to_file<P: AsRef<Path>>(&mut self, path: P) -> anyhow::Result<()> {
        let dest_path = path.as_ref();
        let total = self.total_bytes();

        // Handle empty files
        if total == 0 {
            self.fs.write_file(dest_path, &[])?;
            self.finalize_save(dest_path)?;
            return Ok(());
        }

        // Build the write recipe (unified for all filesystem types)
        let recipe = self.build_write_recipe()?;
        let ops = recipe.to_write_ops();

        // Check if we need in-place writing to preserve file ownership (local only)
        // Remote filesystems handle this differently
        let is_local = self.fs.remote_connection_info().is_none();
        let use_inplace = is_local && self.should_use_inplace_write(dest_path);

        // Dispatch on (in-place, local/remote, has Copy ops); the four arms
        // below are mutually exclusive and cover every combination.
        if use_inplace {
            // In-place write: write directly to preserve ownership
            self.save_with_inplace_write(dest_path, &recipe)?;
        } else if !recipe.has_copy_ops() && !is_local {
            // Remote with no Copy ops: use write_file directly (more efficient)
            let data = recipe.flatten_inserts();
            self.fs.write_file(dest_path, &data)?;
        } else if is_local {
            // Local: use write_file or write_patched with sudo fallback
            let write_result = if !recipe.has_copy_ops() {
                let data = recipe.flatten_inserts();
                self.fs.write_file(dest_path, &data)
            } else {
                let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
                self.fs.write_patched(src_for_patch, dest_path, &ops)
            };

            if let Err(e) = write_result {
                if e.kind() == io::ErrorKind::PermissionDenied {
                    // Create temp file and return sudo error; the temp file
                    // holds the full new content for sudo to move into place.
                    let original_metadata = self.fs.metadata_if_exists(dest_path);
                    let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
                    self.write_recipe_to_file(&mut temp_file, &recipe)?;
                    temp_file.sync_all()?;
                    drop(temp_file);
                    return Err(self.make_sudo_error(temp_path, dest_path, original_metadata));
                }
                return Err(e.into());
            }
        } else {
            // Remote with Copy ops: use write_patched
            let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
            self.fs.write_patched(src_for_patch, dest_path, &ops)?;
        }

        self.finalize_save(dest_path)?;
        Ok(())
    }
1156
    /// Write using in-place mode to preserve file ownership.
    ///
    /// This is used when the file is owned by a different user and we need
    /// to write directly to the existing file to preserve its ownership.
    ///
    /// The approach:
    /// 1. Write the recipe to a temp file first (reads from original, writes to temp)
    /// 2. Stream the temp file content to the destination file (truncates and writes)
    /// 3. Delete the temp file
    ///
    /// This avoids the bug where truncating the destination before reading Copy chunks
    /// would corrupt the file. It also works for huge files since we stream in chunks.
    ///
    /// # Errors
    /// Propagates I/O errors; `PermissionDenied` when opening the destination
    /// becomes a `SudoSaveRequired` error that reuses the staged temp file.
    fn save_with_inplace_write(
        &self,
        dest_path: &Path,
        recipe: &WriteRecipe,
    ) -> anyhow::Result<()> {
        let original_metadata = self.fs.metadata_if_exists(dest_path);

        // Optimization: if no Copy ops, we can write directly without a temp file
        // (same as the non-inplace path for small files)
        if !recipe.has_copy_ops() {
            let data = recipe.flatten_inserts();
            return self.write_data_inplace(dest_path, &data, original_metadata);
        }

        // Step 1: Write recipe to a temp file in the recovery directory
        // This reads Copy chunks from the original file (still intact) and writes to temp.
        // Using the recovery directory allows crash recovery if the operation fails.
        let (temp_path, mut temp_file) = self.create_recovery_temp_file(dest_path)?;
        if let Err(e) = self.write_recipe_to_file(&mut temp_file, recipe) {
            // Best-effort cleanup of temp file on write failure
            #[allow(clippy::let_underscore_must_use)]
            let _ = self.fs.remove_file(&temp_path);
            return Err(e.into());
        }
        temp_file.sync_all()?;
        drop(temp_file);

        // Step 1.5: Save recovery metadata before the dangerous step
        // If we crash during step 2, this metadata + temp file allows recovery
        let recovery_meta_path = self.inplace_recovery_meta_path(dest_path);
        // Best effort - don't fail the save if we can't write recovery metadata
        #[allow(clippy::let_underscore_must_use)]
        let _ = self.write_inplace_recovery_meta(
            &recovery_meta_path,
            dest_path,
            &temp_path,
            &original_metadata,
        );

        // Step 2: Stream temp file content to destination
        // Now it's safe to truncate the destination since all data is in temp
        match self.fs.open_file_for_write(dest_path) {
            Ok(mut out_file) => {
                if let Err(e) = self.stream_file_to_writer(&temp_path, &mut out_file) {
                    // Don't delete temp file or recovery metadata - allow recovery
                    return Err(e.into());
                }
                out_file.sync_all()?;
                // Success! Clean up temp file and recovery metadata (best-effort)
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&temp_path);
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&recovery_meta_path);
                Ok(())
            }
            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                // Can't write to destination - trigger sudo fallback
                // Keep temp file for sudo to use, clean up recovery metadata (best-effort)
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&recovery_meta_path);
                Err(self.make_sudo_error(temp_path, dest_path, original_metadata))
            }
            Err(e) => {
                // Don't delete temp file or recovery metadata - allow recovery
                Err(e.into())
            }
        }
    }
1237
1238    /// Write data directly to a file in-place, with sudo fallback on permission denied.
1239    fn write_data_inplace(
1240        &self,
1241        dest_path: &Path,
1242        data: &[u8],
1243        original_metadata: Option<FileMetadata>,
1244    ) -> anyhow::Result<()> {
1245        match self.fs.open_file_for_write(dest_path) {
1246            Ok(mut out_file) => {
1247                out_file.write_all(data)?;
1248                out_file.sync_all()?;
1249                Ok(())
1250            }
1251            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
1252                // Create temp file for sudo fallback
1253                let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
1254                temp_file.write_all(data)?;
1255                temp_file.sync_all()?;
1256                drop(temp_file);
1257                Err(self.make_sudo_error(temp_path, dest_path, original_metadata))
1258            }
1259            Err(e) => Err(e.into()),
1260        }
1261    }
1262
1263    /// Stream a file's content to a writer in chunks to avoid memory issues with large files.
1264    fn stream_file_to_writer(
1265        &self,
1266        src_path: &Path,
1267        out_file: &mut Box<dyn crate::model::filesystem::FileWriter>,
1268    ) -> io::Result<()> {
1269        const CHUNK_SIZE: usize = 1024 * 1024; // 1MB chunks
1270
1271        let file_size = self.fs.metadata(src_path)?.size;
1272        let mut offset = 0u64;
1273
1274        while offset < file_size {
1275            let remaining = file_size - offset;
1276            let chunk_len = std::cmp::min(remaining, CHUNK_SIZE as u64) as usize;
1277            let chunk = self.fs.read_range(src_path, offset, chunk_len)?;
1278            out_file.write_all(&chunk)?;
1279            offset += chunk_len as u64;
1280        }
1281
1282        Ok(())
1283    }
1284
1285    /// Write the recipe content to a file writer.
1286    fn write_recipe_to_file(
1287        &self,
1288        out_file: &mut Box<dyn crate::model::filesystem::FileWriter>,
1289        recipe: &WriteRecipe,
1290    ) -> io::Result<()> {
1291        for action in &recipe.actions {
1292            match action {
1293                RecipeAction::Copy { offset, len } => {
1294                    // Read from source and write to output
1295                    let src_path = recipe.src_path.as_ref().ok_or_else(|| {
1296                        io::Error::new(io::ErrorKind::InvalidData, "Copy action without source")
1297                    })?;
1298                    let data = self.fs.read_range(src_path, *offset, *len as usize)?;
1299                    out_file.write_all(&data)?;
1300                }
1301                RecipeAction::Insert { index } => {
1302                    out_file.write_all(&recipe.insert_data[*index])?;
1303                }
1304            }
1305        }
1306        Ok(())
1307    }
1308
1309    /// Finalize save state after successful write.
1310    fn finalize_save(&mut self, dest_path: &Path) -> anyhow::Result<()> {
1311        let new_size = self.fs.metadata(dest_path)?.size as usize;
1312        tracing::debug!(
1313            "Buffer::save: updating saved_file_size from {:?} to {}",
1314            self.saved_file_size,
1315            new_size
1316        );
1317        self.saved_file_size = Some(new_size);
1318        self.file_path = Some(dest_path.to_path_buf());
1319
1320        // Consolidate the piece tree to synchronize with disk (for large files)
1321        // or to simplify structure (for small files).
1322        self.consolidate_after_save(dest_path, new_size);
1323
1324        self.mark_saved_snapshot();
1325        self.original_line_ending = self.line_ending;
1326        self.original_encoding = self.encoding;
1327        Ok(())
1328    }
1329
1330    /// Finalize buffer state after an external save operation (e.g., via sudo).
1331    ///
1332    /// This updates the saved snapshot and file size to match the new state on disk.
1333    pub fn finalize_external_save(&mut self, dest_path: PathBuf) -> anyhow::Result<()> {
1334        let new_size = self.fs.metadata(&dest_path)?.size as usize;
1335        self.saved_file_size = Some(new_size);
1336        self.file_path = Some(dest_path.clone());
1337
1338        // Consolidate the piece tree to synchronize with disk or simplify structure.
1339        self.consolidate_after_save(&dest_path, new_size);
1340
1341        self.mark_saved_snapshot();
1342        self.original_line_ending = self.line_ending;
1343        self.original_encoding = self.encoding;
1344        Ok(())
1345    }
1346
1347    /// Consolidate the piece tree into a single piece.
1348    /// For large files, this creates a reference to the disk file to save memory and sync offsets.
1349    /// For small files, this flattens all edits into a single in-memory buffer.
1350    fn consolidate_after_save(&mut self, path: &Path, file_size: usize) {
1351        if self.large_file {
1352            self.consolidate_large_file(path, file_size);
1353        } else {
1354            self.consolidate_small_file();
1355        }
1356    }
1357
1358    /// Consolidate large file piece tree into a single piece pointing to the new file.
1359    /// This ensures that subsequent operations correctly reference the new content and offsets.
1360    /// Preserves total line feed count from the old tree if a scan was previously done.
1361    fn consolidate_large_file(&mut self, path: &Path, file_size: usize) {
1362        // Preserve line feed count from the old tree if we had scanned it
1363        let preserved_lf = if self.line_feeds_scanned {
1364            self.piece_tree.line_count().map(|c| c.saturating_sub(1))
1365        } else {
1366            None
1367        };
1368
1369        let buffer = StringBuffer {
1370            id: 0,
1371            data: BufferData::Unloaded {
1372                file_path: path.to_path_buf(),
1373                file_offset: 0,
1374                bytes: file_size,
1375            },
1376            stored_file_offset: None,
1377        };
1378
1379        self.piece_tree = if file_size > 0 {
1380            PieceTree::new(BufferLocation::Stored(0), 0, file_size, preserved_lf)
1381        } else {
1382            PieceTree::empty()
1383        };
1384
1385        self.buffers = vec![buffer];
1386        self.next_buffer_id = 1;
1387
1388        tracing::debug!(
1389            "Buffer::consolidate_large_file: consolidated into single piece of {} bytes",
1390            file_size
1391        );
1392    }
1393
1394    /// Consolidate small file edits into a single in-memory buffer and re-index lines.
1395    fn consolidate_small_file(&mut self) {
1396        if let Some(bytes) = self.get_all_text() {
1397            let line_feed_cnt = bytes.iter().filter(|&&b| b == b'\n').count();
1398            let len = bytes.len();
1399
1400            // Create a single loaded buffer with line indexing
1401            let buffer = StringBuffer::new_loaded(0, bytes, true);
1402
1403            self.piece_tree = if len > 0 {
1404                PieceTree::new(BufferLocation::Stored(0), 0, len, Some(line_feed_cnt))
1405            } else {
1406                PieceTree::empty()
1407            };
1408
1409            self.buffers = vec![buffer];
1410            self.next_buffer_id = 1;
1411
1412            tracing::debug!(
1413                "Buffer::consolidate_small_file: consolidated into single loaded buffer of {} bytes",
1414                len
1415            );
1416        }
1417    }
1418
    /// Internal helper to create a SudoSaveRequired error.
    ///
    /// Packages everything a sudo-assisted save needs — the temp file holding
    /// the new content, the destination, and the original uid/gid/mode so
    /// ownership and permissions can be restored — into a `SudoSaveRequired`
    /// wrapped in an `anyhow::Error`.
    fn make_sudo_error(
        &self,
        temp_path: PathBuf,
        dest_path: &Path,
        original_metadata: Option<FileMetadata>,
    ) -> anyhow::Error {
        // On Unix, pull uid/gid/mode from the original file's metadata,
        // masking the mode down to the permission bits (0o7777).
        #[cfg(unix)]
        let (uid, gid, mode) = if let Some(ref meta) = original_metadata {
            (
                meta.uid.unwrap_or(0),
                meta.gid.unwrap_or(0),
                meta.permissions
                    .as_ref()
                    .map(|p| p.mode() & 0o7777)
                    .unwrap_or(0),
            )
        } else {
            (0, 0, 0)
        };
        // Non-Unix: ownership/permissions are not applicable; use zeros.
        #[cfg(not(unix))]
        let (uid, gid, mode) = (0u32, 0u32, 0u32);

        let _ = original_metadata; // suppress unused warning on non-Unix

        anyhow::anyhow!(SudoSaveRequired {
            temp_path,
            dest_path: dest_path.to_path_buf(),
            uid,
            gid,
            mode,
        })
    }
1452
    /// Get the total number of bytes in the document.
    ///
    /// Delegates to the piece tree, which is the single source of truth for
    /// document length.
    pub fn total_bytes(&self) -> usize {
        self.piece_tree.total_bytes()
    }
1457
    /// Get the total number of lines in the document.
    ///
    /// Uses the piece tree's integrated line tracking.
    /// Returns None if line count is unknown (e.g., for large files without
    /// line indexing having been performed yet).
    pub fn line_count(&self) -> Option<usize> {
        self.piece_tree.line_count()
    }
1464
1465    /// Snapshot the current tree as the saved baseline
1466    pub fn mark_saved_snapshot(&mut self) {
1467        self.saved_root = self.piece_tree.root();
1468        self.modified = false;
1469    }
1470
1471    /// Refresh the saved root to match the current tree structure without
1472    /// clearing the modified flag.  Call this after structural-only changes
1473    /// (e.g. chunk_split_and_load during search scan) so that
1474    /// `diff_since_saved()` can take the fast `Arc::ptr_eq` path.
1475    pub fn refresh_saved_root_if_unmodified(&mut self) {
1476        if !self.modified {
1477            self.saved_root = self.piece_tree.root();
1478        }
1479    }
1480
    /// Apply a chunk-load buffer replacement to `saved_root`.
    ///
    /// When viewport loading converts a `Stored(buffer_id)` piece to
    /// `Added(new_buffer_id)` in the current tree and the buffer is already
    /// modified, we must apply the same transformation to `saved_root` so
    /// that `diff_since_saved()` can match loaded-but-unedited regions by
    /// `(location, offset)` identity.
    ///
    /// Offsets here are buffer-relative: `chunk_offset_in_buffer` and each
    /// leaf's `offset` are positions within `old_buffer_id`'s data.
    fn apply_chunk_load_to_saved_root(
        &mut self,
        old_buffer_id: usize,
        chunk_offset_in_buffer: usize,
        chunk_bytes: usize,
        new_buffer_id: usize,
    ) {
        use crate::model::piece_tree::{LeafData, PieceTree};

        // Flatten the saved tree so each leaf can be examined and rewritten.
        let mut leaves = Vec::new();
        self.saved_root.collect_leaves(&mut leaves);

        let mut modified = false;
        // Worst case a single leaf splits into prefix + chunk + suffix, so at
        // most two extra leaves are produced.
        let mut new_leaves: Vec<LeafData> = Vec::with_capacity(leaves.len() + 2);

        for leaf in &leaves {
            // Leaves belonging to other buffers pass through unchanged.
            if leaf.location.buffer_id() != old_buffer_id {
                new_leaves.push(*leaf);
                continue;
            }

            let leaf_start = leaf.offset;
            let leaf_end = leaf.offset + leaf.bytes;
            let chunk_start = chunk_offset_in_buffer;
            let chunk_end = chunk_offset_in_buffer + chunk_bytes;

            // Check if this leaf overlaps the chunk range
            if chunk_start >= leaf_end || chunk_end <= leaf_start {
                // No overlap — keep as-is
                new_leaves.push(*leaf);
                continue;
            }

            modified = true;

            // Prefix: portion of this leaf before the chunk
            if chunk_start > leaf_start {
                new_leaves.push(LeafData::new(
                    leaf.location,
                    leaf.offset,
                    chunk_start - leaf_start,
                    None, // line feed count unknown after split
                ));
            }

            // The chunk itself — replaced with Added(new_buffer_id).
            // The overlap is clamped to this leaf, and its offset is
            // re-expressed relative to the start of the new chunk buffer.
            let actual_start = chunk_start.max(leaf_start);
            let actual_end = chunk_end.min(leaf_end);
            let offset_in_chunk = actual_start - chunk_start;
            new_leaves.push(LeafData::new(
                BufferLocation::Added(new_buffer_id),
                offset_in_chunk,
                actual_end - actual_start,
                None,
            ));

            // Suffix: portion of this leaf after the chunk
            if chunk_end < leaf_end {
                new_leaves.push(LeafData::new(
                    leaf.location,
                    chunk_end,
                    leaf_end - chunk_end,
                    None,
                ));
            }
        }

        // Rebuild only if something changed, preserving Arc identity (and the
        // diff fast path) when the chunk touched no saved leaves.
        if modified {
            self.saved_root = PieceTree::from_leaves(&new_leaves).root();
        }
    }
1559
1560    /// Diff the current piece tree against the last saved snapshot.
1561    ///
1562    /// This compares actual byte content, not just tree structure. This means
1563    /// that if you delete text and then paste it back, the diff will correctly
1564    /// show no changes (even though the tree structure differs).
1565    ///
1566    /// Uses a two-phase algorithm for efficiency:
1567    /// - Phase 1: Fast structure-based diff to find changed byte ranges (O(num_leaves))
1568    /// - Phase 2: Only compare actual content within changed ranges (O(edit_size))
1569    ///
1570    /// This is O(edit_size) instead of O(file_size) for small edits in large files.
1571    pub fn diff_since_saved(&self) -> PieceTreeDiff {
1572        let _span = tracing::info_span!(
1573            "diff_since_saved",
1574            large_file = self.large_file,
1575            modified = self.modified,
1576            lf_scanned = self.line_feeds_scanned
1577        )
1578        .entered();
1579
1580        // Fast path: if the buffer hasn't been modified since loading/saving,
1581        // the content is identical to the saved version by definition.
1582        // This avoids an expensive O(num_leaves) structure walk when the tree
1583        // has been restructured for non-edit reasons (viewport chunk loading,
1584        // line-scan preparation, search-scan splits).
1585        if !self.modified {
1586            tracing::trace!("diff_since_saved: not modified → equal");
1587            return PieceTreeDiff {
1588                equal: true,
1589                byte_ranges: Vec::new(),
1590                line_ranges: Some(Vec::new()),
1591                nodes_visited: 0,
1592            };
1593        }
1594
1595        // Quick check: if tree roots are identical (Arc pointer equality),
1596        // the content is definitely the same.
1597        if Arc::ptr_eq(&self.saved_root, &self.piece_tree.root()) {
1598            tracing::trace!("diff_since_saved: Arc::ptr_eq fast path → equal");
1599            return PieceTreeDiff {
1600                equal: true,
1601                byte_ranges: Vec::new(),
1602                line_ranges: Some(Vec::new()),
1603                nodes_visited: 0,
1604            };
1605        }
1606
1607        // Phase 1: Fast structure-based diff to find which byte ranges differ
1608        // This is O(number of leaves) - very fast even for large files
1609        let structure_diff = self.diff_trees_by_structure();
1610
1611        // If structure says trees are equal (same pieces in same order), we're done
1612        if structure_diff.equal {
1613            tracing::trace!(
1614                "diff_since_saved: structure equal, line_ranges={}",
1615                structure_diff
1616                    .line_ranges
1617                    .as_ref()
1618                    .map_or("None".to_string(), |r| format!("Some({})", r.len()))
1619            );
1620            return structure_diff;
1621        }
1622
1623        // Phase 2: For small changed regions, verify with actual content comparison
1624        // This handles the case where different pieces contain identical content
1625        // (e.g., delete text then paste it back)
1626        let total_changed_bytes: usize = structure_diff
1627            .byte_ranges
1628            .iter()
1629            .map(|r| r.end.saturating_sub(r.start))
1630            .sum();
1631
1632        // Only do content verification if the changed region is reasonably small
1633        // For large changes, trust the structure-based diff
1634        const MAX_VERIFY_BYTES: usize = 64 * 1024; // 64KB threshold for verification
1635
1636        if total_changed_bytes <= MAX_VERIFY_BYTES && !structure_diff.byte_ranges.is_empty() {
1637            // Check if content in the changed ranges is actually different
1638            if self.verify_content_differs_in_ranges(&structure_diff.byte_ranges) {
1639                tracing::trace!(
1640                    "diff_since_saved: content differs, byte_ranges={}, line_ranges={}",
1641                    structure_diff.byte_ranges.len(),
1642                    structure_diff
1643                        .line_ranges
1644                        .as_ref()
1645                        .map_or("None".to_string(), |r| format!("Some({})", r.len()))
1646                );
1647                // Content actually differs - return the structure diff result
1648                return structure_diff;
1649            } else {
1650                // Content is the same despite structure differences (rare case: undo/redo)
1651                return PieceTreeDiff {
1652                    equal: true,
1653                    byte_ranges: Vec::new(),
1654                    line_ranges: Some(Vec::new()),
1655                    nodes_visited: structure_diff.nodes_visited,
1656                };
1657            }
1658        }
1659
1660        tracing::info!(
1661            "diff_since_saved: large change, byte_ranges={}, line_ranges={}, nodes_visited={}",
1662            structure_diff.byte_ranges.len(),
1663            structure_diff
1664                .line_ranges
1665                .as_ref()
1666                .map_or("None".to_string(), |r| format!("Some({})", r.len())),
1667            structure_diff.nodes_visited
1668        );
1669        // For large changes or when we can't verify, trust the structure diff
1670        structure_diff
1671    }
1672
1673    /// Check if the actual byte content differs in the given ranges.
1674    /// Returns true if content differs, false if content is identical.
1675    fn verify_content_differs_in_ranges(&self, byte_ranges: &[std::ops::Range<usize>]) -> bool {
1676        let saved_bytes = self.tree_total_bytes(&self.saved_root);
1677        let current_bytes = self.piece_tree.total_bytes();
1678
1679        // Different total sizes means content definitely differs
1680        if saved_bytes != current_bytes {
1681            return true;
1682        }
1683
1684        // For each changed range, compare the actual bytes
1685        for range in byte_ranges {
1686            if range.start >= range.end {
1687                continue;
1688            }
1689
1690            // Extract bytes from saved tree for this range
1691            let saved_slice =
1692                self.extract_range_from_tree(&self.saved_root, range.start, range.end);
1693            // Extract bytes from current tree for this range
1694            let current_slice = self.get_text_range(range.start, range.end);
1695
1696            match (saved_slice, current_slice) {
1697                (Some(saved), Some(current)) => {
1698                    if saved != current {
1699                        return true; // Content differs
1700                    }
1701                }
1702                _ => {
1703                    // Couldn't read content, assume it differs to be safe
1704                    return true;
1705                }
1706            }
1707        }
1708
1709        // All ranges have identical content
1710        false
1711    }
1712
1713    /// Extract a byte range from a saved tree root
1714    fn extract_range_from_tree(
1715        &self,
1716        root: &Arc<crate::model::piece_tree::PieceTreeNode>,
1717        start: usize,
1718        end: usize,
1719    ) -> Option<Vec<u8>> {
1720        let mut result = Vec::with_capacity(end.saturating_sub(start));
1721        self.collect_range_from_node(root, start, end, 0, &mut result)?;
1722        Some(result)
1723    }
1724
    /// Recursively collect bytes from a range within a tree node.
    ///
    /// Walks the subtree rooted at `node` and appends to `result` every byte
    /// of the document range `[range_start, range_end)` that this subtree
    /// covers. `node_offset` is the document offset of the subtree's first
    /// byte. Returns `Some(())` on success; returns `None` (possibly leaving
    /// `result` partially filled) if any backing buffer or slice is
    /// unavailable.
    fn collect_range_from_node(
        &self,
        node: &Arc<crate::model::piece_tree::PieceTreeNode>,
        range_start: usize,
        range_end: usize,
        node_offset: usize,
        result: &mut Vec<u8>,
    ) -> Option<()> {
        use crate::model::piece_tree::PieceTreeNode;

        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                // Document offset of the first byte of the right subtree.
                let left_end = node_offset + left_bytes;

                // Check if range overlaps with left subtree
                if range_start < left_end {
                    self.collect_range_from_node(
                        left,
                        range_start,
                        range_end,
                        node_offset,
                        result,
                    )?;
                }

                // Check if range overlaps with right subtree
                // (right subtree starts at left_end in document space)
                if range_end > left_end {
                    self.collect_range_from_node(right, range_start, range_end, left_end, result)?;
                }
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                ..
            } => {
                let node_end = node_offset + bytes;

                // Check if this leaf overlaps with our range
                if range_start < node_end && range_end > node_offset {
                    // None here means the buffer is missing or unloaded.
                    let buf = self.buffers.get(location.buffer_id())?;
                    let data = buf.get_data()?;

                    // Calculate the slice within this leaf
                    // (leaf-local coordinates, clamped to the leaf's size).
                    let leaf_start = range_start.saturating_sub(node_offset);
                    let leaf_end = (range_end - node_offset).min(*bytes);

                    if leaf_start < leaf_end {
                        // `offset` is the piece's start within its buffer.
                        let slice = data.get(*offset + leaf_start..*offset + leaf_end)?;
                        result.extend_from_slice(slice);
                    }
                }
            }
        }
        Some(())
    }
1787
1788    /// Helper to get total bytes from a tree root
1789    fn tree_total_bytes(&self, root: &Arc<crate::model::piece_tree::PieceTreeNode>) -> usize {
1790        use crate::model::piece_tree::PieceTreeNode;
1791        match root.as_ref() {
1792            PieceTreeNode::Internal {
1793                left_bytes, right, ..
1794            } => left_bytes + self.tree_total_bytes(right),
1795            PieceTreeNode::Leaf { bytes, .. } => *bytes,
1796        }
1797    }
1798
1799    /// Structure-based diff comparing piece tree leaves
1800    fn diff_trees_by_structure(&self) -> PieceTreeDiff {
1801        crate::model::piece_tree_diff::diff_piece_trees(
1802            &self.saved_root,
1803            &self.piece_tree.root(),
1804            &|leaf, start, len| {
1805                if len == 0 {
1806                    return Some(0);
1807                }
1808                // Try counting from raw byte data first
1809                if let Some(buf) = self.buffers.get(leaf.location.buffer_id()) {
1810                    if let Some(data) = buf.get_data() {
1811                        let start = leaf.offset + start;
1812                        let end = start + len;
1813                        if let Some(slice) = data.get(start..end) {
1814                            let line_feeds = slice.iter().filter(|&&b| b == b'\n').count();
1815                            return Some(line_feeds);
1816                        }
1817                    }
1818                }
1819                // Fallback: use the leaf's cached line_feed_cnt when we're
1820                // querying the entire leaf. This handles unloaded segments in
1821                // large file mode after line scanning has populated the metadata.
1822                if start == 0 && len == leaf.bytes {
1823                    leaf.line_feed_cnt.map(|c| c)
1824                } else {
1825                    tracing::warn!(
1826                        "diff line_counter: returning None for partial leaf query: \
1827                         loc={:?} offset={} bytes={} lf_cnt={:?} query_start={} query_len={}",
1828                        leaf.location,
1829                        leaf.offset,
1830                        leaf.bytes,
1831                        leaf.line_feed_cnt,
1832                        start,
1833                        len
1834                    );
1835                    None
1836                }
1837            },
1838        )
1839    }
1840
1841    /// Convert a byte offset to a line/column position
1842    pub fn offset_to_position(&self, offset: usize) -> Option<Position> {
1843        self.piece_tree
1844            .offset_to_position(offset, &self.buffers)
1845            .map(|(line, column)| Position { line, column })
1846    }
1847
1848    /// Convert a line/column position to a byte offset
1849    pub fn position_to_offset(&self, position: Position) -> usize {
1850        self.piece_tree
1851            .position_to_offset(position.line, position.column, &self.buffers)
1852    }
1853
1854    /// Insert text at the given byte offset
1855    pub fn insert_bytes(&mut self, offset: usize, text: Vec<u8>) -> Cursor {
1856        if text.is_empty() {
1857            return self.piece_tree.cursor_at_offset(offset);
1858        }
1859
1860        // Mark as modified (updates version)
1861        self.mark_content_modified();
1862
1863        // Count line feeds in the text to insert
1864        let line_feed_cnt = Some(text.iter().filter(|&&b| b == b'\n').count());
1865
1866        // Optimization: try to append to existing buffer if insertion is at piece boundary
1867        let (buffer_location, buffer_offset, text_len) =
1868            if let Some(append_info) = self.try_append_to_existing_buffer(offset, &text) {
1869                append_info
1870            } else {
1871                // Create a new StringBuffer for this insertion
1872                let buffer_id = self.next_buffer_id;
1873                self.next_buffer_id += 1;
1874                let buffer = StringBuffer::new(buffer_id, text.clone());
1875                self.buffers.push(buffer);
1876                (BufferLocation::Added(buffer_id), 0, text.len())
1877            };
1878
1879        // When line feeds have been scanned, ensure the chunk at the insertion
1880        // point is loaded so compute_line_feeds_static can recount during splits.
1881        if self.line_feeds_scanned {
1882            self.ensure_chunk_loaded_at(offset);
1883        }
1884
1885        // Update piece tree (need to pass buffers reference)
1886        self.piece_tree.insert(
1887            offset,
1888            buffer_location,
1889            buffer_offset,
1890            text_len,
1891            line_feed_cnt,
1892            &self.buffers,
1893        )
1894    }
1895
1896    /// Try to append to an existing buffer if insertion point aligns with buffer end
1897    /// Returns (BufferLocation, buffer_offset, text_len) if append succeeds, None otherwise
1898    fn try_append_to_existing_buffer(
1899        &mut self,
1900        offset: usize,
1901        text: &[u8],
1902    ) -> Option<(BufferLocation, usize, usize)> {
1903        // Only optimize for non-empty insertions after existing content
1904        if text.is_empty() || offset == 0 {
1905            return None;
1906        }
1907
1908        // Find the piece containing the byte just before the insertion point
1909        // This avoids the saturating_sub issue
1910        let piece_info = self.piece_tree.find_by_offset(offset - 1)?;
1911
1912        // Check if insertion is exactly at the end of this piece
1913        // offset_in_piece tells us where (offset-1) is within the piece
1914        // For insertion to be at piece end, (offset-1) must be the last byte
1915        let offset_in_piece = piece_info.offset_in_piece?;
1916        if offset_in_piece + 1 != piece_info.bytes {
1917            return None; // Not at the end of the piece
1918        }
1919
1920        // Only append to "Added" buffers (not original Stored buffers)
1921        if !matches!(piece_info.location, BufferLocation::Added(_)) {
1922            return None;
1923        }
1924
1925        let buffer_id = piece_info.location.buffer_id();
1926        let buffer = self.buffers.get_mut(buffer_id)?;
1927
1928        // Check if buffer is loaded
1929        let buffer_len = buffer.get_data()?.len();
1930
1931        // Check if this piece ends exactly at the end of its buffer
1932        if piece_info.offset + piece_info.bytes != buffer_len {
1933            return None;
1934        }
1935
1936        // Perfect! Append to this buffer
1937        let append_offset = buffer.append(text);
1938
1939        Some((piece_info.location, append_offset, text.len()))
1940    }
1941
1942    /// Insert text (from &str) at the given byte offset
1943    pub fn insert(&mut self, offset: usize, text: &str) {
1944        self.insert_bytes(offset, text.as_bytes().to_vec());
1945    }
1946
1947    /// Insert text at a line/column position
1948    /// This now uses the optimized piece_tree.insert_at_position() for a single traversal
1949    pub fn insert_at_position(&mut self, position: Position, text: Vec<u8>) -> Cursor {
1950        if text.is_empty() {
1951            let offset = self.position_to_offset(position);
1952            return self.piece_tree.cursor_at_offset(offset);
1953        }
1954
1955        self.mark_content_modified();
1956
1957        // Count line feeds in the text to insert
1958        let line_feed_cnt = text.iter().filter(|&&b| b == b'\n').count();
1959
1960        // Create a new StringBuffer for this insertion
1961        let buffer_id = self.next_buffer_id;
1962        self.next_buffer_id += 1;
1963        let buffer = StringBuffer::new(buffer_id, text.clone());
1964        self.buffers.push(buffer);
1965
1966        // Use the optimized position-based insertion (single traversal)
1967        self.piece_tree.insert_at_position(
1968            position.line,
1969            position.column,
1970            BufferLocation::Added(buffer_id),
1971            0,
1972            text.len(),
1973            line_feed_cnt,
1974            &self.buffers,
1975        )
1976    }
1977
1978    /// Delete text starting at the given byte offset
1979    pub fn delete_bytes(&mut self, offset: usize, bytes: usize) {
1980        if bytes == 0 || offset >= self.total_bytes() {
1981            return;
1982        }
1983
1984        // When line feeds have been scanned, ensure chunks at delete boundaries
1985        // are loaded so compute_line_feeds_static can recount during splits.
1986        if self.line_feeds_scanned {
1987            self.ensure_chunk_loaded_at(offset);
1988            let end = (offset + bytes).min(self.total_bytes());
1989            if end > offset {
1990                self.ensure_chunk_loaded_at(end.saturating_sub(1));
1991            }
1992        }
1993
1994        // Update piece tree
1995        self.piece_tree.delete(offset, bytes, &self.buffers);
1996
1997        self.mark_content_modified();
1998    }
1999
2000    /// Delete text in a range
2001    pub fn delete(&mut self, range: Range<usize>) {
2002        if range.end > range.start {
2003            self.delete_bytes(range.start, range.end - range.start);
2004        }
2005    }
2006
2007    /// Delete text in a line/column range
2008    /// This now uses the optimized piece_tree.delete_position_range() for a single traversal
2009    pub fn delete_range(&mut self, start: Position, end: Position) {
2010        // Use the optimized position-based deletion
2011        self.piece_tree.delete_position_range(
2012            start.line,
2013            start.column,
2014            end.line,
2015            end.column,
2016            &self.buffers,
2017        );
2018        self.mark_content_modified();
2019    }
2020
2021    /// Replace the entire buffer content with new content
2022    /// This is an O(n) operation that rebuilds the piece tree in a single pass,
2023    /// avoiding the O(n²) complexity of applying individual edits.
2024    ///
2025    /// This is used for bulk operations like "replace all" where applying
2026    /// individual edits would be prohibitively slow.
2027    pub fn replace_content(&mut self, new_content: &str) {
2028        let bytes = new_content.len();
2029        let content_bytes = new_content.as_bytes().to_vec();
2030
2031        // Count line feeds in the new content
2032        let line_feed_cnt = content_bytes.iter().filter(|&&b| b == b'\n').count();
2033
2034        // Create a new StringBuffer for the new content
2035        let buffer_id = self.next_buffer_id;
2036        self.next_buffer_id += 1;
2037        let buffer = StringBuffer::new(buffer_id, content_bytes);
2038        self.buffers.push(buffer);
2039
2040        // Rebuild the piece tree with a single piece containing all the new content
2041        if bytes > 0 {
2042            self.piece_tree = PieceTree::new(
2043                BufferLocation::Added(buffer_id),
2044                0,
2045                bytes,
2046                Some(line_feed_cnt),
2047            );
2048        } else {
2049            self.piece_tree = PieceTree::empty();
2050        }
2051
2052        self.mark_content_modified();
2053    }
2054
2055    /// Restore a previously saved buffer state (for undo/redo of BulkEdit).
2056    ///
2057    /// This restores the piece tree AND the buffers list, which is critical
2058    /// because consolidate_after_save() replaces self.buffers. Without restoring
2059    /// buffers, the piece tree would reference buffer IDs that no longer exist.
2060    pub fn restore_buffer_state(&mut self, snapshot: &BufferSnapshot) {
2061        self.piece_tree = snapshot.piece_tree.clone();
2062        self.buffers = snapshot.buffers.clone();
2063        self.next_buffer_id = snapshot.next_buffer_id;
2064        self.mark_content_modified();
2065    }
2066
2067    /// Snapshot the current buffer state (piece tree + buffers) for BulkEdit undo/redo.
2068    ///
2069    /// The snapshot includes buffers because consolidate_after_save() can replace
2070    /// self.buffers between the snapshot and restore, which would otherwise cause
2071    /// the restored piece tree to reference nonexistent buffer IDs.
2072    pub fn snapshot_buffer_state(&self) -> Arc<BufferSnapshot> {
2073        Arc::new(BufferSnapshot {
2074            piece_tree: self.piece_tree.clone(),
2075            buffers: self.buffers.clone(),
2076            next_buffer_id: self.next_buffer_id,
2077        })
2078    }
2079
2080    /// Apply bulk edits efficiently in a single pass
2081    /// Returns the net change in bytes
2082    pub fn apply_bulk_edits(&mut self, edits: &[(usize, usize, &str)]) -> isize {
2083        // Pre-allocate buffers for all insert texts (only non-empty texts)
2084        // This avoids the borrow conflict in the closure
2085        // IMPORTANT: Only add entries for non-empty texts because the closure
2086        // is only called for edits with non-empty insert text
2087        let mut buffer_info: Vec<(BufferLocation, usize, usize, Option<usize>)> = Vec::new();
2088
2089        for (_, _, text) in edits {
2090            if !text.is_empty() {
2091                let buffer_id = self.next_buffer_id;
2092                self.next_buffer_id += 1;
2093                let content = text.as_bytes().to_vec();
2094                let lf_cnt = content.iter().filter(|&&b| b == b'\n').count();
2095                let bytes = content.len();
2096                let buffer = StringBuffer::new(buffer_id, content);
2097                self.buffers.push(buffer);
2098                buffer_info.push((BufferLocation::Added(buffer_id), 0, bytes, Some(lf_cnt)));
2099            }
2100            // No placeholder for empty texts - the closure is only called for non-empty texts
2101        }
2102
2103        // Now call apply_bulk_edits with a simple index-based closure
2104        let mut idx = 0;
2105        let delta = self
2106            .piece_tree
2107            .apply_bulk_edits(edits, &self.buffers, |_text| {
2108                let info = buffer_info[idx];
2109                idx += 1;
2110                info
2111            });
2112
2113        self.mark_content_modified();
2114        delta
2115    }
2116
    /// Get text from a byte offset range (read-only).
    ///
    /// `offset` is the starting document offset and `bytes` is the COUNT of
    /// bytes to read (not an end offset). Uses the optimized
    /// piece_tree.iter_pieces_in_range() for a single traversal.
    ///
    /// Returns None if any buffer in the range is unloaded. Pieces whose
    /// buffer data is shorter than expected are silently skipped, so the
    /// result may contain fewer than `bytes` bytes near EOF.
    /// PRIVATE: External code should use get_text_range_mut() which handles lazy loading
    fn get_text_range(&self, offset: usize, bytes: usize) -> Option<Vec<u8>> {
        if bytes == 0 {
            return Some(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        let end_offset = offset + bytes;
        // Running total of bytes copied, used to stop early once satisfied.
        let mut collected = 0;

        // Use the efficient piece iterator (single O(log n) traversal + O(N) iteration)
        for piece_view in self.piece_tree.iter_pieces_in_range(offset, end_offset) {
            let buffer_id = piece_view.location.buffer_id();
            if let Some(buffer) = self.buffers.get(buffer_id) {
                // Calculate the range to read from this piece
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clip to the requested range
                let read_start = offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    // Translate document coordinates into buffer coordinates.
                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    // Return None if buffer is unloaded (type-safe)
                    let data = buffer.get_data()?;

                    if buffer_end <= data.len() {
                        result.extend_from_slice(&data[buffer_start..buffer_end]);
                        collected += bytes_to_read;

                        if collected >= bytes {
                            break;
                        }
                    }
                }
            }
        }

        Some(result)
    }
2167
    /// Get text from a byte offset range with lazy loading.
    ///
    /// `offset` is the starting document offset and `bytes` is the COUNT of
    /// bytes to read. This will load unloaded chunks on-demand and always
    /// returns complete data (clamped to the end of the document).
    ///
    /// Returns an error if loading fails or if data cannot be read for any
    /// reason.
    ///
    /// NOTE: Currently loads entire buffers on-demand. Future optimization would split
    /// large pieces and load only LOAD_CHUNK_SIZE chunks at a time.
    pub fn get_text_range_mut(&mut self, offset: usize, bytes: usize) -> Result<Vec<u8>> {
        let _span = tracing::info_span!("get_text_range_mut", offset, bytes).entered();
        if bytes == 0 {
            return Ok(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        // Clamp end_offset to buffer length to handle reads beyond EOF
        let end_offset = (offset + bytes).min(self.len());
        let mut current_offset = offset;
        let mut iteration_count = 0u32;

        // Keep iterating until we've collected all requested bytes.
        // Each pass over the piece iterator may be interrupted by a chunk
        // split-and-load that restructures the tree, requiring a restart.
        while current_offset < end_offset {
            iteration_count += 1;
            let mut made_progress = false;
            let mut restarted_iteration = false;

            // Use the efficient piece iterator (single O(log n) traversal + O(N) iteration)
            for piece_view in self
                .piece_tree
                .iter_pieces_in_range(current_offset, end_offset)
            {
                let buffer_id = piece_view.location.buffer_id();

                // Check if buffer needs loading
                let needs_loading = self
                    .buffers
                    .get(buffer_id)
                    .map(|b| !b.is_loaded())
                    .unwrap_or(false);

                // A `true` return means the tree was restructured by the
                // split/load, so the current iterator is stale — restart the
                // outer while-loop from current_offset.
                if needs_loading && self.chunk_split_and_load(&piece_view, current_offset)? {
                    restarted_iteration = true;
                    break;
                }

                // Calculate the range to read from this piece
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clip to the requested range
                let read_start = current_offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    // Translate document coordinates into buffer coordinates.
                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    // Buffer should be loaded now
                    let buffer = self.buffers.get(buffer_id).context("Buffer not found")?;
                    let data = buffer
                        .get_data()
                        .context("Buffer data unavailable after load")?;

                    anyhow::ensure!(
                        buffer_end <= data.len(),
                        "Buffer range out of bounds: requested {}..{}, buffer size {}",
                        buffer_start,
                        buffer_end,
                        data.len()
                    );

                    result.extend_from_slice(&data[buffer_start..buffer_end]);
                    current_offset = read_end;
                    made_progress = true;
                }
            }

            // If we didn't make progress and didn't restart iteration, this is an error:
            // without this guard the while-loop would spin forever.
            if !made_progress && !restarted_iteration {
                tracing::error!(
                    "get_text_range_mut: No progress at offset {} (requested range: {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
                tracing::error!(
                    "Piece tree stats: {} total bytes",
                    self.piece_tree.stats().total_bytes
                );
                anyhow::bail!(
                    "Failed to read data at offset {}: no progress made (requested {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
            }
        }

        // More than one pass means at least one chunk split/load occurred.
        if iteration_count > 1 {
            tracing::info!(
                iteration_count,
                result_len = result.len(),
                "get_text_range_mut: completed with multiple iterations"
            );
        }

        Ok(result)
    }
2280
2281    /// Prepare a viewport for rendering
2282    ///
2283    /// This is called before rendering with &mut access to pre-load all data
2284    /// that will be needed for the viewport. It estimates the number of bytes
2285    /// needed based on the line count and pre-loads them.
2286    ///
2287    /// # Arguments
2288    /// * `start_offset` - The byte offset where the viewport starts
2289    /// * `line_count` - The number of lines to prepare (estimate)
2290    ///
2291    /// # Returns
2292    /// Ok(()) if preparation succeeded, Err if loading failed
2293    pub fn prepare_viewport(&mut self, start_offset: usize, line_count: usize) -> Result<()> {
2294        let _span = tracing::info_span!("prepare_viewport", start_offset, line_count).entered();
2295        // Estimate how many bytes we need (pessimistic assumption)
2296        // Average line length is typically 80-100 bytes, but we use 200 to be safe
2297        let estimated_bytes = line_count.saturating_mul(200);
2298
2299        // Cap the estimate at the remaining bytes in the document
2300        let remaining_bytes = self.total_bytes().saturating_sub(start_offset);
2301        let bytes_to_load = estimated_bytes.min(remaining_bytes);
2302        tracing::trace!(
2303            bytes_to_load,
2304            total_bytes = self.total_bytes(),
2305            "prepare_viewport loading"
2306        );
2307
2308        // Pre-load with full chunk-splitting support
2309        // This may load more than we need, but ensures all data is available
2310        self.get_text_range_mut(start_offset, bytes_to_load)?;
2311
2312        Ok(())
2313    }
2314
    /// Split a piece that references a large unloaded buffer, create a chunk
    /// buffer for the region around `current_offset`, and load it.
    ///
    /// # Arguments
    /// * `piece_view` - The unloaded piece (document/buffer offsets + length)
    /// * `current_offset` - Document offset the caller needs data at; used to
    ///   choose which aligned chunk of an oversized piece to carve out
    ///
    /// Returns `true` if the piece tree was modified (caller must restart its
    /// iteration), `false` if the piece was small enough to load in-place.
    fn chunk_split_and_load(
        &mut self,
        piece_view: &PieceView,
        current_offset: usize,
    ) -> Result<bool> {
        let buffer_id = piece_view.location.buffer_id();

        // The underlying buffer may be much larger than this piece (e.g. the
        // whole-file Stored buffer after rebuild_with_pristine_saved_root).
        // We must chunk-split if either the piece or its buffer exceeds
        // LOAD_CHUNK_SIZE, because `load()` loads the entire buffer.
        let buffer_bytes = self
            .buffers
            .get(buffer_id)
            .and_then(|b| b.unloaded_bytes())
            .unwrap_or(0)
        let needs_chunk_split =
            piece_view.bytes > LOAD_CHUNK_SIZE || buffer_bytes > piece_view.bytes;

        tracing::info!(
            buffer_id,
            piece_bytes = piece_view.bytes,
            buffer_bytes,
            needs_chunk_split,
            piece_doc_offset = piece_view.doc_offset,
            current_offset,
            "chunk_split_and_load: loading unloaded piece"
        );

        if !needs_chunk_split {
            // Piece is small enough and its buffer matches — load in-place.
            let _span = tracing::info_span!(
                "load_small_buffer",
                piece_bytes = piece_view.bytes,
                buffer_id,
            )
            .entered();
            self.buffers
                .get_mut(buffer_id)
                .context("Buffer not found")?
                .load(&*self.fs)
                .context("Failed to load buffer")?;
            return Ok(false);
        }

        let _span = tracing::info_span!(
            "chunk_split_and_load",
            piece_bytes = piece_view.bytes,
            buffer_id,
        )
        .entered();

        let piece_start_in_doc = piece_view.doc_offset;
        let offset_in_piece = current_offset.saturating_sub(piece_start_in_doc);

        // When the piece already fits within LOAD_CHUNK_SIZE, create a chunk
        // buffer for the exact piece range (no alignment/splitting needed).
        // Alignment rounding is only useful when carving a sub-range out of a
        // piece larger than LOAD_CHUNK_SIZE.
        let (chunk_start_in_buffer, chunk_bytes) = if piece_view.bytes <= LOAD_CHUNK_SIZE {
            (piece_view.buffer_offset, piece_view.bytes)
        } else {
            // Round the chunk start DOWN to a CHUNK_ALIGNMENT boundary, then
            // clamp the chunk length to the end of the piece.
            let start =
                (piece_view.buffer_offset + offset_in_piece) / CHUNK_ALIGNMENT * CHUNK_ALIGNMENT;
            let bytes = LOAD_CHUNK_SIZE
                .min((piece_view.buffer_offset + piece_view.bytes).saturating_sub(start));
            (start, bytes)
        };

        // Calculate document offsets for splitting
        let chunk_start_offset_in_piece =
            chunk_start_in_buffer.saturating_sub(piece_view.buffer_offset);
        let split_start_in_doc = piece_start_in_doc + chunk_start_offset_in_piece;
        let split_end_in_doc = split_start_in_doc + chunk_bytes;

        // Split the piece to isolate the chunk (only where the chunk does not
        // already coincide with a piece boundary).
        if chunk_start_offset_in_piece > 0 {
            self.piece_tree
                .split_at_offset(split_start_in_doc, &self.buffers);
        }
        if split_end_in_doc < piece_start_in_doc + piece_view.bytes {
            self.piece_tree
                .split_at_offset(split_end_in_doc, &self.buffers);
        }

        // Create a new buffer for this chunk
        let chunk_buffer = self
            .buffers
            .get(buffer_id)
            .context("Buffer not found")?
            .create_chunk_buffer(self.next_buffer_id, chunk_start_in_buffer, chunk_bytes)
            .context("Failed to create chunk buffer")?;

        self.next_buffer_id += 1;
        let new_buffer_id = chunk_buffer.id;
        self.buffers.push(chunk_buffer);

        // Update the piece to reference the new chunk buffer
        self.piece_tree.replace_buffer_reference(
            buffer_id,
            piece_view.buffer_offset + chunk_start_offset_in_piece,
            chunk_bytes,
            BufferLocation::Added(new_buffer_id),
        );

        // Load the chunk buffer
        self.buffers
            .get_mut(new_buffer_id)
            .context("Chunk buffer not found")?
            .load(&*self.fs)
            .context("Failed to load chunk")?;

        // split_at_offset uses compute_line_feeds_static which returns None
        // for unloaded buffers, destroying the scanned line feed counts.
        // Fix up: the loaded chunk is counted from memory, remaining unloaded
        // pieces use the filesystem's count_line_feeds_in_range.
        if self.line_feeds_scanned {
            let leaves = self.piece_tree.get_leaves();
            let mut fixups: Vec<(usize, usize)> = Vec::new();
            for (idx, leaf) in leaves.iter().enumerate() {
                if leaf.line_feed_cnt.is_none() {
                    if let Ok(count) = self.scan_leaf(leaf) {
                        fixups.push((idx, count));
                    }
                }
            }
            if !fixups.is_empty() {
                self.piece_tree.update_leaf_line_feeds_path_copy(&fixups);
            }
        }

        // Keep saved_root in sync with viewport-loading tree restructures so
        // that diff_since_saved() can match by (location, offset) identity.
        //
        // When !modified the current tree IS the saved state, so just snapshot.
        // When modified, we must apply the same Stored→Added leaf replacement
        // to saved_root so the diff doesn't see loaded-but-unedited regions as
        // changed.
        if !self.modified {
            self.saved_root = self.piece_tree.root();
        } else {
            self.apply_chunk_load_to_saved_root(
                buffer_id,
                chunk_start_in_buffer,
                chunk_bytes,
                new_buffer_id,
            );
        }

        Ok(true)
    }
2471
2472    /// Get all text as a single Vec<u8>
2473    /// Returns None if any buffers are unloaded (lazy loading)
2474    /// CRATE-PRIVATE: External code should use get_text_range_mut() or DocumentModel methods
2475    pub(crate) fn get_all_text(&self) -> Option<Vec<u8>> {
2476        self.get_text_range(0, self.total_bytes())
2477    }
2478
2479    /// Get all text as a String
2480    /// Returns None if any buffers are unloaded (lazy loading)
2481    /// CRATE-PRIVATE: External code should use get_text_range_mut() or DocumentModel methods
2482    pub(crate) fn get_all_text_string(&self) -> Option<String> {
2483        self.get_all_text()
2484            .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
2485    }
2486
2487    /// Get text from a byte range as bytes
2488    /// CRATE-PRIVATE: Returns empty vector if any buffers are unloaded (silently fails!)
2489    /// Only use this when you KNOW the data is loaded (e.g., for syntax highlighting small regions)
2490    /// External code should use get_text_range_mut() or DocumentModel methods
2491    pub(crate) fn slice_bytes(&self, range: Range<usize>) -> Vec<u8> {
2492        self.get_text_range(range.start, range.end.saturating_sub(range.start))
2493            .unwrap_or_default()
2494    }
2495
    /// Get all text as a String
    /// Returns None if any buffers are unloaded (lazy loading)
    ///
    /// NOTE(review): this inherent `to_string` shadows the usual
    /// `Display::to_string` naming (clippy `inherent_to_string`); kept
    /// because callers depend on the `Option` return.
    pub fn to_string(&self) -> Option<String> {
        self.get_all_text_string()
    }
2501
    /// Get the total number of bytes in the document.
    ///
    /// Delegates to `total_bytes()`, so the count covers the whole document
    /// regardless of which buffers are currently loaded.
    pub fn len(&self) -> usize {
        self.total_bytes()
    }
2506
2507    /// Check if the buffer is empty
2508    pub fn is_empty(&self) -> bool {
2509        self.total_bytes() == 0
2510    }
2511
    /// Get the file path associated with this buffer
    ///
    /// Returns `None` for unnamed (never-saved) buffers.
    pub fn file_path(&self) -> Option<&Path> {
        self.file_path.as_deref()
    }
2516
    /// Update the file path after a rename operation on disk.
    ///
    /// Pure bookkeeping: no filesystem operation is performed here.
    pub fn rename_file_path(&mut self, path: PathBuf) {
        self.file_path = Some(path);
    }
2521
    /// Clear the file path (make buffer unnamed)
    /// Note: This does NOT affect Unloaded chunk file_paths used for lazy loading.
    /// Those still point to the original source file for chunk loading.
    pub fn clear_file_path(&mut self) {
        self.file_path = None;
    }
2528
2529    /// Extend buffer to include more bytes from a streaming source file.
2530    /// Used for stdin streaming where the temp file grows over time.
2531    /// Appends a new Unloaded chunk for the new bytes.
2532    pub fn extend_streaming(&mut self, source_path: &Path, new_size: usize) {
2533        let old_size = self.total_bytes();
2534        if new_size <= old_size {
2535            return;
2536        }
2537
2538        let additional_bytes = new_size - old_size;
2539
2540        // Create new Unloaded buffer for the appended region
2541        let buffer_id = self.next_buffer_id;
2542        self.next_buffer_id += 1;
2543
2544        let new_buffer = StringBuffer::new_unloaded(
2545            buffer_id,
2546            source_path.to_path_buf(),
2547            old_size,         // file_offset - where this chunk starts in the file
2548            additional_bytes, // bytes - size of this chunk
2549        );
2550        self.buffers.push(new_buffer);
2551
2552        // Append piece at end of document (insert at offset == total_bytes)
2553        self.piece_tree.insert(
2554            old_size,
2555            BufferLocation::Stored(buffer_id),
2556            0,
2557            additional_bytes,
2558            None, // line_feed_cnt unknown for unloaded chunk
2559            &self.buffers,
2560        );
2561    }
2562
    /// Check if the buffer has been modified since last save
    ///
    /// Reads the `modified` flag maintained by edit and save operations.
    pub fn is_modified(&self) -> bool {
        self.modified
    }
2567
    /// Clear the modified flag (after save)
    pub fn clear_modified(&mut self) {
        self.modified = false;
    }
2572
    /// Set the modified flag explicitly
    /// Used by undo/redo to restore the correct modified state
    pub fn set_modified(&mut self, modified: bool) {
        self.modified = modified;
    }
2578
    /// Check if buffer has pending changes for recovery auto-save
    pub fn is_recovery_pending(&self) -> bool {
        self.recovery_pending
    }
2583
    /// Mark buffer as needing recovery auto-save (call after edits)
    ///
    /// Pass `false` to acknowledge that a recovery snapshot has been taken.
    pub fn set_recovery_pending(&mut self, pending: bool) {
        self.recovery_pending = pending;
    }
2588
2589    /// Ensure the buffer chunk at the given byte offset is loaded.
2590    ///
2591    /// When `line_feeds_scanned` is true, piece splits during insert/delete need
2592    /// the buffer data to be loaded so `compute_line_feeds_static` can accurately
2593    /// recount line feeds for each half. This method loads the chunk if needed.
2594    fn ensure_chunk_loaded_at(&mut self, offset: usize) {
2595        if let Some(piece_info) = self.piece_tree.find_by_offset(offset) {
2596            let buffer_id = piece_info.location.buffer_id();
2597            if let Some(buffer) = self.buffers.get_mut(buffer_id) {
2598                if !buffer.is_loaded() {
2599                    let buf_bytes = buffer.unloaded_bytes().unwrap_or(0);
2600                    tracing::info!(
2601                        "ensure_chunk_loaded_at: loading buffer {} ({} bytes) for offset {}",
2602                        buffer_id,
2603                        buf_bytes,
2604                        offset
2605                    );
2606                    if let Err(e) = buffer.load(&*self.fs) {
2607                        tracing::warn!("Failed to load chunk at offset {offset}: {e}");
2608                    }
2609                }
2610            }
2611        }
2612    }
2613
    /// Check if this is a large file with lazy loading enabled
    pub fn is_large_file(&self) -> bool {
        self.large_file
    }
2618
    /// Check if line feeds have been scanned for this large file.
    /// When true, `line_count()` returns exact values.
    pub fn has_line_feed_scan(&self) -> bool {
        self.line_feeds_scanned
    }
2624
    /// Get the raw piece tree leaves (for storing alongside scan chunks).
    ///
    /// Returns a snapshot; later tree edits do not update the returned Vec.
    pub fn piece_tree_leaves(&self) -> Vec<crate::model::piece_tree::LeafData> {
        self.piece_tree.get_leaves()
    }
2629
2630    /// Prepare work items for an incremental line scan.
2631    ///
2632    /// First splits any oversized leaves in the piece tree so every leaf is
2633    /// at most `LOAD_CHUNK_SIZE` bytes.  Then returns one work item per leaf.
2634    /// After scanning, `get_text_range_mut` will never need to split a scanned
2635    /// leaf (it's already chunk-sized), so line-feed counts are preserved.
2636    ///
2637    /// Returns `(chunks, total_bytes)`.
2638    pub fn prepare_line_scan(&mut self) -> (Vec<LineScanChunk>, usize) {
2639        // Pre-split the tree so every leaf ≤ LOAD_CHUNK_SIZE.
2640        self.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
2641
2642        let leaves = self.piece_tree.get_leaves();
2643        let total_bytes: usize = leaves.iter().map(|l| l.bytes).sum();
2644        let mut chunks = Vec::new();
2645
2646        for (idx, leaf) in leaves.iter().enumerate() {
2647            chunks.push(LineScanChunk {
2648                leaf_index: idx,
2649                byte_len: leaf.bytes,
2650                already_known: leaf.line_feed_cnt.is_some(),
2651            });
2652        }
2653
2654        (chunks, total_bytes)
2655    }
2656
2657    /// Count `\n` bytes in a single leaf.
2658    ///
2659    /// Uses `count_line_feeds_in_range` for unloaded buffers, which remote
2660    /// filesystem implementations can override to count server-side.
2661    pub fn scan_leaf(&self, leaf: &crate::model::piece_tree::LeafData) -> std::io::Result<usize> {
2662        let buffer_id = leaf.location.buffer_id();
2663        let buffer = self
2664            .buffers
2665            .get(buffer_id)
2666            .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::NotFound, "buffer not found"))?;
2667
2668        let count = match &buffer.data {
2669            crate::model::piece_tree::BufferData::Loaded { data, .. } => {
2670                let end = (leaf.offset + leaf.bytes).min(data.len());
2671                data[leaf.offset..end]
2672                    .iter()
2673                    .filter(|&&b| b == b'\n')
2674                    .count()
2675            }
2676            crate::model::piece_tree::BufferData::Unloaded {
2677                file_path,
2678                file_offset,
2679                ..
2680            } => {
2681                let read_offset = *file_offset as u64 + leaf.offset as u64;
2682                self.fs
2683                    .count_line_feeds_in_range(file_path, read_offset, leaf.bytes)?
2684            }
2685        };
2686        Ok(count)
2687    }
2688
2689    /// Return the I/O parameters for an unloaded leaf, or `None` if loaded.
2690    ///
2691    /// Used by the incremental scan to distinguish leaves that can be counted
2692    /// in-memory (via `scan_leaf`) from those that need filesystem I/O.
2693    pub fn leaf_io_params(
2694        &self,
2695        leaf: &crate::model::piece_tree::LeafData,
2696    ) -> Option<(std::path::PathBuf, u64, usize)> {
2697        let buffer_id = leaf.location.buffer_id();
2698        let buffer = self.buffers.get(buffer_id)?;
2699        match &buffer.data {
2700            crate::model::piece_tree::BufferData::Loaded { .. } => None,
2701            crate::model::piece_tree::BufferData::Unloaded {
2702                file_path,
2703                file_offset,
2704                ..
2705            } => {
2706                let read_offset = *file_offset as u64 + leaf.offset as u64;
2707                Some((file_path.clone(), read_offset, leaf.bytes))
2708            }
2709        }
2710    }
2711
    /// Get a reference to the string buffers (for parallel scanning).
    pub fn buffer_slice(&self) -> &[StringBuffer] {
        &self.buffers
    }
2716
    /// Apply the results of an incremental line scan.
    ///
    /// `updates` holds `(leaf_index, line_feed_count)` pairs, matching the
    /// leaf order from `prepare_line_scan`. After applying, the buffer
    /// reports exact line counts (`has_line_feed_scan` becomes true).
    pub fn apply_scan_updates(&mut self, updates: &[(usize, usize)]) {
        self.piece_tree.update_leaf_line_feeds(updates);
        self.line_feeds_scanned = true;
    }
2722
    /// After an incremental line-feed scan completes, rebuild the tree so that
    /// `saved_root` and the current tree share `Arc` pointers for unedited
    /// subtrees. This makes `diff_since_saved()` O(edited regions) instead of
    /// O(file size).
    ///
    /// `scan_updates` holds `(leaf_index, line_feed_count)` pairs for the
    /// leaves of the pristine (chunk-split) tree, as produced by the scan.
    pub fn rebuild_with_pristine_saved_root(&mut self, scan_updates: &[(usize, usize)]) {
        let file_size = match self.saved_file_size {
            Some(s) => s,
            None => {
                // Fallback: no saved file size means we can't build a pristine
                // tree. Just apply updates the old way.
                self.apply_scan_updates(scan_updates);
                return;
            }
        };

        // --- Walk the current tree to extract deletions and insertions ---
        let total = self.total_bytes();
        // Deletions: gaps in Stored coverage (orig_offset, len).
        let mut deletions: Vec<(usize, usize)> = Vec::new();
        // Insertions: (post_delete_offset, location, buf_offset, bytes, lf_cnt).
        // post_delete_offset = cumulative surviving Stored bytes before this point.
        let mut insertions: Vec<(usize, BufferLocation, usize, usize, Option<usize>)> = Vec::new();
        // Position reached so far in the ORIGINAL file's byte space.
        let mut orig_cursor: usize = 0;
        let mut stored_bytes_in_doc: usize = 0;

        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
            match piece.location {
                BufferLocation::Stored(_) => {
                    // A jump in buffer_offset means original bytes between
                    // orig_cursor and here were deleted.
                    if piece.buffer_offset > orig_cursor {
                        deletions.push((orig_cursor, piece.buffer_offset - orig_cursor));
                    }
                    orig_cursor = piece.buffer_offset + piece.bytes;
                    stored_bytes_in_doc += piece.bytes;
                }
                BufferLocation::Added(id) => {
                    // Check if this Added buffer was created by loading a chunk
                    // from the stored file (via get_text_range_mut chunk loading).
                    // If so, treat it as stored content, not a user edit.
                    if let Some(file_off) = self.buffers.get(id).and_then(|b| b.stored_file_offset)
                    {
                        if file_off > orig_cursor {
                            deletions.push((orig_cursor, file_off - orig_cursor));
                        }
                        orig_cursor = file_off + piece.bytes;
                        stored_bytes_in_doc += piece.bytes;
                    } else {
                        insertions.push((
                            stored_bytes_in_doc,
                            piece.location,
                            piece.buffer_offset,
                            piece.bytes,
                            piece.line_feed_cnt,
                        ));
                    }
                }
            }
        }
        // Trailing deletion.
        if orig_cursor < file_size {
            deletions.push((orig_cursor, file_size - orig_cursor));
        }

        // --- Build pristine tree (full original file, pre-split, with lf counts) ---
        let mut pristine = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
        } else {
            PieceTree::empty()
        };
        pristine.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
        pristine.update_leaf_line_feeds(scan_updates);

        // Snapshot the pristine tree as saved_root.
        self.saved_root = pristine.root();

        // If no edits, the pristine tree IS the current tree.
        if deletions.is_empty() && insertions.is_empty() {
            self.piece_tree = pristine;
            self.line_feeds_scanned = true;
            return;
        }

        // --- Replay edits onto a clone of the pristine tree ---
        let mut tree = pristine;

        // Apply deletions from HIGH to LOW offset so earlier offsets stay valid.
        deletions.sort_by(|a, b| b.0.cmp(&a.0));
        for &(offset, len) in &deletions {
            tree.delete(offset, len, &self.buffers);
        }

        // Apply insertions from LOW to HIGH. Each insertion shifts subsequent
        // offsets by its byte count, tracked via insert_delta.
        let mut insert_delta: usize = 0;
        for &(offset, location, buf_offset, bytes, lf_cnt) in &insertions {
            tree.insert(
                offset + insert_delta,
                location,
                buf_offset,
                bytes,
                lf_cnt,
                &self.buffers,
            );
            insert_delta += bytes;
        }

        // Path-copy insert/delete may split Stored leaves whose data is
        // Unloaded, producing fragments with line_feed_cnt = None
        // (compute_line_feeds_static can't read unloaded data). Fix them up
        // by scanning any remaining None leaves.
        let leaves = tree.get_leaves();
        let mut fixups: Vec<(usize, usize)> = Vec::new();
        for (idx, leaf) in leaves.iter().enumerate() {
            if leaf.line_feed_cnt.is_none() {
                if let Ok(count) = self.scan_leaf(leaf) {
                    fixups.push((idx, count));
                }
            }
        }
        if !fixups.is_empty() {
            tree.update_leaf_line_feeds_path_copy(&fixups);
        }

        self.piece_tree = tree;
        self.line_feeds_scanned = true;
    }
2848
    /// Resolve the exact byte offset for a given line number (0-indexed).
    ///
    /// Uses the tree's line feed counts to find the piece containing the target line,
    /// then loads/reads that piece's data to find the exact newline position.
    /// This works even when buffers are unloaded (large file with scanned line index).
    ///
    /// Returns `None` if the line is beyond the document or a disk read fails.
    ///
    /// NOTE(review): takes `&mut self` although this implementation only
    /// reads — presumably reserved for future in-place loading; confirm
    /// before relaxing to `&self`.
    pub fn resolve_line_byte_offset(&mut self, target_line: usize) -> Option<usize> {
        // Line 0 always starts at the very beginning of the document.
        if target_line == 0 {
            return Some(0);
        }

        // Use tree metadata to find the piece containing the target line
        let (doc_offset, buffer_id, piece_offset, piece_bytes, lines_before) =
            self.piece_tree.piece_info_for_line(target_line)?;

        // We need to find the (target_line - lines_before)-th newline within this piece
        let lines_to_skip = target_line - lines_before;

        // Get the piece data — either from loaded buffer or read from disk
        let buffer = self.buffers.get(buffer_id)?;
        let piece_data: Vec<u8> = match &buffer.data {
            crate::model::piece_tree::BufferData::Loaded { data, .. } => {
                let end = (piece_offset + piece_bytes).min(data.len());
                data[piece_offset..end].to_vec()
            }
            crate::model::piece_tree::BufferData::Unloaded {
                file_path,
                file_offset,
                ..
            } => {
                // Absolute file offset = chunk base + piece offset in chunk.
                let read_offset = *file_offset as u64 + piece_offset as u64;
                self.fs
                    .read_range(file_path, read_offset, piece_bytes)
                    .ok()?
            }
        };

        // Count newlines to find the target line start
        let mut newlines_found = 0;
        for (i, &byte) in piece_data.iter().enumerate() {
            if byte == b'\n' {
                newlines_found += 1;
                if newlines_found == lines_to_skip {
                    // The target line starts right after this newline
                    return Some(doc_offset + i + 1);
                }
            }
        }

        // If we didn't find enough newlines, the line starts in the next piece
        // Return the end of this piece as an approximation
        Some(doc_offset + piece_bytes)
    }
2901
    /// Get the saved file size (size of the file on disk after last load/save)
    /// For large files, this is used during recovery to know the expected original file size.
    /// Returns None for new unsaved buffers.
    pub fn original_file_size(&self) -> Option<usize> {
        // Return the tracked saved file size - this is updated when the file is
        // loaded or saved, so it always reflects the current file on disk.
        self.saved_file_size
    }
2910
2911    /// Get recovery chunks for this buffer (only modified portions)
2912    ///
2913    /// For large files, this returns only the pieces that come from Added buffers
2914    /// (i.e., the modifications), not the original file content. This allows
2915    /// efficient incremental recovery without reading/writing the entire file.
2916    ///
2917    /// Returns: Vec of (original_file_offset, data) for each modified chunk
2918    /// The offset is the position in the ORIGINAL file where this chunk should be inserted.
2919    pub fn get_recovery_chunks(&self) -> Vec<(usize, Vec<u8>)> {
2920        use crate::model::piece_tree::BufferLocation;
2921
2922        let mut chunks = Vec::new();
2923        let total = self.total_bytes();
2924
2925        // Track cumulative bytes from Stored pieces as we iterate.
2926        // This gives us the original file offset for Added pieces.
2927        // The key insight: Added pieces should be inserted at the position
2928        // corresponding to where they appear relative to Stored content,
2929        // not their position in the current document.
2930        let mut stored_bytes_before = 0;
2931
2932        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
2933            match piece.location {
2934                BufferLocation::Stored(_) => {
2935                    // Accumulate stored bytes to track position in original file
2936                    stored_bytes_before += piece.bytes;
2937                }
2938                BufferLocation::Added(buffer_id) => {
2939                    if let Some(buffer) = self.buffers.iter().find(|b| b.id == buffer_id) {
2940                        // Get the data from the buffer if loaded
2941                        if let Some(data) = buffer.get_data() {
2942                            // Extract just the portion this piece references
2943                            let start = piece.buffer_offset;
2944                            let end = start + piece.bytes;
2945                            if end <= data.len() {
2946                                // Use stored_bytes_before as the original file offset.
2947                                // This is where this insertion should go relative to
2948                                // the original file content.
2949                                chunks.push((stored_bytes_before, data[start..end].to_vec()));
2950                            }
2951                        }
2952                    }
2953                }
2954            }
2955        }
2956
2957        chunks
2958    }
2959
    /// Check if this buffer contains binary content
    pub fn is_binary(&self) -> bool {
        self.is_binary
    }
2964
    /// Get the line ending format for this buffer
    pub fn line_ending(&self) -> LineEnding {
        self.line_ending
    }
2969
    /// Set the line ending format for this buffer
    ///
    /// This marks the buffer as modified since the line ending format has changed.
    /// On save, the buffer content will be converted to the new format.
    pub fn set_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        // Conversion happens at save time; here we only record the change.
        self.mark_content_modified();
    }
2978
    /// Set the default line ending format for a new/empty buffer
    ///
    /// Unlike `set_line_ending`, this does NOT mark the buffer as modified.
    /// This should be used when initializing a new buffer with a configured default.
    pub fn set_default_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        // Also record it as the original format so this initialization is
        // never reported as an edit.
        self.original_line_ending = line_ending;
    }
2987
    /// Get the encoding format for this buffer
    pub fn encoding(&self) -> Encoding {
        self.encoding
    }
2992
    /// Set the encoding format for this buffer
    ///
    /// This marks the buffer as modified since the encoding format has changed.
    /// On save, the buffer content will be converted to the new encoding.
    pub fn set_encoding(&mut self, encoding: Encoding) {
        self.encoding = encoding;
        // Conversion happens at save time; here we only record the change.
        self.mark_content_modified();
    }
3001
    /// Set the default encoding format for a new/empty buffer
    ///
    /// Unlike `set_encoding`, this does NOT mark the buffer as modified.
    /// This should be used when initializing a new buffer with a configured default.
    pub fn set_default_encoding(&mut self, encoding: Encoding) {
        self.encoding = encoding;
        // Also record it as the original encoding so this initialization is
        // never reported as an edit.
        self.original_encoding = encoding;
    }
3010
3011    /// Detect the line ending format from a sample of bytes
3012    ///
3013    /// Uses majority voting: counts CRLF, LF-only, and CR-only occurrences
3014    /// and returns the most common format.
3015    pub fn detect_line_ending(bytes: &[u8]) -> LineEnding {
3016        // Only check the first 8KB for line ending detection (same as binary detection)
3017        let check_len = bytes.len().min(8 * 1024);
3018        let sample = &bytes[..check_len];
3019
3020        let mut crlf_count = 0;
3021        let mut lf_only_count = 0;
3022        let mut cr_only_count = 0;
3023
3024        let mut i = 0;
3025        while i < sample.len() {
3026            if sample[i] == b'\r' {
3027                // Check if this is CRLF
3028                if i + 1 < sample.len() && sample[i + 1] == b'\n' {
3029                    crlf_count += 1;
3030                    i += 2; // Skip both \r and \n
3031                    continue;
3032                } else {
3033                    // CR only (old Mac format)
3034                    cr_only_count += 1;
3035                }
3036            } else if sample[i] == b'\n' {
3037                // LF only (Unix format)
3038                lf_only_count += 1;
3039            }
3040            i += 1;
3041        }
3042
3043        // Use majority voting to determine line ending
3044        if crlf_count > lf_only_count && crlf_count > cr_only_count {
3045            LineEnding::CRLF
3046        } else if cr_only_count > lf_only_count && cr_only_count > crlf_count {
3047            LineEnding::CR
3048        } else {
3049            // Default to LF if no clear winner or if LF wins
3050            LineEnding::LF
3051        }
3052    }
3053
    /// Detect the text encoding from a sample of bytes
    ///
    /// Delegates to the encoding module. Use `detect_encoding_or_binary`
    /// when you need to know if the content should be treated as binary;
    /// this variant always returns an `Encoding`, even for binary data.
    pub fn detect_encoding(bytes: &[u8]) -> Encoding {
        encoding::detect_encoding(bytes)
    }
3061
    /// Detect the text encoding and whether content is binary.
    ///
    /// Returns (Encoding, is_binary) where:
    /// - Encoding is the detected encoding (or default if binary)
    /// - is_binary is true if the content should be treated as raw binary
    ///
    /// Delegates to the encoding module for detection logic; this wrapper
    /// exists so callers of the buffer API never import `encoding` directly.
    pub fn detect_encoding_or_binary(bytes: &[u8]) -> (Encoding, bool) {
        encoding::detect_encoding_or_binary(bytes)
    }
3072
    /// Detect encoding and convert bytes to UTF-8
    ///
    /// Returns the detected encoding and the UTF-8 converted content.
    /// This is the core function for normalizing file content to UTF-8 on load;
    /// the returned encoding is what `convert_to_encoding` should target on save.
    pub fn detect_and_convert_encoding(bytes: &[u8]) -> (Encoding, Vec<u8>) {
        encoding::detect_and_convert(bytes)
    }
3080
    /// Convert UTF-8 content to the specified encoding for saving
    ///
    /// Used when saving files to convert internal UTF-8 representation
    /// back to the original (or user-selected) encoding.
    /// Note: This does NOT add BOM - the BOM is handled separately in build_write_recipe.
    pub fn convert_to_encoding(utf8_bytes: &[u8], target_encoding: Encoding) -> Vec<u8> {
        encoding::convert_from_utf8(utf8_bytes, target_encoding)
    }
3089
3090    /// Normalize line endings in the given bytes to LF only
3091    ///
3092    /// Converts CRLF (\r\n) and CR (\r) to LF (\n) for internal representation.
3093    /// This makes editing and cursor movement simpler while preserving the
3094    /// original format for saving.
3095    #[allow(dead_code)] // Kept for tests and potential future use
3096    pub fn normalize_line_endings(bytes: Vec<u8>) -> Vec<u8> {
3097        let mut normalized = Vec::with_capacity(bytes.len());
3098        let mut i = 0;
3099
3100        while i < bytes.len() {
3101            if bytes[i] == b'\r' {
3102                // Check if this is CRLF
3103                if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
3104                    // CRLF -> LF
3105                    normalized.push(b'\n');
3106                    i += 2; // Skip both \r and \n
3107                    continue;
3108                } else {
3109                    // CR only -> LF
3110                    normalized.push(b'\n');
3111                }
3112            } else {
3113                // Copy byte as-is
3114                normalized.push(bytes[i]);
3115            }
3116            i += 1;
3117        }
3118
3119        normalized
3120    }
3121
3122    /// Convert line endings from any source format to any target format
3123    ///
3124    /// This first normalizes all line endings to LF, then converts to the target format.
3125    /// Used when saving files after the user has changed the line ending format.
3126    fn convert_line_endings_to(bytes: &[u8], target_ending: LineEnding) -> Vec<u8> {
3127        // First pass: normalize everything to LF
3128        let mut normalized = Vec::with_capacity(bytes.len());
3129        let mut i = 0;
3130        while i < bytes.len() {
3131            if bytes[i] == b'\r' {
3132                // Check if this is CRLF
3133                if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
3134                    // CRLF -> LF
3135                    normalized.push(b'\n');
3136                    i += 2;
3137                    continue;
3138                } else {
3139                    // CR only -> LF
3140                    normalized.push(b'\n');
3141                }
3142            } else {
3143                normalized.push(bytes[i]);
3144            }
3145            i += 1;
3146        }
3147
3148        // If target is LF, we're done
3149        if target_ending == LineEnding::LF {
3150            return normalized;
3151        }
3152
3153        // Second pass: convert LF to target format
3154        let replacement = target_ending.as_str().as_bytes();
3155        let mut result = Vec::with_capacity(normalized.len() + normalized.len() / 10);
3156
3157        for byte in normalized {
3158            if byte == b'\n' {
3159                result.extend_from_slice(replacement);
3160            } else {
3161                result.push(byte);
3162            }
3163        }
3164
3165        result
3166    }
3167
3168    /// Get text for a specific line
3169    pub fn get_line(&self, line: usize) -> Option<Vec<u8>> {
3170        let (start, end) = self.piece_tree.line_range(line, &self.buffers)?;
3171
3172        let bytes = if let Some(end_offset) = end {
3173            end_offset.saturating_sub(start)
3174        } else {
3175            self.total_bytes().saturating_sub(start)
3176        };
3177
3178        self.get_text_range(start, bytes)
3179    }
3180
3181    /// Get the byte offset where a line starts
3182    pub fn line_start_offset(&self, line: usize) -> Option<usize> {
3183        let (start, _) = self.piece_tree.line_range(line, &self.buffers)?;
3184        Some(start)
3185    }
3186
    /// Get piece information at a byte offset
    ///
    /// Delegates to the piece tree's offset lookup; returns None when no
    /// piece covers the offset (presumably an out-of-range offset — the
    /// lookup semantics live in `PieceTree::find_by_offset`).
    pub fn piece_info_at_offset(&self, offset: usize) -> Option<PieceInfo> {
        self.piece_tree.find_by_offset(offset)
    }
3191
    /// Get tree statistics for debugging
    ///
    /// Pure delegation to the underlying piece tree; no buffer state is read.
    pub fn stats(&self) -> TreeStats {
        self.piece_tree.stats()
    }
3196
3197    // Search and Replace Operations
3198
3199    /// Find the next occurrence of a pattern, with wrap-around
3200    pub fn find_next(&self, pattern: &str, start_pos: usize) -> Option<usize> {
3201        if pattern.is_empty() {
3202            return None;
3203        }
3204
3205        let pattern_bytes = pattern.as_bytes();
3206        let buffer_len = self.len();
3207
3208        // Search from start_pos to end
3209        if start_pos < buffer_len {
3210            if let Some(offset) = self.find_pattern(start_pos, buffer_len, pattern_bytes) {
3211                return Some(offset);
3212            }
3213        }
3214
3215        // Wrap around: search from beginning to start_pos
3216        if start_pos > 0 {
3217            if let Some(offset) = self.find_pattern(0, start_pos, pattern_bytes) {
3218                return Some(offset);
3219            }
3220        }
3221
3222        None
3223    }
3224
3225    /// Find the next occurrence of a pattern within an optional range
3226    /// If range is None, searches the entire buffer with wrap-around (same as find_next)
3227    /// If range is Some, searches only within that range without wrap-around
3228    pub fn find_next_in_range(
3229        &self,
3230        pattern: &str,
3231        start_pos: usize,
3232        range: Option<Range<usize>>,
3233    ) -> Option<usize> {
3234        if pattern.is_empty() {
3235            return None;
3236        }
3237
3238        if let Some(search_range) = range {
3239            // Search within range only, no wrap-around
3240            let pattern_bytes = pattern.as_bytes();
3241            let search_start = start_pos.max(search_range.start);
3242            let search_end = search_range.end.min(self.len());
3243
3244            if search_start < search_end {
3245                self.find_pattern(search_start, search_end, pattern_bytes)
3246            } else {
3247                None
3248            }
3249        } else {
3250            // No range specified, use normal find_next with wrap-around
3251            self.find_next(pattern, start_pos)
3252        }
3253    }
3254
    /// Find pattern in a byte range using overlapping chunks
    ///
    /// Streams the buffer in 64KB chunks whose starts overlap the previous
    /// chunk by `pattern.len() - 1` bytes, so a match straddling a chunk
    /// boundary is fully visible in exactly one chunk. Returns the absolute
    /// byte offset of the first match in `[start, end)`, or None.
    fn find_pattern(&self, start: usize, end: usize, pattern: &[u8]) -> Option<usize> {
        if pattern.is_empty() || start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 65536; // 64KB chunks
        // len-1 is the minimum overlap that catches boundary-spanning matches;
        // max(1) keeps the overlap non-zero for single-byte patterns.
        let overlap = pattern.len().saturating_sub(1).max(1);

        // Use the overlapping chunks iterator for efficient streaming search
        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, overlap);

        for chunk in chunks {
            // Search the entire chunk buffer
            if let Some(pos) = Self::find_in_bytes(&chunk.buffer, pattern) {
                let match_end = pos + pattern.len();
                // Only report if match ENDS in or after the valid zone
                // This ensures patterns spanning boundaries are found exactly once
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + pos;
                    // Verify the match doesn't extend beyond our search range
                    if absolute_pos + pattern.len() <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
3285
3286    /// Simple byte pattern search using naive algorithm
3287    fn find_in_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
3288        if needle.is_empty() || needle.len() > haystack.len() {
3289            return None;
3290        }
3291
3292        (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
3293    }
3294
3295    /// Find the next occurrence of a regex pattern, with wrap-around
3296    pub fn find_next_regex(&self, regex: &Regex, start_pos: usize) -> Option<usize> {
3297        let buffer_len = self.len();
3298
3299        // Search from start_pos to end
3300        if start_pos < buffer_len {
3301            if let Some(offset) = self.find_regex(start_pos, buffer_len, regex) {
3302                return Some(offset);
3303            }
3304        }
3305
3306        // Wrap around: search from beginning to start_pos
3307        if start_pos > 0 {
3308            if let Some(offset) = self.find_regex(0, start_pos, regex) {
3309                return Some(offset);
3310            }
3311        }
3312
3313        None
3314    }
3315
3316    /// Find the next occurrence of a regex pattern within an optional range
3317    pub fn find_next_regex_in_range(
3318        &self,
3319        regex: &Regex,
3320        start_pos: usize,
3321        range: Option<Range<usize>>,
3322    ) -> Option<usize> {
3323        if let Some(search_range) = range {
3324            let search_start = start_pos.max(search_range.start);
3325            let search_end = search_range.end.min(self.len());
3326
3327            if search_start < search_end {
3328                self.find_regex(search_start, search_end, regex)
3329            } else {
3330                None
3331            }
3332        } else {
3333            self.find_next_regex(regex, start_pos)
3334        }
3335    }
3336
    /// Find regex pattern in a byte range using overlapping chunks
    ///
    /// Streams the buffer in 1MB chunks with a fixed 4KB overlap so matches
    /// spanning a chunk boundary are still found. Returns the absolute byte
    /// offset of the first match in `[start, end)`, or None.
    ///
    /// NOTE(review): a single match longer than the 4KB overlap that straddles
    /// a chunk boundary could still be missed — confirm whether such patterns
    /// can occur in practice.
    fn find_regex(&self, start: usize, end: usize, regex: &Regex) -> Option<usize> {
        if start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 1048576; // 1MB chunks
        const OVERLAP: usize = 4096; // 4KB overlap for regex

        // Use the overlapping chunks iterator for efficient streaming search
        // This fixes the critical bug where regex patterns spanning chunk boundaries were missed
        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, OVERLAP);

        for chunk in chunks {
            // Search the entire chunk buffer
            if let Some(mat) = regex.find(&chunk.buffer) {
                let match_end = mat.end();
                // Only report if match ENDS in or after the valid zone
                // This ensures patterns spanning boundaries are found exactly once
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + mat.start();
                    // Verify the match doesn't extend beyond our search range
                    let match_len = mat.end() - mat.start();
                    if absolute_pos + match_len <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
3369
3370    /// Replace a range with replacement text
3371    pub fn replace_range(&mut self, range: Range<usize>, replacement: &str) -> bool {
3372        if range.start >= self.len() {
3373            return false;
3374        }
3375
3376        let end = range.end.min(self.len());
3377        if end > range.start {
3378            self.delete_bytes(range.start, end - range.start);
3379        }
3380
3381        if !replacement.is_empty() {
3382            self.insert(range.start, replacement);
3383        }
3384
3385        true
3386    }
3387
3388    /// Find and replace the next occurrence of a pattern
3389    pub fn replace_next(
3390        &mut self,
3391        pattern: &str,
3392        replacement: &str,
3393        start_pos: usize,
3394        range: Option<Range<usize>>,
3395    ) -> Option<usize> {
3396        if let Some(pos) = self.find_next_in_range(pattern, start_pos, range.clone()) {
3397            self.replace_range(pos..pos + pattern.len(), replacement);
3398            Some(pos)
3399        } else {
3400            None
3401        }
3402    }
3403
    /// Replace all occurrences of a pattern with replacement text
    ///
    /// Returns the number of replacements performed. The search moves strictly
    /// forward (no wrap-around) and resumes *after* each inserted replacement,
    /// so a replacement string containing the pattern is never re-matched and
    /// the loop cannot run forever.
    pub fn replace_all(&mut self, pattern: &str, replacement: &str) -> usize {
        if pattern.is_empty() {
            return 0;
        }

        let mut count = 0;
        let mut pos = 0;

        // Keep searching and replacing
        // Note: we search forward from last replacement to handle growth/shrinkage
        // Find next occurrence (no wrap-around for replace_all)
        while let Some(found_pos) = self.find_next_in_range(pattern, pos, Some(0..self.len())) {
            self.replace_range(found_pos..found_pos + pattern.len(), replacement);
            count += 1;

            // Move past the replacement
            pos = found_pos + replacement.len();

            // If we're at or past the end, stop
            if pos >= self.len() {
                break;
            }
        }

        count
    }
3431
    /// Replace all occurrences of a regex pattern with replacement text
    ///
    /// Returns the number of replacements performed. Errors only if the
    /// matched text cannot be read from the buffer.
    ///
    /// NOTE(review): each iteration re-reads the buffer tail from the match
    /// position just to re-run the regex and learn the match length — O(n)
    /// per match in the worst case. Revisit if this shows up in profiles.
    pub fn replace_all_regex(&mut self, regex: &Regex, replacement: &str) -> Result<usize> {
        let mut count = 0;
        let mut pos = 0;

        while let Some(found_pos) = self.find_next_regex_in_range(regex, pos, Some(0..self.len())) {
            // Get the match to find its length
            let text = self
                .get_text_range_mut(found_pos, self.len() - found_pos)
                .context("Failed to read text for regex match")?;

            // found_pos came from the same regex, so a match starts at offset 0
            // of `text`; leftmost-match semantics make mat.start() == 0 here,
            // and mat.len() is the length of that match.
            if let Some(mat) = regex.find(&text) {
                self.replace_range(found_pos..found_pos + mat.len(), replacement);
                count += 1;
                // Resume after the inserted replacement to avoid re-matching it.
                pos = found_pos + replacement.len();

                if pos >= self.len() {
                    break;
                }
            } else {
                break;
            }
        }

        Ok(count)
    }
3458
3459    // LSP Support (UTF-16 conversions)
3460
3461    /// Convert byte position to (line, column) in bytes
3462    pub fn position_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
3463        self.offset_to_position(byte_pos)
3464            .map(|pos| (pos.line, pos.column))
3465            .unwrap_or_else(|| (byte_pos / 80, 0)) // Estimate if metadata unavailable
3466    }
3467
3468    /// Convert (line, character) to byte position - 0-indexed
3469    /// character is in BYTES, not UTF-16 code units
3470    /// Optimized to use single line_range() call instead of two
3471    pub fn line_col_to_position(&self, line: usize, character: usize) -> usize {
3472        if let Some((start, end)) = self.piece_tree.line_range(line, &self.buffers) {
3473            // Calculate line length from the range
3474            let line_len = if let Some(end_offset) = end {
3475                end_offset.saturating_sub(start)
3476            } else {
3477                self.total_bytes().saturating_sub(start)
3478            };
3479            let byte_offset = character.min(line_len);
3480            start + byte_offset
3481        } else {
3482            // Line doesn't exist, return end of buffer
3483            self.len()
3484        }
3485    }
3486
3487    /// Convert byte position to LSP position (line, UTF-16 code units)
3488    /// LSP protocol uses UTF-16 code units for character offsets
3489    pub fn position_to_lsp_position(&self, byte_pos: usize) -> (usize, usize) {
3490        let (line, column_bytes) = self
3491            .offset_to_position(byte_pos)
3492            .map(|pos| (pos.line, pos.column))
3493            .unwrap_or_else(|| (byte_pos / 80, 0)); // Estimate if metadata unavailable
3494
3495        // Get the line content
3496        if let Some(line_bytes) = self.get_line(line) {
3497            // Convert byte offset to UTF-16 code units
3498            let text_before = &line_bytes[..column_bytes.min(line_bytes.len())];
3499            let text_str = String::from_utf8_lossy(text_before);
3500            let utf16_offset = text_str.encode_utf16().count();
3501            (line, utf16_offset)
3502        } else {
3503            (line, 0)
3504        }
3505    }
3506
    /// Convert LSP position (line, UTF-16 code units) to byte position
    /// LSP uses UTF-16 code units for character offsets, not bytes
    /// Optimized to use single line_range() call instead of two
    ///
    /// A nonexistent line maps to the end of the buffer; an offset past the
    /// end of the line maps to the end of the line. Note: an offset that lands
    /// inside a surrogate pair resolves past that character, since the loop
    /// below only stops on whole-character boundaries.
    pub fn lsp_position_to_byte(&self, line: usize, utf16_offset: usize) -> usize {
        if let Some((line_start, end)) = self.piece_tree.line_range(line, &self.buffers) {
            // Calculate line length and get line content
            let line_len = if let Some(end_offset) = end {
                end_offset.saturating_sub(line_start)
            } else {
                // Open-ended (last) line: runs to the end of the buffer.
                self.total_bytes().saturating_sub(line_start)
            };

            if line_len > 0 {
                // If data is unloaded, return line_start as fallback
                let Some(line_bytes) = self.get_text_range(line_start, line_len) else {
                    return line_start;
                };
                let line_str = String::from_utf8_lossy(&line_bytes);

                // Convert UTF-16 offset to byte offset by walking chars and
                // accumulating both widths in lockstep.
                let mut utf16_count = 0;
                let mut byte_offset = 0;

                for ch in line_str.chars() {
                    if utf16_count >= utf16_offset {
                        break;
                    }
                    utf16_count += ch.len_utf16();
                    byte_offset += ch.len_utf8();
                }

                line_start + byte_offset
            } else {
                // Empty line: the only valid position is its start.
                line_start
            }
        } else {
            // Line doesn't exist, return end of buffer
            self.len()
        }
    }
3547
3548    // Navigation helpers
3549
3550    /// Find the previous character boundary (UTF-8 aware)
3551    pub fn prev_char_boundary(&self, pos: usize) -> usize {
3552        if pos == 0 {
3553            return 0;
3554        }
3555
3556        // Get a few bytes before pos to find the character boundary
3557        let start = pos.saturating_sub(4);
3558        let Some(bytes) = self.get_text_range(start, pos - start) else {
3559            // Data unloaded, return pos as fallback
3560            return pos;
3561        };
3562
3563        // Walk backwards to find a UTF-8 leading byte
3564        for i in (0..bytes.len()).rev() {
3565            let byte = bytes[i];
3566            // Check if this is a UTF-8 leading byte (not a continuation byte)
3567            if (byte & 0b1100_0000) != 0b1000_0000 {
3568                return start + i;
3569            }
3570        }
3571
3572        // Fallback
3573        pos.saturating_sub(1)
3574    }
3575
3576    /// Find the next character boundary (UTF-8 aware)
3577    pub fn next_char_boundary(&self, pos: usize) -> usize {
3578        let len = self.len();
3579        if pos >= len {
3580            return len;
3581        }
3582
3583        // Get a few bytes after pos to find the character boundary
3584        let end = (pos + 5).min(len);
3585        let Some(bytes) = self.get_text_range(pos, end - pos) else {
3586            // Data unloaded, return pos as fallback
3587            return pos;
3588        };
3589
3590        // Start from index 1 (we want the NEXT boundary)
3591        for (i, &byte) in bytes.iter().enumerate().skip(1) {
3592            // Check if this is a UTF-8 leading byte (not a continuation byte)
3593            if (byte & 0b1100_0000) != 0b1000_0000 {
3594                return pos + i;
3595            }
3596        }
3597
3598        // If we got here, we're at the end or found no boundary in the range
3599        end
3600    }
3601
    /// Check if a byte is a UTF-8 continuation byte (not at a char boundary)
    /// UTF-8 continuation bytes have the pattern 10xxxxxx (0x80-0xBF)
    /// This is the same check that str::is_char_boundary uses internally.
    #[inline]
    fn is_utf8_continuation_byte(byte: u8) -> bool {
        // Mask the top two bits and compare against the 10xxxxxx prefix.
        (byte & 0b1100_0000) == 0b1000_0000
    }
3609
3610    /// Snap position to a valid UTF-8 character boundary
3611    /// If already at a boundary, returns the same position.
3612    /// Otherwise, moves to the previous valid boundary.
3613    pub fn snap_to_char_boundary(&self, pos: usize) -> usize {
3614        let len = self.len();
3615        if pos == 0 || pos >= len {
3616            return pos.min(len);
3617        }
3618
3619        // Get the byte at pos to check if we're at a character boundary
3620        let Some(bytes) = self.get_text_range(pos, 1) else {
3621            // Data unloaded, return pos as fallback
3622            return pos;
3623        };
3624
3625        // A position is at a char boundary if the byte there is NOT a continuation byte
3626        if !Self::is_utf8_continuation_byte(bytes[0]) {
3627            // Already at a character boundary
3628            return pos;
3629        }
3630
3631        // Not at a boundary, find the previous one
3632        self.prev_char_boundary(pos)
3633    }
3634
    /// Find the previous grapheme cluster boundary (for proper cursor movement with combining characters)
    ///
    /// This handles complex scripts like Thai where multiple Unicode code points
    /// form a single visual character (grapheme cluster). For example, Thai "ที่"
    /// is 3 code points but 1 grapheme cluster.
    ///
    /// Falls back to `prev_char_boundary` when the data is unloaded or cannot
    /// be decoded as UTF-8.
    pub fn prev_grapheme_boundary(&self, pos: usize) -> usize {
        if pos == 0 {
            return 0;
        }

        // Get enough context before pos to find grapheme boundaries
        // Thai combining characters can have multiple marks, so get up to 32 bytes
        // IMPORTANT: Align start to a valid character boundary to avoid invalid UTF-8
        // when get_text_range starts mid-character
        let raw_start = pos.saturating_sub(32);
        let start = if raw_start == 0 {
            0
        } else {
            // Find the character boundary at or before raw_start
            // (+1 so a raw_start already on a boundary maps to itself).
            self.prev_char_boundary(raw_start + 1)
        };

        let Some(bytes) = self.get_text_range(start, pos - start) else {
            // Data unloaded, fall back to char boundary
            return self.prev_char_boundary(pos);
        };

        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // Still got invalid UTF-8 (shouldn't happen after alignment)
                // Try using just the valid portion
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.prev_char_boundary(pos),
                }
            }
        };

        // Use shared grapheme utility with relative position
        let rel_pos = pos - start;
        let new_rel_pos = grapheme::prev_grapheme_boundary(text, rel_pos);

        // If we landed at the start of this chunk and there's more before,
        // we might need to look further back (a cluster may extend past the
        // 32-byte window); recurse with the chunk start as the new pos.
        if new_rel_pos == 0 && start > 0 {
            return self.prev_grapheme_boundary(start);
        }

        start + new_rel_pos
    }
3687
    /// Find the next grapheme cluster boundary (for proper cursor movement with combining characters)
    ///
    /// This handles complex scripts like Thai where multiple Unicode code points
    /// form a single visual character (grapheme cluster). For example, Thai "ที่"
    /// is 3 code points but 1 grapheme cluster.
    ///
    /// Falls back to `next_char_boundary` when the data is unloaded or cannot
    /// be decoded as UTF-8.
    pub fn next_grapheme_boundary(&self, pos: usize) -> usize {
        let len = self.len();
        if pos >= len {
            return len;
        }

        // Get enough context after pos to find grapheme boundaries
        // Thai combining characters can have multiple marks, so get up to 32 bytes
        let end = (pos + 32).min(len);
        let Some(bytes) = self.get_text_range(pos, end - pos) else {
            // Data unloaded, fall back to char boundary
            return self.next_char_boundary(pos);
        };

        // Convert to UTF-8 string, handling the case where we might have
        // grabbed bytes that end mid-character (truncate to valid UTF-8)
        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // The bytes end in an incomplete UTF-8 sequence
                // Use only the valid portion (which includes at least the first grapheme)
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.next_char_boundary(pos),
                }
            }
        };

        // Use shared grapheme utility: boundary after the cluster starting at 0.
        let new_rel_pos = grapheme::next_grapheme_boundary(text, 0);
        pos + new_rel_pos
    }
3726
3727    /// Find the previous word boundary
3728    pub fn prev_word_boundary(&self, pos: usize) -> usize {
3729        if pos == 0 {
3730            return 0;
3731        }
3732
3733        // Get some text before pos
3734        let start = pos.saturating_sub(256).max(0);
3735        let Some(bytes) = self.get_text_range(start, pos - start) else {
3736            // Data unloaded, return pos as fallback
3737            return pos;
3738        };
3739        let text = String::from_utf8_lossy(&bytes);
3740
3741        let mut found_word_char = false;
3742        let chars: Vec<char> = text.chars().collect();
3743
3744        for i in (0..chars.len()).rev() {
3745            let ch = chars[i];
3746            let is_word_char = ch.is_alphanumeric() || ch == '_';
3747
3748            if found_word_char && !is_word_char {
3749                // We've transitioned from word to non-word
3750                // Calculate the byte position
3751                let byte_offset: usize = chars[0..=i].iter().map(|c| c.len_utf8()).sum();
3752                return start + byte_offset;
3753            }
3754
3755            if is_word_char {
3756                found_word_char = true;
3757            }
3758        }
3759
3760        0
3761    }
3762
3763    /// Find the next word boundary
3764    pub fn next_word_boundary(&self, pos: usize) -> usize {
3765        let len = self.len();
3766        if pos >= len {
3767            return len;
3768        }
3769
3770        // Get some text after pos
3771        let end = (pos + 256).min(len);
3772        let Some(bytes) = self.get_text_range(pos, end - pos) else {
3773            // Data unloaded, return pos as fallback
3774            return pos;
3775        };
3776        let text = String::from_utf8_lossy(&bytes);
3777
3778        let mut found_word_char = false;
3779        let mut byte_offset = 0;
3780
3781        for ch in text.chars() {
3782            let is_word_char = ch.is_alphanumeric() || ch == '_';
3783
3784            if found_word_char && !is_word_char {
3785                // We've transitioned from word to non-word
3786                return pos + byte_offset;
3787            }
3788
3789            if is_word_char {
3790                found_word_char = true;
3791            }
3792
3793            byte_offset += ch.len_utf8();
3794        }
3795
3796        len
3797    }
3798
    /// Create a line iterator starting at the given byte position
    ///
    /// This iterator lazily loads chunks as needed, never scanning the entire file.
    /// For large files with unloaded buffers, chunks are loaded on-demand (1MB at a time).
    ///
    /// `estimated_line_length` is forwarded to the iterator (presumably for
    /// sizing its read-ahead — see `LineIterator::new` for the semantics).
    pub fn line_iterator(
        &mut self,
        byte_pos: usize,
        estimated_line_length: usize,
    ) -> LineIterator<'_> {
        LineIterator::new(self, byte_pos, estimated_line_length)
    }
3810
    /// Iterate over lines starting from a given byte offset, with line numbers
    ///
    /// This is a more efficient alternative to using line_iterator() + offset_to_position()
    /// because it calculates line numbers incrementally during iteration by accumulating
    /// line_feed_cnt from pieces (which is already tracked in the piece tree).
    ///
    /// Returns: Iterator yielding (byte_offset, content, line_number: Option<usize>)
    /// - line_number is Some(n) for small files with line metadata
    /// - line_number is None for large files without line metadata
    ///
    /// # Errors
    /// Propagates any error from `TextBufferLineIterator::new`.
    ///
    /// # Performance
    /// - O(1) per line for line number calculation (vs O(log n) per line with offset_to_position)
    /// - Uses single source of truth: piece tree's existing line_feed_cnt metadata
    pub fn iter_lines_from(
        &mut self,
        byte_pos: usize,
        max_lines: usize,
    ) -> Result<TextBufferLineIterator> {
        TextBufferLineIterator::new(self, byte_pos, max_lines)
    }
3831
3832    // Legacy API methods for backwards compatibility
3833
3834    /// Get the line number for a given byte offset
3835    ///
3836    /// Returns exact line number if metadata available, otherwise estimates based on bytes.
3837    ///
3838    /// # Behavior by File Size:
3839    /// - **Small files (< 1MB)**: Returns exact line number from piece tree's `line_starts` metadata
3840    /// - **Large files (≥ 1MB)**: Returns estimated line number using `byte_offset / estimated_line_length`
3841    ///
3842    /// Large files don't maintain line metadata for performance reasons. The estimation
3843    /// uses the configured `estimated_line_length` (default 80 bytes).
3844    pub fn get_line_number(&self, byte_offset: usize) -> usize {
3845        self.offset_to_position(byte_offset)
3846            .map(|pos| pos.line)
3847            .unwrap_or_else(|| {
3848                // Estimate line number based on configured average line length
3849                byte_offset / self.config.estimated_line_length
3850            })
3851    }
3852
    /// Get the configured estimated line length for approximate line number calculations.
    ///
    /// This value is the divisor `get_line_number` falls back to when no exact
    /// line metadata is available (large-file mode).
    pub fn estimated_line_length(&self) -> usize {
        self.config.estimated_line_length
    }
3857
    /// Get the starting line number at a byte offset (used for viewport rendering)
    ///
    /// # Line Cache Architecture (Post-Refactoring):
    ///
    /// The concept of a separate "line cache" is **now obsolete**. After the refactoring,
    /// line tracking is integrated directly into the piece tree via:
    /// ```text
    /// BufferData::Loaded {
    ///     data: Vec<u8>,
    ///     line_starts: Option<Vec<usize>>  // None = large file mode (no line metadata)
    /// }
    /// ```
    /// (The fence above is `text`, not `rust`: the snippet is illustrative and
    /// must not be compiled as a doctest.)
    ///
    /// ## Why This Method Still Exists:
    /// The rendering code needs to know what line number to display in the margin at the
    /// top of the viewport. This method returns that line number, handling both small
    /// and large file modes transparently.
    ///
    /// ## Small vs Large File Modes:
    /// - **Small files**: `line_starts = Some(vec)` → returns exact line number from metadata
    /// - **Large files**: `line_starts = None` → returns estimated line number (byte_offset / estimated_line_length)
    ///
    /// ## Legacy Line Cache Methods:
    /// These methods are now no-ops and can be removed in a future cleanup:
    /// - `invalidate_line_cache_from()` - No-op (piece tree updates automatically)
    /// - `handle_line_cache_insertion()` - No-op (piece tree updates automatically)
    /// - `handle_line_cache_deletion()` - No-op (piece tree updates automatically)
    /// - `clear_line_cache()` - No-op (can't clear piece tree metadata)
    ///
    /// ## Bug Fix (2025-11):
    /// Previously this method always returned `0`, causing line numbers in the margin
    /// to always show 1, 2, 3... regardless of scroll position. Now it correctly returns
    /// the actual line number at `start_byte`.
    ///
    /// The `_line_count` argument is ignored; it survives from the old cache API.
    pub fn populate_line_cache(&mut self, start_byte: usize, _line_count: usize) -> usize {
        // No-op for cache population: LineIndex maintains all line starts automatically
        // But we need to return the actual line number at start_byte for rendering
        self.get_line_number(start_byte)
    }
3896
    /// Get cached byte offset for line (compatibility method)
    ///
    /// Thin wrapper over `line_start_offset`; the historical "cache" no longer
    /// exists — the piece tree answers directly. Returns `None` whenever
    /// `line_start_offset` does.
    pub fn get_cached_byte_offset_for_line(&self, line_number: usize) -> Option<usize> {
        self.line_start_offset(line_number)
    }
3901
    /// Invalidate line cache from offset (no-op in new implementation)
    ///
    /// Retained only for API compatibility with the pre-refactoring line-cache
    /// API; safe to call, does nothing.
    pub fn invalidate_line_cache_from(&mut self, _byte_offset: usize) {
        // No-op: LineIndex updates automatically
    }

    /// Handle line cache insertion (no-op in new implementation)
    ///
    /// Retained only for API compatibility; safe to call, does nothing.
    pub fn handle_line_cache_insertion(&mut self, _byte_offset: usize, _bytes_inserted: usize) {
        // No-op: LineIndex updates automatically during insert
    }

    /// Handle line cache deletion (no-op in new implementation)
    ///
    /// Retained only for API compatibility; safe to call, does nothing.
    pub fn handle_line_cache_deletion(&mut self, _byte_offset: usize, _bytes_deleted: usize) {
        // No-op: LineIndex updates automatically during delete
    }

    /// Clear line cache (no-op in new implementation)
    ///
    /// Retained only for API compatibility; safe to call, does nothing.
    pub fn clear_line_cache(&mut self) {
        // No-op: LineIndex can't be cleared
    }
3921
    // Test helper methods

    /// Create a buffer from a string for testing
    ///
    /// Uses the real `StdFileSystem`; intended for unit tests only
    /// (only compiled under `#[cfg(test)]`).
    #[cfg(test)]
    pub fn from_str_test(s: &str) -> Self {
        Self::from_bytes(
            s.as_bytes().to_vec(),
            std::sync::Arc::new(crate::model::filesystem::StdFileSystem),
        )
    }

    /// Create a new empty buffer for testing
    ///
    /// Uses the real `StdFileSystem`; intended for unit tests only
    /// (only compiled under `#[cfg(test)]`).
    #[cfg(test)]
    pub fn new_test() -> Self {
        Self::empty(std::sync::Arc::new(crate::model::filesystem::StdFileSystem))
    }
3938}
3939
/// Type alias for backwards compatibility
///
/// New code should refer to `TextBuffer` directly; `Buffer` is retained so
/// pre-refactoring call sites keep compiling.
pub type Buffer = TextBuffer;
3942
3943// Re-export LineIterator from the line_iterator module
3944pub use crate::primitives::line_iterator::LineIterator;
3945
3946// ============================================================================
3947// Overlapping Chunks Iterator for Efficient Search
3948// ============================================================================
3949
/// Information about a chunk of data for pattern matching
///
/// Invariant: any match beginning at `buffer[i]` with `i < valid_start` was
/// already reported from the tail of the previous chunk and must be skipped
/// by the consumer to avoid duplicate results.
#[derive(Debug)]
pub struct ChunkInfo {
    /// The buffer containing this chunk's data (includes overlap from previous chunk)
    pub buffer: Vec<u8>,

    /// Absolute position in the document where this buffer starts
    pub absolute_pos: usize,

    /// Offset within buffer where "new" data starts (valid match zone)
    /// Matches starting before this offset were already checked in the previous chunk
    pub valid_start: usize,
}
3963
/// Iterator that yields overlapping chunks for pattern matching
///
/// This iterator implements the VSCode/Sublime approach: pull overlapping chunks
/// from the underlying piece tree and use standard search algorithms on them.
///
/// # Algorithm
///
/// ```text
/// Chunk 1: [------------ valid -----------]
/// Chunk 2:      [overlap][---- valid ----]
/// Chunk 3:                   [overlap][-- valid --]
///
/// Only matches starting in the "valid" zone are reported to avoid duplicates.
/// ```
///
/// # Example
///
/// ```ignore
/// let chunks = OverlappingChunks::new(&text_buffer, start, end, 4096, pattern.len()-1);
/// for chunk in chunks {
///     // Search only starting from chunk.valid_start
///     if let Some(pos) = search(&chunk.buffer[chunk.valid_start..]) {
///         let absolute_pos = chunk.absolute_pos + chunk.valid_start + pos;
///         return Some(absolute_pos);
///     }
/// }
/// ```
pub struct OverlappingChunks<'a> {
    // Pieces covering the requested [start, end) range of the document
    piece_iter: PieceRangeIter,
    // Backing buffers that piece locations index into
    buffers: &'a [StringBuffer],

    // Reusable chunk buffer that we fill from pieces
    buffer: Vec<u8>,
    // Absolute document offset of buffer[0]
    buffer_absolute_pos: usize,

    // Current state
    current_pos: usize, // absolute offset of the next byte to read
    end_pos: usize,     // exclusive end of the search range

    // Configuration
    chunk_size: usize, // target bytes of NEW data per chunk
    overlap: usize,    // bytes carried over between consecutive chunks

    // Track first chunk special case
    first_chunk: bool,

    // Cached piece data for incremental reading
    current_piece_data: Option<Vec<u8>>,
    current_piece_offset: usize, // read cursor into current_piece_data
}
4014
impl<'a> OverlappingChunks<'a> {
    /// Create a new overlapping chunks iterator
    ///
    /// # Arguments
    ///
    /// * `text_buffer` - The text buffer to iterate over
    /// * `start` - Start position in the document
    /// * `end` - End position in the document (exclusive)
    /// * `chunk_size` - Target size for each chunk (excluding overlap)
    /// * `overlap` - Number of bytes to overlap between chunks
    ///
    /// # Recommendations
    ///
    /// * For literal string search: `chunk_size=65536, overlap=pattern.len()-1`
    /// * For regex search: `chunk_size=1048576, overlap=4096`
    pub fn new(
        text_buffer: &'a TextBuffer,
        start: usize,
        end: usize,
        chunk_size: usize,
        overlap: usize,
    ) -> Self {
        let piece_iter = text_buffer.piece_tree.iter_pieces_in_range(start, end);

        Self {
            piece_iter,
            buffers: &text_buffer.buffers,
            // Sized for one full chunk plus its overlap so pushes don't reallocate
            buffer: Vec::with_capacity(chunk_size + overlap),
            buffer_absolute_pos: start,
            current_pos: start,
            end_pos: end,
            chunk_size,
            overlap,
            first_chunk: true,
            current_piece_data: None,
            current_piece_offset: 0,
        }
    }

    /// Read one byte from the piece iterator
    ///
    /// Returns `None` when the search range is exhausted. It also returns
    /// `None` (ending iteration early) if a piece's backing buffer is not
    /// loaded or the piece's clipped range is empty — skipping such a piece
    /// without advancing `current_pos` would desynchronize the absolute
    /// positions reported in `ChunkInfo`, so we stop instead.
    fn read_byte(&mut self) -> Option<u8> {
        loop {
            // If we have cached piece data, read from it
            if let Some(ref data) = self.current_piece_data {
                if self.current_piece_offset < data.len() {
                    let byte = data[self.current_piece_offset];
                    self.current_piece_offset += 1;
                    self.current_pos += 1;
                    return Some(byte);
                } else {
                    // Exhausted current piece, move to next
                    self.current_piece_data = None;
                    self.current_piece_offset = 0;
                }
            }

            // Get next piece
            if let Some(piece_view) = self.piece_iter.next() {
                let buffer_id = piece_view.location.buffer_id();
                if let Some(buffer) = self.buffers.get(buffer_id) {
                    // Extract the relevant slice from this piece
                    let piece_start_in_doc = piece_view.doc_offset;
                    let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                    // Clip to our search range
                    let read_start = self.current_pos.max(piece_start_in_doc);
                    let read_end = self.end_pos.min(piece_end_in_doc);

                    if read_end > read_start {
                        let offset_in_piece = read_start - piece_start_in_doc;
                        let bytes_to_read = read_end - read_start;

                        let buffer_start = piece_view.buffer_offset + offset_in_piece;
                        let buffer_end = buffer_start + bytes_to_read;

                        // get_data() is None for unloaded (lazily-loaded) buffers
                        if let Some(data) = buffer.get_data() {
                            if buffer_end <= data.len() {
                                // Cache this piece's data
                                self.current_piece_data =
                                    Some(data[buffer_start..buffer_end].to_vec());
                                self.current_piece_offset = 0;
                                continue;
                            }
                        }
                    }
                }
            }

            // No more data
            return None;
        }
    }

    /// Fill the buffer with the next chunk of data
    ///
    /// Returns `true` when the buffer holds data worth reporting:
    /// - First call: fills up to `chunk_size` bytes; `true` unless nothing was read.
    /// - Later calls: keeps only the trailing `overlap` bytes (advancing
    ///   `buffer_absolute_pos` by the drained amount), then appends up to
    ///   `chunk_size` new bytes; `true` only if new bytes were appended.
    fn fill_next_chunk(&mut self) -> bool {
        if self.first_chunk {
            // First chunk: fill up to chunk_size
            self.first_chunk = false;
            while self.buffer.len() < self.chunk_size && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }
            !self.buffer.is_empty()
        } else {
            // Subsequent chunks: keep overlap, fill chunk_size NEW bytes
            if self.current_pos >= self.end_pos {
                return false;
            }

            // Keep overlap bytes at the end
            if self.buffer.len() > self.overlap {
                let drain_amount = self.buffer.len() - self.overlap;
                self.buffer.drain(0..drain_amount);
                self.buffer_absolute_pos += drain_amount;
            }

            // Fill chunk_size NEW bytes (in addition to overlap)
            let before_len = self.buffer.len();
            let target_len = self.overlap + self.chunk_size;
            while self.buffer.len() < target_len && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }

            // Return true if we added new data
            self.buffer.len() > before_len
        }
    }
}
4150
4151impl<'a> Iterator for OverlappingChunks<'a> {
4152    type Item = ChunkInfo;
4153
4154    fn next(&mut self) -> Option<Self::Item> {
4155        // Track if this is the first chunk before filling
4156        let is_first = self.buffer_absolute_pos == self.current_pos;
4157
4158        if !self.fill_next_chunk() {
4159            return None;
4160        }
4161
4162        // First chunk: all data is valid (no overlap from previous)
4163        // Subsequent chunks: overlap bytes are not valid (already checked)
4164        let valid_start = if is_first {
4165            0
4166        } else {
4167            self.overlap.min(self.buffer.len())
4168        };
4169
4170        Some(ChunkInfo {
4171            buffer: self.buffer.clone(),
4172            absolute_pos: self.buffer_absolute_pos,
4173            valid_start,
4174        })
4175    }
4176}
4177
4178#[cfg(test)]
4179mod tests {
4180    use crate::model::filesystem::StdFileSystem;
4181    use std::sync::Arc;
4182
4183    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
4184        Arc::new(StdFileSystem)
4185    }
4186    use super::*;
4187
4188    #[test]
4189    fn test_empty_buffer() {
4190        let buffer = TextBuffer::empty(test_fs());
4191        assert_eq!(buffer.total_bytes(), 0);
4192        assert_eq!(buffer.line_count(), Some(1)); // Empty doc has 1 line
4193    }
4194
4195    #[test]
4196    fn test_line_positions_multiline() {
4197        let buffer = TextBuffer::from_bytes(b"Hello\nNew Line\nWorld!".to_vec(), test_fs());
4198
4199        // Check line count
4200        assert_eq!(buffer.line_count(), Some(3));
4201
4202        // Check line starts
4203        assert_eq!(buffer.line_start_offset(0), Some(0)); // "Hello\n" starts at 0
4204        assert_eq!(buffer.line_start_offset(1), Some(6)); // "New Line\n" starts at 6
4205        assert_eq!(buffer.line_start_offset(2), Some(15)); // "World!" starts at 15
4206
4207        // Check offset_to_position
4208        assert_eq!(buffer.offset_to_position(0).unwrap().line, 0); // Start of "Hello"
4209        assert_eq!(buffer.offset_to_position(5).unwrap().line, 0); // End of "Hello" (before \n)
4210        assert_eq!(buffer.offset_to_position(6).unwrap().line, 1); // Start of "New Line"
4211        assert_eq!(buffer.offset_to_position(14).unwrap().line, 1); // End of "New Line" (before \n)
4212        assert_eq!(buffer.offset_to_position(15).unwrap().line, 2); // Start of "World!"
4213
4214        // Check line_col_to_position
4215        assert_eq!(buffer.line_col_to_position(0, 5), 5); // End of line 0
4216        assert_eq!(buffer.line_col_to_position(1, 0), 6); // Start of line 1
4217        assert_eq!(buffer.line_col_to_position(1, 8), 14); // End of line 1
4218        assert_eq!(buffer.line_col_to_position(2, 0), 15); // Start of line 2
4219    }
4220
4221    #[test]
4222    fn test_new_from_content() {
4223        let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
4224        assert_eq!(buffer.total_bytes(), 11);
4225        assert_eq!(buffer.line_count(), Some(2));
4226    }
4227
4228    #[test]
4229    fn test_get_all_text() {
4230        let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
4231        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld");
4232    }
4233
4234    #[test]
4235    fn test_insert_at_start() {
4236        let mut buffer = TextBuffer::from_bytes(b"world".to_vec(), test_fs());
4237        buffer.insert_bytes(0, b"hello ".to_vec());
4238
4239        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
4240        assert_eq!(buffer.total_bytes(), 11);
4241    }
4242
4243    #[test]
4244    fn test_insert_in_middle() {
4245        let mut buffer = TextBuffer::from_bytes(b"helloworld".to_vec(), test_fs());
4246        buffer.insert_bytes(5, b" ".to_vec());
4247
4248        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
4249        assert_eq!(buffer.total_bytes(), 11);
4250    }
4251
4252    #[test]
4253    fn test_insert_at_end() {
4254        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
4255        buffer.insert_bytes(5, b" world".to_vec());
4256
4257        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
4258        assert_eq!(buffer.total_bytes(), 11);
4259    }
4260
4261    #[test]
4262    fn test_insert_with_newlines() {
4263        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
4264        buffer.insert_bytes(5, b"\nworld\ntest".to_vec());
4265
4266        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld\ntest");
4267        assert_eq!(buffer.line_count(), Some(3));
4268    }
4269
4270    #[test]
4271    fn test_delete_from_start() {
4272        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4273        buffer.delete_bytes(0, 6);
4274
4275        assert_eq!(buffer.get_all_text().unwrap(), b"world");
4276        assert_eq!(buffer.total_bytes(), 5);
4277    }
4278
4279    #[test]
4280    fn test_delete_from_middle() {
4281        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4282        buffer.delete_bytes(5, 1);
4283
4284        assert_eq!(buffer.get_all_text().unwrap(), b"helloworld");
4285        assert_eq!(buffer.total_bytes(), 10);
4286    }
4287
4288    #[test]
4289    fn test_delete_from_end() {
4290        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4291        buffer.delete_bytes(6, 5);
4292
4293        assert_eq!(buffer.get_all_text().unwrap(), b"hello ");
4294        assert_eq!(buffer.total_bytes(), 6);
4295    }
4296
4297    #[test]
4298    fn test_delete_with_newlines() {
4299        let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4300        buffer.delete_bytes(5, 7); // Delete "\nworld\n"
4301
4302        assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
4303        assert_eq!(buffer.line_count(), Some(1));
4304    }
4305
4306    #[test]
4307    fn test_offset_position_conversions() {
4308        let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4309
4310        let pos = buffer.offset_to_position(0);
4311        assert_eq!(pos, Some(Position { line: 0, column: 0 }));
4312
4313        let pos = buffer.offset_to_position(6);
4314        assert_eq!(pos, Some(Position { line: 1, column: 0 }));
4315
4316        let offset = buffer.position_to_offset(Position { line: 1, column: 0 });
4317        assert_eq!(offset, 6);
4318    }
4319
4320    #[test]
4321    fn test_insert_at_position() {
4322        let mut buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
4323        buffer.insert_at_position(Position { line: 1, column: 0 }, b"beautiful ".to_vec());
4324
4325        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nbeautiful world");
4326    }
4327
4328    #[test]
4329    fn test_delete_range() {
4330        let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4331
4332        let start = Position { line: 0, column: 5 };
4333        let end = Position { line: 2, column: 0 };
4334        buffer.delete_range(start, end);
4335
4336        assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
4337    }
4338
4339    #[test]
4340    fn test_get_line() {
4341        let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4342
4343        assert_eq!(buffer.get_line(0), Some(b"hello\n".to_vec()));
4344        assert_eq!(buffer.get_line(1), Some(b"world\n".to_vec()));
4345        assert_eq!(buffer.get_line(2), Some(b"test".to_vec()));
4346        assert_eq!(buffer.get_line(3), None);
4347    }
4348
4349    #[test]
4350    fn test_multiple_operations() {
4351        let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());
4352
4353        buffer.insert_bytes(0, b"start\n".to_vec());
4354        assert_eq!(buffer.line_count(), Some(4));
4355
4356        buffer.delete_bytes(6, 6); // Delete "line1\n"
4357        assert_eq!(buffer.line_count(), Some(3));
4358
4359        buffer.insert_bytes(6, b"new\n".to_vec());
4360        assert_eq!(buffer.line_count(), Some(4));
4361
4362        let text = buffer.get_all_text().unwrap();
4363        assert_eq!(text, b"start\nnew\nline2\nline3");
4364    }
4365
4366    #[test]
4367    fn test_get_text_range() {
4368        let buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4369
4370        assert_eq!(buffer.get_text_range(0, 5), Some(b"hello".to_vec()));
4371        assert_eq!(buffer.get_text_range(6, 5), Some(b"world".to_vec()));
4372        assert_eq!(buffer.get_text_range(0, 11), Some(b"hello world".to_vec()));
4373    }
4374
4375    #[test]
4376    fn test_empty_operations() {
4377        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
4378
4379        buffer.insert_bytes(2, Vec::new());
4380        assert_eq!(buffer.get_all_text().unwrap(), b"hello");
4381
4382        buffer.delete_bytes(2, 0);
4383        assert_eq!(buffer.get_all_text().unwrap(), b"hello");
4384    }
4385
4386    #[test]
4387    fn test_sequential_inserts_at_beginning() {
4388        // Regression test for piece tree duplicate insertion bug
4389        let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
4390
4391        // Delete all
4392        buffer.delete_bytes(0, 12);
4393        assert_eq!(buffer.get_all_text().unwrap(), b"");
4394
4395        // Insert 'a' at 0
4396        buffer.insert_bytes(0, vec![b'a']);
4397        assert_eq!(buffer.get_all_text().unwrap(), b"a");
4398
4399        // Insert 'b' at 0 (should give "ba")
4400        buffer.insert_bytes(0, vec![b'b']);
4401        assert_eq!(buffer.get_all_text().unwrap(), b"ba");
4402    }
4403
4404    // ===== Phase 1-3: Large File Support Tests =====
4405
4406    mod large_file_support {
4407        use super::*;
4408        use crate::model::piece_tree::StringBuffer;
4409        use std::fs::File;
4410        use std::io::Write;
4411        use tempfile::TempDir;
4412
4413        // Phase 1: Option<usize> Type Safety Tests
4414
4415        #[test]
4416        fn test_line_feed_count_is_some_for_loaded_buffer() {
4417            let buffer = StringBuffer::new(0, b"hello\nworld\ntest".to_vec());
4418            assert_eq!(buffer.line_feed_count(), Some(2));
4419        }
4420
4421        #[test]
4422        fn test_line_feed_count_is_none_for_unloaded_buffer() {
4423            let temp_dir = TempDir::new().unwrap();
4424            let file_path = temp_dir.path().join("test.txt");
4425
4426            let buffer = StringBuffer::new_unloaded(0, file_path, 0, 100);
4427            assert_eq!(buffer.line_feed_count(), None);
4428        }
4429
4430        #[test]
4431        fn test_line_count_is_some_for_small_buffer() {
4432            let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4433            assert_eq!(buffer.line_count(), Some(3));
4434        }
4435
4436        #[test]
4437        fn test_piece_tree_works_with_none_line_count() {
4438            // Create a buffer with no line count information
4439            let buffer = StringBuffer::new_loaded(0, b"hello\nworld".to_vec(), false);
4440            assert_eq!(buffer.line_feed_count(), None);
4441
4442            // Create piece tree without line feed count
4443            use crate::model::piece_tree::{BufferLocation, PieceTree};
4444            let tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, None);
4445
4446            // line_count should return None
4447            assert_eq!(tree.line_count(), None);
4448        }
4449
4450        // Phase 2: BufferData Enum Tests
4451
4452        #[test]
4453        fn test_buffer_data_loaded_variant() {
4454            let data = b"hello world".to_vec();
4455            let buffer = StringBuffer::new_loaded(0, data.clone(), true);
4456
4457            assert!(buffer.is_loaded());
4458            assert_eq!(buffer.get_data(), Some(&data[..]));
4459            assert!(buffer.get_line_starts().is_some());
4460        }
4461
4462        #[test]
4463        fn test_buffer_data_loaded_without_line_starts() {
4464            let data = b"hello\nworld".to_vec();
4465            let buffer = StringBuffer::new_loaded(0, data.clone(), false);
4466
4467            assert!(buffer.is_loaded());
4468            assert_eq!(buffer.get_data(), Some(&data[..]));
4469            assert_eq!(buffer.get_line_starts(), None); // No line indexing
4470        }
4471
4472        #[test]
4473        fn test_buffer_data_unloaded_variant() {
4474            let temp_dir = TempDir::new().unwrap();
4475            let file_path = temp_dir.path().join("test.txt");
4476
4477            let buffer = StringBuffer::new_unloaded(0, file_path.clone(), 0, 100);
4478
4479            assert!(!buffer.is_loaded());
4480            assert_eq!(buffer.get_data(), None);
4481            assert_eq!(buffer.get_line_starts(), None);
4482        }
4483
4484        #[test]
4485        fn test_buffer_load_method() {
4486            let temp_dir = TempDir::new().unwrap();
4487            let file_path = temp_dir.path().join("test.txt");
4488
4489            // Create test file
4490            let test_data = b"hello world";
4491            File::create(&file_path)
4492                .unwrap()
4493                .write_all(test_data)
4494                .unwrap();
4495
4496            // Create unloaded buffer
4497            let mut buffer = StringBuffer::new_unloaded(0, file_path, 0, test_data.len());
4498            assert!(!buffer.is_loaded());
4499
4500            // Load the buffer using local filesystem
4501            let fs = crate::model::filesystem::StdFileSystem;
4502            buffer.load(&fs).unwrap();
4503
4504            // Now it should be loaded
4505            assert!(buffer.is_loaded());
4506            assert_eq!(buffer.get_data(), Some(&test_data[..]));
4507        }
4508
4509        #[test]
4510        fn test_string_buffer_new_vs_new_loaded() {
4511            let data = b"hello\nworld".to_vec();
4512
4513            // StringBuffer::new should compute line starts
4514            let buf1 = StringBuffer::new(0, data.clone());
4515            assert!(buf1.is_loaded());
4516            assert!(buf1.get_line_starts().is_some());
4517            assert_eq!(buf1.line_feed_count(), Some(1));
4518
4519            // StringBuffer::new_loaded with compute_lines=false should not
4520            let buf2 = StringBuffer::new_loaded(0, data.clone(), false);
4521            assert!(buf2.is_loaded());
4522            assert_eq!(buf2.get_line_starts(), None);
4523            assert_eq!(buf2.line_feed_count(), None);
4524        }
4525
4526        // Phase 3: Large File Detection Tests
4527
4528        #[test]
4529        fn test_load_small_file_eager_loading() {
4530            let temp_dir = TempDir::new().unwrap();
4531            let file_path = temp_dir.path().join("small.txt");
4532
4533            // Create a small file (10 bytes < 100MB threshold)
4534            let test_data = b"hello\ntest";
4535            File::create(&file_path)
4536                .unwrap()
4537                .write_all(test_data)
4538                .unwrap();
4539
4540            // Load with default threshold
4541            let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();
4542
4543            // Should be eagerly loaded (not large_file mode)
4544            assert!(!buffer.large_file);
4545            assert_eq!(buffer.total_bytes(), test_data.len());
4546            assert_eq!(buffer.line_count(), Some(2)); // Has line indexing
4547            assert_eq!(buffer.get_all_text().unwrap(), test_data);
4548
4549            // The buffer should be loaded
4550            assert!(buffer.buffers[0].is_loaded());
4551        }
4552
4553        #[test]
4554        fn test_load_large_file_lazy_loading() {
4555            let temp_dir = TempDir::new().unwrap();
4556            let file_path = temp_dir.path().join("large.txt");
4557
4558            // Create a "large" file by using a small threshold
4559            let test_data = b"hello\nworld\ntest";
4560            File::create(&file_path)
4561                .unwrap()
4562                .write_all(test_data)
4563                .unwrap();
4564
4565            // Load with threshold of 10 bytes (file is 17 bytes, so it's "large")
4566            let buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();
4567
4568            // Should be in large_file mode
4569            assert!(buffer.large_file);
4570            assert_eq!(buffer.total_bytes(), test_data.len());
4571
4572            // Should NOT have line indexing
4573            assert_eq!(buffer.line_count(), None);
4574
4575            // The buffer should be unloaded
4576            assert!(!buffer.buffers[0].is_loaded());
4577            assert_eq!(buffer.buffers[0].get_data(), None);
4578        }
4579
4580        /// Test that reproduces issue #657: Search on large plain text files
4581        ///
4582        /// The bug: When a large file is opened with lazy loading, buffer.to_string()
4583        /// returns None because some buffers are unloaded. This causes search to fail
4584        /// with "Buffer not fully loaded" error.
4585        ///
4586        /// The fix: Use get_text_range_mut() which loads the buffer on demand.
4587        #[test]
4588        fn test_issue_657_search_on_large_file_unloaded_buffer() {
4589            let temp_dir = TempDir::new().unwrap();
4590            let file_path = temp_dir.path().join("large_search_test.txt");
4591
4592            // Create test content with a searchable string
4593            let test_data = b"line1\nline2\nSEARCH_TARGET\nline4\nline5";
4594            File::create(&file_path)
4595                .unwrap()
4596                .write_all(test_data)
4597                .unwrap();
4598
4599            // Load with small threshold to force lazy loading
4600            let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();
4601
4602            // Verify we're in large file mode with unloaded buffer
4603            assert!(buffer.large_file, "Buffer should be in large file mode");
4604            assert!(
4605                !buffer.buffers[0].is_loaded(),
4606                "Buffer should be unloaded initially"
4607            );
4608
4609            // REPRODUCE THE BUG: to_string() returns None for unloaded buffers
4610            // This is what the old perform_search() code did, causing the error
4611            assert!(
4612                buffer.to_string().is_none(),
4613                "BUG REPRODUCED: to_string() returns None for unloaded buffer"
4614            );
4615
4616            // THE FIX: get_text_range_mut() loads the buffer on demand
4617            let total_bytes = buffer.len();
4618            let content = buffer.get_text_range_mut(0, total_bytes).unwrap();
4619            let content_str = String::from_utf8_lossy(&content);
4620
4621            // Verify the content is now available and contains our search target
4622            assert!(
4623                content_str.contains("SEARCH_TARGET"),
4624                "FIX WORKS: get_text_range_mut() loaded the buffer and found the search target"
4625            );
4626
4627            // After loading, to_string() should also work
4628            assert!(
4629                buffer.to_string().is_some(),
4630                "After get_text_range_mut(), to_string() should work"
4631            );
4632        }
4633
4634        #[test]
4635        fn test_large_file_threshold_boundary() {
4636            let temp_dir = TempDir::new().unwrap();
4637
4638            // Test exactly at threshold
4639            let file_path = temp_dir.path().join("at_threshold.txt");
4640            let test_data = vec![b'x'; 100];
4641            File::create(&file_path)
4642                .unwrap()
4643                .write_all(&test_data)
4644                .unwrap();
4645
4646            // Load with threshold of 100 bytes - should be large file (>= threshold)
4647            let buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
4648            assert!(buffer.large_file);
4649
4650            // Test just below threshold
4651            let file_path2 = temp_dir.path().join("below_threshold.txt");
4652            let test_data2 = vec![b'x'; 99];
4653            File::create(&file_path2)
4654                .unwrap()
4655                .write_all(&test_data2)
4656                .unwrap();
4657
4658            // Load with threshold of 100 bytes - should be small file (< threshold)
4659            let buffer2 = TextBuffer::load_from_file(&file_path2, 100, test_fs()).unwrap();
4660            assert!(!buffer2.large_file);
4661        }
4662
4663        #[test]
4664        fn test_large_file_default_threshold() {
4665            let temp_dir = TempDir::new().unwrap();
4666            let file_path = temp_dir.path().join("test.txt");
4667
4668            // Create a small file
4669            File::create(&file_path)
4670                .unwrap()
4671                .write_all(b"hello")
4672                .unwrap();
4673
4674            // Load with threshold 0 - should use DEFAULT_LARGE_FILE_THRESHOLD
4675            let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();
4676
4677            // 5 bytes < 100MB, so should not be large file
4678            assert!(!buffer.large_file);
4679        }
4680
4681        #[test]
4682        fn test_large_file_has_correct_piece_tree_structure() {
4683            let temp_dir = TempDir::new().unwrap();
4684            let file_path = temp_dir.path().join("large.txt");
4685
4686            let test_data = b"hello world";
4687            File::create(&file_path)
4688                .unwrap()
4689                .write_all(test_data)
4690                .unwrap();
4691
4692            // Load as large file
4693            let buffer = TextBuffer::load_from_file(&file_path, 5, test_fs()).unwrap();
4694
4695            // Should have correct total bytes
4696            assert_eq!(buffer.total_bytes(), test_data.len());
4697
4698            // Should have 1 buffer
4699            assert_eq!(buffer.buffers.len(), 1);
4700
4701            // Buffer should be unloaded
4702            assert!(!buffer.buffers[0].is_loaded());
4703        }
4704
        #[test]
        fn test_empty_large_file() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("empty.txt");

            // Create an empty file
            File::create(&file_path).unwrap();

            // NOTE(review): threshold 0 falls back to DEFAULT_LARGE_FILE_THRESHOLD
            // (see test_large_file_default_threshold), so a 0-byte file is NOT
            // actually in large-file mode here. This exercises the empty-file
            // path of load_from_file rather than lazy loading -- confirm whether
            // a nonzero tiny threshold was intended.
            let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();

            // Empty file is handled gracefully
            assert_eq!(buffer.total_bytes(), 0);
            assert!(buffer.is_empty());
        }
4720
        /// End-to-end smoke test of the byte-level API on a buffer in
        /// large-file mode: reads, inserts, deletes, replacement, and
        /// offset/position mapping. The steps are order-dependent: each
        /// edit's expected sizes build on the previous state.
        #[test]
        fn test_large_file_basic_api_operations() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_test.txt");

            // Create a test file with known content
            let test_data = b"line1\nline2\nline3\nline4\n";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Load as large file (use small threshold to trigger large file mode)
            let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();

            // Verify it's in large file mode
            assert!(buffer.large_file);
            assert_eq!(buffer.line_count(), None); // No line indexing

            // Test basic access functions
            assert_eq!(buffer.total_bytes(), test_data.len());
            assert!(!buffer.is_empty());
            assert_eq!(buffer.len(), test_data.len());

            // Test reading operations using get_text_range_mut (lazy loads on demand)
            let range_result = buffer.get_text_range_mut(0, 5).unwrap();
            assert_eq!(range_result, b"line1");

            // Offset 6 skips "line1\n" (6 bytes) to the start of line2.
            let range_result2 = buffer.get_text_range_mut(6, 5).unwrap();
            assert_eq!(range_result2, b"line2");

            // Test get_all_text (via get_text_range after lazy loading)
            let all_text = buffer.get_all_text().unwrap();
            assert_eq!(all_text, test_data);

            // Test slice_bytes method
            assert_eq!(buffer.slice_bytes(0..5), b"line1");

            // Test basic editing operations
            // Insert at offset 0; "prefix_" is 7 bytes.
            buffer.insert_bytes(0, b"prefix_".to_vec());
            assert_eq!(buffer.total_bytes(), test_data.len() + 7);
            assert!(buffer.is_modified());

            // Verify the insertion worked
            let text_after_insert = buffer.get_all_text().unwrap();
            assert_eq!(&text_after_insert[0..7], b"prefix_");
            assert_eq!(&text_after_insert[7..12], b"line1");

            // Delete the 7-byte prefix again
            buffer.delete_bytes(0, 7);
            assert_eq!(buffer.total_bytes(), test_data.len());

            // Verify deletion worked - should be back to original
            let text_after_delete = buffer.get_all_text().unwrap();
            assert_eq!(text_after_delete, test_data);

            // Insert at end ("suffix" is 6 bytes)
            let end_offset = buffer.total_bytes();
            buffer.insert_bytes(end_offset, b"suffix".to_vec());
            assert_eq!(buffer.total_bytes(), test_data.len() + 6);

            // Verify end insertion
            let final_text = buffer.get_all_text().unwrap();
            assert!(final_text.ends_with(b"suffix"));
            assert_eq!(&final_text[0..test_data.len()], test_data);

            // Test offset_to_position
            // Note: Without line indexing, position tracking is limited
            // but byte-level operations still work
            let pos = buffer.offset_to_position(0).unwrap();
            assert_eq!(pos.column, 0);

            // Test position_to_offset
            let offset = buffer.position_to_offset(Position { line: 0, column: 0 });
            assert_eq!(offset, 0);

            // Test replace operations
            let replace_result = buffer.replace_range(0..5, "START");
            assert!(replace_result);

            let text_after_replace = buffer.get_all_text().unwrap();
            assert!(text_after_replace.starts_with(b"START"));
        }
4805
        /// Verifies chunked lazy loading on a 3MB file: each 1MB region loads
        /// on demand, reads may cross chunk boundaries, the piece tree splits
        /// into multiple buffers, editing still works afterwards, and the
        /// entire content matches the original file byte-for-byte.
        #[test]
        fn test_large_file_chunk_based_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("huge.txt");

            // Create a file larger than LOAD_CHUNK_SIZE (1MB)
            // We'll create a 3MB file with a repeating pattern so we can verify chunks
            let chunk_size = LOAD_CHUNK_SIZE; // 1MB
            let file_size = chunk_size * 3; // 3MB

            // Pattern: "AAAA...AAAA" (1MB of A's), "BBBB...BBBB" (1MB of B's), "CCCC...CCCC" (1MB of C's)
            let mut file = File::create(&file_path).unwrap();
            file.write_all(&vec![b'A'; chunk_size]).unwrap();
            file.write_all(&vec![b'B'; chunk_size]).unwrap();
            file.write_all(&vec![b'C'; chunk_size]).unwrap();
            file.flush().unwrap();

            // Load as large file (use threshold of 1 byte to ensure large file mode)
            let mut buffer = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();

            // Verify it's in large file mode
            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), file_size);

            // Buffer should be unloaded initially
            assert!(!buffer.buffers[0].is_loaded());

            // Read from the first chunk (should load only first 1MB)
            let first_chunk_data = buffer.get_text_range_mut(0, 1024).unwrap();
            assert_eq!(first_chunk_data.len(), 1024);
            assert!(first_chunk_data.iter().all(|&b| b == b'A'));

            // Read from the middle chunk (offset = 1MB, should load second 1MB)
            let second_chunk_data = buffer.get_text_range_mut(chunk_size, 1024).unwrap();
            assert_eq!(second_chunk_data.len(), 1024);
            assert!(second_chunk_data.iter().all(|&b| b == b'B'));

            // Read from the last chunk (offset = 2MB, should load third 1MB)
            let third_chunk_data = buffer.get_text_range_mut(chunk_size * 2, 1024).unwrap();
            assert_eq!(third_chunk_data.len(), 1024);
            assert!(third_chunk_data.iter().all(|&b| b == b'C'));

            // Verify we can read across chunk boundaries
            // Read from middle of first chunk to middle of second chunk
            let cross_chunk_offset = chunk_size - 512;
            let cross_chunk_data = buffer.get_text_range_mut(cross_chunk_offset, 1024).unwrap();
            assert_eq!(cross_chunk_data.len(), 1024);
            // First 512 bytes should be 'A', next 512 bytes should be 'B'
            assert!(cross_chunk_data[..512].iter().all(|&b| b == b'A'));
            assert!(cross_chunk_data[512..].iter().all(|&b| b == b'B'));

            // After chunk-based loading, verify the piece tree has been split
            // The number of buffers should be greater than 1 (original + chunks)
            assert!(
                buffer.buffers.len() > 1,
                "Expected multiple buffers after chunk-based loading, got {}",
                buffer.buffers.len()
            );

            // Test that editing still works after chunk-based loading
            // ("PREFIX" is 6 bytes, hence the size delta below)
            buffer.insert_bytes(0, b"PREFIX".to_vec());
            assert_eq!(buffer.total_bytes(), file_size + 6);

            let after_insert = buffer.get_text_range_mut(0, 6).unwrap();
            assert_eq!(after_insert, b"PREFIX");

            // Verify the original data is still there after the prefix
            let after_prefix = buffer.get_text_range_mut(6, 10).unwrap();
            assert!(after_prefix.iter().all(|&b| b == b'A'));

            // Most importantly: validate the entire buffer content matches the original file
            // Create a fresh buffer to read the original file
            let mut buffer2 = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();

            // Read the entire file in chunks and verify each chunk
            let chunk_read_size = 64 * 1024; // Read in 64KB chunks for efficiency
            let mut offset = 0;
            while offset < file_size {
                let bytes_to_read = chunk_read_size.min(file_size - offset);
                let chunk_data = buffer2.get_text_range_mut(offset, bytes_to_read).unwrap();

                // Determine which section of the file we're reading
                let first_mb_end = chunk_size;
                let second_mb_end = chunk_size * 2;

                // Validate the data based on which MB section we're in
                for (i, &byte) in chunk_data.iter().enumerate() {
                    let file_offset = offset + i;
                    let expected = if file_offset < first_mb_end {
                        b'A'
                    } else if file_offset < second_mb_end {
                        b'B'
                    } else {
                        b'C'
                    };
                    assert_eq!(
                        byte, expected,
                        "Mismatch at file offset {}: expected {}, got {}",
                        file_offset, expected as char, byte as char
                    );
                }

                offset += bytes_to_read;
            }
        }
4911
4912        /// Test that save_to_file works correctly with partially loaded large files
4913        /// This is a regression test for a bug where saving would silently produce
4914        /// an empty file if any buffer regions were still unloaded.
4915        #[test]
4916        fn test_large_file_incremental_save() {
4917            let temp_dir = TempDir::new().unwrap();
4918            let file_path = temp_dir.path().join("large_save_test.txt");
4919
4920            // Create a small file but use tiny threshold to trigger large file mode
4921            let chunk_size = 1000; // 1KB chunks
4922            let file_size = chunk_size * 2; // 2KB total
4923
4924            let mut file = File::create(&file_path).unwrap();
4925            // First half: 'A' repeated
4926            file.write_all(&vec![b'A'; chunk_size]).unwrap();
4927            // Second half: 'B' repeated
4928            file.write_all(&vec![b'B'; chunk_size]).unwrap();
4929            file.flush().unwrap();
4930
4931            // Load as large file (threshold of 100 bytes)
4932            let mut buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
4933            assert!(buffer.large_file);
4934            assert_eq!(buffer.total_bytes(), file_size);
4935
4936            // Only read from the beginning - this loads only a small region
4937            let first_bytes = buffer.get_text_range_mut(0, 50).unwrap();
4938            assert!(first_bytes.iter().all(|&b| b == b'A'));
4939
4940            // Make an edit at the beginning
4941            buffer.insert_bytes(0, b"PREFIX_".to_vec());
4942
4943            // Save to a new file (to avoid issues with reading while writing same file)
4944            let save_path = temp_dir.path().join("saved.txt");
4945            buffer.save_to_file(&save_path).unwrap();
4946
4947            // Verify the saved file
4948            let saved_content = std::fs::read(&save_path).unwrap();
4949
4950            // Check total size: original + "PREFIX_" (7 bytes)
4951            assert_eq!(
4952                saved_content.len(),
4953                file_size + 7,
4954                "Saved file should be {} bytes, got {}",
4955                file_size + 7,
4956                saved_content.len()
4957            );
4958
4959            // Check prefix
4960            assert_eq!(&saved_content[..7], b"PREFIX_", "Should start with PREFIX_");
4961
4962            // Check that first chunk (after prefix) contains A's
4963            assert!(
4964                saved_content[7..100].iter().all(|&b| b == b'A'),
4965                "First chunk after prefix should be A's"
4966            );
4967
4968            // Check that second chunk contains B's (this was unloaded!)
4969            let second_chunk_start = 7 + chunk_size;
4970            assert!(
4971                saved_content[second_chunk_start..second_chunk_start + 100]
4972                    .iter()
4973                    .all(|&b| b == b'B'),
4974                "Second chunk should be B's (was unloaded, should be preserved)"
4975            );
4976        }
4977
4978        /// Test that save_to_file handles edits at multiple positions
4979        #[test]
4980        fn test_large_file_save_with_multiple_edits() {
4981            let temp_dir = TempDir::new().unwrap();
4982            let file_path = temp_dir.path().join("multi_edit.txt");
4983
4984            // Create a ~5KB file with numbered lines for easier verification
4985            let mut content = Vec::new();
4986            for i in 0..100 {
4987                content.extend_from_slice(
4988                    format!("Line {:04}: padding to make it longer\n", i).as_bytes(),
4989                );
4990            }
4991            let original_len = content.len();
4992            std::fs::write(&file_path, &content).unwrap();
4993
4994            // Load as large file (threshold of 500 bytes)
4995            let mut buffer = TextBuffer::load_from_file(&file_path, 500, test_fs()).unwrap();
4996            assert!(
4997                buffer.line_count().is_none(),
4998                "Should be in large file mode"
4999            );
5000
5001            // Edit at the beginning
5002            buffer.insert_bytes(0, b"[START]".to_vec());
5003
5004            // Edit somewhere in the middle (load that region first)
5005            let mid_offset = original_len / 2;
5006            let _mid_bytes = buffer.get_text_range_mut(mid_offset + 7, 10).unwrap(); // +7 for our insert
5007            buffer.insert_bytes(mid_offset + 7, b"[MIDDLE]".to_vec());
5008
5009            // Save
5010            let save_path = temp_dir.path().join("multi_edit_saved.txt");
5011            buffer.save_to_file(&save_path).unwrap();
5012
5013            // Verify
5014            let saved = std::fs::read_to_string(&save_path).unwrap();
5015
5016            assert!(
5017                saved.starts_with("[START]Line 0000"),
5018                "Should start with our edit"
5019            );
5020            assert!(saved.contains("[MIDDLE]"), "Should contain middle edit");
5021            assert!(saved.contains("Line 0099"), "Should preserve end of file");
5022
5023            // Verify total length
5024            let expected_len = original_len + 7 + 8; // [START] + [MIDDLE]
5025            assert_eq!(
5026                saved.len(),
5027                expected_len,
5028                "Length should be original + edits"
5029            );
5030        }
5031    }
5032
5033    // ===== Offset to Position Tests =====
5034    // These tests focus on the offset_to_position correctness
5035
5036    #[test]
5037    fn test_offset_to_position_simple() {
5038        // Create a buffer with known line structure
5039        // Line 0: "a\n" (bytes 0-1, newline at 1)
5040        // Line 1: "b\n" (bytes 2-3, newline at 3)
5041        // Line 2: "c\n" (bytes 4-5, newline at 5)
5042        // Line 3: "d" (bytes 6, no newline)
5043        let content = b"a\nb\nc\nd";
5044        let buffer = TextBuffer::from_bytes(content.to_vec(), test_fs());
5045
5046        // Verify specific positions
5047        let pos = buffer
5048            .offset_to_position(0)
5049            .expect("small buffer should have line metadata");
5050        assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
5051        assert_eq!(pos.column, 0);
5052
5053        let pos = buffer
5054            .offset_to_position(1)
5055            .expect("small buffer should have line metadata");
5056        assert_eq!(pos.line, 0, "Byte 1 (newline) should be on line 0");
5057        assert_eq!(pos.column, 1);
5058
5059        let pos = buffer
5060            .offset_to_position(2)
5061            .expect("small buffer should have line metadata");
5062        assert_eq!(pos.line, 1, "Byte 2 should be on line 1");
5063        assert_eq!(pos.column, 0);
5064
5065        let pos = buffer
5066            .offset_to_position(3)
5067            .expect("small buffer should have line metadata");
5068        assert_eq!(pos.line, 1, "Byte 3 (newline) should be on line 1");
5069        assert_eq!(pos.column, 1);
5070
5071        let pos = buffer
5072            .offset_to_position(4)
5073            .expect("small buffer should have line metadata");
5074        assert_eq!(pos.line, 2, "Byte 4 should be on line 2");
5075        assert_eq!(pos.column, 0);
5076
5077        let pos = buffer
5078            .offset_to_position(6)
5079            .expect("small buffer should have line metadata");
5080        assert_eq!(pos.line, 3, "Byte 6 should be on line 3");
5081        assert_eq!(pos.column, 0);
5082    }
5083
5084    #[test]
5085    fn test_offset_to_position_after_insert() {
5086        // Start with simple content
5087        let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());
5088
5089        // Insert at position 2 (start of line 1)
5090        buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
5091
5092        // After insert, buffer should be: "a\nx\nb\n"
5093        // Line 0: "a\n" (bytes 0-1)
5094        // Line 1: "x\n" (bytes 2-3)
5095        // Line 2: "b\n" (bytes 4-5)
5096
5097        let pos = buffer
5098            .offset_to_position(0)
5099            .expect("small buffer should have line metadata");
5100        assert_eq!(pos.line, 0, "Byte 0 should still be on line 0");
5101
5102        let pos = buffer
5103            .offset_to_position(2)
5104            .expect("small buffer should have line metadata");
5105        assert_eq!(
5106            pos.line, 1,
5107            "Byte 2 (start of inserted line) should be on line 1"
5108        );
5109
5110        let pos = buffer
5111            .offset_to_position(4)
5112            .expect("small buffer should have line metadata");
5113        assert_eq!(
5114            pos.line, 2,
5115            "Byte 4 (start of 'b') should be on line 2 after insert"
5116        );
5117    }
5118
5119    #[test]
5120    fn test_offset_to_position_empty_lines() {
5121        // Test with empty lines: "\n\n\n"
5122        let buffer = TextBuffer::from_bytes(b"\n\n\n".to_vec(), test_fs());
5123
5124        // Line 0: "\n" (byte 0)
5125        // Line 1: "\n" (byte 1)
5126        // Line 2: "\n" (byte 2)
5127        // Line 3: "" (empty, after last newline)
5128
5129        let pos = buffer
5130            .offset_to_position(0)
5131            .expect("small buffer should have line metadata");
5132        assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
5133
5134        let pos = buffer
5135            .offset_to_position(1)
5136            .expect("small buffer should have line metadata");
5137        assert_eq!(pos.line, 1, "Byte 1 should be on line 1");
5138
5139        let pos = buffer
5140            .offset_to_position(2)
5141            .expect("small buffer should have line metadata");
5142        assert_eq!(pos.line, 2, "Byte 2 should be on line 2");
5143
5144        let pos = buffer
5145            .offset_to_position(3)
5146            .expect("small buffer should have line metadata");
5147        assert_eq!(pos.line, 3, "Byte 3 (EOF) should be on line 3");
5148    }
5149
5150    #[test]
5151    fn test_offset_to_position_long_lines() {
5152        // Test with long lines to ensure it's not just line counting
5153        let mut content = Vec::new();
5154        content.extend_from_slice(b"aaaaaaaaaa\n"); // Line 0: 11 bytes (10 'a's + newline)
5155        content.extend_from_slice(b"bbbbbbbbbb\n"); // Line 1: 11 bytes
5156        content.extend_from_slice(b"cccccccccc"); // Line 2: 10 bytes (no newline)
5157
5158        let buffer = TextBuffer::from_bytes(content.clone(), test_fs());
5159
5160        // Test positions at start of each line
5161        let pos = buffer
5162            .offset_to_position(0)
5163            .expect("small buffer should have line metadata");
5164        assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
5165        assert_eq!(pos.column, 0);
5166
5167        let pos = buffer
5168            .offset_to_position(11)
5169            .expect("small buffer should have line metadata");
5170        assert_eq!(pos.line, 1, "Byte 11 (start of line 1) should be on line 1");
5171        assert_eq!(pos.column, 0);
5172
5173        let pos = buffer
5174            .offset_to_position(22)
5175            .expect("small buffer should have line metadata");
5176        assert_eq!(pos.line, 2, "Byte 22 (start of line 2) should be on line 2");
5177        assert_eq!(pos.column, 0);
5178
5179        // Test mid-line positions
5180        let pos = buffer
5181            .offset_to_position(5)
5182            .expect("small buffer should have line metadata");
5183        assert_eq!(pos.line, 0, "Byte 5 should be on line 0");
5184        assert_eq!(pos.column, 5);
5185
5186        let pos = buffer
5187            .offset_to_position(16)
5188            .expect("small buffer should have line metadata");
5189        assert_eq!(pos.line, 1, "Byte 16 should be on line 1");
5190        assert_eq!(pos.column, 5);
5191    }
5192
    /// For every byte offset, a LineIterator created at that offset must start
    /// at the beginning of the line that offset_to_position reports for it.
    #[test]
    fn test_line_iterator_with_offset_to_position() {
        // This combines line iterator with offset_to_position to find issues
        let mut buffer = TextBuffer::from_bytes(b"line0\nline1\nline2\n".to_vec(), test_fs());

        // Test creating line iterator at various positions (inclusive of EOF)
        for byte_pos in 0..=buffer.len() {
            // NOTE(review): 80 is presumably a wrap width and should not affect
            // the starting position -- confirm against line_iterator's contract.
            let iter = buffer.line_iterator(byte_pos, 80);
            let iter_pos = iter.current_position();
            // The line that offset_to_position says this byte belongs to...
            let expected_line = buffer
                .offset_to_position(byte_pos)
                .expect("small buffer should have line metadata")
                .line;
            // ...and the byte offset where that line starts.
            let expected_line_start = buffer.position_to_offset(Position {
                line: expected_line,
                column: 0,
            });

            assert_eq!(
                iter_pos, expected_line_start,
                "LineIterator at byte {} should position at line start {} but got {}",
                byte_pos, expected_line_start, iter_pos
            );
        }
    }
5218
5219    #[test]
5220    fn test_piece_tree_line_count_after_insert() {
5221        // Debug the piece tree structure after insert
5222        let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());
5223
5224        // Insert at line 1, column 0
5225        buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
5226
5227        // Manually verify line counts
5228        let content = buffer.slice_bytes(0..buffer.len());
5229        let newline_count = content.iter().filter(|&&b| b == b'\n').count();
5230        let expected_line_count = newline_count + 1;
5231        let actual_line_count = buffer.line_count();
5232
5233        assert_eq!(
5234            actual_line_count,
5235            Some(expected_line_count),
5236            "Line count mismatch after insert"
5237        );
5238    }
5239
    /// position_to_lsp_position must report correct (line, character) pairs
    /// even after the buffer has been modified.
    #[test]
    fn test_position_to_lsp_position_after_modification() {
        // This test demonstrates a bug in the piece tree's offset_to_position
        // where column calculation is incorrect after buffer modifications.
        // The position_to_lsp_position function works around this by using
        // line_start_offset to calculate the column correctly.

        // Initial content: "fn foo(val: i32) {\n    val + 1\n}\n"
        // Line 0 is "fn foo(val: i32) {\n" = 19 bytes, so line 1 starts at 19.
        let initial = b"fn foo(val: i32) {\n    val + 1\n}\n";
        let mut buffer = TextBuffer::from_bytes(initial.to_vec(), test_fs());

        // Verify initial positions work correctly
        // Position 23 is 'v' of second "val" on line 1 (19 + 4 leading spaces)
        let (line, char) = buffer.position_to_lsp_position(23);
        assert_eq!(line, 1, "Initial: position 23 should be on line 1");
        assert_eq!(char, 4, "Initial: position 23 should be at char 4");

        // Simulate rename: delete "val" at position 23 (line 1, char 4) and insert "value"
        // Position 23 = line 1, char 4; Position 26 = line 1, char 7
        buffer.delete_range(
            Position { line: 1, column: 4 },
            Position { line: 1, column: 7 },
        );
        buffer.insert_bytes(23, b"value".to_vec()); // Insert "value"

        // Also rename the first occurrence
        // Position 7 = line 0, char 7; Position 10 = line 0, char 10
        buffer.delete_range(
            Position { line: 0, column: 7 },
            Position {
                line: 0,
                column: 10,
            },
        );
        buffer.insert_bytes(7, b"value".to_vec()); // Insert "value"

        // Buffer is now: "fn foo(value: i32) {\n    value + 1\n}\n"
        let content = String::from_utf8_lossy(&buffer.get_all_text().unwrap()).to_string();
        assert_eq!(content, "fn foo(value: i32) {\n    value + 1\n}\n");

        // Position 25 is now 'v' of second "value" on line 1
        // Line 0: "fn foo(value: i32) {\n" = 21 chars (positions 0-20)
        // Line 1: "    value + 1\n" starts at position 21
        // Position 25 = 21 + 4 = line 1, char 4

        // The workaround in position_to_lsp_position should give correct result
        let (line, char) = buffer.position_to_lsp_position(25);
        assert_eq!(
            line, 1,
            "After modification: position 25 should be on line 1"
        );
        assert_eq!(
            char, 4,
            "After modification: position 25 should be at char 4"
        );

        // Also verify position 21 (start of line 1) works
        let (line, char) = buffer.position_to_lsp_position(21);
        assert_eq!(line, 1, "Position 21 should be on line 1");
        assert_eq!(char, 0, "Position 21 should be at char 0 (start of line)");
    }
5301
5302    #[test]
5303    fn test_detect_crlf() {
5304        assert_eq!(
5305            TextBuffer::detect_line_ending(b"hello\r\nworld\r\n"),
5306            LineEnding::CRLF
5307        );
5308    }
5309
5310    #[test]
5311    fn test_detect_lf() {
5312        assert_eq!(
5313            TextBuffer::detect_line_ending(b"hello\nworld\n"),
5314            LineEnding::LF
5315        );
5316    }
5317
5318    #[test]
5319    fn test_normalize_crlf() {
5320        let input = b"hello\r\nworld\r\n".to_vec();
5321        let output = TextBuffer::normalize_line_endings(input);
5322        assert_eq!(output, b"hello\nworld\n");
5323    }
5324
5325    #[test]
5326    fn test_normalize_empty() {
5327        let input = Vec::new();
5328        let output = TextBuffer::normalize_line_endings(input);
5329        assert_eq!(output, Vec::<u8>::new());
5330    }
5331
5332    /// Regression test: get_all_text() returns empty for large files with unloaded regions
5333    ///
5334    /// This was the root cause of a bug where recovery auto-save would save 0 bytes
5335    /// for large files, causing data loss on crash recovery.
5336    ///
5337    /// The fix is to use get_text_range_mut() which handles lazy loading.
5338    #[test]
5339    fn test_get_all_text_returns_empty_for_unloaded_buffers() {
5340        use tempfile::TempDir;
5341        let temp_dir = TempDir::new().unwrap();
5342        let file_path = temp_dir.path().join("large_test.txt");
5343
5344        // Create a 50KB file
5345        let original_content = "X".repeat(50_000);
5346        std::fs::write(&file_path, &original_content).unwrap();
5347
5348        // Load with small threshold to trigger large file mode
5349        let mut buffer = TextBuffer::load_from_file(&file_path, 1024, test_fs()).unwrap();
5350        assert!(buffer.large_file, "Should be in large file mode");
5351        assert!(!buffer.buffers[0].is_loaded(), "Buffer should be unloaded");
5352
5353        // Make a small edit
5354        buffer.insert_bytes(0, b"EDITED: ".to_vec());
5355
5356        // get_all_text() now returns None for unloaded buffers instead of empty
5357        // This is the correct behavior - it signals that content is not available
5358        let content_immutable = buffer.get_all_text();
5359
5360        // get_all_text() returns None because it uses get_text_range() which
5361        // returns None for unloaded regions
5362        assert!(
5363            content_immutable.is_none(),
5364            "get_all_text() should return None for large files with unloaded regions. \
5365             Got Some({} bytes) instead of None.",
5366            content_immutable.as_ref().map(|c| c.len()).unwrap_or(0)
5367        );
5368
5369        // CORRECT BEHAVIOR: get_text_range_mut() handles lazy loading
5370        let total = buffer.total_bytes();
5371        let content_lazy = buffer.get_text_range_mut(0, total).unwrap();
5372        assert_eq!(
5373            content_lazy.len(),
5374            50_000 + 8,
5375            "get_text_range_mut() should return all content with lazy loading"
5376        );
5377        assert!(
5378            String::from_utf8_lossy(&content_lazy).starts_with("EDITED: "),
5379            "Content should start with our edit"
5380        );
5381    }
5382
5383    // ===== Line Ending Conversion Tests =====
5384
5385    mod line_ending_conversion {
5386        use super::*;
5387
5388        #[test]
5389        fn test_convert_lf_to_crlf() {
5390            let input = b"Line 1\nLine 2\nLine 3\n";
5391            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
5392            assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
5393        }
5394
5395        #[test]
5396        fn test_convert_crlf_to_lf() {
5397            let input = b"Line 1\r\nLine 2\r\nLine 3\r\n";
5398            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
5399            assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
5400        }
5401
5402        #[test]
5403        fn test_convert_cr_to_lf() {
5404            let input = b"Line 1\rLine 2\rLine 3\r";
5405            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
5406            assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
5407        }
5408
5409        #[test]
5410        fn test_convert_mixed_to_crlf() {
5411            // Mixed line endings: LF, CRLF, CR
5412            let input = b"Line 1\nLine 2\r\nLine 3\r";
5413            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
5414            assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
5415        }
5416
5417        #[test]
5418        fn test_convert_lf_to_lf_is_noop() {
5419            let input = b"Line 1\nLine 2\nLine 3\n";
5420            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
5421            assert_eq!(result, input.to_vec());
5422        }
5423
5424        #[test]
5425        fn test_convert_empty_content() {
5426            let input = b"";
5427            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
5428            assert_eq!(result, b"".to_vec());
5429        }
5430
5431        #[test]
5432        fn test_convert_no_line_endings() {
5433            let input = b"No line endings here";
5434            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
5435            assert_eq!(result, b"No line endings here".to_vec());
5436        }
5437
5438        #[test]
5439        fn test_set_line_ending_marks_modified() {
5440            let mut buffer = TextBuffer::from_bytes(b"Hello\nWorld\n".to_vec(), test_fs());
5441            assert!(!buffer.is_modified());
5442
5443            buffer.set_line_ending(LineEnding::CRLF);
5444            assert!(buffer.is_modified());
5445        }
5446
5447        #[test]
5448        fn test_set_default_line_ending_does_not_mark_modified() {
5449            let mut buffer = TextBuffer::empty(test_fs());
5450            assert!(!buffer.is_modified());
5451
5452            buffer.set_default_line_ending(LineEnding::CRLF);
5453            assert!(!buffer.is_modified());
5454            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
5455        }
5456
5457        #[test]
5458        fn test_save_to_file_converts_lf_to_crlf() {
5459            use tempfile::TempDir;
5460
5461            let temp_dir = TempDir::new().unwrap();
5462            let file_path = temp_dir.path().join("test_lf_to_crlf.txt");
5463
5464            // Create a file with LF line endings
5465            let original_content = b"Line 1\nLine 2\nLine 3\n";
5466            std::fs::write(&file_path, original_content).unwrap();
5467
5468            // Load the file
5469            let mut buffer =
5470                TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs())
5471                    .unwrap();
5472            assert_eq!(buffer.line_ending(), LineEnding::LF);
5473
5474            // Change line ending to CRLF
5475            buffer.set_line_ending(LineEnding::CRLF);
5476            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
5477            assert!(buffer.is_modified());
5478
5479            // Save the file
5480            buffer.save_to_file(&file_path).unwrap();
5481
5482            // Read back and verify CRLF
5483            let saved_bytes = std::fs::read(&file_path).unwrap();
5484            assert_eq!(&saved_bytes, b"Line 1\r\nLine 2\r\nLine 3\r\n");
5485        }
5486
5487        #[test]
5488        fn test_save_to_file_converts_crlf_to_lf() {
5489            use tempfile::TempDir;
5490
5491            let temp_dir = TempDir::new().unwrap();
5492            let file_path = temp_dir.path().join("test_crlf_to_lf.txt");
5493
5494            // Create a file with CRLF line endings
5495            let original_content = b"Line 1\r\nLine 2\r\nLine 3\r\n";
5496            std::fs::write(&file_path, original_content).unwrap();
5497
5498            // Load the file
5499            let mut buffer =
5500                TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs())
5501                    .unwrap();
5502            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
5503
5504            // Change line ending to LF
5505            buffer.set_line_ending(LineEnding::LF);
5506            assert_eq!(buffer.line_ending(), LineEnding::LF);
5507            assert!(buffer.is_modified());
5508
5509            // Save the file
5510            buffer.save_to_file(&file_path).unwrap();
5511
5512            // Read back and verify LF (no CRLF)
5513            let saved_bytes = std::fs::read(&file_path).unwrap();
5514            assert_eq!(&saved_bytes, b"Line 1\nLine 2\nLine 3\n");
5515        }
5516
5517        #[test]
5518        #[cfg(unix)]
5519        fn test_save_to_unwritable_file() -> anyhow::Result<()> {
5520            // Root (uid 0) bypasses Unix file permission checks, so these
5521            // permission-denied tests are meaningless when running as root.
5522            if unsafe { libc::getuid() } == 0 {
5523                eprintln!("Skipping test: root bypasses file permission checks");
5524                return Ok(());
5525            }
5526            use std::fs::Permissions;
5527            use std::os::unix::fs::PermissionsExt;
5528            use tempfile::TempDir;
5529
5530            let temp_dir = TempDir::new().unwrap();
5531            let unwritable_dir = temp_dir.path().join("unwritable_dir");
5532            std::fs::create_dir(&unwritable_dir)?;
5533
5534            let file_path = unwritable_dir.join("unwritable.txt");
5535            std::fs::write(&file_path, "original content")?;
5536
5537            // Make directory unwritable to prevent rename/temp file creation
5538            std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;
5539
5540            let mut buffer = TextBuffer::from_bytes(b"new content".to_vec(), test_fs());
5541            let result = buffer.save_to_file(&file_path);
5542
5543            // Verify that it returns SudoSaveRequired
5544            match result {
5545                Err(e) => {
5546                    if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
5547                        assert_eq!(sudo_err.dest_path, file_path);
5548                        assert!(sudo_err.temp_path.exists());
5549                        // Cleanup temp file
5550                        drop(std::fs::remove_file(&sudo_err.temp_path));
5551                    } else {
5552                        panic!("Expected SudoSaveRequired error, got: {:?}", e);
5553                    }
5554                }
5555                Ok(_) => panic!("Expected error, but save succeeded"),
5556            }
5557
5558            Ok(())
5559        }
5560
5561        #[test]
5562        #[cfg(unix)]
5563        fn test_save_to_unwritable_directory() -> anyhow::Result<()> {
5564            // Root (uid 0) bypasses Unix file permission checks, so these
5565            // permission-denied tests are meaningless when running as root.
5566            if unsafe { libc::getuid() } == 0 {
5567                eprintln!("Skipping test: root bypasses file permission checks");
5568                return Ok(());
5569            }
5570            use std::fs::Permissions;
5571            use std::os::unix::fs::PermissionsExt;
5572            use tempfile::TempDir;
5573
5574            let temp_dir = TempDir::new().unwrap();
5575            let unwritable_dir = temp_dir.path().join("unwritable_dir");
5576            std::fs::create_dir(&unwritable_dir)?;
5577
5578            let file_path = unwritable_dir.join("test.txt");
5579
5580            // Make directory unwritable (no write allowed)
5581            std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;
5582
5583            let mut buffer = TextBuffer::from_bytes(b"content".to_vec(), test_fs());
5584            let result = buffer.save_to_file(&file_path);
5585
5586            match result {
5587                Err(e) => {
5588                    if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
5589                        assert_eq!(sudo_err.dest_path, file_path);
5590                        assert!(sudo_err.temp_path.exists());
5591                        // It should be in /tmp because the directory was not writable
5592                        assert!(sudo_err.temp_path.starts_with(std::env::temp_dir()));
5593                        // Cleanup
5594                        drop(std::fs::remove_file(&sudo_err.temp_path));
5595                    } else {
5596                        panic!("Expected SudoSaveRequired error, got: {:?}", e);
5597                    }
5598                }
5599                Ok(_) => panic!("Expected error, but save succeeded"),
5600            }
5601
5602            Ok(())
5603        }
5604    }
5605
5606    mod large_file_encoding_tests {
5607        use super::*;
5608
5609        #[test]
5610        fn test_large_file_encoding_confirmation_display() {
5611            let confirmation = LargeFileEncodingConfirmation {
5612                path: PathBuf::from("/test/file.txt"),
5613                file_size: 150 * 1024 * 1024, // 150 MB
5614                encoding: Encoding::ShiftJis,
5615            };
5616
5617            let display = format!("{}", confirmation);
5618            assert!(display.contains("150 MB"), "Display: {}", display);
5619            assert!(display.contains("Shift-JIS"), "Display: {}", display);
5620            assert!(
5621                display.contains("requires full load"),
5622                "Display: {}",
5623                display
5624            );
5625        }
5626
5627        #[test]
5628        fn test_large_file_encoding_confirmation_equality() {
5629            let a = LargeFileEncodingConfirmation {
5630                path: PathBuf::from("/test/file.txt"),
5631                file_size: 100 * 1024 * 1024,
5632                encoding: Encoding::Gb18030,
5633            };
5634            let b = LargeFileEncodingConfirmation {
5635                path: PathBuf::from("/test/file.txt"),
5636                file_size: 100 * 1024 * 1024,
5637                encoding: Encoding::Gb18030,
5638            };
5639            let c = LargeFileEncodingConfirmation {
5640                path: PathBuf::from("/test/other.txt"),
5641                file_size: 100 * 1024 * 1024,
5642                encoding: Encoding::Gb18030,
5643            };
5644
5645            assert_eq!(a, b);
5646            assert_ne!(a, c);
5647        }
5648
5649        #[test]
5650        fn test_encoding_requires_confirmation() {
5651            // Resynchronizable encodings should NOT require confirmation
5652            assert!(!Encoding::Utf8.requires_full_file_load());
5653            assert!(!Encoding::Utf8Bom.requires_full_file_load());
5654            assert!(!Encoding::Ascii.requires_full_file_load());
5655            assert!(!Encoding::Latin1.requires_full_file_load());
5656            assert!(!Encoding::Windows1252.requires_full_file_load());
5657            assert!(!Encoding::Utf16Le.requires_full_file_load());
5658            assert!(!Encoding::Utf16Be.requires_full_file_load());
5659
5660            // Non-resynchronizable CJK encodings SHOULD require confirmation
5661            assert!(Encoding::Gb18030.requires_full_file_load());
5662            assert!(Encoding::Gbk.requires_full_file_load());
5663            assert!(Encoding::ShiftJis.requires_full_file_load());
5664            assert!(Encoding::EucKr.requires_full_file_load());
5665        }
5666
5667        #[test]
5668        fn test_check_large_file_encoding_small_file() {
5669            use tempfile::NamedTempFile;
5670
5671            // Create a small file (well under threshold)
5672            let temp = NamedTempFile::new().unwrap();
5673            std::fs::write(temp.path(), b"hello world").unwrap();
5674
5675            let result = TextBuffer::check_large_file_encoding(temp.path(), test_fs()).unwrap();
5676            assert!(
5677                result.is_none(),
5678                "Small files should not require confirmation"
5679            );
5680        }
5681
5682        #[test]
5683        fn test_large_file_encoding_error_downcast() {
5684            // Verify that LargeFileEncodingConfirmation can be used as an anyhow error
5685            let confirmation = LargeFileEncodingConfirmation {
5686                path: PathBuf::from("/test/file.txt"),
5687                file_size: 200 * 1024 * 1024,
5688                encoding: Encoding::EucKr,
5689            };
5690
5691            let error: anyhow::Error = confirmation.clone().into();
5692            let downcast = error.downcast_ref::<LargeFileEncodingConfirmation>();
5693            assert!(downcast.is_some());
5694            assert_eq!(downcast.unwrap().encoding, Encoding::EucKr);
5695        }
5696    }
5697
5698    mod rebuild_pristine_saved_root_tests {
5699        use super::*;
5700        use crate::model::piece_tree::BufferLocation;
5701        use std::sync::Arc;
5702
        /// Create a large-file-mode TextBuffer from raw bytes, simulating what
        /// `load_from_file` does for files above the large-file threshold.
        ///
        /// The returned buffer has a single loaded backing buffer and one
        /// stored piece spanning it; `saved_root` is the pristine tree root.
        fn large_file_buffer(content: &[u8]) -> TextBuffer {
            let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
                Arc::new(crate::model::filesystem::StdFileSystem);
            let bytes = content.len();
            // One loaded string buffer (id 0) holding the whole "file".
            let buffer =
                crate::model::piece_tree::StringBuffer::new_loaded(0, content.to_vec(), false);
            // A single piece over the entire buffer; empty content gets an
            // empty tree instead.
            let piece_tree = if bytes > 0 {
                crate::model::piece_tree::PieceTree::new(BufferLocation::Stored(0), 0, bytes, None)
            } else {
                crate::model::piece_tree::PieceTree::empty()
            };
            let saved_root = piece_tree.root();
            TextBuffer {
                fs,
                piece_tree,
                saved_root,
                buffers: vec![buffer],
                next_buffer_id: 1,
                file_path: None,
                modified: false,
                recovery_pending: false,
                // Large-file mode: line feeds are not indexed eagerly, so
                // line_feeds_scanned starts false.
                large_file: true,
                line_feeds_scanned: false,
                is_binary: false,
                line_ending: LineEnding::LF,
                original_line_ending: LineEnding::LF,
                encoding: Encoding::Utf8,
                original_encoding: Encoding::Utf8,
                saved_file_size: Some(bytes),
                version: 0,
                config: BufferConfig::default(),
            }
        }
5738
5739        /// Simulate prepare_line_scan + scanning: pre-split and compute lf counts.
5740        fn scan_line_feeds(buf: &mut TextBuffer) -> Vec<(usize, usize)> {
5741            buf.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
5742            let leaves = buf.piece_tree.get_leaves();
5743            let mut updates = Vec::new();
5744            for (idx, leaf) in leaves.iter().enumerate() {
5745                if leaf.line_feed_cnt.is_some() {
5746                    continue;
5747                }
5748                let count = buf.scan_leaf(leaf).unwrap();
5749                updates.push((idx, count));
5750            }
5751            updates
5752        }
5753
5754        /// Generate a repeating pattern with newlines for testing.
5755        fn make_content(size: usize) -> Vec<u8> {
5756            let line = b"abcdefghij0123456789ABCDEFGHIJ0123456789abcdefghij0123456789ABCDEFGHIJ\n";
5757            let mut out = Vec::with_capacity(size);
5758            while out.len() < size {
5759                let remaining = size - out.len();
5760                let take = remaining.min(line.len());
5761                out.extend_from_slice(&line[..take]);
5762            }
5763            out
5764        }
5765
5766        #[test]
5767        fn test_no_edits_arc_ptr_eq() {
5768            let content = make_content(2 * 1024 * 1024);
5769            let expected_lf = content.iter().filter(|&&b| b == b'\n').count();
5770            let mut buf = large_file_buffer(&content);
5771
5772            // Before scan, line_count should be None (large file, no indexing).
5773            assert!(buf.line_count().is_none());
5774
5775            let updates = scan_line_feeds(&mut buf);
5776            buf.rebuild_with_pristine_saved_root(&updates);
5777
5778            // After rebuild, line_count must be Some (exact).
5779            assert_eq!(buf.line_count(), Some(expected_lf + 1));
5780
5781            // After rebuild with no edits, roots should be identical (Arc::ptr_eq).
5782            assert!(Arc::ptr_eq(&buf.saved_root, &buf.piece_tree.root()));
5783            let diff = buf.diff_since_saved();
5784            assert!(diff.equal);
5785            assert!(buf.line_feeds_scanned);
5786            assert_eq!(buf.get_all_text().unwrap(), content);
5787        }
5788
5789        #[test]
5790        fn test_single_insertion() {
5791            let content = make_content(2 * 1024 * 1024);
5792            let mut buf = large_file_buffer(&content);
5793            let updates = scan_line_feeds(&mut buf);
5794
5795            // Insert some text in the middle.
5796            let insert_offset = 1_000_000;
5797            let insert_text = b"INSERTED_TEXT\n";
5798            buf.insert_bytes(insert_offset, insert_text.to_vec());
5799
5800            buf.rebuild_with_pristine_saved_root(&updates);
5801
5802            // Content should match the shadow model.
5803            let mut expected = content.clone();
5804            expected.splice(insert_offset..insert_offset, insert_text.iter().copied());
5805            assert_eq!(buf.get_all_text().unwrap(), expected);
5806
5807            // line_count must be Some (exact) after rebuild, even with edits.
5808            let expected_lf = expected.iter().filter(|&&b| b == b'\n').count();
5809            assert_eq!(buf.line_count(), Some(expected_lf + 1));
5810
5811            // Diff should NOT be equal.
5812            let diff = buf.diff_since_saved();
5813            assert!(!diff.equal);
5814            assert!(!diff.byte_ranges.is_empty());
5815        }
5816
        /// After rebuild + insert near EOF, diff line_ranges must be
        /// document-absolute.  The bug: `with_doc_offsets` assigned consecutive
        /// offsets from 0 to the collected leaves, missing skipped (shared)
        /// subtrees' bytes.
        #[test]
        fn test_diff_line_ranges_are_document_absolute_after_eof_insert() {
            let content = make_content(4 * 1024 * 1024); // 4MB → 4 chunks at 1MB each
            let total_lf = content.iter().filter(|&&b| b == b'\n').count();
            let mut buf = large_file_buffer(&content);
            let updates = scan_line_feeds(&mut buf);
            buf.rebuild_with_pristine_saved_root(&updates);

            // Insert 5 bytes near EOF (last 100 bytes of the file).
            let insert_offset = content.len() - 100;
            buf.insert_bytes(insert_offset, b"HELLO".to_vec());

            let diff = buf.diff_since_saved();
            assert!(!diff.equal, "diff should detect the insertion");
            assert!(
                !diff.byte_ranges.is_empty(),
                "byte_ranges should not be empty"
            );

            // byte_ranges must be near the end of the document, not near 0.
            // (The buggy path reported offsets counted only over visited
            // leaves, which would place the range near 0.)
            let first_range = &diff.byte_ranges[0];
            assert!(
                first_range.start >= content.len() - 200,
                "byte_ranges should be document-absolute (near EOF): got {:?}, expected near {}",
                first_range,
                insert_offset,
            );

            // line_ranges must also be document-absolute.
            let line_ranges = diff
                .line_ranges
                .as_ref()
                .expect("line_ranges should be Some");
            assert!(!line_ranges.is_empty(), "line_ranges should not be empty");
            let first_lr = &line_ranges[0];
            // The insert is near EOF, so the line number should be near total_lf.
            let expected_min_line = total_lf.saturating_sub(10);
            assert!(
                first_lr.start >= expected_min_line,
                "line_ranges should be document-absolute: got {:?}, expected start >= {} (total lines ~{})",
                first_lr,
                expected_min_line,
                total_lf,
            );
        }
5866
5867        #[test]
5868        fn test_single_deletion() {
5869            let content = make_content(2 * 1024 * 1024);
5870            let mut buf = large_file_buffer(&content);
5871            let updates = scan_line_feeds(&mut buf);
5872
5873            // Delete a range.
5874            let del_start = 500_000;
5875            let del_len = 1000;
5876            buf.delete_bytes(del_start, del_len);
5877
5878            buf.rebuild_with_pristine_saved_root(&updates);
5879
5880            let mut expected = content.clone();
5881            expected.drain(del_start..del_start + del_len);
5882            assert_eq!(buf.get_all_text().unwrap(), expected);
5883
5884            let diff = buf.diff_since_saved();
5885            assert!(!diff.equal);
5886        }
5887
5888        #[test]
5889        fn test_insert_and_delete() {
5890            let content = make_content(2 * 1024 * 1024);
5891            let mut buf = large_file_buffer(&content);
5892            let updates = scan_line_feeds(&mut buf);
5893
5894            // Delete near the start, insert near the end.
5895            let del_start = 100_000;
5896            let del_len = 500;
5897            buf.delete_bytes(del_start, del_len);
5898
5899            let insert_offset = 1_500_000; // in the post-delete document
5900            let insert_text = b"NEW_CONTENT\n";
5901            buf.insert_bytes(insert_offset, insert_text.to_vec());
5902
5903            buf.rebuild_with_pristine_saved_root(&updates);
5904
5905            // Build expected content.
5906            let mut expected = content.clone();
5907            expected.drain(del_start..del_start + del_len);
5908            expected.splice(insert_offset..insert_offset, insert_text.iter().copied());
5909            assert_eq!(buf.get_all_text().unwrap(), expected);
5910
5911            let diff = buf.diff_since_saved();
5912            assert!(!diff.equal);
5913        }
5914
        #[test]
        fn test_multiple_scattered_edits() {
            let content = make_content(3 * 1024 * 1024);
            let mut buf = large_file_buffer(&content);
            let updates = scan_line_feeds(&mut buf);
            let mut expected = content.clone();

            // Apply several edits across chunk boundaries, tracking the shadow model.
            // Offsets for each edit are in the document as it stands at that
            // point (earlier edits shift later offsets), so the shadow Vec is
            // mutated with the identical operation after every buffer edit.
            // Edit 1: delete at offset 100k
            buf.delete_bytes(100_000, 200);
            expected.drain(100_000..100_200);

            // Edit 2: insert at offset 500k (in current doc, which shifted)
            buf.insert_bytes(500_000, b"AAAA\n".to_vec());
            expected.splice(500_000..500_000, b"AAAA\n".iter().copied());

            // Edit 3: delete at offset 2M
            buf.delete_bytes(2_000_000, 300);
            expected.drain(2_000_000..2_000_300);

            // Edit 4: insert at offset 1M
            buf.insert_bytes(1_000_000, b"BBBB\n".to_vec());
            expected.splice(1_000_000..1_000_000, b"BBBB\n".iter().copied());

            buf.rebuild_with_pristine_saved_root(&updates);

            // Rebuild must preserve all four scattered edits exactly.
            assert_eq!(buf.get_all_text().unwrap(), expected);
            let diff = buf.diff_since_saved();
            assert!(!diff.equal);
        }
5945
5946        #[test]
5947        fn test_content_preserved_after_rebuild() {
5948            // Verify that get_all_text matches before and after rebuild for
5949            // a buffer with edits.
5950            let content = make_content(2 * 1024 * 1024);
5951            let mut buf = large_file_buffer(&content);
5952            let updates = scan_line_feeds(&mut buf);
5953
5954            buf.insert_bytes(0, b"HEADER\n".to_vec());
5955            buf.delete_bytes(1_000_000, 500);
5956
5957            let text_before = buf.get_all_text().unwrap();
5958            buf.rebuild_with_pristine_saved_root(&updates);
5959            let text_after = buf.get_all_text().unwrap();
5960
5961            assert_eq!(text_before, text_after);
5962        }
5963
        /// Create a large-file-mode TextBuffer backed by an actual file on disk
        /// (Unloaded buffer), matching the real `load_from_file` code path.
        ///
        /// Unlike `large_file_buffer`, the backing StringBuffer starts
        /// unloaded, so content is read lazily from `path` on demand.
        fn large_file_buffer_unloaded(path: &std::path::Path, file_size: usize) -> TextBuffer {
            let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
                Arc::new(crate::model::filesystem::StdFileSystem);
            // Unloaded buffer (id 0) covering bytes [0, file_size) of `path`.
            let buffer = crate::model::piece_tree::StringBuffer::new_unloaded(
                0,
                path.to_path_buf(),
                0,
                file_size,
            );
            // A single stored piece over the whole file; empty files get an
            // empty tree.
            let piece_tree = if file_size > 0 {
                crate::model::piece_tree::PieceTree::new(
                    BufferLocation::Stored(0),
                    0,
                    file_size,
                    None,
                )
            } else {
                crate::model::piece_tree::PieceTree::empty()
            };
            let saved_root = piece_tree.root();
            TextBuffer {
                fs,
                piece_tree,
                saved_root,
                buffers: vec![buffer],
                next_buffer_id: 1,
                file_path: Some(path.to_path_buf()),
                modified: false,
                recovery_pending: false,
                // Large-file mode: no eager line-feed indexing.
                large_file: true,
                line_feeds_scanned: false,
                is_binary: false,
                line_ending: LineEnding::LF,
                original_line_ending: LineEnding::LF,
                encoding: Encoding::Utf8,
                original_encoding: Encoding::Utf8,
                saved_file_size: Some(file_size),
                version: 0,
                config: BufferConfig::default(),
            }
        }
6007
6008        #[test]
6009        fn test_unloaded_buffer_no_edits_line_count() {
6010            let content = make_content(2 * 1024 * 1024);
6011            let expected_lf = content.iter().filter(|&&b| b == b'\n').count();
6012
6013            let tmp = tempfile::NamedTempFile::new().unwrap();
6014            std::fs::write(tmp.path(), &content).unwrap();
6015            let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6016
6017            assert!(
6018                buf.line_count().is_none(),
6019                "before scan, line_count should be None"
6020            );
6021
6022            let updates = scan_line_feeds(&mut buf);
6023            buf.rebuild_with_pristine_saved_root(&updates);
6024
6025            assert_eq!(
6026                buf.line_count(),
6027                Some(expected_lf + 1),
6028                "after rebuild, line_count must be exact"
6029            );
6030            assert!(buf.line_feeds_scanned);
6031        }
6032
6033        #[test]
6034        fn test_unloaded_buffer_with_edits_line_count() {
6035            let content = make_content(2 * 1024 * 1024);
6036
6037            let tmp = tempfile::NamedTempFile::new().unwrap();
6038            std::fs::write(tmp.path(), &content).unwrap();
6039            let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6040
6041            let updates = scan_line_feeds(&mut buf);
6042
6043            // Insert text in the middle (creates an Added piece).
6044            let insert_text = b"INSERTED\n";
6045            buf.insert_bytes(1_000_000, insert_text.to_vec());
6046
6047            buf.rebuild_with_pristine_saved_root(&updates);
6048
6049            let mut expected = content.clone();
6050            expected.splice(1_000_000..1_000_000, insert_text.iter().copied());
6051            let expected_lf = expected.iter().filter(|&&b| b == b'\n').count();
6052
6053            assert_eq!(
6054                buf.line_count(),
6055                Some(expected_lf + 1),
6056                "after rebuild with edits, line_count must be exact"
6057            );
6058            assert!(buf.line_feeds_scanned);
6059        }
6060
6061        /// After rebuild, diff_since_saved should visit a small number of nodes
6062        /// proportional to edit regions, NOT the full tree. This catches
6063        /// regressions where Arc pointers are accidentally destroyed (e.g. by
6064        /// flattening and rebuilding the tree).
6065        #[test]
6066        fn test_diff_efficiency_after_rebuild() {
6067            // Use 32MB so the tree has ~32 leaves (at 1MB chunk size),
6068            // making the efficiency difference between O(log N) and O(N) clear.
6069            let content = make_content(32 * 1024 * 1024);
6070            let mut buf = large_file_buffer(&content);
6071
6072            let updates = scan_line_feeds(&mut buf);
6073
6074            // Insert a small piece of text in one chunk.
6075            buf.insert_bytes(1_000_000, b"HELLO".to_vec());
6076
6077            buf.rebuild_with_pristine_saved_root(&updates);
6078
6079            let diff = buf.diff_since_saved();
6080            assert!(!diff.equal);
6081
6082            let total_leaves = buf.piece_tree.get_leaves().len();
6083            // The diff should visit far fewer nodes than the total tree.
6084            // With path-copying, only the path from root to the edited leaf
6085            // (and its immediate neighbours) should be visited — roughly
6086            // O(log N) nodes, not O(N).
6087            assert!(
6088                diff.nodes_visited < total_leaves,
6089                "diff visited {} nodes but tree has {} leaves — \
6090                 Arc::ptr_eq short-circuiting is not working",
6091                diff.nodes_visited,
6092                total_leaves,
6093            );
6094        }
6095
        /// After rebuild_with_pristine_saved_root, loading a small viewport
        /// range must NOT cause the entire original file buffer to be loaded.
        /// This is a regression test for a bug where the pristine tree's 1MB
        /// pieces all referenced Stored(0) (the whole-file buffer). Because
        /// piece_view.bytes (1MB) <= LOAD_CHUNK_SIZE, get_text_range_mut took
        /// the "load_small_buffer" path, calling load() on the 814MB buffer.
        #[test]
        fn test_viewport_load_after_rebuild_does_not_load_entire_file() {
            use std::sync::atomic::{AtomicUsize, Ordering};

            /// Filesystem wrapper that tracks the largest read_range call.
            struct TrackingFs {
                // Real filesystem that performs the actual I/O.
                inner: crate::model::filesystem::StdFileSystem,
                // High-water mark of `len` across all read_range calls.
                max_read_range_len: Arc<AtomicUsize>,
            }

            // Every method below delegates straight to `inner`; only
            // read_range additionally records the requested length.
            impl crate::model::filesystem::FileSystem for TrackingFs {
                fn read_file(&self, path: &Path) -> std::io::Result<Vec<u8>> {
                    self.inner.read_file(path)
                }
                fn read_range(
                    &self,
                    path: &Path,
                    offset: u64,
                    len: usize,
                ) -> std::io::Result<Vec<u8>> {
                    // fetch_max keeps the largest read length seen so far.
                    self.max_read_range_len.fetch_max(len, Ordering::SeqCst);
                    self.inner.read_range(path, offset, len)
                }
                fn write_file(&self, path: &Path, data: &[u8]) -> std::io::Result<()> {
                    self.inner.write_file(path, data)
                }
                fn create_file(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
                {
                    self.inner.create_file(path)
                }
                fn open_file(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileReader>>
                {
                    self.inner.open_file(path)
                }
                fn open_file_for_write(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
                {
                    self.inner.open_file_for_write(path)
                }
                fn open_file_for_append(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
                {
                    self.inner.open_file_for_append(path)
                }
                fn set_file_length(&self, path: &Path, len: u64) -> std::io::Result<()> {
                    self.inner.set_file_length(path, len)
                }
                fn rename(&self, from: &Path, to: &Path) -> std::io::Result<()> {
                    self.inner.rename(from, to)
                }
                fn copy(&self, from: &Path, to: &Path) -> std::io::Result<u64> {
                    self.inner.copy(from, to)
                }
                fn remove_file(&self, path: &Path) -> std::io::Result<()> {
                    self.inner.remove_file(path)
                }
                fn remove_dir(&self, path: &Path) -> std::io::Result<()> {
                    self.inner.remove_dir(path)
                }
                fn metadata(
                    &self,
                    path: &Path,
                ) -> std::io::Result<crate::model::filesystem::FileMetadata> {
                    self.inner.metadata(path)
                }
                fn symlink_metadata(
                    &self,
                    path: &Path,
                ) -> std::io::Result<crate::model::filesystem::FileMetadata> {
                    self.inner.symlink_metadata(path)
                }
                fn is_dir(&self, path: &Path) -> std::io::Result<bool> {
                    self.inner.is_dir(path)
                }
                fn is_file(&self, path: &Path) -> std::io::Result<bool> {
                    self.inner.is_file(path)
                }
                fn set_permissions(
                    &self,
                    path: &Path,
                    permissions: &crate::model::filesystem::FilePermissions,
                ) -> std::io::Result<()> {
                    self.inner.set_permissions(path, permissions)
                }
                fn is_owner(&self, path: &Path) -> bool {
                    self.inner.is_owner(path)
                }
                fn read_dir(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Vec<crate::model::filesystem::DirEntry>> {
                    self.inner.read_dir(path)
                }
                fn create_dir(&self, path: &Path) -> std::io::Result<()> {
                    self.inner.create_dir(path)
                }
                fn create_dir_all(&self, path: &Path) -> std::io::Result<()> {
                    self.inner.create_dir_all(path)
                }
                fn canonicalize(&self, path: &Path) -> std::io::Result<PathBuf> {
                    self.inner.canonicalize(path)
                }
                fn current_uid(&self) -> u32 {
                    self.inner.current_uid()
                }
                fn sudo_write(
                    &self,
                    path: &Path,
                    data: &[u8],
                    mode: u32,
                    uid: u32,
                    gid: u32,
                ) -> std::io::Result<()> {
                    self.inner.sudo_write(path, data, mode, uid, gid)
                }
            }

            // Create a 3MB file with newlines (3 chunks at LOAD_CHUNK_SIZE=1MB).
            let file_size = LOAD_CHUNK_SIZE * 3;
            let content = make_content(file_size);

            let tmp = tempfile::NamedTempFile::new().unwrap();
            std::fs::write(tmp.path(), &content).unwrap();

            let max_read = Arc::new(AtomicUsize::new(0));
            let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
                Arc::new(TrackingFs {
                    inner: crate::model::filesystem::StdFileSystem,
                    max_read_range_len: max_read.clone(),
                });

            // Build an unloaded large-file buffer with the tracking FS.
            let buffer = crate::model::piece_tree::StringBuffer::new_unloaded(
                0,
                tmp.path().to_path_buf(),
                0,
                file_size,
            );
            let piece_tree = PieceTree::new(BufferLocation::Stored(0), 0, file_size, None);
            let saved_root = piece_tree.root();
            // Assemble the TextBuffer by hand so the tracking FS is injected
            // and the buffer starts in the fully-unloaded large-file state.
            let mut buf = TextBuffer {
                fs,
                piece_tree,
                saved_root,
                buffers: vec![buffer],
                next_buffer_id: 1,
                file_path: Some(tmp.path().to_path_buf()),
                modified: false,
                recovery_pending: false,
                large_file: true,
                line_feeds_scanned: false,
                is_binary: false,
                line_ending: LineEnding::LF,
                original_line_ending: LineEnding::LF,
                encoding: Encoding::Utf8,
                original_encoding: Encoding::Utf8,
                saved_file_size: Some(file_size),
                version: 0,
                config: BufferConfig::default(),
            };

            // Load a small viewport in the middle (forces chunk splitting).
            let viewport_offset = LOAD_CHUNK_SIZE + 100; // somewhere in chunk 2
            buf.get_text_range_mut(viewport_offset, 4096).unwrap();

            // Run the line-feed scan and rebuild the pristine tree.
            let updates = scan_line_feeds(&mut buf);
            buf.rebuild_with_pristine_saved_root(&updates);

            // Reset the tracker — we only care about reads AFTER the rebuild.
            max_read.store(0, Ordering::SeqCst);

            // Load the same viewport range again.
            buf.get_text_range_mut(viewport_offset, 4096).unwrap();

            // A correct implementation reads at most one chunk; loading the
            // whole file would surface as a read of roughly file_size bytes.
            let largest_read = max_read.load(Ordering::SeqCst);
            assert!(
                largest_read <= LOAD_CHUNK_SIZE,
                "After rebuild, loading a viewport triggered a read of {} bytes \
                 (file_size={}). This means the entire Stored buffer is being \
                 loaded instead of just the needed chunk.",
                largest_read,
                file_size,
            );
        }
6297
6298        /// After rebuild_with_pristine_saved_root, loading a viewport must not
6299        /// destroy the line feed counts on pieces. The chunk-split path in
6300        /// get_text_range_mut calls split_at_offset, which invokes
6301        /// compute_line_feeds_static — returning None for unloaded buffers.
6302        /// This turns exact line numbers back into byte-based estimates.
6303        #[test]
6304        fn test_viewport_load_after_rebuild_preserves_line_counts() {
6305            let file_size = LOAD_CHUNK_SIZE * 3;
6306            let content = make_content(file_size);
6307
6308            let tmp = tempfile::NamedTempFile::new().unwrap();
6309            std::fs::write(tmp.path(), &content).unwrap();
6310            let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6311
6312            // Scan + rebuild so every leaf has a known line_feed_cnt.
6313            let updates = scan_line_feeds(&mut buf);
6314            buf.rebuild_with_pristine_saved_root(&updates);
6315
6316            let line_count_before = buf.piece_tree.line_count();
6317            assert!(
6318                line_count_before.is_some(),
6319                "line_count must be Some after rebuild"
6320            );
6321
6322            // Load a viewport that starts in the MIDDLE of a piece, forcing
6323            // split_at_offset (not just replace_buffer_reference).
6324            let mid_piece_offset = LOAD_CHUNK_SIZE + LOAD_CHUNK_SIZE / 2;
6325            buf.get_text_range_mut(mid_piece_offset, 4096).unwrap();
6326
6327            let line_count_after = buf.piece_tree.line_count();
6328            assert!(
6329                line_count_after.is_some(),
6330                "line_count must still be Some after viewport load \
6331                 (was {:?} before, now {:?})",
6332                line_count_before,
6333                line_count_after,
6334            );
6335            assert_eq!(
6336                line_count_before, line_count_after,
6337                "line_count must not change after viewport load"
6338            );
6339        }
6340
6341        /// Same test but with Unloaded data (the fixup path).
6342        #[test]
6343        fn test_diff_efficiency_after_rebuild_unloaded() {
6344            let content = make_content(32 * 1024 * 1024);
6345
6346            let tmp = tempfile::NamedTempFile::new().unwrap();
6347            std::fs::write(tmp.path(), &content).unwrap();
6348            let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6349
6350            let updates = scan_line_feeds(&mut buf);
6351
6352            buf.insert_bytes(1_000_000, b"HELLO".to_vec());
6353
6354            buf.rebuild_with_pristine_saved_root(&updates);
6355
6356            let diff = buf.diff_since_saved();
6357            assert!(!diff.equal);
6358
6359            let total_leaves = buf.piece_tree.get_leaves().len();
6360            assert!(
6361                diff.nodes_visited < total_leaves,
6362                "diff visited {} nodes but tree has {} leaves — \
6363                 Arc::ptr_eq short-circuiting is not working (unloaded path)",
6364                diff.nodes_visited,
6365                total_leaves,
6366            );
6367        }
6368    }
6369}
6370
/// Property-based and example tests covering TextBuffer editing invariants:
/// line counts, byte accounting, offset/position round-trips, write-recipe
/// correctness, and binary-content detection.
#[cfg(test)]
mod property_tests {
    use crate::model::filesystem::StdFileSystem;
    use std::sync::Arc;

    /// Convenience: a real `StdFileSystem` behind the trait object every
    /// test buffer is constructed with.
    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }
    use super::*;
    use proptest::prelude::*;

    // Generate text with some newlines
    /// Strategy: byte strings of lowercase letters and '\n', length 0..100.
    fn text_with_newlines() -> impl Strategy<Value = Vec<u8>> {
        prop::collection::vec(
            prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n'),],
            0..100,
        )
    }

    // Strategy to generate operations
    /// A single random edit applied to the buffer under test.
    #[derive(Debug, Clone)]
    enum Operation {
        /// Insert `text` at `offset` (offsets are clamped by the tests).
        Insert { offset: usize, text: Vec<u8> },
        /// Delete up to `bytes` bytes starting at `offset`.
        Delete { offset: usize, bytes: usize },
    }

    /// Strategy: a sequence of 0..50 random Insert/Delete operations.
    fn operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
        prop::collection::vec(
            prop_oneof![
                (0usize..200, text_with_newlines())
                    .prop_map(|(offset, text)| { Operation::Insert { offset, text } }),
                (0usize..200, 1usize..50)
                    .prop_map(|(offset, bytes)| { Operation::Delete { offset, bytes } }),
            ],
            0..50,
        )
    }

    proptest! {
        // line_count == number of '\n' bytes + 1 for a fresh buffer.
        #[test]
        fn prop_line_count_consistent(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            let newline_count = text.iter().filter(|&&b| b == b'\n').count();
            prop_assert_eq!(buffer.line_count(), Some(newline_count + 1));
        }

        // Round-trip: bytes in == bytes out.
        #[test]
        fn prop_get_all_text_matches_original(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }

        // total_bytes grows by exactly the inserted length.
        #[test]
        fn prop_insert_increases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text, test_fs());
            let initial_bytes = buffer.total_bytes();

            // Clamp the random offset into the valid range.
            let offset = offset.min(buffer.total_bytes());
            buffer.insert_bytes(offset, insert_text.clone());

            prop_assert_eq!(buffer.total_bytes(), initial_bytes + insert_text.len());
        }

        // total_bytes shrinks by exactly the deleted length.
        #[test]
        fn prop_delete_decreases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            delete_bytes in 1usize..50
        ) {
            if text.is_empty() {
                return Ok(());
            }

            let mut buffer = TextBuffer::from_bytes(text, test_fs());
            let initial_bytes = buffer.total_bytes();

            // Clamp offset and length so the delete stays in bounds.
            let offset = offset.min(buffer.total_bytes());
            let delete_bytes = delete_bytes.min(buffer.total_bytes() - offset);

            if delete_bytes == 0 {
                return Ok(());
            }

            buffer.delete_bytes(offset, delete_bytes);

            prop_assert_eq!(buffer.total_bytes(), initial_bytes - delete_bytes);
        }

        // Insert followed by delete of the same span is a no-op.
        #[test]
        fn prop_insert_then_delete_restores_original(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            let offset = offset.min(buffer.total_bytes());
            buffer.insert_bytes(offset, insert_text.clone());
            buffer.delete_bytes(offset, insert_text.len());

            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }

        // offset -> position -> offset must be the identity for every
        // valid byte offset.
        #[test]
        fn prop_offset_position_roundtrip(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            for offset in 0..text.len() {
                let pos = buffer.offset_to_position(offset).expect("offset_to_position should succeed for valid offset");
                let back = buffer.position_to_offset(pos);
                prop_assert_eq!(back, offset, "Failed roundtrip for offset {}", offset);
            }
        }

        // get_text_range returns exactly the requested slice.
        #[test]
        fn prop_get_text_range_valid(
            text in text_with_newlines(),
            offset in 0usize..100,
            length in 1usize..50
        ) {
            if text.is_empty() {
                return Ok(());
            }

            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            let offset = offset.min(buffer.total_bytes());
            let length = length.min(buffer.total_bytes() - offset);

            if length == 0 {
                return Ok(());
            }

            let result = buffer.get_text_range(offset, length);
            prop_assert_eq!(result, Some(text[offset..offset + length].to_vec()));
        }

        // Buffer content stays in lock-step with a shadow Vec<u8> model
        // under an arbitrary sequence of edits.
        #[test]
        fn prop_operations_maintain_consistency(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
            let mut expected_text = b"initial\ntext".to_vec();

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text.clone());

                        // Update expected
                        let offset = offset.min(expected_text.len());
                        expected_text.splice(offset..offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        if offset < buffer.total_bytes() {
                            let bytes = bytes.min(buffer.total_bytes() - offset);
                            buffer.delete_bytes(offset, bytes);

                            // Update expected
                            if offset < expected_text.len() {
                                let bytes = bytes.min(expected_text.len() - offset);
                                expected_text.drain(offset..offset + bytes);
                            }
                        }
                    }
                }
            }

            prop_assert_eq!(buffer.get_all_text().unwrap(), expected_text);
        }

        // line_count never drops below 1, no matter the edit sequence.
        #[test]
        fn prop_line_count_never_zero(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                // Document always has at least 1 line
                prop_assert!(buffer.line_count().unwrap_or(1) >= 1);
            }
        }

        // total_bytes stays bounded (no underflow wrapping to huge values).
        #[test]
        fn prop_total_bytes_never_negative(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                // Bytes should never overflow
                prop_assert!(buffer.total_bytes() < 10_000_000);
            }
        }

        // Offset/position conversion stays usable after arbitrary edits.
        #[test]
        fn prop_piece_tree_and_line_index_stay_synced(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                // Verify we can still convert between offsets and positions
                if buffer.total_bytes() > 0 {
                    let mid_offset = buffer.total_bytes() / 2;
                    if let Some(pos) = buffer.offset_to_position(mid_offset) {
                        let back = buffer.position_to_offset(pos);

                        // Should be able to roundtrip
                        prop_assert!(back <= buffer.total_bytes());
                    }
                }
            }
        }

        // Replaying the write recipe reproduces the buffer content exactly.
        #[test]
        fn prop_write_recipe_matches_content(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");

            // Apply the recipe to get the output
            let output = apply_recipe(&buffer, &recipe);
            prop_assert_eq!(output, text, "Recipe output should match original content");
        }

        // Same recipe invariant, but after an arbitrary edit sequence.
        #[test]
        fn prop_write_recipe_after_edits(
            initial_text in text_with_newlines(),
            operations in operation_strategy()
        ) {
            let mut buffer = TextBuffer::from_bytes(initial_text, test_fs());

            // Apply random operations
            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        if offset < buffer.total_bytes() {
                            let bytes = bytes.min(buffer.total_bytes() - offset);
                            if bytes > 0 {
                                buffer.delete_bytes(offset, bytes);
                            }
                        }
                    }
                }
            }

            // Build recipe and verify it matches buffer content
            let expected = buffer.get_all_text().unwrap();
            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");
            let output = apply_recipe(&buffer, &recipe);

            prop_assert_eq!(output, expected, "Recipe output should match buffer content after edits");
        }

        // Recipes built from a file-backed buffer (which may contain Copy
        // ops referencing unloaded regions) still reproduce the content.
        #[test]
        fn prop_write_recipe_copy_ops_valid(
            text in prop::collection::vec(prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n')], 10..200),
            edit_offset in 0usize..100,
            edit_text in text_with_newlines()
        ) {
            use tempfile::TempDir;

            // Create a temp file with initial content
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");
            std::fs::write(&file_path, &text).unwrap();

            // Load the file (creates unloaded buffer regions)
            let mut buffer = TextBuffer::load_from_file(&file_path, 1024 * 1024, test_fs()).unwrap();

            // Make an edit in the middle
            let edit_offset = edit_offset.min(buffer.total_bytes());
            buffer.insert_bytes(edit_offset, edit_text.clone());

            // Build recipe - should have Copy ops for unmodified regions
            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");

            // Verify recipe produces correct output
            let expected = buffer.get_all_text().unwrap();
            let output = apply_recipe(&buffer, &recipe);
            prop_assert_eq!(output, expected, "Recipe with Copy ops should match buffer content");

            // Verify we have at least some Copy ops if the file was large enough
            // (Copy ops reference unloaded regions from the original file)
            if text.len() > 100 && edit_offset > 10 {
                let has_copy = recipe.actions.iter().any(|a| matches!(a, RecipeAction::Copy { .. }));
                // Note: We don't assert this because line ending conversion or other factors
                // might cause all Insert ops, which is valid behavior
                let _ = has_copy;
            }
        }
    }

    /// Helper to apply a WriteRecipe and return the resulting bytes
    fn apply_recipe(buffer: &TextBuffer, recipe: &WriteRecipe) -> Vec<u8> {
        let mut output = Vec::new();
        for action in &recipe.actions {
            match action {
                // Copy pulls bytes straight from the source file on disk.
                RecipeAction::Copy { offset, len } => {
                    if let Some(src_path) = &recipe.src_path {
                        let data = buffer
                            .fs
                            .read_range(src_path, *offset, *len as usize)
                            .expect("read_range should succeed for Copy op");
                        output.extend_from_slice(&data);
                    } else {
                        panic!("Copy action without source path");
                    }
                }
                // Insert pulls bytes from the recipe's in-memory payloads.
                RecipeAction::Insert { index } => {
                    output.extend_from_slice(&recipe.insert_data[*index]);
                }
            }
        }
        output
    }

    /// Helper to check if bytes are detected as binary
    fn is_detected_as_binary(bytes: &[u8]) -> bool {
        TextBuffer::detect_encoding_or_binary(bytes).1
    }

    #[test]
    fn test_detect_binary_text_files() {
        // Plain text should not be detected as binary
        assert!(!is_detected_as_binary(b"Hello, world!"));
        assert!(!is_detected_as_binary(b"Line 1\nLine 2\nLine 3"));
        assert!(!is_detected_as_binary(b"Tabs\tand\tnewlines\n"));
        assert!(!is_detected_as_binary(b"Carriage return\r\n"));

        // Empty content is not binary
        assert!(!is_detected_as_binary(b""));

        // ANSI CSI escape sequences should be treated as text
        assert!(!is_detected_as_binary(b"\x1b[31mRed text\x1b[0m"));
    }

    #[test]
    fn test_detect_binary_binary_files() {
        // Null bytes indicate binary
        assert!(is_detected_as_binary(b"Hello\x00World"));
        assert!(is_detected_as_binary(b"\x00"));

        // Non-printable control characters (except tab, newline, CR, form feed, vertical tab)
        assert!(is_detected_as_binary(b"Text with \x01 control char"));
        assert!(is_detected_as_binary(b"\x02\x03\x04"));

        // DEL character (0x7F)
        assert!(is_detected_as_binary(b"Text with DEL\x7F"));
    }

    #[test]
    fn test_detect_binary_png_file() {
        // PNG file signature: 89 50 4E 47 0D 0A 1A 0A
        // The 0x1A byte (substitute character) is a control character that triggers binary detection
        let png_header: &[u8] = &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        assert!(is_detected_as_binary(png_header));

        // Simulate a PNG file with more data after header
        let mut png_data = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        png_data.extend_from_slice(b"\x00\x00\x00\x0DIHDR"); // IHDR chunk with null bytes
        assert!(is_detected_as_binary(&png_data));
    }

    #[test]
    fn test_detect_binary_other_image_formats() {
        // JPEG signature: FF D8 FF
        let jpeg_header: &[u8] = &[0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10];
        assert!(is_detected_as_binary(jpeg_header));

        // GIF signature: GIF89a or GIF87a - contains valid ASCII but typically followed by binary
        // GIF header is ASCII but the LSD (Logical Screen Descriptor) contains binary
        let gif_data: &[u8] = &[
            0x47, 0x49, 0x46, 0x38, 0x39, 0x61, // GIF89a
            0x01, 0x00, 0x01, 0x00, // Width=1, Height=1 (little endian)
            0x00, // Packed byte
            0x00, // Background color index
            0x00, // Pixel aspect ratio
        ];
        // The null bytes in the dimensions trigger binary detection
        assert!(is_detected_as_binary(gif_data));

        // BMP signature: BM followed by file size (usually contains null bytes)
        let bmp_header: &[u8] = &[0x42, 0x4D, 0x00, 0x00, 0x00, 0x00];
        assert!(is_detected_as_binary(bmp_header));
    }

    #[test]
    fn test_detect_binary_executable_formats() {
        // ELF signature (Linux executables)
        let elf_header: &[u8] = &[0x7F, 0x45, 0x4C, 0x46, 0x02, 0x01, 0x01, 0x00];
        assert!(is_detected_as_binary(elf_header));

        // Mach-O signature (macOS executables) - magic + cpu type/subtype contain null bytes
        let macho_header: &[u8] = &[0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x01];
        assert!(is_detected_as_binary(macho_header));

        // PE/COFF (Windows executables) - MZ header
        let pe_header: &[u8] = &[0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00];
        assert!(is_detected_as_binary(pe_header));
    }
}
6804
/// A single line of text extracted from a `TextBuffer`, as yielded by
/// [`TextBufferLineIterator`].
#[derive(Debug, Clone)]
pub struct LineData {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Line content (without trailing newline). Produced via lossy UTF-8
    /// conversion, so invalid byte sequences appear as U+FFFD.
    pub content: String,
    /// Whether this line ends with a newline
    pub has_newline: bool,
    /// Line number (None for large files without line metadata)
    pub line_number: Option<usize>,
}
6817
/// Iterator over lines in a TextBuffer that efficiently tracks line numbers
/// using piece tree metadata (single source of truth).
///
/// All lines are gathered eagerly at construction time because loading
/// chunks requires `&mut TextBuffer`; iteration afterwards only walks the
/// collected `Vec`, cloning each [`LineData`] as it is yielded.
pub struct TextBufferLineIterator {
    /// Collected lines (we collect all at once since we need mutable access to load chunks)
    lines: Vec<LineData>,
    /// Current index in the lines vector; `next()` yields `lines[current_index]`.
    current_index: usize,
    /// Whether the buffer contains more lines beyond the collected batch.
    pub has_more: bool,
}
6828
impl TextBufferLineIterator {
    /// Eagerly collect up to `max_lines` lines starting at byte offset
    /// `byte_pos`, recording each line's starting byte offset, content, and
    /// (when the buffer tracks line metadata) its line number.
    ///
    /// Collection happens up front because loading text chunks requires
    /// mutable access to `buffer`, which an iterator cannot retain.
    ///
    /// # Errors
    /// Propagates any error from `TextBuffer::get_text_range_mut`
    /// (e.g. a failed lazy chunk load).
    pub(crate) fn new(buffer: &mut TextBuffer, byte_pos: usize, max_lines: usize) -> Result<Self> {
        let buffer_len = buffer.len();
        // Starting at or past the end of the buffer yields an empty iterator.
        if byte_pos >= buffer_len {
            return Ok(Self {
                lines: Vec::new(),
                current_index: 0,
                has_more: false,
            });
        }

        // Check if buffer has line metadata (None for large files > 1MB)
        let has_line_metadata = buffer.line_count().is_some();

        // Determine starting line number by querying piece tree once
        // (only if we have line metadata)
        let mut current_line = if has_line_metadata {
            buffer.offset_to_position(byte_pos).map(|pos| pos.line)
        } else {
            None
        };

        let mut lines = Vec::with_capacity(max_lines);
        let mut current_offset = byte_pos;
        let estimated_line_length = 80; // Use default estimate

        // Collect lines by scanning forward
        for _ in 0..max_lines {
            if current_offset >= buffer_len {
                break;
            }

            let line_start = current_offset;
            let line_number = current_line;

            // Estimate how many bytes to load for this line
            // (3x the average estimate, clamped to the bytes remaining).
            let estimated_max_line_length = estimated_line_length * 3;
            let bytes_to_scan = estimated_max_line_length.min(buffer_len - current_offset);

            // Load chunk (this handles lazy loading)
            let chunk = buffer.get_text_range_mut(current_offset, bytes_to_scan)?;

            // Scan for newline; `line_len` counts bytes up to AND INCLUDING
            // the '\n' when one is found.
            let mut line_len = 0;
            let mut found_newline = false;
            for &byte in chunk.iter() {
                line_len += 1;
                if byte == b'\n' {
                    found_newline = true;
                    break;
                }
            }

            // Handle long lines (rare case): no newline in the first chunk
            // and more data remains, so load one additional chunk and rescan.
            if !found_newline && current_offset + line_len < buffer_len {
                // Line is longer than expected, load more data
                let remaining = buffer_len - current_offset - line_len;
                let additional_bytes = estimated_max_line_length.min(remaining);
                let more_chunk =
                    buffer.get_text_range_mut(current_offset + line_len, additional_bytes)?;

                // `chunk` is moved here; the extended buffer holds both chunks.
                let mut extended_chunk = chunk;
                extended_chunk.extend_from_slice(&more_chunk);

                for &byte in more_chunk.iter() {
                    line_len += 1;
                    if byte == b'\n' {
                        found_newline = true;
                        break;
                    }
                }

                // NOTE(review): only ONE extension pass is made. A line longer
                // than two chunks (~480 bytes here) is emitted without its tail;
                // the next loop iteration emits the remainder as a separate
                // LineData carrying the SAME line_number and has_newline ==
                // false. Confirm consumers expect this splitting behavior.
                let line_string = String::from_utf8_lossy(&extended_chunk[..line_len]).into_owned();
                let has_newline = line_string.ends_with('\n');
                let content = if has_newline {
                    // Drop the trailing '\n' (a single byte in UTF-8).
                    line_string[..line_string.len() - 1].to_string()
                } else {
                    line_string
                };

                lines.push(LineData {
                    byte_offset: line_start,
                    content,
                    has_newline,
                    line_number,
                });

                current_offset += line_len;
                // The line counter only advances when a '\n'-terminated line
                // was fully consumed.
                if has_line_metadata && found_newline {
                    current_line = current_line.map(|n| n + 1);
                }
                continue;
            }

            // Normal case: newline found within the first chunk, or the line
            // runs to end-of-buffer with no terminator.
            let line_string = String::from_utf8_lossy(&chunk[..line_len]).into_owned();
            let has_newline = line_string.ends_with('\n');
            let content = if has_newline {
                line_string[..line_string.len() - 1].to_string()
            } else {
                line_string
            };

            lines.push(LineData {
                byte_offset: line_start,
                content,
                has_newline,
                line_number,
            });

            current_offset += line_len;
            // Increment line number if we have metadata and found a newline
            if has_line_metadata && found_newline {
                current_line = current_line.map(|n| n + 1);
            }
        }

        // Check if there are more lines
        let has_more = current_offset < buffer_len;

        Ok(Self {
            lines,
            current_index: 0,
            has_more,
        })
    }
}
6956
6957impl Iterator for TextBufferLineIterator {
6958    type Item = LineData;
6959
6960    fn next(&mut self) -> Option<Self::Item> {
6961        if self.current_index < self.lines.len() {
6962            let line = self.lines[self.current_index].clone();
6963            self.current_index += 1;
6964            Some(line)
6965        } else {
6966            None
6967        }
6968    }
6969}