// fresh/model/buffer.rs
1/// Text buffer that uses PieceTree with integrated line tracking
2/// Architecture where the tree is the single source of truth for text and line information
3use crate::model::encoding;
4use crate::model::filesystem::{
5    FileMetadata, FileSearchCursor, FileSearchOptions, FileSystem, WriteOp,
6};
7use crate::model::piece_tree::{
8    BufferData, BufferLocation, Cursor, PieceInfo, PieceRangeIter, PieceTree, PieceView, Position,
9    StringBuffer, TreeStats,
10};
11use crate::model::piece_tree_diff::PieceTreeDiff;
12use crate::primitives::grapheme;
13use anyhow::{Context, Result};
14use regex::bytes::Regex;
15use std::io::{self, Write};
16use std::ops::Range;
17use std::path::{Path, PathBuf};
18use std::sync::Arc;
19
20// Re-export Encoding for backward compatibility
21pub use encoding::Encoding;
22
/// Error returned when a file save operation requires elevated privileges.
///
/// This error contains all the information needed to perform the save via sudo
/// in a single operation, preserving original file ownership and permissions.
///
/// Implements `Display` and `std::error::Error`, so it can be carried through
/// `anyhow` error chains and recovered by downcasting.
#[derive(Debug, Clone, PartialEq)]
pub struct SudoSaveRequired {
    /// Path to the temporary file containing the new content
    pub temp_path: PathBuf,
    /// Destination path where the file should be saved
    pub dest_path: PathBuf,
    /// Original file owner (UID)
    pub uid: u32,
    /// Original file group (GID)
    pub gid: u32,
    /// Original file permissions (mode)
    pub mode: u32,
}
40
41impl std::fmt::Display for SudoSaveRequired {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        write!(
44            f,
45            "Permission denied saving to {}. Use sudo to complete the operation.",
46            self.dest_path.display()
47        )
48    }
49}
50
// Marker impl: lets SudoSaveRequired participate in standard error handling.
impl std::error::Error for SudoSaveRequired {}
52
/// Error returned when a large file has a non-resynchronizable encoding
/// and requires user confirmation before loading the entire file into memory.
///
/// Non-resynchronizable encodings (like Shift-JIS, GB18030, GBK, EUC-KR) cannot
/// determine character boundaries when jumping into the middle of a file.
/// This means the entire file must be loaded and decoded sequentially.
///
/// Raised via `anyhow::bail!` from `load_large_file_internal` when the user
/// has not yet confirmed the full load.
#[derive(Debug, Clone, PartialEq)]
pub struct LargeFileEncodingConfirmation {
    /// Path to the file
    pub path: PathBuf,
    /// Size of the file in bytes
    pub file_size: usize,
    /// The detected encoding that requires full loading
    pub encoding: Encoding,
}
68
69impl std::fmt::Display for LargeFileEncodingConfirmation {
70    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71        let size_mb = self.file_size as f64 / (1024.0 * 1024.0);
72        write!(
73            f,
74            "{} ({:.0} MB) requires full load. (l)oad, (e)ncoding, (C)ancel? ",
75            self.encoding.display_name(),
76            size_mb
77        )
78    }
79}
80
// Marker impl: allows `anyhow::bail!(LargeFileEncodingConfirmation { .. })`
// in `load_large_file_internal` and downcasting by callers.
impl std::error::Error for LargeFileEncodingConfirmation {}
82
/// A work item for incremental line-feed scanning (one per leaf).
///
/// Also used as the per-leaf chunk unit by `ChunkedSearchState`.
#[derive(Debug, Clone)]
pub struct LineScanChunk {
    /// Index of the leaf in the piece tree's leaf array.
    pub leaf_index: usize,
    /// Number of bytes in this leaf.
    pub byte_len: usize,
    /// True if the leaf already had a known line_feed_cnt (no I/O needed).
    pub already_known: bool,
}
93
94// Re-export SearchMatch from filesystem — same type is used by both
95// FileSystem::search_file (project grep on disk) and the piece-tree
96// search below (in-editor Ctrl+F and dirty buffers).
97pub use crate::model::filesystem::SearchMatch;
98
99/// Mutable state for an incremental chunked search over a TextBuffer's
100/// piece tree.  This is the in-editor search path — it reads chunks via
101/// `get_text_range_mut` which loads lazily from disk and works with the
102/// piece tree's edit history.
103///
104/// For searching files on disk (project-wide grep), see
105/// `FileSystem::search_file` which uses `read_range` and doesn't need
106/// a TextBuffer at all.
107///
108/// Created by `TextBuffer::search_scan_init`, advanced by
109/// `TextBuffer::search_scan_next_chunk`.  The same struct is used by
110/// both the Editor's incremental (non-blocking) search and the project-
111/// wide search running inside `spawn_blocking`.
112#[derive(Debug)]
113pub struct ChunkedSearchState {
114    /// One work item per piece-tree leaf (after `prepare_line_scan` splits).
115    pub chunks: Vec<LineScanChunk>,
116    /// Index of the next chunk to process.
117    pub next_chunk: usize,
118    /// Running document byte offset for the next chunk.
119    pub next_doc_offset: usize,
120    /// Total bytes in the buffer.
121    pub total_bytes: usize,
122    /// Bytes scanned so far (for progress reporting).
123    pub scanned_bytes: usize,
124    /// Compiled regex for searching.
125    pub regex: regex::bytes::Regex,
126    /// Accumulated match results with line/column/context.
127    pub matches: Vec<SearchMatch>,
128    /// Tail bytes from the previous chunk for cross-boundary matching.
129    pub overlap_tail: Vec<u8>,
130    /// Byte offset of the overlap_tail's first byte in the document.
131    pub overlap_doc_offset: usize,
132    /// Maximum number of matches before capping.
133    pub max_matches: usize,
134    /// Whether the match count was capped.
135    pub capped: bool,
136    /// Length of the original query string (for overlap sizing).
137    pub query_len: usize,
138    /// 1-based line number at the start of the next non-overlap data.
139    /// Advanced incrementally as chunks are processed.
140    pub(crate) running_line: usize,
141}
142
143impl ChunkedSearchState {
144    /// Returns true if the scan is complete (all chunks processed or capped).
145    pub fn is_done(&self) -> bool {
146        self.next_chunk >= self.chunks.len() || self.capped
147    }
148
149    /// Progress as a percentage (0–100).
150    pub fn progress_percent(&self) -> usize {
151        if self.total_bytes > 0 {
152            (self.scanned_bytes * 100) / self.total_bytes
153        } else {
154            100
155        }
156    }
157}
158
// Large file support configuration
/// Default threshold for considering a file "large" (100 MB).
/// Files at or above this size are loaded lazily without eager line
/// indexing (see `load_from_file`).
pub const DEFAULT_LARGE_FILE_THRESHOLD: usize = 100 * 1024 * 1024;

/// Chunk size to load when lazy loading (1 MB)
pub const LOAD_CHUNK_SIZE: usize = 1024 * 1024;

/// Chunk alignment for lazy loading (64 KB)
pub const CHUNK_ALIGNMENT: usize = 64 * 1024;
168
/// Configuration passed to TextBuffer constructors.
#[derive(Debug, Clone)]
pub struct BufferConfig {
    /// Estimated average line length in bytes. Used for approximate line number
    /// display in large files and for goto-line byte offset estimation.
    /// Defaults to 80 (see the `Default` impl below).
    pub estimated_line_length: usize,
}
176
177impl Default for BufferConfig {
178    fn default() -> Self {
179        Self {
180            estimated_line_length: 80,
181        }
182    }
183}
184
/// Line ending format used in the file
///
/// `LF` is the default for new buffers (via `#[default]`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineEnding {
    /// Unix/Linux/Mac format (\n)
    #[default]
    LF,
    /// Windows format (\r\n)
    CRLF,
    /// Old Mac format (\r) - rare but supported
    CR,
}
196
197impl LineEnding {
198    /// Get the string representation of this line ending
199    pub fn as_str(&self) -> &'static str {
200        match self {
201            Self::LF => "\n",
202            Self::CRLF => "\r\n",
203            Self::CR => "\r",
204        }
205    }
206
207    /// Get the display name for status bar
208    pub fn display_name(&self) -> &'static str {
209        match self {
210            Self::LF => "LF",
211            Self::CRLF => "CRLF",
212            Self::CR => "CR",
213        }
214    }
215}
216
/// A write recipe built from the piece tree for saving
///
/// Consumed either via `to_write_ops` (for the filesystem's patched-write
/// path) or via `flatten_inserts` (whole-content rewrite when no Copy
/// operations exist).
struct WriteRecipe {
    /// The source file path for Copy operations (if any)
    src_path: Option<PathBuf>,
    /// Data chunks for Insert operations (owned to avoid lifetime issues)
    insert_data: Vec<Vec<u8>>,
    /// Sequence of actions to build the output file
    actions: Vec<RecipeAction>,
}
226
/// An action in a write recipe
///
/// `Copy` references a byte range of the source file; `Insert` references
/// an owned chunk stored in `WriteRecipe::insert_data`.
#[derive(Debug, Clone, Copy)]
enum RecipeAction {
    /// Copy bytes from source file at offset
    Copy { offset: u64, len: u64 },
    /// Insert data from insert_data[index]
    Insert { index: usize },
}
235
236impl WriteRecipe {
237    /// Convert the recipe to WriteOp slice for use with filesystem write_patched
238    fn to_write_ops(&self) -> Vec<WriteOp<'_>> {
239        self.actions
240            .iter()
241            .map(|action| match action {
242                RecipeAction::Copy { offset, len } => WriteOp::Copy {
243                    offset: *offset,
244                    len: *len,
245                },
246                RecipeAction::Insert { index } => WriteOp::Insert {
247                    data: &self.insert_data[*index],
248                },
249            })
250            .collect()
251    }
252
253    /// Check if this recipe has any Copy operations
254    fn has_copy_ops(&self) -> bool {
255        self.actions
256            .iter()
257            .any(|a| matches!(a, RecipeAction::Copy { .. }))
258    }
259
260    /// Flatten all Insert operations into a single buffer.
261    /// Only valid when has_copy_ops() returns false.
262    fn flatten_inserts(&self) -> Vec<u8> {
263        let mut result = Vec::new();
264        for action in &self.actions {
265            if let RecipeAction::Insert { index } = action {
266                result.extend_from_slice(&self.insert_data[*index]);
267            }
268        }
269        result
270    }
271}
272
/// Represents a line number (simplified for new implementation)
/// Legacy enum kept for backwards compatibility - always Absolute now
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineNumber {
    /// Absolute line number - this is the actual line number in the file
    /// (0-based internally; `format` renders it 1-based)
    Absolute(usize),
    /// Relative line number (deprecated - now same as Absolute)
    Relative {
        // 0-based line value; treated the same as `Absolute` by `value()`.
        line: usize,
        // Anchor line for the relative count; ignored by `value()`.
        from_cached_line: usize,
    },
}
285
286impl LineNumber {
287    /// Get the line number value
288    pub fn value(&self) -> usize {
289        match self {
290            Self::Absolute(line) | Self::Relative { line, .. } => *line,
291        }
292    }
293
294    /// Check if this is an absolute line number
295    pub fn is_absolute(&self) -> bool {
296        matches!(self, LineNumber::Absolute(_))
297    }
298
299    /// Check if this is a relative line number
300    pub fn is_relative(&self) -> bool {
301        matches!(self, LineNumber::Relative { .. })
302    }
303
304    /// Format the line number for display
305    pub fn format(&self) -> String {
306        match self {
307            Self::Absolute(line) => format!("{}", line + 1),
308            Self::Relative { line, .. } => format!("~{}", line + 1),
309        }
310    }
311}
312
/// A text buffer that manages document content using a piece table
/// with integrated line tracking
pub struct TextBuffer {
    /// Filesystem abstraction for file I/O operations.
    /// Stored internally so methods can access it without threading through call chains.
    fs: Arc<dyn FileSystem + Send + Sync>,

    /// The piece tree for efficient text manipulation with integrated line tracking
    piece_tree: PieceTree,

    /// Snapshot of the piece tree root at last save (shared via Arc)
    // NOTE(review): presumably compared with the live root to detect unsaved
    // changes — confirm against the save/consolidation paths below.
    saved_root: Arc<crate::model::piece_tree::PieceTreeNode>,

    /// List of string buffers containing chunks of text data
    /// Index 0 is typically the original/stored buffer
    /// Additional buffers are added for modifications
    buffers: Vec<StringBuffer>,

    /// Next buffer ID to assign
    next_buffer_id: usize,

    /// Optional file path for persistence
    file_path: Option<PathBuf>,

    /// Has the buffer been modified since last save?
    modified: bool,

    /// Does the buffer have unsaved changes for recovery auto-save?
    /// This is separate from `modified` because recovery auto-save doesn't
    /// clear `modified` (buffer still differs from on-disk file).
    recovery_pending: bool,

    /// Is this a large file (no line indexing, lazy loading enabled)?
    large_file: bool,

    /// Has a line feed scan been performed on this large file?
    /// When true, piece tree leaves have accurate `line_feed_cnt` values,
    /// and edits will ensure the relevant chunk is loaded before splitting
    /// so that `compute_line_feeds_static` can recount accurately.
    line_feeds_scanned: bool,

    /// Is this a binary file? Binary files are opened read-only and render
    /// unprintable characters as code points.
    is_binary: bool,

    /// Line ending format detected from the file (or default for new files)
    line_ending: LineEnding,

    /// Original line ending format when file was loaded (used for conversion on save)
    /// This tracks what the file had when loaded, so we can detect if the user
    /// changed the line ending format and needs conversion on save.
    original_line_ending: LineEnding,

    /// Text encoding format detected from the file (or default for new files)
    encoding: Encoding,

    /// Original encoding when file was loaded (used for conversion on save)
    /// Similar to original_line_ending, tracks what the file had when loaded.
    original_encoding: Encoding,

    /// The file size on disk after the last save.
    /// Used for chunked recovery to know the original file size for reconstruction.
    /// Updated when loading from file or after saving.
    saved_file_size: Option<usize>,

    /// Monotonic version counter for change tracking.
    /// Advanced with wrapping arithmetic by `bump_version`.
    version: u64,

    /// Buffer configuration (estimated line length, etc.)
    config: BufferConfig,
}
384
/// Snapshot of a TextBuffer's piece tree and associated string buffers.
///
/// Used by BulkEdit undo/redo to capture the complete buffer state.
/// Without this, consolidate_after_save() would destroy the string buffers
/// that a BulkEdit's piece tree snapshot references, causing corruption on undo.
#[derive(Debug, Clone)]
pub struct BufferSnapshot {
    /// Piece tree state at the time of the snapshot.
    pub piece_tree: PieceTree,
    /// String buffers referenced by the snapshot's piece tree.
    pub buffers: Vec<StringBuffer>,
    /// Buffer ID counter at the time of the snapshot.
    pub next_buffer_id: usize,
}
396
397impl TextBuffer {
398    /// Create a new text buffer with the given filesystem implementation.
399    /// Note: large_file_threshold is ignored in the new implementation
400    pub fn new(_large_file_threshold: usize, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
401        let piece_tree = PieceTree::empty();
402        let line_ending = LineEnding::default();
403        let encoding = Encoding::default();
404        TextBuffer {
405            fs,
406            saved_root: piece_tree.root(),
407            piece_tree,
408            buffers: vec![StringBuffer::new(0, Vec::new())],
409            next_buffer_id: 1,
410            file_path: None,
411            modified: false,
412            recovery_pending: false,
413            large_file: false,
414            line_feeds_scanned: false,
415            is_binary: false,
416            line_ending,
417            original_line_ending: line_ending,
418            encoding,
419            original_encoding: encoding,
420            saved_file_size: None,
421            version: 0,
422            config: BufferConfig::default(),
423        }
424    }
425
426    /// Create an empty buffer associated with a file path.
427    /// Used for files that don't exist yet — the path is set so saving will create the file.
428    pub fn new_with_path(
429        large_file_threshold: usize,
430        fs: Arc<dyn FileSystem + Send + Sync>,
431        path: PathBuf,
432    ) -> Self {
433        let mut buffer = Self::new(large_file_threshold, fs);
434        buffer.file_path = Some(path);
435        buffer
436    }
437
    /// Current buffer version (monotonic, wraps on overflow)
    ///
    /// Bumped by `bump_version` (see `mark_content_modified`); callers can
    /// compare versions to detect changes.
    pub fn version(&self) -> u64 {
        self.version
    }
442
    /// Get a reference to the filesystem implementation used by this buffer.
    ///
    /// Returns the `Arc` by reference; clone it if ownership is needed.
    pub fn filesystem(&self) -> &Arc<dyn FileSystem + Send + Sync> {
        &self.fs
    }
447
    /// Set the filesystem implementation for this buffer.
    ///
    /// Only swaps the handle; does not touch `modified` or the version counter.
    pub fn set_filesystem(&mut self, fs: Arc<dyn FileSystem + Send + Sync>) {
        self.fs = fs;
    }
452
    #[inline]
    fn bump_version(&mut self) {
        // Advance the change counter; wrapping_add avoids a panic on overflow.
        self.version = self.version.wrapping_add(1);
    }
457
    #[inline]
    fn mark_content_modified(&mut self) {
        // A content edit dirties both the save state and the recovery
        // auto-save state, and advances the version counter.
        self.modified = true;
        self.recovery_pending = true;
        self.bump_version();
    }
464
465    /// Create a text buffer from raw bytes WITHOUT encoding conversion.
466    /// Used for binary files where we want to preserve the exact bytes.
467    fn from_bytes_raw(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
468        let bytes = content.len();
469
470        // For binary files, detect line ending but don't convert encoding
471        let line_ending = Self::detect_line_ending(&content);
472
473        // Create initial StringBuffer with ID 0
474        let buffer = StringBuffer::new(0, content);
475        let line_feed_cnt = buffer.line_feed_count();
476
477        let piece_tree = if bytes > 0 {
478            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
479        } else {
480            PieceTree::empty()
481        };
482
483        let saved_root = piece_tree.root();
484
485        TextBuffer {
486            fs,
487            line_ending,
488            original_line_ending: line_ending,
489            encoding: Encoding::Utf8, // Binary files treated as raw bytes (no conversion)
490            original_encoding: Encoding::Utf8,
491            piece_tree,
492            saved_root,
493            buffers: vec![buffer],
494            next_buffer_id: 1,
495            file_path: None,
496            modified: false,
497            recovery_pending: false,
498            large_file: false,
499            line_feeds_scanned: false,
500            is_binary: true,
501            saved_file_size: Some(bytes),
502            version: 0,
503            config: BufferConfig::default(),
504        }
505    }
506
507    /// Create a text buffer from initial content with the given filesystem.
508    pub fn from_bytes(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
509        // Auto-detect encoding and convert to UTF-8 if needed
510        let (encoding, utf8_content) = Self::detect_and_convert_encoding(&content);
511
512        let bytes = utf8_content.len();
513
514        // Auto-detect line ending format from content
515        let line_ending = Self::detect_line_ending(&utf8_content);
516
517        // Create initial StringBuffer with ID 0
518        let buffer = StringBuffer::new(0, utf8_content);
519        let line_feed_cnt = buffer.line_feed_count();
520
521        let piece_tree = if bytes > 0 {
522            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
523        } else {
524            PieceTree::empty()
525        };
526
527        let saved_root = piece_tree.root();
528
529        TextBuffer {
530            fs,
531            line_ending,
532            original_line_ending: line_ending,
533            encoding,
534            original_encoding: encoding,
535            piece_tree,
536            saved_root,
537            buffers: vec![buffer],
538            next_buffer_id: 1,
539            file_path: None,
540            modified: false,
541            recovery_pending: false,
542            large_file: false,
543            line_feeds_scanned: false,
544            is_binary: false,
545            saved_file_size: Some(bytes), // Treat initial content as "saved" state
546            version: 0,
547            config: BufferConfig::default(),
548        }
549    }
550
551    /// Create a text buffer from bytes with a specific encoding (no auto-detection).
552    pub fn from_bytes_with_encoding(
553        content: Vec<u8>,
554        encoding: Encoding,
555        fs: Arc<dyn FileSystem + Send + Sync>,
556    ) -> Self {
557        // Convert from specified encoding to UTF-8
558        let utf8_content = encoding::convert_to_utf8(&content, encoding);
559
560        let bytes = utf8_content.len();
561
562        // Auto-detect line ending format from content
563        let line_ending = Self::detect_line_ending(&utf8_content);
564
565        // Create initial StringBuffer with ID 0
566        let buffer = StringBuffer::new(0, utf8_content);
567        let line_feed_cnt = buffer.line_feed_count();
568
569        let piece_tree = if bytes > 0 {
570            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
571        } else {
572            PieceTree::empty()
573        };
574
575        let saved_root = piece_tree.root();
576
577        TextBuffer {
578            fs,
579            line_ending,
580            original_line_ending: line_ending,
581            encoding,
582            original_encoding: encoding,
583            piece_tree,
584            saved_root,
585            buffers: vec![buffer],
586            next_buffer_id: 1,
587            file_path: None,
588            modified: false,
589            recovery_pending: false,
590            large_file: false,
591            line_feeds_scanned: false,
592            is_binary: false,
593            saved_file_size: Some(bytes),
594            version: 0,
595            config: BufferConfig::default(),
596        }
597    }
598
599    /// Create a text buffer from a string with the given filesystem.
600    pub fn from_str(
601        s: &str,
602        _large_file_threshold: usize,
603        fs: Arc<dyn FileSystem + Send + Sync>,
604    ) -> Self {
605        Self::from_bytes(s.as_bytes().to_vec(), fs)
606    }
607
608    /// Create an empty text buffer with the given filesystem.
609    pub fn empty(fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
610        let piece_tree = PieceTree::empty();
611        let saved_root = piece_tree.root();
612        let line_ending = LineEnding::default();
613        let encoding = Encoding::default();
614        TextBuffer {
615            fs,
616            piece_tree,
617            saved_root,
618            buffers: vec![StringBuffer::new(0, Vec::new())],
619            next_buffer_id: 1,
620            file_path: None,
621            modified: false,
622            recovery_pending: false,
623            large_file: false,
624            line_feeds_scanned: false,
625            is_binary: false,
626            line_ending,
627            original_line_ending: line_ending,
628            encoding,
629            original_encoding: encoding,
630            saved_file_size: None,
631            version: 0,
632            config: BufferConfig::default(),
633        }
634    }
635
636    /// Load a text buffer from a file using the given filesystem.
637    pub fn load_from_file<P: AsRef<Path>>(
638        path: P,
639        large_file_threshold: usize,
640        fs: Arc<dyn FileSystem + Send + Sync>,
641    ) -> anyhow::Result<Self> {
642        let path = path.as_ref();
643
644        // Get file size to determine loading strategy
645        let metadata = fs.metadata(path)?;
646        let file_size = metadata.size as usize;
647
648        // Use threshold parameter or default
649        let threshold = if large_file_threshold > 0 {
650            large_file_threshold
651        } else {
652            DEFAULT_LARGE_FILE_THRESHOLD
653        };
654
655        // Choose loading strategy based on file size
656        if file_size >= threshold {
657            Self::load_large_file(path, file_size, fs)
658        } else {
659            Self::load_small_file(path, fs)
660        }
661    }
662
663    /// Load a text buffer from a file with a specific encoding (no auto-detection).
664    pub fn load_from_file_with_encoding<P: AsRef<Path>>(
665        path: P,
666        encoding: Encoding,
667        fs: Arc<dyn FileSystem + Send + Sync>,
668        config: BufferConfig,
669    ) -> anyhow::Result<Self> {
670        let path = path.as_ref();
671        let contents = fs.read_file(path)?;
672
673        let mut buffer = Self::from_bytes_with_encoding(contents, encoding, fs);
674        buffer.file_path = Some(path.to_path_buf());
675        buffer.modified = false;
676        buffer.config = config;
677        Ok(buffer)
678    }
679
680    /// Load a small file with full eager loading and line indexing
681    fn load_small_file(path: &Path, fs: Arc<dyn FileSystem + Send + Sync>) -> anyhow::Result<Self> {
682        let contents = fs.read_file(path)?;
683
684        // Use unified encoding/binary detection
685        let (encoding, is_binary) = Self::detect_encoding_or_binary(&contents, false);
686
687        // For binary files, skip encoding conversion to preserve raw bytes
688        let mut buffer = if is_binary {
689            Self::from_bytes_raw(contents, fs)
690        } else {
691            // from_bytes handles encoding detection/conversion and line ending detection
692            Self::from_bytes(contents, fs)
693        };
694        buffer.file_path = Some(path.to_path_buf());
695        buffer.modified = false;
696        buffer.large_file = false;
697        buffer.is_binary = is_binary;
698        // For binary files, ensure encoding matches detection
699        if is_binary {
700            buffer.encoding = encoding;
701            buffer.original_encoding = encoding;
702        }
703        // Note: line_ending and encoding are already set by from_bytes/from_bytes_raw
704        Ok(buffer)
705    }
706
707    /// Check if loading a large file requires user confirmation due to encoding.
708    ///
709    /// Some encodings (like Shift-JIS, GB18030, GBK, EUC-KR) cannot be "resynchronized" -
710    /// meaning you cannot determine character boundaries when jumping into the middle
711    /// of a file. These encodings require loading the entire file into memory.
712    ///
713    /// Returns `Some(confirmation)` if user confirmation is needed, `None` if the file
714    /// can be loaded with lazy/streaming loading.
715    pub fn check_large_file_encoding(
716        path: impl AsRef<Path>,
717        fs: Arc<dyn FileSystem + Send + Sync>,
718    ) -> anyhow::Result<Option<LargeFileEncodingConfirmation>> {
719        let path = path.as_ref();
720        let metadata = fs.metadata(path)?;
721        let file_size = metadata.size as usize;
722
723        // Only check for large files
724        if file_size < DEFAULT_LARGE_FILE_THRESHOLD {
725            return Ok(None);
726        }
727
728        // Read a sample to detect encoding
729        let sample_size = file_size.min(8 * 1024);
730        let sample = fs.read_range(path, 0, sample_size)?;
731        let (encoding, is_binary) =
732            Self::detect_encoding_or_binary(&sample, file_size > sample_size);
733
734        // Binary files don't need confirmation (loaded as-is)
735        if is_binary {
736            return Ok(None);
737        }
738
739        // Check if the encoding requires full file loading
740        if encoding.requires_full_file_load() {
741            return Ok(Some(LargeFileEncodingConfirmation {
742                path: path.to_path_buf(),
743                file_size,
744                encoding,
745            }));
746        }
747
748        Ok(None)
749    }
750
    /// Load a large file with unloaded buffer (no line indexing, lazy loading)
    ///
    /// This is the unconfirmed entry point: when the detected encoding is
    /// non-resynchronizable, `load_large_file_internal` bails with a
    /// `LargeFileEncodingConfirmation` instead of loading the whole file.
    /// After the user confirms, `load_large_file_confirmed` is used instead.
    fn load_large_file(
        path: &Path,
        file_size: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
    ) -> anyhow::Result<Self> {
        // force_full_load = false: never fully load a non-resynchronizable
        // encoding without explicit user confirmation.
        Self::load_large_file_internal(path, file_size, fs, false)
    }
762
763    /// Load a large file, optionally forcing full load for non-resynchronizable encodings.
764    ///
765    /// Called with `force_full_load=true` after user confirms the warning about
766    /// non-resynchronizable encodings requiring full file loading.
767    pub fn load_large_file_confirmed(
768        path: impl AsRef<Path>,
769        fs: Arc<dyn FileSystem + Send + Sync>,
770    ) -> anyhow::Result<Self> {
771        let path = path.as_ref();
772        let metadata = fs.metadata(path)?;
773        let file_size = metadata.size as usize;
774        Self::load_large_file_internal(path, file_size, fs, true)
775    }
776
    /// Internal implementation for loading large files.
    ///
    /// Branches, in order: binary → raw full load; non-resynchronizable
    /// encoding without confirmation → bail with `LargeFileEncodingConfirmation`;
    /// non-UTF-8/ASCII → full load with conversion; otherwise lazy loading
    /// via an `Unloaded` buffer.
    fn load_large_file_internal(
        path: &Path,
        file_size: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
        force_full_load: bool,
    ) -> anyhow::Result<Self> {
        use crate::model::piece_tree::{BufferData, BufferLocation};

        // Read a sample of the file to detect encoding and whether it's binary
        // We read the first 8KB for detection
        let sample_size = file_size.min(8 * 1024);
        let sample = fs.read_range(path, 0, sample_size)?;

        // Use unified encoding/binary detection
        let (encoding, is_binary) =
            Self::detect_encoding_or_binary(&sample, file_size > sample_size);

        // Binary files skip encoding conversion to preserve raw bytes
        if is_binary {
            tracing::info!("Large binary file detected, loading without encoding conversion");
            let contents = fs.read_file(path)?;
            let mut buffer = Self::from_bytes_raw(contents, fs);
            buffer.file_path = Some(path.to_path_buf());
            buffer.modified = false;
            buffer.large_file = true;
            // from_bytes_raw hardcodes UTF-8; restore the detected encoding.
            buffer.encoding = encoding;
            buffer.original_encoding = encoding;
            return Ok(buffer);
        }

        // Check if encoding requires full file loading
        let requires_full_load = encoding.requires_full_file_load();

        // For non-resynchronizable encodings, require confirmation unless forced
        if requires_full_load && !force_full_load {
            anyhow::bail!(LargeFileEncodingConfirmation {
                path: path.to_path_buf(),
                file_size,
                encoding,
            });
        }

        // For encodings that require full load (non-resynchronizable or non-UTF-8),
        // load the entire file and convert
        if !matches!(encoding, Encoding::Utf8 | Encoding::Ascii) {
            tracing::info!(
                "Large file with non-UTF-8 encoding ({:?}), loading fully for conversion",
                encoding
            );
            let contents = fs.read_file(path)?;
            let mut buffer = Self::from_bytes(contents, fs);
            buffer.file_path = Some(path.to_path_buf());
            buffer.modified = false;
            buffer.large_file = true; // Still mark as large file for UI purposes
            buffer.is_binary = is_binary;
            return Ok(buffer);
        }

        // UTF-8/ASCII files can use lazy loading
        // Line ending is detected from the sample only, not the whole file.
        let line_ending = Self::detect_line_ending(&sample);

        // Create an unloaded buffer that references the entire file
        let buffer = StringBuffer {
            id: 0,
            data: BufferData::Unloaded {
                file_path: path.to_path_buf(),
                file_offset: 0,
                bytes: file_size,
            },
            stored_file_offset: None,
        };

        // Create piece tree with a single piece covering the whole file
        // No line feed count (None) since we're not computing line indexing
        let piece_tree = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
        } else {
            PieceTree::empty()
        };
        let saved_root = piece_tree.root();

        tracing::debug!(
            "Buffer::load_from_file: loaded {} bytes, saved_file_size={}",
            file_size,
            file_size
        );

        Ok(TextBuffer {
            fs,
            piece_tree,
            saved_root,
            buffers: vec![buffer],
            next_buffer_id: 1,
            file_path: Some(path.to_path_buf()),
            modified: false,
            recovery_pending: false,
            large_file: true,
            line_feeds_scanned: false,
            is_binary,
            line_ending,
            original_line_ending: line_ending,
            encoding,
            original_encoding: encoding,
            saved_file_size: Some(file_size),
            version: 0,
            config: BufferConfig::default(),
        })
    }
886
887    /// Save the buffer to its associated file
888    pub fn save(&mut self) -> anyhow::Result<()> {
889        if let Some(path) = &self.file_path {
890            self.save_to_file(path.clone())
891        } else {
892            anyhow::bail!(io::Error::new(
893                io::ErrorKind::NotFound,
894                "No file path associated with buffer",
895            ))
896        }
897    }
898
    /// Check if we should use in-place writing to preserve file ownership.
    /// Returns true if the file exists and is owned by a different user.
    /// On Unix, only root or the file owner can change file ownership with chown.
    /// When the current user is not the file owner, using atomic write (temp file + rename)
    /// would change the file's ownership to the current user. To preserve ownership,
    /// we must write directly to the existing file instead.
    fn should_use_inplace_write(&self, dest_path: &Path) -> bool {
        // Delegate the ownership check to the filesystem abstraction so that
        // remote and test filesystems answer consistently.
        !self.fs.is_owner(dest_path)
    }
908
    /// Build a write recipe from the piece tree for saving.
    ///
    /// This creates a recipe of Copy and Insert operations that can reconstruct
    /// the buffer content. Copy operations reference unchanged regions in the
    /// source file, while Insert operations contain new/modified data.
    ///
    /// # Returns
    /// A WriteRecipe with the source path, insert data, and sequence of actions.
    ///
    /// # Errors
    /// Fails with `InvalidData` if a piece references a buffer id that does not
    /// exist, or propagates I/O errors from reading unloaded regions.
    fn build_write_recipe(&self) -> io::Result<WriteRecipe> {
        // Total document length in bytes; the piece iteration below covers [0, total).
        let total = self.total_bytes();

        // Determine the source file for Copy operations (if any)
        // We can only use Copy if:
        // 1. We have a source file path
        // 2. The source file exists
        // 3. No line ending conversion is needed
        // 4. No encoding conversion is needed
        let needs_line_ending_conversion = self.line_ending != self.original_line_ending;
        // We need encoding conversion if:
        // - NOT a binary file (binary files preserve raw bytes), AND
        // - Either the encoding changed from the original, OR
        // - The target encoding isn't plain UTF-8/ASCII (since internal storage is UTF-8)
        // For example: UTF-8 BOM files are stored as UTF-8, so we need to add BOM on save
        let needs_encoding_conversion = !self.is_binary
            && (self.encoding != self.original_encoding
                || !matches!(self.encoding, Encoding::Utf8 | Encoding::Ascii));
        let needs_conversion = needs_line_ending_conversion || needs_encoding_conversion;

        // Any conversion invalidates byte-for-byte Copy from the original file,
        // so in that case every piece must be materialized as an Insert.
        let src_path_for_copy: Option<&Path> = if needs_conversion {
            None
        } else {
            self.file_path.as_deref().filter(|p| self.fs.exists(p))
        };
        let target_ending = self.line_ending;
        let target_encoding = self.encoding;

        // insert_data holds owned byte chunks; Insert actions reference them by index.
        let mut insert_data: Vec<Vec<u8>> = Vec::new();
        let mut actions: Vec<RecipeAction> = Vec::new();

        // Add BOM as the first piece if the target encoding has one
        if let Some(bom) = target_encoding.bom_bytes() {
            insert_data.push(bom.to_vec());
            actions.push(RecipeAction::Insert { index: 0 });
        }

        for piece_view in self.piece_tree.iter_pieces_in_range(0, total) {
            let buffer_id = piece_view.location.buffer_id();
            let buffer = self.buffers.get(buffer_id).ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!("Buffer {} not found", buffer_id),
                )
            })?;

            match &buffer.data {
                // Unloaded buffer: can use Copy if same source file, else load and send
                BufferData::Unloaded {
                    file_path,
                    file_offset,
                    ..
                } => {
                    // Can only use Copy if:
                    // - This is a Stored piece (original file content)
                    // - We have a valid source for copying
                    // - This buffer is from that source
                    // - No line ending or encoding conversion needed
                    let can_copy = matches!(piece_view.location, BufferLocation::Stored(_))
                        && src_path_for_copy.is_some_and(|src| file_path == src);

                    if can_copy {
                        let src_offset = (*file_offset + piece_view.buffer_offset) as u64;
                        actions.push(RecipeAction::Copy {
                            offset: src_offset,
                            len: piece_view.bytes as u64,
                        });
                        continue;
                    }

                    // Need to load and send this unloaded region
                    // This happens when: different source file, or conversion needed
                    // NOTE(review): assumes read_range returns exactly `bytes`
                    // bytes — verify against the FileSystem contract.
                    let data = self.fs.read_range(
                        file_path,
                        (*file_offset + piece_view.buffer_offset) as u64,
                        piece_view.bytes,
                    )?;

                    let data = if needs_line_ending_conversion {
                        Self::convert_line_endings_to(&data, target_ending)
                    } else {
                        data
                    };

                    // Convert encoding if needed
                    let data = if needs_encoding_conversion {
                        Self::convert_to_encoding(&data, target_encoding)
                    } else {
                        data
                    };

                    let index = insert_data.len();
                    insert_data.push(data);
                    actions.push(RecipeAction::Insert { index });
                }

                // Loaded data: send as Insert
                BufferData::Loaded { data, .. } => {
                    // Slice out just the span of the buffer this piece covers.
                    let start = piece_view.buffer_offset;
                    let end = start + piece_view.bytes;
                    let chunk = &data[start..end];

                    let chunk = if needs_line_ending_conversion {
                        Self::convert_line_endings_to(chunk, target_ending)
                    } else {
                        chunk.to_vec()
                    };

                    // Convert encoding if needed
                    let chunk = if needs_encoding_conversion {
                        Self::convert_to_encoding(&chunk, target_encoding)
                    } else {
                        chunk
                    };

                    let index = insert_data.len();
                    insert_data.push(chunk);
                    actions.push(RecipeAction::Insert { index });
                }
            }
        }

        Ok(WriteRecipe {
            src_path: src_path_for_copy.map(|p| p.to_path_buf()),
            insert_data,
            actions,
        })
    }
1045
1046    /// Create a temporary file for saving.
1047    ///
1048    /// Tries to create the file in the same directory as the destination file first
1049    /// to allow for an atomic rename. If that fails (e.g., due to directory permissions),
1050    /// falls back to the system temporary directory.
1051    fn create_temp_file(
1052        &self,
1053        dest_path: &Path,
1054    ) -> io::Result<(PathBuf, Box<dyn crate::model::filesystem::FileWriter>)> {
1055        // Try creating in same directory first
1056        let same_dir_temp = self.fs.temp_path_for(dest_path);
1057        match self.fs.create_file(&same_dir_temp) {
1058            Ok(file) => Ok((same_dir_temp, file)),
1059            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
1060                // Fallback to system temp directory
1061                let temp_path = self.fs.unique_temp_path(dest_path);
1062                let file = self.fs.create_file(&temp_path)?;
1063                Ok((temp_path, file))
1064            }
1065            Err(e) => Err(e),
1066        }
1067    }
1068
1069    /// Create a temporary file in the recovery directory for in-place writes.
1070    /// This allows recovery if a crash occurs during the in-place write operation.
1071    fn create_recovery_temp_file(
1072        &self,
1073        dest_path: &Path,
1074    ) -> io::Result<(PathBuf, Box<dyn crate::model::filesystem::FileWriter>)> {
1075        // Get recovery directory: $XDG_DATA_HOME/fresh/recovery or ~/.local/share/fresh/recovery
1076        let recovery_dir = crate::input::input_history::get_data_dir()
1077            .map(|d| d.join("recovery"))
1078            .unwrap_or_else(|_| std::env::temp_dir());
1079
1080        // Ensure directory exists
1081        self.fs.create_dir_all(&recovery_dir)?;
1082
1083        // Create unique filename based on destination file and timestamp
1084        let file_name = dest_path
1085            .file_name()
1086            .unwrap_or_else(|| std::ffi::OsStr::new("fresh-save"));
1087        let timestamp = std::time::SystemTime::now()
1088            .duration_since(std::time::UNIX_EPOCH)
1089            .map(|d| d.as_nanos())
1090            .unwrap_or(0);
1091        let pid = std::process::id();
1092
1093        let temp_name = format!(
1094            ".inplace-{}-{}-{}.tmp",
1095            file_name.to_string_lossy(),
1096            pid,
1097            timestamp
1098        );
1099        let temp_path = recovery_dir.join(temp_name);
1100
1101        let file = self.fs.create_file(&temp_path)?;
1102        Ok((temp_path, file))
1103    }
1104
1105    /// Get the path for in-place write recovery metadata.
1106    /// Uses the same recovery directory as temp files.
1107    fn inplace_recovery_meta_path(&self, dest_path: &Path) -> PathBuf {
1108        let recovery_dir = crate::input::input_history::get_data_dir()
1109            .map(|d| d.join("recovery"))
1110            .unwrap_or_else(|_| std::env::temp_dir());
1111
1112        let hash = crate::services::recovery::path_hash(dest_path);
1113        recovery_dir.join(format!("{}.inplace.json", hash))
1114    }
1115
    /// Write in-place recovery metadata using self.fs.
    /// This is called before the dangerous streaming step so we can recover on crash.
    ///
    /// # Arguments
    /// * `meta_path` - where the JSON metadata file is written
    /// * `dest_path` - the file being saved in place
    /// * `temp_path` - the fully-written temp copy of the new content
    /// * `original_metadata` - ownership/permissions of the destination, if it existed
    fn write_inplace_recovery_meta(
        &self,
        meta_path: &Path,
        dest_path: &Path,
        temp_path: &Path,
        original_metadata: &Option<FileMetadata>,
    ) -> io::Result<()> {
        // On Unix, capture uid/gid/mode so a recovery pass can restore
        // ownership; missing fields fall back to uid/gid 0 and mode 0o644.
        #[cfg(unix)]
        let (uid, gid, mode) = original_metadata
            .as_ref()
            .map(|m| {
                (
                    m.uid.unwrap_or(0),
                    m.gid.unwrap_or(0),
                    m.permissions.as_ref().map(|p| p.mode()).unwrap_or(0o644),
                )
            })
            .unwrap_or((0, 0, 0o644));
        // Non-Unix platforms have no uid/gid concept; record placeholders.
        #[cfg(not(unix))]
        let (uid, gid, mode) = (0u32, 0u32, 0o644u32);

        let recovery = crate::services::recovery::InplaceWriteRecovery::new(
            dest_path.to_path_buf(),
            temp_path.to_path_buf(),
            uid,
            gid,
            mode,
        );

        // A serialization failure is surfaced as an InvalidData I/O error so
        // this function has a uniform io::Result interface.
        let json = serde_json::to_string_pretty(&recovery)
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        self.fs.write_file(meta_path, json.as_bytes())
    }
1152
    /// Save the buffer to a specific file
    ///
    /// Uses the write recipe approach for both local and remote filesystems:
    /// - Copy ops reference unchanged regions in the source file
    /// - Insert ops contain new/modified data
    ///
    /// For remote filesystems, the recipe is sent to the agent which reconstructs
    /// the file server-side, avoiding transfer of unchanged content.
    ///
    /// For local filesystems with ownership concerns (file owned by another user),
    /// uses in-place writing to preserve ownership. Otherwise uses atomic writes.
    ///
    /// If the line ending format has been changed (via set_line_ending), all content
    /// will be converted to the new format during save.
    ///
    /// # Errors
    /// Propagates filesystem errors. For local saves hitting `PermissionDenied`,
    /// the content is staged into a temp file and a `SudoSaveRequired` error is
    /// returned so the caller can finish the save with elevated privileges.
    pub fn save_to_file<P: AsRef<Path>>(&mut self, path: P) -> anyhow::Result<()> {
        let dest_path = path.as_ref();
        let total = self.total_bytes();

        // Handle empty files
        if total == 0 {
            self.fs.write_file(dest_path, &[])?;
            self.finalize_save(dest_path)?;
            return Ok(());
        }

        // Build the write recipe (unified for all filesystem types)
        let recipe = self.build_write_recipe()?;
        let ops = recipe.to_write_ops();

        // Check if we need in-place writing to preserve file ownership (local only)
        // Remote filesystems handle this differently
        let is_local = self.fs.remote_connection_info().is_none();
        let use_inplace = is_local && self.should_use_inplace_write(dest_path);

        if use_inplace {
            // In-place write: write directly to preserve ownership
            self.save_with_inplace_write(dest_path, &recipe)?;
        } else if !recipe.has_copy_ops() && !is_local {
            // Remote with no Copy ops: use write_file directly (more efficient)
            let data = recipe.flatten_inserts();
            self.fs.write_file(dest_path, &data)?;
        } else if is_local {
            // Local: use write_file or write_patched with sudo fallback
            let write_result = if !recipe.has_copy_ops() {
                let data = recipe.flatten_inserts();
                self.fs.write_file(dest_path, &data)
            } else {
                // Copy ops read from the recipe's source file; fall back to the
                // destination itself when no source path was recorded.
                let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
                self.fs.write_patched(src_for_patch, dest_path, &ops)
            };

            if let Err(e) = write_result {
                if e.kind() == io::ErrorKind::PermissionDenied {
                    // Create temp file and return sudo error
                    let original_metadata = self.fs.metadata_if_exists(dest_path);
                    let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
                    self.write_recipe_to_file(&mut temp_file, &recipe)?;
                    temp_file.sync_all()?;
                    drop(temp_file);
                    return Err(self.make_sudo_error(temp_path, dest_path, original_metadata));
                }
                return Err(e.into());
            }
        } else {
            // Remote with Copy ops: use write_patched
            let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
            self.fs.write_patched(src_for_patch, dest_path, &ops)?;
        }

        // Update bookkeeping (size, snapshot, consolidation) after a successful write.
        self.finalize_save(dest_path)?;
        Ok(())
    }
1225
    /// Write using in-place mode to preserve file ownership.
    ///
    /// This is used when the file is owned by a different user and we need
    /// to write directly to the existing file to preserve its ownership.
    ///
    /// The approach:
    /// 1. Write the recipe to a temp file first (reads from original, writes to temp)
    /// 2. Stream the temp file content to the destination file (truncates and writes)
    /// 3. Delete the temp file
    ///
    /// This avoids the bug where truncating the destination before reading Copy chunks
    /// would corrupt the file. It also works for huge files since we stream in chunks.
    ///
    /// # Errors
    /// On `PermissionDenied` opening the destination, returns a `SudoSaveRequired`
    /// error that keeps the temp file alive for the privileged retry. Other
    /// failures after the temp file is written deliberately leave the temp file
    /// and recovery metadata on disk so a later recovery pass can restore data.
    fn save_with_inplace_write(
        &self,
        dest_path: &Path,
        recipe: &WriteRecipe,
    ) -> anyhow::Result<()> {
        let original_metadata = self.fs.metadata_if_exists(dest_path);

        // Optimization: if no Copy ops, we can write directly without a temp file
        // (same as the non-inplace path for small files)
        if !recipe.has_copy_ops() {
            let data = recipe.flatten_inserts();
            return self.write_data_inplace(dest_path, &data, original_metadata);
        }

        // Step 1: Write recipe to a temp file in the recovery directory
        // This reads Copy chunks from the original file (still intact) and writes to temp.
        // Using the recovery directory allows crash recovery if the operation fails.
        let (temp_path, mut temp_file) = self.create_recovery_temp_file(dest_path)?;
        if let Err(e) = self.write_recipe_to_file(&mut temp_file, recipe) {
            // Best-effort cleanup of temp file on write failure
            #[allow(clippy::let_underscore_must_use)]
            let _ = self.fs.remove_file(&temp_path);
            return Err(e.into());
        }
        temp_file.sync_all()?;
        drop(temp_file);

        // Step 1.5: Save recovery metadata before the dangerous step
        // If we crash during step 2, this metadata + temp file allows recovery
        let recovery_meta_path = self.inplace_recovery_meta_path(dest_path);
        // Best effort - don't fail the save if we can't write recovery metadata
        #[allow(clippy::let_underscore_must_use)]
        let _ = self.write_inplace_recovery_meta(
            &recovery_meta_path,
            dest_path,
            &temp_path,
            &original_metadata,
        );

        // Step 2: Stream temp file content to destination
        // Now it's safe to truncate the destination since all data is in temp
        match self.fs.open_file_for_write(dest_path) {
            Ok(mut out_file) => {
                if let Err(e) = self.stream_file_to_writer(&temp_path, &mut out_file) {
                    // Don't delete temp file or recovery metadata - allow recovery
                    return Err(e.into());
                }
                out_file.sync_all()?;
                // Success! Clean up temp file and recovery metadata (best-effort)
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&temp_path);
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&recovery_meta_path);
                Ok(())
            }
            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                // Can't write to destination - trigger sudo fallback
                // Keep temp file for sudo to use, clean up recovery metadata (best-effort)
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&recovery_meta_path);
                Err(self.make_sudo_error(temp_path, dest_path, original_metadata))
            }
            Err(e) => {
                // Don't delete temp file or recovery metadata - allow recovery
                Err(e.into())
            }
        }
    }
1306
1307    /// Write data directly to a file in-place, with sudo fallback on permission denied.
1308    fn write_data_inplace(
1309        &self,
1310        dest_path: &Path,
1311        data: &[u8],
1312        original_metadata: Option<FileMetadata>,
1313    ) -> anyhow::Result<()> {
1314        match self.fs.open_file_for_write(dest_path) {
1315            Ok(mut out_file) => {
1316                out_file.write_all(data)?;
1317                out_file.sync_all()?;
1318                Ok(())
1319            }
1320            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
1321                // Create temp file for sudo fallback
1322                let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
1323                temp_file.write_all(data)?;
1324                temp_file.sync_all()?;
1325                drop(temp_file);
1326                Err(self.make_sudo_error(temp_path, dest_path, original_metadata))
1327            }
1328            Err(e) => Err(e.into()),
1329        }
1330    }
1331
1332    /// Stream a file's content to a writer in chunks to avoid memory issues with large files.
1333    fn stream_file_to_writer(
1334        &self,
1335        src_path: &Path,
1336        out_file: &mut Box<dyn crate::model::filesystem::FileWriter>,
1337    ) -> io::Result<()> {
1338        const CHUNK_SIZE: usize = 1024 * 1024; // 1MB chunks
1339
1340        let file_size = self.fs.metadata(src_path)?.size;
1341        let mut offset = 0u64;
1342
1343        while offset < file_size {
1344            let remaining = file_size - offset;
1345            let chunk_len = std::cmp::min(remaining, CHUNK_SIZE as u64) as usize;
1346            let chunk = self.fs.read_range(src_path, offset, chunk_len)?;
1347            out_file.write_all(&chunk)?;
1348            offset += chunk_len as u64;
1349        }
1350
1351        Ok(())
1352    }
1353
1354    /// Write the recipe content to a file writer.
1355    fn write_recipe_to_file(
1356        &self,
1357        out_file: &mut Box<dyn crate::model::filesystem::FileWriter>,
1358        recipe: &WriteRecipe,
1359    ) -> io::Result<()> {
1360        for action in &recipe.actions {
1361            match action {
1362                RecipeAction::Copy { offset, len } => {
1363                    // Read from source and write to output
1364                    let src_path = recipe.src_path.as_ref().ok_or_else(|| {
1365                        io::Error::new(io::ErrorKind::InvalidData, "Copy action without source")
1366                    })?;
1367                    let data = self.fs.read_range(src_path, *offset, *len as usize)?;
1368                    out_file.write_all(&data)?;
1369                }
1370                RecipeAction::Insert { index } => {
1371                    out_file.write_all(&recipe.insert_data[*index])?;
1372                }
1373            }
1374        }
1375        Ok(())
1376    }
1377
1378    /// Finalize save state after successful write.
1379    fn finalize_save(&mut self, dest_path: &Path) -> anyhow::Result<()> {
1380        let new_size = self.fs.metadata(dest_path)?.size as usize;
1381        tracing::debug!(
1382            "Buffer::save: updating saved_file_size from {:?} to {}",
1383            self.saved_file_size,
1384            new_size
1385        );
1386        self.saved_file_size = Some(new_size);
1387        self.file_path = Some(dest_path.to_path_buf());
1388
1389        // Consolidate the piece tree to synchronize with disk (for large files)
1390        // or to simplify structure (for small files).
1391        self.consolidate_after_save(dest_path, new_size);
1392
1393        self.mark_saved_snapshot();
1394        self.original_line_ending = self.line_ending;
1395        self.original_encoding = self.encoding;
1396        Ok(())
1397    }
1398
1399    /// Finalize buffer state after an external save operation (e.g., via sudo).
1400    ///
1401    /// This updates the saved snapshot and file size to match the new state on disk.
1402    pub fn finalize_external_save(&mut self, dest_path: PathBuf) -> anyhow::Result<()> {
1403        let new_size = self.fs.metadata(&dest_path)?.size as usize;
1404        self.saved_file_size = Some(new_size);
1405        self.file_path = Some(dest_path.clone());
1406
1407        // Consolidate the piece tree to synchronize with disk or simplify structure.
1408        self.consolidate_after_save(&dest_path, new_size);
1409
1410        self.mark_saved_snapshot();
1411        self.original_line_ending = self.line_ending;
1412        self.original_encoding = self.encoding;
1413        Ok(())
1414    }
1415
1416    /// Consolidate the piece tree into a single piece.
1417    /// For large files, this creates a reference to the disk file to save memory and sync offsets.
1418    /// For small files, this flattens all edits into a single in-memory buffer.
1419    fn consolidate_after_save(&mut self, path: &Path, file_size: usize) {
1420        if self.large_file {
1421            self.consolidate_large_file(path, file_size);
1422        } else {
1423            self.consolidate_small_file();
1424        }
1425    }
1426
1427    /// Consolidate large file piece tree into a single piece pointing to the new file.
1428    /// This ensures that subsequent operations correctly reference the new content and offsets.
1429    /// Preserves total line feed count from the old tree if a scan was previously done.
1430    fn consolidate_large_file(&mut self, path: &Path, file_size: usize) {
1431        // Preserve line feed count from the old tree if we had scanned it
1432        let preserved_lf = if self.line_feeds_scanned {
1433            self.piece_tree.line_count().map(|c| c.saturating_sub(1))
1434        } else {
1435            None
1436        };
1437
1438        let buffer = StringBuffer {
1439            id: 0,
1440            data: BufferData::Unloaded {
1441                file_path: path.to_path_buf(),
1442                file_offset: 0,
1443                bytes: file_size,
1444            },
1445            stored_file_offset: None,
1446        };
1447
1448        self.piece_tree = if file_size > 0 {
1449            PieceTree::new(BufferLocation::Stored(0), 0, file_size, preserved_lf)
1450        } else {
1451            PieceTree::empty()
1452        };
1453
1454        self.buffers = vec![buffer];
1455        self.next_buffer_id = 1;
1456
1457        tracing::debug!(
1458            "Buffer::consolidate_large_file: consolidated into single piece of {} bytes",
1459            file_size
1460        );
1461    }
1462
1463    /// Consolidate small file edits into a single in-memory buffer and re-index lines.
1464    fn consolidate_small_file(&mut self) {
1465        if let Some(bytes) = self.get_all_text() {
1466            let line_feed_cnt = bytes.iter().filter(|&&b| b == b'\n').count();
1467            let len = bytes.len();
1468
1469            // Create a single loaded buffer with line indexing
1470            let buffer = StringBuffer::new_loaded(0, bytes, true);
1471
1472            self.piece_tree = if len > 0 {
1473                PieceTree::new(BufferLocation::Stored(0), 0, len, Some(line_feed_cnt))
1474            } else {
1475                PieceTree::empty()
1476            };
1477
1478            self.buffers = vec![buffer];
1479            self.next_buffer_id = 1;
1480
1481            tracing::debug!(
1482                "Buffer::consolidate_small_file: consolidated into single loaded buffer of {} bytes",
1483                len
1484            );
1485        }
1486    }
1487
    /// Internal helper to create a SudoSaveRequired error.
    ///
    /// Packages everything a privileged retry needs: the temp file holding the
    /// new content, the destination path, and (on Unix) the original uid/gid
    /// and permission bits so ownership can be restored after the sudo copy.
    fn make_sudo_error(
        &self,
        temp_path: PathBuf,
        dest_path: &Path,
        original_metadata: Option<FileMetadata>,
    ) -> anyhow::Error {
        // 0o7777 keeps the permission bits (including setuid/setgid/sticky)
        // and strips the file-type bits from the raw mode.
        #[cfg(unix)]
        let (uid, gid, mode) = if let Some(ref meta) = original_metadata {
            (
                meta.uid.unwrap_or(0),
                meta.gid.unwrap_or(0),
                meta.permissions
                    .as_ref()
                    .map(|p| p.mode() & 0o7777)
                    .unwrap_or(0),
            )
        } else {
            (0, 0, 0)
        };
        // No uid/gid concept off-Unix; zeros act as placeholders.
        #[cfg(not(unix))]
        let (uid, gid, mode) = (0u32, 0u32, 0u32);

        let _ = original_metadata; // suppress unused warning on non-Unix

        anyhow::anyhow!(SudoSaveRequired {
            temp_path,
            dest_path: dest_path.to_path_buf(),
            uid,
            gid,
            mode,
        })
    }
1521
    /// Get the total number of bytes in the document.
    /// Delegates to the piece tree, which is the single source of truth for content.
    pub fn total_bytes(&self) -> usize {
        self.piece_tree.total_bytes()
    }
1526
    /// Get the total number of lines in the document
    /// Uses the piece tree's integrated line tracking
    /// Returns None if line count is unknown (e.g., for large files without line indexing)
    pub fn line_count(&self) -> Option<usize> {
        self.piece_tree.line_count()
    }
1533
    /// Snapshot the current tree as the saved baseline and clear the
    /// modified flag; `diff_since_saved()` compares against this root.
    pub fn mark_saved_snapshot(&mut self) {
        self.saved_root = self.piece_tree.root();
        self.modified = false;
    }
1539
    /// Refresh the saved root to match the current tree structure without
    /// clearing the modified flag.  Call this after structural-only changes
    /// (e.g. chunk_split_and_load during search scan) so that
    /// `diff_since_saved()` can take the fast `Arc::ptr_eq` path.
    pub fn refresh_saved_root_if_unmodified(&mut self) {
        // Only safe when unmodified: otherwise we would discard the baseline
        // that pending edits are diffed against.
        if !self.modified {
            self.saved_root = self.piece_tree.root();
        }
    }
1549
    /// Apply a chunk-load buffer replacement to `saved_root`.
    ///
    /// When viewport loading converts a `Stored(buffer_id)` piece to
    /// `Added(new_buffer_id)` in the current tree and the buffer is already
    /// modified, we must apply the same transformation to `saved_root` so
    /// that `diff_since_saved()` can match loaded-but-unedited regions by
    /// `(location, offset)` identity.
    ///
    /// All offsets in this function are *buffer* coordinates (offsets within
    /// the old buffer), not document offsets. Each leaf overlapping the chunk
    /// is split into up to three leaves: a prefix that stays in the old
    /// buffer, the overlapped middle remapped into the new chunk buffer, and
    /// a suffix that stays in the old buffer.
    fn apply_chunk_load_to_saved_root(
        &mut self,
        old_buffer_id: usize,
        chunk_offset_in_buffer: usize,
        chunk_bytes: usize,
        new_buffer_id: usize,
    ) {
        use crate::model::piece_tree::{LeafData, PieceTree};

        // Flatten the saved tree so its leaves can be rewritten in order.
        let mut leaves = Vec::new();
        self.saved_root.collect_leaves(&mut leaves);

        let mut modified = false;
        // A split adds at most two extra leaves per overlapping run.
        let mut new_leaves: Vec<LeafData> = Vec::with_capacity(leaves.len() + 2);

        for leaf in &leaves {
            // Leaves that live in other buffers are unaffected.
            if leaf.location.buffer_id() != old_buffer_id {
                new_leaves.push(*leaf);
                continue;
            }

            let leaf_start = leaf.offset;
            let leaf_end = leaf.offset + leaf.bytes;
            let chunk_start = chunk_offset_in_buffer;
            let chunk_end = chunk_offset_in_buffer + chunk_bytes;

            // Check if this leaf overlaps the chunk range
            if chunk_start >= leaf_end || chunk_end <= leaf_start {
                // No overlap — keep as-is
                new_leaves.push(*leaf);
                continue;
            }

            modified = true;

            // Prefix: portion of this leaf before the chunk
            if chunk_start > leaf_start {
                new_leaves.push(LeafData::new(
                    leaf.location,
                    leaf.offset,
                    chunk_start - leaf_start,
                    None, // line feed count unknown after split
                ));
            }

            // The chunk itself — replaced with Added(new_buffer_id).
            // The new buffer's byte 0 corresponds to chunk_start in the old
            // buffer, so the leaf offset is relative to the chunk.
            let actual_start = chunk_start.max(leaf_start);
            let actual_end = chunk_end.min(leaf_end);
            let offset_in_chunk = actual_start - chunk_start;
            new_leaves.push(LeafData::new(
                BufferLocation::Added(new_buffer_id),
                offset_in_chunk,
                actual_end - actual_start,
                None,
            ));

            // Suffix: portion of this leaf after the chunk (still addressed
            // in old-buffer coordinates, starting at chunk_end)
            if chunk_end < leaf_end {
                new_leaves.push(LeafData::new(
                    leaf.location,
                    chunk_end,
                    leaf_end - chunk_end,
                    None,
                ));
            }
        }

        // Rebuild (and implicitly re-balance) only when something changed.
        if modified {
            self.saved_root = PieceTree::from_leaves(&new_leaves).root();
        }
    }
1628
    /// Diff the current piece tree against the last saved snapshot.
    ///
    /// This compares actual byte content, not just tree structure. This means
    /// that if you delete text and then paste it back, the diff will correctly
    /// show no changes (even though the tree structure differs).
    ///
    /// Uses a two-phase algorithm for efficiency:
    /// - Phase 1: Fast structure-based diff to find changed byte ranges (O(num_leaves))
    /// - Phase 2: Only compare actual content within changed ranges (O(edit_size))
    ///
    /// This is O(edit_size) instead of O(file_size) for small edits in large files.
    pub fn diff_since_saved(&self) -> PieceTreeDiff {
        let _span = tracing::info_span!(
            "diff_since_saved",
            large_file = self.large_file,
            modified = self.modified,
            lf_scanned = self.line_feeds_scanned
        )
        .entered();

        // Fast path: if the buffer hasn't been modified since loading/saving,
        // the content is identical to the saved version by definition.
        // This avoids an expensive O(num_leaves) structure walk when the tree
        // has been restructured for non-edit reasons (viewport chunk loading,
        // line-scan preparation, search-scan splits).
        if !self.modified {
            tracing::trace!("diff_since_saved: not modified → equal");
            return PieceTreeDiff {
                equal: true,
                byte_ranges: Vec::new(),
                nodes_visited: 0,
            };
        }

        // Quick check: if tree roots are identical (Arc pointer equality),
        // the content is definitely the same.
        if Arc::ptr_eq(&self.saved_root, &self.piece_tree.root()) {
            tracing::trace!("diff_since_saved: Arc::ptr_eq fast path → equal");
            return PieceTreeDiff {
                equal: true,
                byte_ranges: Vec::new(),
                nodes_visited: 0,
            };
        }

        // Phase 1: Fast structure-based diff to find which byte ranges differ
        // This is O(number of leaves) - very fast even for large files
        let structure_diff = self.diff_trees_by_structure();

        // If structure says trees are equal (same pieces in same order), we're done
        if structure_diff.equal {
            tracing::trace!("diff_since_saved: structure equal");
            return structure_diff;
        }

        // Phase 2: For small changed regions, verify with actual content comparison
        // This handles the case where different pieces contain identical content
        // (e.g., delete text then paste it back)
        let total_changed_bytes: usize = structure_diff
            .byte_ranges
            .iter()
            .map(|r| r.end.saturating_sub(r.start))
            .sum();

        // Only do content verification if the changed region is reasonably small
        // For large changes, trust the structure-based diff
        const MAX_VERIFY_BYTES: usize = 64 * 1024; // 64KB threshold for verification

        if total_changed_bytes <= MAX_VERIFY_BYTES && !structure_diff.byte_ranges.is_empty() {
            // Check if content in the changed ranges is actually different
            if self.verify_content_differs_in_ranges(&structure_diff.byte_ranges) {
                tracing::trace!(
                    "diff_since_saved: content differs, byte_ranges={}",
                    structure_diff.byte_ranges.len(),
                );
                // Content actually differs - return the structure diff result
                return structure_diff;
            } else {
                // Content is the same despite structure differences (rare case: undo/redo)
                return PieceTreeDiff {
                    equal: true,
                    byte_ranges: Vec::new(),
                    // Keep the visit count so callers can still observe the
                    // cost of the structural walk that was performed.
                    nodes_visited: structure_diff.nodes_visited,
                };
            }
        }

        tracing::info!(
            "diff_since_saved: large change, byte_ranges={}, nodes_visited={}",
            structure_diff.byte_ranges.len(),
            structure_diff.nodes_visited
        );
        // For large changes or when we can't verify, trust the structure diff
        structure_diff
    }
1724
1725    /// Check if the actual byte content differs in the given ranges.
1726    /// Returns true if content differs, false if content is identical.
1727    fn verify_content_differs_in_ranges(&self, byte_ranges: &[std::ops::Range<usize>]) -> bool {
1728        let saved_bytes = self.tree_total_bytes(&self.saved_root);
1729        let current_bytes = self.piece_tree.total_bytes();
1730
1731        // Different total sizes means content definitely differs
1732        if saved_bytes != current_bytes {
1733            return true;
1734        }
1735
1736        // For each changed range, compare the actual bytes
1737        for range in byte_ranges {
1738            if range.start >= range.end {
1739                continue;
1740            }
1741
1742            // Extract bytes from saved tree for this range
1743            let saved_slice =
1744                self.extract_range_from_tree(&self.saved_root, range.start, range.end);
1745            // Extract bytes from current tree for this range
1746            let current_slice = self.get_text_range(range.start, range.end);
1747
1748            match (saved_slice, current_slice) {
1749                (Some(saved), Some(current)) => {
1750                    if saved != current {
1751                        return true; // Content differs
1752                    }
1753                }
1754                _ => {
1755                    // Couldn't read content, assume it differs to be safe
1756                    return true;
1757                }
1758            }
1759        }
1760
1761        // All ranges have identical content
1762        false
1763    }
1764
1765    /// Extract a byte range from a saved tree root
1766    fn extract_range_from_tree(
1767        &self,
1768        root: &Arc<crate::model::piece_tree::PieceTreeNode>,
1769        start: usize,
1770        end: usize,
1771    ) -> Option<Vec<u8>> {
1772        let mut result = Vec::with_capacity(end.saturating_sub(start));
1773        self.collect_range_from_node(root, start, end, 0, &mut result)?;
1774        Some(result)
1775    }
1776
    /// Recursively collect bytes from `range_start..range_end` (document
    /// coordinates) within the subtree rooted at `node`, appending into
    /// `result`.
    ///
    /// `node_offset` is the document offset at which this subtree begins.
    /// Returns `None` if any required buffer is unloaded or a computed slice
    /// is out of bounds; `Some(())` on success.
    fn collect_range_from_node(
        &self,
        node: &Arc<crate::model::piece_tree::PieceTreeNode>,
        range_start: usize,
        range_end: usize,
        node_offset: usize,
        result: &mut Vec<u8>,
    ) -> Option<()> {
        use crate::model::piece_tree::PieceTreeNode;

        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                // Document offset where the right subtree begins.
                let left_end = node_offset + left_bytes;

                // Check if range overlaps with left subtree
                if range_start < left_end {
                    self.collect_range_from_node(
                        left,
                        range_start,
                        range_end,
                        node_offset,
                        result,
                    )?;
                }

                // Check if range overlaps with right subtree
                if range_end > left_end {
                    self.collect_range_from_node(right, range_start, range_end, left_end, result)?;
                }
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                ..
            } => {
                let node_end = node_offset + bytes;

                // Check if this leaf overlaps with our range
                if range_start < node_end && range_end > node_offset {
                    // An unloaded buffer aborts the whole extraction via `?`.
                    let buf = self.buffers.get(location.buffer_id())?;
                    let data = buf.get_data()?;

                    // Calculate the slice within this leaf (leaf-local coords)
                    let leaf_start = range_start.saturating_sub(node_offset);
                    let leaf_end = (range_end - node_offset).min(*bytes);

                    if leaf_start < leaf_end {
                        // `offset` is this leaf's start within its buffer.
                        let slice = data.get(*offset + leaf_start..*offset + leaf_end)?;
                        result.extend_from_slice(slice);
                    }
                }
            }
        }
        Some(())
    }
1839
1840    /// Helper to get total bytes from a tree root
1841    fn tree_total_bytes(&self, root: &Arc<crate::model::piece_tree::PieceTreeNode>) -> usize {
1842        use crate::model::piece_tree::PieceTreeNode;
1843        match root.as_ref() {
1844            PieceTreeNode::Internal {
1845                left_bytes, right, ..
1846            } => left_bytes + self.tree_total_bytes(right),
1847            PieceTreeNode::Leaf { bytes, .. } => *bytes,
1848        }
1849    }
1850
1851    /// Structure-based diff comparing piece tree leaves
1852    fn diff_trees_by_structure(&self) -> PieceTreeDiff {
1853        crate::model::piece_tree_diff::diff_piece_trees(&self.saved_root, &self.piece_tree.root())
1854    }
1855
1856    /// Convert a byte offset to a line/column position
1857    pub fn offset_to_position(&self, offset: usize) -> Option<Position> {
1858        self.piece_tree
1859            .offset_to_position(offset, &self.buffers)
1860            .map(|(line, column)| Position { line, column })
1861    }
1862
1863    /// Convert a line/column position to a byte offset
1864    pub fn position_to_offset(&self, position: Position) -> usize {
1865        self.piece_tree
1866            .position_to_offset(position.line, position.column, &self.buffers)
1867    }
1868
1869    /// Insert text at the given byte offset
1870    pub fn insert_bytes(&mut self, offset: usize, text: Vec<u8>) -> Cursor {
1871        if text.is_empty() {
1872            return self.piece_tree.cursor_at_offset(offset);
1873        }
1874
1875        // Mark as modified (updates version)
1876        self.mark_content_modified();
1877
1878        // Count line feeds in the text to insert
1879        let line_feed_cnt = Some(text.iter().filter(|&&b| b == b'\n').count());
1880
1881        // Optimization: try to append to existing buffer if insertion is at piece boundary
1882        let (buffer_location, buffer_offset, text_len) =
1883            if let Some(append_info) = self.try_append_to_existing_buffer(offset, &text) {
1884                append_info
1885            } else {
1886                // Create a new StringBuffer for this insertion
1887                let buffer_id = self.next_buffer_id;
1888                self.next_buffer_id += 1;
1889                let buffer = StringBuffer::new(buffer_id, text.clone());
1890                self.buffers.push(buffer);
1891                (BufferLocation::Added(buffer_id), 0, text.len())
1892            };
1893
1894        // When line feeds have been scanned, ensure the chunk at the insertion
1895        // point is loaded so compute_line_feeds_static can recount during splits.
1896        if self.line_feeds_scanned {
1897            self.ensure_chunk_loaded_at(offset);
1898        }
1899
1900        // Update piece tree (need to pass buffers reference)
1901        self.piece_tree.insert(
1902            offset,
1903            buffer_location,
1904            buffer_offset,
1905            text_len,
1906            line_feed_cnt,
1907            &self.buffers,
1908        )
1909    }
1910
1911    /// Try to append to an existing buffer if insertion point aligns with buffer end
1912    /// Returns (BufferLocation, buffer_offset, text_len) if append succeeds, None otherwise
1913    fn try_append_to_existing_buffer(
1914        &mut self,
1915        offset: usize,
1916        text: &[u8],
1917    ) -> Option<(BufferLocation, usize, usize)> {
1918        // Only optimize for non-empty insertions after existing content
1919        if text.is_empty() || offset == 0 {
1920            return None;
1921        }
1922
1923        // Find the piece containing the byte just before the insertion point
1924        // This avoids the saturating_sub issue
1925        let piece_info = self.piece_tree.find_by_offset(offset - 1)?;
1926
1927        // Check if insertion is exactly at the end of this piece
1928        // offset_in_piece tells us where (offset-1) is within the piece
1929        // For insertion to be at piece end, (offset-1) must be the last byte
1930        let offset_in_piece = piece_info.offset_in_piece?;
1931        if offset_in_piece + 1 != piece_info.bytes {
1932            return None; // Not at the end of the piece
1933        }
1934
1935        // Only append to "Added" buffers (not original Stored buffers)
1936        if !matches!(piece_info.location, BufferLocation::Added(_)) {
1937            return None;
1938        }
1939
1940        let buffer_id = piece_info.location.buffer_id();
1941        let buffer = self.buffers.get_mut(buffer_id)?;
1942
1943        // Check if buffer is loaded
1944        let buffer_len = buffer.get_data()?.len();
1945
1946        // Check if this piece ends exactly at the end of its buffer
1947        if piece_info.offset + piece_info.bytes != buffer_len {
1948            return None;
1949        }
1950
1951        // Perfect! Append to this buffer
1952        let append_offset = buffer.append(text);
1953
1954        Some((piece_info.location, append_offset, text.len()))
1955    }
1956
1957    /// Insert text (from &str) at the given byte offset
1958    pub fn insert(&mut self, offset: usize, text: &str) {
1959        self.insert_bytes(offset, text.as_bytes().to_vec());
1960    }
1961
1962    /// Insert text at a line/column position
1963    /// This now uses the optimized piece_tree.insert_at_position() for a single traversal
1964    pub fn insert_at_position(&mut self, position: Position, text: Vec<u8>) -> Cursor {
1965        if text.is_empty() {
1966            let offset = self.position_to_offset(position);
1967            return self.piece_tree.cursor_at_offset(offset);
1968        }
1969
1970        self.mark_content_modified();
1971
1972        // Count line feeds in the text to insert
1973        let line_feed_cnt = text.iter().filter(|&&b| b == b'\n').count();
1974
1975        // Create a new StringBuffer for this insertion
1976        let buffer_id = self.next_buffer_id;
1977        self.next_buffer_id += 1;
1978        let buffer = StringBuffer::new(buffer_id, text.clone());
1979        self.buffers.push(buffer);
1980
1981        // Use the optimized position-based insertion (single traversal)
1982        self.piece_tree.insert_at_position(
1983            position.line,
1984            position.column,
1985            BufferLocation::Added(buffer_id),
1986            0,
1987            text.len(),
1988            line_feed_cnt,
1989            &self.buffers,
1990        )
1991    }
1992
1993    /// Delete text starting at the given byte offset
1994    pub fn delete_bytes(&mut self, offset: usize, bytes: usize) {
1995        if bytes == 0 || offset >= self.total_bytes() {
1996            return;
1997        }
1998
1999        // When line feeds have been scanned, ensure chunks at delete boundaries
2000        // are loaded so compute_line_feeds_static can recount during splits.
2001        if self.line_feeds_scanned {
2002            self.ensure_chunk_loaded_at(offset);
2003            let end = (offset + bytes).min(self.total_bytes());
2004            if end > offset {
2005                self.ensure_chunk_loaded_at(end.saturating_sub(1));
2006            }
2007        }
2008
2009        // Update piece tree
2010        self.piece_tree.delete(offset, bytes, &self.buffers);
2011
2012        self.mark_content_modified();
2013    }
2014
2015    /// Delete text in a range
2016    pub fn delete(&mut self, range: Range<usize>) {
2017        if range.end > range.start {
2018            self.delete_bytes(range.start, range.end - range.start);
2019        }
2020    }
2021
2022    /// Delete text in a line/column range
2023    /// This now uses the optimized piece_tree.delete_position_range() for a single traversal
2024    pub fn delete_range(&mut self, start: Position, end: Position) {
2025        // Use the optimized position-based deletion
2026        self.piece_tree.delete_position_range(
2027            start.line,
2028            start.column,
2029            end.line,
2030            end.column,
2031            &self.buffers,
2032        );
2033        self.mark_content_modified();
2034    }
2035
2036    /// Replace the entire buffer content with new content
2037    /// This is an O(n) operation that rebuilds the piece tree in a single pass,
2038    /// avoiding the O(n²) complexity of applying individual edits.
2039    ///
2040    /// This is used for bulk operations like "replace all" where applying
2041    /// individual edits would be prohibitively slow.
2042    pub fn replace_content(&mut self, new_content: &str) {
2043        let bytes = new_content.len();
2044        let content_bytes = new_content.as_bytes().to_vec();
2045
2046        // Count line feeds in the new content
2047        let line_feed_cnt = content_bytes.iter().filter(|&&b| b == b'\n').count();
2048
2049        // Create a new StringBuffer for the new content
2050        let buffer_id = self.next_buffer_id;
2051        self.next_buffer_id += 1;
2052        let buffer = StringBuffer::new(buffer_id, content_bytes);
2053        self.buffers.push(buffer);
2054
2055        // Rebuild the piece tree with a single piece containing all the new content
2056        if bytes > 0 {
2057            self.piece_tree = PieceTree::new(
2058                BufferLocation::Added(buffer_id),
2059                0,
2060                bytes,
2061                Some(line_feed_cnt),
2062            );
2063        } else {
2064            self.piece_tree = PieceTree::empty();
2065        }
2066
2067        self.mark_content_modified();
2068    }
2069
2070    /// Restore a previously saved buffer state (for undo/redo of BulkEdit).
2071    ///
2072    /// This restores the piece tree AND the buffers list, which is critical
2073    /// because consolidate_after_save() replaces self.buffers. Without restoring
2074    /// buffers, the piece tree would reference buffer IDs that no longer exist.
2075    pub fn restore_buffer_state(&mut self, snapshot: &BufferSnapshot) {
2076        self.piece_tree = snapshot.piece_tree.clone();
2077        self.buffers = snapshot.buffers.clone();
2078        self.next_buffer_id = snapshot.next_buffer_id;
2079        self.mark_content_modified();
2080    }
2081
2082    /// Snapshot the current buffer state (piece tree + buffers) for BulkEdit undo/redo.
2083    ///
2084    /// The snapshot includes buffers because consolidate_after_save() can replace
2085    /// self.buffers between the snapshot and restore, which would otherwise cause
2086    /// the restored piece tree to reference nonexistent buffer IDs.
2087    pub fn snapshot_buffer_state(&self) -> Arc<BufferSnapshot> {
2088        Arc::new(BufferSnapshot {
2089            piece_tree: self.piece_tree.clone(),
2090            buffers: self.buffers.clone(),
2091            next_buffer_id: self.next_buffer_id,
2092        })
2093    }
2094
2095    /// Apply bulk edits efficiently in a single pass
2096    /// Returns the net change in bytes
2097    pub fn apply_bulk_edits(&mut self, edits: &[(usize, usize, &str)]) -> isize {
2098        // Pre-allocate buffers for all insert texts (only non-empty texts)
2099        // This avoids the borrow conflict in the closure
2100        // IMPORTANT: Only add entries for non-empty texts because the closure
2101        // is only called for edits with non-empty insert text
2102        let mut buffer_info: Vec<(BufferLocation, usize, usize, Option<usize>)> = Vec::new();
2103
2104        for (_, _, text) in edits {
2105            if !text.is_empty() {
2106                let buffer_id = self.next_buffer_id;
2107                self.next_buffer_id += 1;
2108                let content = text.as_bytes().to_vec();
2109                let lf_cnt = content.iter().filter(|&&b| b == b'\n').count();
2110                let bytes = content.len();
2111                let buffer = StringBuffer::new(buffer_id, content);
2112                self.buffers.push(buffer);
2113                buffer_info.push((BufferLocation::Added(buffer_id), 0, bytes, Some(lf_cnt)));
2114            }
2115            // No placeholder for empty texts - the closure is only called for non-empty texts
2116        }
2117
2118        // Now call apply_bulk_edits with a simple index-based closure
2119        let mut idx = 0;
2120        let delta = self
2121            .piece_tree
2122            .apply_bulk_edits(edits, &self.buffers, |_text| {
2123                let info = buffer_info[idx];
2124                idx += 1;
2125                info
2126            });
2127
2128        self.mark_content_modified();
2129        delta
2130    }
2131
    /// Get text from a byte offset range (read-only).
    ///
    /// `offset` is the starting document offset and `bytes` is the COUNT of
    /// bytes to read (not an end offset). Uses the optimized
    /// `piece_tree.iter_pieces_in_range()` for a single traversal.
    ///
    /// Returns `None` if any buffer in the range is unloaded.
    /// PRIVATE: External code should use get_text_range_mut() which handles
    /// lazy loading.
    ///
    /// NOTE(review): a piece whose mapped range falls outside its buffer's
    /// data (or whose buffer id is unknown) is silently skipped, so the
    /// result can be shorter than `bytes` while still being `Some` — confirm
    /// callers tolerate short reads.
    fn get_text_range(&self, offset: usize, bytes: usize) -> Option<Vec<u8>> {
        if bytes == 0 {
            return Some(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        let end_offset = offset + bytes;
        let mut collected = 0;

        // Use the efficient piece iterator (single O(log n) traversal + O(N) iteration)
        for piece_view in self.piece_tree.iter_pieces_in_range(offset, end_offset) {
            let buffer_id = piece_view.location.buffer_id();
            if let Some(buffer) = self.buffers.get(buffer_id) {
                // Calculate the range to read from this piece
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clip to the requested range
                let read_start = offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    // Return None if buffer is unloaded (type-safe)
                    let data = buffer.get_data()?;

                    if buffer_end <= data.len() {
                        result.extend_from_slice(&data[buffer_start..buffer_end]);
                        collected += bytes_to_read;

                        // Stop as soon as the requested count is satisfied.
                        if collected >= bytes {
                            break;
                        }
                    }
                }
            }
        }

        Some(result)
    }
2182
    /// Get text from a byte offset range with lazy loading.
    ///
    /// `offset` is the starting document offset and `bytes` is the count of
    /// bytes to read; the effective end is clamped to the buffer length.
    /// Unloaded chunks are loaded on demand, so this always returns complete
    /// data on success.
    ///
    /// Returns an error if loading fails or if data cannot be read for any
    /// reason.
    ///
    /// NOTE: Currently loads entire buffers on-demand. Future optimization would split
    /// large pieces and load only LOAD_CHUNK_SIZE chunks at a time.
    pub fn get_text_range_mut(&mut self, offset: usize, bytes: usize) -> Result<Vec<u8>> {
        let _span = tracing::info_span!("get_text_range_mut", offset, bytes).entered();
        if bytes == 0 {
            return Ok(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        // Clamp end_offset to buffer length to handle reads beyond EOF
        let end_offset = (offset + bytes).min(self.len());
        let mut current_offset = offset;
        let mut iteration_count = 0u32;

        // Keep iterating until we've collected all requested bytes. Each pass
        // either finishes the read or restarts after a chunk-split mutated
        // the tree (invalidating the piece iterator).
        while current_offset < end_offset {
            iteration_count += 1;
            let mut made_progress = false;
            let mut restarted_iteration = false;

            // Use the efficient piece iterator (single O(log n) traversal + O(N) iteration)
            for piece_view in self
                .piece_tree
                .iter_pieces_in_range(current_offset, end_offset)
            {
                let buffer_id = piece_view.location.buffer_id();

                // Check if buffer needs loading
                let needs_loading = self
                    .buffers
                    .get(buffer_id)
                    .map(|b| !b.is_loaded())
                    .unwrap_or(false);

                // chunk_split_and_load returns true when it restructured the
                // tree; the current iterator is then stale and must be rebuilt.
                if needs_loading && self.chunk_split_and_load(&piece_view, current_offset)? {
                    restarted_iteration = true;
                    break;
                }

                // Calculate the range to read from this piece
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clip to the requested range
                let read_start = current_offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    // Buffer should be loaded now
                    let buffer = self.buffers.get(buffer_id).context("Buffer not found")?;
                    let data = buffer
                        .get_data()
                        .context("Buffer data unavailable after load")?;

                    anyhow::ensure!(
                        buffer_end <= data.len(),
                        "Buffer range out of bounds: requested {}..{}, buffer size {}",
                        buffer_start,
                        buffer_end,
                        data.len()
                    );

                    result.extend_from_slice(&data[buffer_start..buffer_end]);
                    current_offset = read_end;
                    made_progress = true;
                }
            }

            // If we didn't make progress and didn't restart iteration, this is an error
            // (otherwise the while loop would spin forever).
            if !made_progress && !restarted_iteration {
                tracing::error!(
                    "get_text_range_mut: No progress at offset {} (requested range: {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
                tracing::error!(
                    "Piece tree stats: {} total bytes",
                    self.piece_tree.stats().total_bytes
                );
                anyhow::bail!(
                    "Failed to read data at offset {}: no progress made (requested {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
            }
        }

        if iteration_count > 1 {
            tracing::info!(
                iteration_count,
                result_len = result.len(),
                "get_text_range_mut: completed with multiple iterations"
            );
        }

        Ok(result)
    }
2295
2296    /// Prepare a viewport for rendering
2297    ///
2298    /// This is called before rendering with &mut access to pre-load all data
2299    /// that will be needed for the viewport. It estimates the number of bytes
2300    /// needed based on the line count and pre-loads them.
2301    ///
2302    /// # Arguments
2303    /// * `start_offset` - The byte offset where the viewport starts
2304    /// * `line_count` - The number of lines to prepare (estimate)
2305    ///
2306    /// # Returns
2307    /// Ok(()) if preparation succeeded, Err if loading failed
2308    pub fn prepare_viewport(&mut self, start_offset: usize, line_count: usize) -> Result<()> {
2309        let _span = tracing::info_span!("prepare_viewport", start_offset, line_count).entered();
2310        // Estimate how many bytes we need (pessimistic assumption)
2311        // Average line length is typically 80-100 bytes, but we use 200 to be safe
2312        let estimated_bytes = line_count.saturating_mul(200);
2313
2314        // Cap the estimate at the remaining bytes in the document
2315        let remaining_bytes = self.total_bytes().saturating_sub(start_offset);
2316        let bytes_to_load = estimated_bytes.min(remaining_bytes);
2317        tracing::trace!(
2318            bytes_to_load,
2319            total_bytes = self.total_bytes(),
2320            "prepare_viewport loading"
2321        );
2322
2323        // Pre-load with full chunk-splitting support
2324        // This may load more than we need, but ensures all data is available
2325        self.get_text_range_mut(start_offset, bytes_to_load)?;
2326
2327        Ok(())
2328    }
2329
    /// Split a piece that references a large unloaded buffer, create a chunk
    /// buffer for the region around `current_offset`, and load it.
    ///
    /// Returns `true` if the piece tree was modified (caller must restart its
    /// iteration), `false` if the piece was small enough to load in-place.
    ///
    /// # Arguments
    /// * `piece_view` - The piece (with its document/buffer offsets) whose data
    ///   is needed
    /// * `current_offset` - Document byte offset the caller is reading at; the
    ///   loaded chunk is aligned around this position
    fn chunk_split_and_load(
        &mut self,
        piece_view: &PieceView,
        current_offset: usize,
    ) -> Result<bool> {
        let buffer_id = piece_view.location.buffer_id();

        // The underlying buffer may be much larger than this piece (e.g. the
        // whole-file Stored buffer after rebuild_with_pristine_saved_root).
        // We must chunk-split if either the piece or its buffer exceeds
        // LOAD_CHUNK_SIZE, because `load()` loads the entire buffer.
        let buffer_bytes = self
            .buffers
            .get(buffer_id)
            .and_then(|b| b.unloaded_bytes())
            .unwrap_or(0);
        let needs_chunk_split =
            piece_view.bytes > LOAD_CHUNK_SIZE || buffer_bytes > piece_view.bytes;

        tracing::info!(
            buffer_id,
            piece_bytes = piece_view.bytes,
            buffer_bytes,
            needs_chunk_split,
            piece_doc_offset = piece_view.doc_offset,
            current_offset,
            "chunk_split_and_load: loading unloaded piece"
        );

        if !needs_chunk_split {
            // Piece is small enough and its buffer matches — load in-place.
            let _span = tracing::info_span!(
                "load_small_buffer",
                piece_bytes = piece_view.bytes,
                buffer_id,
            )
            .entered();
            self.buffers
                .get_mut(buffer_id)
                .context("Buffer not found")?
                .load(&*self.fs)
                .context("Failed to load buffer")?;
            // Tree untouched: the caller's iteration remains valid.
            return Ok(false);
        }

        let _span = tracing::info_span!(
            "chunk_split_and_load",
            piece_bytes = piece_view.bytes,
            buffer_id,
        )
        .entered();

        let piece_start_in_doc = piece_view.doc_offset;
        // How far into this piece the caller's read position lies.
        let offset_in_piece = current_offset.saturating_sub(piece_start_in_doc);

        // When the piece already fits within LOAD_CHUNK_SIZE, create a chunk
        // buffer for the exact piece range (no alignment/splitting needed).
        // Alignment rounding is only useful when carving a sub-range out of a
        // piece larger than LOAD_CHUNK_SIZE.
        let (chunk_start_in_buffer, chunk_bytes) = if piece_view.bytes <= LOAD_CHUNK_SIZE {
            (piece_view.buffer_offset, piece_view.bytes)
        } else {
            // Round the chunk start down to a CHUNK_ALIGNMENT boundary, then
            // clamp the length so the chunk never runs past the piece's end.
            let start =
                (piece_view.buffer_offset + offset_in_piece) / CHUNK_ALIGNMENT * CHUNK_ALIGNMENT;
            let bytes = LOAD_CHUNK_SIZE
                .min((piece_view.buffer_offset + piece_view.bytes).saturating_sub(start));
            (start, bytes)
        };

        // Calculate document offsets for splitting
        let chunk_start_offset_in_piece =
            chunk_start_in_buffer.saturating_sub(piece_view.buffer_offset);
        let split_start_in_doc = piece_start_in_doc + chunk_start_offset_in_piece;
        let split_end_in_doc = split_start_in_doc + chunk_bytes;

        // Split the piece to isolate the chunk (only where a split is needed:
        // at a non-zero start, and before the piece's end).
        if chunk_start_offset_in_piece > 0 {
            self.piece_tree
                .split_at_offset(split_start_in_doc, &self.buffers);
        }
        if split_end_in_doc < piece_start_in_doc + piece_view.bytes {
            self.piece_tree
                .split_at_offset(split_end_in_doc, &self.buffers);
        }

        // Create a new buffer for this chunk
        let chunk_buffer = self
            .buffers
            .get(buffer_id)
            .context("Buffer not found")?
            .create_chunk_buffer(self.next_buffer_id, chunk_start_in_buffer, chunk_bytes)
            .context("Failed to create chunk buffer")?;

        self.next_buffer_id += 1;
        let new_buffer_id = chunk_buffer.id;
        self.buffers.push(chunk_buffer);

        // Update the piece to reference the new chunk buffer
        self.piece_tree.replace_buffer_reference(
            buffer_id,
            piece_view.buffer_offset + chunk_start_offset_in_piece,
            chunk_bytes,
            BufferLocation::Added(new_buffer_id),
        );

        // Load the chunk buffer
        self.buffers
            .get_mut(new_buffer_id)
            .context("Chunk buffer not found")?
            .load(&*self.fs)
            .context("Failed to load chunk")?;

        // split_at_offset uses compute_line_feeds_static which returns None
        // for unloaded buffers, destroying the scanned line feed counts.
        // Fix up: the loaded chunk is counted from memory, remaining unloaded
        // pieces use the filesystem's count_line_feeds_in_range.
        if self.line_feeds_scanned {
            let leaves = self.piece_tree.get_leaves();
            let mut fixups: Vec<(usize, usize)> = Vec::new();
            for (idx, leaf) in leaves.iter().enumerate() {
                if leaf.line_feed_cnt.is_none() {
                    // scan_leaf errors are deliberately ignored: a leaf that
                    // cannot be counted simply keeps its None count.
                    if let Ok(count) = self.scan_leaf(leaf) {
                        fixups.push((idx, count));
                    }
                }
            }
            if !fixups.is_empty() {
                self.piece_tree.update_leaf_line_feeds_path_copy(&fixups);
            }
        }

        // Keep saved_root in sync with viewport-loading tree restructures so
        // that diff_since_saved() can match by (location, offset) identity.
        //
        // When !modified the current tree IS the saved state, so just snapshot.
        // When modified, we must apply the same Stored→Added leaf replacement
        // to saved_root so the diff doesn't see loaded-but-unedited regions as
        // changed.
        if !self.modified {
            self.saved_root = self.piece_tree.root();
        } else {
            self.apply_chunk_load_to_saved_root(
                buffer_id,
                chunk_start_in_buffer,
                chunk_bytes,
                new_buffer_id,
            );
        }

        Ok(true)
    }
2486
2487    /// Get all text as a single Vec<u8>
2488    /// Returns None if any buffers are unloaded (lazy loading)
2489    /// CRATE-PRIVATE: External code should use get_text_range_mut() or DocumentModel methods
2490    pub(crate) fn get_all_text(&self) -> Option<Vec<u8>> {
2491        self.get_text_range(0, self.total_bytes())
2492    }
2493
2494    /// Get all text as a String
2495    /// Returns None if any buffers are unloaded (lazy loading)
2496    /// CRATE-PRIVATE: External code should use get_text_range_mut() or DocumentModel methods
2497    pub(crate) fn get_all_text_string(&self) -> Option<String> {
2498        self.get_all_text()
2499            .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
2500    }
2501
2502    /// Get text from a byte range as bytes
2503    /// CRATE-PRIVATE: Returns empty vector if any buffers are unloaded (silently fails!)
2504    /// Only use this when you KNOW the data is loaded (e.g., for syntax highlighting small regions)
2505    /// External code should use get_text_range_mut() or DocumentModel methods
2506    pub(crate) fn slice_bytes(&self, range: Range<usize>) -> Vec<u8> {
2507        self.get_text_range(range.start, range.end.saturating_sub(range.start))
2508            .unwrap_or_default()
2509    }
2510
2511    /// Get all text as a String
2512    /// Returns None if any buffers are unloaded (lazy loading)
2513    pub fn to_string(&self) -> Option<String> {
2514        self.get_all_text_string()
2515    }
2516
2517    /// Get the total number of bytes
2518    pub fn len(&self) -> usize {
2519        self.total_bytes()
2520    }
2521
2522    /// Check if the buffer is empty
2523    pub fn is_empty(&self) -> bool {
2524        self.total_bytes() == 0
2525    }
2526
2527    /// Get the file path associated with this buffer
2528    pub fn file_path(&self) -> Option<&Path> {
2529        self.file_path.as_deref()
2530    }
2531
2532    /// Update the file path after a rename operation on disk.
2533    pub fn rename_file_path(&mut self, path: PathBuf) {
2534        self.file_path = Some(path);
2535    }
2536
2537    /// Clear the file path (make buffer unnamed)
2538    /// Note: This does NOT affect Unloaded chunk file_paths used for lazy loading.
2539    /// Those still point to the original source file for chunk loading.
2540    pub fn clear_file_path(&mut self) {
2541        self.file_path = None;
2542    }
2543
2544    /// Extend buffer to include more bytes from a streaming source file.
2545    /// Used for stdin streaming where the temp file grows over time.
2546    /// Appends a new Unloaded chunk for the new bytes.
2547    pub fn extend_streaming(&mut self, source_path: &Path, new_size: usize) {
2548        let old_size = self.total_bytes();
2549        if new_size <= old_size {
2550            return;
2551        }
2552
2553        let additional_bytes = new_size - old_size;
2554
2555        // Create new Unloaded buffer for the appended region
2556        let buffer_id = self.next_buffer_id;
2557        self.next_buffer_id += 1;
2558
2559        let new_buffer = StringBuffer::new_unloaded(
2560            buffer_id,
2561            source_path.to_path_buf(),
2562            old_size,         // file_offset - where this chunk starts in the file
2563            additional_bytes, // bytes - size of this chunk
2564        );
2565        self.buffers.push(new_buffer);
2566
2567        // Append piece at end of document (insert at offset == total_bytes)
2568        self.piece_tree.insert(
2569            old_size,
2570            BufferLocation::Stored(buffer_id),
2571            0,
2572            additional_bytes,
2573            None, // line_feed_cnt unknown for unloaded chunk
2574            &self.buffers,
2575        );
2576    }
2577
2578    /// Check if the buffer has been modified since last save
2579    pub fn is_modified(&self) -> bool {
2580        self.modified
2581    }
2582
2583    /// Clear the modified flag (after save)
2584    pub fn clear_modified(&mut self) {
2585        self.modified = false;
2586    }
2587
2588    /// Set the modified flag explicitly
2589    /// Used by undo/redo to restore the correct modified state
2590    pub fn set_modified(&mut self, modified: bool) {
2591        self.modified = modified;
2592    }
2593
2594    /// Check if buffer has pending changes for recovery auto-save
2595    pub fn is_recovery_pending(&self) -> bool {
2596        self.recovery_pending
2597    }
2598
2599    /// Mark buffer as needing recovery auto-save (call after edits)
2600    pub fn set_recovery_pending(&mut self, pending: bool) {
2601        self.recovery_pending = pending;
2602    }
2603
2604    /// Ensure the buffer chunk at the given byte offset is loaded.
2605    ///
2606    /// When `line_feeds_scanned` is true, piece splits during insert/delete need
2607    /// the buffer data to be loaded so `compute_line_feeds_static` can accurately
2608    /// recount line feeds for each half. This method loads the chunk if needed.
2609    fn ensure_chunk_loaded_at(&mut self, offset: usize) {
2610        if let Some(piece_info) = self.piece_tree.find_by_offset(offset) {
2611            let buffer_id = piece_info.location.buffer_id();
2612            if let Some(buffer) = self.buffers.get_mut(buffer_id) {
2613                if !buffer.is_loaded() {
2614                    let buf_bytes = buffer.unloaded_bytes().unwrap_or(0);
2615                    tracing::info!(
2616                        "ensure_chunk_loaded_at: loading buffer {} ({} bytes) for offset {}",
2617                        buffer_id,
2618                        buf_bytes,
2619                        offset
2620                    );
2621                    if let Err(e) = buffer.load(&*self.fs) {
2622                        tracing::warn!("Failed to load chunk at offset {offset}: {e}");
2623                    }
2624                }
2625            }
2626        }
2627    }
2628
2629    /// Check if this is a large file with lazy loading enabled
2630    pub fn is_large_file(&self) -> bool {
2631        self.large_file
2632    }
2633
2634    /// Check if line feeds have been scanned for this large file.
2635    /// When true, `line_count()` returns exact values.
2636    pub fn has_line_feed_scan(&self) -> bool {
2637        self.line_feeds_scanned
2638    }
2639
2640    /// Get the raw piece tree leaves (for storing alongside scan chunks).
2641    pub fn piece_tree_leaves(&self) -> Vec<crate::model::piece_tree::LeafData> {
2642        self.piece_tree.get_leaves()
2643    }
2644
2645    /// Prepare work items for an incremental line scan.
2646    ///
2647    /// First splits any oversized leaves in the piece tree so every leaf is
2648    /// at most `LOAD_CHUNK_SIZE` bytes.  Then returns one work item per leaf.
2649    /// After scanning, `get_text_range_mut` will never need to split a scanned
2650    /// leaf (it's already chunk-sized), so line-feed counts are preserved.
2651    ///
2652    /// Returns `(chunks, total_bytes)`.
2653    pub fn prepare_line_scan(&mut self) -> (Vec<LineScanChunk>, usize) {
2654        // Pre-split the tree so every leaf ≤ LOAD_CHUNK_SIZE.
2655        self.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
2656
2657        let leaves = self.piece_tree.get_leaves();
2658        let total_bytes: usize = leaves.iter().map(|l| l.bytes).sum();
2659        let mut chunks = Vec::new();
2660
2661        for (idx, leaf) in leaves.iter().enumerate() {
2662            chunks.push(LineScanChunk {
2663                leaf_index: idx,
2664                byte_len: leaf.bytes,
2665                already_known: leaf.line_feed_cnt.is_some(),
2666            });
2667        }
2668
2669        (chunks, total_bytes)
2670    }
2671
2672    /// Initialize a chunked search scan over this buffer's piece tree.
2673    ///
2674    /// Used for in-editor Ctrl+F (incremental, yields to the event loop
2675    /// between chunks) and for searching dirty buffers during project grep.
2676    /// For searching files on disk, use `FileSystem::search_file` instead.
2677    pub fn search_scan_init(
2678        &mut self,
2679        regex: regex::bytes::Regex,
2680        max_matches: usize,
2681        query_len: usize,
2682    ) -> ChunkedSearchState {
2683        let (chunks, total_bytes) = self.prepare_line_scan();
2684        ChunkedSearchState {
2685            chunks,
2686            next_chunk: 0,
2687            next_doc_offset: 0,
2688            total_bytes,
2689            scanned_bytes: 0,
2690            regex,
2691            matches: Vec::new(),
2692            overlap_tail: Vec::new(),
2693            overlap_doc_offset: 0,
2694            max_matches,
2695            capped: false,
2696            query_len,
2697            running_line: 1,
2698        }
2699    }
2700
    /// Process one chunk of a chunked search scan.
    ///
    /// Loads the next chunk via `get_text_range_mut`, prepends overlap from
    /// the previous chunk, runs the regex, and appends matches to `state`
    /// with line/column/context computed on the fly from the loaded bytes.
    ///
    /// Line numbers are tracked incrementally via `running_line` — each
    /// chunk counts newlines in its non-overlap portion to advance the
    /// counter for the next chunk, and matches use an incremental cursor
    /// so total line-counting work is O(chunk_size), not O(chunk × matches).
    ///
    /// Returns `Ok(true)` if there are more chunks to process, `Ok(false)`
    /// when the scan is complete.
    ///
    /// TODO: For concurrent/parallel search (searching multiple files at once),
    /// chunks would need to return chunk-relative line numbers and have them
    /// fixed up with each file's starting line offset after all chunks complete.
    pub fn search_scan_next_chunk(
        &mut self,
        state: &mut ChunkedSearchState,
    ) -> std::io::Result<bool> {
        if state.is_done() {
            return Ok(false);
        }

        let chunk_info = state.chunks[state.next_chunk].clone();
        let doc_offset = state.next_doc_offset;

        // Advance cursors before the (fallible) load so a failing chunk is
        // not retried forever.
        state.next_chunk += 1;
        state.scanned_bytes += chunk_info.byte_len;
        state.next_doc_offset += chunk_info.byte_len;

        // Load the chunk bytes
        let chunk_bytes = self
            .get_text_range_mut(doc_offset, chunk_info.byte_len)
            .map_err(std::io::Error::other)?;

        // Build search buffer: overlap tail + new chunk. The overlap lets
        // matches that straddle a chunk boundary still be found.
        let overlap_len = state.overlap_tail.len();
        let mut search_buf = Vec::with_capacity(overlap_len + chunk_bytes.len());
        search_buf.extend_from_slice(&state.overlap_tail);
        search_buf.extend_from_slice(&chunk_bytes);

        // Document offset of search_buf[0] (shifted back when overlap exists).
        let buf_doc_offset = if overlap_len > 0 {
            state.overlap_doc_offset
        } else {
            doc_offset
        };

        // Line number at buf_doc_offset: running_line tracks the line at
        // doc_offset (start of new chunk data). Count newlines in the overlap
        // prefix to get the line at the start of the full search_buf.
        let newlines_in_overlap = search_buf[..overlap_len]
            .iter()
            .filter(|&&b| b == b'\n')
            .count();
        let mut line_at = state.running_line - newlines_in_overlap;
        // Bytes of search_buf already accounted for in `line_at`.
        let mut counted_to = 0usize;

        // Run regex on the combined buffer
        for m in state.regex.find_iter(&search_buf) {
            // Skip matches entirely within the overlap (already found)
            if overlap_len > 0 && m.end() <= overlap_len {
                continue;
            }

            if state.matches.len() >= state.max_matches {
                state.capped = true;
                break;
            }

            // Advance line counter incrementally to this match
            line_at += search_buf[counted_to..m.start()]
                .iter()
                .filter(|&&b| b == b'\n')
                .count();
            counted_to = m.start();

            // Find line boundaries in search_buf for context
            let line_start = search_buf[..m.start()]
                .iter()
                .rposition(|&b| b == b'\n')
                .map(|p| p + 1)
                .unwrap_or(0);
            let line_end = search_buf[m.start()..]
                .iter()
                .position(|&b| b == b'\n')
                .map(|p| m.start() + p)
                .unwrap_or(search_buf.len());

            let match_doc_offset = buf_doc_offset + m.start();
            let match_len = m.end() - m.start();
            // 1-based column within the match's line.
            let column = m.start() - line_start + 1;
            let context = String::from_utf8_lossy(&search_buf[line_start..line_end]).into_owned();

            state.matches.push(SearchMatch {
                byte_offset: match_doc_offset,
                length: match_len,
                line: line_at,
                column,
                context,
            });
        }

        // Advance running_line by newlines in the new (non-overlap) chunk data
        let newlines_in_chunk = chunk_bytes.iter().filter(|&&b| b == b'\n').count();
        state.running_line += newlines_in_chunk;

        // Save overlap tail for next chunk: at least query_len (so no match
        // can straddle past it), at least 256 bytes, at most the whole chunk.
        let max_overlap = state.query_len.max(256).min(chunk_bytes.len());
        let tail_start = chunk_bytes.len().saturating_sub(max_overlap);
        state.overlap_tail = chunk_bytes[tail_start..].to_vec();
        state.overlap_doc_offset = doc_offset + tail_start;

        Ok(!state.is_done())
    }
2817
2818    /// Run a complete chunked search over the piece tree (all chunks).
2819    ///
2820    /// Synchronous variant — used for dirty buffer snapshots in project
2821    /// grep and in tests.  For on-disk files, use `FileSystem::search_file`.
2822    pub fn search_scan_all(
2823        &mut self,
2824        regex: regex::bytes::Regex,
2825        max_matches: usize,
2826        query_len: usize,
2827    ) -> std::io::Result<ChunkedSearchState> {
2828        let mut state = self.search_scan_init(regex, max_matches, query_len);
2829        while self.search_scan_next_chunk(&mut state)? {}
2830        Ok(state)
2831    }
2832
    /// Build a hybrid search plan from the piece tree.
    ///
    /// Extracts regions (unloaded file ranges + loaded in-memory data) that
    /// can be searched independently.  The plan is `Send` so it can be
    /// executed on a background thread via `HybridSearchPlan::execute`.
    ///
    /// Returns `None` if the buffer has no file path (caller should fall
    /// back to `search_scan_all`).
    pub fn search_hybrid_plan(&mut self) -> Option<HybridSearchPlan> {
        let file_path = self.file_path.clone()?;

        // Normalize leaf sizes so each region stays chunk-sized or smaller.
        self.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
        let leaves = self.piece_tree.get_leaves();

        let mut regions: Vec<SearchRegion> = Vec::new();
        // Running document offset of the current leaf; advanced once per leaf.
        let mut doc_offset = 0usize;

        for leaf in &leaves {
            let buf = self.buffers.get(leaf.location.buffer_id());
            // A leaf is "unloaded stored" when it points at original file
            // content whose bytes are not in memory — searchable on disk.
            let is_unloaded_stored = matches!(
                (&leaf.location, buf),
                (
                    BufferLocation::Stored(_),
                    Some(StringBuffer {
                        data: BufferData::Unloaded { .. },
                        ..
                    }),
                )
            );

            if is_unloaded_stored {
                let file_offset = match buf.unwrap().data {
                    BufferData::Unloaded {
                        file_offset: fo, ..
                    } => fo + leaf.offset,
                    _ => unreachable!(),
                };

                // Merge with previous unloaded region if contiguous
                if let Some(SearchRegion::Unloaded {
                    file_offset: prev_fo,
                    bytes: prev_bytes,
                    ..
                }) = regions.last_mut()
                {
                    if *prev_fo + *prev_bytes == file_offset {
                        *prev_bytes += leaf.bytes;
                        doc_offset += leaf.bytes;
                        continue;
                    }
                }
                regions.push(SearchRegion::Unloaded {
                    file_offset,
                    bytes: leaf.bytes,
                    doc_offset,
                });
            } else {
                // Loaded (or loadable) leaf: copy its bytes for in-memory search.
                let data = match buf.and_then(|b| b.get_data()) {
                    Some(full) => {
                        // Clamp the slice end defensively against stale leaves.
                        let end = (leaf.offset + leaf.bytes).min(full.len());
                        full[leaf.offset..end].to_vec()
                    }
                    // Not resident: force-load via the chunking path; on
                    // failure skip this leaf (best-effort plan).
                    None => match self.get_text_range_mut(doc_offset, leaf.bytes) {
                        Ok(d) => d,
                        Err(_) => {
                            doc_offset += leaf.bytes;
                            continue;
                        }
                    },
                };

                // Merge with previous loaded region
                if let Some(SearchRegion::Loaded {
                    data: prev_data, ..
                }) = regions.last_mut()
                {
                    prev_data.extend_from_slice(&data);
                    doc_offset += leaf.bytes;
                    continue;
                }
                regions.push(SearchRegion::Loaded { data, doc_offset });
            }

            doc_offset += leaf.bytes;
        }

        Some(HybridSearchPlan { file_path, regions })
    }
2921
2922    /// Hybrid search: uses `fs.search_file` for unloaded piece-tree regions
2923    /// (searches where the data lives, no network transfer) and in-memory regex
2924    /// for loaded/edited regions.  Handles overlap at region boundaries.
2925    ///
2926    /// For a huge remote file with a small local edit, this avoids transferring
2927    /// the entire file — only match metadata crosses the network.
2928    ///
2929    /// Falls back to `search_scan_all` when the buffer has no file path or is
2930    /// fully loaded.
2931    pub fn search_hybrid(
2932        &mut self,
2933        pattern: &str,
2934        opts: &FileSearchOptions,
2935        regex: Regex,
2936        max_matches: usize,
2937        query_len: usize,
2938    ) -> io::Result<Vec<SearchMatch>> {
2939        let plan = match self.search_hybrid_plan() {
2940            Some(p) => p,
2941            None => {
2942                let state = self.search_scan_all(regex, max_matches, query_len)?;
2943                return Ok(state.matches);
2944            }
2945        };
2946        plan.execute(&*self.fs, pattern, opts, &regex, max_matches, query_len)
2947    }
2948
2949    /// Count `\n` bytes in a single leaf.
2950    ///
2951    /// Uses `count_line_feeds_in_range` for unloaded buffers, which remote
2952    /// filesystem implementations can override to count server-side.
2953    pub fn scan_leaf(&self, leaf: &crate::model::piece_tree::LeafData) -> std::io::Result<usize> {
2954        let buffer_id = leaf.location.buffer_id();
2955        let buffer = self
2956            .buffers
2957            .get(buffer_id)
2958            .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::NotFound, "buffer not found"))?;
2959
2960        let count = match &buffer.data {
2961            crate::model::piece_tree::BufferData::Loaded { data, .. } => {
2962                let end = (leaf.offset + leaf.bytes).min(data.len());
2963                data[leaf.offset..end]
2964                    .iter()
2965                    .filter(|&&b| b == b'\n')
2966                    .count()
2967            }
2968            crate::model::piece_tree::BufferData::Unloaded {
2969                file_path,
2970                file_offset,
2971                ..
2972            } => {
2973                let read_offset = *file_offset as u64 + leaf.offset as u64;
2974                self.fs
2975                    .count_line_feeds_in_range(file_path, read_offset, leaf.bytes)?
2976            }
2977        };
2978        Ok(count)
2979    }
2980
2981    /// Return the I/O parameters for an unloaded leaf, or `None` if loaded.
2982    ///
2983    /// Used by the incremental scan to distinguish leaves that can be counted
2984    /// in-memory (via `scan_leaf`) from those that need filesystem I/O.
2985    pub fn leaf_io_params(
2986        &self,
2987        leaf: &crate::model::piece_tree::LeafData,
2988    ) -> Option<(std::path::PathBuf, u64, usize)> {
2989        let buffer_id = leaf.location.buffer_id();
2990        let buffer = self.buffers.get(buffer_id)?;
2991        match &buffer.data {
2992            crate::model::piece_tree::BufferData::Loaded { .. } => None,
2993            crate::model::piece_tree::BufferData::Unloaded {
2994                file_path,
2995                file_offset,
2996                ..
2997            } => {
2998                let read_offset = *file_offset as u64 + leaf.offset as u64;
2999                Some((file_path.clone(), read_offset, leaf.bytes))
3000            }
3001        }
3002    }
3003
3004    /// Get a reference to the string buffers (for parallel scanning).
3005    pub fn buffer_slice(&self) -> &[StringBuffer] {
3006        &self.buffers
3007    }
3008
3009    /// Apply the results of an incremental line scan.
3010    pub fn apply_scan_updates(&mut self, updates: &[(usize, usize)]) {
3011        self.piece_tree.update_leaf_line_feeds(updates);
3012        self.line_feeds_scanned = true;
3013    }
3014
3015    /// After an incremental line-feed scan completes, rebuild the tree so that
3016    /// `saved_root` and the current tree share `Arc` pointers for unedited
3017    /// subtrees. This makes `diff_since_saved()` O(edited regions) instead of
3018    /// O(file size).
    /// Rebuild the piece tree so `saved_root` snapshots the pristine on-disk
    /// file, then replay the user's edits on top of that snapshot.
    ///
    /// `scan_updates` holds `(leaf_index, line_feed_count)` pairs from a line
    /// feed scan of the original file; they seed the pristine tree's line
    /// metadata. If `saved_file_size` is unknown (buffer never loaded from /
    /// saved to disk) a pristine tree cannot be built and we fall back to
    /// `apply_scan_updates`.
    pub fn rebuild_with_pristine_saved_root(&mut self, scan_updates: &[(usize, usize)]) {
        let file_size = match self.saved_file_size {
            Some(s) => s,
            None => {
                // Fallback: no saved file size means we can't build a pristine
                // tree. Just apply updates the old way.
                self.apply_scan_updates(scan_updates);
                return;
            }
        };

        // --- Walk the current tree to extract deletions and insertions ---
        let total = self.total_bytes();
        // Deletions: gaps in Stored coverage (orig_offset, len).
        let mut deletions: Vec<(usize, usize)> = Vec::new();
        // Insertions: (post_delete_offset, location, buf_offset, bytes, lf_cnt).
        // post_delete_offset = cumulative surviving Stored bytes before this point.
        let mut insertions: Vec<(usize, BufferLocation, usize, usize, Option<usize>)> = Vec::new();
        // Next byte of the ORIGINAL file we expect to see; any gap between
        // consecutive stored pieces is an original-file range that was deleted.
        let mut orig_cursor: usize = 0;
        let mut stored_bytes_in_doc: usize = 0;

        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
            match piece.location {
                BufferLocation::Stored(_) => {
                    if piece.buffer_offset > orig_cursor {
                        deletions.push((orig_cursor, piece.buffer_offset - orig_cursor));
                    }
                    orig_cursor = piece.buffer_offset + piece.bytes;
                    stored_bytes_in_doc += piece.bytes;
                }
                BufferLocation::Added(id) => {
                    // Check if this Added buffer was created by loading a chunk
                    // from the stored file (via get_text_range_mut chunk loading).
                    // If so, treat it as stored content, not a user edit.
                    if let Some(file_off) = self.buffers.get(id).and_then(|b| b.stored_file_offset)
                    {
                        if file_off > orig_cursor {
                            deletions.push((orig_cursor, file_off - orig_cursor));
                        }
                        orig_cursor = file_off + piece.bytes;
                        stored_bytes_in_doc += piece.bytes;
                    } else {
                        // Genuine user edit: record where it lands relative to
                        // the surviving stored content.
                        insertions.push((
                            stored_bytes_in_doc,
                            piece.location,
                            piece.buffer_offset,
                            piece.bytes,
                            piece.line_feed_cnt,
                        ));
                    }
                }
            }
        }
        // Trailing deletion.
        if orig_cursor < file_size {
            deletions.push((orig_cursor, file_size - orig_cursor));
        }

        // --- Build pristine tree (full original file, pre-split, with lf counts) ---
        let mut pristine = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
        } else {
            PieceTree::empty()
        };
        pristine.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
        pristine.update_leaf_line_feeds(scan_updates);

        // Snapshot the pristine tree as saved_root.
        self.saved_root = pristine.root();

        // If no edits, the pristine tree IS the current tree.
        if deletions.is_empty() && insertions.is_empty() {
            self.piece_tree = pristine;
            self.line_feeds_scanned = true;
            return;
        }

        // --- Replay edits onto a clone of the pristine tree ---
        let mut tree = pristine;

        // Apply deletions from HIGH to LOW offset so earlier offsets stay valid.
        deletions.sort_by(|a, b| b.0.cmp(&a.0));
        for &(offset, len) in &deletions {
            tree.delete(offset, len, &self.buffers);
        }

        // Apply insertions from LOW to HIGH. Each insertion shifts subsequent
        // offsets by its byte count, tracked via insert_delta.
        let mut insert_delta: usize = 0;
        for &(offset, location, buf_offset, bytes, lf_cnt) in &insertions {
            tree.insert(
                offset + insert_delta,
                location,
                buf_offset,
                bytes,
                lf_cnt,
                &self.buffers,
            );
            insert_delta += bytes;
        }

        // Path-copy insert/delete may split Stored leaves whose data is
        // Unloaded, producing fragments with line_feed_cnt = None
        // (compute_line_feeds_static can't read unloaded data). Fix them up
        // by scanning any remaining None leaves.
        let leaves = tree.get_leaves();
        let mut fixups: Vec<(usize, usize)> = Vec::new();
        for (idx, leaf) in leaves.iter().enumerate() {
            if leaf.line_feed_cnt.is_none() {
                // Best effort: a leaf whose scan fails (Err) keeps None.
                if let Ok(count) = self.scan_leaf(leaf) {
                    fixups.push((idx, count));
                }
            }
        }
        if !fixups.is_empty() {
            tree.update_leaf_line_feeds_path_copy(&fixups);
        }

        self.piece_tree = tree;
        self.line_feeds_scanned = true;
    }
3140
    /// Resolve the exact byte offset for a given line number (0-indexed).
    ///
    /// Uses the tree's line feed counts to find the piece containing the target line,
    /// then loads/reads that piece's data to find the exact newline position.
    /// This works even when buffers are unloaded (large file with scanned line index).
    ///
    /// Returns `None` when no piece metadata exists for `target_line`, when
    /// the piece's buffer is missing, or when reading unloaded data from disk
    /// fails.
    pub fn resolve_line_byte_offset(&mut self, target_line: usize) -> Option<usize> {
        // Line 0 always starts at the very beginning of the document.
        if target_line == 0 {
            return Some(0);
        }

        // Use tree metadata to find the piece containing the target line
        let (doc_offset, buffer_id, piece_offset, piece_bytes, lines_before) =
            self.piece_tree.piece_info_for_line(target_line)?;

        // We need to find the (target_line - lines_before)-th newline within this piece
        let lines_to_skip = target_line - lines_before;

        // Get the piece data — either from loaded buffer or read from disk
        let buffer = self.buffers.get(buffer_id)?;
        let piece_data: Vec<u8> = match &buffer.data {
            crate::model::piece_tree::BufferData::Loaded { data, .. } => {
                // Clamp the slice end to the buffer's actual length.
                let end = (piece_offset + piece_bytes).min(data.len());
                data[piece_offset..end].to_vec()
            }
            crate::model::piece_tree::BufferData::Unloaded {
                file_path,
                file_offset,
                ..
            } => {
                // piece_offset is relative to the buffer; the buffer itself
                // starts at file_offset within the backing file.
                let read_offset = *file_offset as u64 + piece_offset as u64;
                self.fs
                    .read_range(file_path, read_offset, piece_bytes)
                    .ok()?
            }
        };

        // Count newlines to find the target line start
        let mut newlines_found = 0;
        for (i, &byte) in piece_data.iter().enumerate() {
            if byte == b'\n' {
                newlines_found += 1;
                if newlines_found == lines_to_skip {
                    // The target line starts right after this newline
                    return Some(doc_offset + i + 1);
                }
            }
        }

        // If we didn't find enough newlines, the line starts in the next piece
        // Return the end of this piece as an approximation
        Some(doc_offset + piece_bytes)
    }
3193
3194    /// Get the saved file size (size of the file on disk after last load/save)
3195    /// For large files, this is used during recovery to know the expected original file size.
3196    /// Returns None for new unsaved buffers.
3197    pub fn original_file_size(&self) -> Option<usize> {
3198        // Return the tracked saved file size - this is updated when the file is
3199        // loaded or saved, so it always reflects the current file on disk.
3200        self.saved_file_size
3201    }
3202
    /// Get recovery chunks for this buffer (only modified portions)
    ///
    /// For large files, this returns only the pieces that come from Added buffers
    /// (i.e., the modifications), not the original file content. This allows
    /// efficient incremental recovery without reading/writing the entire file.
    ///
    /// Returns: Vec of (original_file_offset, data) for each modified chunk
    /// The offset is the position in the ORIGINAL file where this chunk should be inserted.
    ///
    /// NOTE(review): pieces whose backing buffer is unloaded (`get_data()`
    /// returns `None`) or whose range falls outside the buffer are silently
    /// skipped — recovery is best-effort for such pieces.
    pub fn get_recovery_chunks(&self) -> Vec<(usize, Vec<u8>)> {
        use crate::model::piece_tree::BufferLocation;

        let mut chunks = Vec::new();
        let total = self.total_bytes();

        // Track cumulative bytes from Stored pieces as we iterate.
        // This gives us the original file offset for Added pieces.
        // The key insight: Added pieces should be inserted at the position
        // corresponding to where they appear relative to Stored content,
        // not their position in the current document.
        let mut stored_bytes_before = 0;

        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
            match piece.location {
                BufferLocation::Stored(_) => {
                    // Accumulate stored bytes to track position in original file
                    stored_bytes_before += piece.bytes;
                }
                BufferLocation::Added(buffer_id) => {
                    if let Some(buffer) = self.buffers.iter().find(|b| b.id == buffer_id) {
                        // Skip buffers that originate from the original file
                        // (loaded by chunk_split_and_load for viewport display).
                        // These have stored_file_offset set and are not user edits.
                        //
                        // Why Added and not Stored? The piece tree only has two
                        // variants: Stored and Added. chunk_split_and_load marks
                        // loaded chunks as Added(new_id) because
                        // rebuild_with_pristine_saved_root interprets Stored
                        // pieces' buffer_offset as a position in the original
                        // file — but a chunk buffer starts at offset 0, so using
                        // Stored would corrupt the rebuild logic. We rely on
                        // stored_file_offset instead to distinguish "loaded from
                        // disk" from "user edit". A third BufferLocation variant
                        // (e.g. LoadedChunk) would make this distinction explicit
                        // in the type system rather than requiring this runtime
                        // check.
                        if buffer.stored_file_offset.is_some() {
                            stored_bytes_before += piece.bytes;
                            continue;
                        }
                        // Get the data from the buffer if loaded
                        if let Some(data) = buffer.get_data() {
                            // Extract just the portion this piece references
                            let start = piece.buffer_offset;
                            let end = start + piece.bytes;
                            if end <= data.len() {
                                // Use stored_bytes_before as the original file offset.
                                // This is where this insertion should go relative to
                                // the original file content.
                                chunks.push((stored_bytes_before, data[start..end].to_vec()));
                            }
                        }
                    }
                }
            }
        }

        chunks
    }
3271
3272    /// Check if this buffer contains binary content
3273    pub fn is_binary(&self) -> bool {
3274        self.is_binary
3275    }
3276
3277    /// Get the line ending format for this buffer
3278    pub fn line_ending(&self) -> LineEnding {
3279        self.line_ending
3280    }
3281
3282    /// Set the line ending format for this buffer
3283    ///
3284    /// This marks the buffer as modified since the line ending format has changed.
3285    /// On save, the buffer content will be converted to the new format.
3286    pub fn set_line_ending(&mut self, line_ending: LineEnding) {
3287        self.line_ending = line_ending;
3288        self.mark_content_modified();
3289    }
3290
3291    /// Set the default line ending format for a new/empty buffer
3292    ///
3293    /// Unlike `set_line_ending`, this does NOT mark the buffer as modified.
3294    /// This should be used when initializing a new buffer with a configured default.
3295    pub fn set_default_line_ending(&mut self, line_ending: LineEnding) {
3296        self.line_ending = line_ending;
3297        self.original_line_ending = line_ending;
3298    }
3299
3300    /// Get the encoding format for this buffer
3301    pub fn encoding(&self) -> Encoding {
3302        self.encoding
3303    }
3304
3305    /// Set the encoding format for this buffer
3306    ///
3307    /// This marks the buffer as modified since the encoding format has changed.
3308    /// On save, the buffer content will be converted to the new encoding.
3309    pub fn set_encoding(&mut self, encoding: Encoding) {
3310        self.encoding = encoding;
3311        self.mark_content_modified();
3312    }
3313
3314    /// Set the default encoding format for a new/empty buffer
3315    ///
3316    /// Unlike `set_encoding`, this does NOT mark the buffer as modified.
3317    /// This should be used when initializing a new buffer with a configured default.
3318    pub fn set_default_encoding(&mut self, encoding: Encoding) {
3319        self.encoding = encoding;
3320        self.original_encoding = encoding;
3321    }
3322
3323    /// Detect the line ending format from a sample of bytes
3324    ///
3325    /// Uses majority voting: counts CRLF, LF-only, and CR-only occurrences
3326    /// and returns the most common format.
3327    pub fn detect_line_ending(bytes: &[u8]) -> LineEnding {
3328        // Only check the first 8KB for line ending detection (same as binary detection)
3329        let check_len = bytes.len().min(8 * 1024);
3330        let sample = &bytes[..check_len];
3331
3332        let mut crlf_count = 0;
3333        let mut lf_only_count = 0;
3334        let mut cr_only_count = 0;
3335
3336        let mut i = 0;
3337        while i < sample.len() {
3338            if sample[i] == b'\r' {
3339                // Check if this is CRLF
3340                if i + 1 < sample.len() && sample[i + 1] == b'\n' {
3341                    crlf_count += 1;
3342                    i += 2; // Skip both \r and \n
3343                    continue;
3344                } else {
3345                    // CR only (old Mac format)
3346                    cr_only_count += 1;
3347                }
3348            } else if sample[i] == b'\n' {
3349                // LF only (Unix format)
3350                lf_only_count += 1;
3351            }
3352            i += 1;
3353        }
3354
3355        // Use majority voting to determine line ending
3356        if crlf_count > lf_only_count && crlf_count > cr_only_count {
3357            LineEnding::CRLF
3358        } else if cr_only_count > lf_only_count && cr_only_count > crlf_count {
3359            LineEnding::CR
3360        } else {
3361            // Default to LF if no clear winner or if LF wins
3362            LineEnding::LF
3363        }
3364    }
3365
    /// Detect the text encoding from a sample of bytes
    ///
    /// Delegates to the encoding module. Use `detect_encoding_or_binary`
    /// when you need to know if the content should be treated as binary.
    /// Pure associated function: reads no buffer state.
    pub fn detect_encoding(bytes: &[u8]) -> Encoding {
        encoding::detect_encoding(bytes)
    }
3373
    /// Detect the text encoding and whether content is binary.
    ///
    /// Returns (Encoding, is_binary) where:
    /// - Encoding is the detected encoding (or default if binary)
    /// - is_binary is true if the content should be treated as raw binary
    ///
    /// `truncated` signals that `bytes` is only a prefix of the full content —
    /// presumably so detection can tolerate a multi-byte sequence cut at the
    /// end of the sample; confirm in `encoding::detect_encoding_or_binary`.
    ///
    /// Delegates to the encoding module for detection logic.
    pub fn detect_encoding_or_binary(bytes: &[u8], truncated: bool) -> (Encoding, bool) {
        encoding::detect_encoding_or_binary(bytes, truncated)
    }
3384
    /// Detect encoding and convert bytes to UTF-8
    ///
    /// Returns the detected encoding and the UTF-8 converted content.
    /// This is the core function for normalizing file content to UTF-8 on load.
    /// Thin delegate to `encoding::detect_and_convert`.
    pub fn detect_and_convert_encoding(bytes: &[u8]) -> (Encoding, Vec<u8>) {
        encoding::detect_and_convert(bytes)
    }
3392
    /// Convert UTF-8 content to the specified encoding for saving
    ///
    /// Used when saving files to convert internal UTF-8 representation
    /// back to the original (or user-selected) encoding.
    /// Note: This does NOT add BOM - the BOM is handled separately in build_write_recipe.
    /// Thin delegate to `encoding::convert_from_utf8`.
    pub fn convert_to_encoding(utf8_bytes: &[u8], target_encoding: Encoding) -> Vec<u8> {
        encoding::convert_from_utf8(utf8_bytes, target_encoding)
    }
3401
3402    /// Normalize line endings in the given bytes to LF only
3403    ///
3404    /// Converts CRLF (\r\n) and CR (\r) to LF (\n) for internal representation.
3405    /// This makes editing and cursor movement simpler while preserving the
3406    /// original format for saving.
3407    #[allow(dead_code)] // Kept for tests and potential future use
3408    pub fn normalize_line_endings(bytes: Vec<u8>) -> Vec<u8> {
3409        let mut normalized = Vec::with_capacity(bytes.len());
3410        let mut i = 0;
3411
3412        while i < bytes.len() {
3413            if bytes[i] == b'\r' {
3414                // Check if this is CRLF
3415                if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
3416                    // CRLF -> LF
3417                    normalized.push(b'\n');
3418                    i += 2; // Skip both \r and \n
3419                    continue;
3420                } else {
3421                    // CR only -> LF
3422                    normalized.push(b'\n');
3423                }
3424            } else {
3425                // Copy byte as-is
3426                normalized.push(bytes[i]);
3427            }
3428            i += 1;
3429        }
3430
3431        normalized
3432    }
3433
3434    /// Convert line endings from any source format to any target format
3435    ///
3436    /// This first normalizes all line endings to LF, then converts to the target format.
3437    /// Used when saving files after the user has changed the line ending format.
3438    fn convert_line_endings_to(bytes: &[u8], target_ending: LineEnding) -> Vec<u8> {
3439        // First pass: normalize everything to LF
3440        let mut normalized = Vec::with_capacity(bytes.len());
3441        let mut i = 0;
3442        while i < bytes.len() {
3443            if bytes[i] == b'\r' {
3444                // Check if this is CRLF
3445                if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
3446                    // CRLF -> LF
3447                    normalized.push(b'\n');
3448                    i += 2;
3449                    continue;
3450                } else {
3451                    // CR only -> LF
3452                    normalized.push(b'\n');
3453                }
3454            } else {
3455                normalized.push(bytes[i]);
3456            }
3457            i += 1;
3458        }
3459
3460        // If target is LF, we're done
3461        if target_ending == LineEnding::LF {
3462            return normalized;
3463        }
3464
3465        // Second pass: convert LF to target format
3466        let replacement = target_ending.as_str().as_bytes();
3467        let mut result = Vec::with_capacity(normalized.len() + normalized.len() / 10);
3468
3469        for byte in normalized {
3470            if byte == b'\n' {
3471                result.extend_from_slice(replacement);
3472            } else {
3473                result.push(byte);
3474            }
3475        }
3476
3477        result
3478    }
3479
3480    /// Get text for a specific line
3481    pub fn get_line(&self, line: usize) -> Option<Vec<u8>> {
3482        let (start, end) = self.piece_tree.line_range(line, &self.buffers)?;
3483
3484        let bytes = if let Some(end_offset) = end {
3485            end_offset.saturating_sub(start)
3486        } else {
3487            self.total_bytes().saturating_sub(start)
3488        };
3489
3490        self.get_text_range(start, bytes)
3491    }
3492
3493    /// Get the byte offset where a line starts
3494    pub fn line_start_offset(&self, line: usize) -> Option<usize> {
3495        let (start, _) = self.piece_tree.line_range(line, &self.buffers)?;
3496        Some(start)
3497    }
3498
    /// Get piece information at a byte offset
    ///
    /// `None` presumably means the offset is outside the document — confirm
    /// against `PieceTree::find_by_offset`.
    pub fn piece_info_at_offset(&self, offset: usize) -> Option<PieceInfo> {
        self.piece_tree.find_by_offset(offset)
    }
3503
    /// Get tree statistics for debugging
    ///
    /// Thin wrapper over `PieceTree::stats`.
    pub fn stats(&self) -> TreeStats {
        self.piece_tree.stats()
    }
3508
3509    // Search and Replace Operations
3510
3511    /// Find the next occurrence of a pattern, with wrap-around
3512    pub fn find_next(&self, pattern: &str, start_pos: usize) -> Option<usize> {
3513        if pattern.is_empty() {
3514            return None;
3515        }
3516
3517        let pattern_bytes = pattern.as_bytes();
3518        let buffer_len = self.len();
3519
3520        // Search from start_pos to end
3521        if start_pos < buffer_len {
3522            if let Some(offset) = self.find_pattern(start_pos, buffer_len, pattern_bytes) {
3523                return Some(offset);
3524            }
3525        }
3526
3527        // Wrap around: search from beginning to start_pos
3528        if start_pos > 0 {
3529            if let Some(offset) = self.find_pattern(0, start_pos, pattern_bytes) {
3530                return Some(offset);
3531            }
3532        }
3533
3534        None
3535    }
3536
3537    /// Find the next occurrence of a pattern within an optional range
3538    /// If range is None, searches the entire buffer with wrap-around (same as find_next)
3539    /// If range is Some, searches only within that range without wrap-around
3540    pub fn find_next_in_range(
3541        &self,
3542        pattern: &str,
3543        start_pos: usize,
3544        range: Option<Range<usize>>,
3545    ) -> Option<usize> {
3546        if pattern.is_empty() {
3547            return None;
3548        }
3549
3550        if let Some(search_range) = range {
3551            // Search within range only, no wrap-around
3552            let pattern_bytes = pattern.as_bytes();
3553            let search_start = start_pos.max(search_range.start);
3554            let search_end = search_range.end.min(self.len());
3555
3556            if search_start < search_end {
3557                self.find_pattern(search_start, search_end, pattern_bytes)
3558            } else {
3559                None
3560            }
3561        } else {
3562            // No range specified, use normal find_next with wrap-around
3563            self.find_next(pattern, start_pos)
3564        }
3565    }
3566
    /// Find pattern in a byte range using overlapping chunks
    ///
    /// Streams `[start, end)` in 64 KB chunks whose starts overlap by
    /// `pattern.len() - 1` bytes so a match straddling a chunk boundary is
    /// still seen in full; the `valid_start` check below makes sure each
    /// boundary match is reported exactly once.
    fn find_pattern(&self, start: usize, end: usize, pattern: &[u8]) -> Option<usize> {
        if pattern.is_empty() || start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 65536; // 64KB chunks
        // len - 1 is the minimum overlap that keeps a boundary-spanning match
        // intact; clamped to at least 1 for single-byte patterns.
        let overlap = pattern.len().saturating_sub(1).max(1);

        // Use the overlapping chunks iterator for efficient streaming search
        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, overlap);

        for chunk in chunks {
            // Search the entire chunk buffer
            if let Some(pos) = Self::find_in_bytes(&chunk.buffer, pattern) {
                let match_end = pos + pattern.len();
                // Only report if match ENDS in or after the valid zone
                // This ensures patterns spanning boundaries are found exactly once
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + pos;
                    // Verify the match doesn't extend beyond our search range
                    if absolute_pos + pattern.len() <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
3597
3598    /// Simple byte pattern search using naive algorithm
3599    fn find_in_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
3600        if needle.is_empty() || needle.len() > haystack.len() {
3601            return None;
3602        }
3603
3604        (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
3605    }
3606
3607    /// Find the next occurrence of a regex pattern, with wrap-around
3608    pub fn find_next_regex(&self, regex: &Regex, start_pos: usize) -> Option<usize> {
3609        let buffer_len = self.len();
3610
3611        // Search from start_pos to end
3612        if start_pos < buffer_len {
3613            if let Some(offset) = self.find_regex(start_pos, buffer_len, regex) {
3614                return Some(offset);
3615            }
3616        }
3617
3618        // Wrap around: search from beginning to start_pos
3619        if start_pos > 0 {
3620            if let Some(offset) = self.find_regex(0, start_pos, regex) {
3621                return Some(offset);
3622            }
3623        }
3624
3625        None
3626    }
3627
3628    /// Find the next occurrence of a regex pattern within an optional range
3629    pub fn find_next_regex_in_range(
3630        &self,
3631        regex: &Regex,
3632        start_pos: usize,
3633        range: Option<Range<usize>>,
3634    ) -> Option<usize> {
3635        if let Some(search_range) = range {
3636            let search_start = start_pos.max(search_range.start);
3637            let search_end = search_range.end.min(self.len());
3638
3639            if search_start < search_end {
3640                self.find_regex(search_start, search_end, regex)
3641            } else {
3642                None
3643            }
3644        } else {
3645            self.find_next_regex(regex, start_pos)
3646        }
3647    }
3648
    /// Find regex pattern in a byte range using overlapping chunks
    ///
    /// Streams `[start, end)` in 1 MB chunks with a fixed 4 KB overlap.
    /// NOTE(review): unlike `find_pattern`, the overlap cannot be derived
    /// from a pattern length, so a boundary-spanning match longer than 4 KB
    /// appears it could still be missed — the overlap is a heuristic bound.
    fn find_regex(&self, start: usize, end: usize, regex: &Regex) -> Option<usize> {
        if start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 1048576; // 1MB chunks
        const OVERLAP: usize = 4096; // 4KB overlap for regex

        // Use the overlapping chunks iterator for efficient streaming search
        // This fixes the critical bug where regex patterns spanning chunk boundaries were missed
        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, OVERLAP);

        for chunk in chunks {
            // Search the entire chunk buffer
            if let Some(mat) = regex.find(&chunk.buffer) {
                let match_end = mat.end();
                // Only report if match ENDS in or after the valid zone
                // This ensures patterns spanning boundaries are found exactly once
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + mat.start();
                    // Verify the match doesn't extend beyond our search range
                    let match_len = mat.end() - mat.start();
                    if absolute_pos + match_len <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
3681
3682    /// Replace a range with replacement text
3683    pub fn replace_range(&mut self, range: Range<usize>, replacement: &str) -> bool {
3684        if range.start >= self.len() {
3685            return false;
3686        }
3687
3688        let end = range.end.min(self.len());
3689        if end > range.start {
3690            self.delete_bytes(range.start, end - range.start);
3691        }
3692
3693        if !replacement.is_empty() {
3694            self.insert(range.start, replacement);
3695        }
3696
3697        true
3698    }
3699
3700    /// Find and replace the next occurrence of a pattern
3701    pub fn replace_next(
3702        &mut self,
3703        pattern: &str,
3704        replacement: &str,
3705        start_pos: usize,
3706        range: Option<Range<usize>>,
3707    ) -> Option<usize> {
3708        if let Some(pos) = self.find_next_in_range(pattern, start_pos, range.clone()) {
3709            self.replace_range(pos..pos + pattern.len(), replacement);
3710            Some(pos)
3711        } else {
3712            None
3713        }
3714    }
3715
3716    /// Replace all occurrences of a pattern with replacement text
3717    pub fn replace_all(&mut self, pattern: &str, replacement: &str) -> usize {
3718        if pattern.is_empty() {
3719            return 0;
3720        }
3721
3722        let mut count = 0;
3723        let mut pos = 0;
3724
3725        // Keep searching and replacing
3726        // Note: we search forward from last replacement to handle growth/shrinkage
3727        // Find next occurrence (no wrap-around for replace_all)
3728        while let Some(found_pos) = self.find_next_in_range(pattern, pos, Some(0..self.len())) {
3729            self.replace_range(found_pos..found_pos + pattern.len(), replacement);
3730            count += 1;
3731
3732            // Move past the replacement
3733            pos = found_pos + replacement.len();
3734
3735            // If we're at or past the end, stop
3736            if pos >= self.len() {
3737                break;
3738            }
3739        }
3740
3741        count
3742    }
3743
3744    /// Replace all occurrences of a regex pattern with replacement text
3745    pub fn replace_all_regex(&mut self, regex: &Regex, replacement: &str) -> Result<usize> {
3746        let mut count = 0;
3747        let mut pos = 0;
3748
3749        while let Some(found_pos) = self.find_next_regex_in_range(regex, pos, Some(0..self.len())) {
3750            // Get the match to find its length
3751            let text = self
3752                .get_text_range_mut(found_pos, self.len() - found_pos)
3753                .context("Failed to read text for regex match")?;
3754
3755            if let Some(mat) = regex.find(&text) {
3756                self.replace_range(found_pos..found_pos + mat.len(), replacement);
3757                count += 1;
3758                pos = found_pos + replacement.len();
3759
3760                if pos >= self.len() {
3761                    break;
3762                }
3763            } else {
3764                break;
3765            }
3766        }
3767
3768        Ok(count)
3769    }
3770
3771    // LSP Support (UTF-16 conversions)
3772
3773    /// Convert byte position to (line, column) in bytes
3774    pub fn position_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
3775        self.offset_to_position(byte_pos)
3776            .map(|pos| (pos.line, pos.column))
3777            .unwrap_or_else(|| (byte_pos / 80, 0)) // Estimate if metadata unavailable
3778    }
3779
3780    /// Convert (line, character) to byte position - 0-indexed
3781    /// character is in BYTES, not UTF-16 code units
3782    /// Optimized to use single line_range() call instead of two
3783    pub fn line_col_to_position(&self, line: usize, character: usize) -> usize {
3784        if let Some((start, end)) = self.piece_tree.line_range(line, &self.buffers) {
3785            // Calculate line length from the range
3786            let line_len = if let Some(end_offset) = end {
3787                end_offset.saturating_sub(start)
3788            } else {
3789                self.total_bytes().saturating_sub(start)
3790            };
3791            let byte_offset = character.min(line_len);
3792            start + byte_offset
3793        } else {
3794            // Line doesn't exist, return end of buffer
3795            self.len()
3796        }
3797    }
3798
3799    /// Convert byte position to LSP position (line, UTF-16 code units)
3800    /// LSP protocol uses UTF-16 code units for character offsets
3801    pub fn position_to_lsp_position(&self, byte_pos: usize) -> (usize, usize) {
3802        let (line, column_bytes) = self
3803            .offset_to_position(byte_pos)
3804            .map(|pos| (pos.line, pos.column))
3805            .unwrap_or_else(|| (byte_pos / 80, 0)); // Estimate if metadata unavailable
3806
3807        // Get the line content
3808        if let Some(line_bytes) = self.get_line(line) {
3809            // Convert byte offset to UTF-16 code units
3810            let text_before = &line_bytes[..column_bytes.min(line_bytes.len())];
3811            let text_str = String::from_utf8_lossy(text_before);
3812            let utf16_offset = text_str.encode_utf16().count();
3813            (line, utf16_offset)
3814        } else {
3815            (line, 0)
3816        }
3817    }
3818
3819    /// Convert LSP position (line, UTF-16 code units) to byte position
3820    /// LSP uses UTF-16 code units for character offsets, not bytes
3821    /// Optimized to use single line_range() call instead of two
3822    pub fn lsp_position_to_byte(&self, line: usize, utf16_offset: usize) -> usize {
3823        if let Some((line_start, end)) = self.piece_tree.line_range(line, &self.buffers) {
3824            // Calculate line length and get line content
3825            let line_len = if let Some(end_offset) = end {
3826                end_offset.saturating_sub(line_start)
3827            } else {
3828                self.total_bytes().saturating_sub(line_start)
3829            };
3830
3831            if line_len > 0 {
3832                // If data is unloaded, return line_start as fallback
3833                let Some(line_bytes) = self.get_text_range(line_start, line_len) else {
3834                    return line_start;
3835                };
3836                let line_str = String::from_utf8_lossy(&line_bytes);
3837
3838                // Convert UTF-16 offset to byte offset
3839                let mut utf16_count = 0;
3840                let mut byte_offset = 0;
3841
3842                for ch in line_str.chars() {
3843                    if utf16_count >= utf16_offset {
3844                        break;
3845                    }
3846                    utf16_count += ch.len_utf16();
3847                    byte_offset += ch.len_utf8();
3848                }
3849
3850                line_start + byte_offset
3851            } else {
3852                line_start
3853            }
3854        } else {
3855            // Line doesn't exist, return end of buffer
3856            self.len()
3857        }
3858    }
3859
3860    // Navigation helpers
3861
3862    /// Find the previous character boundary (UTF-8 aware)
3863    pub fn prev_char_boundary(&self, pos: usize) -> usize {
3864        if pos == 0 {
3865            return 0;
3866        }
3867
3868        // Get a few bytes before pos to find the character boundary
3869        let start = pos.saturating_sub(4);
3870        let Some(bytes) = self.get_text_range(start, pos - start) else {
3871            // Data unloaded, return pos as fallback
3872            return pos;
3873        };
3874
3875        // Walk backwards to find a UTF-8 leading byte
3876        for i in (0..bytes.len()).rev() {
3877            let byte = bytes[i];
3878            // Check if this is a UTF-8 leading byte (not a continuation byte)
3879            if (byte & 0b1100_0000) != 0b1000_0000 {
3880                return start + i;
3881            }
3882        }
3883
3884        // Fallback
3885        pos.saturating_sub(1)
3886    }
3887
3888    /// Find the next character boundary (UTF-8 aware)
3889    pub fn next_char_boundary(&self, pos: usize) -> usize {
3890        let len = self.len();
3891        if pos >= len {
3892            return len;
3893        }
3894
3895        // Get a few bytes after pos to find the character boundary
3896        let end = (pos + 5).min(len);
3897        let Some(bytes) = self.get_text_range(pos, end - pos) else {
3898            // Data unloaded, return pos as fallback
3899            return pos;
3900        };
3901
3902        // Start from index 1 (we want the NEXT boundary)
3903        for (i, &byte) in bytes.iter().enumerate().skip(1) {
3904            // Check if this is a UTF-8 leading byte (not a continuation byte)
3905            if (byte & 0b1100_0000) != 0b1000_0000 {
3906                return pos + i;
3907            }
3908        }
3909
3910        // If we got here, we're at the end or found no boundary in the range
3911        end
3912    }
3913
3914    /// Check if a byte is a UTF-8 continuation byte (not at a char boundary)
3915    /// UTF-8 continuation bytes have the pattern 10xxxxxx (0x80-0xBF)
3916    /// This is the same check that str::is_char_boundary uses internally.
3917    #[inline]
3918    fn is_utf8_continuation_byte(byte: u8) -> bool {
3919        (byte & 0b1100_0000) == 0b1000_0000
3920    }
3921
3922    /// Snap position to a valid UTF-8 character boundary
3923    /// If already at a boundary, returns the same position.
3924    /// Otherwise, moves to the previous valid boundary.
3925    pub fn snap_to_char_boundary(&self, pos: usize) -> usize {
3926        let len = self.len();
3927        if pos == 0 || pos >= len {
3928            return pos.min(len);
3929        }
3930
3931        // Get the byte at pos to check if we're at a character boundary
3932        let Some(bytes) = self.get_text_range(pos, 1) else {
3933            // Data unloaded, return pos as fallback
3934            return pos;
3935        };
3936
3937        // A position is at a char boundary if the byte there is NOT a continuation byte
3938        if !Self::is_utf8_continuation_byte(bytes[0]) {
3939            // Already at a character boundary
3940            return pos;
3941        }
3942
3943        // Not at a boundary, find the previous one
3944        self.prev_char_boundary(pos)
3945    }
3946
    /// Find the previous grapheme cluster boundary (for proper cursor movement with combining characters)
    ///
    /// This handles complex scripts like Thai where multiple Unicode code points
    /// form a single visual character (grapheme cluster). For example, Thai "ที่"
    /// is 3 code points but 1 grapheme cluster.
    ///
    /// Falls back to `prev_char_boundary` when the underlying data is
    /// unloaded or cannot be decoded as UTF-8.
    pub fn prev_grapheme_boundary(&self, pos: usize) -> usize {
        if pos == 0 {
            return 0;
        }

        // Get enough context before pos to find grapheme boundaries
        // Thai combining characters can have multiple marks, so get up to 32 bytes
        // IMPORTANT: Align start to a valid character boundary to avoid invalid UTF-8
        // when get_text_range starts mid-character
        let raw_start = pos.saturating_sub(32);
        let start = if raw_start == 0 {
            0
        } else {
            // Find the character boundary at or before raw_start
            // (prev_char_boundary(raw_start + 1) returns a boundary < raw_start + 1)
            self.prev_char_boundary(raw_start + 1)
        };

        let Some(bytes) = self.get_text_range(start, pos - start) else {
            // Data unloaded, fall back to char boundary
            return self.prev_char_boundary(pos);
        };

        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // Still got invalid UTF-8 (shouldn't happen after alignment)
                // Try using just the valid portion
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.prev_char_boundary(pos),
                }
            }
        };

        // Use shared grapheme utility with relative position
        let rel_pos = pos - start;
        let new_rel_pos = grapheme::prev_grapheme_boundary(text, rel_pos);

        // If we landed at the start of this chunk and there's more before,
        // we might need to look further back. Recursion terminates because
        // `start < pos`, so each call strictly decreases the position.
        if new_rel_pos == 0 && start > 0 {
            return self.prev_grapheme_boundary(start);
        }

        start + new_rel_pos
    }
3999
    /// Find the next grapheme cluster boundary (for proper cursor movement with combining characters)
    ///
    /// This handles complex scripts like Thai where multiple Unicode code points
    /// form a single visual character (grapheme cluster). For example, Thai "ที่"
    /// is 3 code points but 1 grapheme cluster.
    ///
    /// Falls back to `next_char_boundary` when the underlying data is
    /// unloaded or cannot be decoded as UTF-8.
    pub fn next_grapheme_boundary(&self, pos: usize) -> usize {
        let len = self.len();
        if pos >= len {
            return len;
        }

        // Get enough context after pos to find grapheme boundaries
        // Thai combining characters can have multiple marks, so get up to 32 bytes
        let end = (pos + 32).min(len);
        let Some(bytes) = self.get_text_range(pos, end - pos) else {
            // Data unloaded, fall back to char boundary
            return self.next_char_boundary(pos);
        };

        // Convert to UTF-8 string, handling the case where we might have
        // grabbed bytes that end mid-character (truncate to valid UTF-8)
        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // The bytes end in an incomplete UTF-8 sequence
                // Use only the valid portion (which includes at least the first grapheme)
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.next_char_boundary(pos),
                }
            }
        };

        // Use shared grapheme utility (relative position 0 = start of window)
        let new_rel_pos = grapheme::next_grapheme_boundary(text, 0);
        pos + new_rel_pos
    }
4038
4039    /// Find the previous word boundary
4040    pub fn prev_word_boundary(&self, pos: usize) -> usize {
4041        if pos == 0 {
4042            return 0;
4043        }
4044
4045        // Get some text before pos
4046        let start = pos.saturating_sub(256).max(0);
4047        let Some(bytes) = self.get_text_range(start, pos - start) else {
4048            // Data unloaded, return pos as fallback
4049            return pos;
4050        };
4051        let text = String::from_utf8_lossy(&bytes);
4052
4053        let mut found_word_char = false;
4054        let chars: Vec<char> = text.chars().collect();
4055
4056        for i in (0..chars.len()).rev() {
4057            let ch = chars[i];
4058            let is_word_char = ch.is_alphanumeric() || ch == '_';
4059
4060            if found_word_char && !is_word_char {
4061                // We've transitioned from word to non-word
4062                // Calculate the byte position
4063                let byte_offset: usize = chars[0..=i].iter().map(|c| c.len_utf8()).sum();
4064                return start + byte_offset;
4065            }
4066
4067            if is_word_char {
4068                found_word_char = true;
4069            }
4070        }
4071
4072        0
4073    }
4074
4075    /// Find the next word boundary
4076    pub fn next_word_boundary(&self, pos: usize) -> usize {
4077        let len = self.len();
4078        if pos >= len {
4079            return len;
4080        }
4081
4082        // Get some text after pos
4083        let end = (pos + 256).min(len);
4084        let Some(bytes) = self.get_text_range(pos, end - pos) else {
4085            // Data unloaded, return pos as fallback
4086            return pos;
4087        };
4088        let text = String::from_utf8_lossy(&bytes);
4089
4090        let mut found_word_char = false;
4091        let mut byte_offset = 0;
4092
4093        for ch in text.chars() {
4094            let is_word_char = ch.is_alphanumeric() || ch == '_';
4095
4096            if found_word_char && !is_word_char {
4097                // We've transitioned from word to non-word
4098                return pos + byte_offset;
4099            }
4100
4101            if is_word_char {
4102                found_word_char = true;
4103            }
4104
4105            byte_offset += ch.len_utf8();
4106        }
4107
4108        len
4109    }
4110
    /// Create a line iterator starting at the given byte position
    ///
    /// This iterator lazily loads chunks as needed, never scanning the entire file.
    /// For large files with unloaded buffers, chunks are loaded on-demand (1MB at a time).
    ///
    /// * `byte_pos` - byte offset at which iteration starts
    /// * `estimated_line_length` - average-line-length hint forwarded to `LineIterator::new`
    pub fn line_iterator(
        &mut self,
        byte_pos: usize,
        estimated_line_length: usize,
    ) -> LineIterator<'_> {
        LineIterator::new(self, byte_pos, estimated_line_length)
    }
4122
    /// Iterate over lines starting from a given byte offset, with line numbers
    ///
    /// This is a more efficient alternative to using line_iterator() + offset_to_position()
    /// because it calculates line numbers incrementally during iteration by accumulating
    /// line_feed_cnt from pieces (which is already tracked in the piece tree).
    ///
    /// * `byte_pos` - byte offset at which iteration starts
    /// * `max_lines` - upper bound on the number of lines yielded
    ///
    /// Returns: Iterator yielding (byte_offset, content, line_number: Option<usize>)
    /// - line_number is Some(n) for small files with line metadata
    /// - line_number is None for large files without line metadata
    ///
    /// # Performance
    /// - O(1) per line for line number calculation (vs O(log n) per line with offset_to_position)
    /// - Uses single source of truth: piece tree's existing line_feed_cnt metadata
    pub fn iter_lines_from(
        &mut self,
        byte_pos: usize,
        max_lines: usize,
    ) -> Result<TextBufferLineIterator> {
        TextBufferLineIterator::new(self, byte_pos, max_lines)
    }
4143
4144    // Legacy API methods for backwards compatibility
4145
4146    /// Get the line number for a given byte offset
4147    ///
4148    /// Returns exact line number if metadata available, otherwise estimates based on bytes.
4149    ///
4150    /// # Behavior by File Size:
4151    /// - **Small files (< 1MB)**: Returns exact line number from piece tree's `line_starts` metadata
4152    /// - **Large files (≥ 1MB)**: Returns estimated line number using `byte_offset / estimated_line_length`
4153    ///
4154    /// Large files don't maintain line metadata for performance reasons. The estimation
4155    /// uses the configured `estimated_line_length` (default 80 bytes).
4156    pub fn get_line_number(&self, byte_offset: usize) -> usize {
4157        self.offset_to_position(byte_offset)
4158            .map(|pos| pos.line)
4159            .unwrap_or_else(|| {
4160                // Estimate line number based on configured average line length
4161                byte_offset / self.config.estimated_line_length
4162            })
4163    }
4164
    /// Get the configured estimated line length for approximate line number calculations.
    ///
    /// Used as the divisor when exact line metadata is unavailable (large-file mode).
    pub fn estimated_line_length(&self) -> usize {
        self.config.estimated_line_length
    }
4169
    /// Get the starting line number at a byte offset (used for viewport rendering)
    ///
    /// # Line Cache Architecture (Post-Refactoring):
    ///
    /// The concept of a separate "line cache" is **now obsolete**. After the refactoring,
    /// line tracking is integrated directly into the piece tree via:
    /// ```rust
    /// BufferData::Loaded {
    ///     data: Vec<u8>,
    ///     line_starts: Option<Vec<usize>>  // None = large file mode (no line metadata)
    /// }
    /// ```
    ///
    /// ## Why This Method Still Exists:
    /// The rendering code needs to know what line number to display in the margin at the
    /// top of the viewport. This method returns that line number, handling both small
    /// and large file modes transparently. The `_line_count` parameter is kept only
    /// for signature compatibility with existing callers.
    ///
    /// ## Small vs Large File Modes:
    /// - **Small files**: `line_starts = Some(vec)` → returns exact line number from metadata
    /// - **Large files**: `line_starts = None` → returns estimated line number (byte_offset / estimated_line_length)
    ///
    /// ## Legacy Line Cache Methods:
    /// These methods are now no-ops and can be removed in a future cleanup:
    /// - `invalidate_line_cache_from()` - No-op (piece tree updates automatically)
    /// - `handle_line_cache_insertion()` - No-op (piece tree updates automatically)
    /// - `handle_line_cache_deletion()` - No-op (piece tree updates automatically)
    /// - `clear_line_cache()` - No-op (can't clear piece tree metadata)
    ///
    /// ## Bug Fix (2025-11):
    /// Previously this method always returned `0`, causing line numbers in the margin
    /// to always show 1, 2, 3... regardless of scroll position. Now it correctly returns
    /// the actual line number at `start_byte`.
    pub fn populate_line_cache(&mut self, start_byte: usize, _line_count: usize) -> usize {
        // No-op for cache population: LineIndex maintains all line starts automatically
        // But we need to return the actual line number at start_byte for rendering
        self.get_line_number(start_byte)
    }
4208
    /// Get cached byte offset for line (compatibility method)
    ///
    /// There is no separate cache anymore; this simply delegates to
    /// `line_start_offset`, which reads the piece tree's line metadata.
    pub fn get_cached_byte_offset_for_line(&self, line_number: usize) -> Option<usize> {
        self.line_start_offset(line_number)
    }
4213
    /// Invalidate line cache from offset (no-op in new implementation)
    ///
    /// Kept only for API compatibility; the piece tree keeps line metadata
    /// consistent on its own.
    pub fn invalidate_line_cache_from(&mut self, _byte_offset: usize) {
        // No-op: LineIndex updates automatically
    }
4218
    /// Handle line cache insertion (no-op in new implementation)
    ///
    /// Kept only for API compatibility; insertions update the piece tree's
    /// line metadata as part of the edit itself.
    pub fn handle_line_cache_insertion(&mut self, _byte_offset: usize, _bytes_inserted: usize) {
        // No-op: LineIndex updates automatically during insert
    }
4223
    /// Handle line cache deletion (no-op in new implementation)
    ///
    /// Kept only for API compatibility; deletions update the piece tree's
    /// line metadata as part of the edit itself.
    pub fn handle_line_cache_deletion(&mut self, _byte_offset: usize, _bytes_deleted: usize) {
        // No-op: LineIndex updates automatically during delete
    }
4228
    /// Clear line cache (no-op in new implementation)
    ///
    /// Kept only for API compatibility; line metadata lives inside the
    /// piece tree and cannot be cleared independently.
    pub fn clear_line_cache(&mut self) {
        // No-op: LineIndex can't be cleared
    }
4233
4234    // Test helper methods
4235
4236    /// Create a buffer from a string for testing
4237    #[cfg(test)]
4238    pub fn from_str_test(s: &str) -> Self {
4239        Self::from_bytes(
4240            s.as_bytes().to_vec(),
4241            std::sync::Arc::new(crate::model::filesystem::StdFileSystem),
4242        )
4243    }
4244
4245    /// Create a new empty buffer for testing
4246    #[cfg(test)]
4247    pub fn new_test() -> Self {
4248        Self::empty(std::sync::Arc::new(crate::model::filesystem::StdFileSystem))
4249    }
4250}
4251
4252/// Type alias for backwards compatibility
4253pub type Buffer = TextBuffer;
4254
4255// Re-export LineIterator from the line_iterator module
4256pub use crate::primitives::line_iterator::LineIterator;
4257
4258// ============================================================================
4259// Overlapping Chunks Iterator for Efficient Search
4260// ============================================================================
4261
/// Information about a chunk of data for pattern matching
///
/// Produced by `OverlappingChunks`; consumers should only report matches
/// that begin at or after `valid_start` to avoid duplicate results.
#[derive(Debug)]
pub struct ChunkInfo {
    /// The buffer containing this chunk's data (includes overlap from previous chunk)
    pub buffer: Vec<u8>,

    /// Absolute position in the document where this buffer starts
    pub absolute_pos: usize,

    /// Offset within buffer where "new" data starts (valid match zone)
    /// Matches starting before this offset were already checked in the previous chunk
    pub valid_start: usize,
}
4275
/// Iterator that yields overlapping chunks for pattern matching
///
/// This iterator implements the VSCode/Sublime approach: pull overlapping chunks
/// from the underlying piece tree and use standard search algorithms on them.
///
/// # Algorithm
///
/// ```text
/// Chunk 1: [------------ valid -----------]
/// Chunk 2:      [overlap][---- valid ----]
/// Chunk 3:                   [overlap][-- valid --]
///
/// Only matches starting in the "valid" zone are reported to avoid duplicates.
/// ```
///
/// # Example
///
/// ```ignore
/// let chunks = OverlappingChunks::new(&text_buffer, start, end, 4096, pattern.len()-1);
/// for chunk in chunks {
///     // Search only starting from chunk.valid_start
///     if let Some(pos) = search(&chunk.buffer[chunk.valid_start..]) {
///         let absolute_pos = chunk.absolute_pos + chunk.valid_start + pos;
///         return Some(absolute_pos);
///     }
/// }
/// ```
pub struct OverlappingChunks<'a> {
    /// Pieces covering the requested document range, consumed lazily.
    piece_iter: PieceRangeIter,
    /// The text buffer's backing buffers, used to resolve piece data.
    buffers: &'a [StringBuffer],

    // Reusable chunk buffer that we fill from pieces
    buffer: Vec<u8>,
    // Absolute document position of buffer[0]
    buffer_absolute_pos: usize,

    // Current state: next byte to read and the (exclusive) range end
    current_pos: usize,
    end_pos: usize,

    // Configuration (see `new` for recommended values)
    chunk_size: usize,
    overlap: usize,

    // Track first chunk special case (no leading overlap on the first chunk)
    first_chunk: bool,

    // Cached piece data for incremental reading (clipped to the range)
    current_piece_data: Option<Vec<u8>>,
    current_piece_offset: usize,
}
4326
impl<'a> OverlappingChunks<'a> {
    /// Create a new overlapping chunks iterator
    ///
    /// # Arguments
    ///
    /// * `text_buffer` - The text buffer to iterate over
    /// * `start` - Start position in the document
    /// * `end` - End position in the document (exclusive)
    /// * `chunk_size` - Target size for each chunk (excluding overlap)
    /// * `overlap` - Number of bytes to overlap between chunks
    ///
    /// # Recommendations
    ///
    /// * For literal string search: `chunk_size=65536, overlap=pattern.len()-1`
    /// * For regex search: `chunk_size=1048576, overlap=4096`
    pub fn new(
        text_buffer: &'a TextBuffer,
        start: usize,
        end: usize,
        chunk_size: usize,
        overlap: usize,
    ) -> Self {
        let piece_iter = text_buffer.piece_tree.iter_pieces_in_range(start, end);

        Self {
            piece_iter,
            buffers: &text_buffer.buffers,
            // Reserve room for one full chunk plus its leading overlap.
            buffer: Vec::with_capacity(chunk_size + overlap),
            buffer_absolute_pos: start,
            current_pos: start,
            end_pos: end,
            chunk_size,
            overlap,
            first_chunk: true,
            current_piece_data: None,
            current_piece_offset: 0,
        }
    }

    /// Read one byte from the piece iterator
    ///
    /// Pulls the next piece from `piece_iter` on demand and caches its
    /// range-clipped bytes in `current_piece_data`, so subsequent calls are
    /// cheap indexed reads. Returns `None` when the range is exhausted or a
    /// piece's backing data cannot be read.
    fn read_byte(&mut self) -> Option<u8> {
        loop {
            // If we have cached piece data, read from it
            if let Some(ref data) = self.current_piece_data {
                if self.current_piece_offset < data.len() {
                    let byte = data[self.current_piece_offset];
                    self.current_piece_offset += 1;
                    self.current_pos += 1;
                    return Some(byte);
                } else {
                    // Exhausted current piece, move to next
                    self.current_piece_data = None;
                    self.current_piece_offset = 0;
                }
            }

            // Get next piece
            if let Some(piece_view) = self.piece_iter.next() {
                let buffer_id = piece_view.location.buffer_id();
                if let Some(buffer) = self.buffers.get(buffer_id) {
                    // Extract the relevant slice from this piece
                    let piece_start_in_doc = piece_view.doc_offset;
                    let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                    // Clip to our search range
                    let read_start = self.current_pos.max(piece_start_in_doc);
                    let read_end = self.end_pos.min(piece_end_in_doc);

                    if read_end > read_start {
                        let offset_in_piece = read_start - piece_start_in_doc;
                        let bytes_to_read = read_end - read_start;

                        let buffer_start = piece_view.buffer_offset + offset_in_piece;
                        let buffer_end = buffer_start + bytes_to_read;

                        // get_data() returns None when the buffer's bytes are
                        // not in memory; we fall through to `return None` below.
                        if let Some(data) = buffer.get_data() {
                            if buffer_end <= data.len() {
                                // Cache this piece's data
                                self.current_piece_data =
                                    Some(data[buffer_start..buffer_end].to_vec());
                                self.current_piece_offset = 0;
                                continue;
                            }
                        }
                    }
                }
            }

            // No more data. NOTE(review): if a piece yields no readable bytes
            // (unavailable data or empty clip), iteration stops here instead of
            // skipping to the next piece — confirm this early stop is intended.
            return None;
        }
    }

    /// Fill the buffer with the next chunk of data
    ///
    /// First chunk: fills up to `chunk_size` bytes. Subsequent chunks: keeps
    /// the trailing `overlap` bytes, drains the rest (advancing
    /// `buffer_absolute_pos`), then appends up to `chunk_size` new bytes.
    /// Returns `false` when no new data could be added, signalling the end
    /// of iteration.
    fn fill_next_chunk(&mut self) -> bool {
        if self.first_chunk {
            // First chunk: fill up to chunk_size
            self.first_chunk = false;
            while self.buffer.len() < self.chunk_size && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }
            !self.buffer.is_empty()
        } else {
            // Subsequent chunks: keep overlap, fill chunk_size NEW bytes
            if self.current_pos >= self.end_pos {
                return false;
            }

            // Keep overlap bytes at the end
            if self.buffer.len() > self.overlap {
                let drain_amount = self.buffer.len() - self.overlap;
                self.buffer.drain(0..drain_amount);
                self.buffer_absolute_pos += drain_amount;
            }

            // Fill chunk_size NEW bytes (in addition to overlap)
            let before_len = self.buffer.len();
            let target_len = self.overlap + self.chunk_size;
            while self.buffer.len() < target_len && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }

            // Return true if we added new data
            self.buffer.len() > before_len
        }
    }
}
4462
4463impl<'a> Iterator for OverlappingChunks<'a> {
4464    type Item = ChunkInfo;
4465
4466    fn next(&mut self) -> Option<Self::Item> {
4467        // Track if this is the first chunk before filling
4468        let is_first = self.buffer_absolute_pos == self.current_pos;
4469
4470        if !self.fill_next_chunk() {
4471            return None;
4472        }
4473
4474        // First chunk: all data is valid (no overlap from previous)
4475        // Subsequent chunks: overlap bytes are not valid (already checked)
4476        let valid_start = if is_first {
4477            0
4478        } else {
4479            self.overlap.min(self.buffer.len())
4480        };
4481
4482        Some(ChunkInfo {
4483            buffer: self.buffer.clone(),
4484            absolute_pos: self.buffer_absolute_pos,
4485            valid_start,
4486        })
4487    }
4488}
4489
/// A region in a hybrid search plan — either an unloaded file range or
/// in-memory data from the piece tree.
#[derive(Debug)]
pub(crate) enum SearchRegion {
    /// Contiguous range on the original file that hasn't been loaded.
    Unloaded {
        /// Byte offset of the range within the file on disk.
        file_offset: usize,
        /// Length of the range in bytes.
        bytes: usize,
        /// Byte offset of the range within the (edited) document.
        doc_offset: usize,
    },
    /// In-memory data (loaded original content or user edits).
    Loaded {
        /// The region's bytes, copied out of the piece tree.
        data: Vec<u8>,
        /// Byte offset of the region within the (edited) document.
        doc_offset: usize,
    },
}
4503
/// A plan for hybrid search — extracted from a `TextBuffer`'s piece tree
/// on the main thread, executable on any thread.
///
/// For a large remote file with a small edit, the plan captures the few
/// loaded regions (small) and unloaded file ranges (coordinates only).
/// `execute()` then searches unloaded regions via `fs.search_file` (no data
/// transfer) and loaded regions with in-memory regex.
#[derive(Debug)]
pub struct HybridSearchPlan {
    /// Path of the original file backing the unloaded regions.
    pub(crate) file_path: PathBuf,
    /// Regions in document order; offsets are document-relative.
    pub(crate) regions: Vec<SearchRegion>,
}
4516
4517impl HybridSearchPlan {
4518    /// Execute the search plan.  Can run on any thread — only needs a
4519    /// `FileSystem` reference for unloaded region searches.
4520    pub fn execute(
4521        &self,
4522        fs: &dyn FileSystem,
4523        pattern: &str,
4524        opts: &FileSearchOptions,
4525        regex: &Regex,
4526        max_matches: usize,
4527        query_len: usize,
4528    ) -> io::Result<Vec<SearchMatch>> {
4529        if self.regions.is_empty() {
4530            return Ok(vec![]);
4531        }
4532
4533        // Fast path: single unloaded region → search whole file
4534        if self.regions.len() == 1 {
4535            if let SearchRegion::Unloaded { .. } = &self.regions[0] {
4536                let mut cursor = FileSearchCursor::new();
4537                let mut all_matches = Vec::new();
4538                while !cursor.done && all_matches.len() < max_matches {
4539                    let batch = fs.search_file(&self.file_path, pattern, opts, &mut cursor)?;
4540                    all_matches.extend(batch);
4541                }
4542                all_matches.truncate(max_matches);
4543                return Ok(all_matches);
4544            }
4545        }
4546
4547        let overlap_size = query_len.max(256);
4548        let mut all_matches: Vec<SearchMatch> = Vec::new();
4549        let mut running_line: usize = 1;
4550        let mut prev_tail: Vec<u8> = Vec::new();
4551
4552        for region in &self.regions {
4553            if all_matches.len() >= max_matches {
4554                break;
4555            }
4556            let remaining = max_matches - all_matches.len();
4557
4558            match region {
4559                SearchRegion::Unloaded {
4560                    file_offset,
4561                    bytes,
4562                    doc_offset: region_doc_offset,
4563                } => {
4564                    // Boundary overlap: prev_tail + start of unloaded region
4565                    if !prev_tail.is_empty() {
4566                        let overlap_read = (*bytes).min(overlap_size);
4567                        if let Ok(head) =
4568                            fs.read_range(&self.file_path, *file_offset as u64, overlap_read)
4569                        {
4570                            let boundary = search_boundary_overlap(
4571                                &prev_tail,
4572                                &head,
4573                                *region_doc_offset - prev_tail.len(),
4574                                running_line,
4575                                regex,
4576                                remaining,
4577                            );
4578                            all_matches.extend(boundary);
4579                        }
4580                    }
4581
4582                    // Search unloaded range via fs.search_file
4583                    let mut opts_bounded = opts.clone();
4584                    opts_bounded.max_matches = remaining.saturating_sub(all_matches.len());
4585                    let mut cursor = FileSearchCursor::for_range(
4586                        *file_offset,
4587                        *file_offset + *bytes,
4588                        running_line,
4589                    );
4590                    while !cursor.done && all_matches.len() < max_matches {
4591                        let mut batch =
4592                            fs.search_file(&self.file_path, pattern, &opts_bounded, &mut cursor)?;
4593                        // Remap byte_offset from file-relative to doc-relative
4594                        for m in &mut batch {
4595                            m.byte_offset = *region_doc_offset + (m.byte_offset - *file_offset);
4596                        }
4597                        all_matches.extend(batch);
4598                    }
4599                    running_line = cursor.running_line;
4600
4601                    // Save tail for next boundary
4602                    if *bytes >= overlap_size {
4603                        let tail_off = *file_offset + *bytes - overlap_size;
4604                        prev_tail = fs
4605                            .read_range(&self.file_path, tail_off as u64, overlap_size)
4606                            .unwrap_or_default();
4607                    } else {
4608                        prev_tail = fs
4609                            .read_range(&self.file_path, *file_offset as u64, *bytes)
4610                            .unwrap_or_default();
4611                    }
4612                }
4613                SearchRegion::Loaded {
4614                    data,
4615                    doc_offset: region_doc_offset,
4616                } => {
4617                    // Build search buffer: overlap tail + loaded data
4618                    let mut search_buf = Vec::with_capacity(prev_tail.len() + data.len());
4619                    search_buf.extend_from_slice(&prev_tail);
4620                    search_buf.extend_from_slice(data);
4621
4622                    let overlap_len = prev_tail.len();
4623                    let buf_doc_offset = if overlap_len > 0 {
4624                        *region_doc_offset - overlap_len
4625                    } else {
4626                        *region_doc_offset
4627                    };
4628
4629                    let newlines_in_overlap = search_buf[..overlap_len]
4630                        .iter()
4631                        .filter(|&&b| b == b'\n')
4632                        .count();
4633                    let mut line_at = running_line.saturating_sub(newlines_in_overlap);
4634                    let mut counted_to = 0usize;
4635
4636                    for m in regex.find_iter(&search_buf) {
4637                        if overlap_len > 0 && m.end() <= overlap_len {
4638                            continue;
4639                        }
4640                        if all_matches.len() >= max_matches {
4641                            break;
4642                        }
4643
4644                        line_at += search_buf[counted_to..m.start()]
4645                            .iter()
4646                            .filter(|&&b| b == b'\n')
4647                            .count();
4648                        counted_to = m.start();
4649
4650                        let line_start = search_buf[..m.start()]
4651                            .iter()
4652                            .rposition(|&b| b == b'\n')
4653                            .map(|p| p + 1)
4654                            .unwrap_or(0);
4655                        let line_end = search_buf[m.start()..]
4656                            .iter()
4657                            .position(|&b| b == b'\n')
4658                            .map(|p| m.start() + p)
4659                            .unwrap_or(search_buf.len());
4660
4661                        let match_doc_offset = buf_doc_offset + m.start();
4662                        let column = m.start() - line_start + 1;
4663                        let context =
4664                            String::from_utf8_lossy(&search_buf[line_start..line_end]).into_owned();
4665
4666                        all_matches.push(SearchMatch {
4667                            byte_offset: match_doc_offset,
4668                            length: m.end() - m.start(),
4669                            line: line_at,
4670                            column,
4671                            context,
4672                        });
4673                    }
4674
4675                    running_line += data.iter().filter(|&&b| b == b'\n').count();
4676
4677                    let tail_start = data.len().saturating_sub(overlap_size);
4678                    prev_tail = data[tail_start..].to_vec();
4679                }
4680            }
4681        }
4682
4683        all_matches.truncate(max_matches);
4684        Ok(all_matches)
4685    }
4686}
4687
4688/// Search the overlap zone between two regions for matches that span the
4689/// boundary.  `prev_tail` is the tail of the previous region, `next_head`
4690/// is the head of the next region.  `doc_offset` is the document byte
4691/// offset of `prev_tail[0]`.  Only matches that cross the boundary (start
4692/// in tail, end in head) are returned — pure-tail matches were already found.
4693fn search_boundary_overlap(
4694    prev_tail: &[u8],
4695    next_head: &[u8],
4696    doc_offset: usize,
4697    running_line: usize,
4698    regex: &Regex,
4699    max_matches: usize,
4700) -> Vec<SearchMatch> {
4701    let mut buf = Vec::with_capacity(prev_tail.len() + next_head.len());
4702    buf.extend_from_slice(prev_tail);
4703    buf.extend_from_slice(next_head);
4704
4705    let overlap_len = prev_tail.len();
4706    let newlines_before = prev_tail.iter().filter(|&&b| b == b'\n').count();
4707    let mut line_at = running_line.saturating_sub(newlines_before);
4708    let mut counted_to = 0usize;
4709    let mut matches = Vec::new();
4710
4711    for m in regex.find_iter(&buf) {
4712        // Only keep matches that cross the boundary
4713        if m.start() < overlap_len && m.end() > overlap_len {
4714            if matches.len() >= max_matches {
4715                break;
4716            }
4717
4718            line_at += buf[counted_to..m.start()]
4719                .iter()
4720                .filter(|&&b| b == b'\n')
4721                .count();
4722            counted_to = m.start();
4723
4724            let line_start = buf[..m.start()]
4725                .iter()
4726                .rposition(|&b| b == b'\n')
4727                .map(|p| p + 1)
4728                .unwrap_or(0);
4729            let line_end = buf[m.start()..]
4730                .iter()
4731                .position(|&b| b == b'\n')
4732                .map(|p| m.start() + p)
4733                .unwrap_or(buf.len());
4734
4735            let column = m.start() - line_start + 1;
4736            let context = String::from_utf8_lossy(&buf[line_start..line_end]).into_owned();
4737
4738            matches.push(SearchMatch {
4739                byte_offset: doc_offset + m.start(),
4740                length: m.end() - m.start(),
4741                line: line_at,
4742                column,
4743                context,
4744            });
4745        }
4746    }
4747    matches
4748}
4749
4750#[cfg(test)]
4751mod tests {
4752    use crate::model::filesystem::StdFileSystem;
4753    use std::sync::Arc;
4754
4755    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
4756        Arc::new(StdFileSystem)
4757    }
4758    use super::*;
4759
4760    #[test]
4761    fn test_empty_buffer() {
4762        let buffer = TextBuffer::empty(test_fs());
4763        assert_eq!(buffer.total_bytes(), 0);
4764        assert_eq!(buffer.line_count(), Some(1)); // Empty doc has 1 line
4765    }
4766
4767    #[test]
4768    fn test_line_positions_multiline() {
4769        let buffer = TextBuffer::from_bytes(b"Hello\nNew Line\nWorld!".to_vec(), test_fs());
4770
4771        // Check line count
4772        assert_eq!(buffer.line_count(), Some(3));
4773
4774        // Check line starts
4775        assert_eq!(buffer.line_start_offset(0), Some(0)); // "Hello\n" starts at 0
4776        assert_eq!(buffer.line_start_offset(1), Some(6)); // "New Line\n" starts at 6
4777        assert_eq!(buffer.line_start_offset(2), Some(15)); // "World!" starts at 15
4778
4779        // Check offset_to_position
4780        assert_eq!(buffer.offset_to_position(0).unwrap().line, 0); // Start of "Hello"
4781        assert_eq!(buffer.offset_to_position(5).unwrap().line, 0); // End of "Hello" (before \n)
4782        assert_eq!(buffer.offset_to_position(6).unwrap().line, 1); // Start of "New Line"
4783        assert_eq!(buffer.offset_to_position(14).unwrap().line, 1); // End of "New Line" (before \n)
4784        assert_eq!(buffer.offset_to_position(15).unwrap().line, 2); // Start of "World!"
4785
4786        // Check line_col_to_position
4787        assert_eq!(buffer.line_col_to_position(0, 5), 5); // End of line 0
4788        assert_eq!(buffer.line_col_to_position(1, 0), 6); // Start of line 1
4789        assert_eq!(buffer.line_col_to_position(1, 8), 14); // End of line 1
4790        assert_eq!(buffer.line_col_to_position(2, 0), 15); // Start of line 2
4791    }
4792
4793    #[test]
4794    fn test_new_from_content() {
4795        let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
4796        assert_eq!(buffer.total_bytes(), 11);
4797        assert_eq!(buffer.line_count(), Some(2));
4798    }
4799
4800    #[test]
4801    fn test_get_all_text() {
4802        let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
4803        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld");
4804    }
4805
4806    #[test]
4807    fn test_insert_at_start() {
4808        let mut buffer = TextBuffer::from_bytes(b"world".to_vec(), test_fs());
4809        buffer.insert_bytes(0, b"hello ".to_vec());
4810
4811        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
4812        assert_eq!(buffer.total_bytes(), 11);
4813    }
4814
4815    #[test]
4816    fn test_insert_in_middle() {
4817        let mut buffer = TextBuffer::from_bytes(b"helloworld".to_vec(), test_fs());
4818        buffer.insert_bytes(5, b" ".to_vec());
4819
4820        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
4821        assert_eq!(buffer.total_bytes(), 11);
4822    }
4823
4824    #[test]
4825    fn test_insert_at_end() {
4826        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
4827        buffer.insert_bytes(5, b" world".to_vec());
4828
4829        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
4830        assert_eq!(buffer.total_bytes(), 11);
4831    }
4832
4833    #[test]
4834    fn test_insert_with_newlines() {
4835        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
4836        buffer.insert_bytes(5, b"\nworld\ntest".to_vec());
4837
4838        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld\ntest");
4839        assert_eq!(buffer.line_count(), Some(3));
4840    }
4841
4842    #[test]
4843    fn test_delete_from_start() {
4844        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4845        buffer.delete_bytes(0, 6);
4846
4847        assert_eq!(buffer.get_all_text().unwrap(), b"world");
4848        assert_eq!(buffer.total_bytes(), 5);
4849    }
4850
4851    #[test]
4852    fn test_delete_from_middle() {
4853        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4854        buffer.delete_bytes(5, 1);
4855
4856        assert_eq!(buffer.get_all_text().unwrap(), b"helloworld");
4857        assert_eq!(buffer.total_bytes(), 10);
4858    }
4859
4860    #[test]
4861    fn test_delete_from_end() {
4862        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4863        buffer.delete_bytes(6, 5);
4864
4865        assert_eq!(buffer.get_all_text().unwrap(), b"hello ");
4866        assert_eq!(buffer.total_bytes(), 6);
4867    }
4868
4869    #[test]
4870    fn test_delete_with_newlines() {
4871        let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4872        buffer.delete_bytes(5, 7); // Delete "\nworld\n"
4873
4874        assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
4875        assert_eq!(buffer.line_count(), Some(1));
4876    }
4877
4878    #[test]
4879    fn test_offset_position_conversions() {
4880        let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4881
4882        let pos = buffer.offset_to_position(0);
4883        assert_eq!(pos, Some(Position { line: 0, column: 0 }));
4884
4885        let pos = buffer.offset_to_position(6);
4886        assert_eq!(pos, Some(Position { line: 1, column: 0 }));
4887
4888        let offset = buffer.position_to_offset(Position { line: 1, column: 0 });
4889        assert_eq!(offset, 6);
4890    }
4891
4892    #[test]
4893    fn test_insert_at_position() {
4894        let mut buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
4895        buffer.insert_at_position(Position { line: 1, column: 0 }, b"beautiful ".to_vec());
4896
4897        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nbeautiful world");
4898    }
4899
4900    #[test]
4901    fn test_delete_range() {
4902        let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4903
4904        let start = Position { line: 0, column: 5 };
4905        let end = Position { line: 2, column: 0 };
4906        buffer.delete_range(start, end);
4907
4908        assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
4909    }
4910
4911    #[test]
4912    fn test_get_line() {
4913        let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4914
4915        assert_eq!(buffer.get_line(0), Some(b"hello\n".to_vec()));
4916        assert_eq!(buffer.get_line(1), Some(b"world\n".to_vec()));
4917        assert_eq!(buffer.get_line(2), Some(b"test".to_vec()));
4918        assert_eq!(buffer.get_line(3), None);
4919    }
4920
4921    #[test]
4922    fn test_multiple_operations() {
4923        let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());
4924
4925        buffer.insert_bytes(0, b"start\n".to_vec());
4926        assert_eq!(buffer.line_count(), Some(4));
4927
4928        buffer.delete_bytes(6, 6); // Delete "line1\n"
4929        assert_eq!(buffer.line_count(), Some(3));
4930
4931        buffer.insert_bytes(6, b"new\n".to_vec());
4932        assert_eq!(buffer.line_count(), Some(4));
4933
4934        let text = buffer.get_all_text().unwrap();
4935        assert_eq!(text, b"start\nnew\nline2\nline3");
4936    }
4937
4938    #[test]
4939    fn test_get_text_range() {
4940        let buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4941
4942        assert_eq!(buffer.get_text_range(0, 5), Some(b"hello".to_vec()));
4943        assert_eq!(buffer.get_text_range(6, 5), Some(b"world".to_vec()));
4944        assert_eq!(buffer.get_text_range(0, 11), Some(b"hello world".to_vec()));
4945    }
4946
4947    #[test]
4948    fn test_empty_operations() {
4949        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
4950
4951        buffer.insert_bytes(2, Vec::new());
4952        assert_eq!(buffer.get_all_text().unwrap(), b"hello");
4953
4954        buffer.delete_bytes(2, 0);
4955        assert_eq!(buffer.get_all_text().unwrap(), b"hello");
4956    }
4957
4958    #[test]
4959    fn test_sequential_inserts_at_beginning() {
4960        // Regression test for piece tree duplicate insertion bug
4961        let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
4962
4963        // Delete all
4964        buffer.delete_bytes(0, 12);
4965        assert_eq!(buffer.get_all_text().unwrap(), b"");
4966
4967        // Insert 'a' at 0
4968        buffer.insert_bytes(0, vec![b'a']);
4969        assert_eq!(buffer.get_all_text().unwrap(), b"a");
4970
4971        // Insert 'b' at 0 (should give "ba")
4972        buffer.insert_bytes(0, vec![b'b']);
4973        assert_eq!(buffer.get_all_text().unwrap(), b"ba");
4974    }
4975
4976    // ===== Phase 1-3: Large File Support Tests =====
4977
4978    mod large_file_support {
4979        use super::*;
4980        use crate::model::piece_tree::StringBuffer;
4981        use std::fs::File;
4982        use std::io::Write;
4983        use tempfile::TempDir;
4984
4985        // Phase 1: Option<usize> Type Safety Tests
4986
4987        #[test]
4988        fn test_line_feed_count_is_some_for_loaded_buffer() {
4989            let buffer = StringBuffer::new(0, b"hello\nworld\ntest".to_vec());
4990            assert_eq!(buffer.line_feed_count(), Some(2));
4991        }
4992
4993        #[test]
4994        fn test_line_feed_count_is_none_for_unloaded_buffer() {
4995            let temp_dir = TempDir::new().unwrap();
4996            let file_path = temp_dir.path().join("test.txt");
4997
4998            let buffer = StringBuffer::new_unloaded(0, file_path, 0, 100);
4999            assert_eq!(buffer.line_feed_count(), None);
5000        }
5001
5002        #[test]
5003        fn test_line_count_is_some_for_small_buffer() {
5004            let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
5005            assert_eq!(buffer.line_count(), Some(3));
5006        }
5007
5008        #[test]
5009        fn test_piece_tree_works_with_none_line_count() {
5010            // Create a buffer with no line count information
5011            let buffer = StringBuffer::new_loaded(0, b"hello\nworld".to_vec(), false);
5012            assert_eq!(buffer.line_feed_count(), None);
5013
5014            // Create piece tree without line feed count
5015            use crate::model::piece_tree::{BufferLocation, PieceTree};
5016            let tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, None);
5017
5018            // line_count should return None
5019            assert_eq!(tree.line_count(), None);
5020        }
5021
5022        // Phase 2: BufferData Enum Tests
5023
5024        #[test]
5025        fn test_buffer_data_loaded_variant() {
5026            let data = b"hello world".to_vec();
5027            let buffer = StringBuffer::new_loaded(0, data.clone(), true);
5028
5029            assert!(buffer.is_loaded());
5030            assert_eq!(buffer.get_data(), Some(&data[..]));
5031            assert!(buffer.get_line_starts().is_some());
5032        }
5033
5034        #[test]
5035        fn test_buffer_data_loaded_without_line_starts() {
5036            let data = b"hello\nworld".to_vec();
5037            let buffer = StringBuffer::new_loaded(0, data.clone(), false);
5038
5039            assert!(buffer.is_loaded());
5040            assert_eq!(buffer.get_data(), Some(&data[..]));
5041            assert_eq!(buffer.get_line_starts(), None); // No line indexing
5042        }
5043
5044        #[test]
5045        fn test_buffer_data_unloaded_variant() {
5046            let temp_dir = TempDir::new().unwrap();
5047            let file_path = temp_dir.path().join("test.txt");
5048
5049            let buffer = StringBuffer::new_unloaded(0, file_path.clone(), 0, 100);
5050
5051            assert!(!buffer.is_loaded());
5052            assert_eq!(buffer.get_data(), None);
5053            assert_eq!(buffer.get_line_starts(), None);
5054        }
5055
5056        #[test]
5057        fn test_buffer_load_method() {
5058            let temp_dir = TempDir::new().unwrap();
5059            let file_path = temp_dir.path().join("test.txt");
5060
5061            // Create test file
5062            let test_data = b"hello world";
5063            File::create(&file_path)
5064                .unwrap()
5065                .write_all(test_data)
5066                .unwrap();
5067
5068            // Create unloaded buffer
5069            let mut buffer = StringBuffer::new_unloaded(0, file_path, 0, test_data.len());
5070            assert!(!buffer.is_loaded());
5071
5072            // Load the buffer using local filesystem
5073            let fs = crate::model::filesystem::StdFileSystem;
5074            buffer.load(&fs).unwrap();
5075
5076            // Now it should be loaded
5077            assert!(buffer.is_loaded());
5078            assert_eq!(buffer.get_data(), Some(&test_data[..]));
5079        }
5080
5081        #[test]
5082        fn test_string_buffer_new_vs_new_loaded() {
5083            let data = b"hello\nworld".to_vec();
5084
5085            // StringBuffer::new should compute line starts
5086            let buf1 = StringBuffer::new(0, data.clone());
5087            assert!(buf1.is_loaded());
5088            assert!(buf1.get_line_starts().is_some());
5089            assert_eq!(buf1.line_feed_count(), Some(1));
5090
5091            // StringBuffer::new_loaded with compute_lines=false should not
5092            let buf2 = StringBuffer::new_loaded(0, data.clone(), false);
5093            assert!(buf2.is_loaded());
5094            assert_eq!(buf2.get_line_starts(), None);
5095            assert_eq!(buf2.line_feed_count(), None);
5096        }
5097
5098        // Phase 3: Large File Detection Tests
5099
5100        #[test]
5101        fn test_load_small_file_eager_loading() {
5102            let temp_dir = TempDir::new().unwrap();
5103            let file_path = temp_dir.path().join("small.txt");
5104
5105            // Create a small file (10 bytes < 100MB threshold)
5106            let test_data = b"hello\ntest";
5107            File::create(&file_path)
5108                .unwrap()
5109                .write_all(test_data)
5110                .unwrap();
5111
5112            // Load with default threshold
5113            let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();
5114
5115            // Should be eagerly loaded (not large_file mode)
5116            assert!(!buffer.large_file);
5117            assert_eq!(buffer.total_bytes(), test_data.len());
5118            assert_eq!(buffer.line_count(), Some(2)); // Has line indexing
5119            assert_eq!(buffer.get_all_text().unwrap(), test_data);
5120
5121            // The buffer should be loaded
5122            assert!(buffer.buffers[0].is_loaded());
5123        }
5124
5125        #[test]
5126        fn test_load_large_file_lazy_loading() {
5127            let temp_dir = TempDir::new().unwrap();
5128            let file_path = temp_dir.path().join("large.txt");
5129
5130            // Create a "large" file by using a small threshold
5131            let test_data = b"hello\nworld\ntest";
5132            File::create(&file_path)
5133                .unwrap()
5134                .write_all(test_data)
5135                .unwrap();
5136
5137            // Load with threshold of 10 bytes (file is 17 bytes, so it's "large")
5138            let buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();
5139
5140            // Should be in large_file mode
5141            assert!(buffer.large_file);
5142            assert_eq!(buffer.total_bytes(), test_data.len());
5143
5144            // Should NOT have line indexing
5145            assert_eq!(buffer.line_count(), None);
5146
5147            // The buffer should be unloaded
5148            assert!(!buffer.buffers[0].is_loaded());
5149            assert_eq!(buffer.buffers[0].get_data(), None);
5150        }
5151
5152        /// Test that reproduces issue #657: Search on large plain text files
5153        ///
5154        /// The bug: When a large file is opened with lazy loading, buffer.to_string()
5155        /// returns None because some buffers are unloaded. This causes search to fail
5156        /// with "Buffer not fully loaded" error.
5157        ///
5158        /// The fix: Use get_text_range_mut() which loads the buffer on demand.
5159        #[test]
5160        fn test_issue_657_search_on_large_file_unloaded_buffer() {
5161            let temp_dir = TempDir::new().unwrap();
5162            let file_path = temp_dir.path().join("large_search_test.txt");
5163
5164            // Create test content with a searchable string
5165            let test_data = b"line1\nline2\nSEARCH_TARGET\nline4\nline5";
5166            File::create(&file_path)
5167                .unwrap()
5168                .write_all(test_data)
5169                .unwrap();
5170
5171            // Load with small threshold to force lazy loading
5172            let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();
5173
5174            // Verify we're in large file mode with unloaded buffer
5175            assert!(buffer.large_file, "Buffer should be in large file mode");
5176            assert!(
5177                !buffer.buffers[0].is_loaded(),
5178                "Buffer should be unloaded initially"
5179            );
5180
5181            // REPRODUCE THE BUG: to_string() returns None for unloaded buffers
5182            // This is what the old perform_search() code did, causing the error
5183            assert!(
5184                buffer.to_string().is_none(),
5185                "BUG REPRODUCED: to_string() returns None for unloaded buffer"
5186            );
5187
5188            // THE FIX: get_text_range_mut() loads the buffer on demand
5189            let total_bytes = buffer.len();
5190            let content = buffer.get_text_range_mut(0, total_bytes).unwrap();
5191            let content_str = String::from_utf8_lossy(&content);
5192
5193            // Verify the content is now available and contains our search target
5194            assert!(
5195                content_str.contains("SEARCH_TARGET"),
5196                "FIX WORKS: get_text_range_mut() loaded the buffer and found the search target"
5197            );
5198
5199            // After loading, to_string() should also work
5200            assert!(
5201                buffer.to_string().is_some(),
5202                "After get_text_range_mut(), to_string() should work"
5203            );
5204        }
5205
5206        #[test]
5207        fn test_large_file_threshold_boundary() {
5208            let temp_dir = TempDir::new().unwrap();
5209
5210            // Test exactly at threshold
5211            let file_path = temp_dir.path().join("at_threshold.txt");
5212            let test_data = vec![b'x'; 100];
5213            File::create(&file_path)
5214                .unwrap()
5215                .write_all(&test_data)
5216                .unwrap();
5217
5218            // Load with threshold of 100 bytes - should be large file (>= threshold)
5219            let buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
5220            assert!(buffer.large_file);
5221
5222            // Test just below threshold
5223            let file_path2 = temp_dir.path().join("below_threshold.txt");
5224            let test_data2 = vec![b'x'; 99];
5225            File::create(&file_path2)
5226                .unwrap()
5227                .write_all(&test_data2)
5228                .unwrap();
5229
5230            // Load with threshold of 100 bytes - should be small file (< threshold)
5231            let buffer2 = TextBuffer::load_from_file(&file_path2, 100, test_fs()).unwrap();
5232            assert!(!buffer2.large_file);
5233        }
5234
5235        #[test]
5236        fn test_large_file_default_threshold() {
5237            let temp_dir = TempDir::new().unwrap();
5238            let file_path = temp_dir.path().join("test.txt");
5239
5240            // Create a small file
5241            File::create(&file_path)
5242                .unwrap()
5243                .write_all(b"hello")
5244                .unwrap();
5245
5246            // Load with threshold 0 - should use DEFAULT_LARGE_FILE_THRESHOLD
5247            let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();
5248
5249            // 5 bytes < 100MB, so should not be large file
5250            assert!(!buffer.large_file);
5251        }
5252
5253        #[test]
5254        fn test_large_file_has_correct_piece_tree_structure() {
5255            let temp_dir = TempDir::new().unwrap();
5256            let file_path = temp_dir.path().join("large.txt");
5257
5258            let test_data = b"hello world";
5259            File::create(&file_path)
5260                .unwrap()
5261                .write_all(test_data)
5262                .unwrap();
5263
5264            // Load as large file
5265            let buffer = TextBuffer::load_from_file(&file_path, 5, test_fs()).unwrap();
5266
5267            // Should have correct total bytes
5268            assert_eq!(buffer.total_bytes(), test_data.len());
5269
5270            // Should have 1 buffer
5271            assert_eq!(buffer.buffers.len(), 1);
5272
5273            // Buffer should be unloaded
5274            assert!(!buffer.buffers[0].is_loaded());
5275        }
5276
5277        #[test]
5278        fn test_empty_large_file() {
5279            let temp_dir = TempDir::new().unwrap();
5280            let file_path = temp_dir.path().join("empty.txt");
5281
5282            // Create an empty file
5283            File::create(&file_path).unwrap();
5284
5285            // Load as large file
5286            let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();
5287
5288            // Empty file is handled gracefully
5289            assert_eq!(buffer.total_bytes(), 0);
5290            assert!(buffer.is_empty());
5291        }
5292
        /// End-to-end smoke test of the byte-level API in large-file mode:
        /// reads, inserts, deletes, end-append, position conversions, and a
        /// range replace, all performed on a lazily-loaded buffer.  The
        /// assertions are strictly order-dependent — each step builds on the
        /// buffer state left by the previous one.
        #[test]
        fn test_large_file_basic_api_operations() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_test.txt");

            // Create a test file with known content (24 bytes, 4 lines)
            let test_data = b"line1\nline2\nline3\nline4\n";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Load as large file (use small threshold to trigger large file mode)
            let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();

            // Verify it's in large file mode
            assert!(buffer.large_file);
            assert_eq!(buffer.line_count(), None); // No line indexing

            // Test basic access functions
            assert_eq!(buffer.total_bytes(), test_data.len());
            assert!(!buffer.is_empty());
            assert_eq!(buffer.len(), test_data.len());

            // Test reading operations using get_text_range_mut (lazy loads on demand)
            let range_result = buffer.get_text_range_mut(0, 5).unwrap();
            assert_eq!(range_result, b"line1");

            let range_result2 = buffer.get_text_range_mut(6, 5).unwrap();
            assert_eq!(range_result2, b"line2");

            // Test get_all_text (via get_text_range after lazy loading)
            let all_text = buffer.get_all_text().unwrap();
            assert_eq!(all_text, test_data);

            // Test slice_bytes method
            assert_eq!(buffer.slice_bytes(0..5), b"line1");

            // Test basic editing operations
            // Insert at offset 0 ("prefix_" is 7 bytes)
            buffer.insert_bytes(0, b"prefix_".to_vec());
            assert_eq!(buffer.total_bytes(), test_data.len() + 7);
            assert!(buffer.is_modified());

            // Verify the insertion worked
            let text_after_insert = buffer.get_all_text().unwrap();
            assert_eq!(&text_after_insert[0..7], b"prefix_");
            assert_eq!(&text_after_insert[7..12], b"line1");

            // Delete some bytes (the 7-byte prefix just inserted)
            buffer.delete_bytes(0, 7);
            assert_eq!(buffer.total_bytes(), test_data.len());

            // Verify deletion worked - should be back to original
            let text_after_delete = buffer.get_all_text().unwrap();
            assert_eq!(text_after_delete, test_data);

            // Insert at end
            let end_offset = buffer.total_bytes();
            buffer.insert_bytes(end_offset, b"suffix".to_vec());
            assert_eq!(buffer.total_bytes(), test_data.len() + 6);

            // Verify end insertion
            let final_text = buffer.get_all_text().unwrap();
            assert!(final_text.ends_with(b"suffix"));
            assert_eq!(&final_text[0..test_data.len()], test_data);

            // Test offset_to_position
            // Note: Without line indexing, position tracking is limited
            // but byte-level operations still work
            let pos = buffer.offset_to_position(0).unwrap();
            assert_eq!(pos.column, 0);

            // Test position_to_offset
            let offset = buffer.position_to_offset(Position { line: 0, column: 0 });
            assert_eq!(offset, 0);

            // Test replace operations (replaces "line1" at the front)
            let replace_result = buffer.replace_range(0..5, "START");
            assert!(replace_result);

            let text_after_replace = buffer.get_all_text().unwrap();
            assert!(text_after_replace.starts_with(b"START"));
        }
5377
        /// End-to-end check of chunk-based lazy loading for large files.
        ///
        /// A 3MB file (1MB of 'A', 'B', then 'C') is opened in large-file mode;
        /// reads must fault in only the touched chunks, reads may span chunk
        /// boundaries, editing must keep working after chunks are loaded, and
        /// finally the full content must round-trip byte-for-byte.
        #[test]
        fn test_large_file_chunk_based_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("huge.txt");

            // Create a file larger than LOAD_CHUNK_SIZE (1MB)
            // We'll create a 3MB file with a repeating pattern so we can verify chunks
            let chunk_size = LOAD_CHUNK_SIZE; // 1MB
            let file_size = chunk_size * 3; // 3MB

            // Pattern: "AAAA...AAAA" (1MB of A's), "BBBB...BBBB" (1MB of B's), "CCCC...CCCC" (1MB of C's)
            let mut file = File::create(&file_path).unwrap();
            file.write_all(&vec![b'A'; chunk_size]).unwrap();
            file.write_all(&vec![b'B'; chunk_size]).unwrap();
            file.write_all(&vec![b'C'; chunk_size]).unwrap();
            file.flush().unwrap();

            // Load as large file (use threshold of 1 byte to ensure large file mode)
            let mut buffer = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();

            // Verify it's in large file mode
            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), file_size);

            // Buffer should be unloaded initially
            assert!(!buffer.buffers[0].is_loaded());

            // Read from the first chunk (should load only first 1MB)
            let first_chunk_data = buffer.get_text_range_mut(0, 1024).unwrap();
            assert_eq!(first_chunk_data.len(), 1024);
            assert!(first_chunk_data.iter().all(|&b| b == b'A'));

            // Read from the middle chunk (offset = 1MB, should load second 1MB)
            let second_chunk_data = buffer.get_text_range_mut(chunk_size, 1024).unwrap();
            assert_eq!(second_chunk_data.len(), 1024);
            assert!(second_chunk_data.iter().all(|&b| b == b'B'));

            // Read from the last chunk (offset = 2MB, should load third 1MB)
            let third_chunk_data = buffer.get_text_range_mut(chunk_size * 2, 1024).unwrap();
            assert_eq!(third_chunk_data.len(), 1024);
            assert!(third_chunk_data.iter().all(|&b| b == b'C'));

            // Verify we can read across chunk boundaries
            // Read from middle of first chunk to middle of second chunk
            let cross_chunk_offset = chunk_size - 512;
            let cross_chunk_data = buffer.get_text_range_mut(cross_chunk_offset, 1024).unwrap();
            assert_eq!(cross_chunk_data.len(), 1024);
            // First 512 bytes should be 'A', next 512 bytes should be 'B'
            assert!(cross_chunk_data[..512].iter().all(|&b| b == b'A'));
            assert!(cross_chunk_data[512..].iter().all(|&b| b == b'B'));

            // After chunk-based loading, verify the piece tree has been split
            // The number of buffers should be greater than 1 (original + chunks)
            assert!(
                buffer.buffers.len() > 1,
                "Expected multiple buffers after chunk-based loading, got {}",
                buffer.buffers.len()
            );

            // Test that editing still works after chunk-based loading
            buffer.insert_bytes(0, b"PREFIX".to_vec());
            assert_eq!(buffer.total_bytes(), file_size + 6);

            let after_insert = buffer.get_text_range_mut(0, 6).unwrap();
            assert_eq!(after_insert, b"PREFIX");

            // Verify the original data is still there after the prefix
            let after_prefix = buffer.get_text_range_mut(6, 10).unwrap();
            assert!(after_prefix.iter().all(|&b| b == b'A'));

            // Most importantly: validate the entire buffer content matches the original file
            // Create a fresh buffer to read the original file
            let mut buffer2 = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();

            // Read the entire file in chunks and verify each chunk
            let chunk_read_size = 64 * 1024; // Read in 64KB chunks for efficiency
            let mut offset = 0;
            while offset < file_size {
                let bytes_to_read = chunk_read_size.min(file_size - offset);
                let chunk_data = buffer2.get_text_range_mut(offset, bytes_to_read).unwrap();

                // Determine which section of the file we're reading
                let first_mb_end = chunk_size;
                let second_mb_end = chunk_size * 2;

                // Validate the data based on which MB section we're in
                // (A in the first MB, B in the second, C in the third)
                for (i, &byte) in chunk_data.iter().enumerate() {
                    let file_offset = offset + i;
                    let expected = if file_offset < first_mb_end {
                        b'A'
                    } else if file_offset < second_mb_end {
                        b'B'
                    } else {
                        b'C'
                    };
                    assert_eq!(
                        byte, expected,
                        "Mismatch at file offset {}: expected {}, got {}",
                        file_offset, expected as char, byte as char
                    );
                }

                offset += bytes_to_read;
            }
        }
5483
        /// Test that save_to_file works correctly with partially loaded large files
        /// This is a regression test for a bug where saving would silently produce
        /// an empty file if any buffer regions were still unloaded.
        ///
        /// The second half of the file is never read (and so never loaded into
        /// memory); saving must still stream it from the original file intact.
        #[test]
        fn test_large_file_incremental_save() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_save_test.txt");

            // Create a small file but use tiny threshold to trigger large file mode
            let chunk_size = 1000; // 1KB chunks
            let file_size = chunk_size * 2; // 2KB total

            let mut file = File::create(&file_path).unwrap();
            // First half: 'A' repeated
            file.write_all(&vec![b'A'; chunk_size]).unwrap();
            // Second half: 'B' repeated
            file.write_all(&vec![b'B'; chunk_size]).unwrap();
            file.flush().unwrap();

            // Load as large file (threshold of 100 bytes)
            let mut buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), file_size);

            // Only read from the beginning - this loads only a small region
            // (deliberately leaving the 'B' half unloaded)
            let first_bytes = buffer.get_text_range_mut(0, 50).unwrap();
            assert!(first_bytes.iter().all(|&b| b == b'A'));

            // Make an edit at the beginning
            buffer.insert_bytes(0, b"PREFIX_".to_vec());

            // Save to a new file (to avoid issues with reading while writing same file)
            let save_path = temp_dir.path().join("saved.txt");
            buffer.save_to_file(&save_path).unwrap();

            // Verify the saved file
            let saved_content = std::fs::read(&save_path).unwrap();

            // Check total size: original + "PREFIX_" (7 bytes)
            assert_eq!(
                saved_content.len(),
                file_size + 7,
                "Saved file should be {} bytes, got {}",
                file_size + 7,
                saved_content.len()
            );

            // Check prefix
            assert_eq!(&saved_content[..7], b"PREFIX_", "Should start with PREFIX_");

            // Check that first chunk (after prefix) contains A's
            assert!(
                saved_content[7..100].iter().all(|&b| b == b'A'),
                "First chunk after prefix should be A's"
            );

            // Check that second chunk contains B's (this was unloaded!)
            // This is the regression assertion: unloaded data must survive a save.
            let second_chunk_start = 7 + chunk_size;
            assert!(
                saved_content[second_chunk_start..second_chunk_start + 100]
                    .iter()
                    .all(|&b| b == b'B'),
                "Second chunk should be B's (was unloaded, should be preserved)"
            );
        }
5549
5550        /// Test that save_to_file handles edits at multiple positions
5551        #[test]
5552        fn test_large_file_save_with_multiple_edits() {
5553            let temp_dir = TempDir::new().unwrap();
5554            let file_path = temp_dir.path().join("multi_edit.txt");
5555
5556            // Create a ~5KB file with numbered lines for easier verification
5557            let mut content = Vec::new();
5558            for i in 0..100 {
5559                content.extend_from_slice(
5560                    format!("Line {:04}: padding to make it longer\n", i).as_bytes(),
5561                );
5562            }
5563            let original_len = content.len();
5564            std::fs::write(&file_path, &content).unwrap();
5565
5566            // Load as large file (threshold of 500 bytes)
5567            let mut buffer = TextBuffer::load_from_file(&file_path, 500, test_fs()).unwrap();
5568            assert!(
5569                buffer.line_count().is_none(),
5570                "Should be in large file mode"
5571            );
5572
5573            // Edit at the beginning
5574            buffer.insert_bytes(0, b"[START]".to_vec());
5575
5576            // Edit somewhere in the middle (load that region first)
5577            let mid_offset = original_len / 2;
5578            let _mid_bytes = buffer.get_text_range_mut(mid_offset + 7, 10).unwrap(); // +7 for our insert
5579            buffer.insert_bytes(mid_offset + 7, b"[MIDDLE]".to_vec());
5580
5581            // Save
5582            let save_path = temp_dir.path().join("multi_edit_saved.txt");
5583            buffer.save_to_file(&save_path).unwrap();
5584
5585            // Verify
5586            let saved = std::fs::read_to_string(&save_path).unwrap();
5587
5588            assert!(
5589                saved.starts_with("[START]Line 0000"),
5590                "Should start with our edit"
5591            );
5592            assert!(saved.contains("[MIDDLE]"), "Should contain middle edit");
5593            assert!(saved.contains("Line 0099"), "Should preserve end of file");
5594
5595            // Verify total length
5596            let expected_len = original_len + 7 + 8; // [START] + [MIDDLE]
5597            assert_eq!(
5598                saved.len(),
5599                expected_len,
5600                "Length should be original + edits"
5601            );
5602        }
5603    }
5604
5605    // ===== Offset to Position Tests =====
5606    // These tests focus on the offset_to_position correctness
5607
5608    #[test]
5609    fn test_offset_to_position_simple() {
5610        // Create a buffer with known line structure
5611        // Line 0: "a\n" (bytes 0-1, newline at 1)
5612        // Line 1: "b\n" (bytes 2-3, newline at 3)
5613        // Line 2: "c\n" (bytes 4-5, newline at 5)
5614        // Line 3: "d" (bytes 6, no newline)
5615        let content = b"a\nb\nc\nd";
5616        let buffer = TextBuffer::from_bytes(content.to_vec(), test_fs());
5617
5618        // Verify specific positions
5619        let pos = buffer
5620            .offset_to_position(0)
5621            .expect("small buffer should have line metadata");
5622        assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
5623        assert_eq!(pos.column, 0);
5624
5625        let pos = buffer
5626            .offset_to_position(1)
5627            .expect("small buffer should have line metadata");
5628        assert_eq!(pos.line, 0, "Byte 1 (newline) should be on line 0");
5629        assert_eq!(pos.column, 1);
5630
5631        let pos = buffer
5632            .offset_to_position(2)
5633            .expect("small buffer should have line metadata");
5634        assert_eq!(pos.line, 1, "Byte 2 should be on line 1");
5635        assert_eq!(pos.column, 0);
5636
5637        let pos = buffer
5638            .offset_to_position(3)
5639            .expect("small buffer should have line metadata");
5640        assert_eq!(pos.line, 1, "Byte 3 (newline) should be on line 1");
5641        assert_eq!(pos.column, 1);
5642
5643        let pos = buffer
5644            .offset_to_position(4)
5645            .expect("small buffer should have line metadata");
5646        assert_eq!(pos.line, 2, "Byte 4 should be on line 2");
5647        assert_eq!(pos.column, 0);
5648
5649        let pos = buffer
5650            .offset_to_position(6)
5651            .expect("small buffer should have line metadata");
5652        assert_eq!(pos.line, 3, "Byte 6 should be on line 3");
5653        assert_eq!(pos.column, 0);
5654    }
5655
5656    #[test]
5657    fn test_offset_to_position_after_insert() {
5658        // Start with simple content
5659        let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());
5660
5661        // Insert at position 2 (start of line 1)
5662        buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
5663
5664        // After insert, buffer should be: "a\nx\nb\n"
5665        // Line 0: "a\n" (bytes 0-1)
5666        // Line 1: "x\n" (bytes 2-3)
5667        // Line 2: "b\n" (bytes 4-5)
5668
5669        let pos = buffer
5670            .offset_to_position(0)
5671            .expect("small buffer should have line metadata");
5672        assert_eq!(pos.line, 0, "Byte 0 should still be on line 0");
5673
5674        let pos = buffer
5675            .offset_to_position(2)
5676            .expect("small buffer should have line metadata");
5677        assert_eq!(
5678            pos.line, 1,
5679            "Byte 2 (start of inserted line) should be on line 1"
5680        );
5681
5682        let pos = buffer
5683            .offset_to_position(4)
5684            .expect("small buffer should have line metadata");
5685        assert_eq!(
5686            pos.line, 2,
5687            "Byte 4 (start of 'b') should be on line 2 after insert"
5688        );
5689    }
5690
5691    #[test]
5692    fn test_offset_to_position_empty_lines() {
5693        // Test with empty lines: "\n\n\n"
5694        let buffer = TextBuffer::from_bytes(b"\n\n\n".to_vec(), test_fs());
5695
5696        // Line 0: "\n" (byte 0)
5697        // Line 1: "\n" (byte 1)
5698        // Line 2: "\n" (byte 2)
5699        // Line 3: "" (empty, after last newline)
5700
5701        let pos = buffer
5702            .offset_to_position(0)
5703            .expect("small buffer should have line metadata");
5704        assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
5705
5706        let pos = buffer
5707            .offset_to_position(1)
5708            .expect("small buffer should have line metadata");
5709        assert_eq!(pos.line, 1, "Byte 1 should be on line 1");
5710
5711        let pos = buffer
5712            .offset_to_position(2)
5713            .expect("small buffer should have line metadata");
5714        assert_eq!(pos.line, 2, "Byte 2 should be on line 2");
5715
5716        let pos = buffer
5717            .offset_to_position(3)
5718            .expect("small buffer should have line metadata");
5719        assert_eq!(pos.line, 3, "Byte 3 (EOF) should be on line 3");
5720    }
5721
5722    #[test]
5723    fn test_offset_to_position_long_lines() {
5724        // Test with long lines to ensure it's not just line counting
5725        let mut content = Vec::new();
5726        content.extend_from_slice(b"aaaaaaaaaa\n"); // Line 0: 11 bytes (10 'a's + newline)
5727        content.extend_from_slice(b"bbbbbbbbbb\n"); // Line 1: 11 bytes
5728        content.extend_from_slice(b"cccccccccc"); // Line 2: 10 bytes (no newline)
5729
5730        let buffer = TextBuffer::from_bytes(content.clone(), test_fs());
5731
5732        // Test positions at start of each line
5733        let pos = buffer
5734            .offset_to_position(0)
5735            .expect("small buffer should have line metadata");
5736        assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
5737        assert_eq!(pos.column, 0);
5738
5739        let pos = buffer
5740            .offset_to_position(11)
5741            .expect("small buffer should have line metadata");
5742        assert_eq!(pos.line, 1, "Byte 11 (start of line 1) should be on line 1");
5743        assert_eq!(pos.column, 0);
5744
5745        let pos = buffer
5746            .offset_to_position(22)
5747            .expect("small buffer should have line metadata");
5748        assert_eq!(pos.line, 2, "Byte 22 (start of line 2) should be on line 2");
5749        assert_eq!(pos.column, 0);
5750
5751        // Test mid-line positions
5752        let pos = buffer
5753            .offset_to_position(5)
5754            .expect("small buffer should have line metadata");
5755        assert_eq!(pos.line, 0, "Byte 5 should be on line 0");
5756        assert_eq!(pos.column, 5);
5757
5758        let pos = buffer
5759            .offset_to_position(16)
5760            .expect("small buffer should have line metadata");
5761        assert_eq!(pos.line, 1, "Byte 16 should be on line 1");
5762        assert_eq!(pos.column, 5);
5763    }
5764
5765    #[test]
5766    fn test_line_iterator_with_offset_to_position() {
5767        // This combines line iterator with offset_to_position to find issues
5768        let mut buffer = TextBuffer::from_bytes(b"line0\nline1\nline2\n".to_vec(), test_fs());
5769
5770        // Test creating line iterator at various positions
5771        for byte_pos in 0..=buffer.len() {
5772            let iter = buffer.line_iterator(byte_pos, 80);
5773            let iter_pos = iter.current_position();
5774            let expected_line = buffer
5775                .offset_to_position(byte_pos)
5776                .expect("small buffer should have line metadata")
5777                .line;
5778            let expected_line_start = buffer.position_to_offset(Position {
5779                line: expected_line,
5780                column: 0,
5781            });
5782
5783            assert_eq!(
5784                iter_pos, expected_line_start,
5785                "LineIterator at byte {} should position at line start {} but got {}",
5786                byte_pos, expected_line_start, iter_pos
5787            );
5788        }
5789    }
5790
5791    #[test]
5792    fn test_piece_tree_line_count_after_insert() {
5793        // Debug the piece tree structure after insert
5794        let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());
5795
5796        // Insert at line 1, column 0
5797        buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
5798
5799        // Manually verify line counts
5800        let content = buffer.slice_bytes(0..buffer.len());
5801        let newline_count = content.iter().filter(|&&b| b == b'\n').count();
5802        let expected_line_count = newline_count + 1;
5803        let actual_line_count = buffer.line_count();
5804
5805        assert_eq!(
5806            actual_line_count,
5807            Some(expected_line_count),
5808            "Line count mismatch after insert"
5809        );
5810    }
5811
    /// Verifies that byte-offset → LSP (line, character) conversion stays
    /// correct after a simulated LSP rename edits the buffer in two places.
    #[test]
    fn test_position_to_lsp_position_after_modification() {
        // This test demonstrates a bug in the piece tree's offset_to_position
        // where column calculation is incorrect after buffer modifications.
        // The position_to_lsp_position function works around this by using
        // line_start_offset to calculate the column correctly.

        // Initial content: "fn foo(val: i32) {\n    val + 1\n}\n"
        let initial = b"fn foo(val: i32) {\n    val + 1\n}\n";
        let mut buffer = TextBuffer::from_bytes(initial.to_vec(), test_fs());

        // Verify initial positions work correctly
        // Position 23 is 'v' of second "val" on line 1
        let (line, char) = buffer.position_to_lsp_position(23);
        assert_eq!(line, 1, "Initial: position 23 should be on line 1");
        assert_eq!(char, 4, "Initial: position 23 should be at char 4");

        // Simulate rename: delete "val" at position 23 (line 1, char 4) and insert "value"
        // Position 23 = line 1, char 4; Position 26 = line 1, char 7
        buffer.delete_range(
            Position { line: 1, column: 4 },
            Position { line: 1, column: 7 },
        );
        buffer.insert_bytes(23, b"value".to_vec()); // Insert "value"

        // Also rename the first occurrence
        // Position 7 = line 0, char 7; Position 10 = line 0, char 10
        buffer.delete_range(
            Position { line: 0, column: 7 },
            Position {
                line: 0,
                column: 10,
            },
        );
        buffer.insert_bytes(7, b"value".to_vec()); // Insert "value"

        // Buffer is now: "fn foo(value: i32) {\n    value + 1\n}\n"
        let content = String::from_utf8_lossy(&buffer.get_all_text().unwrap()).to_string();
        assert_eq!(content, "fn foo(value: i32) {\n    value + 1\n}\n");

        // Position 25 is now 'v' of second "value" on line 1
        // Line 0: "fn foo(value: i32) {\n" = 21 chars (positions 0-20)
        // Line 1: "    value + 1\n" starts at position 21
        // Position 25 = 21 + 4 = line 1, char 4

        // The workaround in position_to_lsp_position should give correct result
        let (line, char) = buffer.position_to_lsp_position(25);
        assert_eq!(
            line, 1,
            "After modification: position 25 should be on line 1"
        );
        assert_eq!(
            char, 4,
            "After modification: position 25 should be at char 4"
        );

        // Also verify position 21 (start of line 1) works
        let (line, char) = buffer.position_to_lsp_position(21);
        assert_eq!(line, 1, "Position 21 should be on line 1");
        assert_eq!(char, 0, "Position 21 should be at char 0 (start of line)");
    }
5873
5874    #[test]
5875    fn test_detect_crlf() {
5876        assert_eq!(
5877            TextBuffer::detect_line_ending(b"hello\r\nworld\r\n"),
5878            LineEnding::CRLF
5879        );
5880    }
5881
5882    #[test]
5883    fn test_detect_lf() {
5884        assert_eq!(
5885            TextBuffer::detect_line_ending(b"hello\nworld\n"),
5886            LineEnding::LF
5887        );
5888    }
5889
5890    #[test]
5891    fn test_normalize_crlf() {
5892        let input = b"hello\r\nworld\r\n".to_vec();
5893        let output = TextBuffer::normalize_line_endings(input);
5894        assert_eq!(output, b"hello\nworld\n");
5895    }
5896
5897    #[test]
5898    fn test_normalize_empty() {
5899        let input = Vec::new();
5900        let output = TextBuffer::normalize_line_endings(input);
5901        assert_eq!(output, Vec::<u8>::new());
5902    }
5903
    /// Regression test: get_all_text() returns empty for large files with unloaded regions
    ///
    /// This was the root cause of a bug where recovery auto-save would save 0 bytes
    /// for large files, causing data loss on crash recovery.
    ///
    /// The fix is to use get_text_range_mut() which handles lazy loading.
    #[test]
    fn test_get_all_text_returns_empty_for_unloaded_buffers() {
        use tempfile::TempDir;
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("large_test.txt");

        // Create a 50KB file
        let original_content = "X".repeat(50_000);
        std::fs::write(&file_path, &original_content).unwrap();

        // Load with small threshold to trigger large file mode
        let mut buffer = TextBuffer::load_from_file(&file_path, 1024, test_fs()).unwrap();
        assert!(buffer.large_file, "Should be in large file mode");
        assert!(!buffer.buffers[0].is_loaded(), "Buffer should be unloaded");

        // Make a small edit ("EDITED: " is 8 bytes)
        buffer.insert_bytes(0, b"EDITED: ".to_vec());

        // get_all_text() now returns None for unloaded buffers instead of empty
        // This is the correct behavior - it signals that content is not available
        let content_immutable = buffer.get_all_text();

        // get_all_text() returns None because it uses get_text_range() which
        // returns None for unloaded regions
        assert!(
            content_immutable.is_none(),
            "get_all_text() should return None for large files with unloaded regions. \
             Got Some({} bytes) instead of None.",
            content_immutable.as_ref().map(|c| c.len()).unwrap_or(0)
        );

        // CORRECT BEHAVIOR: get_text_range_mut() handles lazy loading
        // and must return the 50_000 original bytes plus the 8-byte prefix.
        let total = buffer.total_bytes();
        let content_lazy = buffer.get_text_range_mut(0, total).unwrap();
        assert_eq!(
            content_lazy.len(),
            50_000 + 8,
            "get_text_range_mut() should return all content with lazy loading"
        );
        assert!(
            String::from_utf8_lossy(&content_lazy).starts_with("EDITED: "),
            "Content should start with our edit"
        );
    }
5954
5955    // ===== Line Ending Conversion Tests =====
5956
5957    mod line_ending_conversion {
5958        use super::*;
5959
5960        #[test]
5961        fn test_convert_lf_to_crlf() {
5962            let input = b"Line 1\nLine 2\nLine 3\n";
5963            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
5964            assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
5965        }
5966
5967        #[test]
5968        fn test_convert_crlf_to_lf() {
5969            let input = b"Line 1\r\nLine 2\r\nLine 3\r\n";
5970            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
5971            assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
5972        }
5973
5974        #[test]
5975        fn test_convert_cr_to_lf() {
5976            let input = b"Line 1\rLine 2\rLine 3\r";
5977            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
5978            assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
5979        }
5980
5981        #[test]
5982        fn test_convert_mixed_to_crlf() {
5983            // Mixed line endings: LF, CRLF, CR
5984            let input = b"Line 1\nLine 2\r\nLine 3\r";
5985            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
5986            assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
5987        }
5988
5989        #[test]
5990        fn test_convert_lf_to_lf_is_noop() {
5991            let input = b"Line 1\nLine 2\nLine 3\n";
5992            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
5993            assert_eq!(result, input.to_vec());
5994        }
5995
5996        #[test]
5997        fn test_convert_empty_content() {
5998            let input = b"";
5999            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
6000            assert_eq!(result, b"".to_vec());
6001        }
6002
6003        #[test]
6004        fn test_convert_no_line_endings() {
6005            let input = b"No line endings here";
6006            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
6007            assert_eq!(result, b"No line endings here".to_vec());
6008        }
6009
6010        #[test]
6011        fn test_set_line_ending_marks_modified() {
6012            let mut buffer = TextBuffer::from_bytes(b"Hello\nWorld\n".to_vec(), test_fs());
6013            assert!(!buffer.is_modified());
6014
6015            buffer.set_line_ending(LineEnding::CRLF);
6016            assert!(buffer.is_modified());
6017        }
6018
6019        #[test]
6020        fn test_set_default_line_ending_does_not_mark_modified() {
6021            let mut buffer = TextBuffer::empty(test_fs());
6022            assert!(!buffer.is_modified());
6023
6024            buffer.set_default_line_ending(LineEnding::CRLF);
6025            assert!(!buffer.is_modified());
6026            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
6027        }
6028
6029        #[test]
6030        fn test_save_to_file_converts_lf_to_crlf() {
6031            use tempfile::TempDir;
6032
6033            let temp_dir = TempDir::new().unwrap();
6034            let file_path = temp_dir.path().join("test_lf_to_crlf.txt");
6035
6036            // Create a file with LF line endings
6037            let original_content = b"Line 1\nLine 2\nLine 3\n";
6038            std::fs::write(&file_path, original_content).unwrap();
6039
6040            // Load the file
6041            let mut buffer =
6042                TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs())
6043                    .unwrap();
6044            assert_eq!(buffer.line_ending(), LineEnding::LF);
6045
6046            // Change line ending to CRLF
6047            buffer.set_line_ending(LineEnding::CRLF);
6048            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
6049            assert!(buffer.is_modified());
6050
6051            // Save the file
6052            buffer.save_to_file(&file_path).unwrap();
6053
6054            // Read back and verify CRLF
6055            let saved_bytes = std::fs::read(&file_path).unwrap();
6056            assert_eq!(&saved_bytes, b"Line 1\r\nLine 2\r\nLine 3\r\n");
6057        }
6058
6059        #[test]
6060        fn test_save_to_file_converts_crlf_to_lf() {
6061            use tempfile::TempDir;
6062
6063            let temp_dir = TempDir::new().unwrap();
6064            let file_path = temp_dir.path().join("test_crlf_to_lf.txt");
6065
6066            // Create a file with CRLF line endings
6067            let original_content = b"Line 1\r\nLine 2\r\nLine 3\r\n";
6068            std::fs::write(&file_path, original_content).unwrap();
6069
6070            // Load the file
6071            let mut buffer =
6072                TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs())
6073                    .unwrap();
6074            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
6075
6076            // Change line ending to LF
6077            buffer.set_line_ending(LineEnding::LF);
6078            assert_eq!(buffer.line_ending(), LineEnding::LF);
6079            assert!(buffer.is_modified());
6080
6081            // Save the file
6082            buffer.save_to_file(&file_path).unwrap();
6083
6084            // Read back and verify LF (no CRLF)
6085            let saved_bytes = std::fs::read(&file_path).unwrap();
6086            assert_eq!(&saved_bytes, b"Line 1\nLine 2\nLine 3\n");
6087        }
6088
6089        #[test]
6090        #[cfg(unix)]
6091        fn test_save_to_unwritable_file() -> anyhow::Result<()> {
6092            // Root (uid 0) bypasses Unix file permission checks, so these
6093            // permission-denied tests are meaningless when running as root.
6094            if unsafe { libc::getuid() } == 0 {
6095                eprintln!("Skipping test: root bypasses file permission checks");
6096                return Ok(());
6097            }
6098            use std::fs::Permissions;
6099            use std::os::unix::fs::PermissionsExt;
6100            use tempfile::TempDir;
6101
6102            let temp_dir = TempDir::new().unwrap();
6103            let unwritable_dir = temp_dir.path().join("unwritable_dir");
6104            std::fs::create_dir(&unwritable_dir)?;
6105
6106            let file_path = unwritable_dir.join("unwritable.txt");
6107            std::fs::write(&file_path, "original content")?;
6108
6109            // Make directory unwritable to prevent rename/temp file creation
6110            std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;
6111
6112            let mut buffer = TextBuffer::from_bytes(b"new content".to_vec(), test_fs());
6113            let result = buffer.save_to_file(&file_path);
6114
6115            // Verify that it returns SudoSaveRequired
6116            match result {
6117                Err(e) => {
6118                    if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
6119                        assert_eq!(sudo_err.dest_path, file_path);
6120                        assert!(sudo_err.temp_path.exists());
6121                        // Cleanup temp file
6122                        drop(std::fs::remove_file(&sudo_err.temp_path));
6123                    } else {
6124                        panic!("Expected SudoSaveRequired error, got: {:?}", e);
6125                    }
6126                }
6127                Ok(_) => panic!("Expected error, but save succeeded"),
6128            }
6129
6130            Ok(())
6131        }
6132
6133        #[test]
6134        #[cfg(unix)]
6135        fn test_save_to_unwritable_directory() -> anyhow::Result<()> {
6136            // Root (uid 0) bypasses Unix file permission checks, so these
6137            // permission-denied tests are meaningless when running as root.
6138            if unsafe { libc::getuid() } == 0 {
6139                eprintln!("Skipping test: root bypasses file permission checks");
6140                return Ok(());
6141            }
6142            use std::fs::Permissions;
6143            use std::os::unix::fs::PermissionsExt;
6144            use tempfile::TempDir;
6145
6146            let temp_dir = TempDir::new().unwrap();
6147            let unwritable_dir = temp_dir.path().join("unwritable_dir");
6148            std::fs::create_dir(&unwritable_dir)?;
6149
6150            let file_path = unwritable_dir.join("test.txt");
6151
6152            // Make directory unwritable (no write allowed)
6153            std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;
6154
6155            let mut buffer = TextBuffer::from_bytes(b"content".to_vec(), test_fs());
6156            let result = buffer.save_to_file(&file_path);
6157
6158            match result {
6159                Err(e) => {
6160                    if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
6161                        assert_eq!(sudo_err.dest_path, file_path);
6162                        assert!(sudo_err.temp_path.exists());
6163                        // It should be in /tmp because the directory was not writable
6164                        assert!(sudo_err.temp_path.starts_with(std::env::temp_dir()));
6165                        // Cleanup
6166                        drop(std::fs::remove_file(&sudo_err.temp_path));
6167                    } else {
6168                        panic!("Expected SudoSaveRequired error, got: {:?}", e);
6169                    }
6170                }
6171                Ok(_) => panic!("Expected error, but save succeeded"),
6172            }
6173
6174            Ok(())
6175        }
6176    }
6177
6178    mod large_file_encoding_tests {
6179        use super::*;
6180
6181        #[test]
6182        fn test_large_file_encoding_confirmation_display() {
6183            let confirmation = LargeFileEncodingConfirmation {
6184                path: PathBuf::from("/test/file.txt"),
6185                file_size: 150 * 1024 * 1024, // 150 MB
6186                encoding: Encoding::ShiftJis,
6187            };
6188
6189            let display = format!("{}", confirmation);
6190            assert!(display.contains("150 MB"), "Display: {}", display);
6191            assert!(display.contains("Shift-JIS"), "Display: {}", display);
6192            assert!(
6193                display.contains("requires full load"),
6194                "Display: {}",
6195                display
6196            );
6197        }
6198
6199        #[test]
6200        fn test_large_file_encoding_confirmation_equality() {
6201            let a = LargeFileEncodingConfirmation {
6202                path: PathBuf::from("/test/file.txt"),
6203                file_size: 100 * 1024 * 1024,
6204                encoding: Encoding::Gb18030,
6205            };
6206            let b = LargeFileEncodingConfirmation {
6207                path: PathBuf::from("/test/file.txt"),
6208                file_size: 100 * 1024 * 1024,
6209                encoding: Encoding::Gb18030,
6210            };
6211            let c = LargeFileEncodingConfirmation {
6212                path: PathBuf::from("/test/other.txt"),
6213                file_size: 100 * 1024 * 1024,
6214                encoding: Encoding::Gb18030,
6215            };
6216
6217            assert_eq!(a, b);
6218            assert_ne!(a, c);
6219        }
6220
6221        #[test]
6222        fn test_encoding_requires_confirmation() {
6223            // Resynchronizable encodings should NOT require confirmation
6224            assert!(!Encoding::Utf8.requires_full_file_load());
6225            assert!(!Encoding::Utf8Bom.requires_full_file_load());
6226            assert!(!Encoding::Ascii.requires_full_file_load());
6227            assert!(!Encoding::Latin1.requires_full_file_load());
6228            assert!(!Encoding::Windows1252.requires_full_file_load());
6229            assert!(!Encoding::Utf16Le.requires_full_file_load());
6230            assert!(!Encoding::Utf16Be.requires_full_file_load());
6231
6232            // Non-resynchronizable CJK encodings SHOULD require confirmation
6233            assert!(Encoding::Gb18030.requires_full_file_load());
6234            assert!(Encoding::Gbk.requires_full_file_load());
6235            assert!(Encoding::ShiftJis.requires_full_file_load());
6236            assert!(Encoding::EucKr.requires_full_file_load());
6237        }
6238
6239        #[test]
6240        fn test_check_large_file_encoding_small_file() {
6241            use tempfile::NamedTempFile;
6242
6243            // Create a small file (well under threshold)
6244            let temp = NamedTempFile::new().unwrap();
6245            std::fs::write(temp.path(), b"hello world").unwrap();
6246
6247            let result = TextBuffer::check_large_file_encoding(temp.path(), test_fs()).unwrap();
6248            assert!(
6249                result.is_none(),
6250                "Small files should not require confirmation"
6251            );
6252        }
6253
6254        #[test]
6255        fn test_large_file_encoding_error_downcast() {
6256            // Verify that LargeFileEncodingConfirmation can be used as an anyhow error
6257            let confirmation = LargeFileEncodingConfirmation {
6258                path: PathBuf::from("/test/file.txt"),
6259                file_size: 200 * 1024 * 1024,
6260                encoding: Encoding::EucKr,
6261            };
6262
6263            let error: anyhow::Error = confirmation.clone().into();
6264            let downcast = error.downcast_ref::<LargeFileEncodingConfirmation>();
6265            assert!(downcast.is_some());
6266            assert_eq!(downcast.unwrap().encoding, Encoding::EucKr);
6267        }
6268    }
6269
6270    mod rebuild_pristine_saved_root_tests {
6271        use super::*;
6272        use crate::model::piece_tree::BufferLocation;
6273        use std::sync::Arc;
6274
        /// Create a large-file-mode TextBuffer from raw bytes, simulating what
        /// `load_from_file` does for files above the large-file threshold.
        ///
        /// The entire `content` sits in one pre-loaded stored buffer covered by
        /// a single piece whose line-feed count is `None`, so `line_feeds_scanned`
        /// starts `false` and `line_count()` is `None` until a scan runs.
        fn large_file_buffer(content: &[u8]) -> TextBuffer {
            let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
                Arc::new(crate::model::filesystem::StdFileSystem);
            let bytes = content.len();
            // Buffer id 0, contents already resident in memory.
            let buffer =
                crate::model::piece_tree::StringBuffer::new_loaded(0, content.to_vec(), false);
            let piece_tree = if bytes > 0 {
                // One piece spanning the whole stored buffer; `None` marks the
                // line-feed count as not yet computed.
                crate::model::piece_tree::PieceTree::new(BufferLocation::Stored(0), 0, bytes, None)
            } else {
                crate::model::piece_tree::PieceTree::empty()
            };
            // Alias the pristine root so diff_since_saved() can short-circuit
            // unedited subtrees via Arc pointer equality.
            let saved_root = piece_tree.root();
            TextBuffer {
                fs,
                piece_tree,
                saved_root,
                buffers: vec![buffer],
                next_buffer_id: 1,
                file_path: None,
                modified: false,
                recovery_pending: false,
                large_file: true,
                line_feeds_scanned: false,
                is_binary: false,
                line_ending: LineEnding::LF,
                original_line_ending: LineEnding::LF,
                encoding: Encoding::Utf8,
                original_encoding: Encoding::Utf8,
                saved_file_size: Some(bytes),
                version: 0,
                config: BufferConfig::default(),
            }
        }
6310
6311        /// Simulate prepare_line_scan + scanning: pre-split and compute lf counts.
6312        fn scan_line_feeds(buf: &mut TextBuffer) -> Vec<(usize, usize)> {
6313            buf.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
6314            let leaves = buf.piece_tree.get_leaves();
6315            let mut updates = Vec::new();
6316            for (idx, leaf) in leaves.iter().enumerate() {
6317                if leaf.line_feed_cnt.is_some() {
6318                    continue;
6319                }
6320                let count = buf.scan_leaf(leaf).unwrap();
6321                updates.push((idx, count));
6322            }
6323            updates
6324        }
6325
6326        /// Generate a repeating pattern with newlines for testing.
6327        fn make_content(size: usize) -> Vec<u8> {
6328            let line = b"abcdefghij0123456789ABCDEFGHIJ0123456789abcdefghij0123456789ABCDEFGHIJ\n";
6329            let mut out = Vec::with_capacity(size);
6330            while out.len() < size {
6331                let remaining = size - out.len();
6332                let take = remaining.min(line.len());
6333                out.extend_from_slice(&line[..take]);
6334            }
6335            out
6336        }
6337
6338        #[test]
6339        fn test_no_edits_arc_ptr_eq() {
6340            let content = make_content(2 * 1024 * 1024);
6341            let expected_lf = content.iter().filter(|&&b| b == b'\n').count();
6342            let mut buf = large_file_buffer(&content);
6343
6344            // Before scan, line_count should be None (large file, no indexing).
6345            assert!(buf.line_count().is_none());
6346
6347            let updates = scan_line_feeds(&mut buf);
6348            buf.rebuild_with_pristine_saved_root(&updates);
6349
6350            // After rebuild, line_count must be Some (exact).
6351            assert_eq!(buf.line_count(), Some(expected_lf + 1));
6352
6353            // After rebuild with no edits, roots should be identical (Arc::ptr_eq).
6354            assert!(Arc::ptr_eq(&buf.saved_root, &buf.piece_tree.root()));
6355            let diff = buf.diff_since_saved();
6356            assert!(diff.equal);
6357            assert!(buf.line_feeds_scanned);
6358            assert_eq!(buf.get_all_text().unwrap(), content);
6359        }
6360
6361        #[test]
6362        fn test_single_insertion() {
6363            let content = make_content(2 * 1024 * 1024);
6364            let mut buf = large_file_buffer(&content);
6365            let updates = scan_line_feeds(&mut buf);
6366
6367            // Insert some text in the middle.
6368            let insert_offset = 1_000_000;
6369            let insert_text = b"INSERTED_TEXT\n";
6370            buf.insert_bytes(insert_offset, insert_text.to_vec());
6371
6372            buf.rebuild_with_pristine_saved_root(&updates);
6373
6374            // Content should match the shadow model.
6375            let mut expected = content.clone();
6376            expected.splice(insert_offset..insert_offset, insert_text.iter().copied());
6377            assert_eq!(buf.get_all_text().unwrap(), expected);
6378
6379            // line_count must be Some (exact) after rebuild, even with edits.
6380            let expected_lf = expected.iter().filter(|&&b| b == b'\n').count();
6381            assert_eq!(buf.line_count(), Some(expected_lf + 1));
6382
6383            // Diff should NOT be equal.
6384            let diff = buf.diff_since_saved();
6385            assert!(!diff.equal);
6386            assert!(!diff.byte_ranges.is_empty());
6387        }
6388
6389        /// After rebuild + insert near EOF, diff byte_ranges must be
6390        /// document-absolute.  The bug: `with_doc_offsets` assigned consecutive
6391        /// offsets from 0 to the collected leaves, missing skipped (shared)
6392        /// subtrees' bytes.
6393        #[test]
6394        fn test_diff_byte_ranges_are_document_absolute_after_eof_insert() {
6395            let content = make_content(4 * 1024 * 1024); // 4MB → 4 chunks at 1MB each
6396            let mut buf = large_file_buffer(&content);
6397            let updates = scan_line_feeds(&mut buf);
6398            buf.rebuild_with_pristine_saved_root(&updates);
6399
6400            // Insert 5 bytes near EOF (last 100 bytes of the file).
6401            let insert_offset = content.len() - 100;
6402            buf.insert_bytes(insert_offset, b"HELLO".to_vec());
6403
6404            let diff = buf.diff_since_saved();
6405            assert!(!diff.equal, "diff should detect the insertion");
6406            assert!(
6407                !diff.byte_ranges.is_empty(),
6408                "byte_ranges should not be empty"
6409            );
6410
6411            // byte_ranges must be near the end of the document, not near 0.
6412            let first_range = &diff.byte_ranges[0];
6413            assert!(
6414                first_range.start >= content.len() - 200,
6415                "byte_ranges should be document-absolute (near EOF): got {:?}, expected near {}",
6416                first_range,
6417                insert_offset,
6418            );
6419        }
6420
6421        #[test]
6422        fn test_single_deletion() {
6423            let content = make_content(2 * 1024 * 1024);
6424            let mut buf = large_file_buffer(&content);
6425            let updates = scan_line_feeds(&mut buf);
6426
6427            // Delete a range.
6428            let del_start = 500_000;
6429            let del_len = 1000;
6430            buf.delete_bytes(del_start, del_len);
6431
6432            buf.rebuild_with_pristine_saved_root(&updates);
6433
6434            let mut expected = content.clone();
6435            expected.drain(del_start..del_start + del_len);
6436            assert_eq!(buf.get_all_text().unwrap(), expected);
6437
6438            let diff = buf.diff_since_saved();
6439            assert!(!diff.equal);
6440        }
6441
6442        #[test]
6443        fn test_insert_and_delete() {
6444            let content = make_content(2 * 1024 * 1024);
6445            let mut buf = large_file_buffer(&content);
6446            let updates = scan_line_feeds(&mut buf);
6447
6448            // Delete near the start, insert near the end.
6449            let del_start = 100_000;
6450            let del_len = 500;
6451            buf.delete_bytes(del_start, del_len);
6452
6453            let insert_offset = 1_500_000; // in the post-delete document
6454            let insert_text = b"NEW_CONTENT\n";
6455            buf.insert_bytes(insert_offset, insert_text.to_vec());
6456
6457            buf.rebuild_with_pristine_saved_root(&updates);
6458
6459            // Build expected content.
6460            let mut expected = content.clone();
6461            expected.drain(del_start..del_start + del_len);
6462            expected.splice(insert_offset..insert_offset, insert_text.iter().copied());
6463            assert_eq!(buf.get_all_text().unwrap(), expected);
6464
6465            let diff = buf.diff_since_saved();
6466            assert!(!diff.equal);
6467        }
6468
6469        #[test]
6470        fn test_multiple_scattered_edits() {
6471            let content = make_content(3 * 1024 * 1024);
6472            let mut buf = large_file_buffer(&content);
6473            let updates = scan_line_feeds(&mut buf);
6474            let mut expected = content.clone();
6475
6476            // Apply several edits across chunk boundaries, tracking the shadow model.
6477            // Edit 1: delete at offset 100k
6478            buf.delete_bytes(100_000, 200);
6479            expected.drain(100_000..100_200);
6480
6481            // Edit 2: insert at offset 500k (in current doc, which shifted)
6482            buf.insert_bytes(500_000, b"AAAA\n".to_vec());
6483            expected.splice(500_000..500_000, b"AAAA\n".iter().copied());
6484
6485            // Edit 3: delete at offset 2M
6486            buf.delete_bytes(2_000_000, 300);
6487            expected.drain(2_000_000..2_000_300);
6488
6489            // Edit 4: insert at offset 1M
6490            buf.insert_bytes(1_000_000, b"BBBB\n".to_vec());
6491            expected.splice(1_000_000..1_000_000, b"BBBB\n".iter().copied());
6492
6493            buf.rebuild_with_pristine_saved_root(&updates);
6494
6495            assert_eq!(buf.get_all_text().unwrap(), expected);
6496            let diff = buf.diff_since_saved();
6497            assert!(!diff.equal);
6498        }
6499
6500        #[test]
6501        fn test_content_preserved_after_rebuild() {
6502            // Verify that get_all_text matches before and after rebuild for
6503            // a buffer with edits.
6504            let content = make_content(2 * 1024 * 1024);
6505            let mut buf = large_file_buffer(&content);
6506            let updates = scan_line_feeds(&mut buf);
6507
6508            buf.insert_bytes(0, b"HEADER\n".to_vec());
6509            buf.delete_bytes(1_000_000, 500);
6510
6511            let text_before = buf.get_all_text().unwrap();
6512            buf.rebuild_with_pristine_saved_root(&updates);
6513            let text_after = buf.get_all_text().unwrap();
6514
6515            assert_eq!(text_before, text_after);
6516        }
6517
        /// Create a large-file-mode TextBuffer backed by an actual file on disk
        /// (Unloaded buffer), matching the real `load_from_file` code path.
        ///
        /// Unlike `large_file_buffer`, the backing StringBuffer holds no bytes
        /// in memory yet; reads must go through the filesystem on demand.
        fn large_file_buffer_unloaded(path: &std::path::Path, file_size: usize) -> TextBuffer {
            let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
                Arc::new(crate::model::filesystem::StdFileSystem);
            // Buffer id 0 referencing `path` at offset 0 for `file_size` bytes,
            // with nothing loaded yet.
            let buffer = crate::model::piece_tree::StringBuffer::new_unloaded(
                0,
                path.to_path_buf(),
                0,
                file_size,
            );
            let piece_tree = if file_size > 0 {
                // One piece spanning the whole file; `None` marks the line-feed
                // count as not yet computed.
                crate::model::piece_tree::PieceTree::new(
                    BufferLocation::Stored(0),
                    0,
                    file_size,
                    None,
                )
            } else {
                crate::model::piece_tree::PieceTree::empty()
            };
            // Alias the pristine root so diff_since_saved() can short-circuit
            // unedited subtrees via Arc pointer equality.
            let saved_root = piece_tree.root();
            TextBuffer {
                fs,
                piece_tree,
                saved_root,
                buffers: vec![buffer],
                next_buffer_id: 1,
                file_path: Some(path.to_path_buf()),
                modified: false,
                recovery_pending: false,
                large_file: true,
                line_feeds_scanned: false,
                is_binary: false,
                line_ending: LineEnding::LF,
                original_line_ending: LineEnding::LF,
                encoding: Encoding::Utf8,
                original_encoding: Encoding::Utf8,
                saved_file_size: Some(file_size),
                version: 0,
                config: BufferConfig::default(),
            }
        }
6561
6562        #[test]
6563        fn test_unloaded_buffer_no_edits_line_count() {
6564            let content = make_content(2 * 1024 * 1024);
6565            let expected_lf = content.iter().filter(|&&b| b == b'\n').count();
6566
6567            let tmp = tempfile::NamedTempFile::new().unwrap();
6568            std::fs::write(tmp.path(), &content).unwrap();
6569            let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6570
6571            assert!(
6572                buf.line_count().is_none(),
6573                "before scan, line_count should be None"
6574            );
6575
6576            let updates = scan_line_feeds(&mut buf);
6577            buf.rebuild_with_pristine_saved_root(&updates);
6578
6579            assert_eq!(
6580                buf.line_count(),
6581                Some(expected_lf + 1),
6582                "after rebuild, line_count must be exact"
6583            );
6584            assert!(buf.line_feeds_scanned);
6585        }
6586
6587        #[test]
6588        fn test_unloaded_buffer_with_edits_line_count() {
6589            let content = make_content(2 * 1024 * 1024);
6590
6591            let tmp = tempfile::NamedTempFile::new().unwrap();
6592            std::fs::write(tmp.path(), &content).unwrap();
6593            let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6594
6595            let updates = scan_line_feeds(&mut buf);
6596
6597            // Insert text in the middle (creates an Added piece).
6598            let insert_text = b"INSERTED\n";
6599            buf.insert_bytes(1_000_000, insert_text.to_vec());
6600
6601            buf.rebuild_with_pristine_saved_root(&updates);
6602
6603            let mut expected = content.clone();
6604            expected.splice(1_000_000..1_000_000, insert_text.iter().copied());
6605            let expected_lf = expected.iter().filter(|&&b| b == b'\n').count();
6606
6607            assert_eq!(
6608                buf.line_count(),
6609                Some(expected_lf + 1),
6610                "after rebuild with edits, line_count must be exact"
6611            );
6612            assert!(buf.line_feeds_scanned);
6613        }
6614
6615        /// After rebuild, diff_since_saved should visit a small number of nodes
6616        /// proportional to edit regions, NOT the full tree. This catches
6617        /// regressions where Arc pointers are accidentally destroyed (e.g. by
6618        /// flattening and rebuilding the tree).
6619        #[test]
6620        fn test_diff_efficiency_after_rebuild() {
6621            // Use 32MB so the tree has ~32 leaves (at 1MB chunk size),
6622            // making the efficiency difference between O(log N) and O(N) clear.
6623            let content = make_content(32 * 1024 * 1024);
6624            let mut buf = large_file_buffer(&content);
6625
6626            let updates = scan_line_feeds(&mut buf);
6627
6628            // Insert a small piece of text in one chunk.
6629            buf.insert_bytes(1_000_000, b"HELLO".to_vec());
6630
6631            buf.rebuild_with_pristine_saved_root(&updates);
6632
6633            let diff = buf.diff_since_saved();
6634            assert!(!diff.equal);
6635
6636            let total_leaves = buf.piece_tree.get_leaves().len();
6637            // The diff should visit far fewer nodes than the total tree.
6638            // With path-copying, only the path from root to the edited leaf
6639            // (and its immediate neighbours) should be visited — roughly
6640            // O(log N) nodes, not O(N).
6641            assert!(
6642                diff.nodes_visited < total_leaves,
6643                "diff visited {} nodes but tree has {} leaves — \
6644                 Arc::ptr_eq short-circuiting is not working",
6645                diff.nodes_visited,
6646                total_leaves,
6647            );
6648        }
6649
6650        /// After rebuild_with_pristine_saved_root, loading a small viewport
6651        /// range must NOT cause the entire original file buffer to be loaded.
6652        /// This is a regression test for a bug where the pristine tree's 1MB
6653        /// pieces all referenced Stored(0) (the whole-file buffer). Because
6654        /// piece_view.bytes (1MB) <= LOAD_CHUNK_SIZE, get_text_range_mut took
6655        /// the "load_small_buffer" path, calling load() on the 814MB buffer.
6656        #[test]
6657        fn test_viewport_load_after_rebuild_does_not_load_entire_file() {
6658            use std::sync::atomic::{AtomicUsize, Ordering};
6659
6660            /// Filesystem wrapper that tracks the largest read_range call.
6661            struct TrackingFs {
6662                inner: crate::model::filesystem::StdFileSystem,
6663                max_read_range_len: Arc<AtomicUsize>,
6664            }
6665
6666            impl crate::model::filesystem::FileSystem for TrackingFs {
6667                fn read_file(&self, path: &Path) -> std::io::Result<Vec<u8>> {
6668                    self.inner.read_file(path)
6669                }
6670                fn read_range(
6671                    &self,
6672                    path: &Path,
6673                    offset: u64,
6674                    len: usize,
6675                ) -> std::io::Result<Vec<u8>> {
6676                    self.max_read_range_len.fetch_max(len, Ordering::SeqCst);
6677                    self.inner.read_range(path, offset, len)
6678                }
6679                fn write_file(&self, path: &Path, data: &[u8]) -> std::io::Result<()> {
6680                    self.inner.write_file(path, data)
6681                }
6682                fn create_file(
6683                    &self,
6684                    path: &Path,
6685                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
6686                {
6687                    self.inner.create_file(path)
6688                }
6689                fn open_file(
6690                    &self,
6691                    path: &Path,
6692                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileReader>>
6693                {
6694                    self.inner.open_file(path)
6695                }
6696                fn open_file_for_write(
6697                    &self,
6698                    path: &Path,
6699                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
6700                {
6701                    self.inner.open_file_for_write(path)
6702                }
6703                fn open_file_for_append(
6704                    &self,
6705                    path: &Path,
6706                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
6707                {
6708                    self.inner.open_file_for_append(path)
6709                }
6710                fn set_file_length(&self, path: &Path, len: u64) -> std::io::Result<()> {
6711                    self.inner.set_file_length(path, len)
6712                }
6713                fn rename(&self, from: &Path, to: &Path) -> std::io::Result<()> {
6714                    self.inner.rename(from, to)
6715                }
6716                fn copy(&self, from: &Path, to: &Path) -> std::io::Result<u64> {
6717                    self.inner.copy(from, to)
6718                }
6719                fn remove_file(&self, path: &Path) -> std::io::Result<()> {
6720                    self.inner.remove_file(path)
6721                }
6722                fn remove_dir(&self, path: &Path) -> std::io::Result<()> {
6723                    self.inner.remove_dir(path)
6724                }
6725                fn metadata(
6726                    &self,
6727                    path: &Path,
6728                ) -> std::io::Result<crate::model::filesystem::FileMetadata> {
6729                    self.inner.metadata(path)
6730                }
6731                fn symlink_metadata(
6732                    &self,
6733                    path: &Path,
6734                ) -> std::io::Result<crate::model::filesystem::FileMetadata> {
6735                    self.inner.symlink_metadata(path)
6736                }
6737                fn is_dir(&self, path: &Path) -> std::io::Result<bool> {
6738                    self.inner.is_dir(path)
6739                }
6740                fn is_file(&self, path: &Path) -> std::io::Result<bool> {
6741                    self.inner.is_file(path)
6742                }
6743                fn set_permissions(
6744                    &self,
6745                    path: &Path,
6746                    permissions: &crate::model::filesystem::FilePermissions,
6747                ) -> std::io::Result<()> {
6748                    self.inner.set_permissions(path, permissions)
6749                }
6750                fn is_owner(&self, path: &Path) -> bool {
6751                    self.inner.is_owner(path)
6752                }
6753                fn read_dir(
6754                    &self,
6755                    path: &Path,
6756                ) -> std::io::Result<Vec<crate::model::filesystem::DirEntry>> {
6757                    self.inner.read_dir(path)
6758                }
6759                fn create_dir(&self, path: &Path) -> std::io::Result<()> {
6760                    self.inner.create_dir(path)
6761                }
6762                fn create_dir_all(&self, path: &Path) -> std::io::Result<()> {
6763                    self.inner.create_dir_all(path)
6764                }
6765                fn canonicalize(&self, path: &Path) -> std::io::Result<PathBuf> {
6766                    self.inner.canonicalize(path)
6767                }
6768                fn current_uid(&self) -> u32 {
6769                    self.inner.current_uid()
6770                }
6771                fn sudo_write(
6772                    &self,
6773                    path: &Path,
6774                    data: &[u8],
6775                    mode: u32,
6776                    uid: u32,
6777                    gid: u32,
6778                ) -> std::io::Result<()> {
6779                    self.inner.sudo_write(path, data, mode, uid, gid)
6780                }
6781                fn search_file(
6782                    &self,
6783                    path: &Path,
6784                    pattern: &str,
6785                    opts: &crate::model::filesystem::FileSearchOptions,
6786                    cursor: &mut crate::model::filesystem::FileSearchCursor,
6787                ) -> std::io::Result<Vec<SearchMatch>> {
6788                    crate::model::filesystem::default_search_file(
6789                        &self.inner,
6790                        path,
6791                        pattern,
6792                        opts,
6793                        cursor,
6794                    )
6795                }
6796            }
6797
6798            // Create a 3MB file with newlines (3 chunks at LOAD_CHUNK_SIZE=1MB).
6799            let file_size = LOAD_CHUNK_SIZE * 3;
6800            let content = make_content(file_size);
6801
6802            let tmp = tempfile::NamedTempFile::new().unwrap();
6803            std::fs::write(tmp.path(), &content).unwrap();
6804
6805            let max_read = Arc::new(AtomicUsize::new(0));
6806            let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
6807                Arc::new(TrackingFs {
6808                    inner: crate::model::filesystem::StdFileSystem,
6809                    max_read_range_len: max_read.clone(),
6810                });
6811
6812            // Build an unloaded large-file buffer with the tracking FS.
6813            let buffer = crate::model::piece_tree::StringBuffer::new_unloaded(
6814                0,
6815                tmp.path().to_path_buf(),
6816                0,
6817                file_size,
6818            );
6819            let piece_tree = PieceTree::new(BufferLocation::Stored(0), 0, file_size, None);
6820            let saved_root = piece_tree.root();
6821            let mut buf = TextBuffer {
6822                fs,
6823                piece_tree,
6824                saved_root,
6825                buffers: vec![buffer],
6826                next_buffer_id: 1,
6827                file_path: Some(tmp.path().to_path_buf()),
6828                modified: false,
6829                recovery_pending: false,
6830                large_file: true,
6831                line_feeds_scanned: false,
6832                is_binary: false,
6833                line_ending: LineEnding::LF,
6834                original_line_ending: LineEnding::LF,
6835                encoding: Encoding::Utf8,
6836                original_encoding: Encoding::Utf8,
6837                saved_file_size: Some(file_size),
6838                version: 0,
6839                config: BufferConfig::default(),
6840            };
6841
6842            // Load a small viewport in the middle (forces chunk splitting).
6843            let viewport_offset = LOAD_CHUNK_SIZE + 100; // somewhere in chunk 2
6844            buf.get_text_range_mut(viewport_offset, 4096).unwrap();
6845
6846            // Run the line-feed scan and rebuild the pristine tree.
6847            let updates = scan_line_feeds(&mut buf);
6848            buf.rebuild_with_pristine_saved_root(&updates);
6849
6850            // Reset the tracker — we only care about reads AFTER the rebuild.
6851            max_read.store(0, Ordering::SeqCst);
6852
6853            // Load the same viewport range again.
6854            buf.get_text_range_mut(viewport_offset, 4096).unwrap();
6855
6856            let largest_read = max_read.load(Ordering::SeqCst);
6857            assert!(
6858                largest_read <= LOAD_CHUNK_SIZE,
6859                "After rebuild, loading a viewport triggered a read of {} bytes \
6860                 (file_size={}). This means the entire Stored buffer is being \
6861                 loaded instead of just the needed chunk.",
6862                largest_read,
6863                file_size,
6864            );
6865        }
6866
6867        /// After rebuild_with_pristine_saved_root, loading a viewport must not
6868        /// destroy the line feed counts on pieces. The chunk-split path in
6869        /// get_text_range_mut calls split_at_offset, which invokes
6870        /// compute_line_feeds_static — returning None for unloaded buffers.
6871        /// This turns exact line numbers back into byte-based estimates.
6872        #[test]
6873        fn test_viewport_load_after_rebuild_preserves_line_counts() {
6874            let file_size = LOAD_CHUNK_SIZE * 3;
6875            let content = make_content(file_size);
6876
6877            let tmp = tempfile::NamedTempFile::new().unwrap();
6878            std::fs::write(tmp.path(), &content).unwrap();
6879            let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6880
6881            // Scan + rebuild so every leaf has a known line_feed_cnt.
6882            let updates = scan_line_feeds(&mut buf);
6883            buf.rebuild_with_pristine_saved_root(&updates);
6884
6885            let line_count_before = buf.piece_tree.line_count();
6886            assert!(
6887                line_count_before.is_some(),
6888                "line_count must be Some after rebuild"
6889            );
6890
6891            // Load a viewport that starts in the MIDDLE of a piece, forcing
6892            // split_at_offset (not just replace_buffer_reference).
6893            let mid_piece_offset = LOAD_CHUNK_SIZE + LOAD_CHUNK_SIZE / 2;
6894            buf.get_text_range_mut(mid_piece_offset, 4096).unwrap();
6895
6896            let line_count_after = buf.piece_tree.line_count();
6897            assert!(
6898                line_count_after.is_some(),
6899                "line_count must still be Some after viewport load \
6900                 (was {:?} before, now {:?})",
6901                line_count_before,
6902                line_count_after,
6903            );
6904            assert_eq!(
6905                line_count_before, line_count_after,
6906                "line_count must not change after viewport load"
6907            );
6908        }
6909
6910        /// Same test but with Unloaded data (the fixup path).
6911        #[test]
6912        fn test_diff_efficiency_after_rebuild_unloaded() {
6913            let content = make_content(32 * 1024 * 1024);
6914
6915            let tmp = tempfile::NamedTempFile::new().unwrap();
6916            std::fs::write(tmp.path(), &content).unwrap();
6917            let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6918
6919            let updates = scan_line_feeds(&mut buf);
6920
6921            buf.insert_bytes(1_000_000, b"HELLO".to_vec());
6922
6923            buf.rebuild_with_pristine_saved_root(&updates);
6924
6925            let diff = buf.diff_since_saved();
6926            assert!(!diff.equal);
6927
6928            let total_leaves = buf.piece_tree.get_leaves().len();
6929            assert!(
6930                diff.nodes_visited < total_leaves,
6931                "diff visited {} nodes but tree has {} leaves — \
6932                 Arc::ptr_eq short-circuiting is not working (unloaded path)",
6933                diff.nodes_visited,
6934                total_leaves,
6935            );
6936        }
6937    }
6938
6939    mod chunked_search {
6940        use super::*;
6941
6942        fn make_buffer(content: &[u8]) -> TextBuffer {
6943            TextBuffer::from_bytes(content.to_vec(), test_fs())
6944        }
6945
6946        fn make_regex(pattern: &str) -> regex::bytes::Regex {
6947            regex::bytes::Regex::new(pattern).unwrap()
6948        }
6949
6950        #[test]
6951        fn single_chunk_line_col_context() {
6952            let mut buf = make_buffer(b"hello world\nfoo bar\nbaz quux\n");
6953            let state = buf.search_scan_all(make_regex("bar"), 100, 3).unwrap();
6954            assert_eq!(state.matches.len(), 1);
6955            let m = &state.matches[0];
6956            assert_eq!(m.line, 2);
6957            assert_eq!(m.column, 5); // "foo bar" → 'b' at column 5
6958            assert_eq!(m.context, "foo bar");
6959            assert_eq!(m.byte_offset, 16); // "hello world\nfoo " = 16 bytes
6960            assert_eq!(m.length, 3);
6961        }
6962
6963        #[test]
6964        fn multiple_matches_correct_lines() {
6965            let mut buf = make_buffer(b"aaa\nbbb\nccc\naaa\n");
6966            let state = buf.search_scan_all(make_regex("aaa"), 100, 3).unwrap();
6967            assert_eq!(state.matches.len(), 2);
6968            assert_eq!(state.matches[0].line, 1);
6969            assert_eq!(state.matches[0].context, "aaa");
6970            assert_eq!(state.matches[1].line, 4);
6971            assert_eq!(state.matches[1].context, "aaa");
6972        }
6973
6974        #[test]
6975        fn match_on_last_line_no_trailing_newline() {
6976            let mut buf = make_buffer(b"line1\nline2\ntarget");
6977            let state = buf.search_scan_all(make_regex("target"), 100, 6).unwrap();
6978            assert_eq!(state.matches.len(), 1);
6979            let m = &state.matches[0];
6980            assert_eq!(m.line, 3);
6981            assert_eq!(m.column, 1);
6982            assert_eq!(m.context, "target");
6983        }
6984
6985        #[test]
6986        fn match_at_first_byte() {
6987            let mut buf = make_buffer(b"target\nother\n");
6988            let state = buf.search_scan_all(make_regex("target"), 100, 6).unwrap();
6989            assert_eq!(state.matches.len(), 1);
6990            let m = &state.matches[0];
6991            assert_eq!(m.line, 1);
6992            assert_eq!(m.column, 1);
6993            assert_eq!(m.byte_offset, 0);
6994        }
6995
6996        #[test]
6997        fn max_matches_caps() {
6998            let mut buf = make_buffer(b"a\na\na\na\na\n");
6999            let state = buf.search_scan_all(make_regex("a"), 3, 1).unwrap();
7000            assert_eq!(state.matches.len(), 3);
7001            assert!(state.capped);
7002        }
7003
7004        #[test]
7005        fn case_insensitive_regex() {
7006            let mut buf = make_buffer(b"Hello\nhello\nHELLO\n");
7007            let state = buf
7008                .search_scan_all(make_regex("(?i)hello"), 100, 5)
7009                .unwrap();
7010            assert_eq!(state.matches.len(), 3);
7011            assert_eq!(state.matches[0].line, 1);
7012            assert_eq!(state.matches[1].line, 2);
7013            assert_eq!(state.matches[2].line, 3);
7014        }
7015
7016        #[test]
7017        fn whole_word_boundary() {
7018            let mut buf = make_buffer(b"foobar\nfoo bar\nfoo\n");
7019            let state = buf.search_scan_all(make_regex(r"\bfoo\b"), 100, 3).unwrap();
7020            assert_eq!(state.matches.len(), 2);
7021            assert_eq!(state.matches[0].line, 2);
7022            assert_eq!(state.matches[0].column, 1);
7023            assert_eq!(state.matches[1].line, 3);
7024        }
7025
7026        /// Force multi-chunk processing by creating a large file buffer
7027        /// with small piece-tree leaves, then verify line numbers are
7028        /// correct across chunk boundaries.
7029        #[test]
7030        fn multi_chunk_line_numbers_correct() {
7031            // Build content: 100 lines of "line_NNN\n"
7032            let mut content = Vec::new();
7033            for i in 1..=100 {
7034                content.extend_from_slice(format!("line_{:03}\n", i).as_bytes());
7035            }
7036
7037            // Load as a "large file" with tiny threshold to force multiple
7038            // piece-tree leaves (chunks).
7039            let temp_dir = tempfile::TempDir::new().unwrap();
7040            let path = temp_dir.path().join("test.txt");
7041            std::fs::write(&path, &content).unwrap();
7042            let mut buffer = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7043
7044            let state = buffer
7045                .search_scan_all(make_regex("line_050"), 100, 8)
7046                .unwrap();
7047            assert_eq!(state.matches.len(), 1);
7048            let m = &state.matches[0];
7049            assert_eq!(m.line, 50);
7050            assert_eq!(m.column, 1);
7051            assert_eq!(m.context, "line_050");
7052        }
7053
7054        /// Verify that matches near chunk boundaries don't produce
7055        /// duplicate results (overlap deduplication).
7056        #[test]
7057        fn multi_chunk_no_duplicate_matches() {
7058            let mut content = Vec::new();
7059            for i in 1..=100 {
7060                content.extend_from_slice(format!("word_{:03}\n", i).as_bytes());
7061            }
7062
7063            let temp_dir = tempfile::TempDir::new().unwrap();
7064            let path = temp_dir.path().join("test.txt");
7065            std::fs::write(&path, &content).unwrap();
7066            let mut buffer = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7067
7068            // Search for a pattern that appears exactly once per line
7069            let state = buffer.search_scan_all(make_regex("word_"), 200, 5).unwrap();
7070            assert_eq!(
7071                state.matches.len(),
7072                100,
7073                "Should find exactly 100 matches (one per line), no duplicates"
7074            );
7075
7076            // Verify line numbers are sequential 1..=100
7077            for (i, m) in state.matches.iter().enumerate() {
7078                assert_eq!(
7079                    m.line,
7080                    i + 1,
7081                    "Match {} should be on line {}, got {}",
7082                    i,
7083                    i + 1,
7084                    m.line
7085                );
7086            }
7087        }
7088
7089        /// The reviewer's counter-example: verify line counting when
7090        /// overlap contains part of a line that continues into the
7091        /// next chunk.
7092        #[test]
7093        fn overlap_mid_line_line_numbers() {
7094            // Create content where a line spans a chunk boundary.
7095            // Use a large-file load with tiny threshold to force chunking.
7096            let mut content = Vec::new();
7097            content.extend_from_slice(b"short\n");
7098            // A long line that will span chunk boundaries
7099            content.extend_from_slice(b"AAAA_");
7100            for _ in 0..50 {
7101                content.extend_from_slice(b"BBBBBBBBBB"); // 500 bytes of B
7102            }
7103            content.extend_from_slice(b"_TARGET_HERE\n");
7104            content.extend_from_slice(b"after\n");
7105
7106            let temp_dir = tempfile::TempDir::new().unwrap();
7107            let path = temp_dir.path().join("test.txt");
7108            std::fs::write(&path, &content).unwrap();
7109            let mut buffer = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7110
7111            let state = buffer
7112                .search_scan_all(make_regex("TARGET_HERE"), 100, 11)
7113                .unwrap();
7114            assert_eq!(state.matches.len(), 1);
7115            let m = &state.matches[0];
7116            assert_eq!(m.line, 2, "TARGET_HERE is on line 2 (the long line)");
7117            assert_eq!(m.length, 11);
7118
7119            // Also check "after" is on line 3
7120            let state2 = buffer.search_scan_all(make_regex("after"), 100, 5).unwrap();
7121            assert_eq!(state2.matches.len(), 1);
7122            assert_eq!(state2.matches[0].line, 3);
7123        }
7124
7125        /// Verify correct results when a match spans the overlap/chunk
7126        /// boundary (starts in overlap tail, ends in new chunk).
7127        #[test]
7128        fn match_spanning_chunk_boundary() {
7129            // Create content where "SPLIT" can appear at the boundary
7130            let mut content = Vec::new();
7131            content.extend_from_slice(b"line1\n");
7132            // Pad to push "SPLIT" near a chunk boundary
7133            for _ in 0..60 {
7134                content.extend_from_slice(b"XXXXXXXXXX"); // 600 bytes
7135            }
7136            content.extend_from_slice(b"SPLIT\n");
7137            content.extend_from_slice(b"end\n");
7138
7139            let temp_dir = tempfile::TempDir::new().unwrap();
7140            let path = temp_dir.path().join("test.txt");
7141            std::fs::write(&path, &content).unwrap();
7142            let mut buffer = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7143
7144            let state = buffer.search_scan_all(make_regex("SPLIT"), 100, 5).unwrap();
7145            assert_eq!(state.matches.len(), 1, "SPLIT should be found exactly once");
7146            assert_eq!(state.matches[0].line, 2); // Still on line 2 (the long X line)
7147        }
7148
7149        #[test]
7150        fn empty_buffer_no_matches() {
7151            let mut buf = make_buffer(b"");
7152            let state = buf.search_scan_all(make_regex("anything"), 100, 8).unwrap();
7153            assert!(state.matches.is_empty());
7154            assert!(!state.capped);
7155        }
7156
7157        #[test]
7158        fn single_line_no_newline() {
7159            let mut buf = make_buffer(b"hello world");
7160            let state = buf.search_scan_all(make_regex("world"), 100, 5).unwrap();
7161            assert_eq!(state.matches.len(), 1);
7162            let m = &state.matches[0];
7163            assert_eq!(m.line, 1);
7164            assert_eq!(m.column, 7);
7165            assert_eq!(m.context, "hello world");
7166        }
7167
7168        /// Verify that multiple matches on the same line get the same
7169        /// line number and correct columns.
7170        #[test]
7171        fn multiple_matches_same_line() {
7172            let mut buf = make_buffer(b"aa bb aa cc aa\nother\n");
7173            let state = buf.search_scan_all(make_regex("aa"), 100, 2).unwrap();
7174            assert_eq!(state.matches.len(), 3);
7175            for m in &state.matches {
7176                assert_eq!(m.line, 1);
7177                assert_eq!(m.context, "aa bb aa cc aa");
7178            }
7179            assert_eq!(state.matches[0].column, 1);
7180            assert_eq!(state.matches[1].column, 7);
7181            assert_eq!(state.matches[2].column, 13);
7182        }
7183    }
7184
7185    mod hybrid_search {
7186        use super::*;
7187
7188        fn make_regex(pattern: &str) -> regex::bytes::Regex {
7189            regex::bytes::Regex::new(pattern).unwrap()
7190        }
7191
7192        fn make_opts() -> crate::model::filesystem::FileSearchOptions {
7193            crate::model::filesystem::FileSearchOptions {
7194                fixed_string: false,
7195                case_sensitive: true,
7196                whole_word: false,
7197                max_matches: 100,
7198            }
7199        }
7200
7201        /// Hybrid search on a fully-loaded small buffer should produce
7202        /// the same results as search_scan_all.
7203        #[test]
7204        fn hybrid_matches_scan_all_for_loaded_buffer() {
7205            let content = b"foo bar baz\nfoo again\nlast line\n";
7206            let mut buf = TextBuffer::from_bytes(content.to_vec(), test_fs());
7207            let regex = make_regex("foo");
7208            let opts = make_opts();
7209
7210            let hybrid = buf
7211                .search_hybrid("foo", &opts, regex.clone(), 100, 3)
7212                .unwrap();
7213            let scan = buf.search_scan_all(regex, 100, 3).unwrap();
7214
7215            assert_eq!(hybrid.len(), scan.matches.len());
7216            for (h, s) in hybrid.iter().zip(scan.matches.iter()) {
7217                assert_eq!(h.byte_offset, s.byte_offset);
7218                assert_eq!(h.line, s.line);
7219                assert_eq!(h.column, s.column);
7220                assert_eq!(h.length, s.length);
7221                assert_eq!(h.context, s.context);
7222            }
7223        }
7224
7225        /// Hybrid search on a file-backed buffer (large file with unloaded
7226        /// regions) should find matches using fs.search_file.
7227        #[test]
7228        fn hybrid_finds_matches_in_unloaded_regions() {
7229            let temp_dir = tempfile::TempDir::new().unwrap();
7230            let path = temp_dir.path().join("big.txt");
7231
7232            // Create a file with known content
7233            let mut content = Vec::new();
7234            for i in 0..100 {
7235                content.extend_from_slice(format!("line {:03}\n", i).as_bytes());
7236            }
7237            std::fs::write(&path, &content).unwrap();
7238
7239            // Load as a large file (unloaded mode)
7240            let mut buf = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7241
7242            // Verify some leaves are unloaded
7243            let leaves = buf.piece_tree.get_leaves();
7244            let has_unloaded = leaves.iter().any(|l| {
7245                matches!(l.location, BufferLocation::Stored(_))
7246                    && buf
7247                        .buffers
7248                        .get(l.location.buffer_id())
7249                        .map(|b| !b.is_loaded())
7250                        .unwrap_or(false)
7251            });
7252
7253            let regex = make_regex("line 050");
7254            let opts = make_opts();
7255            let matches = buf.search_hybrid("line 050", &opts, regex, 100, 8).unwrap();
7256
7257            assert_eq!(matches.len(), 1);
7258            assert_eq!(matches[0].line, 51); // 1-based
7259            assert!(matches[0].context.contains("line 050"));
7260            // If the buffer had unloaded regions, hybrid search used fs.search_file
7261            if has_unloaded {
7262                // Just verify it worked — the match was found without loading everything
7263            }
7264        }
7265
7266        /// Hybrid search on a dirty buffer should find matches in both
7267        /// edited (loaded) and unedited (unloaded) regions.
7268        #[test]
7269        fn hybrid_dirty_buffer_finds_all_matches() {
7270            let temp_dir = tempfile::TempDir::new().unwrap();
7271            let path = temp_dir.path().join("dirty.txt");
7272
7273            let mut content = Vec::new();
7274            for i in 0..50 {
7275                content.extend_from_slice(format!("target {:02}\n", i).as_bytes());
7276            }
7277            std::fs::write(&path, &content).unwrap();
7278
7279            let mut buf = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7280
7281            // Make a small edit near the beginning — insert "target XX" at position 0
7282            buf.insert(0, "target XX\n");
7283
7284            let regex = make_regex("target");
7285            let opts = make_opts();
7286            let matches = buf.search_hybrid("target", &opts, regex, 200, 6).unwrap();
7287
7288            // Should find the inserted "target XX" plus all 50 original "target NN"
7289            assert_eq!(matches.len(), 51);
7290            // First match should be the inserted one
7291            assert!(matches[0].context.contains("target XX"));
7292        }
7293
7294        /// Boundary match: pattern spans loaded→unloaded boundary.
7295        #[test]
7296        fn hybrid_boundary_match() {
7297            let temp_dir = tempfile::TempDir::new().unwrap();
7298            let path = temp_dir.path().join("boundary.txt");
7299
7300            // File content: "AAAAABBBBB" (no newlines)
7301            let content = b"AAAAABBBBB";
7302            std::fs::write(&path, content).unwrap();
7303
7304            let mut buf = TextBuffer::from_bytes(content.to_vec(), test_fs());
7305            buf.rename_file_path(path);
7306
7307            let regex = make_regex("AAAAABBBBB");
7308            let opts = make_opts();
7309            let matches = buf
7310                .search_hybrid("AAAAABBBBB", &opts, regex, 100, 10)
7311                .unwrap();
7312
7313            assert_eq!(matches.len(), 1);
7314            assert_eq!(matches[0].byte_offset, 0);
7315        }
7316
7317        /// Max matches limit is respected.
7318        #[test]
7319        fn hybrid_max_matches_respected() {
7320            let content = b"aaa\naaa\naaa\naaa\naaa\n";
7321            let mut buf = TextBuffer::from_bytes(content.to_vec(), test_fs());
7322            let regex = make_regex("aaa");
7323            let opts = crate::model::filesystem::FileSearchOptions {
7324                max_matches: 3,
7325                ..make_opts()
7326            };
7327            let matches = buf.search_hybrid("aaa", &opts, regex, 3, 3).unwrap();
7328            assert!(matches.len() <= 3);
7329        }
7330    }
7331
7332    mod boundary_overlap {
7333        use super::*;
7334
7335        fn make_regex(pattern: &str) -> regex::bytes::Regex {
7336            regex::bytes::Regex::new(pattern).unwrap()
7337        }
7338
7339        #[test]
7340        fn empty_prev_tail_returns_nothing() {
7341            let matches = search_boundary_overlap(b"", b"hello", 0, 1, &make_regex("hello"), 100);
7342            assert!(matches.is_empty());
7343        }
7344
7345        #[test]
7346        fn pure_tail_match_skipped() {
7347            // "foo" is entirely in prev_tail — should NOT be returned
7348            let matches =
7349                search_boundary_overlap(b"foo bar", b" baz", 0, 1, &make_regex("foo"), 100);
7350            assert!(matches.is_empty());
7351        }
7352
7353        #[test]
7354        fn cross_boundary_match_found() {
7355            // "SPLIT" spans: prev_tail="...SPL", next_head="IT..."
7356            let matches =
7357                search_boundary_overlap(b"xxSPL", b"ITyy", 0, 1, &make_regex("SPLIT"), 100);
7358            assert_eq!(matches.len(), 1);
7359            assert_eq!(matches[0].byte_offset, 2);
7360            assert_eq!(matches[0].length, 5);
7361        }
7362
7363        #[test]
7364        fn pure_head_match_skipped() {
7365            // "baz" is entirely in next_head — should NOT be returned
7366            // (it starts at offset 4 which is >= overlap_len 3)
7367            let matches = search_boundary_overlap(b"foo", b" baz", 0, 1, &make_regex("baz"), 100);
7368            assert!(matches.is_empty());
7369        }
7370
7371        #[test]
7372        fn line_number_tracking() {
7373            // prev_tail has a newline; running_line=5 means "line 5 at
7374            // the boundary".  The newline in the tail means SPLIT starts
7375            // on line 5 (the boundary line).
7376            let matches =
7377                search_boundary_overlap(b"line1\nSPL", b"IT end", 0, 5, &make_regex("SPLIT"), 100);
7378            assert_eq!(matches.len(), 1);
7379            assert_eq!(matches[0].line, 5);
7380        }
7381
7382        #[test]
7383        fn max_matches_respected() {
7384            // Two cross-boundary matches but max is 1
7385            let matches = search_boundary_overlap(b"aXb", b"Xc", 0, 1, &make_regex("X"), 1);
7386            assert!(matches.len() <= 1);
7387        }
7388    }
7389}
7390
7391#[cfg(test)]
7392mod property_tests {
7393    use crate::model::filesystem::StdFileSystem;
7394    use std::sync::Arc;
7395
7396    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
7397        Arc::new(StdFileSystem)
7398    }
7399    use super::*;
7400    use proptest::prelude::*;
7401
7402    // Generate text with some newlines
7403    fn text_with_newlines() -> impl Strategy<Value = Vec<u8>> {
7404        prop::collection::vec(
7405            prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n'),],
7406            0..100,
7407        )
7408    }
7409
    // Randomly generated edit operations applied to a buffer under test
    // (produced by `operation_strategy` below).
    #[derive(Debug, Clone)]
    enum Operation {
        // Insert `text` at byte `offset`; the test clamps `offset` to the
        // buffer length before applying.
        Insert { offset: usize, text: Vec<u8> },
        // Delete `bytes` bytes starting at `offset`; the test clamps both
        // before applying.
        Delete { offset: usize, bytes: usize },
    }
7416
7417    fn operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
7418        prop::collection::vec(
7419            prop_oneof![
7420                (0usize..200, text_with_newlines())
7421                    .prop_map(|(offset, text)| { Operation::Insert { offset, text } }),
7422                (0usize..200, 1usize..50)
7423                    .prop_map(|(offset, bytes)| { Operation::Delete { offset, bytes } }),
7424            ],
7425            0..50,
7426        )
7427    }
7428
7429    proptest! {
7430        #[test]
7431        fn prop_line_count_consistent(text in text_with_newlines()) {
7432            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
7433
7434            let newline_count = text.iter().filter(|&&b| b == b'\n').count();
7435            prop_assert_eq!(buffer.line_count(), Some(newline_count + 1));
7436        }
7437
7438        #[test]
7439        fn prop_get_all_text_matches_original(text in text_with_newlines()) {
7440            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
7441            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
7442        }
7443
7444        #[test]
7445        fn prop_insert_increases_size(
7446            text in text_with_newlines(),
7447            offset in 0usize..100,
7448            insert_text in text_with_newlines()
7449        ) {
7450            let mut buffer = TextBuffer::from_bytes(text, test_fs());
7451            let initial_bytes = buffer.total_bytes();
7452
7453            let offset = offset.min(buffer.total_bytes());
7454            buffer.insert_bytes(offset, insert_text.clone());
7455
7456            prop_assert_eq!(buffer.total_bytes(), initial_bytes + insert_text.len());
7457        }
7458
7459        #[test]
7460        fn prop_delete_decreases_size(
7461            text in text_with_newlines(),
7462            offset in 0usize..100,
7463            delete_bytes in 1usize..50
7464        ) {
7465            if text.is_empty() {
7466                return Ok(());
7467            }
7468
7469            let mut buffer = TextBuffer::from_bytes(text, test_fs());
7470            let initial_bytes = buffer.total_bytes();
7471
7472            let offset = offset.min(buffer.total_bytes());
7473            let delete_bytes = delete_bytes.min(buffer.total_bytes() - offset);
7474
7475            if delete_bytes == 0 {
7476                return Ok(());
7477            }
7478
7479            buffer.delete_bytes(offset, delete_bytes);
7480
7481            prop_assert_eq!(buffer.total_bytes(), initial_bytes - delete_bytes);
7482        }
7483
7484        #[test]
7485        fn prop_insert_then_delete_restores_original(
7486            text in text_with_newlines(),
7487            offset in 0usize..100,
7488            insert_text in text_with_newlines()
7489        ) {
7490            let mut buffer = TextBuffer::from_bytes(text.clone(), test_fs());
7491
7492            let offset = offset.min(buffer.total_bytes());
7493            buffer.insert_bytes(offset, insert_text.clone());
7494            buffer.delete_bytes(offset, insert_text.len());
7495
7496            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
7497        }
7498
7499        #[test]
7500        fn prop_offset_position_roundtrip(text in text_with_newlines()) {
7501            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
7502
7503            for offset in 0..text.len() {
7504                let pos = buffer.offset_to_position(offset).expect("offset_to_position should succeed for valid offset");
7505                let back = buffer.position_to_offset(pos);
7506                prop_assert_eq!(back, offset, "Failed roundtrip for offset {}", offset);
7507            }
7508        }
7509
7510        #[test]
7511        fn prop_get_text_range_valid(
7512            text in text_with_newlines(),
7513            offset in 0usize..100,
7514            length in 1usize..50
7515        ) {
7516            if text.is_empty() {
7517                return Ok(());
7518            }
7519
7520            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
7521            let offset = offset.min(buffer.total_bytes());
7522            let length = length.min(buffer.total_bytes() - offset);
7523
7524            if length == 0 {
7525                return Ok(());
7526            }
7527
7528            let result = buffer.get_text_range(offset, length);
7529            prop_assert_eq!(result, Some(text[offset..offset + length].to_vec()));
7530        }
7531
7532        #[test]
7533        fn prop_operations_maintain_consistency(operations in operation_strategy()) {
7534            let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
7535            let mut expected_text = b"initial\ntext".to_vec();
7536
7537            for op in operations {
7538                match op {
7539                    Operation::Insert { offset, text } => {
7540                        let offset = offset.min(buffer.total_bytes());
7541                        buffer.insert_bytes(offset, text.clone());
7542
7543                        // Update expected
7544                        let offset = offset.min(expected_text.len());
7545                        expected_text.splice(offset..offset, text);
7546                    }
7547                    Operation::Delete { offset, bytes } => {
7548                        if offset < buffer.total_bytes() {
7549                            let bytes = bytes.min(buffer.total_bytes() - offset);
7550                            buffer.delete_bytes(offset, bytes);
7551
7552                            // Update expected
7553                            if offset < expected_text.len() {
7554                                let bytes = bytes.min(expected_text.len() - offset);
7555                                expected_text.drain(offset..offset + bytes);
7556                            }
7557                        }
7558                    }
7559                }
7560            }
7561
7562            prop_assert_eq!(buffer.get_all_text().unwrap(), expected_text);
7563        }
7564
7565        #[test]
7566        fn prop_line_count_never_zero(operations in operation_strategy()) {
7567            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());
7568
7569            for op in operations {
7570                match op {
7571                    Operation::Insert { offset, text } => {
7572                        let offset = offset.min(buffer.total_bytes());
7573                        buffer.insert_bytes(offset, text);
7574                    }
7575                    Operation::Delete { offset, bytes } => {
7576                        buffer.delete_bytes(offset, bytes);
7577                    }
7578                }
7579
7580                // Document always has at least 1 line
7581                prop_assert!(buffer.line_count().unwrap_or(1) >= 1);
7582            }
7583        }
7584
7585        #[test]
7586        fn prop_total_bytes_never_negative(operations in operation_strategy()) {
7587            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());
7588
7589            for op in operations {
7590                match op {
7591                    Operation::Insert { offset, text } => {
7592                        let offset = offset.min(buffer.total_bytes());
7593                        buffer.insert_bytes(offset, text);
7594                    }
7595                    Operation::Delete { offset, bytes } => {
7596                        buffer.delete_bytes(offset, bytes);
7597                    }
7598                }
7599
7600                // Bytes should never overflow
7601                prop_assert!(buffer.total_bytes() < 10_000_000);
7602            }
7603        }
7604
7605        #[test]
7606        fn prop_piece_tree_and_line_index_stay_synced(operations in operation_strategy()) {
7607            let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());
7608
7609            for op in operations {
7610                match op {
7611                    Operation::Insert { offset, text } => {
7612                        let offset = offset.min(buffer.total_bytes());
7613                        buffer.insert_bytes(offset, text);
7614                    }
7615                    Operation::Delete { offset, bytes } => {
7616                        buffer.delete_bytes(offset, bytes);
7617                    }
7618                }
7619
7620                // Verify we can still convert between offsets and positions
7621                if buffer.total_bytes() > 0 {
7622                    let mid_offset = buffer.total_bytes() / 2;
7623                    if let Some(pos) = buffer.offset_to_position(mid_offset) {
7624                        let back = buffer.position_to_offset(pos);
7625
7626                        // Should be able to roundtrip
7627                        prop_assert!(back <= buffer.total_bytes());
7628                    }
7629                }
7630            }
7631        }
7632
7633        #[test]
7634        fn prop_write_recipe_matches_content(text in text_with_newlines()) {
7635            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
7636            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");
7637
7638            // Apply the recipe to get the output
7639            let output = apply_recipe(&buffer, &recipe);
7640            prop_assert_eq!(output, text, "Recipe output should match original content");
7641        }
7642
7643        #[test]
7644        fn prop_write_recipe_after_edits(
7645            initial_text in text_with_newlines(),
7646            operations in operation_strategy()
7647        ) {
7648            let mut buffer = TextBuffer::from_bytes(initial_text, test_fs());
7649
7650            // Apply random operations
7651            for op in operations {
7652                match op {
7653                    Operation::Insert { offset, text } => {
7654                        let offset = offset.min(buffer.total_bytes());
7655                        buffer.insert_bytes(offset, text);
7656                    }
7657                    Operation::Delete { offset, bytes } => {
7658                        if offset < buffer.total_bytes() {
7659                            let bytes = bytes.min(buffer.total_bytes() - offset);
7660                            if bytes > 0 {
7661                                buffer.delete_bytes(offset, bytes);
7662                            }
7663                        }
7664                    }
7665                }
7666            }
7667
7668            // Build recipe and verify it matches buffer content
7669            let expected = buffer.get_all_text().unwrap();
7670            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");
7671            let output = apply_recipe(&buffer, &recipe);
7672
7673            prop_assert_eq!(output, expected, "Recipe output should match buffer content after edits");
7674        }
7675
7676        #[test]
7677        fn prop_write_recipe_copy_ops_valid(
7678            text in prop::collection::vec(prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n')], 10..200),
7679            edit_offset in 0usize..100,
7680            edit_text in text_with_newlines()
7681        ) {
7682            use tempfile::TempDir;
7683
7684            // Create a temp file with initial content
7685            let temp_dir = TempDir::new().unwrap();
7686            let file_path = temp_dir.path().join("test.txt");
7687            std::fs::write(&file_path, &text).unwrap();
7688
7689            // Load the file (creates unloaded buffer regions)
7690            let mut buffer = TextBuffer::load_from_file(&file_path, 1024 * 1024, test_fs()).unwrap();
7691
7692            // Make an edit in the middle
7693            let edit_offset = edit_offset.min(buffer.total_bytes());
7694            buffer.insert_bytes(edit_offset, edit_text.clone());
7695
7696            // Build recipe - should have Copy ops for unmodified regions
7697            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");
7698
7699            // Verify recipe produces correct output
7700            let expected = buffer.get_all_text().unwrap();
7701            let output = apply_recipe(&buffer, &recipe);
7702            prop_assert_eq!(output, expected, "Recipe with Copy ops should match buffer content");
7703
7704            // Verify we have at least some Copy ops if the file was large enough
7705            // (Copy ops reference unloaded regions from the original file)
7706            if text.len() > 100 && edit_offset > 10 {
7707                let has_copy = recipe.actions.iter().any(|a| matches!(a, RecipeAction::Copy { .. }));
7708                // Note: We don't assert this because line ending conversion or other factors
7709                // might cause all Insert ops, which is valid behavior
7710                let _ = has_copy;
7711            }
7712        }
7713    }
7714
7715    /// Helper to apply a WriteRecipe and return the resulting bytes
7716    fn apply_recipe(buffer: &TextBuffer, recipe: &WriteRecipe) -> Vec<u8> {
7717        let mut output = Vec::new();
7718        for action in &recipe.actions {
7719            match action {
7720                RecipeAction::Copy { offset, len } => {
7721                    if let Some(src_path) = &recipe.src_path {
7722                        let data = buffer
7723                            .fs
7724                            .read_range(src_path, *offset, *len as usize)
7725                            .expect("read_range should succeed for Copy op");
7726                        output.extend_from_slice(&data);
7727                    } else {
7728                        panic!("Copy action without source path");
7729                    }
7730                }
7731                RecipeAction::Insert { index } => {
7732                    output.extend_from_slice(&recipe.insert_data[*index]);
7733                }
7734            }
7735        }
7736        output
7737    }
7738
    /// Helper to check if bytes are detected as binary
    ///
    /// Delegates to `TextBuffer::detect_encoding_or_binary` (with the second
    /// argument `false`) and returns the `.1` element of its tuple, which is
    /// the binary flag; the other element is presumably the detected
    /// encoding — confirm against the function's definition.
    fn is_detected_as_binary(bytes: &[u8]) -> bool {
        TextBuffer::detect_encoding_or_binary(bytes, false).1
    }
7743
7744    #[test]
7745    fn test_detect_binary_text_files() {
7746        // Plain text should not be detected as binary
7747        assert!(!is_detected_as_binary(b"Hello, world!"));
7748        assert!(!is_detected_as_binary(b"Line 1\nLine 2\nLine 3"));
7749        assert!(!is_detected_as_binary(b"Tabs\tand\tnewlines\n"));
7750        assert!(!is_detected_as_binary(b"Carriage return\r\n"));
7751
7752        // Empty content is not binary
7753        assert!(!is_detected_as_binary(b""));
7754
7755        // ANSI CSI escape sequences should be treated as text
7756        assert!(!is_detected_as_binary(b"\x1b[31mRed text\x1b[0m"));
7757    }
7758
7759    #[test]
7760    fn test_detect_binary_binary_files() {
7761        // Null bytes indicate binary
7762        assert!(is_detected_as_binary(b"Hello\x00World"));
7763        assert!(is_detected_as_binary(b"\x00"));
7764
7765        // Non-printable control characters (except tab, newline, CR, form feed, vertical tab)
7766        assert!(is_detected_as_binary(b"Text with \x01 control char"));
7767        assert!(is_detected_as_binary(b"\x02\x03\x04"));
7768
7769        // DEL character (0x7F)
7770        assert!(is_detected_as_binary(b"Text with DEL\x7F"));
7771    }
7772
7773    #[test]
7774    fn test_detect_binary_png_file() {
7775        // PNG file signature: 89 50 4E 47 0D 0A 1A 0A
7776        // The 0x1A byte (substitute character) is a control character that triggers binary detection
7777        let png_header: &[u8] = &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
7778        assert!(is_detected_as_binary(png_header));
7779
7780        // Simulate a PNG file with more data after header
7781        let mut png_data = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
7782        png_data.extend_from_slice(b"\x00\x00\x00\x0DIHDR"); // IHDR chunk with null bytes
7783        assert!(is_detected_as_binary(&png_data));
7784    }
7785
7786    #[test]
7787    fn test_detect_binary_other_image_formats() {
7788        // JPEG signature: FF D8 FF
7789        let jpeg_header: &[u8] = &[0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10];
7790        assert!(is_detected_as_binary(jpeg_header));
7791
7792        // GIF signature: GIF89a or GIF87a - contains valid ASCII but typically followed by binary
7793        // GIF header is ASCII but the LSD (Logical Screen Descriptor) contains binary
7794        let gif_data: &[u8] = &[
7795            0x47, 0x49, 0x46, 0x38, 0x39, 0x61, // GIF89a
7796            0x01, 0x00, 0x01, 0x00, // Width=1, Height=1 (little endian)
7797            0x00, // Packed byte
7798            0x00, // Background color index
7799            0x00, // Pixel aspect ratio
7800        ];
7801        // The null bytes in the dimensions trigger binary detection
7802        assert!(is_detected_as_binary(gif_data));
7803
7804        // BMP signature: BM followed by file size (usually contains null bytes)
7805        let bmp_header: &[u8] = &[0x42, 0x4D, 0x00, 0x00, 0x00, 0x00];
7806        assert!(is_detected_as_binary(bmp_header));
7807    }
7808
7809    #[test]
7810    fn test_detect_binary_executable_formats() {
7811        // ELF signature (Linux executables)
7812        let elf_header: &[u8] = &[0x7F, 0x45, 0x4C, 0x46, 0x02, 0x01, 0x01, 0x00];
7813        assert!(is_detected_as_binary(elf_header));
7814
7815        // Mach-O signature (macOS executables) - magic + cpu type/subtype contain null bytes
7816        let macho_header: &[u8] = &[0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x01];
7817        assert!(is_detected_as_binary(macho_header));
7818
7819        // PE/COFF (Windows executables) - MZ header
7820        let pe_header: &[u8] = &[0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00];
7821        assert!(is_detected_as_binary(pe_header));
7822    }
7823}
7824
/// Line data with optional line number
///
/// One value per line yielded by [`TextBufferLineIterator`].
#[derive(Debug, Clone)]
pub struct LineData {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Line content (without trailing newline); invalid UTF-8 bytes are
    /// replaced lossily during collection
    pub content: String,
    /// Whether this line ends with a newline
    pub has_newline: bool,
    /// Line number (None for large files without line metadata)
    pub line_number: Option<usize>,
}
7837
/// Iterator over lines in a TextBuffer that efficiently tracks line numbers
/// using piece tree metadata (single source of truth)
///
/// All lines are materialized up front by [`TextBufferLineIterator::new`];
/// iteration itself is a simple index walk over the collected vector.
pub struct TextBufferLineIterator {
    /// Collected lines (we collect all at once since we need mutable access to load chunks)
    lines: Vec<LineData>,
    /// Current index in the lines vector
    current_index: usize,
    /// Whether there are more lines after these
    /// (true when collection stopped at `max_lines` before the buffer's end)
    pub has_more: bool,
}
7848
7849impl TextBufferLineIterator {
7850    pub(crate) fn new(buffer: &mut TextBuffer, byte_pos: usize, max_lines: usize) -> Result<Self> {
7851        let buffer_len = buffer.len();
7852        if byte_pos >= buffer_len {
7853            return Ok(Self {
7854                lines: Vec::new(),
7855                current_index: 0,
7856                has_more: false,
7857            });
7858        }
7859
7860        // Check if buffer has line metadata (None for large files > 1MB)
7861        let has_line_metadata = buffer.line_count().is_some();
7862
7863        // Determine starting line number by querying piece tree once
7864        // (only if we have line metadata)
7865        let mut current_line = if has_line_metadata {
7866            buffer.offset_to_position(byte_pos).map(|pos| pos.line)
7867        } else {
7868            None
7869        };
7870
7871        let mut lines = Vec::with_capacity(max_lines);
7872        let mut current_offset = byte_pos;
7873        let estimated_line_length = 80; // Use default estimate
7874
7875        // Collect lines by scanning forward
7876        for _ in 0..max_lines {
7877            if current_offset >= buffer_len {
7878                break;
7879            }
7880
7881            let line_start = current_offset;
7882            let line_number = current_line;
7883
7884            // Estimate how many bytes to load for this line
7885            let estimated_max_line_length = estimated_line_length * 3;
7886            let bytes_to_scan = estimated_max_line_length.min(buffer_len - current_offset);
7887
7888            // Load chunk (this handles lazy loading)
7889            let chunk = buffer.get_text_range_mut(current_offset, bytes_to_scan)?;
7890
7891            // Scan for newline
7892            let mut line_len = 0;
7893            let mut found_newline = false;
7894            for &byte in chunk.iter() {
7895                line_len += 1;
7896                if byte == b'\n' {
7897                    found_newline = true;
7898                    break;
7899                }
7900            }
7901
7902            // Handle long lines (rare case)
7903            if !found_newline && current_offset + line_len < buffer_len {
7904                // Line is longer than expected, load more data
7905                let remaining = buffer_len - current_offset - line_len;
7906                let additional_bytes = estimated_max_line_length.min(remaining);
7907                let more_chunk =
7908                    buffer.get_text_range_mut(current_offset + line_len, additional_bytes)?;
7909
7910                let mut extended_chunk = chunk;
7911                extended_chunk.extend_from_slice(&more_chunk);
7912
7913                for &byte in more_chunk.iter() {
7914                    line_len += 1;
7915                    if byte == b'\n' {
7916                        found_newline = true;
7917                        break;
7918                    }
7919                }
7920
7921                let line_string = String::from_utf8_lossy(&extended_chunk[..line_len]).into_owned();
7922                let has_newline = line_string.ends_with('\n');
7923                let content = if has_newline {
7924                    line_string[..line_string.len() - 1].to_string()
7925                } else {
7926                    line_string
7927                };
7928
7929                lines.push(LineData {
7930                    byte_offset: line_start,
7931                    content,
7932                    has_newline,
7933                    line_number,
7934                });
7935
7936                current_offset += line_len;
7937                if has_line_metadata && found_newline {
7938                    current_line = current_line.map(|n| n + 1);
7939                }
7940                continue;
7941            }
7942
7943            // Normal case
7944            let line_string = String::from_utf8_lossy(&chunk[..line_len]).into_owned();
7945            let has_newline = line_string.ends_with('\n');
7946            let content = if has_newline {
7947                line_string[..line_string.len() - 1].to_string()
7948            } else {
7949                line_string
7950            };
7951
7952            lines.push(LineData {
7953                byte_offset: line_start,
7954                content,
7955                has_newline,
7956                line_number,
7957            });
7958
7959            current_offset += line_len;
7960            // Increment line number if we have metadata and found a newline
7961            if has_line_metadata && found_newline {
7962                current_line = current_line.map(|n| n + 1);
7963            }
7964        }
7965
7966        // Check if there are more lines
7967        let has_more = current_offset < buffer_len;
7968
7969        Ok(Self {
7970            lines,
7971            current_index: 0,
7972            has_more,
7973        })
7974    }
7975}
7976
7977impl Iterator for TextBufferLineIterator {
7978    type Item = LineData;
7979
7980    fn next(&mut self) -> Option<Self::Item> {
7981        if self.current_index < self.lines.len() {
7982            let line = self.lines[self.current_index].clone();
7983            self.current_index += 1;
7984            Some(line)
7985        } else {
7986            None
7987        }
7988    }
7989}