Skip to main content

fresh/model/buffer/
mod.rs

//! Text buffer that uses `PieceTree` with integrated line tracking.
//!
//! The tree is the single source of truth for both text and line information.
3use crate::model::encoding;
4use crate::model::filesystem::{FileSearchOptions, FileSystem};
5use crate::model::piece_tree::{
6    BufferData, BufferLocation, Cursor, PieceInfo, PieceRangeIter, PieceTree, PieceView, Position,
7    StringBuffer, TreeStats,
8};
9use crate::model::piece_tree_diff::PieceTreeDiff;
10use crate::primitives::grapheme;
11use anyhow::{Context, Result};
12use regex::bytes::Regex;
13use std::io;
14
15use std::ops::Range;
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18
19// Re-export Encoding for backward compatibility
20pub use encoding::Encoding;
21
22pub mod file_kind;
23pub mod format;
24pub mod persistence;
25pub mod save;
26pub mod search;
27pub use file_kind::BufferFileKind;
28pub use format::{BufferFormat, LineEnding};
29pub use persistence::Persistence;
30pub use save::SudoSaveRequired;
31#[cfg(test)]
32pub(crate) use save::{RecipeAction, WriteRecipe};
33#[cfg(test)]
34use search::search_boundary_overlap;
35use search::SearchRegion;
36pub use search::{ChunkedSearchState, HybridSearchPlan};
37
38/// Error returned when a large file has a non-resynchronizable encoding
39/// and requires user confirmation before loading the entire file into memory.
40///
41/// Non-resynchronizable encodings (like Shift-JIS, GB18030, GBK, EUC-KR) cannot
42/// determine character boundaries when jumping into the middle of a file.
43/// This means the entire file must be loaded and decoded sequentially.
44#[derive(Debug, Clone, PartialEq)]
45pub struct LargeFileEncodingConfirmation {
46    /// Path to the file
47    pub path: PathBuf,
48    /// Size of the file in bytes
49    pub file_size: usize,
50    /// The detected encoding that requires full loading
51    pub encoding: Encoding,
52}
53
54impl std::fmt::Display for LargeFileEncodingConfirmation {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        let size_mb = self.file_size as f64 / (1024.0 * 1024.0);
57        write!(
58            f,
59            "{} ({:.0} MB) requires full load. (l)oad, (e)ncoding, (C)ancel? ",
60            self.encoding.display_name(),
61            size_mb
62        )
63    }
64}
65
66impl std::error::Error for LargeFileEncodingConfirmation {}
67
/// A work item for incremental line-feed scanning (one per leaf).
///
/// Plain data carrier with three public fields; it has no methods in this
/// part of the file.
#[derive(Debug, Clone)]
pub struct LineScanChunk {
    /// Index of the leaf in the piece tree's leaf array.
    pub leaf_index: usize,
    /// Number of bytes in this leaf.
    pub byte_len: usize,
    /// True if the leaf already had a known line_feed_cnt (no I/O needed).
    pub already_known: bool,
}
78
79// Re-export SearchMatch from filesystem — same type is used by both
80// FileSystem::search_file (project grep on disk) and the piece-tree
81// search below (in-editor Ctrl+F and dirty buffers).
82pub use crate::model::filesystem::SearchMatch;
83
// Large file support configuration
/// Default threshold for considering a file "large" (100 MB).
///
/// `TextBuffer::load_from_file` falls back to this value when it is called
/// with a threshold of 0, and `TextBuffer::check_large_file_encoding` always
/// compares the file size against it.
pub const DEFAULT_LARGE_FILE_THRESHOLD: usize = 100 * 1024 * 1024;

/// Chunk size to load when lazy loading (1 MB, i.e. 1024 * 1024 bytes).
pub const LOAD_CHUNK_SIZE: usize = 1024 * 1024;

/// Chunk alignment for lazy loading (64 KB, i.e. 64 * 1024 bytes).
pub const CHUNK_ALIGNMENT: usize = 64 * 1024;
93
/// Settings handed to `TextBuffer` constructors.
#[derive(Debug, Clone)]
pub struct BufferConfig {
    /// Estimated average line length in bytes. Used for approximate line number
    /// display in large files and for goto-line byte offset estimation.
    pub estimated_line_length: usize,
}

impl Default for BufferConfig {
    /// Defaults to an estimated 80 bytes per line.
    fn default() -> Self {
        let estimated_line_length = 80;
        Self {
            estimated_line_length,
        }
    }
}
109
/// Represents a line number (simplified for the new implementation).
///
/// Legacy enum kept for backwards compatibility. Both variants carry a `line`
/// value that [`LineNumber::value`] returns unchanged; they differ only in how
/// [`LineNumber::format`] renders them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineNumber {
    /// Absolute line number - this is the actual line number in the file.
    Absolute(usize),
    /// Relative line number (deprecated). Rendered with a leading `~`.
    Relative {
        line: usize,
        from_cached_line: usize,
    },
}

impl LineNumber {
    /// Get the stored line number value, regardless of variant.
    pub fn value(&self) -> usize {
        match *self {
            Self::Absolute(line) | Self::Relative { line, .. } => line,
        }
    }

    /// Check if this is an absolute line number.
    pub fn is_absolute(&self) -> bool {
        matches!(self, Self::Absolute(_))
    }

    /// Check if this is a relative line number.
    pub fn is_relative(&self) -> bool {
        matches!(self, Self::Relative { .. })
    }

    /// Format the line number for display.
    ///
    /// The stored value is shown plus one; relative numbers get a `~` prefix.
    /// The increment saturates, so a value of `usize::MAX` is rendered as
    /// `usize::MAX` instead of overflowing.
    pub fn format(&self) -> String {
        match *self {
            Self::Absolute(line) => line.saturating_add(1).to_string(),
            Self::Relative { line, .. } => format!("~{}", line.saturating_add(1)),
        }
    }
}
151
/// A text buffer that manages document content using a piece table
/// with integrated line tracking.
///
/// All fields are private; instances are built through the constructors and
/// loaders in `impl TextBuffer`.
pub struct TextBuffer {
    /// The piece tree for efficient text manipulation with integrated line tracking
    piece_tree: PieceTree,

    /// List of string buffers containing chunks of text data.
    /// Index 0 is typically the original/stored buffer.
    /// Additional buffers are added for modifications.
    buffers: Vec<StringBuffer>,

    /// Next buffer ID to assign.
    next_buffer_id: usize,

    /// Filesystem handle, optional file path, dirty/recovery flags,
    /// saved-root snapshot, and saved-file size — see
    /// `persistence.rs`.
    persistence: Persistence,

    /// File-kind flags (large_file, line_feeds_scanned, is_binary) —
    /// see `file_kind.rs`.
    file_kind: BufferFileKind,

    /// Encoding + line-ending state — see `format.rs`.
    format: BufferFormat,

    /// Monotonic version counter for change tracking.
    /// Advanced by `bump_version`, which uses wrapping addition.
    version: u64,

    /// Buffer configuration (estimated line length, etc.)
    config: BufferConfig,
}
184
/// Snapshot of a TextBuffer's piece tree and associated string buffers.
///
/// Used by BulkEdit undo/redo to capture the complete buffer state.
/// Without this, consolidate_after_save() would destroy the string buffers
/// that a BulkEdit's piece tree snapshot references, causing corruption on undo.
#[derive(Debug, Clone)]
pub struct BufferSnapshot {
    /// Captured piece tree.
    pub piece_tree: PieceTree,
    /// Captured string buffers (presumably the ones the captured tree's
    /// pieces refer to — confirm against the code that builds snapshots).
    pub buffers: Vec<StringBuffer>,
    /// Captured value of the buffer-ID counter.
    pub next_buffer_id: usize,
}
196
197impl TextBuffer {
198    /// Create a new text buffer with the given filesystem implementation.
199    /// Note: large_file_threshold is ignored in the new implementation
200    pub fn new(_large_file_threshold: usize, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
201        let piece_tree = PieceTree::empty();
202        let saved_root = piece_tree.root();
203        let line_ending = LineEnding::default();
204        let encoding = Encoding::default();
205        TextBuffer {
206            piece_tree,
207            buffers: vec![StringBuffer::new(0, Vec::new())],
208            next_buffer_id: 1,
209            persistence: Persistence::new(fs, None, saved_root, None),
210            file_kind: BufferFileKind::new(false, false),
211            format: BufferFormat::new(line_ending, encoding),
212            version: 0,
213            config: BufferConfig::default(),
214        }
215    }
216
217    /// Create an empty buffer associated with a file path.
218    /// Used for files that don't exist yet — the path is set so saving will create the file.
219    pub fn new_with_path(
220        large_file_threshold: usize,
221        fs: Arc<dyn FileSystem + Send + Sync>,
222        path: PathBuf,
223    ) -> Self {
224        let mut buffer = Self::new(large_file_threshold, fs);
225        buffer.persistence.set_file_path(path);
226        buffer
227    }
228
229    /// Current buffer version (monotonic, wraps on overflow)
230    pub fn version(&self) -> u64 {
231        self.version
232    }
233
234    /// Get a reference to the filesystem implementation used by this buffer.
235    pub fn filesystem(&self) -> &Arc<dyn FileSystem + Send + Sync> {
236        self.persistence.fs()
237    }
238
239    /// Set the filesystem implementation for this buffer.
240    pub fn set_filesystem(&mut self, fs: Arc<dyn FileSystem + Send + Sync>) {
241        self.persistence.set_fs(fs);
242    }
243
244    #[inline]
245    fn bump_version(&mut self) {
246        self.version = self.version.wrapping_add(1);
247    }
248
249    #[inline]
250    fn mark_content_modified(&mut self) {
251        self.persistence.mark_dirty();
252        self.bump_version();
253    }
254
255    /// Create a text buffer from raw bytes WITHOUT encoding conversion.
256    /// Used for binary files where we want to preserve the exact bytes.
257    fn from_bytes_raw(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
258        let bytes = content.len();
259
260        // For binary files, detect line ending but don't convert encoding
261        let line_ending = format::detect_line_ending(&content);
262
263        // Create initial StringBuffer with ID 0
264        let buffer = StringBuffer::new(0, content);
265        let line_feed_cnt = buffer.line_feed_count();
266
267        let piece_tree = if bytes > 0 {
268            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
269        } else {
270            PieceTree::empty()
271        };
272
273        let saved_root = piece_tree.root();
274
275        TextBuffer {
276            piece_tree,
277            buffers: vec![buffer],
278            next_buffer_id: 1,
279            persistence: Persistence::new(fs, None, saved_root, Some(bytes)),
280            file_kind: BufferFileKind::new(false, true),
281            format: BufferFormat::new(line_ending, Encoding::Utf8),
282            version: 0,
283            config: BufferConfig::default(),
284        }
285    }
286
287    /// Create a text buffer from initial content with the given filesystem.
288    pub fn from_bytes(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
289        // Auto-detect encoding and convert to UTF-8 if needed
290        let (encoding, utf8_content) = format::detect_and_convert_encoding(&content);
291
292        let bytes = utf8_content.len();
293
294        // Auto-detect line ending format from content
295        let line_ending = format::detect_line_ending(&utf8_content);
296
297        // Create initial StringBuffer with ID 0
298        let buffer = StringBuffer::new(0, utf8_content);
299        let line_feed_cnt = buffer.line_feed_count();
300
301        let piece_tree = if bytes > 0 {
302            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
303        } else {
304            PieceTree::empty()
305        };
306
307        let saved_root = piece_tree.root();
308
309        TextBuffer {
310            piece_tree,
311            buffers: vec![buffer],
312            next_buffer_id: 1,
313            persistence: Persistence::new(fs, None, saved_root, Some(bytes)),
314            file_kind: BufferFileKind::new(false, false),
315            format: BufferFormat::new(line_ending, encoding),
316            version: 0,
317            config: BufferConfig::default(),
318        }
319    }
320
321    /// Create a text buffer from bytes with a specific encoding (no auto-detection).
322    pub fn from_bytes_with_encoding(
323        content: Vec<u8>,
324        encoding: Encoding,
325        fs: Arc<dyn FileSystem + Send + Sync>,
326    ) -> Self {
327        // Convert from specified encoding to UTF-8
328        let utf8_content = encoding::convert_to_utf8(&content, encoding);
329
330        let bytes = utf8_content.len();
331
332        // Auto-detect line ending format from content
333        let line_ending = format::detect_line_ending(&utf8_content);
334
335        // Create initial StringBuffer with ID 0
336        let buffer = StringBuffer::new(0, utf8_content);
337        let line_feed_cnt = buffer.line_feed_count();
338
339        let piece_tree = if bytes > 0 {
340            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
341        } else {
342            PieceTree::empty()
343        };
344
345        let saved_root = piece_tree.root();
346
347        TextBuffer {
348            piece_tree,
349            buffers: vec![buffer],
350            next_buffer_id: 1,
351            persistence: Persistence::new(fs, None, saved_root, Some(bytes)),
352            file_kind: BufferFileKind::new(false, false),
353            format: BufferFormat::new(line_ending, encoding),
354            version: 0,
355            config: BufferConfig::default(),
356        }
357    }
358
359    /// Create a text buffer from a string with the given filesystem.
360    pub fn from_str(
361        s: &str,
362        _large_file_threshold: usize,
363        fs: Arc<dyn FileSystem + Send + Sync>,
364    ) -> Self {
365        Self::from_bytes(s.as_bytes().to_vec(), fs)
366    }
367
368    /// Create an empty text buffer with the given filesystem.
369    pub fn empty(fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
370        let piece_tree = PieceTree::empty();
371        let saved_root = piece_tree.root();
372        let line_ending = LineEnding::default();
373        let encoding = Encoding::default();
374        TextBuffer {
375            piece_tree,
376            buffers: vec![StringBuffer::new(0, Vec::new())],
377            next_buffer_id: 1,
378            persistence: Persistence::new(fs, None, saved_root, None),
379            file_kind: BufferFileKind::new(false, false),
380            format: BufferFormat::new(line_ending, encoding),
381            version: 0,
382            config: BufferConfig::default(),
383        }
384    }
385
386    /// Load a text buffer from a file using the given filesystem.
387    pub fn load_from_file<P: AsRef<Path>>(
388        path: P,
389        large_file_threshold: usize,
390        fs: Arc<dyn FileSystem + Send + Sync>,
391    ) -> anyhow::Result<Self> {
392        let path = path.as_ref();
393
394        // Get file size to determine loading strategy
395        let metadata = fs.metadata(path)?;
396        let file_size = metadata.size as usize;
397
398        // Use threshold parameter or default
399        let threshold = if large_file_threshold > 0 {
400            large_file_threshold
401        } else {
402            DEFAULT_LARGE_FILE_THRESHOLD
403        };
404
405        // Choose loading strategy based on file size
406        if file_size >= threshold {
407            Self::load_large_file(path, file_size, fs)
408        } else {
409            Self::load_small_file(path, fs)
410        }
411    }
412
413    /// Load a text buffer from a file with a specific encoding (no auto-detection).
414    pub fn load_from_file_with_encoding<P: AsRef<Path>>(
415        path: P,
416        encoding: Encoding,
417        fs: Arc<dyn FileSystem + Send + Sync>,
418        config: BufferConfig,
419    ) -> anyhow::Result<Self> {
420        let path = path.as_ref();
421        let contents = fs.read_file(path)?;
422
423        let mut buffer = Self::from_bytes_with_encoding(contents, encoding, fs);
424        buffer.persistence.set_file_path(path.to_path_buf());
425        buffer.persistence.clear_modified();
426        buffer.config = config;
427        Ok(buffer)
428    }
429
430    /// Load a small file with full eager loading and line indexing
431    fn load_small_file(path: &Path, fs: Arc<dyn FileSystem + Send + Sync>) -> anyhow::Result<Self> {
432        let contents = fs.read_file(path)?;
433
434        // Use unified encoding/binary detection
435        let (encoding, is_binary) = format::detect_encoding_or_binary(&contents, false);
436
437        // For binary files, skip encoding conversion to preserve raw bytes
438        let mut buffer = if is_binary {
439            Self::from_bytes_raw(contents, fs)
440        } else {
441            // from_bytes handles encoding detection/conversion and line ending detection
442            Self::from_bytes(contents, fs)
443        };
444        buffer.persistence.set_file_path(path.to_path_buf());
445        buffer.persistence.clear_modified();
446        buffer.file_kind.set_large_file(false);
447        buffer.file_kind.set_binary(is_binary);
448        // For binary files, ensure encoding matches detection
449        if is_binary {
450            buffer.format.set_default_encoding(encoding);
451        }
452        // Note: line_ending and encoding are already set by from_bytes/from_bytes_raw
453        Ok(buffer)
454    }
455
456    /// Check if loading a large file requires user confirmation due to encoding.
457    ///
458    /// Some encodings (like Shift-JIS, GB18030, GBK, EUC-KR) cannot be "resynchronized" -
459    /// meaning you cannot determine character boundaries when jumping into the middle
460    /// of a file. These encodings require loading the entire file into memory.
461    ///
462    /// Returns `Some(confirmation)` if user confirmation is needed, `None` if the file
463    /// can be loaded with lazy/streaming loading.
464    pub fn check_large_file_encoding(
465        path: impl AsRef<Path>,
466        fs: Arc<dyn FileSystem + Send + Sync>,
467    ) -> anyhow::Result<Option<LargeFileEncodingConfirmation>> {
468        let path = path.as_ref();
469        let metadata = fs.metadata(path)?;
470        let file_size = metadata.size as usize;
471
472        // Only check for large files
473        if file_size < DEFAULT_LARGE_FILE_THRESHOLD {
474            return Ok(None);
475        }
476
477        // Read a sample to detect encoding
478        let sample_size = file_size.min(8 * 1024);
479        let sample = fs.read_range(path, 0, sample_size)?;
480        let (encoding, is_binary) =
481            format::detect_encoding_or_binary(&sample, file_size > sample_size);
482
483        // Binary files don't need confirmation (loaded as-is)
484        if is_binary {
485            return Ok(None);
486        }
487
488        // Check if the encoding requires full file loading
489        if encoding.requires_full_file_load() {
490            return Ok(Some(LargeFileEncodingConfirmation {
491                path: path.to_path_buf(),
492                file_size,
493                encoding,
494            }));
495        }
496
497        Ok(None)
498    }
499
500    /// Load a large file with unloaded buffer (no line indexing, lazy loading)
501    ///
502    /// If `force_full_load` is true, loads the entire file regardless of encoding.
503    /// This should be set to true after user confirms loading a non-resynchronizable encoding.
504    fn load_large_file(
505        path: &Path,
506        file_size: usize,
507        fs: Arc<dyn FileSystem + Send + Sync>,
508    ) -> anyhow::Result<Self> {
509        Self::load_large_file_internal(path, file_size, fs, false)
510    }
511
512    /// Load a large file, optionally forcing full load for non-resynchronizable encodings.
513    ///
514    /// Called with `force_full_load=true` after user confirms the warning about
515    /// non-resynchronizable encodings requiring full file loading.
516    pub fn load_large_file_confirmed(
517        path: impl AsRef<Path>,
518        fs: Arc<dyn FileSystem + Send + Sync>,
519    ) -> anyhow::Result<Self> {
520        let path = path.as_ref();
521        let metadata = fs.metadata(path)?;
522        let file_size = metadata.size as usize;
523        Self::load_large_file_internal(path, file_size, fs, true)
524    }
525
526    /// Internal implementation for loading large files.
527    fn load_large_file_internal(
528        path: &Path,
529        file_size: usize,
530        fs: Arc<dyn FileSystem + Send + Sync>,
531        force_full_load: bool,
532    ) -> anyhow::Result<Self> {
533        use crate::model::piece_tree::{BufferData, BufferLocation};
534
535        // Read a sample of the file to detect encoding and whether it's binary
536        // We read the first 8KB for detection
537        let sample_size = file_size.min(8 * 1024);
538        let sample = fs.read_range(path, 0, sample_size)?;
539
540        // Use unified encoding/binary detection
541        let (encoding, is_binary) =
542            format::detect_encoding_or_binary(&sample, file_size > sample_size);
543
544        // Binary files skip encoding conversion to preserve raw bytes
545        if is_binary {
546            tracing::info!("Large binary file detected, loading without encoding conversion");
547            let contents = fs.read_file(path)?;
548            let mut buffer = Self::from_bytes_raw(contents, fs);
549            buffer.persistence.set_file_path(path.to_path_buf());
550            buffer.persistence.clear_modified();
551            buffer.file_kind.set_large_file(true);
552            buffer.format.set_default_encoding(encoding);
553            return Ok(buffer);
554        }
555
556        // Check if encoding requires full file loading
557        let requires_full_load = encoding.requires_full_file_load();
558
559        // For non-resynchronizable encodings, require confirmation unless forced
560        if requires_full_load && !force_full_load {
561            anyhow::bail!(LargeFileEncodingConfirmation {
562                path: path.to_path_buf(),
563                file_size,
564                encoding,
565            });
566        }
567
568        // For encodings that require full load (non-resynchronizable or non-UTF-8),
569        // load the entire file and convert
570        if !matches!(encoding, Encoding::Utf8 | Encoding::Ascii) {
571            tracing::info!(
572                "Large file with non-UTF-8 encoding ({:?}), loading fully for conversion",
573                encoding
574            );
575            let contents = fs.read_file(path)?;
576            let mut buffer = Self::from_bytes(contents, fs);
577            buffer.persistence.set_file_path(path.to_path_buf());
578            buffer.persistence.clear_modified();
579            buffer.file_kind.set_large_file(true); // Still mark as large file for UI purposes
580            buffer.file_kind.set_binary(is_binary);
581            return Ok(buffer);
582        }
583
584        // UTF-8/ASCII files can use lazy loading
585        let line_ending = format::detect_line_ending(&sample);
586
587        // Create an unloaded buffer that references the entire file
588        let buffer = StringBuffer {
589            id: 0,
590            data: BufferData::Unloaded {
591                file_path: path.to_path_buf(),
592                file_offset: 0,
593                bytes: file_size,
594            },
595            stored_file_offset: None,
596        };
597
598        // Create piece tree with a single piece covering the whole file
599        // No line feed count (None) since we're not computing line indexing
600        let piece_tree = if file_size > 0 {
601            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
602        } else {
603            PieceTree::empty()
604        };
605        let saved_root = piece_tree.root();
606
607        tracing::debug!(
608            "Buffer::load_from_file: loaded {} bytes, saved_file_size={}",
609            file_size,
610            file_size
611        );
612
613        Ok(TextBuffer {
614            piece_tree,
615            buffers: vec![buffer],
616            next_buffer_id: 1,
617            persistence: Persistence::new(
618                fs,
619                Some(path.to_path_buf()),
620                saved_root,
621                Some(file_size),
622            ),
623            file_kind: BufferFileKind::new(true, is_binary),
624            format: BufferFormat::new(line_ending, encoding),
625            version: 0,
626            config: BufferConfig::default(),
627        })
628    }
629
630    /// Save the buffer to its associated file
631    pub fn save(&mut self) -> anyhow::Result<()> {
632        if let Some(path) = self.persistence.file_path_owned() {
633            self.save_to_file(path)
634        } else {
635            anyhow::bail!(io::Error::new(
636                io::ErrorKind::NotFound,
637                "No file path associated with buffer",
638            ))
639        }
640    }
641
642    /// Build a write recipe from the piece tree for saving.
643    ///
644    /// Delegates to `save::build_write_recipe`.
645    #[cfg(test)]
646    pub(crate) fn build_write_recipe(&self) -> io::Result<WriteRecipe> {
647        save::build_write_recipe(
648            &self.piece_tree,
649            &self.buffers,
650            &self.format,
651            &self.file_kind,
652            &self.persistence,
653        )
654    }
655
656    /// Save the buffer to a specific file
657    ///
658    /// Uses the write recipe approach for both local and remote filesystems:
659    /// - Copy ops reference unchanged regions in the source file
660    /// - Insert ops contain new/modified data
661    ///
662    /// For remote filesystems, the recipe is sent to the agent which reconstructs
663    /// the file server-side, avoiding transfer of unchanged content.
664    ///
665    /// For local filesystems with ownership concerns (file owned by another user),
666    /// uses in-place writing to preserve ownership. Otherwise uses atomic writes.
667    ///
668    /// If the line ending format has been changed (via set_line_ending), all content
669    /// will be converted to the new format during save.
670    pub fn save_to_file<P: AsRef<Path>>(&mut self, path: P) -> anyhow::Result<()> {
671        let dest_path = path.as_ref();
672        let total = self.total_bytes();
673
674        // Handle empty files
675        if total == 0 {
676            self.persistence.fs().write_file(dest_path, &[])?;
677            self.finalize_save(dest_path)?;
678            return Ok(());
679        }
680
681        // Build the write recipe (unified for all filesystem types)
682        let recipe = save::build_write_recipe(
683            &self.piece_tree,
684            &self.buffers,
685            &self.format,
686            &self.file_kind,
687            &self.persistence,
688        )?;
689        let ops = recipe.to_write_ops();
690
691        // Check if we need in-place writing to preserve file ownership (local only)
692        // Remote filesystems handle this differently
693        let fs = self.persistence.fs();
694        let is_local = fs.remote_connection_info().is_none();
695        let use_inplace = is_local && save::should_use_inplace_write(fs, dest_path);
696
697        if use_inplace {
698            // In-place write: write directly to preserve ownership
699            save::save_with_inplace_write(fs, dest_path, &recipe)?;
700        } else if !recipe.has_copy_ops() && !is_local {
701            // Remote with no Copy ops: use write_file directly (more efficient)
702            let data = recipe.flatten_inserts();
703            fs.write_file(dest_path, &data)?;
704        } else if is_local {
705            // Local: use write_file or write_patched with sudo fallback
706            let write_result = if !recipe.has_copy_ops() {
707                let data = recipe.flatten_inserts();
708                fs.write_file(dest_path, &data)
709            } else {
710                let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
711                fs.write_patched(src_for_patch, dest_path, &ops)
712            };
713
714            if let Err(e) = write_result {
715                if e.kind() == io::ErrorKind::PermissionDenied {
716                    // Create temp file and return sudo error
717                    let original_metadata = fs.metadata_if_exists(dest_path);
718                    let (temp_path, mut temp_file) = save::create_temp_file(fs, dest_path)?;
719                    save::write_recipe_to_file(fs, &mut temp_file, &recipe)?;
720                    temp_file.sync_all()?;
721                    drop(temp_file);
722                    return Err(save::make_sudo_error(
723                        temp_path,
724                        dest_path,
725                        original_metadata,
726                    ));
727                }
728                return Err(e.into());
729            }
730        } else {
731            // Remote with Copy ops: use write_patched
732            let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
733            fs.write_patched(src_for_patch, dest_path, &ops)?;
734        }
735
736        self.finalize_save(dest_path)?;
737        Ok(())
738    }
739
740    /// Finalize save state after successful write.
741    fn finalize_save(&mut self, dest_path: &Path) -> anyhow::Result<()> {
742        let new_size = self.persistence.fs().metadata(dest_path)?.size as usize;
743        tracing::debug!(
744            "Buffer::save: updating saved_file_size from {:?} to {}",
745            self.persistence.saved_file_size(),
746            new_size
747        );
748        self.persistence.set_saved_file_size(Some(new_size));
749        self.persistence.set_file_path(dest_path.to_path_buf());
750
751        // Consolidate the piece tree to synchronize with disk (for large files)
752        // or to simplify structure (for small files).
753        self.consolidate_after_save(dest_path, new_size);
754
755        self.mark_saved_snapshot();
756        self.format.promote_current_to_original();
757        Ok(())
758    }
759
760    /// Finalize buffer state after an external save operation (e.g., via sudo).
761    ///
762    /// This updates the saved snapshot and file size to match the new state on disk.
763    pub fn finalize_external_save(&mut self, dest_path: PathBuf) -> anyhow::Result<()> {
764        let new_size = self.persistence.fs().metadata(&dest_path)?.size as usize;
765        self.persistence.set_saved_file_size(Some(new_size));
766        self.persistence.set_file_path(dest_path.clone());
767
768        // Consolidate the piece tree to synchronize with disk or simplify structure.
769        self.consolidate_after_save(&dest_path, new_size);
770
771        self.mark_saved_snapshot();
772        self.format.promote_current_to_original();
773        Ok(())
774    }
775
776    /// Consolidate the piece tree into a single piece.
777    /// For large files, this creates a reference to the disk file to save memory and sync offsets.
778    /// For small files, this flattens all edits into a single in-memory buffer.
779    fn consolidate_after_save(&mut self, path: &Path, file_size: usize) {
780        if self.file_kind.is_large_file() {
781            self.consolidate_large_file(path, file_size);
782        } else {
783            self.consolidate_small_file();
784        }
785    }
786
787    /// Consolidate large file piece tree into a single piece pointing to the new file.
788    /// This ensures that subsequent operations correctly reference the new content and offsets.
789    /// Preserves total line feed count from the old tree if a scan was previously done.
790    fn consolidate_large_file(&mut self, path: &Path, file_size: usize) {
791        // Preserve line feed count from the old tree if we had scanned it
792        let preserved_lf = if self.file_kind.has_line_feed_scan() {
793            self.piece_tree.line_count().map(|c| c.saturating_sub(1))
794        } else {
795            None
796        };
797
798        let buffer = StringBuffer {
799            id: 0,
800            data: BufferData::Unloaded {
801                file_path: path.to_path_buf(),
802                file_offset: 0,
803                bytes: file_size,
804            },
805            stored_file_offset: None,
806        };
807
808        self.piece_tree = if file_size > 0 {
809            PieceTree::new(BufferLocation::Stored(0), 0, file_size, preserved_lf)
810        } else {
811            PieceTree::empty()
812        };
813
814        self.buffers = vec![buffer];
815        self.next_buffer_id = 1;
816
817        tracing::debug!(
818            "Buffer::consolidate_large_file: consolidated into single piece of {} bytes",
819            file_size
820        );
821    }
822
823    /// Consolidate small file edits into a single in-memory buffer and re-index lines.
824    fn consolidate_small_file(&mut self) {
825        if let Some(bytes) = self.get_all_text() {
826            let line_feed_cnt = bytes.iter().filter(|&&b| b == b'\n').count();
827            let len = bytes.len();
828
829            // Create a single loaded buffer with line indexing
830            let buffer = StringBuffer::new_loaded(0, bytes, true);
831
832            self.piece_tree = if len > 0 {
833                PieceTree::new(BufferLocation::Stored(0), 0, len, Some(line_feed_cnt))
834            } else {
835                PieceTree::empty()
836            };
837
838            self.buffers = vec![buffer];
839            self.next_buffer_id = 1;
840
841            tracing::debug!(
842                "Buffer::consolidate_small_file: consolidated into single loaded buffer of {} bytes",
843                len
844            );
845        }
846    }
847
848    /// Get the total number of bytes in the document
849    pub fn total_bytes(&self) -> usize {
850        self.piece_tree.total_bytes()
851    }
852
853    /// Get the total number of lines in the document
854    /// Uses the piece tree's integrated line tracking
855    /// Returns None if line count is unknown (e.g., for large files without line indexing)
856    pub fn line_count(&self) -> Option<usize> {
857        self.piece_tree.line_count()
858    }
859
860    /// Snapshot the current tree as the saved baseline
861    pub fn mark_saved_snapshot(&mut self) {
862        self.persistence.mark_saved_snapshot(&self.piece_tree);
863    }
864
865    /// Refresh the saved root to match the current tree structure without
866    /// clearing the modified flag.  Call this after structural-only changes
867    /// (e.g. chunk_split_and_load during search scan) so that
868    /// `diff_since_saved()` can take the fast `Arc::ptr_eq` path.
869    pub fn refresh_saved_root_if_unmodified(&mut self) {
870        self.persistence
871            .refresh_saved_root_if_unmodified(&self.piece_tree);
872    }
873
874    /// Diff the current piece tree against the last saved snapshot.
875    ///
876    /// See `Persistence::diff_since_saved` for the algorithm.
877    pub fn diff_since_saved(&self) -> PieceTreeDiff {
878        let _span = tracing::info_span!(
879            "diff_since_saved",
880            large_file = self.file_kind.is_large_file(),
881            modified = self.persistence.is_modified(),
882            lf_scanned = self.file_kind.has_line_feed_scan()
883        )
884        .entered();
885
886        self.persistence
887            .diff_since_saved(&self.piece_tree, &self.buffers)
888    }
889
890    /// Convert a byte offset to a line/column position
891    pub fn offset_to_position(&self, offset: usize) -> Option<Position> {
892        self.piece_tree
893            .offset_to_position(offset, &self.buffers)
894            .map(|(line, column)| Position { line, column })
895    }
896
897    /// Convert a line/column position to a byte offset
898    pub fn position_to_offset(&self, position: Position) -> usize {
899        self.piece_tree
900            .position_to_offset(position.line, position.column, &self.buffers)
901    }
902
903    /// Insert text at the given byte offset
904    pub fn insert_bytes(&mut self, offset: usize, text: Vec<u8>) -> Cursor {
905        if text.is_empty() {
906            return self.piece_tree.cursor_at_offset(offset);
907        }
908
909        // Mark as modified (updates version)
910        self.mark_content_modified();
911
912        // Count line feeds in the text to insert
913        let line_feed_cnt = Some(text.iter().filter(|&&b| b == b'\n').count());
914
915        // Optimization: try to append to existing buffer if insertion is at piece boundary
916        let (buffer_location, buffer_offset, text_len) =
917            if let Some(append_info) = self.try_append_to_existing_buffer(offset, &text) {
918                append_info
919            } else {
920                // Create a new StringBuffer for this insertion
921                let buffer_id = self.next_buffer_id;
922                self.next_buffer_id += 1;
923                let buffer = StringBuffer::new(buffer_id, text.clone());
924                self.buffers.push(buffer);
925                (BufferLocation::Added(buffer_id), 0, text.len())
926            };
927
928        // When line feeds have been scanned, ensure the chunk at the insertion
929        // point is loaded so compute_line_feeds_static can recount during splits.
930        if self.file_kind.has_line_feed_scan() {
931            self.ensure_chunk_loaded_at(offset);
932        }
933
934        // Update piece tree (need to pass buffers reference)
935        self.piece_tree.insert(
936            offset,
937            buffer_location,
938            buffer_offset,
939            text_len,
940            line_feed_cnt,
941            &self.buffers,
942        )
943    }
944
945    /// Try to append to an existing buffer if insertion point aligns with buffer end
946    /// Returns (BufferLocation, buffer_offset, text_len) if append succeeds, None otherwise
947    fn try_append_to_existing_buffer(
948        &mut self,
949        offset: usize,
950        text: &[u8],
951    ) -> Option<(BufferLocation, usize, usize)> {
952        // Only optimize for non-empty insertions after existing content
953        if text.is_empty() || offset == 0 {
954            return None;
955        }
956
957        // Find the piece containing the byte just before the insertion point
958        // This avoids the saturating_sub issue
959        let piece_info = self.piece_tree.find_by_offset(offset - 1)?;
960
961        // Check if insertion is exactly at the end of this piece
962        // offset_in_piece tells us where (offset-1) is within the piece
963        // For insertion to be at piece end, (offset-1) must be the last byte
964        let offset_in_piece = piece_info.offset_in_piece?;
965        if offset_in_piece + 1 != piece_info.bytes {
966            return None; // Not at the end of the piece
967        }
968
969        // Only append to "Added" buffers (not original Stored buffers)
970        if !matches!(piece_info.location, BufferLocation::Added(_)) {
971            return None;
972        }
973
974        let buffer_id = piece_info.location.buffer_id();
975        let buffer = self.buffers.get_mut(buffer_id)?;
976
977        // Check if buffer is loaded
978        let buffer_len = buffer.get_data()?.len();
979
980        // Check if this piece ends exactly at the end of its buffer
981        if piece_info.offset + piece_info.bytes != buffer_len {
982            return None;
983        }
984
985        // Perfect! Append to this buffer
986        let append_offset = buffer.append(text);
987
988        Some((piece_info.location, append_offset, text.len()))
989    }
990
991    /// Insert text (from &str) at the given byte offset
992    pub fn insert(&mut self, offset: usize, text: &str) {
993        self.insert_bytes(offset, text.as_bytes().to_vec());
994    }
995
996    /// Insert text at a line/column position
997    /// This now uses the optimized piece_tree.insert_at_position() for a single traversal
998    pub fn insert_at_position(&mut self, position: Position, text: Vec<u8>) -> Cursor {
999        if text.is_empty() {
1000            let offset = self.position_to_offset(position);
1001            return self.piece_tree.cursor_at_offset(offset);
1002        }
1003
1004        self.mark_content_modified();
1005
1006        // Count line feeds in the text to insert
1007        let line_feed_cnt = text.iter().filter(|&&b| b == b'\n').count();
1008
1009        // Create a new StringBuffer for this insertion
1010        let buffer_id = self.next_buffer_id;
1011        self.next_buffer_id += 1;
1012        let buffer = StringBuffer::new(buffer_id, text.clone());
1013        self.buffers.push(buffer);
1014
1015        // Use the optimized position-based insertion (single traversal)
1016        self.piece_tree.insert_at_position(
1017            position.line,
1018            position.column,
1019            BufferLocation::Added(buffer_id),
1020            0,
1021            text.len(),
1022            line_feed_cnt,
1023            &self.buffers,
1024        )
1025    }
1026
1027    /// Delete text starting at the given byte offset
1028    pub fn delete_bytes(&mut self, offset: usize, bytes: usize) {
1029        if bytes == 0 || offset >= self.total_bytes() {
1030            return;
1031        }
1032
1033        // When line feeds have been scanned, ensure chunks at delete boundaries
1034        // are loaded so compute_line_feeds_static can recount during splits.
1035        if self.file_kind.has_line_feed_scan() {
1036            self.ensure_chunk_loaded_at(offset);
1037            let end = (offset + bytes).min(self.total_bytes());
1038            if end > offset {
1039                self.ensure_chunk_loaded_at(end.saturating_sub(1));
1040            }
1041        }
1042
1043        // Update piece tree
1044        self.piece_tree.delete(offset, bytes, &self.buffers);
1045
1046        self.mark_content_modified();
1047    }
1048
1049    /// Delete text in a range
1050    pub fn delete(&mut self, range: Range<usize>) {
1051        if range.end > range.start {
1052            self.delete_bytes(range.start, range.end - range.start);
1053        }
1054    }
1055
1056    /// Delete text in a line/column range
1057    /// This now uses the optimized piece_tree.delete_position_range() for a single traversal
1058    pub fn delete_range(&mut self, start: Position, end: Position) {
1059        // Use the optimized position-based deletion
1060        self.piece_tree.delete_position_range(
1061            start.line,
1062            start.column,
1063            end.line,
1064            end.column,
1065            &self.buffers,
1066        );
1067        self.mark_content_modified();
1068    }
1069
1070    /// Replace the entire buffer content with new content
1071    /// This is an O(n) operation that rebuilds the piece tree in a single pass,
1072    /// avoiding the O(n²) complexity of applying individual edits.
1073    ///
1074    /// This is used for bulk operations like "replace all" where applying
1075    /// individual edits would be prohibitively slow.
1076    pub fn replace_content(&mut self, new_content: &str) {
1077        let bytes = new_content.len();
1078        let content_bytes = new_content.as_bytes().to_vec();
1079
1080        // Count line feeds in the new content
1081        let line_feed_cnt = content_bytes.iter().filter(|&&b| b == b'\n').count();
1082
1083        // Create a new StringBuffer for the new content
1084        let buffer_id = self.next_buffer_id;
1085        self.next_buffer_id += 1;
1086        let buffer = StringBuffer::new(buffer_id, content_bytes);
1087        self.buffers.push(buffer);
1088
1089        // Rebuild the piece tree with a single piece containing all the new content
1090        if bytes > 0 {
1091            self.piece_tree = PieceTree::new(
1092                BufferLocation::Added(buffer_id),
1093                0,
1094                bytes,
1095                Some(line_feed_cnt),
1096            );
1097        } else {
1098            self.piece_tree = PieceTree::empty();
1099        }
1100
1101        self.mark_content_modified();
1102    }
1103
1104    /// Restore a previously saved buffer state (for undo/redo of BulkEdit).
1105    ///
1106    /// This restores the piece tree AND the buffers list, which is critical
1107    /// because consolidate_after_save() replaces self.buffers. Without restoring
1108    /// buffers, the piece tree would reference buffer IDs that no longer exist.
1109    pub fn restore_buffer_state(&mut self, snapshot: &BufferSnapshot) {
1110        self.piece_tree = snapshot.piece_tree.clone();
1111        self.buffers = snapshot.buffers.clone();
1112        self.next_buffer_id = snapshot.next_buffer_id;
1113        self.mark_content_modified();
1114    }
1115
1116    /// Snapshot the current buffer state (piece tree + buffers) for BulkEdit undo/redo.
1117    ///
1118    /// The snapshot includes buffers because consolidate_after_save() can replace
1119    /// self.buffers between the snapshot and restore, which would otherwise cause
1120    /// the restored piece tree to reference nonexistent buffer IDs.
1121    pub fn snapshot_buffer_state(&self) -> Arc<BufferSnapshot> {
1122        Arc::new(BufferSnapshot {
1123            piece_tree: self.piece_tree.clone(),
1124            buffers: self.buffers.clone(),
1125            next_buffer_id: self.next_buffer_id,
1126        })
1127    }
1128
1129    /// Apply bulk edits efficiently in a single pass
1130    /// Returns the net change in bytes
1131    pub fn apply_bulk_edits(&mut self, edits: &[(usize, usize, &str)]) -> isize {
1132        // Pre-allocate buffers for all insert texts (only non-empty texts)
1133        // This avoids the borrow conflict in the closure
1134        // IMPORTANT: Only add entries for non-empty texts because the closure
1135        // is only called for edits with non-empty insert text
1136        let mut buffer_info: Vec<(BufferLocation, usize, usize, Option<usize>)> = Vec::new();
1137
1138        for (_, _, text) in edits {
1139            if !text.is_empty() {
1140                let buffer_id = self.next_buffer_id;
1141                self.next_buffer_id += 1;
1142                let content = text.as_bytes().to_vec();
1143                let lf_cnt = content.iter().filter(|&&b| b == b'\n').count();
1144                let bytes = content.len();
1145                let buffer = StringBuffer::new(buffer_id, content);
1146                self.buffers.push(buffer);
1147                buffer_info.push((BufferLocation::Added(buffer_id), 0, bytes, Some(lf_cnt)));
1148            }
1149            // No placeholder for empty texts - the closure is only called for non-empty texts
1150        }
1151
1152        // Now call apply_bulk_edits with a simple index-based closure
1153        let mut idx = 0;
1154        let delta = self
1155            .piece_tree
1156            .apply_bulk_edits(edits, &self.buffers, |_text| {
1157                let info = buffer_info[idx];
1158                idx += 1;
1159                info
1160            });
1161
1162        self.mark_content_modified();
1163        delta
1164    }
1165
1166    /// Get text from a byte offset range
1167    /// This now uses the optimized piece_tree.iter_pieces_in_range() for a single traversal
1168    /// Get text from a byte offset range (read-only)
1169    /// Returns None if any buffer in the range is unloaded
1170    /// PRIVATE: External code should use get_text_range_mut() which handles lazy loading
1171    fn get_text_range(&self, offset: usize, bytes: usize) -> Option<Vec<u8>> {
1172        if bytes == 0 {
1173            return Some(Vec::new());
1174        }
1175
1176        let mut result = Vec::with_capacity(bytes);
1177        let end_offset = offset + bytes;
1178        let mut collected = 0;
1179
1180        // Use the efficient piece iterator (single O(log n) traversal + O(N) iteration)
1181        for piece_view in self.piece_tree.iter_pieces_in_range(offset, end_offset) {
1182            let buffer_id = piece_view.location.buffer_id();
1183            if let Some(buffer) = self.buffers.get(buffer_id) {
1184                // Calculate the range to read from this piece
1185                let piece_start_in_doc = piece_view.doc_offset;
1186                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;
1187
1188                // Clip to the requested range
1189                let read_start = offset.max(piece_start_in_doc);
1190                let read_end = end_offset.min(piece_end_in_doc);
1191
1192                if read_end > read_start {
1193                    let offset_in_piece = read_start - piece_start_in_doc;
1194                    let bytes_to_read = read_end - read_start;
1195
1196                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
1197                    let buffer_end = buffer_start + bytes_to_read;
1198
1199                    // Return None if buffer is unloaded (type-safe)
1200                    let data = buffer.get_data()?;
1201
1202                    if buffer_end <= data.len() {
1203                        result.extend_from_slice(&data[buffer_start..buffer_end]);
1204                        collected += bytes_to_read;
1205
1206                        if collected >= bytes {
1207                            break;
1208                        }
1209                    }
1210                }
1211            }
1212        }
1213
1214        Some(result)
1215    }
1216
1217    /// Get text from a byte offset range with lazy loading
1218    /// This will load unloaded chunks on-demand and always returns complete data
1219    ///
1220    /// Returns an error if loading fails or if data cannot be read for any reason.
1221    ///
1222    /// NOTE: Currently loads entire buffers on-demand. Future optimization would split
1223    /// large pieces and load only LOAD_CHUNK_SIZE chunks at a time.
1224    pub fn get_text_range_mut(&mut self, offset: usize, bytes: usize) -> Result<Vec<u8>> {
1225        let _span = tracing::info_span!("get_text_range_mut", offset, bytes).entered();
1226        if bytes == 0 {
1227            return Ok(Vec::new());
1228        }
1229
1230        let mut result = Vec::with_capacity(bytes);
1231        // Clamp end_offset to buffer length to handle reads beyond EOF
1232        let end_offset = (offset + bytes).min(self.len());
1233        let mut current_offset = offset;
1234        let mut iteration_count = 0u32;
1235
1236        // Keep iterating until we've collected all requested bytes
1237        while current_offset < end_offset {
1238            iteration_count += 1;
1239            let mut made_progress = false;
1240            let mut restarted_iteration = false;
1241
1242            // Use the efficient piece iterator (single O(log n) traversal + O(N) iteration)
1243            for piece_view in self
1244                .piece_tree
1245                .iter_pieces_in_range(current_offset, end_offset)
1246            {
1247                let buffer_id = piece_view.location.buffer_id();
1248
1249                // Check if buffer needs loading
1250                let needs_loading = self
1251                    .buffers
1252                    .get(buffer_id)
1253                    .map(|b| !b.is_loaded())
1254                    .unwrap_or(false);
1255
1256                if needs_loading && self.chunk_split_and_load(&piece_view, current_offset)? {
1257                    restarted_iteration = true;
1258                    break;
1259                }
1260
1261                // Calculate the range to read from this piece
1262                let piece_start_in_doc = piece_view.doc_offset;
1263                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;
1264
1265                // Clip to the requested range
1266                let read_start = current_offset.max(piece_start_in_doc);
1267                let read_end = end_offset.min(piece_end_in_doc);
1268
1269                if read_end > read_start {
1270                    let offset_in_piece = read_start - piece_start_in_doc;
1271                    let bytes_to_read = read_end - read_start;
1272
1273                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
1274                    let buffer_end = buffer_start + bytes_to_read;
1275
1276                    // Buffer should be loaded now
1277                    let buffer = self.buffers.get(buffer_id).context("Buffer not found")?;
1278                    let data = buffer
1279                        .get_data()
1280                        .context("Buffer data unavailable after load")?;
1281
1282                    anyhow::ensure!(
1283                        buffer_end <= data.len(),
1284                        "Buffer range out of bounds: requested {}..{}, buffer size {}",
1285                        buffer_start,
1286                        buffer_end,
1287                        data.len()
1288                    );
1289
1290                    result.extend_from_slice(&data[buffer_start..buffer_end]);
1291                    current_offset = read_end;
1292                    made_progress = true;
1293                }
1294            }
1295
1296            // If we didn't make progress and didn't restart iteration, this is an error
1297            if !made_progress && !restarted_iteration {
1298                tracing::error!(
1299                    "get_text_range_mut: No progress at offset {} (requested range: {}..{}, buffer len: {})",
1300                    current_offset,
1301                    offset,
1302                    end_offset,
1303                    self.len()
1304                );
1305                tracing::error!(
1306                    "Piece tree stats: {} total bytes",
1307                    self.piece_tree.stats().total_bytes
1308                );
1309                anyhow::bail!(
1310                    "Failed to read data at offset {}: no progress made (requested {}..{}, buffer len: {})",
1311                    current_offset,
1312                    offset,
1313                    end_offset,
1314                    self.len()
1315                );
1316            }
1317        }
1318
1319        if iteration_count > 1 {
1320            tracing::info!(
1321                iteration_count,
1322                result_len = result.len(),
1323                "get_text_range_mut: completed with multiple iterations"
1324            );
1325        }
1326
1327        Ok(result)
1328    }
1329
1330    /// Prepare a viewport for rendering
1331    ///
1332    /// This is called before rendering with &mut access to pre-load all data
1333    /// that will be needed for the viewport. It estimates the number of bytes
1334    /// needed based on the line count and pre-loads them.
1335    ///
1336    /// # Arguments
1337    /// * `start_offset` - The byte offset where the viewport starts
1338    /// * `line_count` - The number of lines to prepare (estimate)
1339    ///
1340    /// # Returns
1341    /// Ok(()) if preparation succeeded, Err if loading failed
1342    pub fn prepare_viewport(&mut self, start_offset: usize, line_count: usize) -> Result<()> {
1343        let _span = tracing::info_span!("prepare_viewport", start_offset, line_count).entered();
1344        // Estimate how many bytes we need (pessimistic assumption)
1345        // Average line length is typically 80-100 bytes, but we use 200 to be safe
1346        let estimated_bytes = line_count.saturating_mul(200);
1347
1348        // Cap the estimate at the remaining bytes in the document
1349        let remaining_bytes = self.total_bytes().saturating_sub(start_offset);
1350        let bytes_to_load = estimated_bytes.min(remaining_bytes);
1351        tracing::trace!(
1352            bytes_to_load,
1353            total_bytes = self.total_bytes(),
1354            "prepare_viewport loading"
1355        );
1356
1357        // Pre-load with full chunk-splitting support
1358        // This may load more than we need, but ensures all data is available
1359        self.get_text_range_mut(start_offset, bytes_to_load)?;
1360
1361        Ok(())
1362    }
1363
    /// Split a piece that references a large unloaded buffer, create a chunk
    /// buffer for the region around `current_offset`, and load it.
    ///
    /// Returns `true` if the piece tree was modified (caller must restart its
    /// iteration), `false` if the piece was small enough to load in-place.
    ///
    /// # Arguments
    /// * `piece_view` - The piece (as seen by the caller's iterator) whose
    ///   buffer is not loaded yet.
    /// * `current_offset` - Document offset the caller wants to read next;
    ///   for oversized pieces the loaded chunk is positioned around it.
    ///
    /// # Errors
    /// Fails if the referenced buffer (or the newly pushed chunk buffer) is
    /// not found, if the chunk buffer cannot be created, or if loading from
    /// the filesystem fails.
    fn chunk_split_and_load(
        &mut self,
        piece_view: &PieceView,
        current_offset: usize,
    ) -> Result<bool> {
        let buffer_id = piece_view.location.buffer_id();

        // The underlying buffer may be much larger than this piece (e.g. the
        // whole-file Stored buffer after rebuild_with_pristine_saved_root).
        // We must chunk-split if either the piece or its buffer exceeds
        // LOAD_CHUNK_SIZE, because `load()` loads the entire buffer.
        // A missing buffer or a `None` from `unloaded_bytes()` is treated as 0 bytes.
        let buffer_bytes = self
            .buffers
            .get(buffer_id)
            .and_then(|b| b.unloaded_bytes())
            .unwrap_or(0);
        // Note: the second condition compares the buffer size against the
        // piece size, i.e. it triggers whenever the buffer is larger than the
        // piece that references it.
        let needs_chunk_split =
            piece_view.bytes > LOAD_CHUNK_SIZE || buffer_bytes > piece_view.bytes;

        tracing::info!(
            buffer_id,
            piece_bytes = piece_view.bytes,
            buffer_bytes,
            needs_chunk_split,
            piece_doc_offset = piece_view.doc_offset,
            current_offset,
            "chunk_split_and_load: loading unloaded piece"
        );

        if !needs_chunk_split {
            // Piece is small enough and its buffer matches — load in-place.
            let _span = tracing::info_span!(
                "load_small_buffer",
                piece_bytes = piece_view.bytes,
                buffer_id,
            )
            .entered();
            self.buffers
                .get_mut(buffer_id)
                .context("Buffer not found")?
                .load(&**self.persistence.fs())
                .context("Failed to load buffer")?;
            // The tree was not touched, so the caller can keep iterating.
            return Ok(false);
        }

        let _span = tracing::info_span!(
            "chunk_split_and_load",
            piece_bytes = piece_view.bytes,
            buffer_id,
        )
        .entered();

        let piece_start_in_doc = piece_view.doc_offset;
        // Saturates to 0 when `current_offset` lies before the piece start.
        let offset_in_piece = current_offset.saturating_sub(piece_start_in_doc);

        // When the piece already fits within LOAD_CHUNK_SIZE, create a chunk
        // buffer for the exact piece range (no alignment/splitting needed).
        // Alignment rounding is only useful when carving a sub-range out of a
        // piece larger than LOAD_CHUNK_SIZE.
        let (chunk_start_in_buffer, chunk_bytes) = if piece_view.bytes <= LOAD_CHUNK_SIZE {
            (piece_view.buffer_offset, piece_view.bytes)
        } else {
            // Round the target buffer position down to a CHUNK_ALIGNMENT boundary.
            let start =
                (piece_view.buffer_offset + offset_in_piece) / CHUNK_ALIGNMENT * CHUNK_ALIGNMENT;
            // Take at most LOAD_CHUNK_SIZE bytes, never past the end of the piece.
            let bytes = LOAD_CHUNK_SIZE
                .min((piece_view.buffer_offset + piece_view.bytes).saturating_sub(start));
            (start, bytes)
        };

        // Calculate document offsets for splitting
        // (saturates to 0 if alignment rounded `chunk_start_in_buffer` below
        // the piece's own buffer offset).
        let chunk_start_offset_in_piece =
            chunk_start_in_buffer.saturating_sub(piece_view.buffer_offset);
        let split_start_in_doc = piece_start_in_doc + chunk_start_offset_in_piece;
        let split_end_in_doc = split_start_in_doc + chunk_bytes;

        // Split the piece to isolate the chunk
        // (a split is skipped when the chunk edge coincides with the piece edge).
        if chunk_start_offset_in_piece > 0 {
            self.piece_tree
                .split_at_offset(split_start_in_doc, &self.buffers);
        }
        if split_end_in_doc < piece_start_in_doc + piece_view.bytes {
            self.piece_tree
                .split_at_offset(split_end_in_doc, &self.buffers);
        }

        // Create a new buffer for this chunk
        let chunk_buffer = self
            .buffers
            .get(buffer_id)
            .context("Buffer not found")?
            .create_chunk_buffer(self.next_buffer_id, chunk_start_in_buffer, chunk_bytes)
            .context("Failed to create chunk buffer")?;

        // The id is consumed only after the chunk buffer was created successfully.
        self.next_buffer_id += 1;
        let new_buffer_id = chunk_buffer.id;
        self.buffers.push(chunk_buffer);

        // Update the piece to reference the new chunk buffer
        self.piece_tree.replace_buffer_reference(
            buffer_id,
            piece_view.buffer_offset + chunk_start_offset_in_piece,
            chunk_bytes,
            BufferLocation::Added(new_buffer_id),
        );

        // Load the chunk buffer
        self.buffers
            .get_mut(new_buffer_id)
            .context("Chunk buffer not found")?
            .load(&**self.persistence.fs())
            .context("Failed to load chunk")?;

        // split_at_offset uses compute_line_feeds_static which returns None
        // for unloaded buffers, destroying the scanned line feed counts.
        // Fix up: the loaded chunk is counted from memory, remaining unloaded
        // pieces use the filesystem's count_line_feeds_in_range.
        if self.file_kind.has_line_feed_scan() {
            let leaves = self.piece_tree.get_leaves();
            // (leaf index, recomputed line feed count) pairs to write back.
            let mut fixups: Vec<(usize, usize)> = Vec::new();
            for (idx, leaf) in leaves.iter().enumerate() {
                if leaf.line_feed_cnt.is_none() {
                    // A failed scan is ignored here; that leaf keeps its
                    // unknown (`None`) line feed count.
                    if let Ok(count) = self.scan_leaf(leaf) {
                        fixups.push((idx, count));
                    }
                }
            }
            if !fixups.is_empty() {
                self.piece_tree.update_leaf_line_feeds_path_copy(&fixups);
            }
        }

        // Keep saved_root in sync with viewport-loading tree restructures so
        // that diff_since_saved() can match by (location, offset) identity.
        //
        // When !modified the current tree IS the saved state, so just snapshot.
        // When modified, we must apply the same Stored→Added leaf replacement
        // to saved_root so the diff doesn't see loaded-but-unedited regions as
        // changed.
        if !self.persistence.is_modified() {
            self.persistence.set_saved_root(self.piece_tree.root());
        } else {
            self.persistence.apply_chunk_load_to_saved_root(
                buffer_id,
                chunk_start_in_buffer,
                chunk_bytes,
                new_buffer_id,
            );
        }

        // The tree changed: the caller's piece iterator is stale.
        Ok(true)
    }
1520
1521    /// Get all text as a single Vec<u8>
1522    /// Returns None if any buffers are unloaded (lazy loading)
1523    /// CRATE-PRIVATE: External code should use get_text_range_mut() or DocumentModel methods
1524    pub(crate) fn get_all_text(&self) -> Option<Vec<u8>> {
1525        self.get_text_range(0, self.total_bytes())
1526    }
1527
1528    /// Get all text as a String
1529    /// Returns None if any buffers are unloaded (lazy loading)
1530    /// CRATE-PRIVATE: External code should use get_text_range_mut() or DocumentModel methods
1531    pub(crate) fn get_all_text_string(&self) -> Option<String> {
1532        self.get_all_text()
1533            .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
1534    }
1535
1536    /// Get text from a byte range as bytes
1537    /// CRATE-PRIVATE: Returns empty vector if any buffers are unloaded (silently fails!)
1538    /// Only use this when you KNOW the data is loaded (e.g., for syntax highlighting small regions)
1539    /// External code should use get_text_range_mut() or DocumentModel methods
1540    pub(crate) fn slice_bytes(&self, range: Range<usize>) -> Vec<u8> {
1541        self.get_text_range(range.start, range.end.saturating_sub(range.start))
1542            .unwrap_or_default()
1543    }
1544
1545    /// Get all text as a String
1546    /// Returns None if any buffers are unloaded (lazy loading)
1547    pub fn to_string(&self) -> Option<String> {
1548        self.get_all_text_string()
1549    }
1550
1551    /// Get the total number of bytes
1552    pub fn len(&self) -> usize {
1553        self.total_bytes()
1554    }
1555
1556    /// Check if the buffer is empty
1557    pub fn is_empty(&self) -> bool {
1558        self.total_bytes() == 0
1559    }
1560
1561    /// Get the file path associated with this buffer
1562    pub fn file_path(&self) -> Option<&Path> {
1563        self.persistence.file_path()
1564    }
1565
1566    /// Update the file path after a rename operation on disk.
1567    pub fn rename_file_path(&mut self, path: PathBuf) {
1568        self.persistence.set_file_path(path);
1569    }
1570
1571    /// Clear the file path (make buffer unnamed)
1572    /// Note: This does NOT affect Unloaded chunk file_paths used for lazy loading.
1573    /// Those still point to the original source file for chunk loading.
1574    pub fn clear_file_path(&mut self) {
1575        self.persistence.clear_file_path();
1576    }
1577
1578    /// Extend buffer to include more bytes from a streaming source file.
1579    /// Used for stdin streaming where the temp file grows over time, and
1580    /// for plugin streaming via `RefreshBufferFromDisk`.
1581    ///
1582    /// Counts line feeds in the appended region so the new piece carries
1583    /// a real `line_feed_cnt` instead of `None`. Without this, any
1584    /// previously-known line count on the existing pieces propagates to
1585    /// `line_count() = None` (the piece-tree's `total_line_feeds`
1586    /// returns `None` if any piece is unknown), which in turn breaks the
1587    /// visual-row index used by the scrollbar.
1588    ///
1589    /// Falls back to `None` only when the filesystem can't count
1590    /// (errored stat / read). The buffer is still usable then — just
1591    /// without precise line indexing, same as a large file opened
1592    /// without a scan.
1593    pub fn extend_streaming(&mut self, source_path: &Path, new_size: usize) {
1594        let old_size = self.total_bytes();
1595        if new_size <= old_size {
1596            return;
1597        }
1598
1599        let additional_bytes = new_size - old_size;
1600
1601        // Create new Unloaded buffer for the appended region
1602        let buffer_id = self.next_buffer_id;
1603        self.next_buffer_id += 1;
1604
1605        let new_buffer = StringBuffer::new_unloaded(
1606            buffer_id,
1607            source_path.to_path_buf(),
1608            old_size,         // file_offset - where this chunk starts in the file
1609            additional_bytes, // bytes - size of this chunk
1610        );
1611        self.buffers.push(new_buffer);
1612
1613        // Count line feeds in the appended region from disk so the
1614        // piece carries a known line count. Counting is cheap — it's a
1615        // streaming scan of `additional_bytes`, no buffer materialisation.
1616        let line_feed_cnt = self
1617            .persistence
1618            .fs()
1619            .count_line_feeds_in_range(source_path, old_size as u64, additional_bytes)
1620            .ok();
1621
1622        // Append piece at end of document (insert at offset == total_bytes)
1623        self.piece_tree.insert(
1624            old_size,
1625            BufferLocation::Stored(buffer_id),
1626            0,
1627            additional_bytes,
1628            line_feed_cnt,
1629            &self.buffers,
1630        );
1631    }
1632
1633    /// Check if the buffer has been modified since last save
1634    pub fn is_modified(&self) -> bool {
1635        self.persistence.is_modified()
1636    }
1637
1638    /// Clear the modified flag (after save)
1639    pub fn clear_modified(&mut self) {
1640        self.persistence.clear_modified();
1641    }
1642
1643    /// Set the modified flag explicitly
1644    /// Used by undo/redo to restore the correct modified state
1645    pub fn set_modified(&mut self, modified: bool) {
1646        self.persistence.set_modified(modified);
1647    }
1648
1649    /// Check if buffer has pending changes for recovery auto-save
1650    pub fn is_recovery_pending(&self) -> bool {
1651        self.persistence.is_recovery_pending()
1652    }
1653
1654    /// Mark buffer as needing recovery auto-save (call after edits)
1655    pub fn set_recovery_pending(&mut self, pending: bool) {
1656        self.persistence.set_recovery_pending(pending);
1657    }
1658
1659    /// Ensure the buffer chunk at the given byte offset is loaded.
1660    ///
1661    /// When `line_feeds_scanned` is true, piece splits during insert/delete need
1662    /// the buffer data to be loaded so `compute_line_feeds_static` can accurately
1663    /// recount line feeds for each half. This method loads the chunk if needed.
1664    fn ensure_chunk_loaded_at(&mut self, offset: usize) {
1665        if let Some(piece_info) = self.piece_tree.find_by_offset(offset) {
1666            let buffer_id = piece_info.location.buffer_id();
1667            if let Some(buffer) = self.buffers.get_mut(buffer_id) {
1668                if !buffer.is_loaded() {
1669                    let buf_bytes = buffer.unloaded_bytes().unwrap_or(0);
1670                    tracing::info!(
1671                        "ensure_chunk_loaded_at: loading buffer {} ({} bytes) for offset {}",
1672                        buffer_id,
1673                        buf_bytes,
1674                        offset
1675                    );
1676                    if let Err(e) = buffer.load(&**self.persistence.fs()) {
1677                        tracing::warn!("Failed to load chunk at offset {offset}: {e}");
1678                    }
1679                }
1680            }
1681        }
1682    }
1683
1684    /// Check if this is a large file with lazy loading enabled
1685    pub fn is_large_file(&self) -> bool {
1686        self.file_kind.is_large_file()
1687    }
1688
1689    /// Check if line feeds have been scanned for this large file.
1690    /// When true, `line_count()` returns exact values.
1691    pub fn has_line_feed_scan(&self) -> bool {
1692        self.file_kind.has_line_feed_scan()
1693    }
1694
1695    /// Get the raw piece tree leaves (for storing alongside scan chunks).
1696    pub fn piece_tree_leaves(&self) -> Vec<crate::model::piece_tree::LeafData> {
1697        self.piece_tree.get_leaves()
1698    }
1699
1700    /// Prepare work items for an incremental line scan.
1701    ///
1702    /// First splits any oversized leaves in the piece tree so every leaf is
1703    /// at most `LOAD_CHUNK_SIZE` bytes.  Then returns one work item per leaf.
1704    /// After scanning, `get_text_range_mut` will never need to split a scanned
1705    /// leaf (it's already chunk-sized), so line-feed counts are preserved.
1706    ///
1707    /// Returns `(chunks, total_bytes)`.
1708    pub fn prepare_line_scan(&mut self) -> (Vec<LineScanChunk>, usize) {
1709        // Pre-split the tree so every leaf ≤ LOAD_CHUNK_SIZE.
1710        self.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
1711
1712        let leaves = self.piece_tree.get_leaves();
1713        let total_bytes: usize = leaves.iter().map(|l| l.bytes).sum();
1714        let mut chunks = Vec::new();
1715
1716        for (idx, leaf) in leaves.iter().enumerate() {
1717            chunks.push(LineScanChunk {
1718                leaf_index: idx,
1719                byte_len: leaf.bytes,
1720                already_known: leaf.line_feed_cnt.is_some(),
1721            });
1722        }
1723
1724        (chunks, total_bytes)
1725    }
1726
1727    /// Initialize a chunked search scan over this buffer's piece tree.
1728    ///
1729    /// Used for in-editor Ctrl+F (incremental, yields to the event loop
1730    /// between chunks) and for searching dirty buffers during project grep.
1731    /// For searching files on disk, use `FileSystem::search_file` instead.
1732    pub fn search_scan_init(
1733        &mut self,
1734        regex: regex::bytes::Regex,
1735        max_matches: usize,
1736        query_len: usize,
1737    ) -> ChunkedSearchState {
1738        let (chunks, total_bytes) = self.prepare_line_scan();
1739        ChunkedSearchState {
1740            chunks,
1741            next_chunk: 0,
1742            next_doc_offset: 0,
1743            total_bytes,
1744            scanned_bytes: 0,
1745            regex,
1746            matches: Vec::new(),
1747            overlap_tail: Vec::new(),
1748            overlap_doc_offset: 0,
1749            max_matches,
1750            capped: false,
1751            query_len,
1752            running_line: 1,
1753        }
1754    }
1755
    /// Process one chunk of a chunked search scan.
    ///
    /// Loads the next chunk via `get_text_range_mut`, prepends overlap from
    /// the previous chunk, runs the regex, and appends matches to `state`
    /// with line/column/context computed on the fly from the loaded bytes.
    ///
    /// Line numbers are tracked incrementally via `running_line` — each
    /// chunk counts newlines in its non-overlap portion to advance the
    /// counter for the next chunk, and matches use an incremental cursor
    /// so total line-counting work is O(chunk_size), not O(chunk × matches).
    ///
    /// Columns are 1-based BYTE columns measured from the start of the line
    /// as visible in the search buffer; `context` is that line decoded
    /// lossily as UTF-8. Both are limited to what the search buffer holds, so
    /// a line that begins before the overlap or continues past the chunk is
    /// truncated.
    ///
    /// Returns `Ok(true)` if there are more chunks to process, `Ok(false)`
    /// when the scan is complete.
    ///
    /// # Errors
    ///
    /// Returns the chunk-load error wrapped in `std::io::Error`. Note that
    /// the chunk cursor in `state` has already been advanced by then.
    ///
    /// TODO: For concurrent/parallel search (searching multiple files at once),
    /// chunks would need to return chunk-relative line numbers and have them
    /// fixed up with each file's starting line offset after all chunks complete.
    pub fn search_scan_next_chunk(
        &mut self,
        state: &mut ChunkedSearchState,
    ) -> std::io::Result<bool> {
        if state.is_done() {
            return Ok(false);
        }

        let chunk_info = state.chunks[state.next_chunk].clone();
        let doc_offset = state.next_doc_offset;

        // Advance the cursor up front; `doc_offset` keeps this chunk's start.
        state.next_chunk += 1;
        state.scanned_bytes += chunk_info.byte_len;
        state.next_doc_offset += chunk_info.byte_len;

        // Load the chunk bytes
        let chunk_bytes = self
            .get_text_range_mut(doc_offset, chunk_info.byte_len)
            .map_err(std::io::Error::other)?;

        // Build search buffer: overlap tail + new chunk
        let overlap_len = state.overlap_tail.len();
        let mut search_buf = Vec::with_capacity(overlap_len + chunk_bytes.len());
        search_buf.extend_from_slice(&state.overlap_tail);
        search_buf.extend_from_slice(&chunk_bytes);

        // Document offset of search_buf[0]: the overlap's offset when an
        // overlap was carried over, otherwise the chunk start.
        let buf_doc_offset = if overlap_len > 0 {
            state.overlap_doc_offset
        } else {
            doc_offset
        };

        // Line number at buf_doc_offset: running_line tracks the line at
        // doc_offset (start of new chunk data). Count newlines in the overlap
        // prefix to get the line at the start of the full search_buf.
        let newlines_in_overlap = search_buf[..overlap_len]
            .iter()
            .filter(|&&b| b == b'\n')
            .count();
        let mut line_at = state.running_line - newlines_in_overlap;
        // Index in search_buf up to which newlines are already folded into
        // `line_at`.
        let mut counted_to = 0usize;

        // Run regex on the combined buffer
        for m in state.regex.find_iter(&search_buf) {
            // Skip matches entirely within the overlap (already found)
            if overlap_len > 0 && m.end() <= overlap_len {
                continue;
            }

            // Stop at the match limit and record that results were truncated.
            if state.matches.len() >= state.max_matches {
                state.capped = true;
                break;
            }

            // Advance line counter incrementally to this match
            line_at += search_buf[counted_to..m.start()]
                .iter()
                .filter(|&&b| b == b'\n')
                .count();
            counted_to = m.start();

            // Find line boundaries in search_buf for context
            let line_start = search_buf[..m.start()]
                .iter()
                .rposition(|&b| b == b'\n')
                .map(|p| p + 1)
                .unwrap_or(0);
            let line_end = search_buf[m.start()..]
                .iter()
                .position(|&b| b == b'\n')
                .map(|p| m.start() + p)
                .unwrap_or(search_buf.len());

            let match_doc_offset = buf_doc_offset + m.start();
            let match_len = m.end() - m.start();
            // 1-based byte column within the line.
            let column = m.start() - line_start + 1;
            let context = String::from_utf8_lossy(&search_buf[line_start..line_end]).into_owned();

            state.matches.push(SearchMatch {
                byte_offset: match_doc_offset,
                length: match_len,
                line: line_at,
                column,
                context,
            });
        }

        // Advance running_line by newlines in the new (non-overlap) chunk data
        let newlines_in_chunk = chunk_bytes.iter().filter(|&&b| b == b'\n').count();
        state.running_line += newlines_in_chunk;

        // Save overlap tail for next chunk: at least 256 bytes (or query_len
        // if larger), but never more than the chunk itself.
        let max_overlap = state.query_len.max(256).min(chunk_bytes.len());
        let tail_start = chunk_bytes.len().saturating_sub(max_overlap);
        state.overlap_tail = chunk_bytes[tail_start..].to_vec();
        state.overlap_doc_offset = doc_offset + tail_start;

        Ok(!state.is_done())
    }
1872
1873    /// Run a complete chunked search over the piece tree (all chunks).
1874    ///
1875    /// Synchronous variant — used for dirty buffer snapshots in project
1876    /// grep and in tests.  For on-disk files, use `FileSystem::search_file`.
1877    pub fn search_scan_all(
1878        &mut self,
1879        regex: regex::bytes::Regex,
1880        max_matches: usize,
1881        query_len: usize,
1882    ) -> std::io::Result<ChunkedSearchState> {
1883        let mut state = self.search_scan_init(regex, max_matches, query_len);
1884        while self.search_scan_next_chunk(&mut state)? {}
1885        Ok(state)
1886    }
1887
1888    /// Build a hybrid search plan from the piece tree.
1889    ///
1890    /// Extracts regions (unloaded file ranges + loaded in-memory data) that
1891    /// can be searched independently.  The plan is `Send` so it can be
1892    /// executed on a background thread via `HybridSearchPlan::execute`.
1893    ///
1894    /// Returns `None` if the buffer has no file path (caller should fall
1895    /// back to `search_scan_all`).
1896    pub fn search_hybrid_plan(&mut self) -> Option<HybridSearchPlan> {
1897        let file_path = self.persistence.file_path_owned()?;
1898
1899        self.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
1900        let leaves = self.piece_tree.get_leaves();
1901
1902        let mut regions: Vec<SearchRegion> = Vec::new();
1903        let mut doc_offset = 0usize;
1904
1905        for leaf in &leaves {
1906            let buf = self.buffers.get(leaf.location.buffer_id());
1907            let is_unloaded_stored = matches!(
1908                (&leaf.location, buf),
1909                (
1910                    BufferLocation::Stored(_),
1911                    Some(StringBuffer {
1912                        data: BufferData::Unloaded { .. },
1913                        ..
1914                    }),
1915                )
1916            );
1917
1918            if is_unloaded_stored {
1919                let file_offset = match buf.unwrap().data {
1920                    BufferData::Unloaded {
1921                        file_offset: fo, ..
1922                    } => fo + leaf.offset,
1923                    _ => unreachable!(),
1924                };
1925
1926                // Merge with previous unloaded region if contiguous
1927                if let Some(SearchRegion::Unloaded {
1928                    file_offset: prev_fo,
1929                    bytes: prev_bytes,
1930                    ..
1931                }) = regions.last_mut()
1932                {
1933                    if *prev_fo + *prev_bytes == file_offset {
1934                        *prev_bytes += leaf.bytes;
1935                        doc_offset += leaf.bytes;
1936                        continue;
1937                    }
1938                }
1939                regions.push(SearchRegion::Unloaded {
1940                    file_offset,
1941                    bytes: leaf.bytes,
1942                    doc_offset,
1943                });
1944            } else {
1945                let data = match buf.and_then(|b| b.get_data()) {
1946                    Some(full) => {
1947                        let end = (leaf.offset + leaf.bytes).min(full.len());
1948                        full[leaf.offset..end].to_vec()
1949                    }
1950                    None => match self.get_text_range_mut(doc_offset, leaf.bytes) {
1951                        Ok(d) => d,
1952                        Err(_) => {
1953                            doc_offset += leaf.bytes;
1954                            continue;
1955                        }
1956                    },
1957                };
1958
1959                // Merge with previous loaded region
1960                if let Some(SearchRegion::Loaded {
1961                    data: prev_data, ..
1962                }) = regions.last_mut()
1963                {
1964                    prev_data.extend_from_slice(&data);
1965                    doc_offset += leaf.bytes;
1966                    continue;
1967                }
1968                regions.push(SearchRegion::Loaded { data, doc_offset });
1969            }
1970
1971            doc_offset += leaf.bytes;
1972        }
1973
1974        Some(HybridSearchPlan { file_path, regions })
1975    }
1976
1977    /// Hybrid search: uses `fs.search_file` for unloaded piece-tree regions
1978    /// (searches where the data lives, no network transfer) and in-memory regex
1979    /// for loaded/edited regions.  Handles overlap at region boundaries.
1980    ///
1981    /// For a huge remote file with a small local edit, this avoids transferring
1982    /// the entire file — only match metadata crosses the network.
1983    ///
1984    /// Falls back to `search_scan_all` when the buffer has no file path or is
1985    /// fully loaded.
1986    pub fn search_hybrid(
1987        &mut self,
1988        pattern: &str,
1989        opts: &FileSearchOptions,
1990        regex: Regex,
1991        max_matches: usize,
1992        query_len: usize,
1993    ) -> io::Result<Vec<SearchMatch>> {
1994        let plan = match self.search_hybrid_plan() {
1995            Some(p) => p,
1996            None => {
1997                let state = self.search_scan_all(regex, max_matches, query_len)?;
1998                return Ok(state.matches);
1999            }
2000        };
2001        plan.execute(
2002            &**self.persistence.fs(),
2003            pattern,
2004            opts,
2005            &regex,
2006            max_matches,
2007            query_len,
2008        )
2009    }
2010
2011    /// Count `\n` bytes in a single leaf.
2012    ///
2013    /// Uses `count_line_feeds_in_range` for unloaded buffers, which remote
2014    /// filesystem implementations can override to count server-side.
2015    pub fn scan_leaf(&self, leaf: &crate::model::piece_tree::LeafData) -> std::io::Result<usize> {
2016        let buffer_id = leaf.location.buffer_id();
2017        let buffer = self
2018            .buffers
2019            .get(buffer_id)
2020            .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::NotFound, "buffer not found"))?;
2021
2022        let count = match &buffer.data {
2023            crate::model::piece_tree::BufferData::Loaded { data, .. } => {
2024                let end = (leaf.offset + leaf.bytes).min(data.len());
2025                data[leaf.offset..end]
2026                    .iter()
2027                    .filter(|&&b| b == b'\n')
2028                    .count()
2029            }
2030            crate::model::piece_tree::BufferData::Unloaded {
2031                file_path,
2032                file_offset,
2033                ..
2034            } => {
2035                let read_offset = *file_offset as u64 + leaf.offset as u64;
2036                self.persistence.fs().count_line_feeds_in_range(
2037                    file_path,
2038                    read_offset,
2039                    leaf.bytes,
2040                )?
2041            }
2042        };
2043        Ok(count)
2044    }
2045
2046    /// Return the I/O parameters for an unloaded leaf, or `None` if loaded.
2047    ///
2048    /// Used by the incremental scan to distinguish leaves that can be counted
2049    /// in-memory (via `scan_leaf`) from those that need filesystem I/O.
2050    pub fn leaf_io_params(
2051        &self,
2052        leaf: &crate::model::piece_tree::LeafData,
2053    ) -> Option<(std::path::PathBuf, u64, usize)> {
2054        let buffer_id = leaf.location.buffer_id();
2055        let buffer = self.buffers.get(buffer_id)?;
2056        match &buffer.data {
2057            crate::model::piece_tree::BufferData::Loaded { .. } => None,
2058            crate::model::piece_tree::BufferData::Unloaded {
2059                file_path,
2060                file_offset,
2061                ..
2062            } => {
2063                let read_offset = *file_offset as u64 + leaf.offset as u64;
2064                Some((file_path.clone(), read_offset, leaf.bytes))
2065            }
2066        }
2067    }
2068
2069    /// Get a reference to the string buffers (for parallel scanning).
2070    pub fn buffer_slice(&self) -> &[StringBuffer] {
2071        &self.buffers
2072    }
2073
2074    /// Apply the results of an incremental line scan.
2075    pub fn apply_scan_updates(&mut self, updates: &[(usize, usize)]) {
2076        self.piece_tree.update_leaf_line_feeds(updates);
2077        self.file_kind.mark_line_feed_scan_complete();
2078    }
2079
2080    /// After an incremental line-feed scan completes, rebuild the tree so that
2081    /// `saved_root` and the current tree share `Arc` pointers for unedited
2082    /// subtrees. This makes `diff_since_saved()` O(edited regions) instead of
2083    /// O(file size).
2084    pub fn rebuild_with_pristine_saved_root(&mut self, scan_updates: &[(usize, usize)]) {
2085        let file_size = match self.persistence.saved_file_size() {
2086            Some(s) => s,
2087            None => {
2088                // Fallback: no saved file size means we can't build a pristine
2089                // tree. Just apply updates the old way.
2090                self.apply_scan_updates(scan_updates);
2091                return;
2092            }
2093        };
2094
2095        // --- Walk the current tree to extract deletions and insertions ---
2096        let total = self.total_bytes();
2097        // Deletions: gaps in Stored coverage (orig_offset, len).
2098        let mut deletions: Vec<(usize, usize)> = Vec::new();
2099        // Insertions: (post_delete_offset, location, buf_offset, bytes, lf_cnt).
2100        // post_delete_offset = cumulative surviving Stored bytes before this point.
2101        let mut insertions: Vec<(usize, BufferLocation, usize, usize, Option<usize>)> = Vec::new();
2102        let mut orig_cursor: usize = 0;
2103        let mut stored_bytes_in_doc: usize = 0;
2104
2105        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
2106            match piece.location {
2107                BufferLocation::Stored(_) => {
2108                    if piece.buffer_offset > orig_cursor {
2109                        deletions.push((orig_cursor, piece.buffer_offset - orig_cursor));
2110                    }
2111                    orig_cursor = piece.buffer_offset + piece.bytes;
2112                    stored_bytes_in_doc += piece.bytes;
2113                }
2114                BufferLocation::Added(id) => {
2115                    // Check if this Added buffer was created by loading a chunk
2116                    // from the stored file (via get_text_range_mut chunk loading).
2117                    // If so, treat it as stored content, not a user edit.
2118                    if let Some(file_off) = self.buffers.get(id).and_then(|b| b.stored_file_offset)
2119                    {
2120                        if file_off > orig_cursor {
2121                            deletions.push((orig_cursor, file_off - orig_cursor));
2122                        }
2123                        orig_cursor = file_off + piece.bytes;
2124                        stored_bytes_in_doc += piece.bytes;
2125                    } else {
2126                        insertions.push((
2127                            stored_bytes_in_doc,
2128                            piece.location,
2129                            piece.buffer_offset,
2130                            piece.bytes,
2131                            piece.line_feed_cnt,
2132                        ));
2133                    }
2134                }
2135            }
2136        }
2137        // Trailing deletion.
2138        if orig_cursor < file_size {
2139            deletions.push((orig_cursor, file_size - orig_cursor));
2140        }
2141
2142        // --- Build pristine tree (full original file, pre-split, with lf counts) ---
2143        let mut pristine = if file_size > 0 {
2144            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
2145        } else {
2146            PieceTree::empty()
2147        };
2148        pristine.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
2149        pristine.update_leaf_line_feeds(scan_updates);
2150
2151        // Snapshot the pristine tree as saved_root.
2152        self.persistence.set_saved_root(pristine.root());
2153
2154        // If no edits, the pristine tree IS the current tree.
2155        if deletions.is_empty() && insertions.is_empty() {
2156            self.piece_tree = pristine;
2157            self.file_kind.mark_line_feed_scan_complete();
2158            return;
2159        }
2160
2161        // --- Replay edits onto a clone of the pristine tree ---
2162        let mut tree = pristine;
2163
2164        // Apply deletions from HIGH to LOW offset so earlier offsets stay valid.
2165        deletions.sort_by(|a, b| b.0.cmp(&a.0));
2166        for &(offset, len) in &deletions {
2167            tree.delete(offset, len, &self.buffers);
2168        }
2169
2170        // Apply insertions from LOW to HIGH. Each insertion shifts subsequent
2171        // offsets by its byte count, tracked via insert_delta.
2172        let mut insert_delta: usize = 0;
2173        for &(offset, location, buf_offset, bytes, lf_cnt) in &insertions {
2174            tree.insert(
2175                offset + insert_delta,
2176                location,
2177                buf_offset,
2178                bytes,
2179                lf_cnt,
2180                &self.buffers,
2181            );
2182            insert_delta += bytes;
2183        }
2184
2185        // Path-copy insert/delete may split Stored leaves whose data is
2186        // Unloaded, producing fragments with line_feed_cnt = None
2187        // (compute_line_feeds_static can't read unloaded data). Fix them up
2188        // by scanning any remaining None leaves.
2189        let leaves = tree.get_leaves();
2190        let mut fixups: Vec<(usize, usize)> = Vec::new();
2191        for (idx, leaf) in leaves.iter().enumerate() {
2192            if leaf.line_feed_cnt.is_none() {
2193                if let Ok(count) = self.scan_leaf(leaf) {
2194                    fixups.push((idx, count));
2195                }
2196            }
2197        }
2198        if !fixups.is_empty() {
2199            tree.update_leaf_line_feeds_path_copy(&fixups);
2200        }
2201
2202        self.piece_tree = tree;
2203        self.file_kind.mark_line_feed_scan_complete();
2204    }
2205
2206    /// Resolve the exact byte offset for a given line number (0-indexed).
2207    ///
2208    /// Uses the tree's line feed counts to find the piece containing the target line,
2209    /// then loads/reads that piece's data to find the exact newline position.
2210    /// This works even when buffers are unloaded (large file with scanned line index).
2211    pub fn resolve_line_byte_offset(&mut self, target_line: usize) -> Option<usize> {
2212        if target_line == 0 {
2213            return Some(0);
2214        }
2215
2216        // Use tree metadata to find the piece containing the target line
2217        let (doc_offset, buffer_id, piece_offset, piece_bytes, lines_before) =
2218            self.piece_tree.piece_info_for_line(target_line)?;
2219
2220        // We need to find the (target_line - lines_before)-th newline within this piece
2221        let lines_to_skip = target_line - lines_before;
2222
2223        // Get the piece data — either from loaded buffer or read from disk
2224        let buffer = self.buffers.get(buffer_id)?;
2225        let piece_data: Vec<u8> = match &buffer.data {
2226            crate::model::piece_tree::BufferData::Loaded { data, .. } => {
2227                let end = (piece_offset + piece_bytes).min(data.len());
2228                data[piece_offset..end].to_vec()
2229            }
2230            crate::model::piece_tree::BufferData::Unloaded {
2231                file_path,
2232                file_offset,
2233                ..
2234            } => {
2235                let read_offset = *file_offset as u64 + piece_offset as u64;
2236                self.persistence
2237                    .fs()
2238                    .read_range(file_path, read_offset, piece_bytes)
2239                    .ok()?
2240            }
2241        };
2242
2243        // Count newlines to find the target line start
2244        let mut newlines_found = 0;
2245        for (i, &byte) in piece_data.iter().enumerate() {
2246            if byte == b'\n' {
2247                newlines_found += 1;
2248                if newlines_found == lines_to_skip {
2249                    // The target line starts right after this newline
2250                    return Some(doc_offset + i + 1);
2251                }
2252            }
2253        }
2254
2255        // If we didn't find enough newlines, the line starts in the next piece
2256        // Return the end of this piece as an approximation
2257        Some(doc_offset + piece_bytes)
2258    }
2259
2260    /// Get the saved file size (size of the file on disk after last load/save)
2261    /// For large files, this is used during recovery to know the expected original file size.
2262    /// Returns None for new unsaved buffers.
2263    pub fn original_file_size(&self) -> Option<usize> {
2264        // Return the tracked saved file size - this is updated when the file is
2265        // loaded or saved, so it always reflects the current file on disk.
2266        self.persistence.saved_file_size()
2267    }
2268
2269    /// Get recovery chunks for this buffer (only modified portions)
2270    ///
2271    /// For large files, this returns only the pieces that come from Added buffers
2272    /// (i.e., the modifications), not the original file content. This allows
2273    /// efficient incremental recovery without reading/writing the entire file.
2274    ///
2275    /// Returns: Vec of (original_file_offset, data) for each modified chunk
2276    /// The offset is the position in the ORIGINAL file where this chunk should be inserted.
2277    pub fn get_recovery_chunks(&self) -> Vec<(usize, Vec<u8>)> {
2278        use crate::model::piece_tree::BufferLocation;
2279
2280        let mut chunks = Vec::new();
2281        let total = self.total_bytes();
2282
2283        // Track cumulative bytes from Stored pieces as we iterate.
2284        // This gives us the original file offset for Added pieces.
2285        // The key insight: Added pieces should be inserted at the position
2286        // corresponding to where they appear relative to Stored content,
2287        // not their position in the current document.
2288        let mut stored_bytes_before = 0;
2289
2290        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
2291            match piece.location {
2292                BufferLocation::Stored(_) => {
2293                    // Accumulate stored bytes to track position in original file
2294                    stored_bytes_before += piece.bytes;
2295                }
2296                BufferLocation::Added(buffer_id) => {
2297                    if let Some(buffer) = self.buffers.iter().find(|b| b.id == buffer_id) {
2298                        // Skip buffers that originate from the original file
2299                        // (loaded by chunk_split_and_load for viewport display).
2300                        // These have stored_file_offset set and are not user edits.
2301                        //
2302                        // Why Added and not Stored? The piece tree only has two
2303                        // variants: Stored and Added. chunk_split_and_load marks
2304                        // loaded chunks as Added(new_id) because
2305                        // rebuild_with_pristine_saved_root interprets Stored
2306                        // pieces' buffer_offset as a position in the original
2307                        // file — but a chunk buffer starts at offset 0, so using
2308                        // Stored would corrupt the rebuild logic. We rely on
2309                        // stored_file_offset instead to distinguish "loaded from
2310                        // disk" from "user edit". A third BufferLocation variant
2311                        // (e.g. LoadedChunk) would make this distinction explicit
2312                        // in the type system rather than requiring this runtime
2313                        // check.
2314                        if buffer.stored_file_offset.is_some() {
2315                            stored_bytes_before += piece.bytes;
2316                            continue;
2317                        }
2318                        // Get the data from the buffer if loaded
2319                        if let Some(data) = buffer.get_data() {
2320                            // Extract just the portion this piece references
2321                            let start = piece.buffer_offset;
2322                            let end = start + piece.bytes;
2323                            if end <= data.len() {
2324                                // Use stored_bytes_before as the original file offset.
2325                                // This is where this insertion should go relative to
2326                                // the original file content.
2327                                chunks.push((stored_bytes_before, data[start..end].to_vec()));
2328                            }
2329                        }
2330                    }
2331                }
2332            }
2333        }
2334
2335        chunks
2336    }
2337
2338    /// Check if this buffer contains binary content
2339    pub fn is_binary(&self) -> bool {
2340        self.file_kind.is_binary()
2341    }
2342
2343    /// Get the line ending format for this buffer
2344    pub fn line_ending(&self) -> LineEnding {
2345        self.format.line_ending()
2346    }
2347
2348    /// Set the line ending format for this buffer
2349    ///
2350    /// This marks the buffer as modified since the line ending format has changed.
2351    /// On save, the buffer content will be converted to the new format.
2352    pub fn set_line_ending(&mut self, line_ending: LineEnding) {
2353        self.format.set_line_ending(line_ending);
2354        self.mark_content_modified();
2355    }
2356
2357    /// Set the default line ending format for a new/empty buffer
2358    ///
2359    /// Unlike `set_line_ending`, this does NOT mark the buffer as modified.
2360    /// This should be used when initializing a new buffer with a configured default.
2361    pub fn set_default_line_ending(&mut self, line_ending: LineEnding) {
2362        self.format.set_default_line_ending(line_ending);
2363    }
2364
2365    /// Get the encoding format for this buffer
2366    pub fn encoding(&self) -> Encoding {
2367        self.format.encoding()
2368    }
2369
2370    /// Set the encoding format for this buffer
2371    ///
2372    /// This marks the buffer as modified since the encoding format has changed.
2373    /// On save, the buffer content will be converted to the new encoding.
2374    pub fn set_encoding(&mut self, encoding: Encoding) {
2375        self.format.set_encoding(encoding);
2376        self.mark_content_modified();
2377    }
2378
2379    /// Set the default encoding format for a new/empty buffer
2380    ///
2381    /// Unlike `set_encoding`, this does NOT mark the buffer as modified.
2382    /// This should be used when initializing a new buffer with a configured default.
2383    pub fn set_default_encoding(&mut self, encoding: Encoding) {
2384        self.format.set_default_encoding(encoding);
2385    }
2386
2387    /// Get the first line of the buffer as a lossy UTF-8 string, suitable
2388    /// for shebang / first-line grammar detection. Returns `None` for an
2389    /// empty buffer. Non-UTF-8 bytes are replaced with U+FFFD.
2390    pub fn first_line_lossy(&self) -> Option<String> {
2391        let bytes = self.get_line(0)?;
2392        if bytes.is_empty() {
2393            return None;
2394        }
2395        Some(String::from_utf8_lossy(&bytes).into_owned())
2396    }
2397
2398    /// Get text for a specific line
2399    pub fn get_line(&self, line: usize) -> Option<Vec<u8>> {
2400        let (start, end) = self.piece_tree.line_range(line, &self.buffers)?;
2401
2402        let bytes = if let Some(end_offset) = end {
2403            end_offset.saturating_sub(start)
2404        } else {
2405            self.total_bytes().saturating_sub(start)
2406        };
2407
2408        self.get_text_range(start, bytes)
2409    }
2410
2411    /// Get the byte offset where a line starts
2412    pub fn line_start_offset(&self, line: usize) -> Option<usize> {
2413        let (start, _) = self.piece_tree.line_range(line, &self.buffers)?;
2414        Some(start)
2415    }
2416
2417    /// Get piece information at a byte offset
2418    pub fn piece_info_at_offset(&self, offset: usize) -> Option<PieceInfo> {
2419        self.piece_tree.find_by_offset(offset)
2420    }
2421
2422    /// Get tree statistics for debugging
2423    pub fn stats(&self) -> TreeStats {
2424        self.piece_tree.stats()
2425    }
2426
2427    // Search and Replace Operations
2428
2429    /// Find the next occurrence of a pattern, with wrap-around
2430    pub fn find_next(&self, pattern: &str, start_pos: usize) -> Option<usize> {
2431        if pattern.is_empty() {
2432            return None;
2433        }
2434
2435        let pattern_bytes = pattern.as_bytes();
2436        let buffer_len = self.len();
2437
2438        // Search from start_pos to end
2439        if start_pos < buffer_len {
2440            if let Some(offset) = self.find_pattern(start_pos, buffer_len, pattern_bytes) {
2441                return Some(offset);
2442            }
2443        }
2444
2445        // Wrap around: search from beginning to start_pos
2446        if start_pos > 0 {
2447            if let Some(offset) = self.find_pattern(0, start_pos, pattern_bytes) {
2448                return Some(offset);
2449            }
2450        }
2451
2452        None
2453    }
2454
2455    /// Find the next occurrence of a pattern within an optional range
2456    /// If range is None, searches the entire buffer with wrap-around (same as find_next)
2457    /// If range is Some, searches only within that range without wrap-around
2458    pub fn find_next_in_range(
2459        &self,
2460        pattern: &str,
2461        start_pos: usize,
2462        range: Option<Range<usize>>,
2463    ) -> Option<usize> {
2464        if pattern.is_empty() {
2465            return None;
2466        }
2467
2468        if let Some(search_range) = range {
2469            // Search within range only, no wrap-around
2470            let pattern_bytes = pattern.as_bytes();
2471            let search_start = start_pos.max(search_range.start);
2472            let search_end = search_range.end.min(self.len());
2473
2474            if search_start < search_end {
2475                self.find_pattern(search_start, search_end, pattern_bytes)
2476            } else {
2477                None
2478            }
2479        } else {
2480            // No range specified, use normal find_next with wrap-around
2481            self.find_next(pattern, start_pos)
2482        }
2483    }
2484
2485    /// Find pattern in a byte range using overlapping chunks
2486    fn find_pattern(&self, start: usize, end: usize, pattern: &[u8]) -> Option<usize> {
2487        if pattern.is_empty() || start >= end {
2488            return None;
2489        }
2490
2491        const CHUNK_SIZE: usize = 65536; // 64KB chunks
2492        let overlap = pattern.len().saturating_sub(1).max(1);
2493
2494        // Use the overlapping chunks iterator for efficient streaming search
2495        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, overlap);
2496
2497        for chunk in chunks {
2498            // Search the entire chunk buffer
2499            if let Some(pos) = Self::find_in_bytes(&chunk.buffer, pattern) {
2500                let match_end = pos + pattern.len();
2501                // Only report if match ENDS in or after the valid zone
2502                // This ensures patterns spanning boundaries are found exactly once
2503                if match_end > chunk.valid_start {
2504                    let absolute_pos = chunk.absolute_pos + pos;
2505                    // Verify the match doesn't extend beyond our search range
2506                    if absolute_pos + pattern.len() <= end {
2507                        return Some(absolute_pos);
2508                    }
2509                }
2510            }
2511        }
2512
2513        None
2514    }
2515
2516    /// Simple byte pattern search using naive algorithm
2517    fn find_in_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
2518        if needle.is_empty() || needle.len() > haystack.len() {
2519            return None;
2520        }
2521
2522        (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
2523    }
2524
2525    /// Find the next occurrence of a regex pattern, with wrap-around
2526    pub fn find_next_regex(&self, regex: &Regex, start_pos: usize) -> Option<usize> {
2527        let buffer_len = self.len();
2528
2529        // Search from start_pos to end
2530        if start_pos < buffer_len {
2531            if let Some(offset) = self.find_regex(start_pos, buffer_len, regex) {
2532                return Some(offset);
2533            }
2534        }
2535
2536        // Wrap around: search from beginning to start_pos
2537        if start_pos > 0 {
2538            if let Some(offset) = self.find_regex(0, start_pos, regex) {
2539                return Some(offset);
2540            }
2541        }
2542
2543        None
2544    }
2545
2546    /// Find the next occurrence of a regex pattern within an optional range
2547    pub fn find_next_regex_in_range(
2548        &self,
2549        regex: &Regex,
2550        start_pos: usize,
2551        range: Option<Range<usize>>,
2552    ) -> Option<usize> {
2553        if let Some(search_range) = range {
2554            let search_start = start_pos.max(search_range.start);
2555            let search_end = search_range.end.min(self.len());
2556
2557            if search_start < search_end {
2558                self.find_regex(search_start, search_end, regex)
2559            } else {
2560                None
2561            }
2562        } else {
2563            self.find_next_regex(regex, start_pos)
2564        }
2565    }
2566
2567    /// Find regex pattern in a byte range using overlapping chunks
2568    fn find_regex(&self, start: usize, end: usize, regex: &Regex) -> Option<usize> {
2569        if start >= end {
2570            return None;
2571        }
2572
2573        const CHUNK_SIZE: usize = 1048576; // 1MB chunks
2574        const OVERLAP: usize = 4096; // 4KB overlap for regex
2575
2576        // Use the overlapping chunks iterator for efficient streaming search
2577        // This fixes the critical bug where regex patterns spanning chunk boundaries were missed
2578        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, OVERLAP);
2579
2580        for chunk in chunks {
2581            // Search the entire chunk buffer
2582            if let Some(mat) = regex.find(&chunk.buffer) {
2583                let match_end = mat.end();
2584                // Only report if match ENDS in or after the valid zone
2585                // This ensures patterns spanning boundaries are found exactly once
2586                if match_end > chunk.valid_start {
2587                    let absolute_pos = chunk.absolute_pos + mat.start();
2588                    // Verify the match doesn't extend beyond our search range
2589                    let match_len = mat.end() - mat.start();
2590                    if absolute_pos + match_len <= end {
2591                        return Some(absolute_pos);
2592                    }
2593                }
2594            }
2595        }
2596
2597        None
2598    }
2599
2600    /// Replace a range with replacement text
2601    pub fn replace_range(&mut self, range: Range<usize>, replacement: &str) -> bool {
2602        if range.start >= self.len() {
2603            return false;
2604        }
2605
2606        let end = range.end.min(self.len());
2607        if end > range.start {
2608            self.delete_bytes(range.start, end - range.start);
2609        }
2610
2611        if !replacement.is_empty() {
2612            self.insert(range.start, replacement);
2613        }
2614
2615        true
2616    }
2617
2618    /// Find and replace the next occurrence of a pattern
2619    pub fn replace_next(
2620        &mut self,
2621        pattern: &str,
2622        replacement: &str,
2623        start_pos: usize,
2624        range: Option<Range<usize>>,
2625    ) -> Option<usize> {
2626        if let Some(pos) = self.find_next_in_range(pattern, start_pos, range.clone()) {
2627            self.replace_range(pos..pos + pattern.len(), replacement);
2628            Some(pos)
2629        } else {
2630            None
2631        }
2632    }
2633
2634    /// Replace all occurrences of a pattern with replacement text
2635    pub fn replace_all(&mut self, pattern: &str, replacement: &str) -> usize {
2636        if pattern.is_empty() {
2637            return 0;
2638        }
2639
2640        let mut count = 0;
2641        let mut pos = 0;
2642
2643        // Keep searching and replacing
2644        // Note: we search forward from last replacement to handle growth/shrinkage
2645        // Find next occurrence (no wrap-around for replace_all)
2646        while let Some(found_pos) = self.find_next_in_range(pattern, pos, Some(0..self.len())) {
2647            self.replace_range(found_pos..found_pos + pattern.len(), replacement);
2648            count += 1;
2649
2650            // Move past the replacement
2651            pos = found_pos + replacement.len();
2652
2653            // If we're at or past the end, stop
2654            if pos >= self.len() {
2655                break;
2656            }
2657        }
2658
2659        count
2660    }
2661
2662    /// Replace all occurrences of a regex pattern with replacement text
2663    pub fn replace_all_regex(&mut self, regex: &Regex, replacement: &str) -> Result<usize> {
2664        let mut count = 0;
2665        let mut pos = 0;
2666
2667        while let Some(found_pos) = self.find_next_regex_in_range(regex, pos, Some(0..self.len())) {
2668            // Get the match to find its length
2669            let text = self
2670                .get_text_range_mut(found_pos, self.len() - found_pos)
2671                .context("Failed to read text for regex match")?;
2672
2673            if let Some(mat) = regex.find(&text) {
2674                self.replace_range(found_pos..found_pos + mat.len(), replacement);
2675                count += 1;
2676                pos = found_pos + replacement.len();
2677
2678                if pos >= self.len() {
2679                    break;
2680                }
2681            } else {
2682                break;
2683            }
2684        }
2685
2686        Ok(count)
2687    }
2688
2689    // LSP Support (UTF-16 conversions)
2690
2691    /// Convert byte position to (line, column) in bytes
2692    pub fn position_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
2693        self.offset_to_position(byte_pos)
2694            .map(|pos| (pos.line, pos.column))
2695            .unwrap_or_else(|| (byte_pos / 80, 0)) // Estimate if metadata unavailable
2696    }
2697
2698    /// Convert (line, character) to byte position - 0-indexed
2699    /// character is in BYTES, not UTF-16 code units
2700    /// Optimized to use single line_range() call instead of two
2701    pub fn line_col_to_position(&self, line: usize, character: usize) -> usize {
2702        if let Some((start, end)) = self.piece_tree.line_range(line, &self.buffers) {
2703            // Calculate line length from the range
2704            let line_len = if let Some(end_offset) = end {
2705                end_offset.saturating_sub(start)
2706            } else {
2707                self.total_bytes().saturating_sub(start)
2708            };
2709            let byte_offset = character.min(line_len);
2710            start + byte_offset
2711        } else {
2712            // Line doesn't exist, return end of buffer
2713            self.len()
2714        }
2715    }
2716
2717    /// Convert byte position to LSP position (line, UTF-16 code units)
2718    /// LSP protocol uses UTF-16 code units for character offsets
2719    pub fn position_to_lsp_position(&self, byte_pos: usize) -> (usize, usize) {
2720        let (line, column_bytes) = self
2721            .offset_to_position(byte_pos)
2722            .map(|pos| (pos.line, pos.column))
2723            .unwrap_or_else(|| (byte_pos / 80, 0)); // Estimate if metadata unavailable
2724
2725        // Get the line content
2726        if let Some(line_bytes) = self.get_line(line) {
2727            // Convert byte offset to UTF-16 code units
2728            let text_before = &line_bytes[..column_bytes.min(line_bytes.len())];
2729            let text_str = String::from_utf8_lossy(text_before);
2730            let utf16_offset = text_str.encode_utf16().count();
2731            (line, utf16_offset)
2732        } else {
2733            (line, 0)
2734        }
2735    }
2736
2737    /// Convert LSP position (line, UTF-16 code units) to byte position
2738    /// LSP uses UTF-16 code units for character offsets, not bytes
2739    /// Optimized to use single line_range() call instead of two
2740    pub fn lsp_position_to_byte(&self, line: usize, utf16_offset: usize) -> usize {
2741        if let Some((line_start, end)) = self.piece_tree.line_range(line, &self.buffers) {
2742            // Calculate line length and get line content
2743            let line_len = if let Some(end_offset) = end {
2744                end_offset.saturating_sub(line_start)
2745            } else {
2746                self.total_bytes().saturating_sub(line_start)
2747            };
2748
2749            if line_len > 0 {
2750                // If data is unloaded, return line_start as fallback
2751                let Some(line_bytes) = self.get_text_range(line_start, line_len) else {
2752                    return line_start;
2753                };
2754                let line_str = String::from_utf8_lossy(&line_bytes);
2755
2756                // Convert UTF-16 offset to byte offset
2757                let mut utf16_count = 0;
2758                let mut byte_offset = 0;
2759
2760                for ch in line_str.chars() {
2761                    if utf16_count >= utf16_offset {
2762                        break;
2763                    }
2764                    utf16_count += ch.len_utf16();
2765                    byte_offset += ch.len_utf8();
2766                }
2767
2768                line_start + byte_offset
2769            } else {
2770                line_start
2771            }
2772        } else {
2773            // Line doesn't exist, return end of buffer
2774            self.len()
2775        }
2776    }
2777
2778    // Navigation helpers
2779
2780    /// Find the previous character boundary (UTF-8 aware)
2781    pub fn prev_char_boundary(&self, pos: usize) -> usize {
2782        if pos == 0 {
2783            return 0;
2784        }
2785
2786        // Get a few bytes before pos to find the character boundary
2787        let start = pos.saturating_sub(4);
2788        let Some(bytes) = self.get_text_range(start, pos - start) else {
2789            // Data unloaded, return pos as fallback
2790            return pos;
2791        };
2792
2793        // Walk backwards to find a UTF-8 leading byte
2794        for i in (0..bytes.len()).rev() {
2795            let byte = bytes[i];
2796            // Check if this is a UTF-8 leading byte (not a continuation byte)
2797            if (byte & 0b1100_0000) != 0b1000_0000 {
2798                return start + i;
2799            }
2800        }
2801
2802        // Fallback
2803        pos.saturating_sub(1)
2804    }
2805
2806    /// Find the next character boundary (UTF-8 aware)
2807    pub fn next_char_boundary(&self, pos: usize) -> usize {
2808        let len = self.len();
2809        if pos >= len {
2810            return len;
2811        }
2812
2813        // Get a few bytes after pos to find the character boundary
2814        let end = (pos + 5).min(len);
2815        let Some(bytes) = self.get_text_range(pos, end - pos) else {
2816            // Data unloaded, return pos as fallback
2817            return pos;
2818        };
2819
2820        // Start from index 1 (we want the NEXT boundary)
2821        for (i, &byte) in bytes.iter().enumerate().skip(1) {
2822            // Check if this is a UTF-8 leading byte (not a continuation byte)
2823            if (byte & 0b1100_0000) != 0b1000_0000 {
2824                return pos + i;
2825            }
2826        }
2827
2828        // If we got here, we're at the end or found no boundary in the range
2829        end
2830    }
2831
2832    /// Check if a byte is a UTF-8 continuation byte (not at a char boundary)
2833    /// UTF-8 continuation bytes have the pattern 10xxxxxx (0x80-0xBF)
2834    /// This is the same check that str::is_char_boundary uses internally.
2835    #[inline]
2836    fn is_utf8_continuation_byte(byte: u8) -> bool {
2837        (byte & 0b1100_0000) == 0b1000_0000
2838    }
2839
2840    /// Snap position to a valid UTF-8 character boundary
2841    /// If already at a boundary, returns the same position.
2842    /// Otherwise, moves to the previous valid boundary.
2843    pub fn snap_to_char_boundary(&self, pos: usize) -> usize {
2844        let len = self.len();
2845        if pos == 0 || pos >= len {
2846            return pos.min(len);
2847        }
2848
2849        // Get the byte at pos to check if we're at a character boundary
2850        let Some(bytes) = self.get_text_range(pos, 1) else {
2851            // Data unloaded, return pos as fallback
2852            return pos;
2853        };
2854
2855        // A position is at a char boundary if the byte there is NOT a continuation byte
2856        if !Self::is_utf8_continuation_byte(bytes[0]) {
2857            // Already at a character boundary
2858            return pos;
2859        }
2860
2861        // Not at a boundary, find the previous one
2862        self.prev_char_boundary(pos)
2863    }
2864
    /// Find the previous grapheme cluster boundary (for proper cursor movement with combining characters)
    ///
    /// This handles complex scripts like Thai where multiple Unicode code points
    /// form a single visual character (grapheme cluster). For example, Thai "ที่"
    /// is 3 code points but 1 grapheme cluster.
    ///
    /// The lookback window starts at 32 bytes and doubles whenever the
    /// returned boundary sits at the start of the chunk, that is, whenever
    /// the chunk might not contain the full grapheme. This matters for ZWJ
    /// emoji sequences and Zalgo strings with many combining marks, which
    /// can easily exceed 32 bytes.
    ///
    /// Returns 0 for `pos == 0`. Falls back to `prev_char_boundary(pos)` when
    /// the bytes cannot be read or no valid UTF-8 prefix can be decoded.
    pub fn prev_grapheme_boundary(&self, pos: usize) -> usize {
        if pos == 0 {
            return 0;
        }

        // Number of bytes in front of `pos` to inspect; doubled on each retry.
        let mut lookback: usize = 32;
        loop {
            // IMPORTANT: Align start to a valid character boundary to avoid invalid UTF-8
            // when get_text_range starts mid-character
            let raw_start = pos.saturating_sub(lookback);
            let start = if raw_start == 0 {
                0
            } else {
                // Find the character boundary at or before raw_start
                self.prev_char_boundary(raw_start + 1)
            };

            let Some(bytes) = self.get_text_range(start, pos - start) else {
                // Data unloaded, fall back to char boundary
                return self.prev_char_boundary(pos);
            };

            let text = match std::str::from_utf8(&bytes) {
                Ok(s) => s,
                Err(e) => {
                    // Still got invalid UTF-8 (shouldn't happen after alignment)
                    // Try using just the valid portion
                    // NOTE(review): the valid portion is a PREFIX of the chunk, so
                    // `text` may be shorter than `rel_pos` below. Confirm that the
                    // grapheme helper tolerates a position past the end of `text`.
                    let valid_bytes = &bytes[..e.valid_up_to()];
                    match std::str::from_utf8(valid_bytes) {
                        Ok(s) if !s.is_empty() => s,
                        _ => return self.prev_char_boundary(pos),
                    }
                }
            };

            // Use shared grapheme utility with relative position
            let rel_pos = pos - start;
            let new_rel_pos = grapheme::prev_grapheme_boundary(text, rel_pos);

            // If the returned boundary is at the start of our chunk, the
            // grapheme may extend further back. Only trust the answer when
            // either we already reached the beginning of the buffer or the
            // boundary sits strictly inside the chunk.
            if new_rel_pos > 0 || start == 0 {
                return start + new_rel_pos;
            }

            // Expand the lookback window and retry. Cap at the full buffer.
            if lookback >= pos {
                return 0;
            }
            lookback = lookback.saturating_mul(2);
        }
    }
2930
2931    /// Find the next grapheme cluster boundary (for proper cursor movement with combining characters)
2932    ///
2933    /// This handles complex scripts like Thai where multiple Unicode code points
2934    /// form a single visual character (grapheme cluster). For example, Thai "ที่"
2935    /// is 3 code points but 1 grapheme cluster.
2936    ///
2937    /// The lookahead window grows whenever the first grapheme reaches the
2938    /// end of the chunk — otherwise ZWJ emoji and Zalgo strings whose byte
2939    /// length exceeds the initial 32-byte window would be split mid-cluster.
2940    pub fn next_grapheme_boundary(&self, pos: usize) -> usize {
2941        let len = self.len();
2942        if pos >= len {
2943            return len;
2944        }
2945
2946        let mut lookahead: usize = 32;
2947        loop {
2948            let end = (pos + lookahead).min(len);
2949            let Some(bytes) = self.get_text_range(pos, end - pos) else {
2950                // Data unloaded, fall back to char boundary
2951                return self.next_char_boundary(pos);
2952            };
2953
2954            // Convert to UTF-8 string, handling the case where we might have
2955            // grabbed bytes that end mid-character (truncate to valid UTF-8)
2956            let text = match std::str::from_utf8(&bytes) {
2957                Ok(s) => s,
2958                Err(e) => {
2959                    // The bytes end in an incomplete UTF-8 sequence
2960                    // Use only the valid portion (which includes at least the first grapheme)
2961                    let valid_bytes = &bytes[..e.valid_up_to()];
2962                    match std::str::from_utf8(valid_bytes) {
2963                        Ok(s) if !s.is_empty() => s,
2964                        _ => return self.next_char_boundary(pos),
2965                    }
2966                }
2967            };
2968
2969            let new_rel_pos = grapheme::next_grapheme_boundary(text, 0);
2970
2971            // If the first grapheme reaches the end of our chunk and there
2972            // is more buffer left beyond it, the grapheme may extend further.
2973            // Expand the window and retry.
2974            if new_rel_pos == text.len() && end < len {
2975                if lookahead >= len - pos {
2976                    return len;
2977                }
2978                lookahead = lookahead.saturating_mul(2);
2979                continue;
2980            }
2981
2982            return pos + new_rel_pos;
2983        }
2984    }
2985
2986    /// Find the previous word boundary
2987    pub fn prev_word_boundary(&self, pos: usize) -> usize {
2988        if pos == 0 {
2989            return 0;
2990        }
2991
2992        // Get some text before pos
2993        let start = pos.saturating_sub(256).max(0);
2994        let Some(bytes) = self.get_text_range(start, pos - start) else {
2995            // Data unloaded, return pos as fallback
2996            return pos;
2997        };
2998        let text = String::from_utf8_lossy(&bytes);
2999
3000        let mut found_word_char = false;
3001        let chars: Vec<char> = text.chars().collect();
3002
3003        for i in (0..chars.len()).rev() {
3004            let ch = chars[i];
3005            let is_word_char = ch.is_alphanumeric() || ch == '_';
3006
3007            if found_word_char && !is_word_char {
3008                // We've transitioned from word to non-word
3009                // Calculate the byte position
3010                let byte_offset: usize = chars[0..=i].iter().map(|c| c.len_utf8()).sum();
3011                return start + byte_offset;
3012            }
3013
3014            if is_word_char {
3015                found_word_char = true;
3016            }
3017        }
3018
3019        0
3020    }
3021
3022    /// Find the next word boundary
3023    pub fn next_word_boundary(&self, pos: usize) -> usize {
3024        let len = self.len();
3025        if pos >= len {
3026            return len;
3027        }
3028
3029        // Get some text after pos
3030        let end = (pos + 256).min(len);
3031        let Some(bytes) = self.get_text_range(pos, end - pos) else {
3032            // Data unloaded, return pos as fallback
3033            return pos;
3034        };
3035        let text = String::from_utf8_lossy(&bytes);
3036
3037        let mut found_word_char = false;
3038        let mut byte_offset = 0;
3039
3040        for ch in text.chars() {
3041            let is_word_char = ch.is_alphanumeric() || ch == '_';
3042
3043            if found_word_char && !is_word_char {
3044                // We've transitioned from word to non-word
3045                return pos + byte_offset;
3046            }
3047
3048            if is_word_char {
3049                found_word_char = true;
3050            }
3051
3052            byte_offset += ch.len_utf8();
3053        }
3054
3055        len
3056    }
3057
    /// Create a line iterator starting at the given byte position.
    ///
    /// Thin wrapper that forwards `self`, `byte_pos` and `estimated_line_length`
    /// unchanged to `LineIterator::new`; the returned iterator borrows this
    /// buffer mutably for as long as it is alive.
    ///
    /// This iterator lazily loads chunks as needed, never scanning the entire file.
    /// For large files with unloaded buffers, chunks are loaded on-demand (1MB at a time).
    pub fn line_iterator(
        &mut self,
        byte_pos: usize,
        estimated_line_length: usize,
    ) -> LineIterator<'_> {
        LineIterator::new(self, byte_pos, estimated_line_length)
    }
3069
    /// Iterate over lines starting from a given byte offset, with line numbers.
    ///
    /// This is a more efficient alternative to using line_iterator() + offset_to_position()
    /// because the line number is looked up once for `byte_pos` and then simply
    /// incremented for every newline encountered while scanning forward.
    ///
    /// Returns an iterator yielding `LineData` values (byte offset, content,
    /// whether the line ended with a newline, and `line_number: Option<usize>`):
    /// - line_number is Some(n) for small files with line metadata
    /// - line_number is None for large files without line metadata
    ///
    /// At most `max_lines` entries are produced; they are collected eagerly when
    /// the iterator is constructed, and its `has_more` flag reports whether buffer
    /// content remains after the last collected line.
    ///
    /// # Errors
    /// Returns whatever error `TextBufferLineIterator::new` reports, i.e. a failure
    /// while loading the text to scan.
    ///
    /// # Performance
    /// - O(1) per line for line number calculation (vs O(log n) per line with offset_to_position)
    pub fn iter_lines_from(
        &mut self,
        byte_pos: usize,
        max_lines: usize,
    ) -> Result<TextBufferLineIterator> {
        TextBufferLineIterator::new(self, byte_pos, max_lines)
    }
3090
3091    // Legacy API methods for backwards compatibility
3092
3093    /// Get the line number for a given byte offset
3094    ///
3095    /// Returns exact line number if metadata available, otherwise estimates based on bytes.
3096    ///
3097    /// # Behavior by File Size:
3098    /// - **Small files (< 1MB)**: Returns exact line number from piece tree's `line_starts` metadata
3099    /// - **Large files (≥ 1MB)**: Returns estimated line number using `byte_offset / estimated_line_length`
3100    ///
3101    /// Large files don't maintain line metadata for performance reasons. The estimation
3102    /// uses the configured `estimated_line_length` (default 80 bytes).
3103    pub fn get_line_number(&self, byte_offset: usize) -> usize {
3104        self.offset_to_position(byte_offset)
3105            .map(|pos| pos.line)
3106            .unwrap_or_else(|| {
3107                // Estimate line number based on configured average line length
3108                byte_offset / self.config.estimated_line_length
3109            })
3110    }
3111
    /// Get the configured estimated line length for approximate line number calculations.
    ///
    /// Returns `config.estimated_line_length` unchanged; `get_line_number` divides a
    /// byte offset by this value when no exact line position is available.
    pub fn estimated_line_length(&self) -> usize {
        self.config.estimated_line_length
    }
3116
    /// Get the starting line number at a byte offset (used for viewport rendering)
    ///
    /// Despite its name this method populates nothing: it ignores `_line_count`
    /// and simply returns `get_line_number(start_byte)`.
    ///
    /// # Line Cache Architecture (Post-Refactoring):
    ///
    /// The concept of a separate "line cache" is **now obsolete**. After the refactoring,
    /// line tracking is integrated directly into the piece tree via:
    /// ```text
    /// BufferData::Loaded {
    ///     data: Vec<u8>,
    ///     line_starts: Option<Vec<usize>>  // None = large file mode (no line metadata)
    /// }
    /// ```
    ///
    /// ## Why This Method Still Exists:
    /// The rendering code needs to know what line number to display in the margin at the
    /// top of the viewport. This method returns that line number, handling both small
    /// and large file modes transparently.
    ///
    /// ## Small vs Large File Modes:
    /// - **Small files**: `line_starts = Some(vec)` → returns exact line number from metadata
    /// - **Large files**: `line_starts = None` → returns estimated line number (byte_offset / estimated_line_length)
    ///
    /// ## Legacy Line Cache Methods:
    /// These methods are now no-ops and can be removed in a future cleanup:
    /// - `invalidate_line_cache_from()` - No-op (piece tree updates automatically)
    /// - `handle_line_cache_insertion()` - No-op (piece tree updates automatically)
    /// - `handle_line_cache_deletion()` - No-op (piece tree updates automatically)
    /// - `clear_line_cache()` - No-op (can't clear piece tree metadata)
    ///
    /// ## Bug Fix (2025-11):
    /// Previously this method always returned `0`, causing line numbers in the margin
    /// to always show 1, 2, 3... regardless of scroll position. Now it correctly returns
    /// the actual line number at `start_byte`.
    pub fn populate_line_cache(&mut self, start_byte: usize, _line_count: usize) -> usize {
        // Nothing to populate: line starts are tracked by the piece tree itself.
        // The caller still needs the line number at start_byte for rendering.
        self.get_line_number(start_byte)
    }
3155
    /// Get cached byte offset for line (compatibility method)
    ///
    /// No cache is consulted: the call is forwarded to `line_start_offset`, and
    /// its `Option` result is returned unchanged.
    pub fn get_cached_byte_offset_for_line(&self, line_number: usize) -> Option<usize> {
        self.line_start_offset(line_number)
    }
3160
    /// Invalidate line cache from offset (no-op in new implementation)
    ///
    /// Kept only so existing callers keep compiling; `_byte_offset` is ignored.
    pub fn invalidate_line_cache_from(&mut self, _byte_offset: usize) {
        // Intentionally empty: there is no separate line cache to invalidate.
    }
3165
    /// Handle line cache insertion (no-op in new implementation)
    ///
    /// Kept only so existing callers keep compiling; both arguments are ignored.
    pub fn handle_line_cache_insertion(&mut self, _byte_offset: usize, _bytes_inserted: usize) {
        // Intentionally empty: there is no separate line cache to adjust on insert.
    }
3170
    /// Handle line cache deletion (no-op in new implementation)
    ///
    /// Kept only so existing callers keep compiling; both arguments are ignored.
    pub fn handle_line_cache_deletion(&mut self, _byte_offset: usize, _bytes_deleted: usize) {
        // Intentionally empty: there is no separate line cache to adjust on delete.
    }
3175
    /// Clear line cache (no-op in new implementation)
    ///
    /// Kept only so existing callers keep compiling; the buffer is left untouched.
    pub fn clear_line_cache(&mut self) {
        // Intentionally empty: there is no separate line cache to clear.
    }
3180
3181    // Test helper methods
3182
3183    /// Create a buffer from a string for testing
3184    #[cfg(test)]
3185    pub fn from_str_test(s: &str) -> Self {
3186        Self::from_bytes(
3187            s.as_bytes().to_vec(),
3188            std::sync::Arc::new(crate::model::filesystem::StdFileSystem),
3189        )
3190    }
3191
3192    /// Create a new empty buffer for testing
3193    #[cfg(test)]
3194    pub fn new_test() -> Self {
3195        Self::empty(std::sync::Arc::new(crate::model::filesystem::StdFileSystem))
3196    }
3197}
3198
/// Type alias for backwards compatibility: `Buffer` is another name for
/// [`TextBuffer`], so code written against the old name keeps compiling.
pub type Buffer = TextBuffer;
3201
3202// Re-export LineIterator from the line_iterator module
3203pub use crate::primitives::line_iterator::LineIterator;
3204
3205// ============================================================================
3206// Overlapping Chunks Iterator for Efficient Search
3207// ============================================================================
3208
/// Information about a chunk of data for pattern matching
///
/// Produced by [`OverlappingChunks`]. Each value owns a copy of the chunk bytes,
/// so it stays valid after the iterator advances.
#[derive(Debug)]
pub struct ChunkInfo {
    /// The buffer containing this chunk's data (includes overlap from previous chunk)
    pub buffer: Vec<u8>,

    /// Absolute position in the document where this buffer starts
    /// (i.e. the document offset of `buffer[0]`)
    pub absolute_pos: usize,

    /// Offset within buffer where "new" data starts (valid match zone)
    /// Matches starting before this offset were already checked in the previous chunk
    pub valid_start: usize,
}
3222
/// Iterator that yields overlapping chunks for pattern matching
///
/// This iterator implements the VSCode/Sublime approach: pull overlapping chunks
/// from the underlying piece tree and use standard search algorithms on them.
///
/// # Algorithm
///
/// ```text
/// Chunk 1: [------------ valid -----------]
/// Chunk 2:      [overlap][---- valid ----]
/// Chunk 3:                   [overlap][-- valid --]
///
/// Only matches starting in the "valid" zone are reported to avoid duplicates.
/// ```
///
/// Each chunk after the first begins with up to `overlap` bytes carried over
/// from the end of the previous chunk, so a match that straddles a chunk
/// boundary is still seen in one piece.
///
/// # Example
///
/// ```ignore
/// let chunks = OverlappingChunks::new(&text_buffer, start, end, 4096, pattern.len()-1);
/// for chunk in chunks {
///     // Search only starting from chunk.valid_start
///     if let Some(pos) = search(&chunk.buffer[chunk.valid_start..]) {
///         let absolute_pos = chunk.absolute_pos + chunk.valid_start + pos;
///         return Some(absolute_pos);
///     }
/// }
/// ```
pub struct OverlappingChunks<'a> {
    // Source of pieces covering the requested document range
    piece_iter: PieceRangeIter,
    // Backing buffers of the text buffer, indexed by a piece's buffer id
    buffers: &'a [StringBuffer],

    // Reusable chunk buffer that we fill from pieces
    buffer: Vec<u8>,
    // Document offset of `buffer[0]`
    buffer_absolute_pos: usize,

    // Current state
    // Document offset of the next byte to read
    current_pos: usize,
    // Exclusive end of the range being iterated
    end_pos: usize,

    // Configuration
    // Number of new bytes targeted per chunk
    chunk_size: usize,
    // Number of bytes carried over from the previous chunk
    overlap: usize,

    // Track first chunk special case
    first_chunk: bool,

    // Cached piece data for incremental reading
    // Copy of the clipped bytes of the piece currently being consumed
    current_piece_data: Option<Vec<u8>>,
    // Index of the next unread byte in `current_piece_data`
    current_piece_offset: usize,
}
3273
3274impl<'a> OverlappingChunks<'a> {
3275    /// Create a new overlapping chunks iterator
3276    ///
3277    /// # Arguments
3278    ///
3279    /// * `text_buffer` - The text buffer to iterate over
3280    /// * `start` - Start position in the document
3281    /// * `end` - End position in the document (exclusive)
3282    /// * `chunk_size` - Target size for each chunk (excluding overlap)
3283    /// * `overlap` - Number of bytes to overlap between chunks
3284    ///
3285    /// # Recommendations
3286    ///
3287    /// * For literal string search: `chunk_size=65536, overlap=pattern.len()-1`
3288    /// * For regex search: `chunk_size=1048576, overlap=4096`
3289    pub fn new(
3290        text_buffer: &'a TextBuffer,
3291        start: usize,
3292        end: usize,
3293        chunk_size: usize,
3294        overlap: usize,
3295    ) -> Self {
3296        let piece_iter = text_buffer.piece_tree.iter_pieces_in_range(start, end);
3297
3298        Self {
3299            piece_iter,
3300            buffers: &text_buffer.buffers,
3301            buffer: Vec::with_capacity(chunk_size + overlap),
3302            buffer_absolute_pos: start,
3303            current_pos: start,
3304            end_pos: end,
3305            chunk_size,
3306            overlap,
3307            first_chunk: true,
3308            current_piece_data: None,
3309            current_piece_offset: 0,
3310        }
3311    }
3312
3313    /// Read one byte from the piece iterator
3314    fn read_byte(&mut self) -> Option<u8> {
3315        loop {
3316            // If we have cached piece data, read from it
3317            if let Some(ref data) = self.current_piece_data {
3318                if self.current_piece_offset < data.len() {
3319                    let byte = data[self.current_piece_offset];
3320                    self.current_piece_offset += 1;
3321                    self.current_pos += 1;
3322                    return Some(byte);
3323                } else {
3324                    // Exhausted current piece, move to next
3325                    self.current_piece_data = None;
3326                    self.current_piece_offset = 0;
3327                }
3328            }
3329
3330            // Get next piece
3331            if let Some(piece_view) = self.piece_iter.next() {
3332                let buffer_id = piece_view.location.buffer_id();
3333                if let Some(buffer) = self.buffers.get(buffer_id) {
3334                    // Extract the relevant slice from this piece
3335                    let piece_start_in_doc = piece_view.doc_offset;
3336                    let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;
3337
3338                    // Clip to our search range
3339                    let read_start = self.current_pos.max(piece_start_in_doc);
3340                    let read_end = self.end_pos.min(piece_end_in_doc);
3341
3342                    if read_end > read_start {
3343                        let offset_in_piece = read_start - piece_start_in_doc;
3344                        let bytes_to_read = read_end - read_start;
3345
3346                        let buffer_start = piece_view.buffer_offset + offset_in_piece;
3347                        let buffer_end = buffer_start + bytes_to_read;
3348
3349                        if let Some(data) = buffer.get_data() {
3350                            if buffer_end <= data.len() {
3351                                // Cache this piece's data
3352                                self.current_piece_data =
3353                                    Some(data[buffer_start..buffer_end].to_vec());
3354                                self.current_piece_offset = 0;
3355                                continue;
3356                            }
3357                        }
3358                    }
3359                }
3360            }
3361
3362            // No more data
3363            return None;
3364        }
3365    }
3366
3367    /// Fill the buffer with the next chunk of data
3368    fn fill_next_chunk(&mut self) -> bool {
3369        if self.first_chunk {
3370            // First chunk: fill up to chunk_size
3371            self.first_chunk = false;
3372            while self.buffer.len() < self.chunk_size && self.current_pos < self.end_pos {
3373                if let Some(byte) = self.read_byte() {
3374                    self.buffer.push(byte);
3375                } else {
3376                    break;
3377                }
3378            }
3379            !self.buffer.is_empty()
3380        } else {
3381            // Subsequent chunks: keep overlap, fill chunk_size NEW bytes
3382            if self.current_pos >= self.end_pos {
3383                return false;
3384            }
3385
3386            // Keep overlap bytes at the end
3387            if self.buffer.len() > self.overlap {
3388                let drain_amount = self.buffer.len() - self.overlap;
3389                self.buffer.drain(0..drain_amount);
3390                self.buffer_absolute_pos += drain_amount;
3391            }
3392
3393            // Fill chunk_size NEW bytes (in addition to overlap)
3394            let before_len = self.buffer.len();
3395            let target_len = self.overlap + self.chunk_size;
3396            while self.buffer.len() < target_len && self.current_pos < self.end_pos {
3397                if let Some(byte) = self.read_byte() {
3398                    self.buffer.push(byte);
3399                } else {
3400                    break;
3401                }
3402            }
3403
3404            // Return true if we added new data
3405            self.buffer.len() > before_len
3406        }
3407    }
3408}
3409
3410impl<'a> Iterator for OverlappingChunks<'a> {
3411    type Item = ChunkInfo;
3412
3413    fn next(&mut self) -> Option<Self::Item> {
3414        // Track if this is the first chunk before filling
3415        let is_first = self.buffer_absolute_pos == self.current_pos;
3416
3417        if !self.fill_next_chunk() {
3418            return None;
3419        }
3420
3421        // First chunk: all data is valid (no overlap from previous)
3422        // Subsequent chunks: overlap bytes are not valid (already checked)
3423        let valid_start = if is_first {
3424            0
3425        } else {
3426            self.overlap.min(self.buffer.len())
3427        };
3428
3429        Some(ChunkInfo {
3430            buffer: self.buffer.clone(),
3431            absolute_pos: self.buffer_absolute_pos,
3432            valid_start,
3433        })
3434    }
3435}
3436
3437#[cfg(test)]
3438mod tests;
3439
3440#[cfg(test)]
3441mod property_tests;
3442
/// Line data with optional line number
///
/// One entry produced by [`TextBufferLineIterator`].
#[derive(Debug, Clone)]
pub struct LineData {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Line content (without trailing newline), decoded lossily as UTF-8
    pub content: String,
    /// Whether this line ends with a newline
    pub has_newline: bool,
    /// Line number (None for large files without line metadata)
    pub line_number: Option<usize>,
}
3455
/// Iterator over lines in a TextBuffer that efficiently tracks line numbers
/// using piece tree metadata (single source of truth)
///
/// All lines are gathered when the iterator is constructed; iterating only
/// hands out the stored entries one after another.
pub struct TextBufferLineIterator {
    /// Collected lines (we collect all at once since we need mutable access to load chunks)
    lines: Vec<LineData>,
    /// Current index in the lines vector
    current_index: usize,
    /// Whether there are more lines after these
    /// (true when buffer content remains past the last collected line)
    pub has_more: bool,
}
3466
3467impl TextBufferLineIterator {
3468    pub(crate) fn new(buffer: &mut TextBuffer, byte_pos: usize, max_lines: usize) -> Result<Self> {
3469        let buffer_len = buffer.len();
3470        if byte_pos >= buffer_len {
3471            return Ok(Self {
3472                lines: Vec::new(),
3473                current_index: 0,
3474                has_more: false,
3475            });
3476        }
3477
3478        // Check if buffer has line metadata (None for large files > 1MB)
3479        let has_line_metadata = buffer.line_count().is_some();
3480
3481        // Determine starting line number by querying piece tree once
3482        // (only if we have line metadata)
3483        let mut current_line = if has_line_metadata {
3484            buffer.offset_to_position(byte_pos).map(|pos| pos.line)
3485        } else {
3486            None
3487        };
3488
3489        let mut lines = Vec::with_capacity(max_lines);
3490        let mut current_offset = byte_pos;
3491        let estimated_line_length = 80; // Use default estimate
3492
3493        // Collect lines by scanning forward
3494        for _ in 0..max_lines {
3495            if current_offset >= buffer_len {
3496                break;
3497            }
3498
3499            let line_start = current_offset;
3500            let line_number = current_line;
3501
3502            // Estimate how many bytes to load for this line
3503            let estimated_max_line_length = estimated_line_length * 3;
3504            let bytes_to_scan = estimated_max_line_length.min(buffer_len - current_offset);
3505
3506            // Load chunk (this handles lazy loading)
3507            let chunk = buffer.get_text_range_mut(current_offset, bytes_to_scan)?;
3508
3509            // Scan for newline
3510            let mut line_len = 0;
3511            let mut found_newline = false;
3512            for &byte in chunk.iter() {
3513                line_len += 1;
3514                if byte == b'\n' {
3515                    found_newline = true;
3516                    break;
3517                }
3518            }
3519
3520            // Handle long lines (rare case)
3521            if !found_newline && current_offset + line_len < buffer_len {
3522                // Line is longer than expected, load more data
3523                let remaining = buffer_len - current_offset - line_len;
3524                let additional_bytes = estimated_max_line_length.min(remaining);
3525                let more_chunk =
3526                    buffer.get_text_range_mut(current_offset + line_len, additional_bytes)?;
3527
3528                let mut extended_chunk = chunk;
3529                extended_chunk.extend_from_slice(&more_chunk);
3530
3531                for &byte in more_chunk.iter() {
3532                    line_len += 1;
3533                    if byte == b'\n' {
3534                        found_newline = true;
3535                        break;
3536                    }
3537                }
3538
3539                let line_string = String::from_utf8_lossy(&extended_chunk[..line_len]).into_owned();
3540                let has_newline = line_string.ends_with('\n');
3541                let content = if has_newline {
3542                    line_string[..line_string.len() - 1].to_string()
3543                } else {
3544                    line_string
3545                };
3546
3547                lines.push(LineData {
3548                    byte_offset: line_start,
3549                    content,
3550                    has_newline,
3551                    line_number,
3552                });
3553
3554                current_offset += line_len;
3555                if has_line_metadata && found_newline {
3556                    current_line = current_line.map(|n| n + 1);
3557                }
3558                continue;
3559            }
3560
3561            // Normal case
3562            let line_string = String::from_utf8_lossy(&chunk[..line_len]).into_owned();
3563            let has_newline = line_string.ends_with('\n');
3564            let content = if has_newline {
3565                line_string[..line_string.len() - 1].to_string()
3566            } else {
3567                line_string
3568            };
3569
3570            lines.push(LineData {
3571                byte_offset: line_start,
3572                content,
3573                has_newline,
3574                line_number,
3575            });
3576
3577            current_offset += line_len;
3578            // Increment line number if we have metadata and found a newline
3579            if has_line_metadata && found_newline {
3580                current_line = current_line.map(|n| n + 1);
3581            }
3582        }
3583
3584        // Check if there are more lines
3585        let has_more = current_offset < buffer_len;
3586
3587        Ok(Self {
3588            lines,
3589            current_index: 0,
3590            has_more,
3591        })
3592    }
3593}
3594
3595impl Iterator for TextBufferLineIterator {
3596    type Item = LineData;
3597
3598    fn next(&mut self) -> Option<Self::Item> {
3599        if self.current_index < self.lines.len() {
3600            let line = self.lines[self.current_index].clone();
3601            self.current_index += 1;
3602            Some(line)
3603        } else {
3604            None
3605        }
3606    }
3607}