fresh/model/buffer/
mod.rs

1/// Text buffer that uses PieceTree with integrated line tracking
2/// Architecture where the tree is the single source of truth for text and line information
3use crate::model::encoding;
4use crate::model::filesystem::{FileSearchOptions, FileSystem};
5use crate::model::piece_tree::{
6    BufferData, BufferLocation, Cursor, PieceInfo, PieceRangeIter, PieceTree, PieceView, Position,
7    StringBuffer, TreeStats,
8};
9use crate::model::piece_tree_diff::PieceTreeDiff;
10use crate::primitives::grapheme;
11use anyhow::{Context, Result};
12use regex::bytes::Regex;
13use std::io;
14
15use std::ops::Range;
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18
19// Re-export Encoding for backward compatibility
20pub use encoding::Encoding;
21
22pub mod file_kind;
23pub mod format;
24pub mod persistence;
25pub mod save;
26pub mod search;
27pub use file_kind::BufferFileKind;
28pub use format::{BufferFormat, LineEnding};
29pub use persistence::Persistence;
30pub use save::SudoSaveRequired;
31#[cfg(test)]
32pub(crate) use save::{RecipeAction, WriteRecipe};
33#[cfg(test)]
34use search::search_boundary_overlap;
35use search::SearchRegion;
36pub use search::{ChunkedSearchState, HybridSearchPlan};
37
38/// Error returned when a large file has a non-resynchronizable encoding
39/// and requires user confirmation before loading the entire file into memory.
40///
41/// Non-resynchronizable encodings (like Shift-JIS, GB18030, GBK, EUC-KR) cannot
42/// determine character boundaries when jumping into the middle of a file.
43/// This means the entire file must be loaded and decoded sequentially.
44#[derive(Debug, Clone, PartialEq)]
45pub struct LargeFileEncodingConfirmation {
46    /// Path to the file
47    pub path: PathBuf,
48    /// Size of the file in bytes
49    pub file_size: usize,
50    /// The detected encoding that requires full loading
51    pub encoding: Encoding,
52}
53
54impl std::fmt::Display for LargeFileEncodingConfirmation {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        let size_mb = self.file_size as f64 / (1024.0 * 1024.0);
57        write!(
58            f,
59            "{} ({:.0} MB) requires full load. (l)oad, (e)ncoding, (C)ancel? ",
60            self.encoding.display_name(),
61            size_mb
62        )
63    }
64}
65
66impl std::error::Error for LargeFileEncodingConfirmation {}
67
/// A work item for incremental line-feed scanning (one per leaf).
///
/// Plain data carrier: it only identifies a leaf and records what is already
/// known about it; the scanning itself happens elsewhere.
#[derive(Debug, Clone)]
pub struct LineScanChunk {
    /// Index of the leaf in the piece tree's leaf array.
    pub leaf_index: usize,
    /// Number of bytes in this leaf.
    pub byte_len: usize,
    /// True if the leaf already had a known line_feed_cnt (no I/O needed).
    pub already_known: bool,
}
78
79// Re-export SearchMatch from filesystem — same type is used by both
80// FileSystem::search_file (project grep on disk) and the piece-tree
81// search below (in-editor Ctrl+F and dirty buffers).
82pub use crate::model::filesystem::SearchMatch;
83
// Large file support configuration
/// Size at or above which a file is treated as "large": 100 MB
/// (`1 << 20` bytes is one megabyte).
pub const DEFAULT_LARGE_FILE_THRESHOLD: usize = 100 * (1 << 20);

/// How much to load per chunk when lazy loading: 1 MB
pub const LOAD_CHUNK_SIZE: usize = 1 << 20;

/// Alignment of lazily loaded chunks: 64 KB (`1 << 10` bytes is one kilobyte)
pub const CHUNK_ALIGNMENT: usize = 64 * (1 << 10);
93
/// Tunables handed to TextBuffer constructors.
#[derive(Debug, Clone)]
pub struct BufferConfig {
    /// Assumed average number of bytes per line. Drives the approximate line
    /// numbers shown for large files and the byte offset guessed by goto-line.
    pub estimated_line_length: usize,
}

impl Default for BufferConfig {
    /// Default configuration: lines are assumed to average 80 bytes.
    fn default() -> Self {
        let estimated_line_length = 80;
        Self {
            estimated_line_length,
        }
    }
}
109
/// Represents a line number (simplified for new implementation)
/// Legacy enum kept for backwards compatibility - always Absolute now
///
/// The stored value is displayed one higher by [`LineNumber::format`]
/// (a stored `0` renders as `1`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineNumber {
    /// Absolute line number - this is the actual line number in the file
    Absolute(usize),
    /// Relative line number (deprecated). `value()` treats it like `Absolute`,
    /// but `format()` still marks it with a leading `~`.
    Relative {
        line: usize,
        from_cached_line: usize,
    },
}

impl LineNumber {
    /// Get the line number value, regardless of variant.
    pub fn value(&self) -> usize {
        match self {
            Self::Absolute(line) | Self::Relative { line, .. } => *line,
        }
    }

    /// Check if this is an absolute line number
    pub fn is_absolute(&self) -> bool {
        matches!(self, Self::Absolute(_))
    }

    /// Check if this is a relative line number
    pub fn is_relative(&self) -> bool {
        matches!(self, Self::Relative { .. })
    }

    /// Format the line number for display.
    ///
    /// Returns the stored value plus one; `Relative` values are prefixed
    /// with `~`.
    pub fn format(&self) -> String {
        match self {
            Self::Absolute(line) => (line + 1).to_string(),
            Self::Relative { line, .. } => format!("~{}", line + 1),
        }
    }
}
151
/// A text buffer that manages document content using a piece table
/// with integrated line tracking
///
/// The piece tree describes the document as pieces that point into the
/// entries of `buffers`; the remaining fields carry save state, file-kind
/// flags, format information and a change counter.
pub struct TextBuffer {
    /// The piece tree for efficient text manipulation with integrated line tracking
    piece_tree: PieceTree,

    /// List of string buffers containing chunks of text data.
    /// Index 0 is typically the original/stored buffer.
    /// Additional buffers are added for modifications.
    buffers: Vec<StringBuffer>,

    /// Next buffer ID to assign.
    /// The constructors in this file start it at 1 because buffer 0 is
    /// created up front.
    next_buffer_id: usize,

    /// Filesystem handle, optional file path, dirty/recovery flags,
    /// saved-root snapshot, and saved-file size — see
    /// `persistence.rs`.
    persistence: Persistence,

    /// File-kind flags (large_file, line_feeds_scanned, is_binary) —
    /// see `file_kind.rs`.
    file_kind: BufferFileKind,

    /// Encoding + line-ending state — see `format.rs`.
    format: BufferFormat,

    /// Monotonic version counter for change tracking.
    /// Starts at 0 and is advanced with wrapping arithmetic.
    version: u64,

    /// Buffer configuration (estimated line length, etc.)
    config: BufferConfig,
}
184
/// Snapshot of a TextBuffer's piece tree and associated string buffers.
///
/// Used by BulkEdit undo/redo to capture the complete buffer state.
/// Without this, consolidate_after_save() would destroy the string buffers
/// that a BulkEdit's piece tree snapshot references, causing corruption on undo.
#[derive(Debug, Clone)]
pub struct BufferSnapshot {
    /// Piece tree as it was when the snapshot was taken.
    pub piece_tree: PieceTree,
    /// String buffers the captured piece tree refers to.
    pub buffers: Vec<StringBuffer>,
    /// Value of the buffer-ID counter at snapshot time.
    pub next_buffer_id: usize,
}
196
197impl TextBuffer {
198    /// Create a new text buffer with the given filesystem implementation.
199    /// Note: large_file_threshold is ignored in the new implementation
200    pub fn new(_large_file_threshold: usize, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
201        let piece_tree = PieceTree::empty();
202        let saved_root = piece_tree.root();
203        let line_ending = LineEnding::default();
204        let encoding = Encoding::default();
205        TextBuffer {
206            piece_tree,
207            buffers: vec![StringBuffer::new(0, Vec::new())],
208            next_buffer_id: 1,
209            persistence: Persistence::new(fs, None, saved_root, None),
210            file_kind: BufferFileKind::new(false, false),
211            format: BufferFormat::new(line_ending, encoding),
212            version: 0,
213            config: BufferConfig::default(),
214        }
215    }
216
217    /// Create an empty buffer associated with a file path.
218    /// Used for files that don't exist yet — the path is set so saving will create the file.
219    pub fn new_with_path(
220        large_file_threshold: usize,
221        fs: Arc<dyn FileSystem + Send + Sync>,
222        path: PathBuf,
223    ) -> Self {
224        let mut buffer = Self::new(large_file_threshold, fs);
225        buffer.persistence.set_file_path(path);
226        buffer
227    }
228
    /// Current buffer version (monotonic, wraps on overflow)
    ///
    /// Returns the raw counter value; callers can compare two readings to
    /// tell whether the counter was advanced in between.
    pub fn version(&self) -> u64 {
        self.version
    }
233
    /// Get a reference to the filesystem implementation used by this buffer.
    ///
    /// The handle is owned by the buffer's persistence state.
    pub fn filesystem(&self) -> &Arc<dyn FileSystem + Send + Sync> {
        self.persistence.fs()
    }
238
    /// Set the filesystem implementation for this buffer.
    ///
    /// Forwards the new handle to the buffer's persistence state.
    pub fn set_filesystem(&mut self, fs: Arc<dyn FileSystem + Send + Sync>) {
        self.persistence.set_fs(fs);
    }
243
    /// Advance the version counter by one, wrapping around on overflow
    /// instead of panicking.
    #[inline]
    fn bump_version(&mut self) {
        self.version = self.version.wrapping_add(1);
    }
248
    /// Record a content change: marks the persistence state dirty and then
    /// advances the version counter.
    #[inline]
    fn mark_content_modified(&mut self) {
        self.persistence.mark_dirty();
        self.bump_version();
    }
254
255    /// Create a text buffer from raw bytes WITHOUT encoding conversion.
256    /// Used for binary files where we want to preserve the exact bytes.
257    fn from_bytes_raw(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
258        let bytes = content.len();
259
260        // For binary files, detect line ending but don't convert encoding
261        let line_ending = format::detect_line_ending(&content);
262
263        // Create initial StringBuffer with ID 0
264        let buffer = StringBuffer::new(0, content);
265        let line_feed_cnt = buffer.line_feed_count();
266
267        let piece_tree = if bytes > 0 {
268            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
269        } else {
270            PieceTree::empty()
271        };
272
273        let saved_root = piece_tree.root();
274
275        TextBuffer {
276            piece_tree,
277            buffers: vec![buffer],
278            next_buffer_id: 1,
279            persistence: Persistence::new(fs, None, saved_root, Some(bytes)),
280            file_kind: BufferFileKind::new(false, true),
281            format: BufferFormat::new(line_ending, Encoding::Utf8),
282            version: 0,
283            config: BufferConfig::default(),
284        }
285    }
286
287    /// Create a text buffer from initial content with the given filesystem.
288    pub fn from_bytes(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
289        // Auto-detect encoding and convert to UTF-8 if needed
290        let (encoding, utf8_content) = format::detect_and_convert_encoding(&content);
291
292        let bytes = utf8_content.len();
293
294        // Auto-detect line ending format from content
295        let line_ending = format::detect_line_ending(&utf8_content);
296
297        // Create initial StringBuffer with ID 0
298        let buffer = StringBuffer::new(0, utf8_content);
299        let line_feed_cnt = buffer.line_feed_count();
300
301        let piece_tree = if bytes > 0 {
302            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
303        } else {
304            PieceTree::empty()
305        };
306
307        let saved_root = piece_tree.root();
308
309        TextBuffer {
310            piece_tree,
311            buffers: vec![buffer],
312            next_buffer_id: 1,
313            persistence: Persistence::new(fs, None, saved_root, Some(bytes)),
314            file_kind: BufferFileKind::new(false, false),
315            format: BufferFormat::new(line_ending, encoding),
316            version: 0,
317            config: BufferConfig::default(),
318        }
319    }
320
321    /// Create a text buffer from bytes with a specific encoding (no auto-detection).
322    pub fn from_bytes_with_encoding(
323        content: Vec<u8>,
324        encoding: Encoding,
325        fs: Arc<dyn FileSystem + Send + Sync>,
326    ) -> Self {
327        // Convert from specified encoding to UTF-8
328        let utf8_content = encoding::convert_to_utf8(&content, encoding);
329
330        let bytes = utf8_content.len();
331
332        // Auto-detect line ending format from content
333        let line_ending = format::detect_line_ending(&utf8_content);
334
335        // Create initial StringBuffer with ID 0
336        let buffer = StringBuffer::new(0, utf8_content);
337        let line_feed_cnt = buffer.line_feed_count();
338
339        let piece_tree = if bytes > 0 {
340            PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
341        } else {
342            PieceTree::empty()
343        };
344
345        let saved_root = piece_tree.root();
346
347        TextBuffer {
348            piece_tree,
349            buffers: vec![buffer],
350            next_buffer_id: 1,
351            persistence: Persistence::new(fs, None, saved_root, Some(bytes)),
352            file_kind: BufferFileKind::new(false, false),
353            format: BufferFormat::new(line_ending, encoding),
354            version: 0,
355            config: BufferConfig::default(),
356        }
357    }
358
359    /// Create a text buffer from a string with the given filesystem.
360    pub fn from_str(
361        s: &str,
362        _large_file_threshold: usize,
363        fs: Arc<dyn FileSystem + Send + Sync>,
364    ) -> Self {
365        Self::from_bytes(s.as_bytes().to_vec(), fs)
366    }
367
368    /// Create an empty text buffer with the given filesystem.
369    pub fn empty(fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
370        let piece_tree = PieceTree::empty();
371        let saved_root = piece_tree.root();
372        let line_ending = LineEnding::default();
373        let encoding = Encoding::default();
374        TextBuffer {
375            piece_tree,
376            buffers: vec![StringBuffer::new(0, Vec::new())],
377            next_buffer_id: 1,
378            persistence: Persistence::new(fs, None, saved_root, None),
379            file_kind: BufferFileKind::new(false, false),
380            format: BufferFormat::new(line_ending, encoding),
381            version: 0,
382            config: BufferConfig::default(),
383        }
384    }
385
386    /// Load a text buffer from a file using the given filesystem.
387    pub fn load_from_file<P: AsRef<Path>>(
388        path: P,
389        large_file_threshold: usize,
390        fs: Arc<dyn FileSystem + Send + Sync>,
391    ) -> anyhow::Result<Self> {
392        let path = path.as_ref();
393
394        // Get file size to determine loading strategy
395        let metadata = fs.metadata(path)?;
396        let file_size = metadata.size as usize;
397
398        // Use threshold parameter or default
399        let threshold = if large_file_threshold > 0 {
400            large_file_threshold
401        } else {
402            DEFAULT_LARGE_FILE_THRESHOLD
403        };
404
405        // Choose loading strategy based on file size
406        if file_size >= threshold {
407            Self::load_large_file(path, file_size, fs)
408        } else {
409            Self::load_small_file(path, fs)
410        }
411    }
412
413    /// Load a text buffer from a file with a specific encoding (no auto-detection).
414    pub fn load_from_file_with_encoding<P: AsRef<Path>>(
415        path: P,
416        encoding: Encoding,
417        fs: Arc<dyn FileSystem + Send + Sync>,
418        config: BufferConfig,
419    ) -> anyhow::Result<Self> {
420        let path = path.as_ref();
421        let contents = fs.read_file(path)?;
422
423        let mut buffer = Self::from_bytes_with_encoding(contents, encoding, fs);
424        buffer.persistence.set_file_path(path.to_path_buf());
425        buffer.persistence.clear_modified();
426        buffer.config = config;
427        Ok(buffer)
428    }
429
430    /// Load a small file with full eager loading and line indexing
431    fn load_small_file(path: &Path, fs: Arc<dyn FileSystem + Send + Sync>) -> anyhow::Result<Self> {
432        let contents = fs.read_file(path)?;
433
434        // Use unified encoding/binary detection
435        let (encoding, is_binary) = format::detect_encoding_or_binary(&contents, false);
436
437        // For binary files, skip encoding conversion to preserve raw bytes
438        let mut buffer = if is_binary {
439            Self::from_bytes_raw(contents, fs)
440        } else {
441            // from_bytes handles encoding detection/conversion and line ending detection
442            Self::from_bytes(contents, fs)
443        };
444        buffer.persistence.set_file_path(path.to_path_buf());
445        buffer.persistence.clear_modified();
446        buffer.file_kind.set_large_file(false);
447        buffer.file_kind.set_binary(is_binary);
448        // For binary files, ensure encoding matches detection
449        if is_binary {
450            buffer.format.set_default_encoding(encoding);
451        }
452        // Note: line_ending and encoding are already set by from_bytes/from_bytes_raw
453        Ok(buffer)
454    }
455
456    /// Check if loading a large file requires user confirmation due to encoding.
457    ///
458    /// Some encodings (like Shift-JIS, GB18030, GBK, EUC-KR) cannot be "resynchronized" -
459    /// meaning you cannot determine character boundaries when jumping into the middle
460    /// of a file. These encodings require loading the entire file into memory.
461    ///
462    /// Returns `Some(confirmation)` if user confirmation is needed, `None` if the file
463    /// can be loaded with lazy/streaming loading.
464    pub fn check_large_file_encoding(
465        path: impl AsRef<Path>,
466        fs: Arc<dyn FileSystem + Send + Sync>,
467    ) -> anyhow::Result<Option<LargeFileEncodingConfirmation>> {
468        let path = path.as_ref();
469        let metadata = fs.metadata(path)?;
470        let file_size = metadata.size as usize;
471
472        // Only check for large files
473        if file_size < DEFAULT_LARGE_FILE_THRESHOLD {
474            return Ok(None);
475        }
476
477        // Read a sample to detect encoding
478        let sample_size = file_size.min(8 * 1024);
479        let sample = fs.read_range(path, 0, sample_size)?;
480        let (encoding, is_binary) =
481            format::detect_encoding_or_binary(&sample, file_size > sample_size);
482
483        // Binary files don't need confirmation (loaded as-is)
484        if is_binary {
485            return Ok(None);
486        }
487
488        // Check if the encoding requires full file loading
489        if encoding.requires_full_file_load() {
490            return Ok(Some(LargeFileEncodingConfirmation {
491                path: path.to_path_buf(),
492                file_size,
493                encoding,
494            }));
495        }
496
497        Ok(None)
498    }
499
    /// Load a large file with unloaded buffer (no line indexing, lazy loading)
    ///
    /// Delegates to `load_large_file_internal` with `force_full_load = false`,
    /// so a text file whose encoding requires a full load is not read here:
    /// the call fails with a `LargeFileEncodingConfirmation` error instead.
    /// Use `load_large_file_confirmed` once the user has agreed to the full load.
    fn load_large_file(
        path: &Path,
        file_size: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
    ) -> anyhow::Result<Self> {
        Self::load_large_file_internal(path, file_size, fs, false)
    }
511
512    /// Load a large file, optionally forcing full load for non-resynchronizable encodings.
513    ///
514    /// Called with `force_full_load=true` after user confirms the warning about
515    /// non-resynchronizable encodings requiring full file loading.
516    pub fn load_large_file_confirmed(
517        path: impl AsRef<Path>,
518        fs: Arc<dyn FileSystem + Send + Sync>,
519    ) -> anyhow::Result<Self> {
520        let path = path.as_ref();
521        let metadata = fs.metadata(path)?;
522        let file_size = metadata.size as usize;
523        Self::load_large_file_internal(path, file_size, fs, true)
524    }
525
526    /// Internal implementation for loading large files.
527    fn load_large_file_internal(
528        path: &Path,
529        file_size: usize,
530        fs: Arc<dyn FileSystem + Send + Sync>,
531        force_full_load: bool,
532    ) -> anyhow::Result<Self> {
533        use crate::model::piece_tree::{BufferData, BufferLocation};
534
535        // Read a sample of the file to detect encoding and whether it's binary
536        // We read the first 8KB for detection
537        let sample_size = file_size.min(8 * 1024);
538        let sample = fs.read_range(path, 0, sample_size)?;
539
540        // Use unified encoding/binary detection
541        let (encoding, is_binary) =
542            format::detect_encoding_or_binary(&sample, file_size > sample_size);
543
544        // Binary files skip encoding conversion to preserve raw bytes
545        if is_binary {
546            tracing::info!("Large binary file detected, loading without encoding conversion");
547            let contents = fs.read_file(path)?;
548            let mut buffer = Self::from_bytes_raw(contents, fs);
549            buffer.persistence.set_file_path(path.to_path_buf());
550            buffer.persistence.clear_modified();
551            buffer.file_kind.set_large_file(true);
552            buffer.format.set_default_encoding(encoding);
553            return Ok(buffer);
554        }
555
556        // Check if encoding requires full file loading
557        let requires_full_load = encoding.requires_full_file_load();
558
559        // For non-resynchronizable encodings, require confirmation unless forced
560        if requires_full_load && !force_full_load {
561            anyhow::bail!(LargeFileEncodingConfirmation {
562                path: path.to_path_buf(),
563                file_size,
564                encoding,
565            });
566        }
567
568        // For encodings that require full load (non-resynchronizable or non-UTF-8),
569        // load the entire file and convert
570        if !matches!(encoding, Encoding::Utf8 | Encoding::Ascii) {
571            tracing::info!(
572                "Large file with non-UTF-8 encoding ({:?}), loading fully for conversion",
573                encoding
574            );
575            let contents = fs.read_file(path)?;
576            let mut buffer = Self::from_bytes(contents, fs);
577            buffer.persistence.set_file_path(path.to_path_buf());
578            buffer.persistence.clear_modified();
579            buffer.file_kind.set_large_file(true); // Still mark as large file for UI purposes
580            buffer.file_kind.set_binary(is_binary);
581            return Ok(buffer);
582        }
583
584        // UTF-8/ASCII files can use lazy loading
585        let line_ending = format::detect_line_ending(&sample);
586
587        // Create an unloaded buffer that references the entire file
588        let buffer = StringBuffer {
589            id: 0,
590            data: BufferData::Unloaded {
591                file_path: path.to_path_buf(),
592                file_offset: 0,
593                bytes: file_size,
594            },
595            stored_file_offset: None,
596        };
597
598        // Create piece tree with a single piece covering the whole file
599        // No line feed count (None) since we're not computing line indexing
600        let piece_tree = if file_size > 0 {
601            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
602        } else {
603            PieceTree::empty()
604        };
605        let saved_root = piece_tree.root();
606
607        tracing::debug!(
608            "Buffer::load_from_file: loaded {} bytes, saved_file_size={}",
609            file_size,
610            file_size
611        );
612
613        Ok(TextBuffer {
614            piece_tree,
615            buffers: vec![buffer],
616            next_buffer_id: 1,
617            persistence: Persistence::new(
618                fs,
619                Some(path.to_path_buf()),
620                saved_root,
621                Some(file_size),
622            ),
623            file_kind: BufferFileKind::new(true, is_binary),
624            format: BufferFormat::new(line_ending, encoding),
625            version: 0,
626            config: BufferConfig::default(),
627        })
628    }
629
630    /// Save the buffer to its associated file
631    pub fn save(&mut self) -> anyhow::Result<()> {
632        if let Some(path) = self.persistence.file_path_owned() {
633            self.save_to_file(path)
634        } else {
635            anyhow::bail!(io::Error::new(
636                io::ErrorKind::NotFound,
637                "No file path associated with buffer",
638            ))
639        }
640    }
641
    /// Build a write recipe from the piece tree for saving.
    ///
    /// Delegates to `save::build_write_recipe`, passing the buffer's piece
    /// tree, string buffers, format, file-kind flags and persistence state.
    /// Compiled only for tests.
    #[cfg(test)]
    pub(crate) fn build_write_recipe(&self) -> io::Result<WriteRecipe> {
        save::build_write_recipe(
            &self.piece_tree,
            &self.buffers,
            &self.format,
            &self.file_kind,
            &self.persistence,
        )
    }
655
    /// Save the buffer to a specific file
    ///
    /// Uses the write recipe approach for both local and remote filesystems:
    /// - Copy ops reference unchanged regions in the source file
    /// - Insert ops contain new/modified data
    ///
    /// For remote filesystems, the recipe is sent to the agent which reconstructs
    /// the file server-side, avoiding transfer of unchanged content.
    ///
    /// For local filesystems with ownership concerns (file owned by another user),
    /// uses in-place writing to preserve ownership. Otherwise uses atomic writes.
    ///
    /// If the line ending format has been changed (via set_line_ending), all content
    /// will be converted to the new format during save.
    ///
    /// # Errors
    ///
    /// Propagates filesystem and recipe-building errors. When a local
    /// (non in-place) write fails with `PermissionDenied`, the content is
    /// written to a temp file and the error built by `save::make_sudo_error`
    /// is returned; in that case the save is NOT finalized here.
    pub fn save_to_file<P: AsRef<Path>>(&mut self, path: P) -> anyhow::Result<()> {
        let dest_path = path.as_ref();
        let total = self.total_bytes();

        // Handle empty files
        // (no recipe is needed: write zero bytes and finalize right away)
        if total == 0 {
            self.persistence.fs().write_file(dest_path, &[])?;
            self.finalize_save(dest_path)?;
            return Ok(());
        }

        // Build the write recipe (unified for all filesystem types)
        let recipe = save::build_write_recipe(
            &self.piece_tree,
            &self.buffers,
            &self.format,
            &self.file_kind,
            &self.persistence,
        )?;
        let ops = recipe.to_write_ops();

        // Check if we need in-place writing to preserve file ownership (local only)
        // Remote filesystems handle this differently
        let fs = self.persistence.fs();
        // "Local" here means the filesystem reports no remote connection info.
        let is_local = fs.remote_connection_info().is_none();
        let use_inplace = is_local && save::should_use_inplace_write(fs, dest_path);

        if use_inplace {
            // In-place write: write directly to preserve ownership
            save::save_with_inplace_write(fs, dest_path, &recipe)?;
        } else if !recipe.has_copy_ops() && !is_local {
            // Remote with no Copy ops: use write_file directly (more efficient)
            let data = recipe.flatten_inserts();
            fs.write_file(dest_path, &data)?;
        } else if is_local {
            // Local: use write_file or write_patched with sudo fallback
            let write_result = if !recipe.has_copy_ops() {
                let data = recipe.flatten_inserts();
                fs.write_file(dest_path, &data)
            } else {
                // Copy ops read from the recipe's source path, falling back
                // to the destination when the recipe names none.
                let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
                fs.write_patched(src_for_patch, dest_path, &ops)
            };

            if let Err(e) = write_result {
                if e.kind() == io::ErrorKind::PermissionDenied {
                    // Create temp file and return sudo error
                    // (the destination's metadata is captured first and
                    // handed to the sudo error)
                    let original_metadata = fs.metadata_if_exists(dest_path);
                    let (temp_path, mut temp_file) = save::create_temp_file(fs, dest_path)?;
                    save::write_recipe_to_file(fs, &mut temp_file, &recipe)?;
                    temp_file.sync_all()?;
                    drop(temp_file);
                    return Err(save::make_sudo_error(
                        temp_path,
                        dest_path,
                        original_metadata,
                    ));
                }
                return Err(e.into());
            }
        } else {
            // Remote with Copy ops: use write_patched
            let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
            fs.write_patched(src_for_patch, dest_path, &ops)?;
        }

        // Reached only when one of the write paths above succeeded.
        self.finalize_save(dest_path)?;
        Ok(())
    }
739
    /// Finalize save state after successful write.
    ///
    /// Re-reads the destination's size from the filesystem, records it and
    /// the path in the persistence state, consolidates the piece tree, then
    /// marks the saved snapshot and promotes the current format to "original".
    ///
    /// # Errors
    ///
    /// Fails if the destination's metadata cannot be read; in that case none
    /// of the state updates have been applied.
    fn finalize_save(&mut self, dest_path: &Path) -> anyhow::Result<()> {
        let new_size = self.persistence.fs().metadata(dest_path)?.size as usize;
        tracing::debug!(
            "Buffer::save: updating saved_file_size from {:?} to {}",
            self.persistence.saved_file_size(),
            new_size
        );
        self.persistence.set_saved_file_size(Some(new_size));
        self.persistence.set_file_path(dest_path.to_path_buf());

        // Consolidate the piece tree to synchronize with disk (for large files)
        // or to simplify structure (for small files).
        self.consolidate_after_save(dest_path, new_size);

        // Snapshot is taken after consolidation, i.e. of the rebuilt tree.
        self.mark_saved_snapshot();
        self.format.promote_current_to_original();
        Ok(())
    }
759
760    /// Finalize buffer state after an external save operation (e.g., via sudo).
761    ///
762    /// This updates the saved snapshot and file size to match the new state on disk.
763    pub fn finalize_external_save(&mut self, dest_path: PathBuf) -> anyhow::Result<()> {
764        let new_size = self.persistence.fs().metadata(&dest_path)?.size as usize;
765        self.persistence.set_saved_file_size(Some(new_size));
766        self.persistence.set_file_path(dest_path.clone());
767
768        // Consolidate the piece tree to synchronize with disk or simplify structure.
769        self.consolidate_after_save(&dest_path, new_size);
770
771        self.mark_saved_snapshot();
772        self.format.promote_current_to_original();
773        Ok(())
774    }
775
776    /// Consolidate the piece tree into a single piece.
777    /// For large files, this creates a reference to the disk file to save memory and sync offsets.
778    /// For small files, this flattens all edits into a single in-memory buffer.
779    fn consolidate_after_save(&mut self, path: &Path, file_size: usize) {
780        if self.file_kind.is_large_file() {
781            self.consolidate_large_file(path, file_size);
782        } else {
783            self.consolidate_small_file();
784        }
785    }
786
787    /// Consolidate large file piece tree into a single piece pointing to the new file.
788    /// This ensures that subsequent operations correctly reference the new content and offsets.
789    /// Preserves total line feed count from the old tree if a scan was previously done.
790    fn consolidate_large_file(&mut self, path: &Path, file_size: usize) {
791        // Preserve line feed count from the old tree if we had scanned it
792        let preserved_lf = if self.file_kind.has_line_feed_scan() {
793            self.piece_tree.line_count().map(|c| c.saturating_sub(1))
794        } else {
795            None
796        };
797
798        let buffer = StringBuffer {
799            id: 0,
800            data: BufferData::Unloaded {
801                file_path: path.to_path_buf(),
802                file_offset: 0,
803                bytes: file_size,
804            },
805            stored_file_offset: None,
806        };
807
808        self.piece_tree = if file_size > 0 {
809            PieceTree::new(BufferLocation::Stored(0), 0, file_size, preserved_lf)
810        } else {
811            PieceTree::empty()
812        };
813
814        self.buffers = vec![buffer];
815        self.next_buffer_id = 1;
816
817        tracing::debug!(
818            "Buffer::consolidate_large_file: consolidated into single piece of {} bytes",
819            file_size
820        );
821    }
822
823    /// Consolidate small file edits into a single in-memory buffer and re-index lines.
824    fn consolidate_small_file(&mut self) {
825        if let Some(bytes) = self.get_all_text() {
826            let line_feed_cnt = bytes.iter().filter(|&&b| b == b'\n').count();
827            let len = bytes.len();
828
829            // Create a single loaded buffer with line indexing
830            let buffer = StringBuffer::new_loaded(0, bytes, true);
831
832            self.piece_tree = if len > 0 {
833                PieceTree::new(BufferLocation::Stored(0), 0, len, Some(line_feed_cnt))
834            } else {
835                PieceTree::empty()
836            };
837
838            self.buffers = vec![buffer];
839            self.next_buffer_id = 1;
840
841            tracing::debug!(
842                "Buffer::consolidate_small_file: consolidated into single loaded buffer of {} bytes",
843                len
844            );
845        }
846    }
847
848    /// Get the total number of bytes in the document
849    pub fn total_bytes(&self) -> usize {
850        self.piece_tree.total_bytes()
851    }
852
853    /// Get the total number of lines in the document
854    /// Uses the piece tree's integrated line tracking
855    /// Returns None if line count is unknown (e.g., for large files without line indexing)
856    pub fn line_count(&self) -> Option<usize> {
857        self.piece_tree.line_count()
858    }
859
860    /// Snapshot the current tree as the saved baseline
861    pub fn mark_saved_snapshot(&mut self) {
862        self.persistence.mark_saved_snapshot(&self.piece_tree);
863    }
864
865    /// Refresh the saved root to match the current tree structure without
866    /// clearing the modified flag.  Call this after structural-only changes
867    /// (e.g. chunk_split_and_load during search scan) so that
868    /// `diff_since_saved()` can take the fast `Arc::ptr_eq` path.
869    pub fn refresh_saved_root_if_unmodified(&mut self) {
870        self.persistence
871            .refresh_saved_root_if_unmodified(&self.piece_tree);
872    }
873
874    /// Diff the current piece tree against the last saved snapshot.
875    ///
876    /// See `Persistence::diff_since_saved` for the algorithm.
877    pub fn diff_since_saved(&self) -> PieceTreeDiff {
878        let _span = tracing::info_span!(
879            "diff_since_saved",
880            large_file = self.file_kind.is_large_file(),
881            modified = self.persistence.is_modified(),
882            lf_scanned = self.file_kind.has_line_feed_scan()
883        )
884        .entered();
885
886        self.persistence
887            .diff_since_saved(&self.piece_tree, &self.buffers)
888    }
889
890    /// Convert a byte offset to a line/column position
891    pub fn offset_to_position(&self, offset: usize) -> Option<Position> {
892        self.piece_tree
893            .offset_to_position(offset, &self.buffers)
894            .map(|(line, column)| Position { line, column })
895    }
896
897    /// Convert a line/column position to a byte offset
898    pub fn position_to_offset(&self, position: Position) -> usize {
899        self.piece_tree
900            .position_to_offset(position.line, position.column, &self.buffers)
901    }
902
903    /// Insert text at the given byte offset
904    pub fn insert_bytes(&mut self, offset: usize, text: Vec<u8>) -> Cursor {
905        if text.is_empty() {
906            return self.piece_tree.cursor_at_offset(offset);
907        }
908
909        // Mark as modified (updates version)
910        self.mark_content_modified();
911
912        // Count line feeds in the text to insert
913        let line_feed_cnt = Some(text.iter().filter(|&&b| b == b'\n').count());
914
915        // Optimization: try to append to existing buffer if insertion is at piece boundary
916        let (buffer_location, buffer_offset, text_len) =
917            if let Some(append_info) = self.try_append_to_existing_buffer(offset, &text) {
918                append_info
919            } else {
920                // Create a new StringBuffer for this insertion
921                let buffer_id = self.next_buffer_id;
922                self.next_buffer_id += 1;
923                let buffer = StringBuffer::new(buffer_id, text.clone());
924                self.buffers.push(buffer);
925                (BufferLocation::Added(buffer_id), 0, text.len())
926            };
927
928        // When line feeds have been scanned, ensure the chunk at the insertion
929        // point is loaded so compute_line_feeds_static can recount during splits.
930        if self.file_kind.has_line_feed_scan() {
931            self.ensure_chunk_loaded_at(offset);
932        }
933
934        // Update piece tree (need to pass buffers reference)
935        self.piece_tree.insert(
936            offset,
937            buffer_location,
938            buffer_offset,
939            text_len,
940            line_feed_cnt,
941            &self.buffers,
942        )
943    }
944
945    /// Try to append to an existing buffer if insertion point aligns with buffer end
946    /// Returns (BufferLocation, buffer_offset, text_len) if append succeeds, None otherwise
947    fn try_append_to_existing_buffer(
948        &mut self,
949        offset: usize,
950        text: &[u8],
951    ) -> Option<(BufferLocation, usize, usize)> {
952        // Only optimize for non-empty insertions after existing content
953        if text.is_empty() || offset == 0 {
954            return None;
955        }
956
957        // Find the piece containing the byte just before the insertion point
958        // This avoids the saturating_sub issue
959        let piece_info = self.piece_tree.find_by_offset(offset - 1)?;
960
961        // Check if insertion is exactly at the end of this piece
962        // offset_in_piece tells us where (offset-1) is within the piece
963        // For insertion to be at piece end, (offset-1) must be the last byte
964        let offset_in_piece = piece_info.offset_in_piece?;
965        if offset_in_piece + 1 != piece_info.bytes {
966            return None; // Not at the end of the piece
967        }
968
969        // Only append to "Added" buffers (not original Stored buffers)
970        if !matches!(piece_info.location, BufferLocation::Added(_)) {
971            return None;
972        }
973
974        let buffer_id = piece_info.location.buffer_id();
975        let buffer = self.buffers.get_mut(buffer_id)?;
976
977        // Check if buffer is loaded
978        let buffer_len = buffer.get_data()?.len();
979
980        // Check if this piece ends exactly at the end of its buffer
981        if piece_info.offset + piece_info.bytes != buffer_len {
982            return None;
983        }
984
985        // Perfect! Append to this buffer
986        let append_offset = buffer.append(text);
987
988        Some((piece_info.location, append_offset, text.len()))
989    }
990
991    /// Insert text (from &str) at the given byte offset
992    pub fn insert(&mut self, offset: usize, text: &str) {
993        self.insert_bytes(offset, text.as_bytes().to_vec());
994    }
995
996    /// Insert text at a line/column position
997    /// This now uses the optimized piece_tree.insert_at_position() for a single traversal
998    pub fn insert_at_position(&mut self, position: Position, text: Vec<u8>) -> Cursor {
999        if text.is_empty() {
1000            let offset = self.position_to_offset(position);
1001            return self.piece_tree.cursor_at_offset(offset);
1002        }
1003
1004        self.mark_content_modified();
1005
1006        // Count line feeds in the text to insert
1007        let line_feed_cnt = text.iter().filter(|&&b| b == b'\n').count();
1008
1009        // Create a new StringBuffer for this insertion
1010        let buffer_id = self.next_buffer_id;
1011        self.next_buffer_id += 1;
1012        let buffer = StringBuffer::new(buffer_id, text.clone());
1013        self.buffers.push(buffer);
1014
1015        // Use the optimized position-based insertion (single traversal)
1016        self.piece_tree.insert_at_position(
1017            position.line,
1018            position.column,
1019            BufferLocation::Added(buffer_id),
1020            0,
1021            text.len(),
1022            line_feed_cnt,
1023            &self.buffers,
1024        )
1025    }
1026
1027    /// Delete text starting at the given byte offset
1028    pub fn delete_bytes(&mut self, offset: usize, bytes: usize) {
1029        if bytes == 0 || offset >= self.total_bytes() {
1030            return;
1031        }
1032
1033        // When line feeds have been scanned, ensure chunks at delete boundaries
1034        // are loaded so compute_line_feeds_static can recount during splits.
1035        if self.file_kind.has_line_feed_scan() {
1036            self.ensure_chunk_loaded_at(offset);
1037            let end = (offset + bytes).min(self.total_bytes());
1038            if end > offset {
1039                self.ensure_chunk_loaded_at(end.saturating_sub(1));
1040            }
1041        }
1042
1043        // Update piece tree
1044        self.piece_tree.delete(offset, bytes, &self.buffers);
1045
1046        self.mark_content_modified();
1047    }
1048
1049    /// Delete text in a range
1050    pub fn delete(&mut self, range: Range<usize>) {
1051        if range.end > range.start {
1052            self.delete_bytes(range.start, range.end - range.start);
1053        }
1054    }
1055
1056    /// Delete text in a line/column range
1057    /// This now uses the optimized piece_tree.delete_position_range() for a single traversal
1058    pub fn delete_range(&mut self, start: Position, end: Position) {
1059        // Use the optimized position-based deletion
1060        self.piece_tree.delete_position_range(
1061            start.line,
1062            start.column,
1063            end.line,
1064            end.column,
1065            &self.buffers,
1066        );
1067        self.mark_content_modified();
1068    }
1069
1070    /// Replace the entire buffer content with new content
1071    /// This is an O(n) operation that rebuilds the piece tree in a single pass,
1072    /// avoiding the O(n²) complexity of applying individual edits.
1073    ///
1074    /// This is used for bulk operations like "replace all" where applying
1075    /// individual edits would be prohibitively slow.
1076    pub fn replace_content(&mut self, new_content: &str) {
1077        let bytes = new_content.len();
1078        let content_bytes = new_content.as_bytes().to_vec();
1079
1080        // Count line feeds in the new content
1081        let line_feed_cnt = content_bytes.iter().filter(|&&b| b == b'\n').count();
1082
1083        // Create a new StringBuffer for the new content
1084        let buffer_id = self.next_buffer_id;
1085        self.next_buffer_id += 1;
1086        let buffer = StringBuffer::new(buffer_id, content_bytes);
1087        self.buffers.push(buffer);
1088
1089        // Rebuild the piece tree with a single piece containing all the new content
1090        if bytes > 0 {
1091            self.piece_tree = PieceTree::new(
1092                BufferLocation::Added(buffer_id),
1093                0,
1094                bytes,
1095                Some(line_feed_cnt),
1096            );
1097        } else {
1098            self.piece_tree = PieceTree::empty();
1099        }
1100
1101        self.mark_content_modified();
1102    }
1103
1104    /// Restore a previously saved buffer state (for undo/redo of BulkEdit).
1105    ///
1106    /// This restores the piece tree AND the buffers list, which is critical
1107    /// because consolidate_after_save() replaces self.buffers. Without restoring
1108    /// buffers, the piece tree would reference buffer IDs that no longer exist.
1109    pub fn restore_buffer_state(&mut self, snapshot: &BufferSnapshot) {
1110        self.piece_tree = snapshot.piece_tree.clone();
1111        self.buffers = snapshot.buffers.clone();
1112        self.next_buffer_id = snapshot.next_buffer_id;
1113        self.mark_content_modified();
1114    }
1115
1116    /// Snapshot the current buffer state (piece tree + buffers) for BulkEdit undo/redo.
1117    ///
1118    /// The snapshot includes buffers because consolidate_after_save() can replace
1119    /// self.buffers between the snapshot and restore, which would otherwise cause
1120    /// the restored piece tree to reference nonexistent buffer IDs.
1121    pub fn snapshot_buffer_state(&self) -> Arc<BufferSnapshot> {
1122        Arc::new(BufferSnapshot {
1123            piece_tree: self.piece_tree.clone(),
1124            buffers: self.buffers.clone(),
1125            next_buffer_id: self.next_buffer_id,
1126        })
1127    }
1128
1129    /// Apply bulk edits efficiently in a single pass
1130    /// Returns the net change in bytes
1131    pub fn apply_bulk_edits(&mut self, edits: &[(usize, usize, &str)]) -> isize {
1132        // Pre-allocate buffers for all insert texts (only non-empty texts)
1133        // This avoids the borrow conflict in the closure
1134        // IMPORTANT: Only add entries for non-empty texts because the closure
1135        // is only called for edits with non-empty insert text
1136        let mut buffer_info: Vec<(BufferLocation, usize, usize, Option<usize>)> = Vec::new();
1137
1138        for (_, _, text) in edits {
1139            if !text.is_empty() {
1140                let buffer_id = self.next_buffer_id;
1141                self.next_buffer_id += 1;
1142                let content = text.as_bytes().to_vec();
1143                let lf_cnt = content.iter().filter(|&&b| b == b'\n').count();
1144                let bytes = content.len();
1145                let buffer = StringBuffer::new(buffer_id, content);
1146                self.buffers.push(buffer);
1147                buffer_info.push((BufferLocation::Added(buffer_id), 0, bytes, Some(lf_cnt)));
1148            }
1149            // No placeholder for empty texts - the closure is only called for non-empty texts
1150        }
1151
1152        // Now call apply_bulk_edits with a simple index-based closure
1153        let mut idx = 0;
1154        let delta = self
1155            .piece_tree
1156            .apply_bulk_edits(edits, &self.buffers, |_text| {
1157                let info = buffer_info[idx];
1158                idx += 1;
1159                info
1160            });
1161
1162        self.mark_content_modified();
1163        delta
1164    }
1165
1166    /// Get text from a byte offset range
1167    /// This now uses the optimized piece_tree.iter_pieces_in_range() for a single traversal
1168    /// Get text from a byte offset range (read-only)
1169    /// Returns None if any buffer in the range is unloaded
1170    /// PRIVATE: External code should use get_text_range_mut() which handles lazy loading
1171    fn get_text_range(&self, offset: usize, bytes: usize) -> Option<Vec<u8>> {
1172        if bytes == 0 {
1173            return Some(Vec::new());
1174        }
1175
1176        let mut result = Vec::with_capacity(bytes);
1177        let end_offset = offset + bytes;
1178        let mut collected = 0;
1179
1180        // Use the efficient piece iterator (single O(log n) traversal + O(N) iteration)
1181        for piece_view in self.piece_tree.iter_pieces_in_range(offset, end_offset) {
1182            let buffer_id = piece_view.location.buffer_id();
1183            if let Some(buffer) = self.buffers.get(buffer_id) {
1184                // Calculate the range to read from this piece
1185                let piece_start_in_doc = piece_view.doc_offset;
1186                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;
1187
1188                // Clip to the requested range
1189                let read_start = offset.max(piece_start_in_doc);
1190                let read_end = end_offset.min(piece_end_in_doc);
1191
1192                if read_end > read_start {
1193                    let offset_in_piece = read_start - piece_start_in_doc;
1194                    let bytes_to_read = read_end - read_start;
1195
1196                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
1197                    let buffer_end = buffer_start + bytes_to_read;
1198
1199                    // Return None if buffer is unloaded (type-safe)
1200                    let data = buffer.get_data()?;
1201
1202                    if buffer_end <= data.len() {
1203                        result.extend_from_slice(&data[buffer_start..buffer_end]);
1204                        collected += bytes_to_read;
1205
1206                        if collected >= bytes {
1207                            break;
1208                        }
1209                    }
1210                }
1211            }
1212        }
1213
1214        Some(result)
1215    }
1216
1217    /// Get text from a byte offset range with lazy loading
1218    /// This will load unloaded chunks on-demand and always returns complete data
1219    ///
1220    /// Returns an error if loading fails or if data cannot be read for any reason.
1221    ///
1222    /// NOTE: Currently loads entire buffers on-demand. Future optimization would split
1223    /// large pieces and load only LOAD_CHUNK_SIZE chunks at a time.
1224    pub fn get_text_range_mut(&mut self, offset: usize, bytes: usize) -> Result<Vec<u8>> {
1225        let _span = tracing::info_span!("get_text_range_mut", offset, bytes).entered();
1226        if bytes == 0 {
1227            return Ok(Vec::new());
1228        }
1229
1230        let mut result = Vec::with_capacity(bytes);
1231        // Clamp end_offset to buffer length to handle reads beyond EOF
1232        let end_offset = (offset + bytes).min(self.len());
1233        let mut current_offset = offset;
1234        let mut iteration_count = 0u32;
1235
1236        // Keep iterating until we've collected all requested bytes
1237        while current_offset < end_offset {
1238            iteration_count += 1;
1239            let mut made_progress = false;
1240            let mut restarted_iteration = false;
1241
1242            // Use the efficient piece iterator (single O(log n) traversal + O(N) iteration)
1243            for piece_view in self
1244                .piece_tree
1245                .iter_pieces_in_range(current_offset, end_offset)
1246            {
1247                let buffer_id = piece_view.location.buffer_id();
1248
1249                // Check if buffer needs loading
1250                let needs_loading = self
1251                    .buffers
1252                    .get(buffer_id)
1253                    .map(|b| !b.is_loaded())
1254                    .unwrap_or(false);
1255
1256                if needs_loading && self.chunk_split_and_load(&piece_view, current_offset)? {
1257                    restarted_iteration = true;
1258                    break;
1259                }
1260
1261                // Calculate the range to read from this piece
1262                let piece_start_in_doc = piece_view.doc_offset;
1263                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;
1264
1265                // Clip to the requested range
1266                let read_start = current_offset.max(piece_start_in_doc);
1267                let read_end = end_offset.min(piece_end_in_doc);
1268
1269                if read_end > read_start {
1270                    let offset_in_piece = read_start - piece_start_in_doc;
1271                    let bytes_to_read = read_end - read_start;
1272
1273                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
1274                    let buffer_end = buffer_start + bytes_to_read;
1275
1276                    // Buffer should be loaded now
1277                    let buffer = self.buffers.get(buffer_id).context("Buffer not found")?;
1278                    let data = buffer
1279                        .get_data()
1280                        .context("Buffer data unavailable after load")?;
1281
1282                    anyhow::ensure!(
1283                        buffer_end <= data.len(),
1284                        "Buffer range out of bounds: requested {}..{}, buffer size {}",
1285                        buffer_start,
1286                        buffer_end,
1287                        data.len()
1288                    );
1289
1290                    result.extend_from_slice(&data[buffer_start..buffer_end]);
1291                    current_offset = read_end;
1292                    made_progress = true;
1293                }
1294            }
1295
1296            // If we didn't make progress and didn't restart iteration, this is an error
1297            if !made_progress && !restarted_iteration {
1298                tracing::error!(
1299                    "get_text_range_mut: No progress at offset {} (requested range: {}..{}, buffer len: {})",
1300                    current_offset,
1301                    offset,
1302                    end_offset,
1303                    self.len()
1304                );
1305                tracing::error!(
1306                    "Piece tree stats: {} total bytes",
1307                    self.piece_tree.stats().total_bytes
1308                );
1309                anyhow::bail!(
1310                    "Failed to read data at offset {}: no progress made (requested {}..{}, buffer len: {})",
1311                    current_offset,
1312                    offset,
1313                    end_offset,
1314                    self.len()
1315                );
1316            }
1317        }
1318
1319        if iteration_count > 1 {
1320            tracing::info!(
1321                iteration_count,
1322                result_len = result.len(),
1323                "get_text_range_mut: completed with multiple iterations"
1324            );
1325        }
1326
1327        Ok(result)
1328    }
1329
1330    /// Prepare a viewport for rendering
1331    ///
1332    /// This is called before rendering with &mut access to pre-load all data
1333    /// that will be needed for the viewport. It estimates the number of bytes
1334    /// needed based on the line count and pre-loads them.
1335    ///
1336    /// # Arguments
1337    /// * `start_offset` - The byte offset where the viewport starts
1338    /// * `line_count` - The number of lines to prepare (estimate)
1339    ///
1340    /// # Returns
1341    /// Ok(()) if preparation succeeded, Err if loading failed
1342    pub fn prepare_viewport(&mut self, start_offset: usize, line_count: usize) -> Result<()> {
1343        let _span = tracing::info_span!("prepare_viewport", start_offset, line_count).entered();
1344        // Estimate how many bytes we need (pessimistic assumption)
1345        // Average line length is typically 80-100 bytes, but we use 200 to be safe
1346        let estimated_bytes = line_count.saturating_mul(200);
1347
1348        // Cap the estimate at the remaining bytes in the document
1349        let remaining_bytes = self.total_bytes().saturating_sub(start_offset);
1350        let bytes_to_load = estimated_bytes.min(remaining_bytes);
1351        tracing::trace!(
1352            bytes_to_load,
1353            total_bytes = self.total_bytes(),
1354            "prepare_viewport loading"
1355        );
1356
1357        // Pre-load with full chunk-splitting support
1358        // This may load more than we need, but ensures all data is available
1359        self.get_text_range_mut(start_offset, bytes_to_load)?;
1360
1361        Ok(())
1362    }
1363
    /// Split a piece that references a large unloaded buffer, create a chunk
    /// buffer for the region around `current_offset`, and load it.
    ///
    /// # Arguments
    /// * `piece_view` - The piece (as seen by the caller's iterator) whose
    ///   backing buffer is not loaded
    /// * `current_offset` - Document offset the caller is currently reading
    ///   at; used to choose which part of an oversized piece to load
    ///
    /// # Returns
    /// Returns `true` if the piece tree was modified (caller must restart its
    /// iteration), `false` if the piece was small enough to load in-place.
    ///
    /// # Errors
    /// Fails if the buffer (or the newly created chunk buffer) cannot be
    /// found, if the chunk buffer cannot be created, or if loading fails.
    fn chunk_split_and_load(
        &mut self,
        piece_view: &PieceView,
        current_offset: usize,
    ) -> Result<bool> {
        let buffer_id = piece_view.location.buffer_id();

        // The underlying buffer may be much larger than this piece (e.g. the
        // whole-file Stored buffer after rebuild_with_pristine_saved_root).
        // We must chunk-split if the piece exceeds LOAD_CHUNK_SIZE or if the
        // unloaded buffer is larger than the piece, because `load()` loads
        // the entire buffer.
        // A missing buffer or a `None` from `unloaded_bytes()` is treated as
        // size 0 here.
        let buffer_bytes = self
            .buffers
            .get(buffer_id)
            .and_then(|b| b.unloaded_bytes())
            .unwrap_or(0);
        let needs_chunk_split =
            piece_view.bytes > LOAD_CHUNK_SIZE || buffer_bytes > piece_view.bytes;

        tracing::info!(
            buffer_id,
            piece_bytes = piece_view.bytes,
            buffer_bytes,
            needs_chunk_split,
            piece_doc_offset = piece_view.doc_offset,
            current_offset,
            "chunk_split_and_load: loading unloaded piece"
        );

        if !needs_chunk_split {
            // Piece is small enough and its buffer matches — load in-place.
            // The tree is untouched, so the caller's iterator stays valid.
            let _span = tracing::info_span!(
                "load_small_buffer",
                piece_bytes = piece_view.bytes,
                buffer_id,
            )
            .entered();
            self.buffers
                .get_mut(buffer_id)
                .context("Buffer not found")?
                .load(&**self.persistence.fs())
                .context("Failed to load buffer")?;
            return Ok(false);
        }

        let _span = tracing::info_span!(
            "chunk_split_and_load",
            piece_bytes = piece_view.bytes,
            buffer_id,
        )
        .entered();

        // Position of the caller's read offset relative to the piece start
        // (0 if the offset lies before the piece).
        let piece_start_in_doc = piece_view.doc_offset;
        let offset_in_piece = current_offset.saturating_sub(piece_start_in_doc);

        // When the piece already fits within LOAD_CHUNK_SIZE, create a chunk
        // buffer for the exact piece range (no alignment/splitting needed).
        // Alignment rounding is only useful when carving a sub-range out of a
        // piece larger than LOAD_CHUNK_SIZE.
        let (chunk_start_in_buffer, chunk_bytes) = if piece_view.bytes <= LOAD_CHUNK_SIZE {
            (piece_view.buffer_offset, piece_view.bytes)
        } else {
            // Round the buffer position down to a CHUNK_ALIGNMENT boundary,
            // then take at most LOAD_CHUNK_SIZE bytes without running past
            // the end of the piece.
            let start =
                (piece_view.buffer_offset + offset_in_piece) / CHUNK_ALIGNMENT * CHUNK_ALIGNMENT;
            let bytes = LOAD_CHUNK_SIZE
                .min((piece_view.buffer_offset + piece_view.bytes).saturating_sub(start));
            (start, bytes)
        };

        // Calculate document offsets for splitting
        // (saturating_sub clamps to the piece start if alignment rounded the
        // chunk start to before the piece's own buffer offset).
        let chunk_start_offset_in_piece =
            chunk_start_in_buffer.saturating_sub(piece_view.buffer_offset);
        let split_start_in_doc = piece_start_in_doc + chunk_start_offset_in_piece;
        let split_end_in_doc = split_start_in_doc + chunk_bytes;

        // Split the piece to isolate the chunk
        // (a split is skipped when the chunk edge coincides with a piece edge).
        if chunk_start_offset_in_piece > 0 {
            self.piece_tree
                .split_at_offset(split_start_in_doc, &self.buffers);
        }
        if split_end_in_doc < piece_start_in_doc + piece_view.bytes {
            self.piece_tree
                .split_at_offset(split_end_in_doc, &self.buffers);
        }

        // Create a new buffer for this chunk
        let chunk_buffer = self
            .buffers
            .get(buffer_id)
            .context("Buffer not found")?
            .create_chunk_buffer(self.next_buffer_id, chunk_start_in_buffer, chunk_bytes)
            .context("Failed to create chunk buffer")?;

        // The id is only consumed once the chunk buffer was created successfully.
        self.next_buffer_id += 1;
        let new_buffer_id = chunk_buffer.id;
        self.buffers.push(chunk_buffer);

        // Update the piece to reference the new chunk buffer
        self.piece_tree.replace_buffer_reference(
            buffer_id,
            piece_view.buffer_offset + chunk_start_offset_in_piece,
            chunk_bytes,
            BufferLocation::Added(new_buffer_id),
        );

        // Load the chunk buffer
        self.buffers
            .get_mut(new_buffer_id)
            .context("Chunk buffer not found")?
            .load(&**self.persistence.fs())
            .context("Failed to load chunk")?;

        // split_at_offset uses compute_line_feeds_static which returns None
        // for unloaded buffers, destroying the scanned line feed counts.
        // Fix up: the loaded chunk is counted from memory, remaining unloaded
        // pieces use the filesystem's count_line_feeds_in_range.
        // NOTE(review): a leaf whose `scan_leaf` call fails is silently left
        // without a line feed count — confirm this best-effort is intended.
        if self.file_kind.has_line_feed_scan() {
            let leaves = self.piece_tree.get_leaves();
            let mut fixups: Vec<(usize, usize)> = Vec::new();
            for (idx, leaf) in leaves.iter().enumerate() {
                if leaf.line_feed_cnt.is_none() {
                    if let Ok(count) = self.scan_leaf(leaf) {
                        fixups.push((idx, count));
                    }
                }
            }
            if !fixups.is_empty() {
                self.piece_tree.update_leaf_line_feeds_path_copy(&fixups);
            }
        }

        // Keep saved_root in sync with viewport-loading tree restructures so
        // that diff_since_saved() can match by (location, offset) identity.
        //
        // When !modified the current tree IS the saved state, so just snapshot.
        // When modified, we must apply the same Stored→Added leaf replacement
        // to saved_root so the diff doesn't see loaded-but-unedited regions as
        // changed.
        if !self.persistence.is_modified() {
            self.persistence.set_saved_root(self.piece_tree.root());
        } else {
            self.persistence.apply_chunk_load_to_saved_root(
                buffer_id,
                chunk_start_in_buffer,
                chunk_bytes,
                new_buffer_id,
            );
        }

        // The tree changed: the caller's piece iterator is stale.
        Ok(true)
    }
1520
1521    /// Get all text as a single Vec<u8>
1522    /// Returns None if any buffers are unloaded (lazy loading)
1523    /// CRATE-PRIVATE: External code should use get_text_range_mut() or DocumentModel methods
1524    pub(crate) fn get_all_text(&self) -> Option<Vec<u8>> {
1525        self.get_text_range(0, self.total_bytes())
1526    }
1527
1528    /// Get all text as a String
1529    /// Returns None if any buffers are unloaded (lazy loading)
1530    /// CRATE-PRIVATE: External code should use get_text_range_mut() or DocumentModel methods
1531    pub(crate) fn get_all_text_string(&self) -> Option<String> {
1532        self.get_all_text()
1533            .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
1534    }
1535
1536    /// Get text from a byte range as bytes
1537    /// CRATE-PRIVATE: Returns empty vector if any buffers are unloaded (silently fails!)
1538    /// Only use this when you KNOW the data is loaded (e.g., for syntax highlighting small regions)
1539    /// External code should use get_text_range_mut() or DocumentModel methods
1540    pub(crate) fn slice_bytes(&self, range: Range<usize>) -> Vec<u8> {
1541        self.get_text_range(range.start, range.end.saturating_sub(range.start))
1542            .unwrap_or_default()
1543    }
1544
1545    /// Get all text as a String
1546    /// Returns None if any buffers are unloaded (lazy loading)
1547    pub fn to_string(&self) -> Option<String> {
1548        self.get_all_text_string()
1549    }
1550
1551    /// Get the total number of bytes
1552    pub fn len(&self) -> usize {
1553        self.total_bytes()
1554    }
1555
1556    /// Check if the buffer is empty
1557    pub fn is_empty(&self) -> bool {
1558        self.total_bytes() == 0
1559    }
1560
1561    /// Get the file path associated with this buffer
1562    pub fn file_path(&self) -> Option<&Path> {
1563        self.persistence.file_path()
1564    }
1565
1566    /// Update the file path after a rename operation on disk.
1567    pub fn rename_file_path(&mut self, path: PathBuf) {
1568        self.persistence.set_file_path(path);
1569    }
1570
1571    /// Clear the file path (make buffer unnamed)
1572    /// Note: This does NOT affect Unloaded chunk file_paths used for lazy loading.
1573    /// Those still point to the original source file for chunk loading.
1574    pub fn clear_file_path(&mut self) {
1575        self.persistence.clear_file_path();
1576    }
1577
1578    /// Extend buffer to include more bytes from a streaming source file.
1579    /// Used for stdin streaming where the temp file grows over time.
1580    /// Appends a new Unloaded chunk for the new bytes.
1581    pub fn extend_streaming(&mut self, source_path: &Path, new_size: usize) {
1582        let old_size = self.total_bytes();
1583        if new_size <= old_size {
1584            return;
1585        }
1586
1587        let additional_bytes = new_size - old_size;
1588
1589        // Create new Unloaded buffer for the appended region
1590        let buffer_id = self.next_buffer_id;
1591        self.next_buffer_id += 1;
1592
1593        let new_buffer = StringBuffer::new_unloaded(
1594            buffer_id,
1595            source_path.to_path_buf(),
1596            old_size,         // file_offset - where this chunk starts in the file
1597            additional_bytes, // bytes - size of this chunk
1598        );
1599        self.buffers.push(new_buffer);
1600
1601        // Append piece at end of document (insert at offset == total_bytes)
1602        self.piece_tree.insert(
1603            old_size,
1604            BufferLocation::Stored(buffer_id),
1605            0,
1606            additional_bytes,
1607            None, // line_feed_cnt unknown for unloaded chunk
1608            &self.buffers,
1609        );
1610    }
1611
1612    /// Check if the buffer has been modified since last save
1613    pub fn is_modified(&self) -> bool {
1614        self.persistence.is_modified()
1615    }
1616
1617    /// Clear the modified flag (after save)
1618    pub fn clear_modified(&mut self) {
1619        self.persistence.clear_modified();
1620    }
1621
1622    /// Set the modified flag explicitly
1623    /// Used by undo/redo to restore the correct modified state
1624    pub fn set_modified(&mut self, modified: bool) {
1625        self.persistence.set_modified(modified);
1626    }
1627
1628    /// Check if buffer has pending changes for recovery auto-save
1629    pub fn is_recovery_pending(&self) -> bool {
1630        self.persistence.is_recovery_pending()
1631    }
1632
1633    /// Mark buffer as needing recovery auto-save (call after edits)
1634    pub fn set_recovery_pending(&mut self, pending: bool) {
1635        self.persistence.set_recovery_pending(pending);
1636    }
1637
1638    /// Ensure the buffer chunk at the given byte offset is loaded.
1639    ///
1640    /// When `line_feeds_scanned` is true, piece splits during insert/delete need
1641    /// the buffer data to be loaded so `compute_line_feeds_static` can accurately
1642    /// recount line feeds for each half. This method loads the chunk if needed.
1643    fn ensure_chunk_loaded_at(&mut self, offset: usize) {
1644        if let Some(piece_info) = self.piece_tree.find_by_offset(offset) {
1645            let buffer_id = piece_info.location.buffer_id();
1646            if let Some(buffer) = self.buffers.get_mut(buffer_id) {
1647                if !buffer.is_loaded() {
1648                    let buf_bytes = buffer.unloaded_bytes().unwrap_or(0);
1649                    tracing::info!(
1650                        "ensure_chunk_loaded_at: loading buffer {} ({} bytes) for offset {}",
1651                        buffer_id,
1652                        buf_bytes,
1653                        offset
1654                    );
1655                    if let Err(e) = buffer.load(&**self.persistence.fs()) {
1656                        tracing::warn!("Failed to load chunk at offset {offset}: {e}");
1657                    }
1658                }
1659            }
1660        }
1661    }
1662
1663    /// Check if this is a large file with lazy loading enabled
1664    pub fn is_large_file(&self) -> bool {
1665        self.file_kind.is_large_file()
1666    }
1667
1668    /// Check if line feeds have been scanned for this large file.
1669    /// When true, `line_count()` returns exact values.
1670    pub fn has_line_feed_scan(&self) -> bool {
1671        self.file_kind.has_line_feed_scan()
1672    }
1673
1674    /// Get the raw piece tree leaves (for storing alongside scan chunks).
1675    pub fn piece_tree_leaves(&self) -> Vec<crate::model::piece_tree::LeafData> {
1676        self.piece_tree.get_leaves()
1677    }
1678
1679    /// Prepare work items for an incremental line scan.
1680    ///
1681    /// First splits any oversized leaves in the piece tree so every leaf is
1682    /// at most `LOAD_CHUNK_SIZE` bytes.  Then returns one work item per leaf.
1683    /// After scanning, `get_text_range_mut` will never need to split a scanned
1684    /// leaf (it's already chunk-sized), so line-feed counts are preserved.
1685    ///
1686    /// Returns `(chunks, total_bytes)`.
1687    pub fn prepare_line_scan(&mut self) -> (Vec<LineScanChunk>, usize) {
1688        // Pre-split the tree so every leaf ≤ LOAD_CHUNK_SIZE.
1689        self.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
1690
1691        let leaves = self.piece_tree.get_leaves();
1692        let total_bytes: usize = leaves.iter().map(|l| l.bytes).sum();
1693        let mut chunks = Vec::new();
1694
1695        for (idx, leaf) in leaves.iter().enumerate() {
1696            chunks.push(LineScanChunk {
1697                leaf_index: idx,
1698                byte_len: leaf.bytes,
1699                already_known: leaf.line_feed_cnt.is_some(),
1700            });
1701        }
1702
1703        (chunks, total_bytes)
1704    }
1705
1706    /// Initialize a chunked search scan over this buffer's piece tree.
1707    ///
1708    /// Used for in-editor Ctrl+F (incremental, yields to the event loop
1709    /// between chunks) and for searching dirty buffers during project grep.
1710    /// For searching files on disk, use `FileSystem::search_file` instead.
1711    pub fn search_scan_init(
1712        &mut self,
1713        regex: regex::bytes::Regex,
1714        max_matches: usize,
1715        query_len: usize,
1716    ) -> ChunkedSearchState {
1717        let (chunks, total_bytes) = self.prepare_line_scan();
1718        ChunkedSearchState {
1719            chunks,
1720            next_chunk: 0,
1721            next_doc_offset: 0,
1722            total_bytes,
1723            scanned_bytes: 0,
1724            regex,
1725            matches: Vec::new(),
1726            overlap_tail: Vec::new(),
1727            overlap_doc_offset: 0,
1728            max_matches,
1729            capped: false,
1730            query_len,
1731            running_line: 1,
1732        }
1733    }
1734
    /// Process one chunk of a chunked search scan.
    ///
    /// Loads the next chunk via `get_text_range_mut`, prepends overlap from
    /// the previous chunk, runs the regex, and appends matches to `state`
    /// with line/column/context computed on the fly from the loaded bytes.
    ///
    /// Line numbers are tracked incrementally via `running_line` — each
    /// chunk counts newlines in its non-overlap portion to advance the
    /// counter for the next chunk, and matches use an incremental cursor
    /// so total line-counting work is O(chunk_size), not O(chunk × matches).
    ///
    /// Returns `Ok(true)` if there are more chunks to process, `Ok(false)`
    /// when the scan is complete.
    ///
    /// # Errors
    ///
    /// A failure of `get_text_range_mut` is converted with
    /// `std::io::Error::other` and returned. The chunk cursors in `state`
    /// have already been advanced at that point, so the failed chunk is not
    /// retried by a subsequent call.
    ///
    /// TODO: For concurrent/parallel search (searching multiple files at once),
    /// chunks would need to return chunk-relative line numbers and have them
    /// fixed up with each file's starting line offset after all chunks complete.
    pub fn search_scan_next_chunk(
        &mut self,
        state: &mut ChunkedSearchState,
    ) -> std::io::Result<bool> {
        if state.is_done() {
            return Ok(false);
        }

        let chunk_info = state.chunks[state.next_chunk].clone();
        let doc_offset = state.next_doc_offset;

        // Advance the cursors before the fallible load below.
        state.next_chunk += 1;
        state.scanned_bytes += chunk_info.byte_len;
        state.next_doc_offset += chunk_info.byte_len;

        // Load the chunk bytes
        let chunk_bytes = self
            .get_text_range_mut(doc_offset, chunk_info.byte_len)
            .map_err(std::io::Error::other)?;

        // Build search buffer: overlap tail + new chunk
        let overlap_len = state.overlap_tail.len();
        let mut search_buf = Vec::with_capacity(overlap_len + chunk_bytes.len());
        search_buf.extend_from_slice(&state.overlap_tail);
        search_buf.extend_from_slice(&chunk_bytes);

        // Document offset of search_buf[0]: the start of the saved overlap if
        // there is one, otherwise the start of this chunk.
        let buf_doc_offset = if overlap_len > 0 {
            state.overlap_doc_offset
        } else {
            doc_offset
        };

        // Line number at buf_doc_offset: running_line tracks the line at
        // doc_offset (start of new chunk data). Count newlines in the overlap
        // prefix to get the line at the start of the full search_buf.
        let newlines_in_overlap = search_buf[..overlap_len]
            .iter()
            .filter(|&&b| b == b'\n')
            .count();
        let mut line_at = state.running_line - newlines_in_overlap;
        // Index in search_buf up to which newlines are already folded into line_at.
        let mut counted_to = 0usize;

        // Run regex on the combined buffer
        for m in state.regex.find_iter(&search_buf) {
            // Skip matches entirely within the overlap (already found)
            if overlap_len > 0 && m.end() <= overlap_len {
                continue;
            }

            // Stop once the match limit is reached and record that the
            // result list was truncated.
            if state.matches.len() >= state.max_matches {
                state.capped = true;
                break;
            }

            // Advance line counter incrementally to this match
            line_at += search_buf[counted_to..m.start()]
                .iter()
                .filter(|&&b| b == b'\n')
                .count();
            counted_to = m.start();

            // Find line boundaries in search_buf for context
            // If no newline is found on a side, the edge of search_buf is used
            // as the boundary, so context and column are relative to the data
            // available in this buffer.
            let line_start = search_buf[..m.start()]
                .iter()
                .rposition(|&b| b == b'\n')
                .map(|p| p + 1)
                .unwrap_or(0);
            let line_end = search_buf[m.start()..]
                .iter()
                .position(|&b| b == b'\n')
                .map(|p| m.start() + p)
                .unwrap_or(search_buf.len());

            let match_doc_offset = buf_doc_offset + m.start();
            let match_len = m.end() - m.start();
            // 1-based byte column measured from line_start.
            let column = m.start() - line_start + 1;
            let context = String::from_utf8_lossy(&search_buf[line_start..line_end]).into_owned();

            state.matches.push(SearchMatch {
                byte_offset: match_doc_offset,
                length: match_len,
                line: line_at,
                column,
                context,
            });
        }

        // Advance running_line by newlines in the new (non-overlap) chunk data
        let newlines_in_chunk = chunk_bytes.iter().filter(|&&b| b == b'\n').count();
        state.running_line += newlines_in_chunk;

        // Save overlap tail for next chunk
        // The tail is max(query_len, 256) bytes, capped at the chunk length,
        // and is taken from this chunk's bytes only.
        let max_overlap = state.query_len.max(256).min(chunk_bytes.len());
        let tail_start = chunk_bytes.len().saturating_sub(max_overlap);
        state.overlap_tail = chunk_bytes[tail_start..].to_vec();
        state.overlap_doc_offset = doc_offset + tail_start;

        Ok(!state.is_done())
    }
1851
1852    /// Run a complete chunked search over the piece tree (all chunks).
1853    ///
1854    /// Synchronous variant — used for dirty buffer snapshots in project
1855    /// grep and in tests.  For on-disk files, use `FileSystem::search_file`.
1856    pub fn search_scan_all(
1857        &mut self,
1858        regex: regex::bytes::Regex,
1859        max_matches: usize,
1860        query_len: usize,
1861    ) -> std::io::Result<ChunkedSearchState> {
1862        let mut state = self.search_scan_init(regex, max_matches, query_len);
1863        while self.search_scan_next_chunk(&mut state)? {}
1864        Ok(state)
1865    }
1866
1867    /// Build a hybrid search plan from the piece tree.
1868    ///
1869    /// Extracts regions (unloaded file ranges + loaded in-memory data) that
1870    /// can be searched independently.  The plan is `Send` so it can be
1871    /// executed on a background thread via `HybridSearchPlan::execute`.
1872    ///
1873    /// Returns `None` if the buffer has no file path (caller should fall
1874    /// back to `search_scan_all`).
1875    pub fn search_hybrid_plan(&mut self) -> Option<HybridSearchPlan> {
1876        let file_path = self.persistence.file_path_owned()?;
1877
1878        self.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
1879        let leaves = self.piece_tree.get_leaves();
1880
1881        let mut regions: Vec<SearchRegion> = Vec::new();
1882        let mut doc_offset = 0usize;
1883
1884        for leaf in &leaves {
1885            let buf = self.buffers.get(leaf.location.buffer_id());
1886            let is_unloaded_stored = matches!(
1887                (&leaf.location, buf),
1888                (
1889                    BufferLocation::Stored(_),
1890                    Some(StringBuffer {
1891                        data: BufferData::Unloaded { .. },
1892                        ..
1893                    }),
1894                )
1895            );
1896
1897            if is_unloaded_stored {
1898                let file_offset = match buf.unwrap().data {
1899                    BufferData::Unloaded {
1900                        file_offset: fo, ..
1901                    } => fo + leaf.offset,
1902                    _ => unreachable!(),
1903                };
1904
1905                // Merge with previous unloaded region if contiguous
1906                if let Some(SearchRegion::Unloaded {
1907                    file_offset: prev_fo,
1908                    bytes: prev_bytes,
1909                    ..
1910                }) = regions.last_mut()
1911                {
1912                    if *prev_fo + *prev_bytes == file_offset {
1913                        *prev_bytes += leaf.bytes;
1914                        doc_offset += leaf.bytes;
1915                        continue;
1916                    }
1917                }
1918                regions.push(SearchRegion::Unloaded {
1919                    file_offset,
1920                    bytes: leaf.bytes,
1921                    doc_offset,
1922                });
1923            } else {
1924                let data = match buf.and_then(|b| b.get_data()) {
1925                    Some(full) => {
1926                        let end = (leaf.offset + leaf.bytes).min(full.len());
1927                        full[leaf.offset..end].to_vec()
1928                    }
1929                    None => match self.get_text_range_mut(doc_offset, leaf.bytes) {
1930                        Ok(d) => d,
1931                        Err(_) => {
1932                            doc_offset += leaf.bytes;
1933                            continue;
1934                        }
1935                    },
1936                };
1937
1938                // Merge with previous loaded region
1939                if let Some(SearchRegion::Loaded {
1940                    data: prev_data, ..
1941                }) = regions.last_mut()
1942                {
1943                    prev_data.extend_from_slice(&data);
1944                    doc_offset += leaf.bytes;
1945                    continue;
1946                }
1947                regions.push(SearchRegion::Loaded { data, doc_offset });
1948            }
1949
1950            doc_offset += leaf.bytes;
1951        }
1952
1953        Some(HybridSearchPlan { file_path, regions })
1954    }
1955
1956    /// Hybrid search: uses `fs.search_file` for unloaded piece-tree regions
1957    /// (searches where the data lives, no network transfer) and in-memory regex
1958    /// for loaded/edited regions.  Handles overlap at region boundaries.
1959    ///
1960    /// For a huge remote file with a small local edit, this avoids transferring
1961    /// the entire file — only match metadata crosses the network.
1962    ///
1963    /// Falls back to `search_scan_all` when the buffer has no file path or is
1964    /// fully loaded.
1965    pub fn search_hybrid(
1966        &mut self,
1967        pattern: &str,
1968        opts: &FileSearchOptions,
1969        regex: Regex,
1970        max_matches: usize,
1971        query_len: usize,
1972    ) -> io::Result<Vec<SearchMatch>> {
1973        let plan = match self.search_hybrid_plan() {
1974            Some(p) => p,
1975            None => {
1976                let state = self.search_scan_all(regex, max_matches, query_len)?;
1977                return Ok(state.matches);
1978            }
1979        };
1980        plan.execute(
1981            &**self.persistence.fs(),
1982            pattern,
1983            opts,
1984            &regex,
1985            max_matches,
1986            query_len,
1987        )
1988    }
1989
1990    /// Count `\n` bytes in a single leaf.
1991    ///
1992    /// Uses `count_line_feeds_in_range` for unloaded buffers, which remote
1993    /// filesystem implementations can override to count server-side.
1994    pub fn scan_leaf(&self, leaf: &crate::model::piece_tree::LeafData) -> std::io::Result<usize> {
1995        let buffer_id = leaf.location.buffer_id();
1996        let buffer = self
1997            .buffers
1998            .get(buffer_id)
1999            .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::NotFound, "buffer not found"))?;
2000
2001        let count = match &buffer.data {
2002            crate::model::piece_tree::BufferData::Loaded { data, .. } => {
2003                let end = (leaf.offset + leaf.bytes).min(data.len());
2004                data[leaf.offset..end]
2005                    .iter()
2006                    .filter(|&&b| b == b'\n')
2007                    .count()
2008            }
2009            crate::model::piece_tree::BufferData::Unloaded {
2010                file_path,
2011                file_offset,
2012                ..
2013            } => {
2014                let read_offset = *file_offset as u64 + leaf.offset as u64;
2015                self.persistence.fs().count_line_feeds_in_range(
2016                    file_path,
2017                    read_offset,
2018                    leaf.bytes,
2019                )?
2020            }
2021        };
2022        Ok(count)
2023    }
2024
2025    /// Return the I/O parameters for an unloaded leaf, or `None` if loaded.
2026    ///
2027    /// Used by the incremental scan to distinguish leaves that can be counted
2028    /// in-memory (via `scan_leaf`) from those that need filesystem I/O.
2029    pub fn leaf_io_params(
2030        &self,
2031        leaf: &crate::model::piece_tree::LeafData,
2032    ) -> Option<(std::path::PathBuf, u64, usize)> {
2033        let buffer_id = leaf.location.buffer_id();
2034        let buffer = self.buffers.get(buffer_id)?;
2035        match &buffer.data {
2036            crate::model::piece_tree::BufferData::Loaded { .. } => None,
2037            crate::model::piece_tree::BufferData::Unloaded {
2038                file_path,
2039                file_offset,
2040                ..
2041            } => {
2042                let read_offset = *file_offset as u64 + leaf.offset as u64;
2043                Some((file_path.clone(), read_offset, leaf.bytes))
2044            }
2045        }
2046    }
2047
2048    /// Get a reference to the string buffers (for parallel scanning).
2049    pub fn buffer_slice(&self) -> &[StringBuffer] {
2050        &self.buffers
2051    }
2052
2053    /// Apply the results of an incremental line scan.
2054    pub fn apply_scan_updates(&mut self, updates: &[(usize, usize)]) {
2055        self.piece_tree.update_leaf_line_feeds(updates);
2056        self.file_kind.mark_line_feed_scan_complete();
2057    }
2058
2059    /// After an incremental line-feed scan completes, rebuild the tree so that
2060    /// `saved_root` and the current tree share `Arc` pointers for unedited
2061    /// subtrees. This makes `diff_since_saved()` O(edited regions) instead of
2062    /// O(file size).
2063    pub fn rebuild_with_pristine_saved_root(&mut self, scan_updates: &[(usize, usize)]) {
2064        let file_size = match self.persistence.saved_file_size() {
2065            Some(s) => s,
2066            None => {
2067                // Fallback: no saved file size means we can't build a pristine
2068                // tree. Just apply updates the old way.
2069                self.apply_scan_updates(scan_updates);
2070                return;
2071            }
2072        };
2073
2074        // --- Walk the current tree to extract deletions and insertions ---
2075        let total = self.total_bytes();
2076        // Deletions: gaps in Stored coverage (orig_offset, len).
2077        let mut deletions: Vec<(usize, usize)> = Vec::new();
2078        // Insertions: (post_delete_offset, location, buf_offset, bytes, lf_cnt).
2079        // post_delete_offset = cumulative surviving Stored bytes before this point.
2080        let mut insertions: Vec<(usize, BufferLocation, usize, usize, Option<usize>)> = Vec::new();
2081        let mut orig_cursor: usize = 0;
2082        let mut stored_bytes_in_doc: usize = 0;
2083
2084        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
2085            match piece.location {
2086                BufferLocation::Stored(_) => {
2087                    if piece.buffer_offset > orig_cursor {
2088                        deletions.push((orig_cursor, piece.buffer_offset - orig_cursor));
2089                    }
2090                    orig_cursor = piece.buffer_offset + piece.bytes;
2091                    stored_bytes_in_doc += piece.bytes;
2092                }
2093                BufferLocation::Added(id) => {
2094                    // Check if this Added buffer was created by loading a chunk
2095                    // from the stored file (via get_text_range_mut chunk loading).
2096                    // If so, treat it as stored content, not a user edit.
2097                    if let Some(file_off) = self.buffers.get(id).and_then(|b| b.stored_file_offset)
2098                    {
2099                        if file_off > orig_cursor {
2100                            deletions.push((orig_cursor, file_off - orig_cursor));
2101                        }
2102                        orig_cursor = file_off + piece.bytes;
2103                        stored_bytes_in_doc += piece.bytes;
2104                    } else {
2105                        insertions.push((
2106                            stored_bytes_in_doc,
2107                            piece.location,
2108                            piece.buffer_offset,
2109                            piece.bytes,
2110                            piece.line_feed_cnt,
2111                        ));
2112                    }
2113                }
2114            }
2115        }
2116        // Trailing deletion.
2117        if orig_cursor < file_size {
2118            deletions.push((orig_cursor, file_size - orig_cursor));
2119        }
2120
2121        // --- Build pristine tree (full original file, pre-split, with lf counts) ---
2122        let mut pristine = if file_size > 0 {
2123            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
2124        } else {
2125            PieceTree::empty()
2126        };
2127        pristine.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
2128        pristine.update_leaf_line_feeds(scan_updates);
2129
2130        // Snapshot the pristine tree as saved_root.
2131        self.persistence.set_saved_root(pristine.root());
2132
2133        // If no edits, the pristine tree IS the current tree.
2134        if deletions.is_empty() && insertions.is_empty() {
2135            self.piece_tree = pristine;
2136            self.file_kind.mark_line_feed_scan_complete();
2137            return;
2138        }
2139
2140        // --- Replay edits onto a clone of the pristine tree ---
2141        let mut tree = pristine;
2142
2143        // Apply deletions from HIGH to LOW offset so earlier offsets stay valid.
2144        deletions.sort_by(|a, b| b.0.cmp(&a.0));
2145        for &(offset, len) in &deletions {
2146            tree.delete(offset, len, &self.buffers);
2147        }
2148
2149        // Apply insertions from LOW to HIGH. Each insertion shifts subsequent
2150        // offsets by its byte count, tracked via insert_delta.
2151        let mut insert_delta: usize = 0;
2152        for &(offset, location, buf_offset, bytes, lf_cnt) in &insertions {
2153            tree.insert(
2154                offset + insert_delta,
2155                location,
2156                buf_offset,
2157                bytes,
2158                lf_cnt,
2159                &self.buffers,
2160            );
2161            insert_delta += bytes;
2162        }
2163
2164        // Path-copy insert/delete may split Stored leaves whose data is
2165        // Unloaded, producing fragments with line_feed_cnt = None
2166        // (compute_line_feeds_static can't read unloaded data). Fix them up
2167        // by scanning any remaining None leaves.
2168        let leaves = tree.get_leaves();
2169        let mut fixups: Vec<(usize, usize)> = Vec::new();
2170        for (idx, leaf) in leaves.iter().enumerate() {
2171            if leaf.line_feed_cnt.is_none() {
2172                if let Ok(count) = self.scan_leaf(leaf) {
2173                    fixups.push((idx, count));
2174                }
2175            }
2176        }
2177        if !fixups.is_empty() {
2178            tree.update_leaf_line_feeds_path_copy(&fixups);
2179        }
2180
2181        self.piece_tree = tree;
2182        self.file_kind.mark_line_feed_scan_complete();
2183    }
2184
2185    /// Resolve the exact byte offset for a given line number (0-indexed).
2186    ///
2187    /// Uses the tree's line feed counts to find the piece containing the target line,
2188    /// then loads/reads that piece's data to find the exact newline position.
2189    /// This works even when buffers are unloaded (large file with scanned line index).
2190    pub fn resolve_line_byte_offset(&mut self, target_line: usize) -> Option<usize> {
2191        if target_line == 0 {
2192            return Some(0);
2193        }
2194
2195        // Use tree metadata to find the piece containing the target line
2196        let (doc_offset, buffer_id, piece_offset, piece_bytes, lines_before) =
2197            self.piece_tree.piece_info_for_line(target_line)?;
2198
2199        // We need to find the (target_line - lines_before)-th newline within this piece
2200        let lines_to_skip = target_line - lines_before;
2201
2202        // Get the piece data — either from loaded buffer or read from disk
2203        let buffer = self.buffers.get(buffer_id)?;
2204        let piece_data: Vec<u8> = match &buffer.data {
2205            crate::model::piece_tree::BufferData::Loaded { data, .. } => {
2206                let end = (piece_offset + piece_bytes).min(data.len());
2207                data[piece_offset..end].to_vec()
2208            }
2209            crate::model::piece_tree::BufferData::Unloaded {
2210                file_path,
2211                file_offset,
2212                ..
2213            } => {
2214                let read_offset = *file_offset as u64 + piece_offset as u64;
2215                self.persistence
2216                    .fs()
2217                    .read_range(file_path, read_offset, piece_bytes)
2218                    .ok()?
2219            }
2220        };
2221
2222        // Count newlines to find the target line start
2223        let mut newlines_found = 0;
2224        for (i, &byte) in piece_data.iter().enumerate() {
2225            if byte == b'\n' {
2226                newlines_found += 1;
2227                if newlines_found == lines_to_skip {
2228                    // The target line starts right after this newline
2229                    return Some(doc_offset + i + 1);
2230                }
2231            }
2232        }
2233
2234        // If we didn't find enough newlines, the line starts in the next piece
2235        // Return the end of this piece as an approximation
2236        Some(doc_offset + piece_bytes)
2237    }
2238
2239    /// Get the saved file size (size of the file on disk after last load/save)
2240    /// For large files, this is used during recovery to know the expected original file size.
2241    /// Returns None for new unsaved buffers.
2242    pub fn original_file_size(&self) -> Option<usize> {
2243        // Return the tracked saved file size - this is updated when the file is
2244        // loaded or saved, so it always reflects the current file on disk.
2245        self.persistence.saved_file_size()
2246    }
2247
2248    /// Get recovery chunks for this buffer (only modified portions)
2249    ///
2250    /// For large files, this returns only the pieces that come from Added buffers
2251    /// (i.e., the modifications), not the original file content. This allows
2252    /// efficient incremental recovery without reading/writing the entire file.
2253    ///
2254    /// Returns: Vec of (original_file_offset, data) for each modified chunk
2255    /// The offset is the position in the ORIGINAL file where this chunk should be inserted.
2256    pub fn get_recovery_chunks(&self) -> Vec<(usize, Vec<u8>)> {
2257        use crate::model::piece_tree::BufferLocation;
2258
2259        let mut chunks = Vec::new();
2260        let total = self.total_bytes();
2261
2262        // Track cumulative bytes from Stored pieces as we iterate.
2263        // This gives us the original file offset for Added pieces.
2264        // The key insight: Added pieces should be inserted at the position
2265        // corresponding to where they appear relative to Stored content,
2266        // not their position in the current document.
2267        let mut stored_bytes_before = 0;
2268
2269        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
2270            match piece.location {
2271                BufferLocation::Stored(_) => {
2272                    // Accumulate stored bytes to track position in original file
2273                    stored_bytes_before += piece.bytes;
2274                }
2275                BufferLocation::Added(buffer_id) => {
2276                    if let Some(buffer) = self.buffers.iter().find(|b| b.id == buffer_id) {
2277                        // Skip buffers that originate from the original file
2278                        // (loaded by chunk_split_and_load for viewport display).
2279                        // These have stored_file_offset set and are not user edits.
2280                        //
2281                        // Why Added and not Stored? The piece tree only has two
2282                        // variants: Stored and Added. chunk_split_and_load marks
2283                        // loaded chunks as Added(new_id) because
2284                        // rebuild_with_pristine_saved_root interprets Stored
2285                        // pieces' buffer_offset as a position in the original
2286                        // file — but a chunk buffer starts at offset 0, so using
2287                        // Stored would corrupt the rebuild logic. We rely on
2288                        // stored_file_offset instead to distinguish "loaded from
2289                        // disk" from "user edit". A third BufferLocation variant
2290                        // (e.g. LoadedChunk) would make this distinction explicit
2291                        // in the type system rather than requiring this runtime
2292                        // check.
2293                        if buffer.stored_file_offset.is_some() {
2294                            stored_bytes_before += piece.bytes;
2295                            continue;
2296                        }
2297                        // Get the data from the buffer if loaded
2298                        if let Some(data) = buffer.get_data() {
2299                            // Extract just the portion this piece references
2300                            let start = piece.buffer_offset;
2301                            let end = start + piece.bytes;
2302                            if end <= data.len() {
2303                                // Use stored_bytes_before as the original file offset.
2304                                // This is where this insertion should go relative to
2305                                // the original file content.
2306                                chunks.push((stored_bytes_before, data[start..end].to_vec()));
2307                            }
2308                        }
2309                    }
2310                }
2311            }
2312        }
2313
2314        chunks
2315    }
2316
2317    /// Check if this buffer contains binary content
2318    pub fn is_binary(&self) -> bool {
2319        self.file_kind.is_binary()
2320    }
2321
2322    /// Get the line ending format for this buffer
2323    pub fn line_ending(&self) -> LineEnding {
2324        self.format.line_ending()
2325    }
2326
2327    /// Set the line ending format for this buffer
2328    ///
2329    /// This marks the buffer as modified since the line ending format has changed.
2330    /// On save, the buffer content will be converted to the new format.
2331    pub fn set_line_ending(&mut self, line_ending: LineEnding) {
2332        self.format.set_line_ending(line_ending);
2333        self.mark_content_modified();
2334    }
2335
2336    /// Set the default line ending format for a new/empty buffer
2337    ///
2338    /// Unlike `set_line_ending`, this does NOT mark the buffer as modified.
2339    /// This should be used when initializing a new buffer with a configured default.
2340    pub fn set_default_line_ending(&mut self, line_ending: LineEnding) {
2341        self.format.set_default_line_ending(line_ending);
2342    }
2343
2344    /// Get the encoding format for this buffer
2345    pub fn encoding(&self) -> Encoding {
2346        self.format.encoding()
2347    }
2348
2349    /// Set the encoding format for this buffer
2350    ///
2351    /// This marks the buffer as modified since the encoding format has changed.
2352    /// On save, the buffer content will be converted to the new encoding.
2353    pub fn set_encoding(&mut self, encoding: Encoding) {
2354        self.format.set_encoding(encoding);
2355        self.mark_content_modified();
2356    }
2357
2358    /// Set the default encoding format for a new/empty buffer
2359    ///
2360    /// Unlike `set_encoding`, this does NOT mark the buffer as modified.
2361    /// This should be used when initializing a new buffer with a configured default.
2362    pub fn set_default_encoding(&mut self, encoding: Encoding) {
2363        self.format.set_default_encoding(encoding);
2364    }
2365
2366    /// Get the first line of the buffer as a lossy UTF-8 string, suitable
2367    /// for shebang / first-line grammar detection. Returns `None` for an
2368    /// empty buffer. Non-UTF-8 bytes are replaced with U+FFFD.
2369    pub fn first_line_lossy(&self) -> Option<String> {
2370        let bytes = self.get_line(0)?;
2371        if bytes.is_empty() {
2372            return None;
2373        }
2374        Some(String::from_utf8_lossy(&bytes).into_owned())
2375    }
2376
2377    /// Get text for a specific line
2378    pub fn get_line(&self, line: usize) -> Option<Vec<u8>> {
2379        let (start, end) = self.piece_tree.line_range(line, &self.buffers)?;
2380
2381        let bytes = if let Some(end_offset) = end {
2382            end_offset.saturating_sub(start)
2383        } else {
2384            self.total_bytes().saturating_sub(start)
2385        };
2386
2387        self.get_text_range(start, bytes)
2388    }
2389
2390    /// Get the byte offset where a line starts
2391    pub fn line_start_offset(&self, line: usize) -> Option<usize> {
2392        let (start, _) = self.piece_tree.line_range(line, &self.buffers)?;
2393        Some(start)
2394    }
2395
2396    /// Get piece information at a byte offset
2397    pub fn piece_info_at_offset(&self, offset: usize) -> Option<PieceInfo> {
2398        self.piece_tree.find_by_offset(offset)
2399    }
2400
2401    /// Get tree statistics for debugging
2402    pub fn stats(&self) -> TreeStats {
2403        self.piece_tree.stats()
2404    }
2405
2406    // Search and Replace Operations
2407
2408    /// Find the next occurrence of a pattern, with wrap-around
2409    pub fn find_next(&self, pattern: &str, start_pos: usize) -> Option<usize> {
2410        if pattern.is_empty() {
2411            return None;
2412        }
2413
2414        let pattern_bytes = pattern.as_bytes();
2415        let buffer_len = self.len();
2416
2417        // Search from start_pos to end
2418        if start_pos < buffer_len {
2419            if let Some(offset) = self.find_pattern(start_pos, buffer_len, pattern_bytes) {
2420                return Some(offset);
2421            }
2422        }
2423
2424        // Wrap around: search from beginning to start_pos
2425        if start_pos > 0 {
2426            if let Some(offset) = self.find_pattern(0, start_pos, pattern_bytes) {
2427                return Some(offset);
2428            }
2429        }
2430
2431        None
2432    }
2433
2434    /// Find the next occurrence of a pattern within an optional range
2435    /// If range is None, searches the entire buffer with wrap-around (same as find_next)
2436    /// If range is Some, searches only within that range without wrap-around
2437    pub fn find_next_in_range(
2438        &self,
2439        pattern: &str,
2440        start_pos: usize,
2441        range: Option<Range<usize>>,
2442    ) -> Option<usize> {
2443        if pattern.is_empty() {
2444            return None;
2445        }
2446
2447        if let Some(search_range) = range {
2448            // Search within range only, no wrap-around
2449            let pattern_bytes = pattern.as_bytes();
2450            let search_start = start_pos.max(search_range.start);
2451            let search_end = search_range.end.min(self.len());
2452
2453            if search_start < search_end {
2454                self.find_pattern(search_start, search_end, pattern_bytes)
2455            } else {
2456                None
2457            }
2458        } else {
2459            // No range specified, use normal find_next with wrap-around
2460            self.find_next(pattern, start_pos)
2461        }
2462    }
2463
2464    /// Find pattern in a byte range using overlapping chunks
2465    fn find_pattern(&self, start: usize, end: usize, pattern: &[u8]) -> Option<usize> {
2466        if pattern.is_empty() || start >= end {
2467            return None;
2468        }
2469
2470        const CHUNK_SIZE: usize = 65536; // 64KB chunks
2471        let overlap = pattern.len().saturating_sub(1).max(1);
2472
2473        // Use the overlapping chunks iterator for efficient streaming search
2474        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, overlap);
2475
2476        for chunk in chunks {
2477            // Search the entire chunk buffer
2478            if let Some(pos) = Self::find_in_bytes(&chunk.buffer, pattern) {
2479                let match_end = pos + pattern.len();
2480                // Only report if match ENDS in or after the valid zone
2481                // This ensures patterns spanning boundaries are found exactly once
2482                if match_end > chunk.valid_start {
2483                    let absolute_pos = chunk.absolute_pos + pos;
2484                    // Verify the match doesn't extend beyond our search range
2485                    if absolute_pos + pattern.len() <= end {
2486                        return Some(absolute_pos);
2487                    }
2488                }
2489            }
2490        }
2491
2492        None
2493    }
2494
2495    /// Simple byte pattern search using naive algorithm
2496    fn find_in_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
2497        if needle.is_empty() || needle.len() > haystack.len() {
2498            return None;
2499        }
2500
2501        (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
2502    }
2503
2504    /// Find the next occurrence of a regex pattern, with wrap-around
2505    pub fn find_next_regex(&self, regex: &Regex, start_pos: usize) -> Option<usize> {
2506        let buffer_len = self.len();
2507
2508        // Search from start_pos to end
2509        if start_pos < buffer_len {
2510            if let Some(offset) = self.find_regex(start_pos, buffer_len, regex) {
2511                return Some(offset);
2512            }
2513        }
2514
2515        // Wrap around: search from beginning to start_pos
2516        if start_pos > 0 {
2517            if let Some(offset) = self.find_regex(0, start_pos, regex) {
2518                return Some(offset);
2519            }
2520        }
2521
2522        None
2523    }
2524
2525    /// Find the next occurrence of a regex pattern within an optional range
2526    pub fn find_next_regex_in_range(
2527        &self,
2528        regex: &Regex,
2529        start_pos: usize,
2530        range: Option<Range<usize>>,
2531    ) -> Option<usize> {
2532        if let Some(search_range) = range {
2533            let search_start = start_pos.max(search_range.start);
2534            let search_end = search_range.end.min(self.len());
2535
2536            if search_start < search_end {
2537                self.find_regex(search_start, search_end, regex)
2538            } else {
2539                None
2540            }
2541        } else {
2542            self.find_next_regex(regex, start_pos)
2543        }
2544    }
2545
2546    /// Find regex pattern in a byte range using overlapping chunks
2547    fn find_regex(&self, start: usize, end: usize, regex: &Regex) -> Option<usize> {
2548        if start >= end {
2549            return None;
2550        }
2551
2552        const CHUNK_SIZE: usize = 1048576; // 1MB chunks
2553        const OVERLAP: usize = 4096; // 4KB overlap for regex
2554
2555        // Use the overlapping chunks iterator for efficient streaming search
2556        // This fixes the critical bug where regex patterns spanning chunk boundaries were missed
2557        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, OVERLAP);
2558
2559        for chunk in chunks {
2560            // Search the entire chunk buffer
2561            if let Some(mat) = regex.find(&chunk.buffer) {
2562                let match_end = mat.end();
2563                // Only report if match ENDS in or after the valid zone
2564                // This ensures patterns spanning boundaries are found exactly once
2565                if match_end > chunk.valid_start {
2566                    let absolute_pos = chunk.absolute_pos + mat.start();
2567                    // Verify the match doesn't extend beyond our search range
2568                    let match_len = mat.end() - mat.start();
2569                    if absolute_pos + match_len <= end {
2570                        return Some(absolute_pos);
2571                    }
2572                }
2573            }
2574        }
2575
2576        None
2577    }
2578
2579    /// Replace a range with replacement text
2580    pub fn replace_range(&mut self, range: Range<usize>, replacement: &str) -> bool {
2581        if range.start >= self.len() {
2582            return false;
2583        }
2584
2585        let end = range.end.min(self.len());
2586        if end > range.start {
2587            self.delete_bytes(range.start, end - range.start);
2588        }
2589
2590        if !replacement.is_empty() {
2591            self.insert(range.start, replacement);
2592        }
2593
2594        true
2595    }
2596
2597    /// Find and replace the next occurrence of a pattern
2598    pub fn replace_next(
2599        &mut self,
2600        pattern: &str,
2601        replacement: &str,
2602        start_pos: usize,
2603        range: Option<Range<usize>>,
2604    ) -> Option<usize> {
2605        if let Some(pos) = self.find_next_in_range(pattern, start_pos, range.clone()) {
2606            self.replace_range(pos..pos + pattern.len(), replacement);
2607            Some(pos)
2608        } else {
2609            None
2610        }
2611    }
2612
2613    /// Replace all occurrences of a pattern with replacement text
2614    pub fn replace_all(&mut self, pattern: &str, replacement: &str) -> usize {
2615        if pattern.is_empty() {
2616            return 0;
2617        }
2618
2619        let mut count = 0;
2620        let mut pos = 0;
2621
2622        // Keep searching and replacing
2623        // Note: we search forward from last replacement to handle growth/shrinkage
2624        // Find next occurrence (no wrap-around for replace_all)
2625        while let Some(found_pos) = self.find_next_in_range(pattern, pos, Some(0..self.len())) {
2626            self.replace_range(found_pos..found_pos + pattern.len(), replacement);
2627            count += 1;
2628
2629            // Move past the replacement
2630            pos = found_pos + replacement.len();
2631
2632            // If we're at or past the end, stop
2633            if pos >= self.len() {
2634                break;
2635            }
2636        }
2637
2638        count
2639    }
2640
2641    /// Replace all occurrences of a regex pattern with replacement text
2642    pub fn replace_all_regex(&mut self, regex: &Regex, replacement: &str) -> Result<usize> {
2643        let mut count = 0;
2644        let mut pos = 0;
2645
2646        while let Some(found_pos) = self.find_next_regex_in_range(regex, pos, Some(0..self.len())) {
2647            // Get the match to find its length
2648            let text = self
2649                .get_text_range_mut(found_pos, self.len() - found_pos)
2650                .context("Failed to read text for regex match")?;
2651
2652            if let Some(mat) = regex.find(&text) {
2653                self.replace_range(found_pos..found_pos + mat.len(), replacement);
2654                count += 1;
2655                pos = found_pos + replacement.len();
2656
2657                if pos >= self.len() {
2658                    break;
2659                }
2660            } else {
2661                break;
2662            }
2663        }
2664
2665        Ok(count)
2666    }
2667
2668    // LSP Support (UTF-16 conversions)
2669
2670    /// Convert byte position to (line, column) in bytes
2671    pub fn position_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
2672        self.offset_to_position(byte_pos)
2673            .map(|pos| (pos.line, pos.column))
2674            .unwrap_or_else(|| (byte_pos / 80, 0)) // Estimate if metadata unavailable
2675    }
2676
2677    /// Convert (line, character) to byte position - 0-indexed
2678    /// character is in BYTES, not UTF-16 code units
2679    /// Optimized to use single line_range() call instead of two
2680    pub fn line_col_to_position(&self, line: usize, character: usize) -> usize {
2681        if let Some((start, end)) = self.piece_tree.line_range(line, &self.buffers) {
2682            // Calculate line length from the range
2683            let line_len = if let Some(end_offset) = end {
2684                end_offset.saturating_sub(start)
2685            } else {
2686                self.total_bytes().saturating_sub(start)
2687            };
2688            let byte_offset = character.min(line_len);
2689            start + byte_offset
2690        } else {
2691            // Line doesn't exist, return end of buffer
2692            self.len()
2693        }
2694    }
2695
2696    /// Convert byte position to LSP position (line, UTF-16 code units)
2697    /// LSP protocol uses UTF-16 code units for character offsets
2698    pub fn position_to_lsp_position(&self, byte_pos: usize) -> (usize, usize) {
2699        let (line, column_bytes) = self
2700            .offset_to_position(byte_pos)
2701            .map(|pos| (pos.line, pos.column))
2702            .unwrap_or_else(|| (byte_pos / 80, 0)); // Estimate if metadata unavailable
2703
2704        // Get the line content
2705        if let Some(line_bytes) = self.get_line(line) {
2706            // Convert byte offset to UTF-16 code units
2707            let text_before = &line_bytes[..column_bytes.min(line_bytes.len())];
2708            let text_str = String::from_utf8_lossy(text_before);
2709            let utf16_offset = text_str.encode_utf16().count();
2710            (line, utf16_offset)
2711        } else {
2712            (line, 0)
2713        }
2714    }
2715
2716    /// Convert LSP position (line, UTF-16 code units) to byte position
2717    /// LSP uses UTF-16 code units for character offsets, not bytes
2718    /// Optimized to use single line_range() call instead of two
2719    pub fn lsp_position_to_byte(&self, line: usize, utf16_offset: usize) -> usize {
2720        if let Some((line_start, end)) = self.piece_tree.line_range(line, &self.buffers) {
2721            // Calculate line length and get line content
2722            let line_len = if let Some(end_offset) = end {
2723                end_offset.saturating_sub(line_start)
2724            } else {
2725                self.total_bytes().saturating_sub(line_start)
2726            };
2727
2728            if line_len > 0 {
2729                // If data is unloaded, return line_start as fallback
2730                let Some(line_bytes) = self.get_text_range(line_start, line_len) else {
2731                    return line_start;
2732                };
2733                let line_str = String::from_utf8_lossy(&line_bytes);
2734
2735                // Convert UTF-16 offset to byte offset
2736                let mut utf16_count = 0;
2737                let mut byte_offset = 0;
2738
2739                for ch in line_str.chars() {
2740                    if utf16_count >= utf16_offset {
2741                        break;
2742                    }
2743                    utf16_count += ch.len_utf16();
2744                    byte_offset += ch.len_utf8();
2745                }
2746
2747                line_start + byte_offset
2748            } else {
2749                line_start
2750            }
2751        } else {
2752            // Line doesn't exist, return end of buffer
2753            self.len()
2754        }
2755    }
2756
2757    // Navigation helpers
2758
2759    /// Find the previous character boundary (UTF-8 aware)
2760    pub fn prev_char_boundary(&self, pos: usize) -> usize {
2761        if pos == 0 {
2762            return 0;
2763        }
2764
2765        // Get a few bytes before pos to find the character boundary
2766        let start = pos.saturating_sub(4);
2767        let Some(bytes) = self.get_text_range(start, pos - start) else {
2768            // Data unloaded, return pos as fallback
2769            return pos;
2770        };
2771
2772        // Walk backwards to find a UTF-8 leading byte
2773        for i in (0..bytes.len()).rev() {
2774            let byte = bytes[i];
2775            // Check if this is a UTF-8 leading byte (not a continuation byte)
2776            if (byte & 0b1100_0000) != 0b1000_0000 {
2777                return start + i;
2778            }
2779        }
2780
2781        // Fallback
2782        pos.saturating_sub(1)
2783    }
2784
2785    /// Find the next character boundary (UTF-8 aware)
2786    pub fn next_char_boundary(&self, pos: usize) -> usize {
2787        let len = self.len();
2788        if pos >= len {
2789            return len;
2790        }
2791
2792        // Get a few bytes after pos to find the character boundary
2793        let end = (pos + 5).min(len);
2794        let Some(bytes) = self.get_text_range(pos, end - pos) else {
2795            // Data unloaded, return pos as fallback
2796            return pos;
2797        };
2798
2799        // Start from index 1 (we want the NEXT boundary)
2800        for (i, &byte) in bytes.iter().enumerate().skip(1) {
2801            // Check if this is a UTF-8 leading byte (not a continuation byte)
2802            if (byte & 0b1100_0000) != 0b1000_0000 {
2803                return pos + i;
2804            }
2805        }
2806
2807        // If we got here, we're at the end or found no boundary in the range
2808        end
2809    }
2810
2811    /// Check if a byte is a UTF-8 continuation byte (not at a char boundary)
2812    /// UTF-8 continuation bytes have the pattern 10xxxxxx (0x80-0xBF)
2813    /// This is the same check that str::is_char_boundary uses internally.
2814    #[inline]
2815    fn is_utf8_continuation_byte(byte: u8) -> bool {
2816        (byte & 0b1100_0000) == 0b1000_0000
2817    }
2818
2819    /// Snap position to a valid UTF-8 character boundary
2820    /// If already at a boundary, returns the same position.
2821    /// Otherwise, moves to the previous valid boundary.
2822    pub fn snap_to_char_boundary(&self, pos: usize) -> usize {
2823        let len = self.len();
2824        if pos == 0 || pos >= len {
2825            return pos.min(len);
2826        }
2827
2828        // Get the byte at pos to check if we're at a character boundary
2829        let Some(bytes) = self.get_text_range(pos, 1) else {
2830            // Data unloaded, return pos as fallback
2831            return pos;
2832        };
2833
2834        // A position is at a char boundary if the byte there is NOT a continuation byte
2835        if !Self::is_utf8_continuation_byte(bytes[0]) {
2836            // Already at a character boundary
2837            return pos;
2838        }
2839
2840        // Not at a boundary, find the previous one
2841        self.prev_char_boundary(pos)
2842    }
2843
2844    /// Find the previous grapheme cluster boundary (for proper cursor movement with combining characters)
2845    ///
2846    /// This handles complex scripts like Thai where multiple Unicode code points
2847    /// form a single visual character (grapheme cluster). For example, Thai "ที่"
2848    /// is 3 code points but 1 grapheme cluster.
2849    ///
2850    /// The lookahead window starts at 32 bytes but grows whenever the
2851    /// returned boundary sits at the start of the chunk — that is, whenever
2852    /// the chunk might not contain the full grapheme. This matters for ZWJ
2853    /// emoji sequences and Zalgo strings with many combining marks, which
2854    /// can easily exceed 32 bytes.
2855    pub fn prev_grapheme_boundary(&self, pos: usize) -> usize {
2856        if pos == 0 {
2857            return 0;
2858        }
2859
2860        let mut lookback: usize = 32;
2861        loop {
2862            // IMPORTANT: Align start to a valid character boundary to avoid invalid UTF-8
2863            // when get_text_range starts mid-character
2864            let raw_start = pos.saturating_sub(lookback);
2865            let start = if raw_start == 0 {
2866                0
2867            } else {
2868                // Find the character boundary at or before raw_start
2869                self.prev_char_boundary(raw_start + 1)
2870            };
2871
2872            let Some(bytes) = self.get_text_range(start, pos - start) else {
2873                // Data unloaded, fall back to char boundary
2874                return self.prev_char_boundary(pos);
2875            };
2876
2877            let text = match std::str::from_utf8(&bytes) {
2878                Ok(s) => s,
2879                Err(e) => {
2880                    // Still got invalid UTF-8 (shouldn't happen after alignment)
2881                    // Try using just the valid portion
2882                    let valid_bytes = &bytes[..e.valid_up_to()];
2883                    match std::str::from_utf8(valid_bytes) {
2884                        Ok(s) if !s.is_empty() => s,
2885                        _ => return self.prev_char_boundary(pos),
2886                    }
2887                }
2888            };
2889
2890            // Use shared grapheme utility with relative position
2891            let rel_pos = pos - start;
2892            let new_rel_pos = grapheme::prev_grapheme_boundary(text, rel_pos);
2893
2894            // If the returned boundary is at the start of our chunk, the
2895            // grapheme may extend further back. Only trust the answer when
2896            // either we already reached the beginning of the buffer or the
2897            // boundary sits strictly inside the chunk.
2898            if new_rel_pos > 0 || start == 0 {
2899                return start + new_rel_pos;
2900            }
2901
2902            // Expand the lookback window and retry. Cap at the full buffer.
2903            if lookback >= pos {
2904                return 0;
2905            }
2906            lookback = lookback.saturating_mul(2);
2907        }
2908    }
2909
2910    /// Find the next grapheme cluster boundary (for proper cursor movement with combining characters)
2911    ///
2912    /// This handles complex scripts like Thai where multiple Unicode code points
2913    /// form a single visual character (grapheme cluster). For example, Thai "ที่"
2914    /// is 3 code points but 1 grapheme cluster.
2915    ///
2916    /// The lookahead window grows whenever the first grapheme reaches the
2917    /// end of the chunk — otherwise ZWJ emoji and Zalgo strings whose byte
2918    /// length exceeds the initial 32-byte window would be split mid-cluster.
2919    pub fn next_grapheme_boundary(&self, pos: usize) -> usize {
2920        let len = self.len();
2921        if pos >= len {
2922            return len;
2923        }
2924
2925        let mut lookahead: usize = 32;
2926        loop {
2927            let end = (pos + lookahead).min(len);
2928            let Some(bytes) = self.get_text_range(pos, end - pos) else {
2929                // Data unloaded, fall back to char boundary
2930                return self.next_char_boundary(pos);
2931            };
2932
2933            // Convert to UTF-8 string, handling the case where we might have
2934            // grabbed bytes that end mid-character (truncate to valid UTF-8)
2935            let text = match std::str::from_utf8(&bytes) {
2936                Ok(s) => s,
2937                Err(e) => {
2938                    // The bytes end in an incomplete UTF-8 sequence
2939                    // Use only the valid portion (which includes at least the first grapheme)
2940                    let valid_bytes = &bytes[..e.valid_up_to()];
2941                    match std::str::from_utf8(valid_bytes) {
2942                        Ok(s) if !s.is_empty() => s,
2943                        _ => return self.next_char_boundary(pos),
2944                    }
2945                }
2946            };
2947
2948            let new_rel_pos = grapheme::next_grapheme_boundary(text, 0);
2949
2950            // If the first grapheme reaches the end of our chunk and there
2951            // is more buffer left beyond it, the grapheme may extend further.
2952            // Expand the window and retry.
2953            if new_rel_pos == text.len() && end < len {
2954                if lookahead >= len - pos {
2955                    return len;
2956                }
2957                lookahead = lookahead.saturating_mul(2);
2958                continue;
2959            }
2960
2961            return pos + new_rel_pos;
2962        }
2963    }
2964
2965    /// Find the previous word boundary
2966    pub fn prev_word_boundary(&self, pos: usize) -> usize {
2967        if pos == 0 {
2968            return 0;
2969        }
2970
2971        // Get some text before pos
2972        let start = pos.saturating_sub(256).max(0);
2973        let Some(bytes) = self.get_text_range(start, pos - start) else {
2974            // Data unloaded, return pos as fallback
2975            return pos;
2976        };
2977        let text = String::from_utf8_lossy(&bytes);
2978
2979        let mut found_word_char = false;
2980        let chars: Vec<char> = text.chars().collect();
2981
2982        for i in (0..chars.len()).rev() {
2983            let ch = chars[i];
2984            let is_word_char = ch.is_alphanumeric() || ch == '_';
2985
2986            if found_word_char && !is_word_char {
2987                // We've transitioned from word to non-word
2988                // Calculate the byte position
2989                let byte_offset: usize = chars[0..=i].iter().map(|c| c.len_utf8()).sum();
2990                return start + byte_offset;
2991            }
2992
2993            if is_word_char {
2994                found_word_char = true;
2995            }
2996        }
2997
2998        0
2999    }
3000
3001    /// Find the next word boundary
3002    pub fn next_word_boundary(&self, pos: usize) -> usize {
3003        let len = self.len();
3004        if pos >= len {
3005            return len;
3006        }
3007
3008        // Get some text after pos
3009        let end = (pos + 256).min(len);
3010        let Some(bytes) = self.get_text_range(pos, end - pos) else {
3011            // Data unloaded, return pos as fallback
3012            return pos;
3013        };
3014        let text = String::from_utf8_lossy(&bytes);
3015
3016        let mut found_word_char = false;
3017        let mut byte_offset = 0;
3018
3019        for ch in text.chars() {
3020            let is_word_char = ch.is_alphanumeric() || ch == '_';
3021
3022            if found_word_char && !is_word_char {
3023                // We've transitioned from word to non-word
3024                return pos + byte_offset;
3025            }
3026
3027            if is_word_char {
3028                found_word_char = true;
3029            }
3030
3031            byte_offset += ch.len_utf8();
3032        }
3033
3034        len
3035    }
3036
3037    /// Create a line iterator starting at the given byte position
3038    ///
3039    /// This iterator lazily loads chunks as needed, never scanning the entire file.
3040    /// For large files with unloaded buffers, chunks are loaded on-demand (1MB at a time).
3041    pub fn line_iterator(
3042        &mut self,
3043        byte_pos: usize,
3044        estimated_line_length: usize,
3045    ) -> LineIterator<'_> {
3046        LineIterator::new(self, byte_pos, estimated_line_length)
3047    }
3048
3049    /// Iterate over lines starting from a given byte offset, with line numbers
3050    ///
3051    /// This is a more efficient alternative to using line_iterator() + offset_to_position()
3052    /// because it calculates line numbers incrementally during iteration by accumulating
3053    /// line_feed_cnt from pieces (which is already tracked in the piece tree).
3054    ///
3055    /// Returns: Iterator yielding (byte_offset, content, line_number: Option<usize>)
3056    /// - line_number is Some(n) for small files with line metadata
3057    /// - line_number is None for large files without line metadata
3058    ///
3059    /// # Performance
3060    /// - O(1) per line for line number calculation (vs O(log n) per line with offset_to_position)
3061    /// - Uses single source of truth: piece tree's existing line_feed_cnt metadata
3062    pub fn iter_lines_from(
3063        &mut self,
3064        byte_pos: usize,
3065        max_lines: usize,
3066    ) -> Result<TextBufferLineIterator> {
3067        TextBufferLineIterator::new(self, byte_pos, max_lines)
3068    }
3069
3070    // Legacy API methods for backwards compatibility
3071
3072    /// Get the line number for a given byte offset
3073    ///
3074    /// Returns exact line number if metadata available, otherwise estimates based on bytes.
3075    ///
3076    /// # Behavior by File Size:
3077    /// - **Small files (< 1MB)**: Returns exact line number from piece tree's `line_starts` metadata
3078    /// - **Large files (≥ 1MB)**: Returns estimated line number using `byte_offset / estimated_line_length`
3079    ///
3080    /// Large files don't maintain line metadata for performance reasons. The estimation
3081    /// uses the configured `estimated_line_length` (default 80 bytes).
3082    pub fn get_line_number(&self, byte_offset: usize) -> usize {
3083        self.offset_to_position(byte_offset)
3084            .map(|pos| pos.line)
3085            .unwrap_or_else(|| {
3086                // Estimate line number based on configured average line length
3087                byte_offset / self.config.estimated_line_length
3088            })
3089    }
3090
3091    /// Get the configured estimated line length for approximate line number calculations.
3092    pub fn estimated_line_length(&self) -> usize {
3093        self.config.estimated_line_length
3094    }
3095
3096    /// Get the starting line number at a byte offset (used for viewport rendering)
3097    ///
3098    /// # Line Cache Architecture (Post-Refactoring):
3099    ///
3100    /// The concept of a separate "line cache" is **now obsolete**. After the refactoring,
3101    /// line tracking is integrated directly into the piece tree via:
3102    /// ```rust
3103    /// BufferData::Loaded {
3104    ///     data: Vec<u8>,
3105    ///     line_starts: Option<Vec<usize>>  // None = large file mode (no line metadata)
3106    /// }
3107    /// ```
3108    ///
3109    /// ## Why This Method Still Exists:
3110    /// The rendering code needs to know what line number to display in the margin at the
3111    /// top of the viewport. This method returns that line number, handling both small
3112    /// and large file modes transparently.
3113    ///
3114    /// ## Small vs Large File Modes:
3115    /// - **Small files**: `line_starts = Some(vec)` → returns exact line number from metadata
3116    /// - **Large files**: `line_starts = None` → returns estimated line number (byte_offset / estimated_line_length)
3117    ///
3118    /// ## Legacy Line Cache Methods:
3119    /// These methods are now no-ops and can be removed in a future cleanup:
3120    /// - `invalidate_line_cache_from()` - No-op (piece tree updates automatically)
3121    /// - `handle_line_cache_insertion()` - No-op (piece tree updates automatically)
3122    /// - `handle_line_cache_deletion()` - No-op (piece tree updates automatically)
3123    /// - `clear_line_cache()` - No-op (can't clear piece tree metadata)
3124    ///
3125    /// ## Bug Fix (2025-11):
3126    /// Previously this method always returned `0`, causing line numbers in the margin
3127    /// to always show 1, 2, 3... regardless of scroll position. Now it correctly returns
3128    /// the actual line number at `start_byte`.
3129    pub fn populate_line_cache(&mut self, start_byte: usize, _line_count: usize) -> usize {
3130        // No-op for cache population: LineIndex maintains all line starts automatically
3131        // But we need to return the actual line number at start_byte for rendering
3132        self.get_line_number(start_byte)
3133    }
3134
3135    /// Get cached byte offset for line (compatibility method)
3136    pub fn get_cached_byte_offset_for_line(&self, line_number: usize) -> Option<usize> {
3137        self.line_start_offset(line_number)
3138    }
3139
3140    /// Invalidate line cache from offset (no-op in new implementation)
3141    pub fn invalidate_line_cache_from(&mut self, _byte_offset: usize) {
3142        // No-op: LineIndex updates automatically
3143    }
3144
3145    /// Handle line cache insertion (no-op in new implementation)
3146    pub fn handle_line_cache_insertion(&mut self, _byte_offset: usize, _bytes_inserted: usize) {
3147        // No-op: LineIndex updates automatically during insert
3148    }
3149
3150    /// Handle line cache deletion (no-op in new implementation)
3151    pub fn handle_line_cache_deletion(&mut self, _byte_offset: usize, _bytes_deleted: usize) {
3152        // No-op: LineIndex updates automatically during delete
3153    }
3154
3155    /// Clear line cache (no-op in new implementation)
3156    pub fn clear_line_cache(&mut self) {
3157        // No-op: LineIndex can't be cleared
3158    }
3159
3160    // Test helper methods
3161
3162    /// Create a buffer from a string for testing
3163    #[cfg(test)]
3164    pub fn from_str_test(s: &str) -> Self {
3165        Self::from_bytes(
3166            s.as_bytes().to_vec(),
3167            std::sync::Arc::new(crate::model::filesystem::StdFileSystem),
3168        )
3169    }
3170
3171    /// Create a new empty buffer for testing
3172    #[cfg(test)]
3173    pub fn new_test() -> Self {
3174        Self::empty(std::sync::Arc::new(crate::model::filesystem::StdFileSystem))
3175    }
3176}
3177
3178/// Type alias for backwards compatibility
3179pub type Buffer = TextBuffer;
3180
3181// Re-export LineIterator from the line_iterator module
3182pub use crate::primitives::line_iterator::LineIterator;
3183
3184// ============================================================================
3185// Overlapping Chunks Iterator for Efficient Search
3186// ============================================================================
3187
/// One chunk of document bytes handed to a pattern matcher.
///
/// Produced by `OverlappingChunks`; each value owns its own copy of the bytes.
#[derive(Debug)]
pub struct ChunkInfo {
    /// Bytes of this chunk, beginning with any overlap carried over from the
    /// previous chunk.
    pub buffer: Vec<u8>,

    /// Absolute document position of `buffer[0]`.
    pub absolute_pos: usize,

    /// Index into `buffer` where the "new" data (the valid match zone) begins.
    /// Everything before this index was already part of the previous chunk.
    pub valid_start: usize,
}
3201
3202/// Iterator that yields overlapping chunks for pattern matching
3203///
3204/// This iterator implements the VSCode/Sublime approach: pull overlapping chunks
3205/// from the underlying piece tree and use standard search algorithms on them.
3206///
3207/// # Algorithm
3208///
3209/// ```text
3210/// Chunk 1: [------------ valid -----------]
3211/// Chunk 2:      [overlap][---- valid ----]
3212/// Chunk 3:                   [overlap][-- valid --]
3213///
3214/// Only matches starting in the "valid" zone are reported to avoid duplicates.
3215/// ```
3216///
3217/// # Example
3218///
3219/// ```ignore
3220/// let chunks = OverlappingChunks::new(&text_buffer, start, end, 4096, pattern.len()-1);
3221/// for chunk in chunks {
3222///     // Search only starting from chunk.valid_start
3223///     if let Some(pos) = search(&chunk.buffer[chunk.valid_start..]) {
3224///         let absolute_pos = chunk.absolute_pos + chunk.valid_start + pos;
3225///         return Some(absolute_pos);
3226///     }
3227/// }
3228/// ```
3229pub struct OverlappingChunks<'a> {
3230    piece_iter: PieceRangeIter,
3231    buffers: &'a [StringBuffer],
3232
3233    // Reusable chunk buffer that we fill from pieces
3234    buffer: Vec<u8>,
3235    buffer_absolute_pos: usize,
3236
3237    // Current state
3238    current_pos: usize,
3239    end_pos: usize,
3240
3241    // Configuration
3242    chunk_size: usize,
3243    overlap: usize,
3244
3245    // Track first chunk special case
3246    first_chunk: bool,
3247
3248    // Cached piece data for incremental reading
3249    current_piece_data: Option<Vec<u8>>,
3250    current_piece_offset: usize,
3251}
3252
3253impl<'a> OverlappingChunks<'a> {
3254    /// Create a new overlapping chunks iterator
3255    ///
3256    /// # Arguments
3257    ///
3258    /// * `text_buffer` - The text buffer to iterate over
3259    /// * `start` - Start position in the document
3260    /// * `end` - End position in the document (exclusive)
3261    /// * `chunk_size` - Target size for each chunk (excluding overlap)
3262    /// * `overlap` - Number of bytes to overlap between chunks
3263    ///
3264    /// # Recommendations
3265    ///
3266    /// * For literal string search: `chunk_size=65536, overlap=pattern.len()-1`
3267    /// * For regex search: `chunk_size=1048576, overlap=4096`
3268    pub fn new(
3269        text_buffer: &'a TextBuffer,
3270        start: usize,
3271        end: usize,
3272        chunk_size: usize,
3273        overlap: usize,
3274    ) -> Self {
3275        let piece_iter = text_buffer.piece_tree.iter_pieces_in_range(start, end);
3276
3277        Self {
3278            piece_iter,
3279            buffers: &text_buffer.buffers,
3280            buffer: Vec::with_capacity(chunk_size + overlap),
3281            buffer_absolute_pos: start,
3282            current_pos: start,
3283            end_pos: end,
3284            chunk_size,
3285            overlap,
3286            first_chunk: true,
3287            current_piece_data: None,
3288            current_piece_offset: 0,
3289        }
3290    }
3291
3292    /// Read one byte from the piece iterator
3293    fn read_byte(&mut self) -> Option<u8> {
3294        loop {
3295            // If we have cached piece data, read from it
3296            if let Some(ref data) = self.current_piece_data {
3297                if self.current_piece_offset < data.len() {
3298                    let byte = data[self.current_piece_offset];
3299                    self.current_piece_offset += 1;
3300                    self.current_pos += 1;
3301                    return Some(byte);
3302                } else {
3303                    // Exhausted current piece, move to next
3304                    self.current_piece_data = None;
3305                    self.current_piece_offset = 0;
3306                }
3307            }
3308
3309            // Get next piece
3310            if let Some(piece_view) = self.piece_iter.next() {
3311                let buffer_id = piece_view.location.buffer_id();
3312                if let Some(buffer) = self.buffers.get(buffer_id) {
3313                    // Extract the relevant slice from this piece
3314                    let piece_start_in_doc = piece_view.doc_offset;
3315                    let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;
3316
3317                    // Clip to our search range
3318                    let read_start = self.current_pos.max(piece_start_in_doc);
3319                    let read_end = self.end_pos.min(piece_end_in_doc);
3320
3321                    if read_end > read_start {
3322                        let offset_in_piece = read_start - piece_start_in_doc;
3323                        let bytes_to_read = read_end - read_start;
3324
3325                        let buffer_start = piece_view.buffer_offset + offset_in_piece;
3326                        let buffer_end = buffer_start + bytes_to_read;
3327
3328                        if let Some(data) = buffer.get_data() {
3329                            if buffer_end <= data.len() {
3330                                // Cache this piece's data
3331                                self.current_piece_data =
3332                                    Some(data[buffer_start..buffer_end].to_vec());
3333                                self.current_piece_offset = 0;
3334                                continue;
3335                            }
3336                        }
3337                    }
3338                }
3339            }
3340
3341            // No more data
3342            return None;
3343        }
3344    }
3345
3346    /// Fill the buffer with the next chunk of data
3347    fn fill_next_chunk(&mut self) -> bool {
3348        if self.first_chunk {
3349            // First chunk: fill up to chunk_size
3350            self.first_chunk = false;
3351            while self.buffer.len() < self.chunk_size && self.current_pos < self.end_pos {
3352                if let Some(byte) = self.read_byte() {
3353                    self.buffer.push(byte);
3354                } else {
3355                    break;
3356                }
3357            }
3358            !self.buffer.is_empty()
3359        } else {
3360            // Subsequent chunks: keep overlap, fill chunk_size NEW bytes
3361            if self.current_pos >= self.end_pos {
3362                return false;
3363            }
3364
3365            // Keep overlap bytes at the end
3366            if self.buffer.len() > self.overlap {
3367                let drain_amount = self.buffer.len() - self.overlap;
3368                self.buffer.drain(0..drain_amount);
3369                self.buffer_absolute_pos += drain_amount;
3370            }
3371
3372            // Fill chunk_size NEW bytes (in addition to overlap)
3373            let before_len = self.buffer.len();
3374            let target_len = self.overlap + self.chunk_size;
3375            while self.buffer.len() < target_len && self.current_pos < self.end_pos {
3376                if let Some(byte) = self.read_byte() {
3377                    self.buffer.push(byte);
3378                } else {
3379                    break;
3380                }
3381            }
3382
3383            // Return true if we added new data
3384            self.buffer.len() > before_len
3385        }
3386    }
3387}
3388
3389impl<'a> Iterator for OverlappingChunks<'a> {
3390    type Item = ChunkInfo;
3391
3392    fn next(&mut self) -> Option<Self::Item> {
3393        // Track if this is the first chunk before filling
3394        let is_first = self.buffer_absolute_pos == self.current_pos;
3395
3396        if !self.fill_next_chunk() {
3397            return None;
3398        }
3399
3400        // First chunk: all data is valid (no overlap from previous)
3401        // Subsequent chunks: overlap bytes are not valid (already checked)
3402        let valid_start = if is_first {
3403            0
3404        } else {
3405            self.overlap.min(self.buffer.len())
3406        };
3407
3408        Some(ChunkInfo {
3409            buffer: self.buffer.clone(),
3410            absolute_pos: self.buffer_absolute_pos,
3411            valid_start,
3412        })
3413    }
3414}
3415
3416#[cfg(test)]
3417mod tests;
3418
3419#[cfg(test)]
3420mod property_tests;
3421
/// A single line of text together with its position and optional line number.
#[derive(Debug, Clone)]
pub struct LineData {
    /// Document byte offset at which this line begins.
    pub byte_offset: usize,
    /// Text of the line, with the trailing `\n` (if any) removed.
    pub content: String,
    /// `true` when the line was terminated by a newline.
    pub has_newline: bool,
    /// Line number, or `None` for buffers without line metadata (large files).
    pub line_number: Option<usize>,
}
3434
3435/// Iterator over lines in a TextBuffer that efficiently tracks line numbers
3436/// using piece tree metadata (single source of truth)
3437pub struct TextBufferLineIterator {
3438    /// Collected lines (we collect all at once since we need mutable access to load chunks)
3439    lines: Vec<LineData>,
3440    /// Current index in the lines vector
3441    current_index: usize,
3442    /// Whether there are more lines after these
3443    pub has_more: bool,
3444}
3445
3446impl TextBufferLineIterator {
3447    pub(crate) fn new(buffer: &mut TextBuffer, byte_pos: usize, max_lines: usize) -> Result<Self> {
3448        let buffer_len = buffer.len();
3449        if byte_pos >= buffer_len {
3450            return Ok(Self {
3451                lines: Vec::new(),
3452                current_index: 0,
3453                has_more: false,
3454            });
3455        }
3456
3457        // Check if buffer has line metadata (None for large files > 1MB)
3458        let has_line_metadata = buffer.line_count().is_some();
3459
3460        // Determine starting line number by querying piece tree once
3461        // (only if we have line metadata)
3462        let mut current_line = if has_line_metadata {
3463            buffer.offset_to_position(byte_pos).map(|pos| pos.line)
3464        } else {
3465            None
3466        };
3467
3468        let mut lines = Vec::with_capacity(max_lines);
3469        let mut current_offset = byte_pos;
3470        let estimated_line_length = 80; // Use default estimate
3471
3472        // Collect lines by scanning forward
3473        for _ in 0..max_lines {
3474            if current_offset >= buffer_len {
3475                break;
3476            }
3477
3478            let line_start = current_offset;
3479            let line_number = current_line;
3480
3481            // Estimate how many bytes to load for this line
3482            let estimated_max_line_length = estimated_line_length * 3;
3483            let bytes_to_scan = estimated_max_line_length.min(buffer_len - current_offset);
3484
3485            // Load chunk (this handles lazy loading)
3486            let chunk = buffer.get_text_range_mut(current_offset, bytes_to_scan)?;
3487
3488            // Scan for newline
3489            let mut line_len = 0;
3490            let mut found_newline = false;
3491            for &byte in chunk.iter() {
3492                line_len += 1;
3493                if byte == b'\n' {
3494                    found_newline = true;
3495                    break;
3496                }
3497            }
3498
3499            // Handle long lines (rare case)
3500            if !found_newline && current_offset + line_len < buffer_len {
3501                // Line is longer than expected, load more data
3502                let remaining = buffer_len - current_offset - line_len;
3503                let additional_bytes = estimated_max_line_length.min(remaining);
3504                let more_chunk =
3505                    buffer.get_text_range_mut(current_offset + line_len, additional_bytes)?;
3506
3507                let mut extended_chunk = chunk;
3508                extended_chunk.extend_from_slice(&more_chunk);
3509
3510                for &byte in more_chunk.iter() {
3511                    line_len += 1;
3512                    if byte == b'\n' {
3513                        found_newline = true;
3514                        break;
3515                    }
3516                }
3517
3518                let line_string = String::from_utf8_lossy(&extended_chunk[..line_len]).into_owned();
3519                let has_newline = line_string.ends_with('\n');
3520                let content = if has_newline {
3521                    line_string[..line_string.len() - 1].to_string()
3522                } else {
3523                    line_string
3524                };
3525
3526                lines.push(LineData {
3527                    byte_offset: line_start,
3528                    content,
3529                    has_newline,
3530                    line_number,
3531                });
3532
3533                current_offset += line_len;
3534                if has_line_metadata && found_newline {
3535                    current_line = current_line.map(|n| n + 1);
3536                }
3537                continue;
3538            }
3539
3540            // Normal case
3541            let line_string = String::from_utf8_lossy(&chunk[..line_len]).into_owned();
3542            let has_newline = line_string.ends_with('\n');
3543            let content = if has_newline {
3544                line_string[..line_string.len() - 1].to_string()
3545            } else {
3546                line_string
3547            };
3548
3549            lines.push(LineData {
3550                byte_offset: line_start,
3551                content,
3552                has_newline,
3553                line_number,
3554            });
3555
3556            current_offset += line_len;
3557            // Increment line number if we have metadata and found a newline
3558            if has_line_metadata && found_newline {
3559                current_line = current_line.map(|n| n + 1);
3560            }
3561        }
3562
3563        // Check if there are more lines
3564        let has_more = current_offset < buffer_len;
3565
3566        Ok(Self {
3567            lines,
3568            current_index: 0,
3569            has_more,
3570        })
3571    }
3572}
3573
3574impl Iterator for TextBufferLineIterator {
3575    type Item = LineData;
3576
3577    fn next(&mut self) -> Option<Self::Item> {
3578        if self.current_index < self.lines.len() {
3579            let line = self.lines[self.current_index].clone();
3580            self.current_index += 1;
3581            Some(line)
3582        } else {
3583            None
3584        }
3585    }
3586}
fresh/model/buffer/mod.rs

fresh/model/buffer/
mod.rs